coyotte508 HF staff coyotte508 HF staff julien-c HF staff victor HF staff committed on
Commit
3da7ec7
1 Parent(s): 88e9476

Export to parquet (#151)

Browse files

Co-authored-by: Eliott C. <coyotte508@gmail.com>
Co-authored-by: Julien Chaumond <julien@huggingface.co>
Co-authored-by: Victor Mustar <victor.mustar@gmail.com>

.env CHANGED
@@ -72,3 +72,7 @@ MODELS=`[
72
  PUBLIC_ORIGIN=#https://hf.co
73
  PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
74
  PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID=#UA-XXXXXXXX-X / Leave empty to disable
 
 
 
 
 
72
  PUBLIC_ORIGIN=#https://hf.co
73
  PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
74
  PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID=#UA-XXXXXXXX-X / Leave empty to disable
75
+
76
+ PARQUET_EXPORT_DATASET=
77
+ PARQUET_EXPORT_HF_TOKEN=
78
+ PARQUET_EXPORT_SECRET=
package-lock.json CHANGED
@@ -8,6 +8,7 @@
8
  "name": "chat-ui",
9
  "version": "0.1.0",
10
  "dependencies": {
 
11
  "@huggingface/inference": "^2.2.0",
12
  "autoprefixer": "^10.4.14",
13
  "date-fns": "^2.29.3",
@@ -16,6 +17,7 @@
16
  "marked": "^4.3.0",
17
  "mongodb": "^5.3.0",
18
  "nanoid": "^4.0.2",
 
19
  "postcss": "^8.4.21",
20
  "tailwind-scrollbar": "^3.0.0",
21
  "tailwindcss": "^3.3.1",
@@ -27,6 +29,7 @@
27
  "@sveltejs/kit": "^1.15.10",
28
  "@tailwindcss/typography": "^0.5.9",
29
  "@types/marked": "^4.0.8",
 
30
  "@typescript-eslint/eslint-plugin": "^5.45.0",
31
  "@typescript-eslint/parser": "^5.45.0",
32
  "eslint": "^8.28.0",
@@ -473,6 +476,17 @@
473
  "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
474
  }
475
  },
 
 
 
 
 
 
 
 
 
 
 
476
  "node_modules/@huggingface/inference": {
477
  "version": "2.2.0",
478
  "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.2.0.tgz",
@@ -893,6 +907,24 @@
893
  "resolved": "https://registry.npmjs.org/@types/node/-/node-18.13.0.tgz",
894
  "integrity": "sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg=="
895
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
896
  "node_modules/@types/pug": {
897
  "version": "2.0.6",
898
  "resolved": "https://registry.npmjs.org/@types/pug/-/pug-2.0.6.tgz",
@@ -1248,6 +1280,25 @@
1248
  "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
1249
  "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
1250
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1251
  "node_modules/binary-extensions": {
1252
  "version": "2.2.0",
1253
  "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
@@ -1256,6 +1307,12 @@
1256
  "node": ">=8"
1257
  }
1258
  },
 
 
 
 
 
 
1259
  "node_modules/brace-expansion": {
1260
  "version": "1.1.11",
1261
  "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
@@ -1276,6 +1333,14 @@
1276
  "node": ">=8"
1277
  }
1278
  },
 
 
 
 
 
 
 
 
1279
  "node_modules/browserslist": {
1280
  "version": "4.21.5",
1281
  "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.5.tgz",
@@ -2204,6 +2269,11 @@
2204
  "node": ">=8"
2205
  }
2206
  },
 
 
 
 
 
2207
  "node_modules/highlight.js": {
2208
  "version": "11.7.0",
2209
  "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.7.0.tgz",
@@ -2269,6 +2339,11 @@
2269
  "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
2270
  "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
2271
  },
 
 
 
 
 
2272
  "node_modules/ip": {
2273
  "version": "2.0.0",
2274
  "resolved": "https://registry.npmjs.org/ip/-/ip-2.0.0.tgz",
@@ -2520,6 +2595,16 @@
2520
  "node": ">=10"
2521
  }
2522
  },
 
 
 
 
 
 
 
 
 
 
2523
  "node_modules/magic-string": {
2524
  "version": "0.30.0",
2525
  "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.0.tgz",
@@ -2741,6 +2826,11 @@
2741
  "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
2742
  "dev": true
2743
  },
 
 
 
 
 
2744
  "node_modules/node-releases": {
2745
  "version": "2.0.10",
2746
  "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz",
@@ -2790,6 +2880,14 @@
2790
  "node": ">= 6"
2791
  }
2792
  },
 
 
 
 
 
 
 
 
2793
  "node_modules/once": {
2794
  "version": "1.4.0",
2795
  "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -2872,6 +2970,34 @@
2872
  "node": ">=6"
2873
  }
2874
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2875
  "node_modules/path-exists": {
2876
  "version": "4.0.0",
2877
  "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -3197,6 +3323,15 @@
3197
  "node": ">=6"
3198
  }
3199
  },
 
 
 
 
 
 
 
 
 
3200
  "node_modules/queue-microtask": {
3201
  "version": "1.2.3",
3202
  "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@@ -3473,6 +3608,11 @@
3473
  "npm": ">= 3.0.0"
3474
  }
3475
  },
 
 
 
 
 
3476
  "node_modules/socks": {
3477
  "version": "2.7.1",
3478
  "resolved": "https://registry.npmjs.org/socks/-/socks-2.7.1.tgz",
@@ -3842,6 +3982,19 @@
3842
  "node": ">=0.8"
3843
  }
3844
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
3845
  "node_modules/tiny-glob": {
3846
  "version": "0.2.9",
3847
  "resolved": "https://registry.npmjs.org/tiny-glob/-/tiny-glob-0.2.9.tgz",
@@ -4053,6 +4206,11 @@
4053
  "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
4054
  "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
4055
  },
 
 
 
 
 
4056
  "node_modules/vite": {
4057
  "version": "4.2.1",
4058
  "resolved": "https://registry.npmjs.org/vite/-/vite-4.2.1.tgz",
@@ -4180,6 +4338,26 @@
4180
  "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
4181
  "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
4182
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4183
  "node_modules/yallist": {
4184
  "version": "4.0.0",
4185
  "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
 
8
  "name": "chat-ui",
9
  "version": "0.1.0",
10
  "dependencies": {
11
+ "@huggingface/hub": "^0.5.1",
12
  "@huggingface/inference": "^2.2.0",
13
  "autoprefixer": "^10.4.14",
14
  "date-fns": "^2.29.3",
 
17
  "marked": "^4.3.0",
18
  "mongodb": "^5.3.0",
19
  "nanoid": "^4.0.2",
20
+ "parquetjs": "^0.11.2",
21
  "postcss": "^8.4.21",
22
  "tailwind-scrollbar": "^3.0.0",
23
  "tailwindcss": "^3.3.1",
 
29
  "@sveltejs/kit": "^1.15.10",
30
  "@tailwindcss/typography": "^0.5.9",
31
  "@types/marked": "^4.0.8",
32
+ "@types/parquetjs": "^0.10.3",
33
  "@typescript-eslint/eslint-plugin": "^5.45.0",
34
  "@typescript-eslint/parser": "^5.45.0",
35
  "eslint": "^8.28.0",
 
476
  "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
477
  }
478
  },
479
+ "node_modules/@huggingface/hub": {
480
+ "version": "0.5.1",
481
+ "resolved": "https://registry.npmjs.org/@huggingface/hub/-/hub-0.5.1.tgz",
482
+ "integrity": "sha512-ZaE2gY8NY+XwIOL7+gBhPq19PXG4gbGSSJ7zwWLoq6MKP+nsgkQk/c7fBFrxgBwR6lNd0AJMHPRCjwTndqsqWQ==",
483
+ "dependencies": {
484
+ "hash-wasm": "^4.9.0"
485
+ },
486
+ "engines": {
487
+ "node": ">=18"
488
+ }
489
+ },
490
  "node_modules/@huggingface/inference": {
491
  "version": "2.2.0",
492
  "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.2.0.tgz",
 
907
  "resolved": "https://registry.npmjs.org/@types/node/-/node-18.13.0.tgz",
908
  "integrity": "sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg=="
909
  },
910
+ "node_modules/@types/node-int64": {
911
+ "version": "0.4.29",
912
+ "resolved": "https://registry.npmjs.org/@types/node-int64/-/node-int64-0.4.29.tgz",
913
+ "integrity": "sha512-rHXvenLTj/CcsmNAebaBOhxQ2MqEGl3yXZZcZ21XYR+gzGTTcpOy2N4IxpvTCz48loyQNatHvfn6GhIbbZ1R3Q==",
914
+ "dev": true,
915
+ "dependencies": {
916
+ "@types/node": "*"
917
+ }
918
+ },
919
+ "node_modules/@types/parquetjs": {
920
+ "version": "0.10.3",
921
+ "resolved": "https://registry.npmjs.org/@types/parquetjs/-/parquetjs-0.10.3.tgz",
922
+ "integrity": "sha512-n0xVEor3+3qHfCmFAf0pO4m/Pxc5JEmiVkEWWqJexN+p11/Nr+rqABKcIEj4X6tGKF1cnVIeBqy67mW2Yd+Kbg==",
923
+ "dev": true,
924
+ "dependencies": {
925
+ "@types/node-int64": "*"
926
+ }
927
+ },
928
  "node_modules/@types/pug": {
929
  "version": "2.0.6",
930
  "resolved": "https://registry.npmjs.org/@types/pug/-/pug-2.0.6.tgz",
 
1280
  "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
1281
  "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
1282
  },
1283
+ "node_modules/base64-js": {
1284
+ "version": "1.5.1",
1285
+ "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
1286
+ "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
1287
+ "funding": [
1288
+ {
1289
+ "type": "github",
1290
+ "url": "https://github.com/sponsors/feross"
1291
+ },
1292
+ {
1293
+ "type": "patreon",
1294
+ "url": "https://www.patreon.com/feross"
1295
+ },
1296
+ {
1297
+ "type": "consulting",
1298
+ "url": "https://feross.org/support"
1299
+ }
1300
+ ]
1301
+ },
1302
  "node_modules/binary-extensions": {
1303
  "version": "2.2.0",
1304
  "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
 
1307
  "node": ">=8"
1308
  }
1309
  },
1310
+ "node_modules/bindings": {
1311
+ "version": "1.2.1",
1312
+ "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.2.1.tgz",
1313
+ "integrity": "sha512-u4cBQNepWxYA55FunZSM7wMi55yQaN0otnhhilNoWHq0MfOfJeQx0v0mRRpolGOExPjZcl6FtB0BB8Xkb88F0g==",
1314
+ "optional": true
1315
+ },
1316
  "node_modules/brace-expansion": {
1317
  "version": "1.1.11",
1318
  "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
 
1333
  "node": ">=8"
1334
  }
1335
  },
1336
+ "node_modules/brotli": {
1337
+ "version": "1.3.3",
1338
+ "resolved": "https://registry.npmjs.org/brotli/-/brotli-1.3.3.tgz",
1339
+ "integrity": "sha512-oTKjJdShmDuGW94SyyaoQvAjf30dZaHnjJ8uAF+u2/vGJkJbJPJAT1gDiOJP5v1Zb6f9KEyW/1HpuaWIXtGHPg==",
1340
+ "dependencies": {
1341
+ "base64-js": "^1.1.2"
1342
+ }
1343
+ },
1344
  "node_modules/browserslist": {
1345
  "version": "4.21.5",
1346
  "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.5.tgz",
 
2269
  "node": ">=8"
2270
  }
2271
  },
2272
+ "node_modules/hash-wasm": {
2273
+ "version": "4.9.0",
2274
+ "resolved": "https://registry.npmjs.org/hash-wasm/-/hash-wasm-4.9.0.tgz",
2275
+ "integrity": "sha512-7SW7ejyfnRxuOc7ptQHSf4LDoZaWOivfzqw+5rpcQku0nHfmicPKE51ra9BiRLAmT8+gGLestr1XroUkqdjL6w=="
2276
+ },
2277
  "node_modules/highlight.js": {
2278
  "version": "11.7.0",
2279
  "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.7.0.tgz",
 
2339
  "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
2340
  "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
2341
  },
2342
+ "node_modules/int53": {
2343
+ "version": "0.2.4",
2344
+ "resolved": "https://registry.npmjs.org/int53/-/int53-0.2.4.tgz",
2345
+ "integrity": "sha512-a5jlKftS7HUOhkUyYD7j2sJ/ZnvWiNlZS1ldR+g1ifQ+/UuZXIE+YTc/lK1qGj/GwAU5F8Z0e1eVq2t1J5Ob2g=="
2346
+ },
2347
  "node_modules/ip": {
2348
  "version": "2.0.0",
2349
  "resolved": "https://registry.npmjs.org/ip/-/ip-2.0.0.tgz",
 
2595
  "node": ">=10"
2596
  }
2597
  },
2598
+ "node_modules/lzo": {
2599
+ "version": "0.4.11",
2600
+ "resolved": "https://registry.npmjs.org/lzo/-/lzo-0.4.11.tgz",
2601
+ "integrity": "sha512-apQHNoW2Alg72FMqaC/7pn03I7umdgSVFt2KRkCXXils4Z9u3QBh1uOtl2O5WmZIDLd9g6Lu4lIdOLmiSTFVCQ==",
2602
+ "hasInstallScript": true,
2603
+ "optional": true,
2604
+ "dependencies": {
2605
+ "bindings": "~1.2.1"
2606
+ }
2607
+ },
2608
  "node_modules/magic-string": {
2609
  "version": "0.30.0",
2610
  "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.0.tgz",
 
2826
  "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
2827
  "dev": true
2828
  },
2829
+ "node_modules/node-int64": {
2830
+ "version": "0.4.0",
2831
+ "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz",
2832
+ "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw=="
2833
+ },
2834
  "node_modules/node-releases": {
2835
  "version": "2.0.10",
2836
  "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz",
 
2880
  "node": ">= 6"
2881
  }
2882
  },
2883
+ "node_modules/object-stream": {
2884
+ "version": "0.0.1",
2885
+ "resolved": "https://registry.npmjs.org/object-stream/-/object-stream-0.0.1.tgz",
2886
+ "integrity": "sha512-+NPJnRvX9RDMRY9mOWOo/NDppBjbZhXirNNSu2IBnuNboClC9h1ZGHXgHBLDbJMHsxeJDq922aVmG5xs24a/cA==",
2887
+ "engines": {
2888
+ "node": ">=0.10"
2889
+ }
2890
+ },
2891
  "node_modules/once": {
2892
  "version": "1.4.0",
2893
  "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
 
2970
  "node": ">=6"
2971
  }
2972
  },
2973
+ "node_modules/parquetjs": {
2974
+ "version": "0.11.2",
2975
+ "resolved": "https://registry.npmjs.org/parquetjs/-/parquetjs-0.11.2.tgz",
2976
+ "integrity": "sha512-Y6FOc3Oi2AxY4TzJPz7fhICCR8tQNL3p+2xGQoUAMbmlJBR7+JJmMrwuyMjIpDiM7G8Wj/8oqOH4UDUmu4I5ZA==",
2977
+ "dependencies": {
2978
+ "brotli": "^1.3.0",
2979
+ "bson": "^1.0.4",
2980
+ "int53": "^0.2.4",
2981
+ "object-stream": "0.0.1",
2982
+ "snappyjs": "^0.6.0",
2983
+ "thrift": "^0.11.0",
2984
+ "varint": "^5.0.0"
2985
+ },
2986
+ "engines": {
2987
+ "node": ">=7.6"
2988
+ },
2989
+ "optionalDependencies": {
2990
+ "lzo": "^0.4.0"
2991
+ }
2992
+ },
2993
+ "node_modules/parquetjs/node_modules/bson": {
2994
+ "version": "1.1.6",
2995
+ "resolved": "https://registry.npmjs.org/bson/-/bson-1.1.6.tgz",
2996
+ "integrity": "sha512-EvVNVeGo4tHxwi8L6bPj3y3itEvStdwvvlojVxxbyYfoaxJ6keLgrTuKdyfEAszFK+H3olzBuafE0yoh0D1gdg==",
2997
+ "engines": {
2998
+ "node": ">=0.6.19"
2999
+ }
3000
+ },
3001
  "node_modules/path-exists": {
3002
  "version": "4.0.0",
3003
  "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
 
3323
  "node": ">=6"
3324
  }
3325
  },
3326
+ "node_modules/q": {
3327
+ "version": "1.5.1",
3328
+ "resolved": "https://registry.npmjs.org/q/-/q-1.5.1.tgz",
3329
+ "integrity": "sha512-kV/CThkXo6xyFEZUugw/+pIOywXcDbFYgSct5cT3gqlbkBE1SJdwy6UQoZvodiWF/ckQLZyDE/Bu1M6gVu5lVw==",
3330
+ "engines": {
3331
+ "node": ">=0.6.0",
3332
+ "teleport": ">=0.2.0"
3333
+ }
3334
+ },
3335
  "node_modules/queue-microtask": {
3336
  "version": "1.2.3",
3337
  "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
 
3608
  "npm": ">= 3.0.0"
3609
  }
3610
  },
3611
+ "node_modules/snappyjs": {
3612
+ "version": "0.6.1",
3613
+ "resolved": "https://registry.npmjs.org/snappyjs/-/snappyjs-0.6.1.tgz",
3614
+ "integrity": "sha512-YIK6I2lsH072UE0aOFxxY1dPDCS43I5ktqHpeAsuLNYWkE5pGxRGWfDM4/vSUfNzXjC1Ivzt3qx31PCLmc9yqg=="
3615
+ },
3616
  "node_modules/socks": {
3617
  "version": "2.7.1",
3618
  "resolved": "https://registry.npmjs.org/socks/-/socks-2.7.1.tgz",
 
3982
  "node": ">=0.8"
3983
  }
3984
  },
3985
+ "node_modules/thrift": {
3986
+ "version": "0.11.0",
3987
+ "resolved": "https://registry.npmjs.org/thrift/-/thrift-0.11.0.tgz",
3988
+ "integrity": "sha512-UpsBhOC45a45TpeHOXE4wwYwL8uD2apbHTbtBvkwtUU4dNwCjC7DpQTjw2Q6eIdfNtw+dKthdwq94uLXTJPfFw==",
3989
+ "dependencies": {
3990
+ "node-int64": "^0.4.0",
3991
+ "q": "^1.5.0",
3992
+ "ws": ">= 2.2.3"
3993
+ },
3994
+ "engines": {
3995
+ "node": ">= 4.1.0"
3996
+ }
3997
+ },
3998
  "node_modules/tiny-glob": {
3999
  "version": "0.2.9",
4000
  "resolved": "https://registry.npmjs.org/tiny-glob/-/tiny-glob-0.2.9.tgz",
 
4206
  "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
4207
  "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
4208
  },
4209
+ "node_modules/varint": {
4210
+ "version": "5.0.2",
4211
+ "resolved": "https://registry.npmjs.org/varint/-/varint-5.0.2.tgz",
4212
+ "integrity": "sha512-lKxKYG6H03yCZUpAGOPOsMcGxd1RHCu1iKvEHYDPmTyq2HueGhD73ssNBqqQWfvYs04G9iUFRvmAVLW20Jw6ow=="
4213
+ },
4214
  "node_modules/vite": {
4215
  "version": "4.2.1",
4216
  "resolved": "https://registry.npmjs.org/vite/-/vite-4.2.1.tgz",
 
4338
  "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
4339
  "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
4340
  },
4341
+ "node_modules/ws": {
4342
+ "version": "8.13.0",
4343
+ "resolved": "https://registry.npmjs.org/ws/-/ws-8.13.0.tgz",
4344
+ "integrity": "sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==",
4345
+ "engines": {
4346
+ "node": ">=10.0.0"
4347
+ },
4348
+ "peerDependencies": {
4349
+ "bufferutil": "^4.0.1",
4350
+ "utf-8-validate": ">=5.0.2"
4351
+ },
4352
+ "peerDependenciesMeta": {
4353
+ "bufferutil": {
4354
+ "optional": true
4355
+ },
4356
+ "utf-8-validate": {
4357
+ "optional": true
4358
+ }
4359
+ }
4360
+ },
4361
  "node_modules/yallist": {
4362
  "version": "4.0.0",
4363
  "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
package.json CHANGED
@@ -17,6 +17,7 @@
17
  "@sveltejs/kit": "^1.15.10",
18
  "@tailwindcss/typography": "^0.5.9",
19
  "@types/marked": "^4.0.8",
 
20
  "@typescript-eslint/eslint-plugin": "^5.45.0",
21
  "@typescript-eslint/parser": "^5.45.0",
22
  "eslint": "^8.28.0",
@@ -35,6 +36,7 @@
35
  "type": "module",
36
  "dependencies": {
37
  "@huggingface/inference": "^2.2.0",
 
38
  "autoprefixer": "^10.4.14",
39
  "date-fns": "^2.29.3",
40
  "dotenv": "^16.0.3",
@@ -42,6 +44,7 @@
42
  "marked": "^4.3.0",
43
  "mongodb": "^5.3.0",
44
  "nanoid": "^4.0.2",
 
45
  "postcss": "^8.4.21",
46
  "tailwind-scrollbar": "^3.0.0",
47
  "tailwindcss": "^3.3.1",
 
17
  "@sveltejs/kit": "^1.15.10",
18
  "@tailwindcss/typography": "^0.5.9",
19
  "@types/marked": "^4.0.8",
20
+ "@types/parquetjs": "^0.10.3",
21
  "@typescript-eslint/eslint-plugin": "^5.45.0",
22
  "@typescript-eslint/parser": "^5.45.0",
23
  "eslint": "^8.28.0",
 
36
  "type": "module",
37
  "dependencies": {
38
  "@huggingface/inference": "^2.2.0",
39
+ "@huggingface/hub": "^0.5.1",
40
  "autoprefixer": "^10.4.14",
41
  "date-fns": "^2.29.3",
42
  "dotenv": "^16.0.3",
 
44
  "marked": "^4.3.0",
45
  "mongodb": "^5.3.0",
46
  "nanoid": "^4.0.2",
47
+ "parquetjs": "^0.11.2",
48
  "postcss": "^8.4.21",
49
  "tailwind-scrollbar": "^3.0.0",
50
  "tailwindcss": "^3.3.1",
src/hooks.server.ts CHANGED
@@ -14,7 +14,11 @@ export const handle: Handle = async ({ event, resolve }) => {
14
 
15
  event.locals.sessionId = token || crypto.randomUUID();
16
 
17
- if (event.request.method === "POST" && !event.url.pathname.startsWith(`${base}/settings`)) {
 
 
 
 
18
  const hasAcceptedEthicsModal = await collections.settings.countDocuments({
19
  sessionId: event.locals.sessionId,
20
  ethicsModalAcceptedAt: { $exists: true },
 
14
 
15
  event.locals.sessionId = token || crypto.randomUUID();
16
 
17
+ if (
18
+ event.request.method === "POST" &&
19
+ !event.url.pathname.startsWith(`${base}/settings`) &&
20
+ !event.url.pathname.startsWith(`${base}/admin`)
21
+ ) {
22
  const hasAcceptedEthicsModal = await collections.settings.countDocuments({
23
  sessionId: event.locals.sessionId,
24
  ethicsModalAcceptedAt: { $exists: true },
src/routes/admin/export/+server.ts ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ PARQUET_EXPORT_DATASET,
3
+ PARQUET_EXPORT_HF_TOKEN,
4
+ PARQUET_EXPORT_SECRET,
5
+ } from "$env/static/private";
6
+ import { collections } from "$lib/server/database.js";
7
+ import type { Message } from "$lib/types/Message.js";
8
+ import { error } from "@sveltejs/kit";
9
+ import { pathToFileURL } from "node:url";
10
+ import { unlink, writeFile } from "node:fs/promises";
11
+ import { uploadFile } from "@huggingface/hub";
12
+ import parquet from "parquetjs";
13
+ import { z } from "zod";
14
+
15
+ // Trigger like this:
16
+ // curl -X POST "http://localhost:5173/chat/admin/export" -H "Authorization: Bearer <PARQUET_EXPORT_SECRET>" -H "Content-Type: application/json" -d '{"model": "OpenAssistant/oasst-sft-6-llama-30b-xor"}'
17
+
/**
 * Admin endpoint: exports shared conversations for one model to a parquet file
 * and uploads that file to a Hugging Face dataset repository.
 *
 * Request:
 * - `Authorization: Bearer <PARQUET_EXPORT_SECRET>` header (403 otherwise).
 * - JSON body `{ "model": string }` (400 when missing or malformed).
 *
 * Only conversations whose session settings have
 * `shareConversationsWithModelAuthors: true` are exported. Each row holds the
 * conversation title, its creation/update timestamps and the `from`/`content`
 * of every message.
 *
 * Responds with an empty 200 response once the upload has completed. Responds
 * with 500 when any of the `PARQUET_EXPORT_*` variables is not set.
 */
export async function POST({ request }) {
	if (!PARQUET_EXPORT_SECRET || !PARQUET_EXPORT_DATASET || !PARQUET_EXPORT_HF_TOKEN) {
		throw error(500, "Parquet export is not configured.");
	}

	if (request.headers.get("Authorization") !== `Bearer ${PARQUET_EXPORT_SECRET}`) {
		throw error(403);
	}

	// A body that is not valid JSON is mapped to `undefined` so that it fails schema
	// validation below and yields a 400 instead of an unhandled exception (500).
	const parsedBody = z
		.object({
			model: z.string(),
		})
		.safeParse(await request.json().catch(() => undefined));

	if (!parsedBody.success) {
		throw error(400, "Invalid request body: expected a JSON object with a string `model` field.");
	}

	const { model } = parsedBody.data;

	const schema = new parquet.ParquetSchema({
		title: { type: "UTF8" },
		created_at: { type: "TIMESTAMP_MILLIS" },
		updated_at: { type: "TIMESTAMP_MILLIS" },
		messages: { repeated: true, fields: { from: { type: "UTF8" }, content: { type: "UTF8" } } },
	});

	// The date prefix keeps files readable, the millisecond timestamp keeps them unique.
	const fileName = `/tmp/conversations-${new Date().toJSON().slice(0, 10)}-${Date.now()}.parquet`;

	const writer = await parquet.ParquetWriter.openFile(schema, fileName);
	let writerClosed = false;

	try {
		let count = 0;
		console.log("Exporting conversations for model", model);

		// Start from the settings of sessions that opted in to sharing, join their
		// conversations for the requested model, and emit one document per conversation.
		const sharedConversations = collections.settings.aggregate<{
			title: string;
			created_at: Date;
			updated_at: Date;
			messages: Message[];
		}>([
			{ $match: { shareConversationsWithModelAuthors: true } },
			{
				$lookup: {
					from: "conversations",
					localField: "sessionId",
					foreignField: "sessionId",
					as: "conversations",
					pipeline: [{ $match: { model } }],
				},
			},
			{ $unwind: "$conversations" },
			{
				$project: {
					title: "$conversations.title",
					created_at: "$conversations.createdAt",
					updated_at: "$conversations.updatedAt",
					messages: "$conversations.messages",
				},
			},
		]);

		for await (const conversation of sharedConversations) {
			await writer.appendRow({
				title: conversation.title,
				created_at: conversation.created_at,
				updated_at: conversation.updated_at,
				// Keep only the author and text of each message; other message fields are not exported.
				messages: conversation.messages.map((message: Message) => ({
					from: message.from,
					content: message.content,
				})),
			});
			++count;

			if (count % 1_000 === 0) {
				console.log("Exported", count, "conversations");
			}
		}

		await writer.close();
		writerClosed = true;

		console.log("Uploading", fileName, "to Hugging Face Hub");

		await uploadFile({
			file: pathToFileURL(fileName),
			credentials: { accessToken: PARQUET_EXPORT_HF_TOKEN },
			repo: {
				type: "dataset",
				name: PARQUET_EXPORT_DATASET,
			},
		});

		console.log("Upload done");
	} finally {
		// Best-effort cleanup that also runs when the export or the upload failed, so
		// that failed runs do not leave an open writer or a stale file in /tmp.
		// Cleanup failures are logged rather than thrown so they never mask the
		// original error.
		if (!writerClosed) {
			await writer.close().catch((err: unknown) => {
				console.error("Failed to close parquet writer for", fileName, err);
			});
		}

		await unlink(fileName).catch((err: unknown) => {
			console.error("Failed to remove temporary file", fileName, err);
		});
	}

	return new Response();
}