davanstrien HF staff committed on
Commit
739cf2e
0 Parent(s):
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. README.md +17 -0
  3. app.py +149 -0
  4. requirements.in +4 -0
  5. requirements.txt +438 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Corpus Creator
3
+ emoji: 🦀
4
+ colorFrom: pink
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 4.36.1
8
+ app_file: app.py
9
+ pinned: false
10
+ hf_oauth_scopes:
11
+ - read-repos
12
+ - write-repos
13
+ - manage-repos
14
+ hf_oauth: true
15
+ ---
16
+
17
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from functools import lru_cache
3
+ from pathlib import Path
4
+
5
+ import gradio as gr
6
+ from datasets import Dataset
7
+ from gradio_log import Log
8
+ from huggingface_hub import DatasetCard
9
+ from llama_index.core import SimpleDirectoryReader
10
+ from llama_index.core.node_parser import SentenceSplitter
11
+ from llama_index.core.schema import MetadataMode
12
+ from tqdm.auto import tqdm
13
+
14
# File that receives the application's log records; it is also the file the
# Log component in the UI displays.
log_file = "logs.txt"
Path(log_file).touch(exist_ok=True)

# Attach exactly ONE file handler to the root logger. The previous setup
# called logging.basicConfig(filename=...) and then added a second
# FileHandler for the same file, so every record could be written twice.
# Configuring the handler explicitly also works when another library has
# already installed root handlers (in which case basicConfig is a no-op).
_root_logger = logging.getLogger()
_root_logger.setLevel(logging.INFO)
_file_handler = logging.FileHandler(log_file)
# Same record layout that logging.basicConfig would have used.
_file_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
_root_logger.addHandler(_file_handler)
19
+
20
+
21
def load_corpus(files, chunk_size=256, chunk_overlap=0, verbose=True):
    """Read ``files`` and split their text into chunks.

    Args:
        files: Paths of the files to read; passed to
            ``SimpleDirectoryReader(input_files=...)``.
        chunk_size: ``chunk_size`` handed to ``SentenceSplitter``.
        chunk_overlap: ``chunk_overlap`` handed to ``SentenceSplitter``.
        verbose: When true, show a Gradio info toast, print progress
            messages and display progress bars.

    Returns:
        A dict mapping each node id to that node's text content (metadata
        excluded). Nodes whose content is empty are left out.
    """
    if verbose:
        gr.Info("Loading files...")
    documents = SimpleDirectoryReader(input_files=files).load_data()
    if verbose:
        print(f"Loaded {len(documents)} docs")

    splitter = SentenceSplitter.from_defaults(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    nodes = splitter.get_nodes_from_documents(documents, show_progress=verbose)
    if verbose:
        print(f"Parsed {len(nodes)} nodes")

    # Single pass: collect the text of every node and skip empty ones.
    # The progress bar now honours ``verbose`` like the rest of the function.
    corpus = {}
    for node in tqdm(nodes, disable=not verbose):
        text = node.get_content(metadata_mode=MetadataMode.NONE)
        if text:
            corpus[node.node_id] = text
    return corpus
44
+
45
+
46
def upload_file(
    files,
    chunk_size: int = 256,
    chunk_overlap: int = 0,
    hub_id: str = None,
    private: bool = False,
    oauth_token: gr.OAuthToken = None,
):
    """Chunk the uploaded files into a dataset and optionally push it to the Hub.

    Args:
        files: Uploaded file objects; each one's ``.name`` is used as its path.
        chunk_size: Chunk size forwarded to ``load_corpus``.
        chunk_overlap: Chunk overlap forwarded to ``load_corpus``.
        hub_id: Repository id to push the dataset to. When falsy, nothing is
            uploaded.
        private: Forwarded to ``Dataset.push_to_hub`` as ``private``.
        oauth_token: Token object whose ``.token`` is used for the upload
            (presumably injected by Gradio from the login session; confirm).

    Returns:
        A ``(dataframe, message)`` tuple: the dataset converted to pandas and
        a Markdown summary of what was created/uploaded.

    Raises:
        gr.Error: If ``hub_id`` is given but no OAuth token is available.
    """
    # Fail fast: do not parse every file only to discover at the end that
    # the upload cannot happen because the user is not logged in.
    if hub_id and oauth_token is None:
        raise gr.Error("Please login to Hugging Face Hub to push to hub")

    print("loading files")
    file_paths = [file.name for file in files]
    print("parsing into sentences")
    corpus = load_corpus(file_paths, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    print("Creating dataset")
    # Materialise the dict views so the columns are plain lists.
    dataset = Dataset.from_dict(
        {"ids": list(corpus.keys()), "texts": list(corpus.values())}
    )
    message = f"Dataset created has: \n - {len(dataset)} rows"
    if hub_id:
        gr.Info("Uploading to Hugging Face Hub")
        dataset.push_to_hub(hub_id, token=oauth_token.token, private=private)
        update_dataset_card(hub_id, oauth_token.token, chunk_size, chunk_overlap)
        # Dataset repos live under /datasets/ on the Hub; the original link
        # also lacked its closing parenthesis, so it never rendered.
        message += (
            f"\n\nUploaded to [{hub_id}](https://huggingface.co/datasets/{hub_id})"
        )

    return dataset.to_pandas(), message
71
+
72
+
73
def update_dataset_card(
    hub_id,
    token,
    chunk_size,
    chunk_overlap,
):
    """Add a description and the ``corpus-creator`` tag to a dataset card.

    Args:
        hub_id: Repository id of the dataset whose card is loaded and pushed.
        token: Token used both to load and to push the card.
        chunk_size: Chunk size mentioned in the generated description.
        chunk_overlap: Chunk overlap mentioned in the generated description.
    """
    card = DatasetCard.load(hub_id, token=token)
    if not card.text:
        # Only add the template description when the card has no text yet.
        card.text += (
            "This dataset was created using [Corpus Creator]"
            "(https://huggingface.co/spaces/davanstrien/corpus-creator). "
            "This dataset was created by parsing a corpus of text files into "
            "chunks of sentences using Llama Index.\n"
            f"This processing was done with a chunk size of {chunk_size} "
            f"and a chunk overlap of {chunk_overlap}."
        )
    # ``tags`` may be missing or None; copy into a fresh list and avoid
    # appending the tag again when the same repo is re-uploaded.
    tags = list(card.data.get("tags") or [])
    if "corpus-creator" not in tags:
        tags.append("corpus-creator")
    card.data["tags"] = tags
    card.push_to_hub(hub_id, token=token)
88
+
89
+
90
# Markdown shown at the top of the app, below the title banner.
description = """
Corpus Creator is a tool designed to help you easily convert a collection of text files into a dataset suitable for various natural language processing (NLP) tasks.
In particular the app is focused on splitting texts into chunks of a specified size and overlap. This can be useful for preparing data for synthetic data generation, pipelines or annotation tasks.
The resulting text chunks are stored in a dataset that can be previewed and uploaded to the Hugging Face Hub for easy sharing and access by the community.
The chunking is done using `Llama-index`'s [`SentenceSplitter`](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/?h=sentencesplitter#sentencesplitter) classes.

### Usage:
- Login: Start by logging in to your Hugging Face account using the provided login button.
- Set Parameters: Customize the chunk size and overlap according to your requirements.
- Upload Files: Use the upload button to load file(s) for processing.
- Preview Dataset: View the created dataset in a dataframe format before uploading it to the Hugging Face Hub.
- Upload to Hub: Optionally, specify the Hub ID and choose whether to make the dataset private before pushing it to the Hugging Face Hub."""

# NOTE(review): the nesting of the layout containers below was reconstructed
# from a view of the file that had lost its indentation; confirm it matches
# the intended layout.
with gr.Blocks() as demo:
    gr.HTML(
        """<h1 style='text-align: center;'> Corpus Creator</h1>
<center><i> &#128193; From random files to a Hugging Face dataset in a single step &#128193; </i></center>"""
    )
    gr.Markdown(description)
    with gr.Row():
        gr.LoginButton()
    with gr.Column():
        gr.Markdown(
            "To upload to the Hub, add an ID for where you want to push the dataset"
        )
        hub_id = gr.Textbox(value=None, label="Hub ID")
        with gr.Row():
            # precision=0 makes Gradio round the value and pass an int, so
            # the splitter never receives a fractional chunk size/overlap.
            chunk_size = gr.Number(
                256,
                label="Chunk size (size to split text into)",
                minimum=10,
                maximum=4096,
                step=1,
                precision=0,
            )
            chunk_overlap = gr.Number(
                0,
                label="Chunk overlap (overlap size between chunks)",
                minimum=0,
                maximum=4096,
                step=1,
                precision=0,
            )
        private = gr.Checkbox(False, label="Upload dataset to a private repo?")
    upload_button = gr.UploadButton(
        "Load files to corpus",
        file_types=[
            "text",
        ],
        file_count="multiple",
    )
    summary = gr.Markdown()

    with gr.Accordion("detailed logs", open=False):
        Log(log_file, dark=True, xterm_font_size=12)
    corpus_preview_df = gr.DataFrame()
    # Uploading files triggers the whole pipeline: chunk, preview and
    # (when a Hub ID is set) push to the Hub.
    upload_button.upload(
        upload_file,
        inputs=[upload_button, chunk_size, chunk_overlap, hub_id, private],
        outputs=[corpus_preview_df, summary],
    )
demo.launch(debug=True)
requirements.in ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio[oauth]
2
+ llama_index
3
+ gradio_log
4
+ datasets
requirements.txt ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile requirements.in -o requirements.txt
3
+ aiofiles==23.2.1
4
+ # via gradio
5
+ aiohttp==3.9.5
6
+ # via
7
+ # datasets
8
+ # fsspec
9
+ # llama-index-core
10
+ # llama-index-legacy
11
+ aiosignal==1.3.1
12
+ # via aiohttp
13
+ altair==5.3.0
14
+ # via gradio
15
+ annotated-types==0.7.0
16
+ # via pydantic
17
+ anyio==4.4.0
18
+ # via
19
+ # httpx
20
+ # openai
21
+ # starlette
22
+ # watchfiles
23
+ attrs==23.2.0
24
+ # via
25
+ # aiohttp
26
+ # jsonschema
27
+ # referencing
28
+ authlib==1.3.1
29
+ # via gradio
30
+ beautifulsoup4==4.12.3
31
+ # via llama-index-readers-file
32
+ certifi==2024.6.2
33
+ # via
34
+ # httpcore
35
+ # httpx
36
+ # requests
37
+ cffi==1.16.0
38
+ # via cryptography
39
+ charset-normalizer==3.3.2
40
+ # via requests
41
+ click==8.1.7
42
+ # via
43
+ # nltk
44
+ # typer
45
+ # uvicorn
46
+ contourpy==1.2.1
47
+ # via matplotlib
48
+ cryptography==42.0.8
49
+ # via authlib
50
+ cycler==0.12.1
51
+ # via matplotlib
52
+ dataclasses-json==0.6.7
53
+ # via
54
+ # llama-index-core
55
+ # llama-index-legacy
56
+ datasets==2.20.0
57
+ # via -r requirements.in
58
+ deprecated==1.2.14
59
+ # via
60
+ # llama-index-core
61
+ # llama-index-legacy
62
+ dill==0.3.8
63
+ # via
64
+ # datasets
65
+ # multiprocess
66
+ dirtyjson==1.0.8
67
+ # via
68
+ # llama-index-core
69
+ # llama-index-legacy
70
+ distro==1.9.0
71
+ # via openai
72
+ dnspython==2.6.1
73
+ # via email-validator
74
+ email-validator==2.1.2
75
+ # via fastapi
76
+ fastapi==0.111.0
77
+ # via gradio
78
+ fastapi-cli==0.0.4
79
+ # via fastapi
80
+ ffmpy==0.3.2
81
+ # via gradio
82
+ filelock==3.15.1
83
+ # via
84
+ # datasets
85
+ # huggingface-hub
86
+ fonttools==4.53.0
87
+ # via matplotlib
88
+ frozenlist==1.4.1
89
+ # via
90
+ # aiohttp
91
+ # aiosignal
92
+ fsspec==2024.5.0
93
+ # via
94
+ # datasets
95
+ # gradio-client
96
+ # huggingface-hub
97
+ # llama-index-core
98
+ # llama-index-legacy
99
+ gradio==4.36.1
100
+ # via
101
+ # -r requirements.in
102
+ # gradio-log
103
+ gradio-client==1.0.1
104
+ # via gradio
105
+ gradio-log==0.0.4
106
+ # via -r requirements.in
107
+ greenlet==3.0.3
108
+ # via sqlalchemy
109
+ h11==0.14.0
110
+ # via
111
+ # httpcore
112
+ # uvicorn
113
+ httpcore==1.0.5
114
+ # via httpx
115
+ httptools==0.6.1
116
+ # via uvicorn
117
+ httpx==0.27.0
118
+ # via
119
+ # fastapi
120
+ # gradio
121
+ # gradio-client
122
+ # llama-index-core
123
+ # llama-index-legacy
124
+ # llamaindex-py-client
125
+ # openai
126
+ huggingface-hub==0.23.4
127
+ # via
128
+ # datasets
129
+ # gradio
130
+ # gradio-client
131
+ idna==3.7
132
+ # via
133
+ # anyio
134
+ # email-validator
135
+ # httpx
136
+ # requests
137
+ # yarl
138
+ importlib-resources==6.4.0
139
+ # via gradio
140
+ itsdangerous==2.2.0
141
+ # via gradio
142
+ jinja2==3.1.4
143
+ # via
144
+ # altair
145
+ # fastapi
146
+ # gradio
147
+ joblib==1.4.2
148
+ # via nltk
149
+ jsonschema==4.22.0
150
+ # via altair
151
+ jsonschema-specifications==2023.12.1
152
+ # via jsonschema
153
+ kiwisolver==1.4.5
154
+ # via matplotlib
155
+ llama-index==0.10.45
156
+ # via -r requirements.in
157
+ llama-index-agent-openai==0.2.7
158
+ # via
159
+ # llama-index
160
+ # llama-index-program-openai
161
+ llama-index-cli==0.1.12
162
+ # via llama-index
163
+ llama-index-core==0.10.44
164
+ # via
165
+ # llama-index
166
+ # llama-index-agent-openai
167
+ # llama-index-cli
168
+ # llama-index-embeddings-openai
169
+ # llama-index-indices-managed-llama-cloud
170
+ # llama-index-llms-openai
171
+ # llama-index-multi-modal-llms-openai
172
+ # llama-index-program-openai
173
+ # llama-index-question-gen-openai
174
+ # llama-index-readers-file
175
+ # llama-index-readers-llama-parse
176
+ # llama-parse
177
+ llama-index-embeddings-openai==0.1.10
178
+ # via
179
+ # llama-index
180
+ # llama-index-cli
181
+ llama-index-indices-managed-llama-cloud==0.1.6
182
+ # via llama-index
183
+ llama-index-legacy==0.9.48
184
+ # via llama-index
185
+ llama-index-llms-openai==0.1.22
186
+ # via
187
+ # llama-index
188
+ # llama-index-agent-openai
189
+ # llama-index-cli
190
+ # llama-index-multi-modal-llms-openai
191
+ # llama-index-program-openai
192
+ # llama-index-question-gen-openai
193
+ llama-index-multi-modal-llms-openai==0.1.6
194
+ # via llama-index
195
+ llama-index-program-openai==0.1.6
196
+ # via
197
+ # llama-index
198
+ # llama-index-question-gen-openai
199
+ llama-index-question-gen-openai==0.1.3
200
+ # via llama-index
201
+ llama-index-readers-file==0.1.25
202
+ # via llama-index
203
+ llama-index-readers-llama-parse==0.1.4
204
+ # via llama-index
205
+ llama-parse==0.4.4
206
+ # via llama-index-readers-llama-parse
207
+ llamaindex-py-client==0.1.19
208
+ # via
209
+ # llama-index-core
210
+ # llama-index-indices-managed-llama-cloud
211
+ markdown-it-py==3.0.0
212
+ # via rich
213
+ markupsafe==2.1.5
214
+ # via
215
+ # gradio
216
+ # jinja2
217
+ marshmallow==3.21.3
218
+ # via dataclasses-json
219
+ matplotlib==3.9.0
220
+ # via gradio
221
+ mdurl==0.1.2
222
+ # via markdown-it-py
223
+ multidict==6.0.5
224
+ # via
225
+ # aiohttp
226
+ # yarl
227
+ multiprocess==0.70.16
228
+ # via datasets
229
+ mypy-extensions==1.0.0
230
+ # via typing-inspect
231
+ nest-asyncio==1.6.0
232
+ # via
233
+ # llama-index-core
234
+ # llama-index-legacy
235
+ networkx==3.3
236
+ # via
237
+ # llama-index-core
238
+ # llama-index-legacy
239
+ nltk==3.8.1
240
+ # via
241
+ # llama-index-core
242
+ # llama-index-legacy
243
+ numpy==2.0.0
244
+ # via
245
+ # altair
246
+ # contourpy
247
+ # datasets
248
+ # gradio
249
+ # llama-index-core
250
+ # llama-index-legacy
251
+ # matplotlib
252
+ # pandas
253
+ # pyarrow
254
+ openai==1.34.0
255
+ # via
256
+ # llama-index-agent-openai
257
+ # llama-index-core
258
+ # llama-index-legacy
259
+ orjson==3.10.5
260
+ # via
261
+ # fastapi
262
+ # gradio
263
+ packaging==24.1
264
+ # via
265
+ # altair
266
+ # datasets
267
+ # gradio
268
+ # gradio-client
269
+ # huggingface-hub
270
+ # marshmallow
271
+ # matplotlib
272
+ pandas==2.2.2
273
+ # via
274
+ # altair
275
+ # datasets
276
+ # gradio
277
+ # llama-index-core
278
+ # llama-index-legacy
279
+ pillow==10.3.0
280
+ # via
281
+ # gradio
282
+ # llama-index-core
283
+ # matplotlib
284
+ pyarrow==16.1.0
285
+ # via datasets
286
+ pyarrow-hotfix==0.6
287
+ # via datasets
288
+ pycparser==2.22
289
+ # via cffi
290
+ pydantic==2.7.4
291
+ # via
292
+ # fastapi
293
+ # gradio
294
+ # llamaindex-py-client
295
+ # openai
296
+ pydantic-core==2.18.4
297
+ # via pydantic
298
+ pydub==0.25.1
299
+ # via gradio
300
+ pygments==2.18.0
301
+ # via rich
302
+ pyparsing==3.1.2
303
+ # via matplotlib
304
+ pypdf==4.2.0
305
+ # via llama-index-readers-file
306
+ python-dateutil==2.9.0.post0
307
+ # via
308
+ # matplotlib
309
+ # pandas
310
+ python-dotenv==1.0.1
311
+ # via uvicorn
312
+ python-multipart==0.0.9
313
+ # via
314
+ # fastapi
315
+ # gradio
316
+ pytz==2024.1
317
+ # via pandas
318
+ pyyaml==6.0.1
319
+ # via
320
+ # datasets
321
+ # gradio
322
+ # huggingface-hub
323
+ # llama-index-core
324
+ # uvicorn
325
+ referencing==0.35.1
326
+ # via
327
+ # jsonschema
328
+ # jsonschema-specifications
329
+ regex==2024.5.15
330
+ # via
331
+ # nltk
332
+ # tiktoken
333
+ requests==2.32.3
334
+ # via
335
+ # datasets
336
+ # huggingface-hub
337
+ # llama-index-core
338
+ # llama-index-legacy
339
+ # tiktoken
340
+ rich==13.7.1
341
+ # via typer
342
+ rpds-py==0.18.1
343
+ # via
344
+ # jsonschema
345
+ # referencing
346
+ ruff==0.4.9
347
+ # via gradio
348
+ semantic-version==2.10.0
349
+ # via gradio
350
+ shellingham==1.5.4
351
+ # via typer
352
+ six==1.16.0
353
+ # via python-dateutil
354
+ sniffio==1.3.1
355
+ # via
356
+ # anyio
357
+ # httpx
358
+ # openai
359
+ soupsieve==2.5
360
+ # via beautifulsoup4
361
+ sqlalchemy==2.0.30
362
+ # via
363
+ # llama-index-core
364
+ # llama-index-legacy
365
+ starlette==0.37.2
366
+ # via fastapi
367
+ striprtf==0.0.26
368
+ # via llama-index-readers-file
369
+ tenacity==8.4.1
370
+ # via
371
+ # llama-index-core
372
+ # llama-index-legacy
373
+ tiktoken==0.7.0
374
+ # via
375
+ # llama-index-core
376
+ # llama-index-legacy
377
+ tomlkit==0.12.0
378
+ # via gradio
379
+ toolz==0.12.1
380
+ # via altair
381
+ tqdm==4.66.4
382
+ # via
383
+ # datasets
384
+ # huggingface-hub
385
+ # llama-index-core
386
+ # nltk
387
+ # openai
388
+ typer==0.12.3
389
+ # via
390
+ # fastapi-cli
391
+ # gradio
392
+ typing-extensions==4.12.2
393
+ # via
394
+ # fastapi
395
+ # gradio
396
+ # gradio-client
397
+ # huggingface-hub
398
+ # llama-index-core
399
+ # llama-index-legacy
400
+ # openai
401
+ # pydantic
402
+ # pydantic-core
403
+ # sqlalchemy
404
+ # typer
405
+ # typing-inspect
406
+ typing-inspect==0.9.0
407
+ # via
408
+ # dataclasses-json
409
+ # llama-index-core
410
+ # llama-index-legacy
411
+ tzdata==2024.1
412
+ # via pandas
413
+ ujson==5.10.0
414
+ # via fastapi
415
+ urllib3==2.2.2
416
+ # via
417
+ # gradio
418
+ # requests
419
+ uvicorn==0.30.1
420
+ # via
421
+ # fastapi
422
+ # gradio
423
+ uvloop==0.19.0
424
+ # via uvicorn
425
+ watchfiles==0.22.0
426
+ # via uvicorn
427
+ websockets==11.0.3
428
+ # via
429
+ # gradio-client
430
+ # uvicorn
431
+ wrapt==1.16.0
432
+ # via
433
+ # deprecated
434
+ # llama-index-core
435
+ xxhash==3.4.1
436
+ # via datasets
437
+ yarl==1.9.4
438
+ # via aiohttp