somewheresy committed on
Commit
21bee4f
1 Parent(s): a3225ba

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +0 -13
  2. app.log +0 -0
  3. app.py +275 -0
  4. requirements.txt +308 -0
README.md CHANGED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Dataclysm
3
- emoji: 🐠
4
- colorFrom: purple
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.30.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.log ADDED
File without changes
app.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.manifold import TSNE
6
+ from datasets import load_dataset, Dataset
7
+ from sklearn.cluster import KMeans
8
+ import plotly.graph_objects as go
9
+ import time
10
+ import logging
11
+
12
+
13
+ # Additional libraries for querying
14
+ from FlagEmbedding import FlagModel
15
+
16
# Global variables and dataset loading
dataset_name = 'somewheresystems/dataclysm-arxiv'


@st.cache_resource
def _load_train_split(name):
    """Load the ``train`` split of dataset *name* once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded split avoids repeating the load on each rerun.
    """
    return load_dataset(name, split="train")


@st.cache_resource
def _load_query_model():
    """Build the query-embedding model once per server process.

    Without caching, the model would be re-instantiated on every script
    rerun, i.e. on every user interaction.
    """
    return FlagModel(
        'BAAI/bge-small-en-v1.5',
        query_instruction_for_retrieval="Represent this sentence for searching relevant passages:",
        use_fp16=True,
    )


# Keep the same names the rest of the file reads: the session-state entry,
# `total_samples` (upper bound of the sample slider) and `model`.
st.session_state.dataclysm_arxiv = _load_train_split(dataset_name)
total_samples = len(st.session_state.dataclysm_arxiv)

# Log to app.log, truncating the file when the handler is first installed.
logging.basicConfig(filename='app.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s', level=logging.INFO)

# Initialize the model for querying
model = _load_query_model()
26
+
27
+
28
def load_data(num_samples):
    """Load the dataset and draw a random sample of its rows.

    Args:
        num_samples: Requested number of rows. Clamped to the size of the
            ``train`` split when it exceeds it.

    Returns:
        A ``(df, embeddings)`` tuple: the sampled pandas DataFrame and a
        NumPy object array built from its ``title_embedding`` column.

    Raises:
        ValueError: If the ``train`` split contains no rows (previously this
            surfaced as a ``ZeroDivisionError`` in the percentage display).
    """
    start_time = time.time()

    # Only the train split is used, so request it directly instead of
    # loading every split and indexing into the result.
    logging.info('Loading dataset...')
    train_split = load_dataset(dataset_name, split='train')
    total_samples = len(train_split)
    if total_samples == 0:
        raise ValueError(f"Dataset '{dataset_name}' has no rows in its train split")

    logging.info('Converting to pandas dataframe...')
    df = train_split.to_pandas()

    # Adjust num_samples if it's more than the total number of samples
    num_samples = min(num_samples, total_samples)
    st.sidebar.text(f'Number of samples: {num_samples} ({num_samples / total_samples:.2%} of total)')

    # Randomly sample the dataframe (unseeded, so each call differs)
    df = df.sample(n=num_samples)

    # The 'title_embedding' column holds the embeddings
    embeddings = df['title_embedding'].tolist()
    print("embeddings length: " + str(len(embeddings)))

    # Keep dtype=object: callers flatten each entry themselves.
    embeddings = np.array(embeddings, dtype=object)

    end_time = time.time()  # End timing
    st.sidebar.text(f'Data loading completed in {end_time - start_time:.3f} seconds')
    return df, embeddings
56
+
57
def perform_tsne(embeddings):
    """Reduce *embeddings* to three dimensions with t-SNE.

    Shows a status message in the main area while running and the elapsed
    time in the sidebar afterwards.

    Args:
        embeddings: NumPy array of embeddings; it is converted with
            ``.tolist()`` and stacked row-wise before fitting.

    Returns:
        The array returned by ``TSNE.fit_transform`` (three components).

    Raises:
        ValueError: If the entries of *embeddings* differ in length.
    """
    started = time.time()
    logging.info('Performing t-SNE...')

    n_samples = len(embeddings)
    # Cap perplexity at 30 and keep it below the sample count.
    if n_samples > 1:
        perplexity = min(30, n_samples - 1)
    else:
        perplexity = 1

    # Every entry must have the same length.
    distinct_lengths = {len(vector) for vector in embeddings}
    if len(distinct_lengths) > 1:
        raise ValueError("All embeddings should have the same length")

    # Dimensionality reduction with t-SNE
    reducer = TSNE(n_components=3, perplexity=perplexity, n_iter=300)

    # Placeholder that is overwritten once the fit finishes
    status = st.empty()
    status.text("t-SNE in progress...")

    stacked = np.vstack(embeddings.tolist())
    tsne_results = reducer.fit_transform(stacked)

    status.text(f"t-SNE completed. Processed {n_samples} samples with perplexity {perplexity}.")
    elapsed = time.time() - started
    st.sidebar.text(f't-SNE completed in {elapsed:.3f} seconds')
    return tsne_results
82
+
83
+
84
def perform_clustering(df, tsne_results, n_clusters=16):
    """Attach t-SNE coordinates to *df* and cluster them with k-means.

    Args:
        df: DataFrame with one row per sample. It is modified in place: the
            columns ``tsne-3d-one``, ``tsne-3d-two``, ``tsne-3d-three`` and
            ``cluster`` are added.
        tsne_results: 2-D array whose first three columns are the t-SNE
            coordinates, one row per row of *df*.
        n_clusters: Desired number of clusters (default 16, the previously
            hard-coded value). Clamped to the number of rows, because KMeans
            rejects more clusters than samples.

    Returns:
        The same *df* object, with the added columns.
    """
    start_time = time.time()
    logging.info('Performing k-means clustering...')

    # Coordinates used both for clustering and for the 3-D scatter plot.
    df['tsne-3d-one'] = tsne_results[:, 0]
    df['tsne-3d-two'] = tsne_results[:, 1]
    df['tsne-3d-three'] = tsne_results[:, 2]

    # Never request more clusters than there are rows.
    n_clusters = max(1, min(n_clusters, len(df)))

    # n_init is pinned to the historical default of 10 so results do not
    # shift (and no FutureWarning is emitted) when the library default changes.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    df['cluster'] = kmeans.fit_predict(df[['tsne-3d-one', 'tsne-3d-two', 'tsne-3d-three']])

    end_time = time.time()  # End timing
    st.sidebar.text(f'k-means clustering completed in {end_time - start_time:.3f} seconds')
    return df
99
+
100
def main():
    """Render the Streamlit page: styling, data initialization, plot, search and detail view.

    On first use the sidebar offers a sample-size slider and an 'Initialize'
    button. Pressing it loads and samples the data, builds a FAISS index on
    the title embeddings, runs t-SNE and k-means, and stores the DataFrame,
    the t-SNE coordinates, the indexed dataset and the figure in
    ``st.session_state``. Once data is loaded, the plot, a query box and a
    per-article detail view are shown.
    """
    # Custom CSS
    custom_css = """
    <style>
        /* Define the font */
        @font-face {
            font-family: 'F';
            src: url('https://fonts.googleapis.com/css2?family=Martian+Mono&display=swap') format('truetype');
        }
        /* Apply the font to all elements */
        * {
            font-family: 'F', sans-serif !important;
            color: #F8F8F8; /* Set the font color to F8F8F8 */
        }
        /* Add your CSS styles here */
        h1 {
            text-align: center;
        }
        h2,h3,h4 {
            text-align: justify;
            font-size: 8px
        }
        body {
            text-align: justify;
        }
        .stSlider .css-1cpxqw2 {
            background: #202020;
        }
        .stButton > button {
            background-color: #202020;
            width: 100%;
            border: none;
            padding: 10px 24px;
            border-radius: 5px;
            font-size: 16px;
            font-weight: bold;
        }
        .reportview-container .main .block-container {
            padding: 2rem;
            background-color: #202020;
        }
    </style>
    """

    # Inject custom CSS with markdown
    st.markdown(custom_css, unsafe_allow_html=True)
    st.sidebar.markdown(
        '<img src="https://www.somewhere.systems/S2-white-logo.png" style="float: bottom-left; width: 32px; height: 32px; opacity: 1.0; animation: fadein 2s;">',
        unsafe_allow_html=True
    )
    st.sidebar.title('Spatial Search Engine')

    # Check if data needs to be loaded
    if 'data_loaded' not in st.session_state or not st.session_state.data_loaded:
        # User input for number of samples
        num_samples = st.sidebar.slider('Select number of samples', 1000, total_samples, 1000)

        if st.sidebar.button('Initialize'):
            st.sidebar.text('Initializing data pipeline...')

            # Load data and perform t-SNE and clustering
            df, embeddings = load_data(num_samples)

            # Write the embeddings back into df as flat lists so the FAISS
            # index is built over one vector per row.
            df['title_embedding'] = [embedding.flatten().tolist() for embedding in embeddings]
            # Print the first few rows of the dataframe to check
            print(df.head())

            # Add FAISS indices for 'title_embedding'
            st.session_state.dataclysm_title_indexed = _reshape_and_add_faiss_index(df, 'title_embedding')
            tsne_results = perform_tsne(embeddings)
            df = perform_clustering(df, tsne_results)

            # Create custom hover text
            df['hovertext'] = df.apply(
                lambda row: f"<b>Title:</b> {row['title']}<br><b>arXiv ID:</b> {row['id']}<br><b>Key:</b> {row.name}", axis=1
            )
            st.sidebar.text("Datasets loaded, titles indexed.")

            # Store results in session state. `data_loaded` is set last so a
            # failure above cannot leave the session marked as loaded
            # without a figure to display.
            st.session_state.df = df
            st.session_state.tsne_results = tsne_results
            st.session_state.fig = _build_figure(df)
            st.session_state.data_loaded = True

    # Display the plot if data is loaded
    if 'data_loaded' in st.session_state and st.session_state.data_loaded:
        st.plotly_chart(st.session_state.fig, use_container_width=True)

    if 'df' in st.session_state:
        # Sidebar for querying
        with st.sidebar:
            st.sidebar.markdown("### Query Embeddings")
            query = st.text_input("Enter your query:")
            if st.button("Search"):
                if query.strip():
                    _render_query_results(query)
                else:
                    # Avoid encoding and searching an empty query.
                    st.sidebar.warning("Please enter a query before searching.")

        # NOTE(review): the original indentation was not recoverable; the
        # detail view below is assumed to sit outside the `with st.sidebar`
        # block, so the article text renders in the main area - confirm.
        st.sidebar.markdown("# Detailed View")
        selected_index = st.sidebar.selectbox("Select Key", st.session_state.df['id'])

        # Display metadata for the selected article
        selected_row = st.session_state.df[st.session_state.df['id'] == selected_index].iloc[0]
        st.markdown(f"### Title\n{selected_row['title']}", unsafe_allow_html=True)
        st.markdown(f"### Abstract\n{selected_row['abstract']}", unsafe_allow_html=True)
        st.markdown(f"[Read the full paper](https://arxiv.org/abs/{selected_row['id']})", unsafe_allow_html=True)
        st.markdown(f"[Download PDF](https://arxiv.org/pdf/{selected_row['id']})", unsafe_allow_html=True)


def _reshape_and_add_faiss_index(dataset, column_name):
    """Flatten each entry of *column_name* and return a FAISS-indexed Dataset.

    Each row of the embedding column may be nested (e.g. ``[[...]]``); it is
    flattened to a 1-D array in place on the DataFrame *dataset* before the
    DataFrame is converted to a ``Dataset`` and indexed.
    """
    print(f"Flattening {column_name} and adding FAISS index...")
    # Flatten the embeddings
    dataset[column_name] = dataset[column_name].apply(lambda x: np.array(x).flatten())
    # Add the FAISS index
    dataset = Dataset.from_pandas(dataset).add_faiss_index(column=column_name)
    print(f"FAISS index for {column_name} added.")
    return dataset


def _build_figure(df):
    """Build the 3-D scatter plot of the t-SNE coordinates, colored by cluster."""
    fig = go.Figure(data=[go.Scatter3d(
        x=df['tsne-3d-one'],
        y=df['tsne-3d-two'],
        z=df['tsne-3d-three'],
        mode='markers',
        hovertext=df['hovertext'],
        hoverinfo='text',
        marker=dict(
            size=1,
            color=df['cluster'],
            colorscale='Viridis',
            opacity=0.8
        )
    )])

    fig.update_layout(
        plot_bgcolor='#202020',
        height=800,
        margin=dict(l=0, r=0, b=0, t=0),
        scene=dict(
            xaxis=dict(showbackground=True, backgroundcolor="#000000"),
            yaxis=dict(showbackground=True, backgroundcolor="#000000"),
            zaxis=dict(showbackground=True, backgroundcolor="#000000"),
        ),
        scene_camera=dict(eye=dict(x=0.001, y=0.001, z=0.001))
    )
    return fig


def _render_query_results(query, k=10):
    """Embed *query*, fetch the *k* nearest titles and show them in the sidebar.

    Uses the module-level ``model`` instead of constructing a new one on
    every search, and includes the arXiv link column in the rendered table.
    """
    query_embedding = model.encode([query])
    # Retrieve examples by title similarity
    scores_title, retrieved_examples_title = st.session_state.dataclysm_title_indexed.get_nearest_examples(
        'title_embedding', query_embedding, k=k
    )
    df_query = pd.DataFrame(retrieved_examples_title)
    df_query['proximity'] = scores_title
    df_query = df_query.sort_values(by='proximity', ascending=True)
    # Limit similarity score to 3 decimal points
    df_query['proximity'] = df_query['proximity'].round(3)
    # Clickable link to the arXiv abstract page
    df_query['URL'] = df_query['id'].apply(lambda x: f'<a href="https://arxiv.org/abs/{x}" target="_blank">Link</a>')
    # escape=False keeps the anchor tags in the URL column as live HTML.
    st.sidebar.markdown(
        df_query[['title', 'proximity', 'id', 'URL']].to_html(escape=False),
        unsafe_allow_html=True,
    )
269
+
270
+
271
+
272
# Entry point: build the page only when this file is run as the main script.
if __name__ == "__main__":
    main()
274
+
275
+
requirements.txt ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.25.0
2
+ aiofiles==23.2.1
3
+ aiohttp==3.9.1
4
+ aiosignal==1.3.1
5
+ altair==5.2.0
6
+ annotated-types==0.6.0
7
+ anyio==4.2.0
8
+ apache-beam==2.52.0
9
+ appdirs==1.4.4
10
+ appnope==0.1.3
11
+ asgiref==3.7.2
12
+ astor==0.8.1
13
+ asttokens==2.4.1
14
+ attrs==23.2.0
15
+ backoff==2.2.1
16
+ beautifulsoup4==4.12.2
17
+ bitsandbytes==0.42.0
18
+ blessed==1.20.0
19
+ blinker==1.7.0
20
+ boto==2.49.0
21
+ build==1.0.3
22
+ CacheControl==0.13.1
23
+ cachetools==5.3.2
24
+ certifi==2023.11.17
25
+
26
+ charset-normalizer==3.3.2
27
+ ci-info==0.3.0
28
+ cleo==2.1.0
29
+ click==8.1.7
30
+ cloudpickle==2.2.1
31
+ colorama==0.4.6
32
+ comm==0.2.0
33
+ configobj==5.0.8
34
+ configparser==6.0.0
35
+ contourpy==1.2.0
36
+ crashtest==0.4.1
37
+ crcmod==1.7
38
+ cryptography==41.0.7
39
+ cycler==0.12.1
40
+ dataclasses==0.6
41
+ dataclasses-json==0.6.3
42
+ datasets==2.14.7
43
+ debugpy==1.8.0
44
+ decorator==5.1.1
45
+ Deprecated==1.2.14
46
+ dill==0.3.7
47
+ diskcache==5.6.3
48
+ distlib==0.3.8
49
+ distro==1.9.0
50
+ dnspython==2.4.2
51
+ docarray==0.40.0
52
+ docker==7.0.0
53
+ docker-pycreds==0.4.0
54
+ docopt==0.6.2
55
+
56
+ dulwich==0.21.7
57
+ ecdsa==0.18.0
58
+ editor==1.6.5
59
+ etelemetry==0.3.1
60
+ executing==2.0.1
61
+ faiss-cpu==1.7.4
62
+ fastapi==0.108.0
63
+ fastavro==1.9.2
64
+ fasteners==0.19
65
+ fastjsonschema==2.19.1
66
+ filelock==3.13.1
67
+ fitz==0.0.1.dev2
68
+ FlagEmbedding==1.1.8
69
+ fonttools==4.47.0
70
+ frontend==0.0.3
71
+ frozenlist==1.4.1
72
+ fsspec==2023.10.0
73
+ future==0.18.3
74
+ gcs-oauth2-boto-plugin==3.0
75
+ git-python==1.0.3
76
+ gitdb==4.0.11
77
+ GitPython==3.1.40
78
+ google-apitools==0.5.32
79
+ google-auth==2.26.2
80
+ google-reauth==0.1.1
81
+ googleapis-common-protos==1.62.0
82
+ greenlet==3.0.3
83
+ grpcio==1.57.0
84
+ grpcio-health-checking==1.57.0
85
+ grpcio-reflection==1.57.0
86
+ gsutil==5.27
87
+ h11==0.14.0
88
+ hdfs==2.7.3
89
+ hf_transfer==0.1.4
90
+ html2image==2.0.4.3
91
+ httpcore==1.0.2
92
+ httplib2==0.20.4
93
+ httptools==0.6.1
94
+ httpx==0.26.0
95
+ huggingface-hub==0.17.3
96
+ idna==3.6
97
+ importlib-metadata==6.11.0
98
+ inquirer==3.2.1
99
+ installer==0.7.0
100
+ isodate==0.6.1
101
+ itsdangerous==2.1.2
102
+ jaraco.classes==3.3.0
103
+ jcloud==0.3
104
+ jedi==0.19.1
105
+ jina==3.23.2
106
+ jina-hubble-sdk==0.39.0
107
+ Jinja2==3.1.2
108
+ joblib==1.3.2
109
+ Js2Py==0.74
110
+ jsonschema==4.20.0
111
+ jsonschema-specifications==2023.12.1
112
+ jupyter_client==8.6.0
113
+ jupyter_core==5.5.1
114
+ keyring==24.3.0
115
+ kiwisolver==1.4.5
116
+ litellm==1.16.19
117
+ llama-index==0.9.24
118
+ llama_cpp_python==0.2.26
119
+ looseversion==1.3.0
120
+ lxml==5.0.0
121
+ markdown-it-py==3.0.0
122
+ MarkupSafe==2.1.3
123
+ marshmallow==3.20.1
124
+ matplotlib==3.8.2
125
+ matplotlib-inline==0.1.6
126
+ mdurl==0.1.2
127
+
128
+ monotonic==1.6
129
+ more-itertools==10.1.0
130
+ MouseInfo==0.1.3
131
+ mpmath==1.3.0
132
+ msgpack==1.0.7
133
+ multidict==6.0.4
134
+ multiprocess==0.70.15
135
+ mwparserfromhell==0.6.5
136
+ mypy-extensions==1.0.0
137
+ nest-asyncio==1.5.8
138
+ networkx==3.2.1
139
+ nibabel==5.2.0
140
+ nipype==1.8.6
141
+ nltk==3.8.1
142
+ numpy==1.26.2
143
+ oauth2client==4.1.3
144
+ objsize==0.6.1
145
+ open-interpreter==0.2.0
146
+ openai==1.6.1
147
+ opencv-python==4.9.0.80
148
+ opentelemetry-api==1.19.0
149
+ opentelemetry-exporter-otlp==1.19.0
150
+ opentelemetry-exporter-otlp-proto-common==1.19.0
151
+ opentelemetry-exporter-otlp-proto-grpc==1.19.0
152
+ opentelemetry-exporter-otlp-proto-http==1.19.0
153
+ opentelemetry-exporter-prometheus==0.41b0
154
+ opentelemetry-instrumentation==0.40b0
155
+ opentelemetry-instrumentation-aiohttp-client==0.40b0
156
+ opentelemetry-instrumentation-asgi==0.40b0
157
+ opentelemetry-instrumentation-fastapi==0.40b0
158
+ opentelemetry-instrumentation-grpc==0.40b0
159
+ opentelemetry-proto==1.19.0
160
+ opentelemetry-sdk==1.19.0
161
+ opentelemetry-semantic-conventions==0.40b0
162
+ opentelemetry-util-http==0.40b0
163
+ orjson==3.9.10
164
+ packaging==23.2
165
+ pandas==2.1.4
166
+ parso==0.8.3
167
+ pathlib==1.0.1
168
+ pathspec==0.12.1
169
+ pdfminer.six==20221105
170
+ pdfplumber==0.10.3
171
+ peft==0.7.1
172
+ pexpect==4.9.0
173
+ Pillow==10.1.0
174
+ pkginfo==1.9.6
175
+ platformdirs==4.0.0
176
+ plotly==5.18.0
177
+ plyer==2.1.0
178
+ poetry==1.7.1
179
+ poetry-core==1.8.1
180
+ poetry-plugin-export==1.6.0
181
+ posthog==3.1.0
182
+ pretty-traceback==2023.1020
183
+ prometheus-client==0.19.0
184
+ prompt-toolkit==3.0.43
185
+ proto-plus==1.23.0
186
+ protobuf==4.25.1
187
+ prov==2.0.0
188
+ psutil==5.9.7
189
+ ptyprocess==0.7.0
190
+ pure-eval==0.2.2
191
+ pyarrow==11.0.0
192
+ pyarrow-hotfix==0.6
193
+ pyasn1==0.5.1
194
+ pyasn1-modules==0.3.0
195
+ PyAutoGUI==0.9.54
196
+
197
+ pydantic==2.5.3
198
+ pydantic-settings==2.1.0
199
+ pydantic_core==2.14.6
200
+ pydeck==0.8.1b0
201
+ pydot==1.4.2
202
+ PyGetWindow==0.0.9
203
+ Pygments==2.17.2
204
+ pyjsparser==2.7.1
205
+ PyMonCtl==0.7
206
+ pymongo==4.6.1
207
+ PyMsgBox==1.0.9
208
+ pyopencl==2023.1.4
209
+ pyOpenSSL==23.3.0
210
+ pypandoc==1.12
211
+ pyparsing==3.1.1
212
+ pypdf==3.17.4
213
+ PyPDF2==3.0.1
214
+ pypdfium2==4.25.0
215
+ pyperclip==1.8.2
216
+ pyproject_hooks==1.0.0
217
+ PyRect==0.2.0
218
+ PyScreeze==0.1.30
219
+ pytesseract==0.3.10
220
+ python-dateutil==2.8.2
221
+ python-dotenv==1.0.0
222
+ python-jose==3.3.0
223
+ python-multipart==0.0.6
224
+ pytils==0.4.1
225
+ pytools==2023.1.1
226
+ pytweening==1.0.7
227
+ pytz==2023.3.post1
228
+ pyu2f==0.1.5
229
+ PyWinBox==0.6
230
+ PyWinCtl==0.3
231
+ pyxnat==1.6
232
+ PyYAML==6.0.1
233
+ pyzmq==25.1.2
234
+ rapidfuzz==3.6.1
235
+ ray==2.9.0
236
+ rdflib==7.0.0
237
+ readchar==4.0.5
238
+ referencing==0.32.0
239
+ regex==2023.12.25
240
+ requests==2.31.0
241
+ requests-toolbelt==1.0.0
242
+ retry-decorator==1.1.1
243
+ rich==13.7.0
244
+ rpds-py==0.16.2
245
+ rsa==4.7.2
246
+ rubicon-objc==0.4.7
247
+ runs==1.2.0
248
+ safetensors==0.4.1
249
+ scikit-learn==1.3.2
250
+ scipy==1.11.4
251
+ sentence-transformers==2.2.2
252
+ sentencepiece==0.1.99
253
+ sentry-sdk==1.39.2
254
+ setproctitle==1.3.3
255
+ shellingham==1.5.4
256
+ simplejson==3.19.2
257
+ six==1.16.0
258
+ smmap==5.0.1
259
+ sniffio==1.3.0
260
+ soupsieve==2.5
261
+ SQLAlchemy==2.0.24
262
+ sse-starlette==1.8.2
263
+ stack-data==0.6.3
264
+ starlette==0.32.0.post1
265
+ starlette-context==0.3.6
266
+ streamlit==1.30.0
267
+ sympy==1.12
268
+ tenacity==8.2.3
269
+ threadpoolctl==3.2.0
270
+ tiktoken==0.4.0
271
+ tinygrad==0.7.0
272
+ tokenizers==0.14.1
273
+ tokentrim==0.1.13
274
+ toml==0.10.2
275
+ tomlkit==0.12.3
276
+ tools==0.1.9
277
+ tornado==6.4
278
+ tqdm==4.66.1
279
+ traitlets==5.14.0
280
+ traits==6.3.2
281
+ transformers==4.34.0
282
+ trove-classifiers==2023.11.29
283
+ types-requests==2.31.0.6
284
+ types-urllib3==1.26.25.14
285
+ typing-inspect==0.9.0
286
+ typing_extensions==4.9.0
287
+ tzdata==2023.4
288
+ tzlocal==5.2
289
+ urllib3==2.1.0
290
+ uvicorn==0.24.0.post1
291
+ uvloop==0.19.0
292
+ validators==0.22.0
293
+ virtualenv==20.25.0
294
+ wandb==0.16.2
295
+ watchdog==3.0.0
296
+ watchfiles==0.21.0
297
+ wcwidth==0.2.12
298
+ websocket-client==1.7.0
299
+ websockets==12.0
300
+ wget==3.2
301
+ wrapt==1.16.0
302
+ xattr==0.10.1
303
+ xmod==1.8.1
304
+ xxhash==3.4.1
305
+ yarl==1.9.4
306
+ youtube-dl==2021.12.17
307
+ zipp==3.17.0
308
+ zstandard==0.22.0