Tennish committed on
Commit 4ec2e0f • 1 Parent(s): b7d036c
Files changed (3)
  1. README.md +3 -23
  2. app.py +42 -506
  3. requirements.txt +3 -16
README.md CHANGED
@@ -1,24 +1,4 @@
- ---
- title: Open LLM Leaderboard
- emoji: 🏆
- colorFrom: green
- colorTo: indigo
- sdk: gradio
- sdk_version: 4.9.0
- app_file: app.py
- pinned: true
- license: apache-2.0
- duplicated_from: HuggingFaceH4/open_llm_leaderboard
- fullWidth: true
- startup_duration_timeout: 1h
- space_ci:
-   private: true
-   secrets:
-     - HF_TOKEN
-     - H4_TOKEN
- tags:
-   - leaderboard
- short_description: Track, rank and evaluate open LLMs and chatbots
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # pdf-table-extraction-streamlit
+ Streamlit App using Camelot
+
+ https://huggingface.co/spaces/Amrrs/pdf-table-extractor
app.py CHANGED
@@ -1,526 +1,62 @@
1
- import os
2
- import logging
3
- import gradio as gr
4
- import pandas as pd
5
- from apscheduler.schedulers.background import BackgroundScheduler
6
- from huggingface_hub import snapshot_download
7
- from gradio_space_ci import enable_space_ci
 
 
 
 
 
 
8
 
9
- from src.display.about import (
10
- CITATION_BUTTON_LABEL,
11
- CITATION_BUTTON_TEXT,
12
- EVALUATION_QUEUE_TEXT,
13
- FAQ_TEXT,
14
- INTRODUCTION_TEXT,
15
- LLM_BENCHMARKS_TEXT,
16
- TITLE,
17
- )
18
- from src.display.css_html_js import custom_css
19
- from src.display.utils import (
20
- BENCHMARK_COLS,
21
- COLS,
22
- EVAL_COLS,
23
- EVAL_TYPES,
24
- NUMERIC_INTERVALS,
25
- TYPES,
26
- AutoEvalColumn,
27
- ModelType,
28
- Precision,
29
- WeightType,
30
- fields,
31
- )
32
- from src.envs import (
33
- API,
34
- DYNAMIC_INFO_FILE_PATH,
35
- DYNAMIC_INFO_PATH,
36
- DYNAMIC_INFO_REPO,
37
- EVAL_REQUESTS_PATH,
38
- EVAL_RESULTS_PATH,
39
- H4_TOKEN,
40
- IS_PUBLIC,
41
- QUEUE_REPO,
42
- REPO_ID,
43
- RESULTS_REPO,
44
- )
45
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
46
- from src.scripts.update_all_request_files import update_dynamic_files
47
- from src.submission.submit import add_new_eval
48
- from src.tools.collections import update_collections
49
- from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
50
 
51
 
52
- # Start ephemeral Spaces on PRs (see config in README.md)
53
- enable_space_ci()
54
 
 
 
55
 
56
- def restart_space():
57
- API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
58
 
59
 
60
- def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3):
61
- """Attempt to download dataset with retries."""
62
- attempt = 0
63
- while attempt < max_attempts:
64
- try:
65
- print(f"Downloading {repo_id} to {local_dir}")
66
- snapshot_download(
67
- repo_id=repo_id,
68
- local_dir=local_dir,
69
- repo_type=repo_type,
70
- tqdm_class=None,
71
- etag_timeout=30,
72
- max_workers=8,
73
- )
74
- return
75
- except Exception as e:
76
- logging.error(f"Error downloading {repo_id}: {e}")
77
- attempt += 1
78
- if attempt == max_attempts:
79
- restart_space()
80
 
 
81
 
82
- def init_space(full_init: bool = True):
83
- """Initializes the application space, loading only necessary data."""
84
- if full_init:
85
- # These downloads only occur on full initialization
86
- download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
87
- download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
88
- download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
89
 
90
- # Always retrieve the leaderboard DataFrame
91
- raw_data, original_df = get_leaderboard_df(
92
- results_path=EVAL_RESULTS_PATH,
93
- requests_path=EVAL_REQUESTS_PATH,
94
- dynamic_path=DYNAMIC_INFO_FILE_PATH,
95
- cols=COLS,
96
- benchmark_cols=BENCHMARK_COLS,
97
- )
98
 
99
- if full_init:
100
- # Collection update only happens on full initialization
101
- update_collections(original_df)
102
 
103
- leaderboard_df = original_df.copy()
104
-
105
- # Evaluation queue DataFrame retrieval is independent of initialization detail level
106
- eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 
107
 
108
- return leaderboard_df, raw_data, original_df, eval_queue_dfs
 
109
 
110
- # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
111
- # This controls whether a full initialization should be performed.
112
- do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
113
 
114
- # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
115
- # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
116
- leaderboard_df, raw_data, original_df, eval_queue_dfs = init_space(full_init=do_full_init)
117
- finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
118
 
 
119
 
120
- # Data processing for plots now only on demand in the respective Gradio tab
121
- def load_and_create_plots():
122
- plot_df = create_plot_df(create_scores_df(raw_data))
123
- return plot_df
124
 
 
 
 
125
 
126
- # Searching and filtering
127
- def update_table(
128
- hidden_df: pd.DataFrame,
129
- columns: list,
130
- type_query: list,
131
- precision_query: str,
132
- size_query: list,
133
- hide_models: list,
134
- query: str,
135
- ):
136
- filtered_df = filter_models(
137
- df=hidden_df,
138
- type_query=type_query,
139
- size_query=size_query,
140
- precision_query=precision_query,
141
- hide_models=hide_models,
142
- )
143
- filtered_df = filter_queries(query, filtered_df)
144
- df = select_columns(filtered_df, columns)
145
- return df
146
 
147
-
148
- def load_query(request: gr.Request): # triggered only once at startup => read query parameter if it exists
149
- query = request.query_params.get("query") or ""
150
- return (
151
- query,
152
- query,
153
- ) # return one for the "search_bar", one for a hidden component that triggers a reload only if value has changed
154
-
155
-
156
- def search_model(df: pd.DataFrame, query: str) -> pd.DataFrame:
157
- return df[(df[AutoEvalColumn.fullname.name].str.contains(query, case=False, na=False))]
158
-
159
- def search_license(df: pd.DataFrame, query: str) -> pd.DataFrame:
160
- return df[df[AutoEvalColumn.license.name].str.contains(query, case=False, na=False)]
161
-
162
- def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
163
- always_here_cols = [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
164
- dummy_col = [AutoEvalColumn.fullname.name]
165
- filtered_df = df[always_here_cols + [c for c in COLS if c in df.columns and c in columns] + dummy_col]
166
- return filtered_df
167
-
168
- def filter_queries(query: str, df: pd.DataFrame):
169
- tmp_result_df = []
170
-
171
- # Empty query return the same df
172
- if query == "":
173
- return df
174
-
175
- # all_queries = [q.strip() for q in query.split(";")]
176
- # license_queries = []
177
- all_queries = [q.strip() for q in query.split(";") if q.strip() != ""]
178
- model_queries = [q for q in all_queries if not q.startswith("licence")]
179
- license_queries_raw = [q for q in all_queries if q.startswith("license")]
180
- license_queries = [
181
- q.replace("license:", "").strip() for q in license_queries_raw if q.replace("license:", "").strip() != ""
182
- ]
183
-
184
- # Handling model name search
185
- for query in model_queries:
186
- tmp_df = search_model(df, query)
187
- if len(tmp_df) > 0:
188
- tmp_result_df.append(tmp_df)
189
-
190
- if not tmp_result_df and not license_queries:
191
- # Nothing is found, no license_queries -> return empty df
192
- return pd.DataFrame(columns=df.columns)
193
-
194
- if tmp_result_df:
195
- df = pd.concat(tmp_result_df)
196
- df = df.drop_duplicates(
197
- subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
198
- )
199
-
200
- if not license_queries:
201
- return df
202
-
203
- # Handling license search
204
- tmp_result_df = []
205
- for query in license_queries:
206
- tmp_df = search_license(df, query)
207
- if len(tmp_df) > 0:
208
- tmp_result_df.append(tmp_df)
209
-
210
- if not tmp_result_df:
211
- # Nothing is found, return empty df
212
- return pd.DataFrame(columns=df.columns)
213
-
214
- df = pd.concat(tmp_result_df)
215
- df = df.drop_duplicates(
216
- subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
217
- )
218
-
219
- return df
220
-
221
-
222
- def filter_models(
223
- df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
224
- ) -> pd.DataFrame:
225
- # Show all models
226
- if "Private or deleted" in hide_models:
227
- filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
228
- else:
229
- filtered_df = df
230
-
231
- if "Contains a merge/moerge" in hide_models:
232
- filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
233
-
234
- if "MoE" in hide_models:
235
- filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
236
-
237
- if "Flagged" in hide_models:
238
- filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
239
-
240
- type_emoji = [t[0] for t in type_query]
241
- filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
242
- filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
243
-
244
- numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
245
- params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
246
- mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
247
- filtered_df = filtered_df.loc[mask]
248
-
249
- return filtered_df
250
-
251
-
252
- leaderboard_df = filter_models(
253
- df=leaderboard_df,
254
- type_query=[t.to_str(" : ") for t in ModelType],
255
- size_query=list(NUMERIC_INTERVALS.keys()),
256
- precision_query=[i.value.name for i in Precision],
257
- hide_models=["Private or deleted", "Contains a merge/moerge", "Flagged"], # Deleted, merges, flagged, MoEs
258
- )
259
-
260
- demo = gr.Blocks(css=custom_css)
261
- with demo:
262
- gr.HTML(TITLE)
263
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
264
-
265
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
266
- with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
267
- with gr.Row():
268
- with gr.Column():
269
- with gr.Row():
270
- search_bar = gr.Textbox(
271
- placeholder="πŸ” Search models or licenses (e.g., 'model_name; license: MIT') and press ENTER...",
272
- show_label=False,
273
- elem_id="search-bar",
274
- )
275
- with gr.Row():
276
- shown_columns = gr.CheckboxGroup(
277
- choices=[
278
- c.name
279
- for c in fields(AutoEvalColumn)
280
- if not c.hidden and not c.never_hidden and not c.dummy
281
- ],
282
- value=[
283
- c.name
284
- for c in fields(AutoEvalColumn)
285
- if c.displayed_by_default and not c.hidden and not c.never_hidden
286
- ],
287
- label="Select columns to show",
288
- elem_id="column-select",
289
- interactive=True,
290
- )
291
- with gr.Row():
292
- hide_models = gr.CheckboxGroup(
293
- label="Hide models",
294
- choices=["Private or deleted", "Contains a merge/moerge", "Flagged", "MoE"],
295
- value=["Private or deleted", "Contains a merge/moerge", "Flagged"],
296
- interactive=True,
297
- )
298
- with gr.Column(min_width=320):
299
- # with gr.Box(elem_id="box-filter"):
300
- filter_columns_type = gr.CheckboxGroup(
301
- label="Model types",
302
- choices=[t.to_str() for t in ModelType],
303
- value=[t.to_str() for t in ModelType],
304
- interactive=True,
305
- elem_id="filter-columns-type",
306
- )
307
- filter_columns_precision = gr.CheckboxGroup(
308
- label="Precision",
309
- choices=[i.value.name for i in Precision],
310
- value=[i.value.name for i in Precision],
311
- interactive=True,
312
- elem_id="filter-columns-precision",
313
- )
314
- filter_columns_size = gr.CheckboxGroup(
315
- label="Model sizes (in billions of parameters)",
316
- choices=list(NUMERIC_INTERVALS.keys()),
317
- value=list(NUMERIC_INTERVALS.keys()),
318
- interactive=True,
319
- elem_id="filter-columns-size",
320
- )
321
-
322
- leaderboard_table = gr.components.Dataframe(
323
- value=leaderboard_df[
324
- [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
325
- + shown_columns.value
326
- + [AutoEvalColumn.fullname.name]
327
- ],
328
- headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
329
- datatype=TYPES,
330
- elem_id="leaderboard-table",
331
- interactive=False,
332
- visible=True,
333
- )
334
-
335
- # Dummy leaderboard for handling the case when the user uses backspace key
336
- hidden_leaderboard_table_for_search = gr.components.Dataframe(
337
- value=original_df[COLS],
338
- headers=COLS,
339
- datatype=TYPES,
340
- visible=False,
341
- )
342
- search_bar.submit(
343
- update_table,
344
- [
345
- hidden_leaderboard_table_for_search,
346
- shown_columns,
347
- filter_columns_type,
348
- filter_columns_precision,
349
- filter_columns_size,
350
- hide_models,
351
- search_bar,
352
- ],
353
- leaderboard_table,
354
- )
355
-
356
- # Define a hidden component that will trigger a reload only if a query parameter has been set
357
- hidden_search_bar = gr.Textbox(value="", visible=False)
358
- hidden_search_bar.change(
359
- update_table,
360
- [
361
- hidden_leaderboard_table_for_search,
362
- shown_columns,
363
- filter_columns_type,
364
- filter_columns_precision,
365
- filter_columns_size,
366
- hide_models,
367
- search_bar,
368
- ],
369
- leaderboard_table,
370
- )
371
- # Check query parameter once at startup and update search bar + hidden component
372
- demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
373
-
374
- for selector in [
375
- shown_columns,
376
- filter_columns_type,
377
- filter_columns_precision,
378
- filter_columns_size,
379
- hide_models,
380
- ]:
381
- selector.change(
382
- update_table,
383
- [
384
- hidden_leaderboard_table_for_search,
385
- shown_columns,
386
- filter_columns_type,
387
- filter_columns_precision,
388
- filter_columns_size,
389
- hide_models,
390
- search_bar,
391
- ],
392
- leaderboard_table,
393
- queue=True,
394
- )
395
-
396
- with gr.TabItem("πŸ“ˆ Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
397
- with gr.Row():
398
- with gr.Column():
399
- plot_df = load_and_create_plots()
400
- chart = create_metric_plot_obj(
401
- plot_df,
402
- [AutoEvalColumn.average.name],
403
- title="Average of Top Scores and Human Baseline Over Time (from last update)",
404
- )
405
- gr.Plot(value=chart, min_width=500)
406
- with gr.Column():
407
- plot_df = load_and_create_plots()
408
- chart = create_metric_plot_obj(
409
- plot_df,
410
- BENCHMARK_COLS,
411
- title="Top Scores and Human Baseline Over Time (from last update)",
412
- )
413
- gr.Plot(value=chart, min_width=500)
414
-
415
- with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=3):
416
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
417
-
418
- with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=4):
419
- gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
420
-
421
- with gr.TabItem("πŸš€ Submit ", elem_id="llm-benchmark-tab-table", id=5):
422
- with gr.Column():
423
- with gr.Row():
424
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
425
-
426
- with gr.Row():
427
- gr.Markdown("# βœ‰οΈβœ¨ Submit your model here!", elem_classes="markdown-text")
428
-
429
- with gr.Row():
430
- with gr.Column():
431
- model_name_textbox = gr.Textbox(label="Model name")
432
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
433
- private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
434
- model_type = gr.Dropdown(
435
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
436
- label="Model type",
437
- multiselect=False,
438
- value=ModelType.FT.to_str(" : "),
439
- interactive=True,
440
- )
441
-
442
- with gr.Column():
443
- precision = gr.Dropdown(
444
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
445
- label="Precision",
446
- multiselect=False,
447
- value="float16",
448
- interactive=True,
449
- )
450
- weight_type = gr.Dropdown(
451
- choices=[i.value.name for i in WeightType],
452
- label="Weights type",
453
- multiselect=False,
454
- value="Original",
455
- interactive=True,
456
- )
457
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
458
-
459
- with gr.Column():
460
- with gr.Accordion(
461
- f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})",
462
- open=False,
463
- ):
464
- with gr.Row():
465
- finished_eval_table = gr.components.Dataframe(
466
- value=finished_eval_queue_df,
467
- headers=EVAL_COLS,
468
- datatype=EVAL_TYPES,
469
- row_count=5,
470
- )
471
- with gr.Accordion(
472
- f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})",
473
- open=False,
474
- ):
475
- with gr.Row():
476
- running_eval_table = gr.components.Dataframe(
477
- value=running_eval_queue_df,
478
- headers=EVAL_COLS,
479
- datatype=EVAL_TYPES,
480
- row_count=5,
481
- )
482
-
483
- with gr.Accordion(
484
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
485
- open=False,
486
- ):
487
- with gr.Row():
488
- pending_eval_table = gr.components.Dataframe(
489
- value=pending_eval_queue_df,
490
- headers=EVAL_COLS,
491
- datatype=EVAL_TYPES,
492
- row_count=5,
493
- )
494
-
495
- submit_button = gr.Button("Submit Eval")
496
- submission_result = gr.Markdown()
497
- submit_button.click(
498
- add_new_eval,
499
- [
500
- model_name_textbox,
501
- base_model_name_textbox,
502
- revision_name_textbox,
503
- precision,
504
- private,
505
- weight_type,
506
- model_type,
507
- ],
508
- submission_result,
509
- )
510
-
511
- with gr.Row():
512
- with gr.Accordion("πŸ“™ Citation", open=False):
513
- citation_button = gr.Textbox(
514
- value=CITATION_BUTTON_TEXT,
515
- label=CITATION_BUTTON_LABEL,
516
- lines=20,
517
- elem_id="citation-button",
518
- show_copy_button=True,
519
- )
520
-
521
- scheduler = BackgroundScheduler()
522
- scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h
523
- scheduler.add_job(update_dynamic_files, "interval", hours=2) # launched every 2 hour
524
- scheduler.start()
525
-
526
- demo.queue(default_concurrency_limit=40).launch()
 
+ import streamlit as st  # data app development
+ import subprocess  # run OS processes
+ from subprocess import STDOUT, check_call  # OS process manipulation
+ import os  # OS process manipulation
+ import base64  # turn the uploaded byte object into a PDF file
+ import camelot as cam  # extracting tables from PDFs
+ import cv2  # OpenCV, a Camelot dependency
+
+ # run this only once; the result is cached by Streamlit
+ @st.cache
+ def gh():
+     """Install Ghostscript on the Linux machine."""
+     proc = subprocess.Popen('apt-get install -y ghostscript', shell=True, stdin=None, stdout=open(os.devnull, "wb"), stderr=STDOUT, executable="/bin/bash")
+     proc.wait()
+
+ gh()
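Camelot depends on Ghostscript for part of its PDF processing, which is why the app shells out to `apt-get` at startup. A minimal defensive variant is sketched below; it assumes the Ghostscript binary is exposed as `gs` and that the environment allows `apt-get`, neither of which this commit guarantees.

```python
import shutil
import subprocess

def ensure_ghostscript():
    """Install Ghostscript only if the assumed `gs` binary is missing from PATH."""
    if shutil.which("gs") is None:
        # hypothetical hardening of the original apt-get call; requires root in the container
        subprocess.run(["apt-get", "install", "-y", "ghostscript"], check=True)

ensure_ghostscript()
```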
+ st.title("PDF Table Extractor")
+ st.subheader("with the `Camelot` Python library")
+
+ st.image("https://raw.githubusercontent.com/camelot-dev/camelot/master/docs/_static/camelot.png", width=200)
+
+ # file uploader on Streamlit
+ input_pdf = st.file_uploader(label="Upload your PDF here", type="pdf")
+
+ st.markdown("### Page Number")
+
+ page_number = st.text_input("Enter the page number from which to extract tables, e.g. 3", value="1")
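The page number entered here is later passed straight to Camelot's `pages` argument, which takes a string. Camelot also accepts comma-separated lists and ranges that this text box does not advertise; a small sketch, with the page spec chosen purely for illustration:

```python
import camelot as cam

# extract tables from several pages in one call; "1,3-4" is an illustrative page spec
tables = cam.read_pdf("input.pdf", pages="1,3-4", flavor="stream")
print(len(tables), "tables found")
```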
+ # run this only when a PDF is uploaded
+ if input_pdf is not None:
+     # save the uploaded byte object as a PDF file on disk
+     with open("input.pdf", "wb") as f:
+         base64_pdf = base64.b64encode(input_pdf.read()).decode('utf-8')
+         f.write(base64.b64decode(base64_pdf))
+
+     # read the PDF and parse the requested page with the 'stream' flavor
+     table = cam.read_pdf("input.pdf", pages=page_number, flavor='stream')
+
+     st.markdown("### Number of Tables")
+
+     # display the parsing result (a TableList)
+     st.write(table)
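The commit hard-codes the `stream` flavor, which infers columns from whitespace; Camelot's other flavor, `lattice`, relies on ruling lines and usually handles bordered tables better. A short sketch comparing the two and printing the accuracy figures Camelot reports, assuming the same `input.pdf` written above:

```python
import camelot as cam

for flavor in ("stream", "lattice"):
    tables = cam.read_pdf("input.pdf", pages="1", flavor=flavor)
    for t in tables:
        # parsing_report holds accuracy and whitespace percentages for the parsed table
        print(flavor, t.parsing_report)
```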
+     # display the selected table
+     if len(table) > 0:
+
+         # let the user pick which table to show (1-based index)
+         option = st.selectbox(label="Select the table to be displayed", options=range(1, len(table) + 1))
+
+         st.markdown("### Output Table")
+
+         # display the selected table as a dataframe
+         st.dataframe(table[int(option) - 1].df)
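Each Camelot table wraps a pandas DataFrame, so the selected table could also be offered for download. A sketch using Streamlit's `st.download_button`, continuing from the `table` and `option` variables above; it is an addition for illustration, not part of this commit:

```python
# offer the currently selected table as a CSV download
csv_bytes = table[int(option) - 1].df.to_csv(index=False).encode("utf-8")
st.download_button(
    label="Download table as CSV",
    data=csv_bytes,
    file_name=f"table_{option}.csv",
    mime="text/csv",
)
```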
requirements.txt CHANGED
@@ -1,16 +1,3 @@
- APScheduler==3.10.1
- black==23.11.0
- click==8.1.3
- datasets==2.14.5
- huggingface-hub>=0.18.0
- matplotlib==3.8.4
- numpy==1.26.0
- pandas==2.2.2
- plotly==5.14.1
- python-dateutil==2.8.2
- requests==2.28.2
- sentencepiece
- tqdm==4.65.0
- transformers==4.40.0
- tokenizers>=0.15.0
- gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/gradio-space-ci@0.2.3 # CI !!!
+ opencv-python-headless
+ camelot-py
+ streamlit
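The new requirements are unpinned and cover only the Python side: `opencv-python-headless` provides the `cv2` module Camelot uses, while Ghostscript still has to come from the operating system, which is why app.py installs it with `apt-get`. A quick, assumed sanity check that the Python-level dependencies resolve:

```python
# verify the three Python dependencies import and report their versions
import camelot
import cv2
import streamlit

print("camelot", camelot.__version__)
print("opencv", cv2.__version__)
print("streamlit", streamlit.__version__)
```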