Louis-François Bouchard and Omar Solano committed
Commit: 0f06abd
Parent: 5c3db13

Advanced rag course update (#44)


* Openai activeloop data (#37)

* adding openai and activeloop data

* fixing issues with names

* concurrency

* black

* black

* revert to gradio3.50 for concurrency

---------

Co-authored-by: Omar Solano <omar@designstripe.com>

* ensure gradio version for HF

* Updates to files

* Push to advanced rag course

* Formatting

* formatting

---------

Co-authored-by: Omar Solano <omar@designstripe.com>

.gitignore CHANGED
@@ -162,7 +162,9 @@ cython_debug/
 *.zip
 deeplake_store/
 .DS_Store
-
+__pycache__/
+.env
+env/
 .vscode/
 evals/
 local_dataset/
.vscode/launch.json ADDED
@@ -0,0 +1,18 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: App",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/data/process_csvs_store.py",
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "python": "/Users/louis/miniconda3/envs/buster/bin/python",
+            "env": {
+                "ACTIVELOOP_TOKEN": "eyJhbGciOiJIUzUxMiIsImlhdCI6MTY5Njc4MjIyMiwiZXhwIjoxNzI4NDA0NTk1fQ.eyJpZCI6Im9tYXJzb2xhbm8ifQ.BlvUc350-boJv4hnN67ksMgGSy7x4nAWcBO7R5RZ22Cw0ifR2AOmmM-RJutBWIspQDiegs03rJxXCCfprc6O_A",
+                "OPENAI_API_KEY": "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"
+            }
+        }
+    ]
+}
app.py CHANGED
@@ -23,6 +23,7 @@ CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
 
 AVAILABLE_SOURCES_UI = [
     "Gen AI 360: LLMs",
+    "Gen AI 360: Advanced RAG",
     "Gen AI 360: LangChain",
     "Towards AI Blog",
     "Activeloop Docs",
@@ -35,6 +36,7 @@ AVAILABLE_SOURCES_UI = [
 AVAILABLE_SOURCES = [
     "llm_course",
     "langchain_course",
+    "advanced_rag_course",
     "towards_ai",
     "activeloop",
     "hf_transformers",
data/markdown_parser.py CHANGED
@@ -50,12 +50,13 @@ def get_title_link_from_md_title(md_title: str, title_link_data: dict):
             return data["title"], data["link"]
     # default back to course link if not found...
     print("\nNot found: ", md_title)
-    return md_title, "https://learn.activeloop.ai/courses/llms/"
+    return md_title, "https://learn.activeloop.ai/courses/rag/"
 
 
 if __name__ == "__main__":
-    folder_path = "/path/to/folder/with/md_content/"
-    folder_path = "/Users/louis/Downloads/llm_course"
+    # folder_path = "/path/to/folder/with/md_content/"
+    # This path points to the course folder containing all the md files
+    folder_path = "/Users/louis/Downloads/rag_course_advanced"
     # folder_path = "/Users/louis/Downloads/d22d1e98-345f-490d-870e-3b082938741c_Export-0a33c13f-6d42-4a94-8f23-7459e7b2c024/LangChain & Vector Databases in Production 92657e0d65da4201bfdd6db915a4eb9f"
     md_files = find_md_files(folder_path)
 
@@ -76,8 +77,8 @@ if __name__ == "__main__":
 
     # with open("data/title_link_langchaincourse.json", "r") as f:
     #     title_link_data = json.load(f)
-
-    with open("/Users/louis/Downloads/output2.json", "r") as f:
+    # This file is a JSON with just two fields, "title" and "link", mapping each md file's title to its link on the course platform.
+    with open("/Users/louis/Downloads/output.json", "r") as f:
         title_link_data = json.load(f)
 
     for md_file in tqdm(md_files):
@@ -102,7 +103,7 @@ if __name__ == "__main__":
             chunk = {
                 "title": title,
                 "content": headers + "\n" + substring,
-                "source": "llm_course",
+                "source": "advanced_rag_course",
                 "url": link,
             }
             chunks.append(chunk)
@@ -112,4 +113,4 @@ if __name__ == "__main__":
     df = drop_outlier_chunks(df, max_tokens_by_chunk=2000)
 
     print(f"Exported {len(df)} chunks from {len(md_files)} articles.")
-    df.to_csv("llm_course.csv")
+    df.to_csv("advanced_rag_course.csv")
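
Note on the title/link mapping loaded above: the output.json file itself is not part of this commit, so its exact schema is an assumption. A minimal, hypothetical sketch of such a file, assuming a flat list of records with "title" and "link" keys (the sample title is a placeholder; only the course base URL appears in the diff):

import json

# Hypothetical example only; the real output.json is not included in the commit.
# Assumed shape: one {"title": ..., "link": ...} record per markdown file.
title_link_data = [
    {
        "title": "Placeholder lesson title",
        "link": "https://learn.activeloop.ai/courses/rag/",
    },
]

with open("output.json", "w") as f:
    json.dump(title_link_data, f, indent=2)
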
data/process_csvs_store.py CHANGED
@@ -2,26 +2,35 @@ import pandas as pd
 import time
 import os
 from buster.documents_manager import DeepLakeDocumentsManager
+from deeplake.core.vectorstore import VectorStore
+from langchain.embeddings.openai import OpenAIEmbeddings
+import numpy as np
 
-DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "dataset-ai-tutor")
+# from openai import OpenAI
+
+DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "ai-tutor-dataset")
 DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
 
-df1 = pd.read_csv("./data/llm_course.csv")
-df2 = pd.read_csv("./data/hf_transformers.csv")
-df3 = pd.read_csv("./data/langchain_course.csv")
-df4 = pd.read_csv("./data/filtered_tai_v2.csv")
-df5 = pd.read_csv("./data/wiki.csv")  # , encoding="ISO-8859-1")
-df6 = pd.read_csv("./data/openai.csv")
-df7 = pd.read_csv("./data/activeloop.csv")
-df8 = pd.read_csv("./data/langchain_docs.csv")
+# df1 = pd.read_csv("./data/jobs.csv", encoding='ISO-8859-1')  # or 'latin1' or 'cp1252'
+# df2 = pd.read_csv("./data/hf_transformers.csv")
+# df3 = pd.read_csv("./data/langchain_course.csv")
+# df4 = pd.read_csv("./data/filtered_tai_v2.csv")
+# df5 = pd.read_csv("./data/wiki.csv")  # , encoding="ISO-8859-1")
+# df6 = pd.read_csv("./data/openai.csv")
+df1 = pd.read_csv("./advanced_rag_course.csv")
 
-print(len(df1), len(df2), len(df3), len(df4), len(df5), len(df6), len(df7), len(df8))
+# print(len(df1), len(df2), len(df3), len(df4), len(df5), len(df6))
+print(len(df1))
 
 dataset_path = f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
+# dataset_path = f"{DEEPLAKE_DATASET}"
+# because wrong name
+# df1['content'] = df1['cleaned_description']
+# print(np.sum(df1.content.isna()), len(df1) )
 
 dm = DeepLakeDocumentsManager(
     vector_store_path=dataset_path,
-    overwrite=True,
+    overwrite=False,
     required_columns=["url", "content", "source", "title"],
 )
 
@@ -30,77 +39,98 @@ dm.batch_add(
     batch_size=3000,
     min_time_interval=60,
     num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
     csv_overwrite=False,
 )
 
-dm.batch_add(
-    df=df2,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
-    csv_overwrite=False,
-)
+# dm.batch_add(
+#     df=df2,
+#     batch_size=3000,
+#     min_time_interval=60,
+#     num_workers=32,
+#     csv_embeddings_filename="embeddings.csv",
+#     csv_errors_filename="tmp.csv",
+#     csv_overwrite=False,
+# )
 
-dm.batch_add(
-    df=df3,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
-    csv_overwrite=False,
-)
+# dm.batch_add(
+#     df=df3,
+#     batch_size=3000,
+#     min_time_interval=60,
+#     num_workers=32,
+#     csv_embeddings_filename="embeddings.csv",
+#     csv_errors_filename="tmp.csv",
+#     csv_overwrite=False,
+# )
 
-dm.batch_add(
-    df=df4,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
-    csv_overwrite=False,
-)
+# dm.batch_add(
+#     df=df4,
+#     batch_size=3000,
+#     min_time_interval=60,
+#     num_workers=32,
+#     csv_embeddings_filename="embeddings.csv",
+#     csv_errors_filename="tmp.csv",
+#     csv_overwrite=False,
+# )
 
-dm.batch_add(
-    df=df5,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
-    csv_overwrite=False,
-)
+# dm.batch_add(
+#     df=df5,
+#     batch_size=3000,
+#     min_time_interval=60,
+#     num_workers=32,
+#     csv_embeddings_filename="embeddings.csv",
+#     csv_errors_filename="tmp.csv",
+#     csv_overwrite=False,
+# )
 
-dm.batch_add(
-    df=df6,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_overwrite=False,
-    csv_errors_filename="tmp.csv",
-)
+# dm.batch_add(
+#     df=df6,
+#     batch_size=3000,
+#     min_time_interval=60,
+#     num_workers=32,
+#     csv_embeddings_filename="embeddings.csv",
+#     csv_overwrite=False,
+#     csv_errors_filename="tmp.csv",
+# )
 
-dm.batch_add(
-    df=df7,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
-    csv_overwrite=False,
-)
+# dm.batch_add(
+#     df=df7,
+#     batch_size=3000,
+#     min_time_interval=60,
+#     num_workers=32,
+#     csv_embeddings_filename="embeddings.csv",
+#     csv_errors_filename="tmp.csv",
+#     csv_overwrite=False,
+# )
 
-dm.batch_add(
-    df=df8,
-    batch_size=3000,
-    min_time_interval=60,
-    num_workers=32,
-    csv_embeddings_filename="embeddings.csv",
-    csv_errors_filename="tmp.csv",
-    csv_overwrite=False,
-)
+
+# client = OpenAI()
+
+# openai_embeddings = OpenAIEmbeddings()
+# def get_embedding(text, model="text-embedding-ada-002"):
+#     # Call to OpenAI's API to create the embedding
+#     response = client.embeddings.create(input=[text], model=model)
+
+#     # Extract the embedding data from the response
+#     embedding = response.data[0].embedding
+
+#     # Convert the ndarray to a list
+#     if isinstance(embedding, np.ndarray):
+#         embedding = embedding.tolist()
+
+#     return embedding
+
+
+# vs = VectorStore(
+#     dataset_path,
+#     runtime='compute_engine',
+#     token=os.environ['ACTIVELOOP_TOKEN']
+# )
+
+# data = vs.search(query = "select * where shape(embedding)[0] == 0")
+
+# vs.update_embedding(embedding_source_tensor = "text",
+#                     query = "select * where shape(embedding)[0] == 0",
+#                     exec_option = "compute_engine",
+#                     embedding_function=get_embedding)
+
+# data2 = vs.search(query = "select * where shape(embedding)[0] == 0")
data/scrapper_to_csv.py ADDED
@@ -0,0 +1,82 @@
+import pandas as pd
+import re
+
+
+def parse_markdown_file(file_path):
+    entries = []
+    with open(file_path, "r", encoding="utf-8") as file:
+        current_url, current_title, current_content = "", "", ""
+        inside_page = False
+
+        for line in file:
+            if line.strip() == "--":  # Check for page separator
+                if inside_page:
+                    # Process the previous page
+                    process_content(
+                        entries, current_url, current_title, current_content
+                    )
+                    current_content = ""
+
+                inside_page = True
+                # Read URL and title
+                current_url = next(file).strip().split(" ", 1)[1]
+                current_title = (
+                    next(file).strip().split(" ", 1)[1].replace("\n", " ")
+                )  # Replace new lines in title
+                # Skip the next two lines (description and keywords)
+                next(file)
+                next(file)
+                # print(f"Detected Page: Title - {current_title}, URL - {current_url}")  # Debugging
+
+            elif inside_page:
+                current_content += line
+
+        if inside_page:
+            process_content(entries, current_url, current_title, current_content)
+
+    df = pd.DataFrame(entries)
+    return df
+
+
+def process_content(entries, url, title, content):
+    # Regular expression to match markdown headers
+    header_pattern = re.compile(r"^## (.+)$", re.MULTILINE)
+
+    # Split the content into sections based on headers
+    sections = re.split(header_pattern, content)
+    section_title = "Main"  # Default section title for content before the first header
+
+    # Initial content before the first header (if any)
+    if not sections[0].startswith("##") and sections[0].strip():
+        add_content_section(entries, title, url, "Main", sections[0])
+
+    # Process each section
+    for i in range(1, len(sections), 2):
+        section_header = sections[i].strip()
+        section_text = (
+            sections[i + 1].strip().replace("\n", " ")
+        )  # Replace new lines in content
+
+        add_content_section(entries, title, url, section_header, section_text)
+
+
+def add_content_section(entries, title, url, section_title, section_text):
+    full_section = f"{section_title}: {section_text}".replace(
+        "\n", " "
+    )  # Replace new lines in content
+    for j in range(0, len(full_section), 6000):
+        entries.append(
+            {
+                "title": title,
+                "url": url,
+                "source": "langchain",
+                "content": full_section[j : j + 6000],
+            }
+        )
+
+
+markdown_file_path = "data/langchain_scrape.md"
+df = parse_markdown_file(markdown_file_path)
+print("Final DataFrame:")
+print(df.head())  # Print the first few rows for verification
+df.to_csv("data/langchain.csv", index=False)
data/tmp.py ADDED
@@ -0,0 +1,121 @@
+import pandas as pd
+import time
+import os
+from deeplake.core.vectorstore import VectorStore
+from langchain.embeddings.openai import OpenAIEmbeddings
+import logging
+
+from buster.documents_manager import DeepLakeDocumentsManager
+from buster.llm_utils import get_openai_embedding_constructor
+
+# Set the logging level of `httpx` to WARNING or higher to suppress annoying INFO logs
+logging.getLogger("httpx").setLevel(logging.WARNING)
+
+openai_embedding_fn = get_openai_embedding_constructor(
+    client_kwargs={"max_retries": 10}
+)
+
+# from openai import OpenAI
+
+DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "ai-tutor-dataset")
+DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
+
+df1 = pd.read_csv("./data/langchain.csv")  # or 'latin1' or 'cp1252'
+df2 = pd.read_csv("./data/hf_transformers.csv")
+df3 = pd.read_csv("./data/langchain_course.csv")
+df4 = pd.read_csv("./data/filtered_tai_v2.csv")
+df5 = pd.read_csv("./data/wiki.csv")  # , encoding="ISO-8859-1")
+df6 = pd.read_csv("./data/openai.csv")
+df7 = pd.read_csv("./data/activeloop.csv")
+df8 = pd.read_csv("./data/llm_course.csv")
+
+print(
+    f"Number of samples: {len(df1)},{len(df2)},{len(df3)},{len(df4)},{len(df5)},{len(df6)},{len(df7)},{len(df8)}"
+)
+
+dataset_path = f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
+
+dm = DeepLakeDocumentsManager(
+    vector_store_path=dataset_path,
+    overwrite=True,
+)
+
+dm.batch_add(
+    df=df1,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df2,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df3,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df4,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df5,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df6,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df7,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
+
+dm.batch_add(
+    df=df8,
+    batch_size=3000,
+    min_time_interval=60,
+    num_workers=32,
+    embedding_fn=openai_embedding_fn,
+    csv_filename="embeddings.csv",
+    csv_overwrite=False,
+)
requirements.txt CHANGED
@@ -1,3 +1,3 @@
 git+https://github.com/jerpint/buster@better-fallback
 gradio==3.50.2
-deeplake==3.8.9
+deeplake==3.8.9