Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,8 @@ from pathlib import Path
|
|
15 |
import chromadb
|
16 |
from unidecode import unidecode
|
17 |
|
|
|
|
|
18 |
from transformers import AutoTokenizer
|
19 |
import transformers
|
20 |
import torch
|
@@ -22,16 +24,16 @@ import tqdm
|
|
22 |
import accelerate
|
23 |
import re
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
]
|
35 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
36 |
|
37 |
# Load PDF document and create doc splits
|
@@ -43,7 +45,6 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
|
|
43 |
pages = []
|
44 |
for loader in loaders:
|
45 |
pages.extend(loader.load())
|
46 |
-
# text_splitter = RecursiveCharacterTextSplitter(chunk_size = 600, chunk_overlap = 50)
|
47 |
text_splitter = RecursiveCharacterTextSplitter(
|
48 |
chunk_size = chunk_size,
|
49 |
chunk_overlap = chunk_overlap)
|
@@ -69,7 +70,7 @@ def create_db(splits, collection_name):
|
|
69 |
def load_db():
|
70 |
embedding = HuggingFaceEmbeddings()
|
71 |
vectordb = Chroma(
|
72 |
-
|
73 |
embedding_function=embedding)
|
74 |
return vectordb
|
75 |
|
|
|
15 |
import chromadb
|
16 |
from unidecode import unidecode
|
17 |
|
18 |
+
from babi_app import BabyAGI
|
19 |
+
|
20 |
from transformers import AutoTokenizer
|
21 |
import transformers
|
22 |
import torch
|
|
|
24 |
import accelerate
|
25 |
import re
|
26 |
|
27 |
+
default_persist_directory = './chroma_HF/'
|
28 |
+
list_llm = ["mistralai/Mistral-7B-Instruct-v0.2",
|
29 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
30 |
+
"mistralai/Mistral-7B-Instruct-v0.1",
|
31 |
+
"google/gemma-7b-it", "google/gemma-2b-it",
|
32 |
+
"HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1",
|
33 |
+
"meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2",
|
34 |
+
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct",
|
35 |
+
"tiiuae/falcon-7b-instruct", "google/flan-t5-xxl"
|
36 |
+
]
|
37 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
38 |
|
39 |
# Load PDF document and create doc splits
|
|
|
45 |
pages = []
|
46 |
for loader in loaders:
|
47 |
pages.extend(loader.load())
|
|
|
48 |
text_splitter = RecursiveCharacterTextSplitter(
|
49 |
chunk_size = chunk_size,
|
50 |
chunk_overlap = chunk_overlap)
|
|
|
70 |
def load_db():
|
71 |
embedding = HuggingFaceEmbeddings()
|
72 |
vectordb = Chroma(
|
73 |
+
persist_directory=default_persist_directory,
|
74 |
embedding_function=embedding)
|
75 |
return vectordb
|
76 |
|