K00B404 committed on
Commit
c29bcdf
1 Parent(s): 2fc8513

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -15,6 +15,8 @@ from pathlib import Path
15
  import chromadb
16
  from unidecode import unidecode
17
 
 
 
18
  from transformers import AutoTokenizer
19
  import transformers
20
  import torch
@@ -22,16 +24,16 @@ import tqdm
22
  import accelerate
23
  import re
24
 
25
-
26
-
27
- # default_persist_directory = './chroma_HF/'
28
- list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
29
- "google/gemma-7b-it","google/gemma-2b-it", \
30
- "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
31
- "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
32
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
33
- "google/flan-t5-xxl"
34
- ]
35
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
36
 
37
  # Load PDF document and create doc splits
@@ -43,7 +45,6 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
43
  pages = []
44
  for loader in loaders:
45
  pages.extend(loader.load())
46
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size = 600, chunk_overlap = 50)
47
  text_splitter = RecursiveCharacterTextSplitter(
48
  chunk_size = chunk_size,
49
  chunk_overlap = chunk_overlap)
@@ -69,7 +70,7 @@ def create_db(splits, collection_name):
69
  def load_db():
70
  embedding = HuggingFaceEmbeddings()
71
  vectordb = Chroma(
72
- # persist_directory=default_persist_directory,
73
  embedding_function=embedding)
74
  return vectordb
75
 
 
15
  import chromadb
16
  from unidecode import unidecode
17
 
18
+ from babi_app import BabyAGI
19
+
20
  from transformers import AutoTokenizer
21
  import transformers
22
  import torch
 
24
  import accelerate
25
  import re
26
 
27
+ default_persist_directory = './chroma_HF/'
28
+ list_llm = ["mistralai/Mistral-7B-Instruct-v0.2",
29
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
30
+ "mistralai/Mistral-7B-Instruct-v0.1",
31
+ "google/gemma-7b-it", "google/gemma-2b-it",
32
+ "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1",
33
+ "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2",
34
+ "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct",
35
+ "tiiuae/falcon-7b-instruct", "google/flan-t5-xxl"
36
+ ]
37
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
38
 
39
  # Load PDF document and create doc splits
 
45
  pages = []
46
  for loader in loaders:
47
  pages.extend(loader.load())
 
48
  text_splitter = RecursiveCharacterTextSplitter(
49
  chunk_size = chunk_size,
50
  chunk_overlap = chunk_overlap)
 
70
  def load_db():
71
  embedding = HuggingFaceEmbeddings()
72
  vectordb = Chroma(
73
+ persist_directory=default_persist_directory,
74
  embedding_function=embedding)
75
  return vectordb
76