ryanrwatkins committed on
Commit
6a04a92
1 Parent(s): 73a141a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -4
app.py CHANGED
@@ -17,11 +17,17 @@ from langchain.document_loaders import PyPDFLoader
17
  # Use Chroma in Colab to create vector embeddings; I then saved them to HuggingFace, so now I have to set it to use them here.
18
  #from chromadb.config import Settings
19
  #client = chromadb.Client(Settings(
20
- # chroma_db_impl="duckdb+parquet",
21
  # persist_directory="./embeddings" # Optional, defaults to .chromadb/ in the current directory
22
  #))
23
 
24
 
 
 
 
 
 
 
25
def get_empty_state():
    """Create a brand-new, empty chat session state.

    Returns:
        dict: a fresh state with a zeroed token counter and no messages.
    """
    fresh_state = {"total_tokens": 0, "messages": []}
    return fresh_state
27
 
@@ -60,6 +66,27 @@ def on_prompt_template_change(prompt_template):
60
  def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
61
 
62
  openai.api_key = os.environ['openai_key']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  history = state['messages']
64
 
65
  if not prompt:
@@ -79,11 +106,9 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
79
 
80
  # completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=system_prompt + history[-context_length*2:] + [prompt_msg], temperature=temperature, max_tokens=max_tokens)
81
 
82
- persist_directory = "./embeddings"
83
- vectordb = Chroma.from_documents(romeoandjuliet_doc, embeddings, persist_directory=persist_directory)
84
  completion = ChatVectorDBChain.from_llm(OpenAI(temperature=temperature, max_tokens=max_tokens, model_name="gpt-3.5-turbo"), vectordb, return_source_documents=True)
85
  result = completion({"question": system_prompt + history[-context_length*2:] + [prompt_msg]})
86
- # from https://blog.devgenius.io/chat-with-document-s-using-openai-chatgpt-api-and-text-embedding-6a0ce3dc8bc8
87
 
88
  history.append(prompt_msg)
89
  history.append(completion.choices[0].message.to_dict())
 
17
  # Use Chroma in Colab to create vector embeddings; I then saved them to HuggingFace, so now I have to set it to use them here.
18
  #from chromadb.config import Settings
19
  #client = chromadb.Client(Settings(
20
+ ## chroma_db_impl="duckdb+parquet",
21
  # persist_directory="./embeddings" # Optional, defaults to .chromadb/ in the current directory
22
  #))
23
 
24
 
25
+
26
+
27
+
28
+
29
+
30
+
31
def get_empty_state():
    """Create a brand-new, empty chat session state.

    Returns:
        dict: a fresh state with a zeroed token counter and no messages.
    """
    fresh_state = {"total_tokens": 0, "messages": []}
    return fresh_state
33
 
 
66
  def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
67
 
68
  openai.api_key = os.environ['openai_key']
69
+
70
+
71
+ # load in all the files
72
+ path = './files'
73
+ #pdf_files = glob.glob(os.path.join(path, "*.pdf"))
74
+ pdf_files = glob.glob(os.path.join(path, "*.pdf"))
75
+
76
+ for file in pdf_files:
77
+ loader = PyPDFLoader(file)
78
+ pages = loader.load_and_split()
79
+ text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
80
+ split_pages = text_splitter.split_documents(pages)
81
+
82
+ persist_directory = "./embeddings"
83
+ embeddings = OpenAIEmbeddings()
84
+ vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
85
+ vectordb.persist()
86
+
87
+
88
+
89
+
90
  history = state['messages']
91
 
92
  if not prompt:
 
106
 
107
  # completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=system_prompt + history[-context_length*2:] + [prompt_msg], temperature=temperature, max_tokens=max_tokens)
108
 
 
 
109
  completion = ChatVectorDBChain.from_llm(OpenAI(temperature=temperature, max_tokens=max_tokens, model_name="gpt-3.5-turbo"), vectordb, return_source_documents=True)
110
  result = completion({"question": system_prompt + history[-context_length*2:] + [prompt_msg]})
111
+ # from https://blog.devgenius.io/chat-with-document-s-using-openai-chatgpt-api-and-text-embedding-6a0ce3dc8bc8
112
 
113
  history.append(prompt_msg)
114
  history.append(completion.choices[0].message.to_dict())