kmirijan committed on
Commit
05b89be
1 Parent(s): 696b79d

Adding run file and requirements

Browse files
Files changed (2) hide show
  1. app.py +50 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain.document_loaders import UnstructuredFileLoader, DirectoryLoader
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.chains import RetrievalQA
7
+ import os
8
+
9
# --- Load the source documents -------------------------------------------
# Every file found under the data folder is read through
# UnstructuredFileLoader.
destination_folder = './data/'
txt_dir_loader = DirectoryLoader(
    destination_folder,
    loader_cls=UnstructuredFileLoader,
)
data = txt_dir_loader.load()

# --- Split the loaded documents into chunks ------------------------------
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(data)

# --- Embed the chunks and store them in a persisted Chroma index ---------
embeddings = OpenAIEmbeddings()

persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()

# Release the freshly built store, then reopen it from the persisted
# directory so the rest of the script works against the on-disk copy.
vectordb = None
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# --- Build the retrieval question-answering chain ------------------------
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
doc_retriever = vectordb.as_retriever()
hp_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)
34
+
35
def answer_question(query):
    """Run *query* through the module-level ``hp_qa`` chain.

    Args:
        query: The question to pass to ``hp_qa.run``.

    Returns:
        Whatever ``hp_qa.run(query)`` returns.
    """
    response = hp_qa.run(query)
    return response
37
+
38
# Launch the Gradio front end only when this file is executed as a script.
# The guard must compare against "__main__": the previous value "main" never
# matches, so the interface was never started.
if __name__ == "__main__":
    # Imported lazily so that importing this module for `answer_question`
    # alone does not require gradio to be installed.
    import gradio as gr

    # `gr.Textbox` replaces the deprecated `gr.inputs.Textbox` /
    # `gr.outputs.Textbox` namespaces, which newer Gradio releases removed.
    gr.Interface(
        fn=answer_question,
        inputs=[
            gr.Textbox(lines=2, label="Query"),
        ],
        outputs=gr.Textbox(label="Response"),
        title="Ask Harry Potter",
        description=(
            " Ask Harry Potter is a tool that let's you ask a question with\n"
            "the books' text as reference"
        ),
    ).launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ langchain
3
+ tiktoken
4
+ chromadb
5
+
6
+ "unstructured[local-inference]"
7
+ "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
8
+ layoutparser[layoutmodels,tesseract]