fcuadra committed on
Commit
f4aaadf
1 Parent(s): 025acb6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import CharacterTextSplitter
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.vectorstores import Chroma
4
+ from langchain import HuggingFacePipeline
5
+ from langchain.chains import RetrievalQA
6
+ from transformers import AutoTokenizer
7
+ from langchain.retrievers import WikipediaRetriever
8
+ import pickle
9
+ import os
10
+
11
# Source corpus: documents returned by the English Wikipedia retriever for
# the fixed query "Economics".
retriever = WikipediaRetriever(lang="en")
data = retriever.get_relevant_documents(query="Economics")

# Chunking: the splitter is built from the BLOOMZ tokenizer and breaks the
# text on newlines with no overlap between consecutive chunks.
bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    bloomz_tokenizer,
    chunk_size=100,
    chunk_overlap=0,
    separator='\n',
)
documents = text_splitter.split_documents(data)

# Vector store: embed the chunks into a Chroma collection rooted at
# `persist_directory`.
embeddings = HuggingFaceEmbeddings()
persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()
# Drop the build-time handle; the store is re-opened from the same directory
# on the next statement.
vectordb = None

vectordb_persist = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# Text-generation LLM wrapped as a LangChain pipeline.
llm = HuggingFacePipeline.from_model_id(
    model_id="bigscience/bloomz-1b7",
    task="text-generation",
    model_kwargs={"temperature": 0, "max_length": 500},
)

# Question-answering chain ("stuff" chain type) over the re-opened store.
doc_retriever = vectordb_persist.as_retriever()
wikipedia_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)
40
+
41
def make_inference(query):
    """Answer *query* using the module-level ``wikipedia_qa`` chain.

    Args:
        query: Question text, forwarded unchanged to ``wikipedia_qa.run``.

    Returns:
        Whatever ``wikipedia_qa.run`` returns for the given query.
    """
    return wikipedia_qa.run(query)
44
+
45
if __name__ == "__main__":
    # Build and launch a Gradio interface around make_inference.
    # Gradio is imported here, so it is only required when the file is run
    # as a script.
    import gradio as gr

    # Use the top-level gr.Textbox component: the gr.inputs / gr.outputs
    # namespaces are deprecated in Gradio 3 and removed in Gradio 4.
    gr.Interface(
        fn=make_inference,
        inputs=gr.Textbox(lines=2, label="Query"),
        outputs=gr.Textbox(label="Response"),
        title="Ask_Wikipedia about Economics",
        # The original description began with an orphaned, invisible
        # variation-selector character (residue of a stripped emoji).
        description="Building a QA application to Wikipedia",
    ).launch()