ramortegui committed on
Commit 59277db
1 Parent(s): 82468e8

Update app.py

Files changed (1)
  1. app.py +21 -0
app.py CHANGED
@@ -8,15 +8,24 @@ from transformers import AutoTokenizer
 
 bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
 data = bshtml_dir_loader.load()
+print("loading documents")
 
 bloomz_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-1b7")
 
+print("add tokenizer")
+
 text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(bloomz_tokenizer,
     chunk_size=100,
     chunk_overlap=0,
     separator="\n")
+
+
+print("Add text spliters")
+
 documents = text_splitter.split_documents(data)
 
+print("Getting HF embeddings")
+
 embeddings = HuggingFaceEmbeddings()
 
 llm = HuggingFacePipeline.from_model_id(
@@ -24,12 +33,24 @@ llm = HuggingFacePipeline.from_model_id(
     task="text-generation",
     model_kwargs={"temperature" : 0, "max_length" : 500})
 
+print("Adding LLM hugginFacePipeline with bigscience bloomz")
+
 vectordb = Chroma.from_documents(documents=documents, embedding=embeddings)
+
+print("Getting vectors")
+
 doc_retriever = vectordb.as_retriever()
+
+print("Creating Retreiver")
+
+
 shakespeare_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=doc_retriever)
 
+print("Add shakespeare qa")
+
 def query(query):
     shakespeare_qa.run(query)
+
 
 iface = gr.Interface(fn=query, inputs="text", outputs="text")
 iface.launch()
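
Note on the resulting app.py: as the diff shows, query() calls shakespeare_qa.run(query) but never returns the result, so the Gradio output textbox wired to fn=query will stay empty. A minimal sketch of a handler that returns the answer, assuming the shakespeare_qa chain and gr.Interface built above and that RetrievalQA.run returns the answer string in this LangChain version:

# Sketch only, assuming the shakespeare_qa chain defined above; returning the
# string from the chain is what lets gr.Interface show it in the output textbox.
def query(question):
    return shakespeare_qa.run(question)

iface = gr.Interface(fn=query, inputs="text", outputs="text")
iface.launch()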