nehatarey committed on
Commit
86eebeb
•
1 Parent(s): 4f9a121

pushing app updates

Browse files
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import HuggingFacePipeline
2
+ from langchain.chains import RetrievalQA
3
+ from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.vectorstores import Chroma
7
+ from transformers import AutoTokenizer
8
+
9
+ import gradio as gr
10
+
11
# Single checkpoint id used for both the tokenizer (chunk sizing) and the
# text-generation pipeline below.
BLOOMZ_MODEL_ID = "bigscience/bloomz-1b7"

# Load everything under ./data/ using BSHTMLLoader as the per-file loader.
bshtml_dir_loader = DirectoryLoader("./data/", loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

# The splitter is built from the model's own tokenizer, breaks on newlines,
# and uses a chunk size of 100 with no overlap between chunks.
bloomz_tokenizer = AutoTokenizer.from_pretrained(BLOOMZ_MODEL_ID)
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    bloomz_tokenizer,
    chunk_size=100,
    chunk_overlap=0,
    separator="\n",
)
documents = text_splitter.split_documents(data)

# Embedding model constructed with the library defaults (no arguments given).
embeddings = HuggingFaceEmbeddings()

# NOTE(review): max_length presumably bounds prompt + generated tokens
# together; confirm it still leaves room for an answer once the retrieved
# chunks are stuffed into the prompt.
generation_kwargs = {"temperature": 0, "max_length": 500}
llm = HuggingFacePipeline.from_model_id(
    model_id=BLOOMZ_MODEL_ID,
    task="text-generation",
    model_kwargs=generation_kwargs,
)

# Index the chunks in a Chroma vector store and expose it as a retriever.
vectordb = Chroma.from_documents(documents=documents, embedding=embeddings)
doc_retriever = vectordb.as_retriever()

# Retrieval QA chain of type "stuff", wired to the LLM and retriever above.
tedlasso_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)
38
+
39
+
40
def query(query):
    """Run *query* through the module-level ``tedlasso_qa`` chain.

    Args:
        query: The value forwarded unchanged to ``tedlasso_qa.run``.

    Returns:
        Whatever ``tedlasso_qa.run`` returns for that input.
    """
    answer = tedlasso_qa.run(query)
    return answer
42
+
43
# Gradio interface with a "text" input and a "text" output, backed by
# query(); launched as soon as this module is executed.
iface = gr.Interface(
    fn=query,
    inputs="text",
    outputs="text",
)
iface.launch()
data/Ted Lasso S1 E01 _Pilot_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E02 _Biscuits_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E03 _Trent Crimm_ The Independent_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E04 _For the Children_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E05 _Tan Lines_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E06 _Two Aces_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E07 _Make Rebecca Great Again_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E08 _The Diamond Dogs_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E09 _All Apologies_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
data/Ted Lasso S1 E10 _The Hope that Kills You_ _ Recap - TV Tropes.html ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ lxml
3
+ beautifulsoup4
4
+ chromadb
5
+ sentence_transformers
6
+ transformers
7
+ xformers
8
+ huggingface-hub