Aabbhishekk committed on
Commit
7eaa9e0
1 Parent(s): 2ec9cfb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import streamlit as st
3
+ import os
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.llms import OpenAI
10
+ from langchain.callbacks import get_openai_callback
11
+ from langchain.embeddings import HuggingFaceEmbeddings
12
+ from langchain import HuggingFaceHub, LLMChain
13
+ from langchain.embeddings import HuggingFaceHubEmbeddings
14
# Load variables from a local .env file *before* reading the environment.
# Previously the token was read at import time while load_dotenv() only ran
# inside main(), so a token stored only in .env raised KeyError on startup.
# With its default arguments load_dotenv() does not override variables that
# are already set, so an exported HF_TOKEN still takes precedence.
load_dotenv()

# Hugging Face API token; used by the embedding client below and by the
# LLM built in main(). Raises KeyError if it is not configured anywhere.
token = os.environ['HF_TOKEN']

# Sentence-embedding model requested from the Hugging Face Hub.
repo_id = "sentence-transformers/all-mpnet-base-v2"

# Module-level embedding client, passed to FAISS as the embedding function.
hf = HuggingFaceHubEmbeddings(
    repo_id=repo_id,
    task="feature-extraction",
    huggingfacehub_api_token=token,
)
21
+
22
+
23
+
24
+
25
def main():
    """Run the "Ask your PDF" Streamlit page.

    Flow: let the user upload a PDF, extract its text, split the text into
    overlapping chunks, index the chunks in a FAISS vector store using the
    module-level ``hf`` embeddings, then answer the user's question with a
    Hugging Face Hub LLM over the chunks most similar to the question.

    Returns early (rendering nothing further) when no file has been
    uploaded, when the PDF yields no text, or when no question was entered.
    """
    load_dotenv()
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # extract the text; `or ""` guards against pages that yield no text
    # (e.g. scanned images) so the join never sees a non-string value
    pdf_reader = PdfReader(pdf)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # Building a FAISS index from an empty list fails, so tell the user
    # instead of crashing when the PDF contained no extractable text.
    if not chunks:
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings / vector index
    # NOTE(review): this runs on every Streamlit rerun, so the index is
    # rebuilt each time the question changes — consider caching.
    embeddings = hf
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    hub_llm = HuggingFaceHub(
        repo_id='HuggingFaceH4/zephyr-7b-beta',
        model_kwargs={'temperature': 0.01, "max_length": 2048},
        huggingfacehub_api_token=token,
    )
    chain = load_qa_chain(hub_llm, chain_type="stuff")

    # NOTE(review): get_openai_callback tracks OpenAI usage; with a Hugging
    # Face Hub LLM it presumably reports nothing useful. Kept so the console
    # output is unchanged — confirm whether it can be dropped.
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        print(cb)

    st.write(response)
74
+
75
+
76
# Script entry point: start the app only when this file is executed
# directly (e.g. via `streamlit run app.py`), not when it is imported.
if __name__ == "__main__":
    main()