arifdroid committed on
Commit
7cdaa96
1 Parent(s): 301a974

initial commit

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit question-answering app built on LangChain.

The script initialises a HuggingFace Hub LLM and a sentence-transformer
embedding model, builds a retriever either from an uploaded PDF (Chroma)
or from a Qdrant collection at http://localhost:6333, and answers the
user's question with a "stuff" RetrievalQA chain.
"""

import os
import tempfile

import streamlit as st
from huggingface_hub import login
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient

# Set up Streamlit UI
st.title("HuggingFace QA with Langchain and Qdrant")
st.write("This app leverages a Language Model to provide answers to your questions using retrieved context.")

# Load HuggingFace token from environment variable for HuggingFace Space
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# Log in to HuggingFace Hub
if huggingface_token:
    login(token=huggingface_token)
else:
    st.error("HuggingFace token not found. Please set the HUGGINGFACE_TOKEN environment variable.")

# HuggingFace Inference API Configuration
config = {
    'max_new_tokens': 1024,
    'temperature': 0.1,
    'top_k': 50,
    'top_p': 0.9,
}

# Use HuggingFaceHub for LLM
llm = HuggingFaceHub(repo_id="stanford-crfm/BioMedLM", model_kwargs=config, huggingfacehub_api_token=huggingface_token)

st.write("LLM Initialized....")

prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

# PDF Loader and Document Processing
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # PyPDFLoader takes a filesystem path, while Streamlit hands us an
    # in-memory upload, so persist the bytes to a temporary file first.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
        tmp_pdf.write(uploaded_file.getvalue())
        pdf_path = tmp_pdf.name
    try:
        documents = PyPDFLoader(pdf_path).load()
    finally:
        # The pages are fully loaded by now; do not leave uploads on disk.
        os.remove(pdf_path)

    # Report the user's file name rather than the throwaway temp path.
    for page in documents:
        page.metadata["source"] = uploaded_file.name

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)
    if not docs:
        # E.g. a scanned PDF without a text layer: nothing to index.
        st.error("No extractable text was found in the uploaded PDF.")
        st.stop()

    # Create Chroma Vector Store from PDF
    db = Chroma.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_kwargs={"k": 1})
else:
    # Use Qdrant if no PDF is uploaded
    url = "http://localhost:6333"
    client = QdrantClient(
        url=url, prefer_grpc=False
    )
    db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
    retriever = db.as_retriever(search_kwargs={"k": 1})

prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

# Streamlit Form to get user input
with st.form(key='query_form'):
    query = st.text_input("Enter your question here:")
    submit_button = st.form_submit_button(label='Get Answer')

# Handle form submission
if submit_button and query:
    chain_type_kwargs = {"prompt": prompt}
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
        verbose=True,
    )
    response = qa(query)
    answer = response['result']
    source_documents = response['source_documents']

    # Display the results
    st.write("## Answer:")
    st.write(answer)
    if source_documents:
        top_hit = source_documents[0]
        st.write("## Source Document:")
        st.write(top_hit.page_content)
        st.write("## Document Source:")
        st.write(top_hit.metadata.get('source', 'Uploaded PDF'))
    else:
        # The retriever may return nothing (e.g. an empty collection);
        # indexing [0] unconditionally would raise IndexError.
        st.write("No source document was retrieved for this question.")