Rahul Bhoyar committed on
Commit
69f43f5
1 Parent(s): 6a89483

Initial commit

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +73 -0
  3. requirements.txt +15 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ venv/
app.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ import configparser
+ from typing_extensions import Concatenate
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.llms import OpenAI
+
+ from llama_index.llms import Gemini, HuggingFaceInferenceAPI, OpenAI
+ from llama_index import VectorStoreIndex, download_loader
+ from llama_index import ServiceContext
+ from llama_index.embeddings import HuggingFaceEmbedding
+ from llama_index.schema import Document
+
+
+ def read_pdf(uploaded_file):
+     # Extract the text of every page of the uploaded PDF.
+     pdf_reader = PdfReader(uploaded_file)
+     text = ""
+     for page_num in range(len(pdf_reader.pages)):
+         text += pdf_reader.pages[page_num].extract_text()
+     return text
+
+
+ def querying(document_search, chain):
+     # LangChain QA path: retrieve similar chunks and run the QA chain on them.
+     query_text = st.text_input("Enter the query for the PDF:")
+     submit = st.button("Generate the response for the query")
+
+     if submit:
+         docs = document_search.similarity_search(query_text)
+         output = chain.run(input_documents=docs, question=query_text)
+         st.write(output)
+
+
+ def main():
+     st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
+     hf_token = st.text_input("Enter your Hugging Face token:")
+
+     llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
+     st.markdown("Query your PDF file data using this chatbot")
+     uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
+
+     embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
+     service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
+
+     if uploaded_file is not None:
+         file_contents = read_pdf(uploaded_file)
+         documents = [Document(text=file_contents)]
+
+         st.success("Documents loaded successfully!")
+
+         # Create the vector store index over the uploaded document
+         index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
+
+         # Persist the storage context to disk
+         index.storage_context.persist()
+
+         # Create the query engine and answer the user's question
+         query = st.text_input("Ask a question:")
+         query_engine = index.as_query_engine()
+         if query:
+             response = query_engine.query(query)
+
+             # Display the result
+             st.markdown(f"**Response:** {response}")
+     else:
+         st.warning("Please upload a valid PDF file.")
+
+
+ if __name__ == "__main__":
+     main()
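Note on the LangChain imports: app.py pulls in OpenAIEmbeddings, CharacterTextSplitter, FAISS, load_qa_chain, and the LangChain OpenAI LLM, and defines querying(), but main() only uses the llama_index path and never calls them. A minimal sketch of how that unused path could be wired up, assuming an OpenAI API key is available in the environment (the build_qa helper below is hypothetical, not part of this commit):

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

def build_qa(pdf_text):
    # Split the extracted PDF text into overlapping chunks for embedding.
    splitter = CharacterTextSplitter(separator="\n", chunk_size=800, chunk_overlap=200)
    chunks = splitter.split_text(pdf_text)
    # Embed the chunks with OpenAI embeddings and index them in a FAISS store.
    document_search = FAISS.from_texts(chunks, OpenAIEmbeddings())
    # The "stuff" chain packs the retrieved chunks into a single prompt for the LLM.
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    return document_search, chain

# Possible usage inside main(), after file_contents = read_pdf(uploaded_file):
#     document_search, chain = build_qa(file_contents)
#     querying(document_search, chain)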
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ langchain
+ openai
+ PyPDF2
+ faiss-cpu
+ tiktoken
+ watchdog
+ streamlit
+ fitz
+ llama-index
+ transformers[torch]
+ huggingface_hub[inference]
+ beautifulsoup4
+ unstructured
+ watchdog
+ transformers