nirmalaag committed on
Commit
85038b5
·
verified ·
1 Parent(s): 3e9b9fa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from dotenv import load_dotenv
4
+ from streamlit_extras.add_vertical_space import add_vertical_space
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
8
+ from langchain.vectorstores import chroma
9
+ from langchain.chains.retrieval_qa.base import RetrievalQA
10
+ from langchain.chains.question_answering import load_qa_chain
11
+ from langchain_community.llms import huggingface_hub
12
+ from langchain.document_loaders.pdf import PyMuPDFLoader
13
+ #from transformers import AutoTokenizer, AutoModelForCausalLM
14
+ from ctransformers import AutoModelForCausalLM
15
+ import torch
16
+
17
+ #from langchain.llms import huggingface_endpoint
18
+ import os
19
+ import fitz
20
+ import tempfile
21
+
22
# Page-level configuration. st.set_page_config is issued before any other
# Streamlit call in this script, using the logo image as the page icon.
img = Image.open('image/nexio_logo1.png')
st.set_page_config(
    page_title="PDF Chatbot App",
    page_icon=img,
    layout="centered",
)

# Sidebar: app title, a short "About" blurb, spacing, and the branding logo.
with st.sidebar:
    st.title('🤖 AI PDF Chatbot 💬')
    st.markdown('''
    ## About
    This app is an AI chatbot for the PDF files
    ''')
    # Vertical padding between the blurb and the branding footer.
    add_vertical_space(12)
    st.write('Powered by ')
    st.image(image='image/nexio_logo2.png', width=150)

# Load environment variables from a local .env file (the original comment
# says this is where the Hugging Face API key lives).
load_dotenv()
37
+
38
def _extract_pdf_text(pdf):
    """Return the text of every page of ``pdf`` concatenated in page order.

    Args:
        pdf: A file-like object accepted by ``PdfReader`` (here, the object
            returned by ``st.file_uploader``).

    Returns:
        A single string with the pages' text joined without a separator.
        Pages for which ``extract_text()`` yields nothing contribute ``""``.
    """
    reader = PdfReader(pdf)
    # ``or ""`` keeps the join safe if a page yields no text (e.g. a scanned
    # image page); join avoids repeated string re-allocation from ``+=``.
    return "".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Render the "Chat with PDF" page and answer a question about the upload.

    Flow: show the uploader; once a PDF is provided, extract its text, split
    it into overlapping chunks, embed the chunks into a Chroma vector store,
    then prompt for a question. When a question is entered, the single most
    similar chunk is retrieved and passed with the question to a "stuff" QA
    chain backed by the Hugging Face Hub model; the answer is written to the
    page.

    Returns:
        None. All output goes through Streamlit calls.
    """
    st.header("Chat with PDF 💬")

    # Upload PDF file.
    pdf = st.file_uploader("Upload your PDF file", type='pdf')
    if pdf is None:
        return

    text = _extract_pdf_text(pdf)
    if not text.strip():
        # Nothing to index: building a vector store from zero chunks would
        # fail, so tell the user instead of raising.
        st.warning("No extractable text was found in this PDF.")
        return

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)

    # Embed the chunks and index them for similarity search.
    embeddings = HuggingFaceEmbeddings()
    vector_store = chroma.Chroma.from_texts(chunks, embeddings)

    # Accept user question.
    query = st.text_input("Ask questions about your PDF file:")
    if not query:
        return

    # Kept from the original: release cached GPU memory before querying.
    torch.cuda.empty_cache()
    # NOTE(review): presumably authenticates via the API token loaded from
    # .env at module import — confirm the expected variable name.
    llm = huggingface_hub.HuggingFaceHub(
        repo_id="CohereForAI/aya-101",
        model_kwargs={"temperature": 1.0, "max_length": 100},
    )
    # Only the single best-matching chunk is used as context (k=1).
    docs = vector_store.similarity_search(query=query, k=1)
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    response = chain.run(input_documents=docs, question=query)
    st.write(response)
82
+
83
+
84
+
85
# Script entry point: build the page only when executed as the main module.
if __name__ == "__main__":
    main()