Huzaifa367 committed on
Commit
237f3b8
·
verified ·
1 Parent(s): fc1df46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -47
app.py CHANGED
@@ -9,8 +9,6 @@ from langchain.prompts import PromptTemplate
9
  import tempfile
10
  from gtts import gTTS
11
  import os
12
- import docx
13
- from pptx import Presentation
14
 
15
  def text_to_speech(text):
16
  tts = gTTS(text=text, lang='en')
@@ -20,44 +18,19 @@ def text_to_speech(text):
20
  st.audio(temp_filename, format='audio/mp3')
21
  os.remove(temp_filename)
22
 
23
- def read_text_from_pdf(pdf_file):
24
- pdf_reader = PdfReader(pdf_file)
25
- text = ""
26
- for page in pdf_reader.pages:
27
- text += page.extract_text()
28
- return text
29
-
30
- def read_text_from_docx(docx_file):
31
- doc = docx.Document(docx_file)
32
- text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
33
- return text
34
-
35
- def read_text_from_pptx(pptx_file):
36
- presentation = Presentation(pptx_file)
37
- text = ""
38
- for slide in presentation.slides:
39
- for shape in slide.shapes:
40
- if hasattr(shape, "text"):
41
- text += shape.text + "\n"
42
- return text
43
-
44
- def get_text_from_file(file):
45
- content = ""
46
- if file.type == "application/pdf":
47
- content = read_text_from_pdf(file)
48
- elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
49
- content = read_text_from_docx(file)
50
- elif file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
51
- content = read_text_from_pptx(file)
52
- elif file.type == "text/plain":
53
- content = file.getvalue().decode("utf-8")
54
- return content
55
 
56
  def get_text_chunks(text):
57
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
58
  chunks = text_splitter.split_text(text)
59
  return chunks
60
-
61
  def get_vector_store(text_chunks, api_key):
62
  embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=api_key, model_name="sentence-transformers/all-MiniLM-l6-v2")
63
  vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
@@ -89,10 +62,11 @@ def user_input(user_question, api_key):
89
  chain = get_conversational_chain()
90
 
91
  response = chain(
92
- {"input_documents": docs, "question": user_question},
93
- return_only_outputs=True
94
- )
95
 
 
 
96
  st.write("Replies:")
97
  if isinstance(response["output_text"], str):
98
  response_list = [response["output_text"]]
@@ -108,30 +82,28 @@ def main():
108
 
109
  st.set_page_config(layout="centered")
110
  st.header("Chat with DOCS")
111
- st.markdown("<h1 style='font-size:24px;'>ChatBot by Muhammad Huzaifa</h1>", unsafe_allow_html=True)
112
  api_key = st.secrets["inference_api_key"]
113
 
114
  with st.sidebar:
115
  st.title("Menu:")
116
- uploaded_files = st.file_uploader("Upload your files (PDF, DOCX, PPTX, TXT)", accept_multiple_files=True)
117
  if st.button("Submit & Process"):
118
  with st.spinner("Processing..."):
119
- raw_text = ""
120
- for file in uploaded_files:
121
- file_text = get_text_from_file(file)
122
- raw_text += file_text
123
  text_chunks = get_text_chunks(raw_text)
124
  get_vector_store(text_chunks, api_key)
125
  st.success("Done")
126
 
127
  # Check if any document is uploaded
128
- if uploaded_files:
129
  user_question = st.text_input("Ask a question from the Docs")
130
 
131
  if user_question:
132
  user_input(user_question, api_key)
133
  else:
134
- st.write("Please upload a document (PDF, DOCX, PPTX, TXT) first to ask questions.")
135
 
 
136
  if __name__ == "__main__":
137
- main()
 
9
  import tempfile
10
  from gtts import gTTS
11
  import os
 
 
12
 
13
  def text_to_speech(text):
14
  tts = gTTS(text=text, lang='en')
 
18
  st.audio(temp_filename, format='audio/mp3')
19
  os.remove(temp_filename)
20
 
21
+ def get_pdf_text(pdf_docs):
22
+ text=""
23
+ for pdf in pdf_docs:
24
+ pdf_reader= PdfReader(pdf)
25
+ for page in pdf_reader.pages:
26
+ text+= page.extract_text()
27
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def get_text_chunks(text):
30
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
31
  chunks = text_splitter.split_text(text)
32
  return chunks
33
+
34
  def get_vector_store(text_chunks, api_key):
35
  embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=api_key, model_name="sentence-transformers/all-MiniLM-l6-v2")
36
  vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
 
62
  chain = get_conversational_chain()
63
 
64
  response = chain(
65
+ {"input_documents":docs, "question": user_question}
66
+ , return_only_outputs=True)
 
67
 
68
+ print(response) # Debugging line
69
+
70
  st.write("Replies:")
71
  if isinstance(response["output_text"], str):
72
  response_list = [response["output_text"]]
 
82
 
83
  st.set_page_config(layout="centered")
84
  st.header("Chat with DOCS")
85
+ st.markdown("<h1 style='font-size:20px;'>ChatBot by Muhammad Huzaifa</h1>", unsafe_allow_html=True)
86
  api_key = st.secrets["inference_api_key"]
87
 
88
  with st.sidebar:
89
  st.title("Menu:")
90
+ pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
91
  if st.button("Submit & Process"):
92
  with st.spinner("Processing..."):
93
+ raw_text = get_pdf_text(pdf_docs)
 
 
 
94
  text_chunks = get_text_chunks(raw_text)
95
  get_vector_store(text_chunks, api_key)
96
  st.success("Done")
97
 
98
  # Check if any document is uploaded
99
+ if pdf_docs:
100
  user_question = st.text_input("Ask a question from the Docs")
101
 
102
  if user_question:
103
  user_input(user_question, api_key)
104
  else:
105
+ st.write("Please upload a document first to ask questions.")
106
 
107
+
108
  if __name__ == "__main__":
109
+ main()