evanperez committed on
Commit 6b18013
1 Parent(s): 9f30023

Update app.py

Files changed (1)
  1. app.py +16 -12
app.py CHANGED
@@ -11,17 +11,14 @@ import os
 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextStreamer, ConversationalPipeline
 
-
 ####CREDIT#####
-#Credit to the author (Sri Laxmi) of the original code reference: SriLaxmi1993
-#Sri LaxmiGithub Link: https://github.com/SriLaxmi1993/Document-Genie-using-RAG-Framwork
-#Sri Laxmi Youtube:https://www.youtube.com/watch?v=SkY2u4UUr6M&t=112s
+# Credit to author (Sri Laxmi) of original code reference: SriLaxmi1993
+# Sri LaxmiGithub Link: https://github.com/SriLaxmi1993/Document-Genie-using-RAG-Framwork
+# Sri Laxmi Youtube:https://www.youtube.com/watch?v=SkY2u4UUr6M&t=112s
 ###############
 os.system("pip install -r requirements.txt")
 
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
+# some model
 
 #tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
 #model = AutoModelForCausalLM.from_pretrained("google/gemma-7b")
@@ -32,12 +29,13 @@ st.set_page_config(page_title="Gemini RAG", layout="wide")
 # This is the first API key input; no need to repeat it in the main function.
 api_key = 'AIzaSyCvXRggpO2yNwIpZmoMy_5Xhm2bDyD-pOo'
 
-#os.mkdir('faiss_index')
 
-#empty faise_index and chat_history.json
+# os.mkdir('faiss_index')
+
+# empty faise_index and chat_history.json
 def delete_files_in_folder(folder_path):
     try:
-
+        # Iterate over all the files in the folder
         chat_history_file = "chat_history.json"
         if os.path.exists(chat_history_file):
             os.remove(chat_history_file)
@@ -70,16 +68,19 @@ def get_pdf_text(pdf_docs):
         text += page.extract_text()
     return text
 
+
 def get_text_chunks(text):
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=CH_size, chunk_overlap=CH_overlap)
     chunks = text_splitter.split_text(text)
     return chunks
 
+
 def get_vector_store(text_chunks, api_key):
     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
+
 def get_conversational_chain():
     prompt_template = """
     Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
@@ -94,7 +95,8 @@ def get_conversational_chain():
     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
     return chain
 
-#chat history functionality
+
+# chat history functionality
 def update_chat_history(question, reply):
     # Check if chat history file exists
     chat_history_file = "chat_history.json"
@@ -146,7 +148,8 @@ def main():
     with st.sidebar:
         st.title("Menu:")
 
-        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True, key="pdf_uploader")
+        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button",
+                                    accept_multiple_files=True, key="pdf_uploader")
         if st.button("Submit & Process", key="process_button") and api_key:
            with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)
@@ -154,5 +157,6 @@ def main():
                 get_vector_store(text_chunks, api_key)
                 st.success("Done")
 
+
 if __name__ == "__main__":
     main()
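
The question-answering path that consumes this index sits outside the hunks shown above. As a hedged sketch only, this is how the faiss_index written by get_vector_store() and the "stuff" chain built by get_conversational_chain() typically fit together in a LangChain app of this shape; the function name answer_question and the import paths are assumptions, not part of this commit.

# Illustrative sketch -- not part of this commit. Import paths and the
# answer_question() name are assumptions; the embedding model, index folder,
# and chain type match the code in the diff above.
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

def answer_question(question, api_key, chain):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",
                                              google_api_key=api_key)
    # Recent LangChain releases require opting in before unpickling a
    # locally saved index such as "faiss_index".
    db = FAISS.load_local("faiss_index", embeddings,
                          allow_dangerous_deserialization=True)
    docs = db.similarity_search(question)  # retrieve the most relevant chunks
    response = chain({"input_documents": docs, "question": question},
                     return_only_outputs=True)
    return response["output_text"]

A caller would then pass response["output_text"] to update_chat_history() to persist the turn to chat_history.json, matching the helper annotated in the fourth hunk.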