yogjoshi14 committed on
Commit
7a2d0fe
1 Parent(s): 011d448
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -17,7 +17,6 @@ import textract
17
  st.set_page_config(page_title="chatbot")
18
  st.title("Chat with Documents")
19
 
20
-
21
  num_of_top_selection = 3
22
  CHUNK_SIZE = 500
23
  CHUNK_OVERLAP = 50
@@ -63,8 +62,7 @@ def get_text_from_docx(docx):
63
  return text
64
 
65
  def get_text_from_text_file(text_file):
66
- with open(text_file, 'r', encoding='utf-8') as file:
67
- text = file.read()
68
  return text
69
 
70
  def get_text_from_other_file(file_path):
@@ -83,7 +81,7 @@ def load_documents(docs):
83
  elif doc.name.lower().endswith('.docx'):
84
  text += get_text_from_docx(doc)
85
  elif doc.name.lower().endswith(('.txt', '.md')):
86
- text += get_text_from_text_file(doc)
87
  else:
88
  # Handle other file types, you can extend this as needed
89
  text += get_text_from_other_file(doc)
@@ -128,7 +126,7 @@ def input_fields():
128
  # st.text_input("Pinecone environment")
129
  st.session_state.pinecone_index = index_name
130
  # st.text_input("Pinecone index name")
131
- st.session_state.source_docs = st.file_uploader(label="Upload Documents", type="pdf", accept_multiple_files=True)
132
  #
133
 
134
 
@@ -137,7 +135,8 @@ def process_documents():
137
  if not st.session_state.pinecone_api_key or not st.session_state.pinecone_env or not st.session_state.pinecone_index or not st.session_state.source_docs:
138
  st.warning(f"Please upload the documents and provide the missing fields.")
139
  else:
140
- try:
 
141
  # for source_doc in st.session_state.source_docs:
142
  if st.session_state.source_docs:
143
  #
@@ -149,8 +148,8 @@ def process_documents():
149
  texts = split_documents(documents)
150
  #
151
  st.session_state.retriever = embeddings_on_pinecone(texts)
152
- except Exception as e:
153
- st.error(f"An error occurred: {e}")
154
 
155
  def boot():
156
  #
 
17
  st.set_page_config(page_title="chatbot")
18
  st.title("Chat with Documents")
19
 
 
20
  num_of_top_selection = 3
21
  CHUNK_SIZE = 500
22
  CHUNK_OVERLAP = 50
 
62
  return text
63
 
64
  def get_text_from_text_file(text_file):
65
+ text = text_file.read()
 
66
  return text
67
 
68
  def get_text_from_other_file(file_path):
 
81
  elif doc.name.lower().endswith('.docx'):
82
  text += get_text_from_docx(doc)
83
  elif doc.name.lower().endswith(('.txt', '.md')):
84
+ text += str(get_text_from_text_file(doc))
85
  else:
86
  # Handle other file types, you can extend this as needed
87
  text += get_text_from_other_file(doc)
 
126
  # st.text_input("Pinecone environment")
127
  st.session_state.pinecone_index = index_name
128
  # st.text_input("Pinecone index name")
129
+ st.session_state.source_docs = st.file_uploader(label="Upload Documents", accept_multiple_files=True)
130
  #
131
 
132
 
 
135
  if not st.session_state.pinecone_api_key or not st.session_state.pinecone_env or not st.session_state.pinecone_index or not st.session_state.source_docs:
136
  st.warning(f"Please upload the documents and provide the missing fields.")
137
  else:
138
+ # try:
139
+ if True:
140
  # for source_doc in st.session_state.source_docs:
141
  if st.session_state.source_docs:
142
  #
 
148
  texts = split_documents(documents)
149
  #
150
  st.session_state.retriever = embeddings_on_pinecone(texts)
151
+ # except Exception as e:
152
+ # st.error(f"An error occurred: {e}")
153
 
154
  def boot():
155
  #