elia-waefler committed on
Commit
6b63bdc
1 Parent(s): aac6922

add columns

Browse files
Files changed (1) hide show
  1. app_V2.py +48 -46
app_V2.py CHANGED
@@ -1,5 +1,4 @@
1
  import tempfile
2
- import time
3
  import streamlit as st
4
  from PyPDF2 import PdfReader
5
  from langchain.text_splitter import CharacterTextSplitter
@@ -11,9 +10,9 @@ from langchain.chains import ConversationalRetrievalChain
11
  import os
12
  import pickle
13
  from datetime import datetime
14
- from backend.generate_metadata import generate_metadata, ingest, MODEL_NAME
15
-
16
 
 
17
  css = '''
18
  <style>
19
  .chat-message {
@@ -58,46 +57,6 @@ user_template = '''
58
  </div>
59
  '''
60
 
61
- def main():
62
-
63
- st.set_page_config(page_title="Doc Verify RAG", page_icon=":mag:")
64
- st.write('Anomaly detection for document metadata', unsafe_allow_html=True)
65
- st.header("Doc Verify RAG :mag:")
66
-
67
- if "openai_api_key" not in st.session_state:
68
- st.session_state.openai_api_key = False
69
- if "openai_org" not in st.session_state:
70
- st.session_state.openai_org = False
71
- if "classify" not in st.session_state:
72
- st.session_state.classify = False
73
-
74
- col1, col2 = st.columns(2)
75
- with col1:
76
- uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf", "txt"])
77
-
78
- if uploaded_file is not None:
79
- try:
80
- with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp:
81
- tmp.write(uploaded_file.read())
82
- file_path = tmp.name
83
- st.write(f'Created temporary file {file_path}')
84
-
85
- docs = ingest(file_path)
86
- st.write('## Querying Together.ai API')
87
- metadata = generate_metadata(docs)
88
- st.write(f'## Metadata Generated by {MODEL_NAME}')
89
- st.write(metadata)
90
-
91
- # Clean up the temporary file
92
- os.remove(file_path)
93
-
94
- except Exception as e:
95
- st.error(f'Error: {e}')
96
- with col2:
97
- if st.button("Abbruch MFH Holzweg 13"):
98
- st.session_state.user_space = "deconstruction"
99
-
100
-
101
 
102
  def get_pdf_text(pdf_docs):
103
  text = ""
@@ -166,13 +125,11 @@ def safe_vec_store():
166
  pickle.dump(vector_store, f)
167
 
168
 
 
169
  def main():
170
 
171
 
172
 
173
- def set_pw():
174
- st.session_state.openai_api_key = True
175
-
176
  st.subheader("Your documents")
177
 
178
  if st.session_state.classify:
@@ -239,6 +196,51 @@ def main():
239
 
240
  if st.button("Load Embeddings"):
241
  st.warning("this function is not in use, just upload the vectorstore")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
 
244
  if __name__ == '__main__':
 
1
  import tempfile
 
2
  import streamlit as st
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import CharacterTextSplitter
 
10
  import os
11
  import pickle
12
  from datetime import datetime
13
+ from backend.generate_metadata import generate_metadata, ingest
 
14
 
15
+ MODEL_NAME = "mixtral"
16
  css = '''
17
  <style>
18
  .chat-message {
 
57
  </div>
58
  '''
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def get_pdf_text(pdf_docs):
62
  text = ""
 
125
  pickle.dump(vector_store, f)
126
 
127
 
128
+ """
129
  def main():
130
 
131
 
132
 
 
 
 
133
  st.subheader("Your documents")
134
 
135
  if st.session_state.classify:
 
196
 
197
  if st.button("Load Embeddings"):
198
  st.warning("this function is not in use, just upload the vectorstore")
199
+ """
200
+
201
+
202
+ def main():
203
+
204
+ st.set_page_config(page_title="Doc Verify RAG", page_icon=":mag:")
205
+ st.write('Anomaly detection for document metadata', unsafe_allow_html=True)
206
+ st.header("Doc Verify RAG :mag:")
207
+
208
+ def set_pw():
209
+ st.session_state.openai_api_key = True
210
+
211
+ if "openai_api_key" not in st.session_state:
212
+ st.session_state.openai_api_key = False
213
+ if "openai_org" not in st.session_state:
214
+ st.session_state.openai_org = False
215
+ if "classify" not in st.session_state:
216
+ st.session_state.classify = False
217
+
218
+ col1, col2 = st.columns(2)
219
+ with col1:
220
+ uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf", "txt"])
221
+
222
+ if uploaded_file is not None:
223
+ try:
224
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp:
225
+ tmp.write(uploaded_file.read())
226
+ file_path = tmp.name
227
+ st.write(f'Created temporary file {file_path}')
228
+
229
+ docs = ingest(file_path)
230
+ st.write('## Querying Together.ai API')
231
+ metadata = generate_metadata(docs)
232
+ st.write(f'## Metadata Generated by {MODEL_NAME}')
233
+ st.write(metadata)
234
+
235
+ # Clean up the temporary file
236
+ os.remove(file_path)
237
+
238
+ except Exception as e:
239
+ st.error(f'Error: {e}')
240
+ with col2:
241
+ OPENAI_API_KEY = st.text_input("OPENAI API KEY:", type="password",
242
+ disabled=st.session_state.openai_api_key, on_change=set_pw)
243
+ classification = st.file_uploader("upload the metadata", type=["csv", "txt"])
244
 
245
 
246
  if __name__ == '__main__':