awacke1 committed on
Commit
bf6cbe6
1 Parent(s): 348291b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -41
app.py CHANGED
@@ -11,7 +11,7 @@ import requests
11
  import time
12
  import re
13
  import textract
14
- import zipfile
15
 
16
 
17
  from datetime import datetime
@@ -20,7 +20,6 @@ from xml.etree import ElementTree as ET
20
  from bs4 import BeautifulSoup
21
  from collections import deque
22
  from audio_recorder_streamlit import audio_recorder
23
-
24
  from dotenv import load_dotenv
25
  from PyPDF2 import PdfReader
26
  from langchain.text_splitter import CharacterTextSplitter
@@ -31,7 +30,9 @@ from langchain.memory import ConversationBufferMemory
31
  from langchain.chains import ConversationalRetrievalChain
32
  from templates import css, bot_template, user_template
33
 
34
-
 
 
35
 
36
  def generate_filename(prompt, file_type):
37
  central = pytz.timezone('US/Central')
@@ -55,7 +56,10 @@ def transcribe_audio(openai_key, file_path, model):
55
  #st.write('Responses:')
56
  #st.write(chatResponse)
57
  filename = generate_filename(transcript, 'txt')
58
- create_file(filename, transcript, chatResponse)
 
 
 
59
  return transcript
60
  else:
61
  st.write(response.json())
@@ -72,7 +76,9 @@ def save_and_play_audio(audio_recorder):
72
  return filename
73
  return None
74
 
75
- def create_file(filename, prompt, response):
 
 
76
  if filename.endswith(".txt"):
77
  with open(filename, 'w') as file:
78
  file.write(f"{prompt}\n{response}")
@@ -275,55 +281,64 @@ def process_user_input(user_question):
275
  st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
276
  # Save file output from PDF query results
277
  filename = generate_filename(user_question, 'txt')
278
- create_file(filename, user_question, message.content)
279
-
 
 
280
  #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
281
 
282
- def create_zip_of_all_files():
283
- # Get all files in the current directory
284
- all_files = glob.glob("*.*")
285
-
286
- # Name the zip file based on the current date and time
287
- central = pytz.timezone('US/Central')
288
- safe_date_time = datetime.now(central).strftime("%Y%m%d_%H%M%S")
289
- zip_filename = f"all_files_{safe_date_time}.zip"
290
-
291
- with zipfile.ZipFile(zip_filename, 'w') as zipf:
292
- for file in all_files:
 
 
 
 
 
 
 
 
 
 
 
 
293
  zipf.write(file)
294
-
295
- return zip_filename
296
 
297
- def get_zip_download_link(zip_filepath):
298
- with open(zip_filepath, 'rb') as f:
 
 
 
299
  data = f.read()
300
-
301
  b64 = base64.b64encode(data).decode()
302
- file_name = os.path.basename(zip_filepath)
303
- href = f'<a href="data:application/zip;base64,{b64}" target="_blank" download="{file_name}">Download All Files</a>'
304
  return href
305
 
306
 
307
  def main():
308
- # Sidebar and global
309
  openai.api_key = os.getenv('OPENAI_API_KEY')
310
- st.set_page_config(page_title="GPT Streamlit Document Reasoner",layout="wide")
311
 
312
  # File type for output, model choice
313
- menu = ["htm", "txt", "xlsx", "csv", "md", "py"] #619
314
  choice = st.sidebar.selectbox("Output File Type:", menu)
315
  model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
316
-
317
  # Audio, transcribe, GPT:
318
  filename = save_and_play_audio(audio_recorder)
319
  if filename is not None:
320
  transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
321
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
322
- filename=None # since transcription is finished next time just use the saved transcript
323
-
324
- if st.sidebar.button("📦 Download All"):
325
- zip_file_path = create_zip_of_all_files()
326
- st.sidebar.markdown(get_zip_download_link(zip_file_path), unsafe_allow_html=True)
327
 
328
  # prompt interfaces
329
  user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
@@ -333,9 +348,11 @@ def main():
333
  with collength:
334
  max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
335
  with colupload:
336
- uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx","csv","html", "htm", "md", "txt"])
 
337
 
338
  # Document section chat
 
339
  document_sections = deque()
340
  document_responses = {}
341
  if uploaded_file is not None:
@@ -358,24 +375,49 @@ def main():
358
  st.write(response)
359
  document_responses[i] = response
360
  filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
361
- create_file(filename, user_prompt, response)
362
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
363
 
364
  if st.button('💬 Chat'):
365
  st.write('Reasoning with your inputs...')
366
- response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  st.write('Response:')
368
  st.write(response)
369
 
370
  filename = generate_filename(user_prompt, choice)
371
- create_file(filename, user_prompt, response)
372
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
373
 
374
  all_files = glob.glob("*.*")
375
  all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20] # exclude files with short names
376
  all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
377
 
378
- # sidebar of files
 
 
 
 
 
 
 
 
 
 
 
379
  file_contents=''
380
  next_action=''
381
  for file in all_files:
@@ -412,7 +454,7 @@ def main():
412
  st.write('Reasoning with your inputs...')
413
  response = chat_with_model(user_prompt, file_contents, model_choice)
414
  filename = generate_filename(file_contents, choice)
415
- create_file(filename, file_contents, response)
416
 
417
  st.experimental_rerun()
418
  #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
@@ -440,4 +482,5 @@ with st.sidebar:
440
  st.session_state.conversation = get_chain(vectorstore)
441
  st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
442
  filename = generate_filename(raw, 'txt')
443
- create_file(filename, raw, '')
 
 
11
  import time
12
  import re
13
  import textract
14
+ import zipfile # New import for zipping files
15
 
16
 
17
  from datetime import datetime
 
20
  from bs4 import BeautifulSoup
21
  from collections import deque
22
  from audio_recorder_streamlit import audio_recorder
 
23
  from dotenv import load_dotenv
24
  from PyPDF2 import PdfReader
25
  from langchain.text_splitter import CharacterTextSplitter
 
30
  from langchain.chains import ConversationalRetrievalChain
31
  from templates import css, bot_template, user_template
32
 
33
+ # Page config and the sidebar "Save" checkbox are declared up front at module level so the functions below can read should_save as a global
34
+ st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
35
+ should_save = st.sidebar.checkbox("💾 Save")
36
 
37
  def generate_filename(prompt, file_type):
38
  central = pytz.timezone('US/Central')
 
56
  #st.write('Responses:')
57
  #st.write(chatResponse)
58
  filename = generate_filename(transcript, 'txt')
59
+ #create_file(filename, transcript, chatResponse)
60
+ response = chatResponse
61
+ user_prompt = transcript
62
+ create_file(filename, user_prompt, response, should_save)
63
  return transcript
64
  else:
65
  st.write(response.json())
 
76
  return filename
77
  return None
78
 
79
+ def create_file(filename, prompt, response, should_save=True):
80
+ if not should_save:
81
+ return
82
  if filename.endswith(".txt"):
83
  with open(filename, 'w') as file:
84
  file.write(f"{prompt}\n{response}")
 
281
  st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
282
  # Save file output from PDF query results
283
  filename = generate_filename(user_question, 'txt')
284
+ #create_file(filename, user_question, message.content)
285
+ response = message.content
286
+ user_prompt = user_question
287
+ create_file(filename, user_prompt, response, should_save)
288
  #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
289
 
290
def divide_prompt(prompt, max_length):
    """Split *prompt* into word-aligned chunks of at most *max_length* characters.

    The prompt is split on whitespace and the words are re-joined with single
    spaces, so runs of whitespace (including newlines) are collapsed.

    Args:
        prompt: Text to split.
        max_length: Maximum length, in characters, of each joined chunk.

    Returns:
        A list of chunk strings. Words are never split, so a single word
        longer than *max_length* becomes its own (over-long) chunk. An empty
        or whitespace-only prompt yields ``['']`` so that a caller looping
        over the chunks still runs exactly once.
    """
    words = prompt.split()
    if not words:
        # Keep the historical one-empty-chunk result for blank prompts.
        return ['']

    chunks = []
    current_chunk = []
    current_length = 0  # always equals len(' '.join(current_chunk))
    for word in words:
        # Joining costs one separator space unless the chunk is still empty.
        separator = 1 if current_chunk else 0
        candidate_length = current_length + separator + len(word)
        if current_chunk and candidate_length > max_length:
            # Flush only non-empty chunks so an over-long first word does
            # not produce a leading '' chunk.
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = candidate_length
    chunks.append(' '.join(current_chunk))  # Append the final chunk
    return chunks
305
+
306
def create_zip_of_files(files):
    """
    Create a zip file from a list of files.

    The archive is always written to ``all_files.zip`` relative to the
    current working directory, replacing any existing file of that name.

    Args:
        files: Iterable of file paths to add to the archive.

    Returns:
        The archive file name (``"all_files.zip"``).

    Raises:
        FileNotFoundError: If one of *files* no longer exists when it is added.
    """
    zip_name = "all_files.zip"
    zip_path = os.path.abspath(zip_name)
    # Deflate the entries: the archive is later base64-embedded into the page,
    # so its size matters.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for file in files:
            # Never add the archive to itself; it is open for writing and
            # would be stored as a truncated, meaningless entry.
            if os.path.abspath(file) == zip_path:
                continue
            zipf.write(file)
    return zip_name
315
+
316
 
317
def get_zip_download_link(zip_file):
    """
    Generate a link to download the zip file.

    The whole file is read and embedded in the link as a base64 ``data:`` URI.

    Args:
        zip_file: Path of the zip file to embed.

    Returns:
        An HTML ``<a>`` element string labelled "Download All".

    Raises:
        OSError: If *zip_file* cannot be opened for reading.
    """
    import html  # local import: only needed here to escape the attribute value

    with open(zip_file, 'rb') as f:
        data = f.read()

    b64 = base64.b64encode(data).decode()
    # Suggest only the file's base name to the browser (not the full path),
    # and escape it because it is interpolated into an HTML attribute.
    file_name = html.escape(os.path.basename(zip_file), quote=True)
    href = f'<a href="data:application/zip;base64,{b64}" download="{file_name}">Download All</a>'
    return href
326
 
327
 
328
  def main():
 
329
  openai.api_key = os.getenv('OPENAI_API_KEY')
 
330
 
331
  # File type for output, model choice
332
+ menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
333
  choice = st.sidebar.selectbox("Output File Type:", menu)
334
  model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
335
+
336
  # Audio, transcribe, GPT:
337
  filename = save_and_play_audio(audio_recorder)
338
  if filename is not None:
339
  transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
340
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
341
+ filename = None
 
 
 
 
342
 
343
  # prompt interfaces
344
  user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
 
348
  with collength:
349
  max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
350
  with colupload:
351
+ uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])
352
+
353
 
354
  # Document section chat
355
+
356
  document_sections = deque()
357
  document_responses = {}
358
  if uploaded_file is not None:
 
375
  st.write(response)
376
  document_responses[i] = response
377
  filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
378
+ create_file(filename, user_prompt, response, should_save)
379
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
380
 
381
  if st.button('💬 Chat'):
382
  st.write('Reasoning with your inputs...')
383
+
384
+ #response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************
385
+
386
+ # Divide the user_prompt into smaller sections
387
+ user_prompt_sections = divide_prompt(user_prompt, max_length)
388
+ full_response = ''
389
+ for prompt_section in user_prompt_sections:
390
+ # Process each section with the model
391
+ response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
392
+ full_response += response + '\n' # Combine the responses
393
+
394
+ #st.write('Response:')
395
+ #st.write(full_response)
396
+
397
+ response = full_response
398
  st.write('Response:')
399
  st.write(response)
400
 
401
  filename = generate_filename(user_prompt, choice)
402
+ create_file(filename, user_prompt, response, should_save)
403
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
404
 
405
  all_files = glob.glob("*.*")
406
  all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20] # exclude files with short names
407
  all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
408
 
409
+ # Added "Delete All" button
410
+ if st.sidebar.button("🗑 Delete All"):
411
+ for file in all_files:
412
+ os.remove(file)
413
+ st.experimental_rerun()
414
+
415
+ # Added "Download All" button
416
+ if st.sidebar.button("⬇️ Download All"):
417
+ zip_file = create_zip_of_files(all_files)
418
+ st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
419
+
420
+ # Sidebar of Files Saving History and surfacing files as context of prompts and responses
421
  file_contents=''
422
  next_action=''
423
  for file in all_files:
 
454
  st.write('Reasoning with your inputs...')
455
  response = chat_with_model(user_prompt, file_contents, model_choice)
456
  filename = generate_filename(file_contents, choice)
457
+ create_file(filename, user_prompt, response, should_save)
458
 
459
  st.experimental_rerun()
460
  #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 
482
  st.session_state.conversation = get_chain(vectorstore)
483
  st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
484
  filename = generate_filename(raw, 'txt')
485
+ create_file(filename, raw, '', should_save)
486
+ #create_file(filename, raw, '')