Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import requests
|
|
11 |
import time
|
12 |
import re
|
13 |
import textract
|
14 |
-
import zipfile
|
15 |
|
16 |
|
17 |
from datetime import datetime
|
@@ -20,7 +20,6 @@ from xml.etree import ElementTree as ET
|
|
20 |
from bs4 import BeautifulSoup
|
21 |
from collections import deque
|
22 |
from audio_recorder_streamlit import audio_recorder
|
23 |
-
|
24 |
from dotenv import load_dotenv
|
25 |
from PyPDF2 import PdfReader
|
26 |
from langchain.text_splitter import CharacterTextSplitter
|
@@ -31,7 +30,9 @@ from langchain.memory import ConversationBufferMemory
|
|
31 |
from langchain.chains import ConversationalRetrievalChain
|
32 |
from templates import css, bot_template, user_template
|
33 |
|
34 |
-
|
|
|
|
|
35 |
|
36 |
def generate_filename(prompt, file_type):
|
37 |
central = pytz.timezone('US/Central')
|
@@ -55,7 +56,10 @@ def transcribe_audio(openai_key, file_path, model):
|
|
55 |
#st.write('Responses:')
|
56 |
#st.write(chatResponse)
|
57 |
filename = generate_filename(transcript, 'txt')
|
58 |
-
create_file(filename, transcript, chatResponse)
|
|
|
|
|
|
|
59 |
return transcript
|
60 |
else:
|
61 |
st.write(response.json())
|
@@ -72,7 +76,9 @@ def save_and_play_audio(audio_recorder):
|
|
72 |
return filename
|
73 |
return None
|
74 |
|
75 |
-
def create_file(filename, prompt, response):
|
|
|
|
|
76 |
if filename.endswith(".txt"):
|
77 |
with open(filename, 'w') as file:
|
78 |
file.write(f"{prompt}\n{response}")
|
@@ -275,55 +281,64 @@ def process_user_input(user_question):
|
|
275 |
st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
|
276 |
# Save file output from PDF query results
|
277 |
filename = generate_filename(user_question, 'txt')
|
278 |
-
create_file(filename, user_question, message.content)
|
279 |
-
|
|
|
|
|
280 |
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
281 |
|
282 |
-
def
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
zipf.write(file)
|
294 |
-
|
295 |
-
|
296 |
|
297 |
-
def get_zip_download_link(
|
298 |
-
|
|
|
|
|
|
|
299 |
data = f.read()
|
300 |
-
|
301 |
b64 = base64.b64encode(data).decode()
|
302 |
-
|
303 |
-
href = f'<a href="data:application/zip;base64,{b64}" target="_blank" download="{file_name}">Download All Files</a>'
|
304 |
return href
|
305 |
|
306 |
|
307 |
def main():
|
308 |
-
# Sidebar and global
|
309 |
openai.api_key = os.getenv('OPENAI_API_KEY')
|
310 |
-
st.set_page_config(page_title="GPT Streamlit Document Reasoner",layout="wide")
|
311 |
|
312 |
# File type for output, model choice
|
313 |
-
menu = ["
|
314 |
choice = st.sidebar.selectbox("Output File Type:", menu)
|
315 |
model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
|
316 |
-
|
317 |
# Audio, transcribe, GPT:
|
318 |
filename = save_and_play_audio(audio_recorder)
|
319 |
if filename is not None:
|
320 |
transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
|
321 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
322 |
-
filename=None
|
323 |
-
|
324 |
-
if st.sidebar.button("π¦ Download All"):
|
325 |
-
zip_file_path = create_zip_of_all_files()
|
326 |
-
st.sidebar.markdown(get_zip_download_link(zip_file_path), unsafe_allow_html=True)
|
327 |
|
328 |
# prompt interfaces
|
329 |
user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
|
@@ -333,9 +348,11 @@ def main():
|
|
333 |
with collength:
|
334 |
max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
|
335 |
with colupload:
|
336 |
-
uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx","csv","html", "htm", "md", "txt"])
|
|
|
337 |
|
338 |
# Document section chat
|
|
|
339 |
document_sections = deque()
|
340 |
document_responses = {}
|
341 |
if uploaded_file is not None:
|
@@ -358,24 +375,49 @@ def main():
|
|
358 |
st.write(response)
|
359 |
document_responses[i] = response
|
360 |
filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
|
361 |
-
create_file(filename, user_prompt, response)
|
362 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
363 |
|
364 |
if st.button('π¬ Chat'):
|
365 |
st.write('Reasoning with your inputs...')
|
366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
st.write('Response:')
|
368 |
st.write(response)
|
369 |
|
370 |
filename = generate_filename(user_prompt, choice)
|
371 |
-
create_file(filename, user_prompt, response)
|
372 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
373 |
|
374 |
all_files = glob.glob("*.*")
|
375 |
all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20] # exclude files with short names
|
376 |
all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
|
377 |
|
378 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
379 |
file_contents=''
|
380 |
next_action=''
|
381 |
for file in all_files:
|
@@ -412,7 +454,7 @@ def main():
|
|
412 |
st.write('Reasoning with your inputs...')
|
413 |
response = chat_with_model(user_prompt, file_contents, model_choice)
|
414 |
filename = generate_filename(file_contents, choice)
|
415 |
-
create_file(filename,
|
416 |
|
417 |
st.experimental_rerun()
|
418 |
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
@@ -440,4 +482,5 @@ with st.sidebar:
|
|
440 |
st.session_state.conversation = get_chain(vectorstore)
|
441 |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
|
442 |
filename = generate_filename(raw, 'txt')
|
443 |
-
create_file(filename, raw, '')
|
|
|
|
11 |
import time
|
12 |
import re
|
13 |
import textract
|
14 |
+
import zipfile # New import for zipping files
|
15 |
|
16 |
|
17 |
from datetime import datetime
|
|
|
20 |
from bs4 import BeautifulSoup
|
21 |
from collections import deque
|
22 |
from audio_recorder_streamlit import audio_recorder
|
|
|
23 |
from dotenv import load_dotenv
|
24 |
from PyPDF2 import PdfReader
|
25 |
from langchain.text_splitter import CharacterTextSplitter
|
|
|
30 |
from langchain.chains import ConversationalRetrievalChain
|
31 |
from templates import css, bot_template, user_template
|
32 |
|
33 |
+
# Configure the page and create the sidebar "Save" toggle at import time so that
# `should_save` is a module-level name that the functions defined below can read
# when they decide whether to write output files.
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
# The label's emoji was mis-encoded ("πΎ"); restored to the intended floppy-disk glyph.
should_save = st.sidebar.checkbox("💾 Save")
|
36 |
|
37 |
def generate_filename(prompt, file_type):
|
38 |
central = pytz.timezone('US/Central')
|
|
|
56 |
#st.write('Responses:')
|
57 |
#st.write(chatResponse)
|
58 |
filename = generate_filename(transcript, 'txt')
|
59 |
+
#create_file(filename, transcript, chatResponse)
|
60 |
+
response = chatResponse
|
61 |
+
user_prompt = transcript
|
62 |
+
create_file(filename, user_prompt, response, should_save)
|
63 |
return transcript
|
64 |
else:
|
65 |
st.write(response.json())
|
|
|
76 |
return filename
|
77 |
return None
|
78 |
|
79 |
+
def create_file(filename, prompt, response, should_save=True):
|
80 |
+
if not should_save:
|
81 |
+
return
|
82 |
if filename.endswith(".txt"):
|
83 |
with open(filename, 'w') as file:
|
84 |
file.write(f"{prompt}\n{response}")
|
|
|
281 |
st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
|
282 |
# Save file output from PDF query results
|
283 |
filename = generate_filename(user_question, 'txt')
|
284 |
+
#create_file(filename, user_question, message.content)
|
285 |
+
response = message.content
|
286 |
+
user_prompt = user_question
|
287 |
+
create_file(filename, user_prompt, response, should_save)
|
288 |
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
289 |
|
290 |
+
def divide_prompt(prompt, max_length):
    """Split *prompt* on whitespace into chunks of at most *max_length* characters.

    Words are never broken apart, so a single word longer than ``max_length``
    becomes a chunk of its own that exceeds the limit. Words inside a chunk
    are re-joined with single spaces.

    Args:
        prompt: Text to split.
        max_length: Maximum length, in characters, of each joined chunk.

    Returns:
        A list of chunk strings. An empty or whitespace-only prompt yields
        ``['']`` so callers that iterate the result still run exactly once.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # exact length of ' '.join(current_chunk)
    for word in prompt.split():
        # A separating space is only needed when the chunk already holds a word.
        projected = current_length + len(word) + (1 if current_chunk else 0)
        if current_chunk and projected > max_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            # Also taken for an oversized first word: never flush an empty
            # chunk, which would send an empty prompt section downstream.
            current_chunk.append(word)
            current_length = projected
    chunks.append(' '.join(current_chunk))  # Append the final chunk
    return chunks
|
305 |
+
|
306 |
+
def create_zip_of_files(files):
    """Create ``all_files.zip`` in the current directory from a list of files.

    Entries are deflate-compressed to keep the archive small. The archive's
    own path is skipped if it appears in *files*, so the zip never tries to
    add itself while it is being written.

    Args:
        files: Iterable of file paths to add to the archive.

    Returns:
        The archive file name, ``"all_files.zip"``.
    """
    zip_name = "all_files.zip"
    zip_path = os.path.abspath(zip_name)
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for file in files:
            if os.path.abspath(file) == zip_path:
                continue
            zipf.write(file)
    return zip_name
|
315 |
+
|
316 |
|
317 |
+
def get_zip_download_link(zip_file):
    """Build an HTML anchor that downloads *zip_file* via an inline data URI.

    The file is read fully into memory and base64-encoded into the ``href``.
    The ``download`` attribute carries only the file's base name, HTML-escaped,
    so a directory path or special characters in the name cannot break the
    markup.

    Args:
        zip_file: Path to the zip archive to embed.

    Returns:
        The ``<a ...>Download All</a>`` markup as a string.

    Raises:
        OSError: If *zip_file* cannot be opened or read.
    """
    # Local imports keep this function self-contained.
    import html
    import os

    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    download_name = html.escape(os.path.basename(zip_file), quote=True)
    href = f'<a href="data:application/zip;base64,{b64}" download="{download_name}">Download All</a>'
    return href
|
326 |
|
327 |
|
328 |
def main():
|
|
|
329 |
openai.api_key = os.getenv('OPENAI_API_KEY')
|
|
|
330 |
|
331 |
# File type for output, model choice
|
332 |
+
menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
|
333 |
choice = st.sidebar.selectbox("Output File Type:", menu)
|
334 |
model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
|
335 |
+
|
336 |
# Audio, transcribe, GPT:
|
337 |
filename = save_and_play_audio(audio_recorder)
|
338 |
if filename is not None:
|
339 |
transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
|
340 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
341 |
+
filename = None
|
|
|
|
|
|
|
|
|
342 |
|
343 |
# prompt interfaces
|
344 |
user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
|
|
|
348 |
with collength:
|
349 |
max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
|
350 |
with colupload:
|
351 |
+
uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])
|
352 |
+
|
353 |
|
354 |
# Document section chat
|
355 |
+
|
356 |
document_sections = deque()
|
357 |
document_responses = {}
|
358 |
if uploaded_file is not None:
|
|
|
375 |
st.write(response)
|
376 |
document_responses[i] = response
|
377 |
filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
|
378 |
+
create_file(filename, user_prompt, response, should_save)
|
379 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
380 |
|
381 |
if st.button('π¬ Chat'):
|
382 |
st.write('Reasoning with your inputs...')
|
383 |
+
|
384 |
+
#response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************
|
385 |
+
|
386 |
+
# Divide the user_prompt into smaller sections
|
387 |
+
user_prompt_sections = divide_prompt(user_prompt, max_length)
|
388 |
+
full_response = ''
|
389 |
+
for prompt_section in user_prompt_sections:
|
390 |
+
# Process each section with the model
|
391 |
+
response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
|
392 |
+
full_response += response + '\n' # Combine the responses
|
393 |
+
|
394 |
+
#st.write('Response:')
|
395 |
+
#st.write(full_response)
|
396 |
+
|
397 |
+
response = full_response
|
398 |
st.write('Response:')
|
399 |
st.write(response)
|
400 |
|
401 |
filename = generate_filename(user_prompt, choice)
|
402 |
+
create_file(filename, user_prompt, response, should_save)
|
403 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
404 |
|
405 |
all_files = glob.glob("*.*")
|
406 |
all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20] # exclude files with short names
|
407 |
all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
|
408 |
|
409 |
+
# Added "Delete All" button
|
410 |
+
if st.sidebar.button("π Delete All"):
|
411 |
+
for file in all_files:
|
412 |
+
os.remove(file)
|
413 |
+
st.experimental_rerun()
|
414 |
+
|
415 |
+
# Added "Download All" button
|
416 |
+
if st.sidebar.button("β¬οΈ Download All"):
|
417 |
+
zip_file = create_zip_of_files(all_files)
|
418 |
+
st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
|
419 |
+
|
420 |
+
# Sidebar of Files Saving History and surfacing files as context of prompts and responses
|
421 |
file_contents=''
|
422 |
next_action=''
|
423 |
for file in all_files:
|
|
|
454 |
st.write('Reasoning with your inputs...')
|
455 |
response = chat_with_model(user_prompt, file_contents, model_choice)
|
456 |
filename = generate_filename(file_contents, choice)
|
457 |
+
create_file(filename, user_prompt, response, should_save)
|
458 |
|
459 |
st.experimental_rerun()
|
460 |
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
|
|
482 |
st.session_state.conversation = get_chain(vectorstore)
|
483 |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
|
484 |
filename = generate_filename(raw, 'txt')
|
485 |
+
create_file(filename, raw, '', should_save)
|
486 |
+
#create_file(filename, raw, '')
|