Spaces:
Sleeping
Sleeping
import json
import os
import re

import numpy as np
import requests
import streamlit as st
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Progress marker that is echoed in the "Waiting for system to wake up"
# message; it is reset to "0" on every script run.
st.session_state.em = "0"

st.set_page_config(layout="wide")

# Banner image shown at the top of the page.
image_path = 'fire.jpg'
st.image(image_path, caption='', use_column_width=True)

# Evaluated BEFORE start2.py runs, so this is True only when 'docs' was
# already present in the session state when this run began.
started = 'docs' in st.session_state

# start2.py is executed in this module's namespace. Names used further down
# that are not defined in this file (e.g. ask, escape_markdown) are presumably
# provided by it -- TODO confirm.
# NOTE(review): exec() of a file is only acceptable while start2.py is a
# trusted file shipped with the app.
with open('start2.py') as startup_file:
    exec(startup_file.read())

# os.environ only accepts strings; assigning the None returned for a missing
# variable raises an opaque TypeError, so report the real problem instead.
openai_key = os.getenv('openkey')
if openai_key is None:
    st.error("The 'openkey' environment variable is not set; "
             "the OpenAI API key cannot be configured.")
    st.stop()
os.environ["OPENAI_API_KEY"] = openai_key
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    The file is opened with ``fitz.open``; ``fitz`` is not imported in this
    part of the file and must already be available in the module namespace.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string holding the ``get_text()`` output of all pages.
    """
    doc = fitz.open(pdf_path)
    try:
        # Join once at the end instead of growing a string page by page.
        return "".join(
            doc.load_page(page_num).get_text()
            for page_num in range(len(doc))
        )
    finally:
        # Close the document even when loading or extracting a page fails.
        doc.close()
def extract_text_from_pdf2(PDFfile):
    """Return the text of every page of a PDF, extracted with PyPDF2.

    Earlier versions ignored the argument and always read ``'pc.pdf'``; the
    file named by ``PDFfile`` is now the one that is opened.

    Args:
        PDFfile: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, concatenated in page order.
    """
    # Imported locally, as in the original, so PyPDF2 is only required when
    # this extractor is actually used.
    import PyPDF2

    # The context manager closes the file even if parsing fails.
    with open(PDFfile, 'rb') as pdf_stream:
        reader = PyPDF2.PdfReader(pdf_stream)
        # Print the number of pages.
        print(len(reader.pages))
        # Guard against a page yielding no text object at all.
        return "".join(page.extract_text() or "" for page in reader.pages)
def strip_repeated_dots_and_blanks(text):
    """Collapse runs of dots and spaces and tidy blank lines in *text*.

    The substitutions are applied in order:
      1. two or more consecutive dots become a single dot;
      2. two or more consecutive spaces become a single space;
      3. a line holding exactly one space becomes an empty line.

    Returns the cleaned string.
    """
    substitutions = (
        (r'\.{2,}', '.'),
        (r' {2,}', ' '),
        ('\n \n', '\n\n'),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
# Page title.
st.title('Peerstreet Question and Answer App')

# Question box and the button that submits it.
question = st.text_input("Type your question here:")
submit_button = st.button('Submit')

# Usage hints shown under the input.
st.markdown("For best results keep questions simple and to the point and use words that are likely to be found in the documents")
st.markdown(""" Sample Questions:
* When is the voting deadline?
* What is the expected recovery for MPDN's?
""")

# Three tabs: the answer, the source passages behind it, and an "about" page.
Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Information about this app"])

# Empty placeholders that are filled in once a question has been answered.
with Answer_tab:
    answer_placeholder = st.empty()
with Content_tab:
    content_placeholder = st.empty()
with Info_tab:
    st.markdown("""## Use at your own risk, accuracy of responses are not guaranteed.
This app bases its answers on 110 documents filed by the court. This does not include any scanned documents at this point
as it takes more work to retrieve the text from them. It does include most orders filed by the court up to Feb 29th.
This is a simple RAG (retrieval augmented generation) system and does not consider order of events when
retrieving information and generating responses. It can also easily misinterpret information, but information used to generate the
response is presented in the content tab with link to the full document so that you can read the details in its proper context.
""" )
# Offer the log of earlier questions and answers as a download.
# NOTE(review): the original indentation of this section was lost; if the
# button is meant to live inside the info tab, indent it under `with Info_tab:`.
try:
    with open('results.json', 'r') as file:
        content = file.read()
except FileNotFoundError:
    # No question has been answered yet: offer an empty JSON list instead of
    # crashing the page before the user can ask anything.
    content = "[]"
data_to_download = content.encode()

# "json" on its own is not a MIME type; browsers expect application/json.
st.download_button(label="Download Prior responses",
                   data=data_to_download,
                   file_name="results.json",
                   mime="application/json")
def _append_result(record, path='results.json'):
    """Append *record* to the JSON list stored at *path*.

    A missing or unparsable file is treated as an empty list. The updated list
    is serialised before the file is opened for writing, so a record that
    cannot be serialised does not truncate the existing history.
    """
    try:
        with open(path, 'r') as history_file:
            history = json.load(history_file)
    except (FileNotFoundError, json.JSONDecodeError):
        history = []
    history.append(record)
    payload = json.dumps(history, indent=4)
    with open(path, 'w') as history_file:
        history_file.write(payload)


def _format_sources(items, sources, titles, dates, chunks, similarities, base_url):
    """Build the markdown shown in the content tab, one section per passage.

    The sequences are walked in lockstep; each section shows the escaped
    passage followed by a link to its source document, the text block, the
    relevance score (two decimals) and the date.
    """
    return "".join(
        f"### Paragraph used. \n{escape_markdown(item)}\n\n"
        f" source: [{title}]({base_url}{source})"
        f" text block: {chunk} Relevance: {similarity:.2f} Date:{date}\n"
        for item, source, title, date, chunk, similarity
        in zip(items, sources, titles, dates, chunks, similarities)
    )


# Answer the question when the submit button is pressed.
# `ask` and `escape_markdown` are not defined in the visible part of this file.
if submit_button:
    if question:
        try:
            if started:
                (answer, selected_items, selected_sources, titles, dates,
                 selected_chunks, highest_simularities) = ask(question)
                answer_placeholder.markdown(escape_markdown(answer))

                # Show the supporting passages before touching the disk, so a
                # failed save cannot hide them.
                url = 'https://cases.stretto.com/public/x247/12208/PLEADINGS/'
                content_placeholder.markdown(_format_sources(
                    selected_items, selected_sources, titles, dates,
                    selected_chunks, highest_simularities, url))

                data_to_save = {
                    "query": question,
                    "answer": answer,
                    "selected_items": selected_items,
                    "selected_sources": selected_sources,
                    "selected_chunks": selected_chunks,
                    "highest_similarities": [f"{sim:.2f}" for sim in highest_simularities]
                }
                try:
                    _append_result(data_to_save)
                except (OSError, TypeError, ValueError) as save_error:
                    # Logging the response is best-effort: warn, but keep the
                    # answer that is already on screen.
                    st.warning(f"Could not save this response to results.json: {save_error}")
            else:
                # .get() keeps the waiting message working when 'ln' has not
                # been set in the session state yet.
                ln = st.session_state.get('ln', '')
                em = st.session_state.get('em', '')
                answer_placeholder.markdown(f"Waiting for system to wake up {ln} {em}")
        except Exception as e:
            # Last-resort boundary: show the error text in place of the answer.
            answer_placeholder.markdown(str(e))
    else:
        answer_placeholder.warning("Please type a question.")
| #if 'retriever' not in st.session_state: | |
| # st.session_state.em = "mm" | |
| #if 'retriever' not in st.session_state: | |
| # st.session_state.em = "1" | |
| # exec(open('start.py').read()) | |
| # st.session_state.em = "2" | |