# NOTE: the original listing began with Hugging Face Spaces build-log residue
# ("Spaces:" / "Build error" lines) that is not part of the program.
import os
import uuid
from pathlib import Path

import streamlit as st

# set_page_config must run before any other Streamlit command in the script.
st.set_page_config(layout="wide")

from annotated_text import annotated_text, annotation
import fitz  # PyMuPDF: PDF reading
import chromadb
import pandas as pd
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import spacy

# The platform stores the secret as OPEN_API_KEY; the OpenAI SDK reads
# OPENAI_API_KEY.  Bridge the two, but fail with a readable message instead
# of an unhandled KeyError when the secret is not configured.
_open_api_key = os.environ.get('OPEN_API_KEY')
if _open_api_key is None:
    st.error("Environment variable OPEN_API_KEY is not set.")
else:
    os.environ['OPENAI_API_KEY'] = _open_api_key

st.title("Contracts Summary ")

# Load the medium English model from SpaCy (must be installed in the image).
nlp = spacy.load("en_core_web_md")
def util_upload_file_and_return_list_docs(uploaded_files):
    """Persist each uploaded file to the working directory and open it with PyMuPDF.

    Parameters
    ----------
    uploaded_files : iterable
        Streamlit ``UploadedFile`` objects; each must expose ``.name`` and
        ``.getvalue()``.

    Returns
    -------
    tuple[list, list]
        ``(list_docs, list_save_path)`` — the opened ``fitz`` documents and
        the ``Path`` each upload was written to, in the same order.
    """
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        # NOTE(review): files land in the process CWD; same-named uploads
        # overwrite each other.
        save_path = Path(os.getcwd(), uploaded_file.name)
        save_path.write_bytes(uploaded_file.getvalue())
        list_docs.append(fitz.open(save_path))
        list_save_path.append(save_path)
    return (list_docs, list_save_path)
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Concatenate the text of every page of each document.

    Parameters
    ----------
    list_docs : list
        Opened documents; each is iterable over pages exposing ``get_text()``.
    list_save_path : list
        Unused; kept so existing callers' signatures still match.

    Returns
    -------
    list[str]
        One string per document: its pages' text joined in page order.
    """
    # ''.join avoids the quadratic repeated string concatenation of the
    # original accumulator loop.
    return [''.join(page.get_text() for page in docs) for docs in list_docs]
| documents = [] | |
def get_summary_single_doc(text):
    """Summarize one document's full text with a LangChain "refine" chain.

    The text is split into ~3000-character chunks (20 overlap); the first
    chunk is summarized, then the summary is iteratively refined with each
    subsequent chunk.

    Parameters
    ----------
    text : str
        The full plain text of a single document.

    Returns
    -------
    str
        The final refined summary produced by the LLM.
    """
    # Removed: duplicate `from langchain.llms import OpenAI` (imported twice
    # in the original) and the unused StreamingStdOutCallbackHandler import.
    from langchain.chains.summarize import load_summarize_chain
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.prompts import PromptTemplate
    from langchain.chat_models import ChatOpenAI

    LLM_KEY = os.environ.get("OPEN_API_KEY")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20
    )
    # Wrap the raw string into LangChain Document chunks.
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary with key learnings\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # Using OpenAI's chat model via LangChain.
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)

    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
# Main UI: a single form with usage instructions, a multi-file uploader,
# and a "Summary" button that summarizes each uploaded PDF.
with st.form("my_form"):
    multi = '''1. Download and Upload contract (PDF) .
e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
'''
    st.markdown(multi)
    multi = '''2. Press Summary .'''
    st.markdown(multi)
    multi = '''
** Attempt is made for summary ** \n
'''
    st.markdown(multi)

    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

    # file_uploader(accept_multiple_files=True) returns a (possibly empty)
    # list, never None — so `is not None` was always true.  A truthiness
    # check also skips the submit-with-no-files case.
    if submitted and uploaded_files:
        list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
        documents = util_get_list_page_and_passage(list_docs, list_save_path)
        for index, item in enumerate(documents):
            st.write(f'Summary{index + 1} :: ')
            st.write(get_summary_single_doc(item))