# NOTE: the original listing began with Hugging Face Spaces build-log residue
# ("Spaces:" / "Build error" lines) that is not part of the program.
import os
import uuid
from pathlib import Path

import streamlit as st

# set_page_config must run before any other Streamlit command in the script.
st.set_page_config(layout="wide")

from annotated_text import annotated_text, annotation
import fitz  # PyMuPDF: PDF reading
import chromadb
import pandas as pd
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import spacy

# The platform stores the secret as OPEN_API_KEY; the OpenAI SDK reads
# OPENAI_API_KEY.  Bridge the two, but fail with a readable message instead
# of an unhandled KeyError when the secret is not configured.
_open_api_key = os.environ.get('OPEN_API_KEY')
if _open_api_key is None:
    st.error("Environment variable OPEN_API_KEY is not set.")
else:
    os.environ['OPENAI_API_KEY'] = _open_api_key

st.title("Contracts Summary ")

# Load the medium English model from SpaCy (must be installed in the image).
nlp = spacy.load("en_core_web_md")
def util_upload_file_and_return_list_docs(uploaded_files):
    """Persist each uploaded file to the working directory and open it with PyMuPDF.

    Parameters
    ----------
    uploaded_files : iterable
        Streamlit ``UploadedFile`` objects; each must expose ``.name`` and
        ``.getvalue()``.

    Returns
    -------
    tuple[list, list]
        ``(list_docs, list_save_path)`` — the opened ``fitz`` documents and
        the ``Path`` each upload was written to, in the same order.
    """
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        # NOTE(review): files land in the process CWD; same-named uploads
        # overwrite each other.
        save_path = Path(os.getcwd(), uploaded_file.name)
        save_path.write_bytes(uploaded_file.getvalue())
        list_docs.append(fitz.open(save_path))
        list_save_path.append(save_path)
    return (list_docs, list_save_path)
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Concatenate the text of every page of each document.

    Parameters
    ----------
    list_docs : list
        Opened documents; each is iterable over pages exposing ``get_text()``.
    list_save_path : list
        Unused; kept so existing callers' signatures still match.

    Returns
    -------
    list[str]
        One string per document: its pages' text joined in page order.
    """
    # ''.join avoids the quadratic repeated string concatenation of the
    # original accumulator loop.
    return [''.join(page.get_text() for page in docs) for docs in list_docs]
| documents = [] | |
def get_summary_single_doc(text):
    """Summarize one document's full text with a LangChain "refine" chain.

    The text is split into ~3000-character chunks (20 overlap); the first
    chunk is summarized, then the summary is iteratively refined with each
    subsequent chunk.

    Parameters
    ----------
    text : str
        The full plain text of a single document.

    Returns
    -------
    str
        The final refined summary produced by the LLM.
    """
    # Removed: duplicate `from langchain.llms import OpenAI` (imported twice
    # in the original) and the unused StreamingStdOutCallbackHandler import.
    from langchain.chains.summarize import load_summarize_chain
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.prompts import PromptTemplate
    from langchain.chat_models import ChatOpenAI

    LLM_KEY = os.environ.get("OPEN_API_KEY")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20
    )
    # Wrap the raw string into LangChain Document chunks.
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary with key learnings\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # Using OpenAI's chat model via LangChain.
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)

    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
# Main UI: a single form with usage instructions, a multi-file uploader,
# and a "Summary" button that summarizes each uploaded PDF.
with st.form("my_form"):
    multi = '''1. Download and Upload contract (PDF) .
e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
'''
    st.markdown(multi)
    multi = '''2. Press Summary .'''
    st.markdown(multi)
    multi = '''
** Attempt is made for summary ** \n
'''
    st.markdown(multi)

    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

    # file_uploader(accept_multiple_files=True) returns a (possibly empty)
    # list, never None — so `is not None` was always true.  A truthiness
    # check also skips the submit-with-no-files case.
    if submitted and uploaded_files:
        list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
        documents = util_get_list_page_and_passage(list_docs, list_save_path)
        for index, item in enumerate(documents):
            st.write(f'Summary{index + 1} :: ')
            st.write(get_summary_single_doc(item))