Spaces:

IronOne-AI-Labs
/

Annual_Report_Summarization_Demo

Running

App Files Files Community

Annual_Report_Summarization_Demo / app.py

RMWeerasinghe

Initial Commit

a4f8505 5 months ago

raw

history blame

No virus

10.7 kB

	import datetime
	import logging
	import nltk
	import validators
	import streamlit as st
	from summarizer import Summarizer
	from config import MODELS
	from warnings import filterwarnings

	filterwarnings("ignore")
	from utils import (
	clean_text,
	fetch_article_text,
	preprocess_text_for_abstractive_summarization,
	read_text_from_file,
	)


	from rouge import Rouge

	# def filer():
	# # return "logs/log "
	# today = datetime.datetime.today()
	# log_filename = f"logs/{today.year}-{today.month:02d}-{today.day:02d}.log"
	# return log_filename

	# file_handler = logging.FileHandler(filer())
	# # file_handler = logging.handlers.TimedRotatingFileHandler(filer(),when="D")
	# file_handler.setLevel(logging.INFO)

	# logging.basicConfig(
	# level=logging.DEBUG,
	# format="%(asctime)s %(levelname)s (%(name)s) : %(message)s",
	# datefmt="%Y-%m-%d %H:%M:%S",
	# handlers=[file_handler],
	# force=True,
	# )

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)


	# Seed the "api_key" session entry on the first run of a session so that
	# later reads of st.session_state.api_key always find a value. The
	# placeholder is a single space, not an empty string.
	if "api_key" not in st.session_state:
	    st.session_state["api_key"] = " "


	@st.cache_resource
	def initialize_app():
	    """Download the NLTK "punkt" resource.

	    The function is decorated with ``st.cache_resource`` and takes no
	    arguments, so Streamlit can reuse the cached call across script reruns
	    instead of invoking the download each time.
	    """
	    tokenizer_resource = "punkt"
	    nltk.download(tokenizer_resource)

	@st.cache_resource
	def init_summarizer(model_name, api_key=None):
	    """Build the ``Summarizer`` for the model selected in the UI.

	    Args:
	        model_name: Key into ``MODELS``. The value ``"OpenAI"`` selects the
	            ``"openai"`` model type; every other name is treated as
	            ``"local"``.
	        api_key: Forwarded to ``Summarizer`` only for the OpenAI model.

	    Returns:
	        A ``Summarizer`` built from the ``MODELS`` entry for ``model_name``.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``MODELS``.
	    """
	    # Resolve the path first so an unknown model name fails before any
	    # Summarizer is constructed.
	    model_path = MODELS[model_name]

	    if model_name == "OpenAI":
	        # NOTE: API-key validation is not implemented yet; the key is passed
	        # through as-is.
	        return Summarizer(model_path, "openai", api_key)

	    logger.info(f"Model for summarization : {model_path}")
	    return Summarizer(model_path, "local")

	def load_app():
	    """Render the input page and gather everything needed to summarize.

	    Draws the title, the sidebar controls, the help text, the text/URL box,
	    the file uploader, an expander previewing the input, and the
	    "Summarize" button.

	    The input source is chosen in this order: a value that
	    ``validators.url`` accepts is fetched with ``fetch_article_text``;
	    otherwise an uploaded file is read and cleaned; otherwise the raw text
	    box content is cleaned.

	    Returns:
	        A 4-tuple ``(text_to_summarize, model_name, summarizer_type,
	        summarize)`` where ``summarize`` is the value returned by the
	        "Summarize" button for this script run.
	    """
	    st.title("Text Summarizer 📝")

	    # st.markdown("Creator: [Atharva Ingle](https://github.com/Gladiator07)")
	    # st.markdown(
	    # "Source code: [GitHub Repository](https://github.com/Gladiator07/Text-Summarizer)"
	    # )

	    # --- Sidebar controls -------------------------------------------------
	    sidebar = st.sidebar
	    model_name = sidebar.selectbox(
	        "Model Name", options=["Version 0", "Version 1", "OpenAI"]
	    )
	    if model_name == "OpenAI":
	        # The widget stores its value under st.session_state["api_key"].
	        sidebar.text_input(
	            "Enter a valid OpenAI API Key", key="api_key", type="password"
	        )
	    summarizer_type = sidebar.selectbox(
	        "Summarizer Type for Long Text", options=["Map Reduce", "Refine"]
	    )

	    # --- Help text --------------------------------------------------------
	    st.markdown(
	        "Enter a text or a url to get a concise summary of the article while conserving the overall meaning. This app supports text in the following formats:"
	    )
	    st.markdown(
	        """- Raw text in text box
	- URL of article/news to be summarized
	- .txt, .pdf, .docx file formats"""
	    )
	    st.markdown(
	        """This app supports abstractive summarization of documents:

	Abstractive Summarization: The abstractive approach involves rephrasing the complete document while capturing the complete meaning of the document. This type of summarization provides more human-like summary"""
	    )
	    st.markdown("---")

	    # --- Input widgets ----------------------------------------------------
	    inp_text = st.text_input("Enter text or a url here")
	    st.markdown(
	        "<h3 style='text-align: center; color: green;'>OR</h3>",
	        unsafe_allow_html=True,
	    )
	    uploaded_file = st.file_uploader(
	        "Upload a .txt, .pdf, .docx file for summarization"
	    )

	    # --- Resolve the text to work on --------------------------------------
	    is_url = validators.url(inp_text)
	    if is_url:
	        logger.info("Text Input Type: URL")
	        # fetch_article_text returns (complete text, chunks to summarize);
	        # only the chunks are used here.
	        _, cleaned_txt = fetch_article_text(url=inp_text)
	    elif uploaded_file:
	        logger.info("Text Input Type: FILE")
	        cleaned_txt = clean_text(read_text_from_file(uploaded_file))
	    else:
	        logger.info("Text Input Type: INPUT TEXT")
	        cleaned_txt = clean_text(inp_text)

	    # Preview of the input: for a URL only the first chunk is shown.
	    with st.expander("View input text"):
	        st.write(cleaned_txt[0] if is_url else cleaned_txt)
	    summarize = st.button("Summarize")

	    # URL input arrives as chunks and is flattened to one space-joined
	    # string; the other sources are already a single cleaned value.
	    text_to_summarize = " ".join(cleaned_txt) if is_url else cleaned_txt

	    return text_to_summarize, model_name, summarizer_type, summarize




	def get_summary(text_to_summarize, model_name, summarizer_type, summarize):
	    """Summarize the text once the "Summarize" button has been clicked.

	    Args:
	        text_to_summarize: The text handed to ``summarizer.summarize``.
	        model_name: Model selection, forwarded to ``init_summarizer``.
	        summarizer_type: ``"Refine"`` selects the ``"refine"`` strategy; any
	            other value selects ``"map_reduce"``.
	        summarize: Truthy when the button was clicked in this script run.

	    Returns:
	        The ``(summarized_text, time)`` pair produced by
	        ``summarizer.summarize``. When ``summarize`` is falsy the function
	        does not return: it ends the current script run via ``st.stop()``.
	    """
	    if not summarize:
	        # `summarize` is a plain local and cannot change during this script
	        # run, so looping on it would spin forever. Halt the run instead;
	        # Streamlit reruns the script when the button is clicked, and that
	        # run arrives here with `summarize` set.
	        st.stop()

	    logger.info(f"Model Name: {model_name}")
	    logger.info(f"Summarization Type for Long Text: {summarizer_type}")

	    summarizer = init_summarizer(model_name, st.session_state.api_key)
	    strategy = "refine" if summarizer_type == "Refine" else "map_reduce"

	    with st.spinner(text="Creating summary. This might take a few seconds ..."):
	        summarized_text, elapsed = summarizer.summarize(text_to_summarize, strategy)

	    return summarized_text, elapsed




	def display_output(summarized_text, time):
	    """Log the summary and its duration, then show both on the page.

	    Args:
	        summarized_text: The summary to log and display.
	        time: The duration value to log and display; it is rendered with an
	            ``s`` suffix.
	    """
	    logger.info(f"SUMMARY: {summarized_text}")
	    logger.info(f"Summary took {time}s")

	    st.subheader("Summarized text")
	    # Two info boxes: the summary itself, then the timing line.
	    for message in (f"{summarized_text}", f"Time: {time}s"):
	        st.info(message)


	# def summarizer_app():
	# # ---------------------------------
	# # Main Application
	# # ---------------------------------
	# st.title("Text Summarizer 📝")

	# # st.markdown("Creator: [Atharva Ingle](https://github.com/Gladiator07)")
	# # st.markdown(
	# # "Source code: [GitHub Repository](https://github.com/Gladiator07/Text-Summarizer)"
	# # )
	# model_name = st.sidebar.selectbox(
	# "Model Name", options=["Version 0", "Version 1","OpenAI"]
	# )
	# if model_name == "OpenAI":
	# st.sidebar.text_input("Enter a valid OpenAI API Key",key = "api_key" ,type="password")

	# summarizer_type = st.sidebar.selectbox(
	# "Summarizer Type for Long Text", options=["Map Reduce", "Refine"]
	# )

	# st.markdown(
	# "Enter a text or a url to get a concise summary of the article while conserving the overall meaning. This app supports text in the following formats:"
	# )
	# st.markdown(
	# """- Raw text in text box
	# - URL of article/news to be summarized
	# - .txt, .pdf, .docx file formats"""
	# )
	# st.markdown(
	# """This app supports two type of summarization:

	# 1. Extractive Summarization: The extractive approach involves picking up the most important phrases and lines from the documents. It then combines all the important lines to create the summary. So, in this case, every line and word of the summary actually belongs to the original document which is summarized.
	# 2. Abstractive Summarization: The abstractive approach involves rephrasing the complete document while capturing the complete meaning of the document. This type of summarization provides more human-like summary"""
	# )
	# st.markdown("---")
	# # ---------------------------
	# # SETUP & Constants
	# # nltk.download("punkt")
	# # abs_tokenizer_name = "facebook/bart-large-cnn"
	# # abs_model_name = "facebook/bart-large-cnn"
	# # abs_tokenizer = AutoTokenizer.from_pretrained(abs_tokenizer_name)
	# # abs_max_length = 90
	# # abs_min_length = 30

	# # model_name_v0 = "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v0"
	# # model_name_v1 = "IronOne-AI-Labs/long-t5-tglobal-16k-annual-reports-v1"
	# # ---------------------------
	# inp_text = st.text_input("Enter text or a url here")
	# st.markdown(
	# "<h3 style='text-align: center; color: green;'>OR</h3>",
	# unsafe_allow_html=True,
	# )
	# uploaded_file = st.file_uploader(
	# "Upload a .txt, .pdf, .docx file for summarization"
	# )

	# is_url = validators.url(inp_text)
	# if is_url:
	# # complete text, chunks to summarize (list of sentences for long docs)
	# logger.info("Text Input Type: URL")
	# text, cleaned_txt = fetch_article_text(url=inp_text)
	# elif uploaded_file:
	# logger.info("Text Input Type: FILE")
	# cleaned_txt = read_text_from_file(uploaded_file)
	# cleaned_txt = clean_text(cleaned_txt)
	# else:
	# logger.info("Text Input Type: INPUT TEXT")
	# cleaned_txt = clean_text(inp_text)

	# # view summarized text (expander)
	# with st.expander("View input text"):
	# if is_url:
	# st.write(cleaned_txt[0])
	# else:
	# st.write(cleaned_txt)
	# summarize = st.button("Summarize")

	# # called on toggle button [summarize]
	# if summarize:
	# if is_url:
	# text_to_summarize = " ".join([txt for txt in cleaned_txt])
	# else:
	# text_to_summarize = cleaned_txt

	# logger.info(f"Model Name: {model_name}")
	# logger.info(f"Summarization Type for Long Text: {summarizer_type}")

	# api_key = st.session_state.api_key

	# print(api_key)

	# summarizer = init_summarizer(model_name,api_key)

	# with st.spinner(
	# text="Creating summary. This might take a few seconds ..."
	# ):
	# #ext_model = Summarizer()
	# #summarized_text = ext_model(text_to_summarize, num_sentences=5)

	# if summarizer_type == "Refine":
	# summarized_text, time = summarizer.summarize(text_to_summarize,"refine")
	# else :
	# summarized_text, time = summarizer.summarize(text_to_summarize,"map_reduce")


	# # elif model_name == "Version 1":
	# # with st.spinner(
	# # text="Creating summary. This might take a few seconds ..."
	# # ):
	# # if summarizer_type == "Refine":
	# # summarized_text, time = summarizer_v1.summarize(text_to_summarize,"refine")
	# # else :
	# # summarized_text, time = summarizer_v1.summarize(text_to_summarize,"map_reduce")

	# # final summarized output

	# logger.info(f"SUMMARY: {summarized_text}")
	# logger.info(f"Summary took {time}s")
	# st.subheader("Summarized text")
	# st.info(f"{summarized_text}")
	# st.info(f"Time: {time}s")

	# # st.subheader("Rogue Scores")
	# # rouge_sc = Rouge()
	# # ground_truth = cleaned_txt[0] if is_url else cleaned_txt
	# # score = rouge_sc.get_scores(summarized_text, ground_truth, avg=True)
	# # st.code(score)


	if __name__ == "__main__":
	    # Script flow: one-time setup, render the UI and collect inputs, run
	    # the summarizer, then show the result.
	    initialize_app()
	    app_inputs = load_app()  # (text, model name, summarizer type, clicked)
	    summary, elapsed = get_summary(*app_inputs)
	    display_output(summary, elapsed)