import time
from logging import getLogger

from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer

from mapReduceSummarizer import get_map_reduce_chain
from model import get_model
from preprocess import prepare_for_summarize
from refineSummarizer import get_refine_chain

logger = getLogger(__name__)

# Prompt used when the whole input is summarized in a single LLM call.
# NOTE: this text is sent to the model verbatim, so its wording (including
# the "Seperate" spelling) is deliberately left untouched.
_SHORT_TEXT_PROMPT = (
    "Write a concise and complete summary in bullet points of the given "
    "annual report. Important: * Note that the summary should contain all "
    "important information and it should not contain any unwanted "
    "information. * Make sure to keep the summary as short as possible. "
    "And Summary should be in bullet points. Seperate each point with a "
    "new line. \n"
    "TEXT: {text} SUMMARY:"
)


def _timed(run):
    """Call ``run()``, print how long it took, and return ``(result, seconds)``.

    The duration is rounded to two decimals. A monotonic clock is used so
    that system clock adjustments cannot distort the measurement.
    """
    start = time.perf_counter()
    result = run()
    elapsed = round(time.perf_counter() - start, 2)
    print(f"Summary generation took {elapsed}s.")
    return result, elapsed


def summarizer_init(model_name, model_type, api_key=None) -> tuple:
    """Load the tokenizer and the base summarization model.

    Args:
        model_name: Name passed to ``AutoTokenizer.from_pretrained`` and to
            ``get_model``.
        model_type: Backend selector forwarded to ``get_model``.
        api_key: Optional key forwarded to ``get_model``.

    Returns:
        A ``(tokenizer, base_summarizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    base_summarizer = get_model(model_type, model_name, api_key)
    return tokenizer, base_summarizer


def summarizer_summarize(model_type, tokenizer, base_summarizer, text: str,
                         summarizer_type="map_reduce") -> tuple:
    """Summarize ``text`` and report how long generation took.

    ``prepare_for_summarize`` decides whether the input is "short". Short
    input is summarized in one call: through a prompt chain for the
    ``"openai"`` model type, or by calling ``base_summarizer`` directly for
    ``"local"``. Any other input goes through a refine chain when
    ``summarizer_type == "refine"`` and a map-reduce chain otherwise.

    Args:
        model_type: ``"openai"`` or ``"local"`` for short input; forwarded
            unchanged to the chain factories for long input.
        tokenizer: Tokenizer handed to ``prepare_for_summarize``.
        base_summarizer: Model object returned by ``summarizer_init``.
        text: Raw text to summarize.
        summarizer_type: ``"refine"`` selects the refine chain; every other
            value selects map-reduce.

    Returns:
        A ``(summary, elapsed_seconds)`` tuple, with the duration rounded to
        two decimals.

    Raises:
        ValueError: If the input is short and ``model_type`` is neither
            ``"openai"`` nor ``"local"``.
    """
    text_to_summarize, length_type = prepare_for_summarize(text, tokenizer)

    if length_type == "short":
        logger.info("Processing Input Text less than 12000 Tokens")

        if model_type == "openai":
            prompt = PromptTemplate.from_template(template=_SHORT_TEXT_PROMPT)
            llm_chain = prompt | base_summarizer
            return _timed(
                lambda: llm_chain.invoke({"text": text_to_summarize})
            )

        if model_type == "local":
            return _timed(
                lambda: base_summarizer(text_to_summarize)[0]["summary_text"]
            )

        # Previously this case fell through and returned None, which only
        # surfaced later as an unpacking error in the caller.
        raise ValueError(
            f"Unsupported model_type for short text: {model_type!r} "
            "(expected 'openai' or 'local')"
        )

    # Long input: the two strategies differ only in how the chain is built.
    if summarizer_type == "refine":
        print("The text is too long, Running Refine Summarizer")
        llm_chain = get_refine_chain(base_summarizer, model_type)
        logger.info("Running Refine Chain for Summarization")
    else:
        print("The text is too long, Running Map Reduce Summarizer")
        llm_chain = get_map_reduce_chain(base_summarizer, model_type=model_type)
        logger.info("Running Map Reduce Chain for Summarization")

    # NOTE(review): assumes prepare_for_summarize returns chain-ready
    # documents for long input - confirm against preprocess.py.
    return _timed(
        lambda: llm_chain.invoke(
            {"input_documents": text_to_summarize},
            return_only_outputs=True,
        )["output_text"]
    )