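"""Hugging Face Space: summarization demo.

Summarizes an uploaded .txt file or pasted text with databricks/dolly-v2-3b
through a LangChain "refine" summarization chain, served via Gradio.
"""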
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms import HuggingFacePipeline
# LlamaTokenizer / LlamaForCausalLM are only needed by the disabled Alpaca
# pipeline below.
# from transformers import LlamaTokenizer, LlamaForCausalLM
import gradio as gr
print("Loading Pipeline Dolly...")
# print("Loading Pipeline...", str(File.name))
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-3b", padding_side="left")
base_model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-3b", device_map="auto", trust_remote_code=True, torch_dtype=torch.bfloat16)
instruct_pipeline = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=2048,                      # total budget for prompt + generation
    temperature=0.6,
    pad_token_id=tokenizer.eos_token_id,  # tokenizer defines no pad token
    top_p=0.95,
    repetition_penalty=1.2,
)
print("Dolly pipeline loaded!")
llm_dolly = HuggingFacePipeline(pipeline=instruct_pipeline)
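# The Alpaca pipeline below is kept for reference but commented out; the
# "Alpaca" dropdown choice currently falls back to Dolly in summarize().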
# print("Loading Pipeline Alpaca...")
# tokenizer_alpaca = LlamaTokenizer.from_pretrained('minlik/chinese-alpaca-plus-7b-merged')
# model_alpaca = LlamaForCausalLM.from_pretrained('minlik/chinese-alpaca-plus-7b-merged')
# instruct_pipeline_alpaca = pipeline(
# "text-generation",
# model=model_alpaca,
# tokenizer=tokenizer_alpaca,
# max_length=1024,
# temperature=0.6,
# pad_token_id=tokenizer_alpaca.eos_token_id,
# top_p=0.95,
# repetition_penalty=1.2,
# device_map= "auto"
# )
# print("Pipeline Loaded Alpaca!")
# llm_alpaca = HuggingFacePipeline(pipeline=instruct_pipeline_alpaca)
def summarize(Model, File, Input_text):
    """Summarize an uploaded .txt file (takes priority) or pasted text."""
    prompt_template = """Write a concise summary of the following:
{text}
Summary in English:
"""
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
    text_splitter = CharacterTextSplitter()

    # Prefer the uploaded file; otherwise use the text box.
    if File:
        with open(str(File.name)) as f:
            text = f.read()
    else:
        text = Input_text

    # Split the input into chunks and keep only the first three so the
    # refine chain stays within the pipeline's 2048-token budget.
    texts = text_splitter.split_text(text)
    docs = [Document(page_content=t) for t in texts[:3]]

    if Model == 'Dolly':
        chain = load_summarize_chain(llm_dolly, chain_type="refine", question_prompt=PROMPT)
    else:
        # The Alpaca pipeline is disabled above, so "Alpaca" falls back to Dolly.
        chain = load_summarize_chain(llm_dolly, chain_type="refine", question_prompt=PROMPT)

    summary_text = chain({"input_documents": docs}, return_only_outputs=True)
    return summary_text["output_text"]
demo = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Dropdown(["Dolly", "Alpaca"], label="Model"),
        gr.File(label="Upload .txt file"),
        gr.Textbox(label="Input text"),
    ],
    outputs="text",
    title="Summarization Tool",
)
demo.queue().launch()