# Summarization demo: Dolly v2 (3B) served through a LangChain "refine"
# summarization chain, with a Gradio front end.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# LlamaTokenizer / LlamaForCausalLM are only needed for the (currently
# disabled) Alpaca pipeline below.
# from transformers import LlamaTokenizer, LlamaForCausalLM
import torch
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms import HuggingFacePipeline
import gradio as gr

print("Loading Dolly pipeline...")
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-3b", padding_side="left")
base_model = AutoModelForCausalLM.from_pretrained(
    "databricks/dolly-v2-3b",
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
instruct_pipeline = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=2048,
    do_sample=True,  # required for temperature / top_p to take effect
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2,
    pad_token_id=tokenizer.eos_token_id,
)
print("Dolly pipeline loaded!")
llm_dolly = HuggingFacePipeline(pipeline=instruct_pipeline)

# Alpaca pipeline, currently disabled. Uncomment this block (and the Llama
# imports above) to enable the second model choice in the UI.
# print("Loading Alpaca pipeline...")
# tokenizer_alpaca = LlamaTokenizer.from_pretrained("minlik/chinese-alpaca-plus-7b-merged")
# model_alpaca = LlamaForCausalLM.from_pretrained("minlik/chinese-alpaca-plus-7b-merged")
# instruct_pipeline_alpaca = pipeline(
#     "text-generation",
#     model=model_alpaca,
#     tokenizer=tokenizer_alpaca,
#     max_length=1024,
#     temperature=0.6,
#     pad_token_id=tokenizer_alpaca.eos_token_id,
#     top_p=0.95,
#     repetition_penalty=1.2,
#     device_map="auto",
# )
# print("Alpaca pipeline loaded!")
# llm_alpaca = HuggingFacePipeline(pipeline=instruct_pipeline_alpaca)


def summarize(Model, File, Input_text):
    """Summarize an uploaded .txt file (preferred) or pasted text."""
    prompt_template = """Write a concise summary of the following:

{text}

Summary in English: """
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

    text_splitter = CharacterTextSplitter()
    if File:
        with open(str(File.name)) as f:
            text = f.read()
    else:
        text = Input_text

    texts = text_splitter.split_text(text)
    # Cap at the first three chunks to keep latency manageable on a 3B model.
    docs = [Document(page_content=t) for t in texts[:3]]
    print("Docs to summarize:", docs)

    # Both branches currently use Dolly: the Alpaca pipeline above is
    # commented out, so the "Alpaca" choice falls back to Dolly.
    chain = load_summarize_chain(llm_dolly, chain_type="refine", question_prompt=PROMPT)
    summary_text = chain({"input_documents": docs}, return_only_outputs=True)
    print(summary_text["output_text"])
    return summary_text["output_text"]


demo = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Dropdown(choices=["Dolly", "Alpaca"], label="Model"),
        gr.File(label="Upload .txt file"),
        gr.Textbox(label="Or paste text here"),
    ],
    outputs="text",
    title="Summarization Tool",
)
demo.queue().launch()
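# ---------------------------------------------------------------------------
# Usage sketch (my addition; the version pins and filename are assumptions,
# not part of the original script). The imports above use LangChain's
# pre-split module layout, so pin an old release if newer ones fail:
#
#   pip install torch transformers accelerate "langchain<0.1" gradio
#   python app.py          # assumes this file is saved as app.py
#
# Quick smoke test without the Gradio UI, once the Dolly pipeline has loaded
# (model choice "Dolly", no file upload, raw text passed directly):
#
#   summarize("Dolly", None, "Dolly v2 is an instruction-tuned model ...")
# ---------------------------------------------------------------------------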