"""Gradio demo: summarize input text with a Guanaco-33B text-generation pipeline."""

import os
from functools import lru_cache

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer
from langchain import LLMChain, HuggingFacePipeline, PromptTemplate

# Hugging Face access token, read from the "Llama2" secret/env var; None if unset.
ACCESS_TOKEN = os.getenv("Llama2")

# Model checkpoint used for summarization.
MODEL_ID = "timdettmers/guanaco-33b-merged"

# Prompt template; {text} is filled in by the LLMChain.
SUMMARY_TEMPLATE = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""


@lru_cache(maxsize=1)
def _load_chain():
    """Build the summarization chain once and cache it.

    Loading a 33B-parameter model is very expensive; the original code
    rebuilt the tokenizer, pipeline, and chain on every request.  Caching
    keeps loading lazy (it happens on the first call) but reuses the same
    pipeline for all subsequent requests.

    Returns:
        LLMChain: prompt + HuggingFace pipeline ready to run on raw text.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=ACCESS_TOKEN)
    pipeline = transformers.pipeline(
        "text-generation",
        model=MODEL_ID,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",  # let accelerate place layers across available devices
        max_length=1000,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        token=ACCESS_TOKEN,
    )
    llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": 0})
    prompt = PromptTemplate(template=SUMMARY_TEMPLATE, input_variables=["text"])
    return LLMChain(prompt=prompt, llm=llm)


def greet(text):
    """Return a concise summary of *text*.

    Args:
        text: Raw input text from the Gradio textbox.

    Returns:
        str: The model-generated summary.
    """
    return _load_chain().run(text)


with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    summary = gr.Textbox(label="Summary")
    greet_btn = gr.Button("Submit")
    clear = gr.ClearButton([text, summary])
    # Expose the handler on the REST API as well (api_name="greet").
    greet_btn.click(fn=greet, inputs=text, outputs=summary, api_name="greet")

demo.launch()