"""Gradio demo that summarizes user-supplied text with Llama-2-7b-chat.

The language model is driven through a Hugging Face ``text-generation``
pipeline wrapped in a LangChain ``LLMChain``.
"""

import functools
import os

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer
from langchain import LLMChain, HuggingFacePipeline, PromptTemplate

# Hugging Face access token, read from the "Llama2" environment variable
# (None when the variable is not set).
access_token = os.getenv("Llama2")

# Hub identifier of the chat model used for summarization.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# Prompt sent to the model; "{text}" is replaced by the text to summarize.
SUMMARY_TEMPLATE = (
    " Write a summary of the following text delimited by triple backticks."
    " Return your response which covers the key points of the text."
    " ```{text}``` SUMMARY: "
)


@functools.lru_cache(maxsize=1)
def _get_summary_chain():
    """Build the summarization chain once and reuse it on later calls.

    Creating the tokenizer and the generation pipeline is by far the most
    expensive step of a request, so the resulting chain is cached instead of
    being rebuilt every time ``greet`` runs.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=access_token)
    pipeline = transformers.pipeline(
        "text-generation",
        model=MODEL_ID,
        tokenizer=tokenizer,
        # The pipeline loads the model weights from MODEL_ID itself, so it
        # needs the same access token as the tokenizer.
        token=access_token,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        max_length=1000,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": 0})
    prompt = PromptTemplate(template=SUMMARY_TEMPLATE, input_variables=["text"])
    return LLMChain(prompt=prompt, llm=llm)


def greet(token, text=None):
    """Return a model-generated summary of the given text.

    The function can be called in two ways:

    * ``greet(text)`` -- this is how the Gradio click handler below calls it,
      since it is wired to a single input component.
    * ``greet(token, text)`` -- the original two-argument form. ``token`` is
      not used; authentication relies on the module-level ``access_token``.

    Args:
        token: Ignored when ``text`` is given; otherwise the text to summarize.
        text: The text to summarize, or ``None`` for the one-argument form.

    Returns:
        The output of the summarization chain, or an empty string when the
        input is empty or contains only whitespace.
    """
    if text is None:
        # Single-argument call: the only value received is the text itself.
        text = token
    if not text or not text.strip():
        # Nothing to summarize; skip the expensive generation step.
        return ""
    return _get_summary_chain().run(text)


# Minimal UI: one input box, one output box, and a button that runs ``greet``.
with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    summary = gr.Textbox(label="Summary")
    greet_btn = gr.Button("Submit")
    greet_btn.click(fn=greet, inputs=text, outputs=summary, api_name="greet")

demo.launch()