"""Gradio app that summarizes text with a Llama 2 model via a LangChain chain.

Importing/running this module loads the Hugging Face model, builds the
summarization chain, and launches the Gradio web interface.
"""

# Commented-out alternative kept for reference:
# import gradio as gr
# gr.load("models/manohar02/Llama-2-7b-quantize").launch()

import gradio as gr
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
from transformers import AutoTokenizer
import transformers
import torch

# Define the Hugging Face model (repository id on the Hugging Face Hub).
model = "manohar02/Llama-2-7b-quantize"

# Load the tokenizer once and share it with the pipeline; previously it was
# loaded a second time only to read ``eos_token_id``.
tokenizer = AutoTokenizer.from_pretrained(model)

# Define the Hugging Face pipeline.
# NOTE(review): max_length=20000 likely exceeds the model's context window
# (Llama 2 is documented at 4096 tokens) -- confirm, and consider
# ``max_new_tokens`` to bound only the generated summary.
pipeline = transformers.pipeline(
    "text-generation",  # task
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    max_length=20000,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,  # stop at end-of-sequence
)

# NOTE(review): temperature 0 looks contradictory with do_sample=True above;
# confirm whether HuggingFacePipeline forwards ``model_kwargs`` to an already
# constructed pipeline at all, or whether this setting is silently unused.
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0})

# Define the template for summarization. The input text is wrapped in triple
# backquotes so that the delimiter matches what the instruction announces.
template = """
Write a concise summary of the following text delimited by triple backquotes.
```{text}```
SUMMARY:
"""

prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm)


# Function to generate summary
def generate_summary(text: str) -> str:
    """Return a summary of *text* produced by the LLM chain.

    Args:
        text: The text to summarize, as entered in the Gradio text box.

    Returns:
        The chain output following the last ``SUMMARY:`` marker, stripped of
        surrounding whitespace. Returns an empty string when *text* is empty
        or contains only whitespace (the model is not invoked in that case).
    """
    # Nothing to summarize: skip the expensive model call entirely.
    if not text or not text.strip():
        return ""

    summary = llm_chain.run(text)
    # Keep only what follows the final "SUMMARY:" marker in case the output
    # echoes the prompt; if the marker is absent the whole output is kept.
    return summary.rpartition('SUMMARY:')[2].strip()


# Create a Gradio interface
iface = gr.Interface(
    fn=generate_summary,
    inputs="text",
    outputs="text",
    title="LLaMA2 Summarizer",
)
iface.launch()