Spaces:

manohar02
/

manohar02-Llama-2-7b-quantize111

Runtime error

Update app.py

1c750c2 verified 11 days ago

No virus

1.26 kB

	# Earlier one-line version (loads the hosted model directly), kept for reference:
	# import gradio as gr
	#
	# gr.load("models/manohar02/Llama-2-7b-quantize").launch()


	import gradio as gr
	from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
	from transformers import AutoTokenizer
	import transformers
	import torch

	# Hugging Face Hub repository id of the checkpoint used for summarization.
	model = "manohar02/Llama-2-7b-quantize"

	# Upper bound on the number of tokens generated per request. The previous
	# setting, max_length=20000, limited prompt + output together and is far
	# larger than the context window Llama-2-7B checkpoints are normally
	# configured with (NOTE(review): confirm against this model's config).
	MAX_NEW_TOKENS = 512

	# Load the tokenizer once: it supplies the end-of-sequence id and is handed
	# to the pipeline so the pipeline does not load a second copy.
	tokenizer = AutoTokenizer.from_pretrained(model)

	# Text-generation pipeline with top-k sampling, returning a single sequence.
	pipeline = transformers.pipeline(
	    "text-generation",  # task
	    model=model,
	    tokenizer=tokenizer,
	    torch_dtype=torch.bfloat16,
	    max_new_tokens=MAX_NEW_TOKENS,
	    do_sample=True,
	    top_k=10,
	    num_return_sequences=1,
	    eos_token_id=tokenizer.eos_token_id,
	)

	# Wrap the pipeline for LangChain. The former model_kwargs={'temperature': 0}
	# was dropped: it contradicts do_sample=True above (sampling needs a
	# strictly positive temperature).
	# NOTE(review): LangChain is understood not to forward model_kwargs to
	# generation when a ready-made pipeline is passed in -- confirm.
	llm = HuggingFacePipeline(pipeline=pipeline)

	# Prompt template for summarization. The input text is wrapped in triple
	# backquotes so that the delimiter matches what the instruction sentence
	# tells the model to look for (it was previously wrapped in ''' instead).
	template = """
	Write a concise summary of the following text delimited by triple backquotes.
	```{text}```
	SUMMARY:
	"""

	# The template has a single placeholder, {text}.
	prompt = PromptTemplate(template=template, input_variables=["text"])

	# Chain that fills the prompt and sends it to the wrapped pipeline.
	llm_chain = LLMChain(prompt=prompt, llm=llm)

	# Function to generate summary
	def generate_summary(text: str) -> str:
	    """Summarize ``text`` using the module-level ``llm_chain``.

	    Args:
	        text: The text to summarize.

	    Returns:
	        The portion of the chain output that follows the last ``SUMMARY:``
	        marker, with surrounding whitespace removed. Returns an empty
	        string when ``text`` is empty or contains only whitespace.
	    """
	    # Nothing to summarize: skip the model call for blank input.
	    if not text or not text.strip():
	        return ""

	    summary = llm_chain.run(text)
	    # If the output echoes the prompt, keep only what follows the marker.
	    return summary.split('SUMMARY:')[-1].strip()

	# Web UI: one text box in, one text box out, served by generate_summary.
	iface = gr.Interface(
	    fn=generate_summary,
	    inputs="text",
	    outputs="text",
	    title="LLaMA2 Summarizer",
	)
	iface.launch()