import os

import torch
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import (
    AutoModelForCausalLM,
    LlamaTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    pipeline,
)

# Authenticate with the Hugging Face Hub. Read the token from the
# environment (e.g. a Space secret) instead of hard-coding it in source.
login(token=os.environ["HF_TOKEN"])
# Load the PEFT adapter config and the base Llama-2 chat model, then
# apply the fine-tuned adapter on top of it.
config = PeftConfig.from_pretrained("tkay264/model-test")  # data-tk
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    # load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)
model = PeftModel.from_pretrained(model, "tkay264/model-test")  # data-tk

# Merge the adapter weights into the base model for faster inference.
m = model.merge_and_unload()

tok = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tok.bos_token_id = 1  # ensure the Llama BOS token id is set

stop_token_ids = [0]  # token ids at which generation should stop
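
# StoppingCriteria/StoppingCriteriaList are imported but otherwise unused.
# A minimal sketch (an assumption, not part of the original app) of how
# stop_token_ids could be wired into generation:
class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the last emitted token is a stop id."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1].item() in stop_token_ids

stopping_criteria = StoppingCriteriaList([StopOnTokens()])
# e.g. m.generate(..., stopping_criteria=stopping_criteria)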
from typing import Any, List, Optional
import gradio as gr
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain import PromptTemplate, LLMChain
# Wrap the merged text-generation model in a minimal LangChain LLM interface.
class HuggingFaceHugs(LLM):
    pipeline: Any

    def __init__(self, model, tokenizer, task="text-generation"):
        super().__init__()
        self.pipeline = pipeline(task, model=model, tokenizer=tokenizer)

    @property
    def _llm_type(self) -> str:
        return "huggingface_hub"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # Generate, optionally truncate at the first stop token, and
        # strip the echoed prompt from the pipeline output.
        text = self.pipeline(prompt, max_length=100)[0]["generated_text"]
        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text[len(prompt):]
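
# Hedged usage sketch (not in the original file): the wrapper can also be
# invoked directly through LangChain's legacy LLM call interface, which
# exercises the stop-token path above, e.g.
# hf = HuggingFaceHugs(model=m, tokenizer=tok)
# print(hf("Question: What is LoRA? Answer: ", stop=["\n"]))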
# Simple question-answering prompt; adjust the template as needed.
template = "Question: {input} Answer: "
prompt = PromptTemplate(template=template, input_variables=["input"])

# Build the LLM wrapper around the merged model (m) and tokenizer (tok)
# defined above, then chain it with the prompt.
hf_model = HuggingFaceHugs(model=m, tokenizer=tok)
chain = LLMChain(prompt=prompt, llm=hf_model)
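
# Optional smoke test outside Gradio (a hedged example, not in the original
# app): LLMChain.run returns just the generated string rather than a dict.
# print(chain.run("What does PEFT stand for?"))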
def echo(text):
    # LLMChain.__call__ returns a dict; extract just the generated text.
    response = chain(text)
    return response["text"]
# Create a Gradio interface with a textbox input and text output.
demo = gr.Interface(
    fn=echo,
    inputs=gr.Textbox(placeholder="Enter text here", lines=2),
    outputs="text",
)
if __name__ == "__main__":
    demo.launch()