from typing import Any, List, Optional

import torch
import gradio as gr
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
    pipeline,
)
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain import PromptTemplate, LLMChain

# Authenticate with the Hugging Face Hub (required for the gated Llama-2 weights).
# Use your own access token here; never commit a real token to source control.
login("hf_...")

# Load the PEFT adapter config and the base Llama-2 chat model.
config = PeftConfig.from_pretrained("tkay264/model-test")  # data-tk
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    # load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)

# Attach the fine-tuned LoRA adapter and merge it into the base weights.
model = PeftModel.from_pretrained(model, "tkay264/model-test")  # data-tk
m = model.merge_and_unload()

tok = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tok.bos_token_id = 1

stop_token_ids = [0]  # token ids that could be used with StoppingCriteria if needed


class HuggingFaceHugs(LLM):
    """Minimal LangChain LLM wrapper around a transformers text-generation pipeline."""

    pipeline: Any

    def __init__(self, model, tokenizer, task="text-generation"):
        super().__init__()
        self.pipeline = pipeline(task, model=model, tokenizer=tokenizer)

    @property
    def _llm_type(self) -> str:
        return "huggingface_hub"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # max_length caps prompt + generated tokens combined.
        text = self.pipeline(prompt, max_length=100)[0]["generated_text"]
        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        # The pipeline echoes the prompt, so return only the newly generated text.
        return text[len(prompt):]


# Adjust the prompt template as needed.
template = "Question: {input} Answer: "
prompt = PromptTemplate(template=template, input_variables=["input"])

# Wrap the merged model and tokenizer in the LangChain LLM, then build the chain.
hf_model = HuggingFaceHugs(model=m, tokenizer=tok)
chain = LLMChain(prompt=prompt, llm=hf_model)


def echo(text):
    response = chain(text)
    return response["text"]  # return only the generated answer, not the full output dict


# Create a Gradio interface with a textbox input and a text output.
demo = gr.Interface(
    fn=echo,
    inputs=gr.Textbox(placeholder="Enter text here", lines=2),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()
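# Once the app is running, it can also be queried from another process.
# A minimal sketch using the gradio_client package (assumes Gradio's default
# local URL and the default "/predict" endpoint for gr.Interface; adjust to
# your own deployment):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("What is LoRA fine-tuning?", api_name="/predict"))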