import os

import torch
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import (
    AutoModelForCausalLM,
    LlamaTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    pipeline,
)

# Authenticate with the Hugging Face Hub. Read the token from the
# environment (e.g. a Space secret) instead of hard-coding it in source.
login(token=os.environ["HF_TOKEN"])
# Load the PEFT adapter config and the base Llama-2 chat model, then
# apply the fine-tuned adapter on top of it.
config = PeftConfig.from_pretrained("tkay264/model-test")  # data-tk
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    # load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)
model = PeftModel.from_pretrained(model, "tkay264/model-test")  # data-tk

# Merge the adapter weights into the base model for faster inference.
m = model.merge_and_unload()

tok = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tok.bos_token_id = 1  # ensure the Llama BOS token id is set

stop_token_ids = [0]  # token ids at which generation should stop
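
# StoppingCriteria/StoppingCriteriaList are imported but otherwise unused.
# A minimal sketch (an assumption, not part of the original app) of how
# stop_token_ids could be wired into generation:
class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the last emitted token is a stop id."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1].item() in stop_token_ids

stopping_criteria = StoppingCriteriaList([StopOnTokens()])
# e.g. m.generate(..., stopping_criteria=stopping_criteria)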
from typing import Any, List, Optional
import gradio as gr
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain import PromptTemplate, LLMChain
# Wrap the merged text-generation model in a minimal LangChain LLM interface.
class HuggingFaceHugs(LLM):
    pipeline: Any

    def __init__(self, model, tokenizer, task="text-generation"):
        super().__init__()
        self.pipeline = pipeline(task, model=model, tokenizer=tokenizer)

    @property
    def _llm_type(self) -> str:
        return "huggingface_hub"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # Generate, optionally truncate at the first stop token, and
        # strip the echoed prompt from the pipeline output.
        text = self.pipeline(prompt, max_length=100)[0]["generated_text"]
        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text[len(prompt):]
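
# Hedged usage sketch (not in the original file): the wrapper can also be
# invoked directly through LangChain's legacy LLM call interface, which
# exercises the stop-token path above, e.g.
# hf = HuggingFaceHugs(model=m, tokenizer=tok)
# print(hf("Question: What is LoRA? Answer: ", stop=["\n"]))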
# Simple question-answering prompt; adjust the template as needed.
template = "Question: {input} Answer: "
prompt = PromptTemplate(template=template, input_variables=["input"])

# Build the LLM wrapper around the merged model (m) and tokenizer (tok)
# defined above, then chain it with the prompt.
hf_model = HuggingFaceHugs(model=m, tokenizer=tok)
chain = LLMChain(prompt=prompt, llm=hf_model)
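
# Optional smoke test outside Gradio (a hedged example, not in the original
# app): LLMChain.run returns just the generated string rather than a dict.
# print(chain.run("What does PEFT stand for?"))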
def echo(text):
    # LLMChain.__call__ returns a dict; extract just the generated text.
    response = chain(text)
    return response["text"]
# Create a Gradio interface with a textbox input and text output.
demo = gr.Interface(
    fn=echo,
    inputs=gr.Textbox(placeholder="Enter text here", lines=2),
    outputs="text",
)
if __name__ == "__main__":
    demo.launch()