from typing import Any, List, Optional
import os

import gradio as gr
import torch
from huggingface_hub import login
from langchain import PromptTemplate, LLMChain
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, LlamaTokenizer, pipeline

# Authenticate with the Hugging Face Hub. The token is read from an
# environment variable instead of being hard-coded in source; HF_TOKEN is an
# assumed variable name, so export your own access token under it first.
login(token=os.environ["HF_TOKEN"])

config = PeftConfig.from_pretrained("tkay264/model-test")
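# Optional sanity check (an assumption, not in the original script): the
# adapter's recorded base model should match the checkpoint loaded below.
assert config.base_model_name_or_path == "meta-llama/Llama-2-7b-chat-hf"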

# Load the base Llama-2 chat model in bfloat16, placed entirely on GPU 0.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)

# Attach the LoRA adapter, then merge its weights into the base model so the
# result behaves like a plain transformers model at inference time.
model = PeftModel.from_pretrained(model, "tkay264/model-test")
m = model.merge_and_unload()

tok = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tok.bos_token_id = 1  # make sure BOS is Llama's <s> token (id 1)


class HuggingFaceHugs(LLM):
    """Minimal LangChain wrapper exposing a transformers pipeline as an LLM."""

    pipeline: Any

    def __init__(self, model, tokenizer, task="text-generation"):
        super().__init__()
        self.pipeline = pipeline(task, model=model, tokenizer=tokenizer)

    @property
    def _llm_type(self) -> str:
        return "huggingface_hub"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # max_new_tokens caps only the generated continuation, so long prompts
        # are not swallowed by a fixed overall length budget.
        text = self.pipeline(prompt, max_new_tokens=100)[0]["generated_text"]
        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        # The pipeline echoes the prompt, so return only the completion.
        return text[len(prompt):]
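
# Quick smoke test of the wrapper on its own (hypothetical prompt; LangChain
# LLM instances are directly callable):
#   HuggingFaceHugs(model=m, tokenizer=tok)("Question: What is 2 + 2? Answer: ")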


template = "Question: {input} Answer: "
prompt = PromptTemplate(template=template, input_variables=["input"])
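# For a hypothetical input, prompt.format(input="What is LoRA?") renders to
# "Question: What is LoRA? Answer: ".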

# Wrap the merged model and tokenizer, then build the prompt -> LLM chain.
hf_model = HuggingFaceHugs(model=m, tokenizer=tok)
chain = LLMChain(prompt=prompt, llm=hf_model)


def echo(text):
    # chain.run returns just the generated string; calling chain(text) would
    # return a dict of inputs and outputs, which Gradio would render verbatim.
    return chain.run(text)


# Simple Gradio UI: one textbox in, generated answer out.
demo = gr.Interface(
    fn=echo,
    inputs=gr.Textbox(placeholder="Enter text here", lines=2),
    outputs="text",
)

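# launch() serves the app on localhost by default; share=True would create a
# temporary public link.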
if __name__ == "__main__":
    demo.launch()