Qwen1.5-7B-Chat

Runtime error

0x7o committed on Jan 21

Commit

b78628d

•

1 Parent(s): 1b3b9d7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,12 +5,8 @@ from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStr
 from threading import Thread
 # Loading the tokenizer and model from Hugging Face's model hub.
-tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-# using CUDA for an optimal experience
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model = model.to(device)
 # Defining a custom stopping criteria class for the model's text generation.
@@ -29,7 +25,7 @@ def predict(message, history):
     stop = StopOnTokens()
     # Formatting the input for the model.
-    messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
                         for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
@@ -56,7 +52,7 @@ def predict(message, history):
 # Setting up the Gradio chat interface.
 gr.ChatInterface(predict,
-                 title="Tinyllama_chatBot",
-                 description="Ask Tiny llama any questions",
-                 examples=['How to cook a fish?', 'Who is the president of US now?']
                  ).launch()  # Launching the web interface.

 from threading import Thread
 # Loading the tokenizer and model from Hugging Face's model hub.
+tokenizer = AutoTokenizer.from_pretrained("0x7194633/fialka-13B-v3.1")
+model = AutoModelForCausalLM.from_pretrained("0x7194633/fialka-13B-v3.1", load_in_8bit=True).cuda()
 # Defining a custom stopping criteria class for the model's text generation.
     stop = StopOnTokens()
     # Formatting the input for the model.
+    messages = "</s>".join(["</s>".join(["\n<|user|>" + item[0], "\n<|assistant|>" + item[1]])
                         for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
 # Setting up the Gradio chat interface.
 gr.ChatInterface(predict,
+                 title="Fialka 13B v3.1",
+                 description="Введите ваш запрос",
+                 examples=['Как приготовить рыбу?', 'Кто президент США?']
                  ).launch()  # Launching the web interface.