0x7o committed
Commit: b78628d
Parent: 1b3b9d7

Update app.py

Files changed (1): app.py (+6, -10)
app.py CHANGED
@@ -5,12 +5,8 @@ from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 from threading import Thread
 
 # Loading the tokenizer and model from Hugging Face's model hub.
-tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-
-# using CUDA for an optimal experience
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model = model.to(device)
+tokenizer = AutoTokenizer.from_pretrained("0x7194633/fialka-13B-v3.1")
+model = AutoModelForCausalLM.from_pretrained("0x7194633/fialka-13B-v3.1", load_in_8bit=True).cuda()
 
 
 # Defining a custom stopping criteria class for the model's text generation.
@@ -29,7 +25,7 @@ def predict(message, history):
     stop = StopOnTokens()
 
     # Formatting the input for the model.
-    messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
+    messages = "</s>".join(["</s>".join(["\n<|user|>" + item[0], "\n<|assistant|>" + item[1]])
                             for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
@@ -56,7 +52,7 @@ def predict(message, history):
 
 # Setting up the Gradio chat interface.
 gr.ChatInterface(predict,
-                 title="Tinyllama_chatBot",
-                 description="Ask Tiny llama any questions",
-                 examples=['How to cook a fish?', 'Who is the president of US now?']
+                 title="Fialka 13B v3.1",
+                 description="Введите ваш запрос",
+                 examples=['Как приготовить рыбу?', 'Кто президент США?']
 ).launch() # Launching the web interface.
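
The first hunk swaps TinyLlama for the 13B Fialka checkpoint and quantizes it to 8-bit via the bitsandbytes integration in transformers. One wrinkle: the hunk also deletes the `device` assignment, yet the unchanged context line `model_inputs = tokenizer([messages], return_tensors="pt").to(device)` still references it. A minimal sketch of a loading step that keeps `device` defined; `device_map="auto"` is our addition here, not part of the commit:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("0x7194633/fialka-13B-v3.1")

# load_in_8bit=True dispatches the quantized weights to the GPU via
# accelerate, so the trailing .cuda() in the diff is redundant (recent
# transformers versions reject .cuda() on quantized models outright).
model = AutoModelForCausalLM.from_pretrained(
    "0x7194633/fialka-13B-v3.1",
    load_in_8bit=True,
    device_map="auto",
)

# Kept because predict() still calls .to(device) on the tokenized inputs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")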
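
The prompt-formatting hunk only drops the colon after the role tags, moving from `<|user|>:` / `<|assistant|>:` to the bare markers while keeping `</s>` as the turn separator. A toy illustration of the resulting layout; the history pair is made up:

history_transformer_format = [("Привет!", "Здравствуйте!")]  # made-up turn

messages = "</s>".join(
    "</s>".join(("\n<|user|>" + user, "\n<|assistant|>" + assistant))
    for user, assistant in history_transformer_format
)

print(repr(messages))
# '\n<|user|>Привет!</s>\n<|assistant|>Здравствуйте!'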
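
The unchanged context lines show the streaming setup the app keeps using: generation runs on a worker thread (hence `from threading import Thread`) while `TextIteratorStreamer` hands decoded chunks back to `predict()`. A sketch of that loop, continuing from the loading sketch above; the `max_new_tokens` value is illustrative, not taken from the commit:

from threading import Thread
from transformers import TextIteratorStreamer

model_inputs = tokenizer([messages], return_tensors="pt").to(device)
streamer = TextIteratorStreamer(tokenizer, timeout=10.,
                                skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a background thread while the main
# thread consumes tokens as they are decoded.
Thread(target=model.generate,
       kwargs=dict(**model_inputs, streamer=streamer, max_new_tokens=256)).start()

partial_message = ""
for new_token in streamer:   # blocks up to the timeout for the next chunk
    partial_message += new_token
    # in the Space, predict() yields partial_message to Gradio here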
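
The last hunk re-brands the UI for the Russian-language model; in English, the new strings read: description "Введите ваш запрос" ("Enter your query") and examples 'Как приготовить рыбу?' ("How to cook fish?") and 'Кто президент США?' ("Who is the president of the USA?").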