Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,11 @@ import gradio as gr
|
|
2 |
import spaces
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from llama_cpp import Llama
|
|
|
5 |
|
6 |
MAX_INPUT_LIMIT = 3584
|
7 |
MAX_NEW_TOKENS = 1536
|
|
|
8 |
MODEL_REPO = "Azure99/blossom-v5.1-34b-gguf"
|
9 |
MODEL_FILE = "model-q6_k.gguf"
|
10 |
MODEL_LOCAL_DIR = "./"
|
@@ -16,22 +18,24 @@ hf_hub_download(
|
|
16 |
)
|
17 |
|
18 |
llm: Llama = None
|
|
|
19 |
|
20 |
|
21 |
-
def get_input_ids(
|
22 |
-
prefix = "A chat between a human and an artificial intelligence bot.
|
|
|
23 |
patterns = []
|
24 |
for conv in history:
|
25 |
patterns.append(f'\n|Human|: {conv[0]}\n|Bot|: ')
|
26 |
patterns.append(f'{conv[1]}')
|
27 |
-
patterns.append(f'\n|Human|: {
|
28 |
patterns[0] = prefix + patterns[0]
|
29 |
|
30 |
input_ids = []
|
31 |
for i, pattern in enumerate(patterns):
|
32 |
-
input_ids +=
|
33 |
if i % 2 == 1:
|
34 |
-
input_ids += [
|
35 |
return input_ids
|
36 |
|
37 |
|
|
|
2 |
import spaces
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from llama_cpp import Llama
|
5 |
+
from transformers import AutoTokenizer
|
6 |
|
7 |
MAX_INPUT_LIMIT = 3584
|
8 |
MAX_NEW_TOKENS = 1536
|
9 |
+
MODEL_HF = "Azure99/blossom-v5.1-34b"
|
10 |
MODEL_REPO = "Azure99/blossom-v5.1-34b-gguf"
|
11 |
MODEL_FILE = "model-q6_k.gguf"
|
12 |
MODEL_LOCAL_DIR = "./"
|
|
|
18 |
)
|
19 |
|
20 |
llm: Llama = None
|
21 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_HF)
|
22 |
|
23 |
|
24 |
+
def get_input_ids(inst, history):
    """Build the flat token-id sequence for a chat prompt.

    Encodes a fixed system prefix, every (human, bot) turn in *history*, and
    the new user instruction *inst* with the module-level ``tokenizer``,
    appending the tokenizer's EOS id after each completed bot reply.

    Args:
        inst: The latest human instruction to answer.
        history: Sequence of prior turns; each item is indexable with
            item[0] = human message and item[1] = bot reply.

    Returns:
        list[int]: Token ids ready to feed to the model.
    """
    prefix = ("A chat between a human and an artificial intelligence bot. "
              "The bot gives helpful, detailed, and polite answers to the human's questions.")

    # Lay out the conversation as alternating segments:
    # even index -> human prompt (ends with the bot cue), odd index -> bot reply.
    patterns = []
    for turn in history:
        patterns.append(f'\n|Human|: {turn[0]}\n|Bot|: ')
        patterns.append(f'{turn[1]}')
    patterns.append(f'\n|Human|: {inst}\n|Bot|: ')

    # The system prefix is fused onto the first segment so it is encoded once.
    patterns[0] = prefix + patterns[0]

    token_ids = []
    for idx, chunk in enumerate(patterns):
        # Special tokens (e.g. BOS) are added only for the very first segment.
        token_ids.extend(tokenizer.encode(chunk, add_special_tokens=(idx == 0)))
        if idx % 2 == 1:
            # Odd segments are finished bot replies; terminate each with EOS.
            token_ids.append(tokenizer.eos_token_id)
    return token_ids
|
40 |
|
41 |
|