Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -5,7 +5,7 @@ import subprocess
 import sys
 
 # Force install the specific transformers version from the GitHub PR
-subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--force-reinstall", "accelerate", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
+subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--force-reinstall", "--no-deps", "accelerate", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
 
 from transformers import OlmoeForCausalLM, AutoTokenizer
 
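The added --no-deps flag is the substance of this hunk: pip's --force-reinstall on its own also re-resolves and reinstalls the dependencies of accelerate and the transformers fork on every Space restart, which can replace the CUDA-matched torch build the Space starts with. A minimal sketch of a guarded variant (hypothetical, not part of this commit; the module path is an assumption about the fork's layout) that only reinstalls when the fork is missing:

import importlib.util
import subprocess
import sys

def ensure_olmoe_transformers():
    # The OLMoE model code only exists in the fork's PR branch; checking for
    # its module avoids a redundant reinstall on every restart.
    try:
        have_olmoe = importlib.util.find_spec("transformers.models.olmoe") is not None
    except ModuleNotFoundError:
        have_olmoe = False
    if not have_olmoe:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "-U",
            "--force-reinstall", "--no-deps",  # --no-deps leaves torch and friends alone
            "accelerate",
            "git+https://github.com/Muennighoff/transformers.git@olmoe",
        ])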
@@ -32,6 +32,13 @@ system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                  "while always answering questions in full first principles analysis type of thinking "
                  "without using any analogies and always showing full working code or output in his answers.")
 
+# Define a chat template
+chat_template = {
+    "system": "<|system|>{content}<|end|>",
+    "user": "<|user|>{content}<|end|>",
+    "assistant": "<|assistant|>{content}<|end|>",
+}
+
 @spaces.GPU
 def generate_response(message, history, temperature, max_new_tokens):
     if model is None or tokenizer is None:
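One caveat with this hunk: tokenizer.apply_chat_template expects its chat_template argument to be a single Jinja2 template string, not a per-role dict of format strings, so the dict above will fail when the tokenizer tries to compile it. That is a plausible source of the Runtime error badge on this Space. A sketch of the same format expressed as a Jinja template (an assumption mirroring the tokens in the dict, not the committed code):

# The per-role dict rewritten as the Jinja2 template string that
# apply_chat_template actually accepts.
chat_template = (
    "{% for message in messages %}"
    "<|{{ message['role'] }}|>{{ message['content'] }}<|end|>"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>{% endif %}"
)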
@@ -40,7 +47,7 @@ def generate_response(message, history, temperature, max_new_tokens):
     messages = [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": message}]
 
-    inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
+    inputs = tokenizer.apply_chat_template(messages, chat_template=chat_template, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
 
     with torch.no_grad():
         generate_ids = model.generate(
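The diff truncates the generation call. Assuming the temperature and max_new_tokens parameters from the function signature feed straight into model.generate, the rest of the function would typically look roughly like this, slicing the prompt tokens off before decoding (a sketch of the truncated code, not the commit itself):

    # inputs has shape (1, prompt_len), so decode only the newly generated tail.
    with torch.no_grad():
        generate_ids = model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
        )
    response = tokenizer.decode(generate_ids[0, inputs.shape[-1]:], skip_special_tokens=True)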
@@ -86,4 +93,4 @@ with gr.Blocks(css=css) as demo:
 
 if __name__ == "__main__":
     demo.queue(api_open=True)
-    demo.launch(debug=True, show_api=True, share=True)
+    demo.launch(debug=True, show_api=True, share=True)
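An aside on the launch line: when the app runs on Spaces it is already publicly hosted, so share=True adds nothing there (Gradio warns about and ignores the share tunnel on Spaces). A trimmed variant, as a sketch rather than the committed code:

# On Spaces the app is already served publicly, so the share tunnel is redundant.
if __name__ == "__main__":
    demo.queue(api_open=True)
    demo.launch(debug=True, show_api=True)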