Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -5,7 +5,7 @@ import subprocess
 import sys
 
 # Force install the specific transformers version from the GitHub PR
-subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--force-reinstall", "accelerate", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
+subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--force-reinstall", "--no-deps", "accelerate", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
 
 from transformers import OlmoeForCausalLM, AutoTokenizer
 
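The added --no-deps flag is the substance of this hunk: pip's --force-reinstall on its own also re-resolves and reinstalls the dependencies of accelerate and the transformers fork on every Space restart, which can replace the CUDA-matched torch build the Space starts with. A minimal sketch of a guarded variant (hypothetical, not part of this commit; the module path is an assumption about the fork's layout) that only reinstalls when the fork is missing:

import importlib.util
import subprocess
import sys

def ensure_olmoe_transformers():
    # The OLMoE model code only exists in the fork's PR branch; checking for
    # its module avoids a redundant reinstall on every restart.
    try:
        have_olmoe = importlib.util.find_spec("transformers.models.olmoe") is not None
    except ModuleNotFoundError:
        have_olmoe = False
    if not have_olmoe:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "-U",
            "--force-reinstall", "--no-deps",  # --no-deps leaves torch and friends alone
            "accelerate",
            "git+https://github.com/Muennighoff/transformers.git@olmoe",
        ])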
@@ -32,6 +32,13 @@ system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                  "while always answering questions in full first principles analysis type of thinking "
                  "without using any analogies and always showing full working code or output in his answers.")
 
+# Define a chat template
+chat_template = {
+    "system": "<|system|>{content}<|end|>",
+    "user": "<|user|>{content}<|end|>",
+    "assistant": "<|assistant|>{content}<|end|>",
+}
+
 @spaces.GPU
 def generate_response(message, history, temperature, max_new_tokens):
     if model is None or tokenizer is None:
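One caveat with this hunk: tokenizer.apply_chat_template expects its chat_template argument to be a single Jinja2 template string, not a per-role dict of format strings, so the dict above will fail when the tokenizer tries to compile it. That is a plausible source of the Runtime error badge on this Space. A sketch of the same format expressed as a Jinja template (an assumption mirroring the tokens in the dict, not the committed code):

# The per-role dict rewritten as the Jinja2 template string that
# apply_chat_template actually accepts.
chat_template = (
    "{% for message in messages %}"
    "<|{{ message['role'] }}|>{{ message['content'] }}<|end|>"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>{% endif %}"
)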
@@ -40,7 +47,7 @@ def generate_response(message, history, temperature, max_new_tokens):
     messages = [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": message}]
 
-    inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
+    inputs = tokenizer.apply_chat_template(messages, chat_template=chat_template, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
 
     with torch.no_grad():
         generate_ids = model.generate(
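The diff truncates the generation call. Assuming the temperature and max_new_tokens parameters from the function signature feed straight into model.generate, the rest of the function would typically look roughly like this, slicing the prompt tokens off before decoding (a sketch of the truncated code, not the commit itself):

    # inputs has shape (1, prompt_len), so decode only the newly generated tail.
    with torch.no_grad():
        generate_ids = model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
        )
    response = tokenizer.decode(generate_ids[0, inputs.shape[-1]:], skip_special_tokens=True)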
@@ -86,4 +93,4 @@ with gr.Blocks(css=css) as demo:
 
 if __name__ == "__main__":
     demo.queue(api_open=True)
-    demo.launch(debug=True, show_api=True, share=True)
+    demo.launch(debug=True, show_api=True, share=True)
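An aside on the launch line: when the app runs on Spaces it is already publicly hosted, so share=True adds nothing there (Gradio warns about and ignores the share tunnel on Spaces). A trimmed variant, as a sketch rather than the committed code:

# On Spaces the app is already served publicly, so the share tunnel is redundant.
if __name__ == "__main__":
    demo.queue(api_open=True)
    demo.launch(debug=True, show_api=True)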