zerogpu-2

Running on Zero

rphrp1985 committed on Jun 8

Commit

195b309

•

1 Parent(s): 302faf1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,18 +12,14 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 token=os.getenv('token')
 print('token = ',token)
-model_id = "CohereForAI/c4ai-command-r-plus-4bit"
-## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-bnb_config = BitsAndBytesConfig(load_in_8bit=True)
-# messages = [{"role": "system", "content": system_message}]
 tokenizer = AutoTokenizer.from_pretrained(model_id, token= token)
 model = AutoModelForCausalLM.from_pretrained(model_id, token= token)
 @spaces.GPU(duration=180)
 def respond(
     message,
@@ -34,21 +30,16 @@ def respond(
     top_p,
 ):
     messages = [{"role": "user", "content": "Hello, how are you?"}]
-# Format message with the command-r-plus chat template
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
-    ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
     gen_tokens = model.generate(
-        input_ids,
-        max_new_tokens=100,
-        do_sample=True,
-        temperature=0.3,
-        )
     gen_text = tokenizer.decode(gen_tokens[0])
     print(gen_text)
     yield gen_text

 token=os.getenv('token')
 print('token = ',token)
 tokenizer = AutoTokenizer.from_pretrained(model_id, token= token)
 model = AutoModelForCausalLM.from_pretrained(model_id, token= token)
 @spaces.GPU(duration=180)
 def respond(
     message,
     top_p,
 ):
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
     gen_tokens = model.generate(
+    input_ids,
+    max_new_tokens=100,
+    do_sample=True,
+    temperature=0.3,
+    )
     gen_text = tokenizer.decode(gen_tokens[0])
     print(gen_text)
     yield gen_text