Chengxb888 committed
Commit 576bbe0 · verified · 1 Parent(s): 47c0611

Update app.py

Files changed (1)
  1. app.py +9 -30
app.py CHANGED
@@ -11,36 +11,15 @@ def greet_json():
 @app.get("/hello/{msg}")
 def say_hello(msg: str):
     print("model")
-    torch.random.manual_seed(0)
-    model = AutoModelForCausalLM.from_pretrained(
-        "microsoft/Phi-3-mini-4k-instruct",
-        device_map="auto",
-        torch_dtype="auto",
-        trust_remote_code=True,
-    )
+    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
+    model = AutoModelForCausalLM.from_pretrained(
+        "google/gemma-2b-it",
+        device_map="auto",
+        torch_dtype=torch.bfloat16
+    )
     print("token & msg")
-    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
-
-    messages = [
-        {"role": "system", "content": "You are a helpful AI assistant."},
-        {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
-        {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
-        {"role": "user", "content": msg},
-    ]
-    print("pipe")
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-    )
+    input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
     print("output")
-    # generation_args = {
-    #     "max_new_tokens": 500,
-    #     "return_full_text": False,
-    #     "temperature": 0.0,
-    #     "do_sample": False,
-    # }
-
-    output = pipe(messages) #, **generation_args)
+    outputs = model.generate(**input_ids, max_length=500)
     print("complete")
-    return {"message": output[0]['generated_text']}
+    return {"message": tokenizer.decode(outputs[0])}