Spaces:
Sleeping
Sleeping
Chengxb888
commited on
Update app.py
Browse files
app.py
CHANGED
@@ -11,36 +11,15 @@ def greet_json():
|
|
11 |
@app.get("/hello/{msg}")
|
12 |
def say_hello(msg: str):
|
13 |
print("model")
|
14 |
-
|
15 |
-
model = AutoModelForCausalLM.from_pretrained(
|
16 |
-
"
|
17 |
-
device_map="auto",
|
18 |
-
torch_dtype=
|
19 |
-
|
20 |
-
)
|
21 |
print("token & msg")
|
22 |
-
tokenizer =
|
23 |
-
|
24 |
-
messages = [
|
25 |
-
{"role": "system", "content": "You are a helpful AI assistant."},
|
26 |
-
{"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
|
27 |
-
{"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
|
28 |
-
{"role": "user", "content": msg},
|
29 |
-
]
|
30 |
-
print("pipe")
|
31 |
-
pipe = pipeline(
|
32 |
-
"text-generation",
|
33 |
-
model=model,
|
34 |
-
tokenizer=tokenizer,
|
35 |
-
)
|
36 |
print("output")
|
37 |
-
|
38 |
-
# "max_new_tokens": 500,
|
39 |
-
# "return_full_text": False,
|
40 |
-
# "temperature": 0.0,
|
41 |
-
# "do_sample": False,
|
42 |
-
# }
|
43 |
-
|
44 |
-
output = pipe(messages) #, **generation_args)
|
45 |
print("complete")
|
46 |
-
return {"message":
|
|
|
11 |
@app.get("/hello/{msg}")
|
12 |
def say_hello(msg: str):
|
13 |
print("model")
|
14 |
+
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
|
15 |
+
model = AutoModelForCausalLM.from_pretrained(
|
16 |
+
"google/gemma-2b-it",
|
17 |
+
device_map="auto",
|
18 |
+
torch_dtype=torch.bfloat16
|
19 |
+
)
|
|
|
20 |
print("token & msg")
|
21 |
+
input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
print("output")
|
23 |
+
outputs = model.generate(**input_ids, max_length=500)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
print("complete")
|
25 |
+
return {"message": tokenizer.decode(outputs[0])}
|