Runtime error
Update main.py
main.py CHANGED
@@ -7,7 +7,19 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 app = FastAPI()
 
-client = InferenceClient("HFHAB/FinetunedMistralModel")
+model_id = "mistralai/Mistral-7B-v0.1"
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+prompt = "<s>[INST] Write a tweet on future of AI [/INST]"
+inputs = tokenizer(prompt, return_tensors="pt").to(0)
+
+out = model.generate(**inputs, max_new_tokens=250, do_sample=True, temperature=0.6, top_p=0.95, top_k=40)
+
+print(tokenizer.decode(out[0], skip_special_tokens=True))
+
+
+#client = InferenceClient("HFHAB/FinetunedMistralModel")
 
 class Item(BaseModel):
     prompt: str
@@ -46,11 +58,14 @@ def generate(item: Item):
     )
 
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
+    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(0)
+    out = model.generate(**inputs, max_new_tokens=250, do_sample=True, temperature=0.6, top_p=0.95, top_k=40)
+    output = tokenizer.decode(out[0], skip_special_tokens=True)
+    #stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    #output = ""
 
-    for response in stream:
-        output += response.token.text
+    #for response in stream:
+    #    output += response.token.text
     return output
 
 @app.post("/generate/")
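Note: the hunk header shows BitsAndBytesConfig imported from transformers, but the new code never uses it, and loading Mistral-7B in full precision is a likely way to exhaust memory on a Space (consistent with the runtime-error status above). A minimal sketch of what 4-bit loading with that import could look like; the quantization settings below are illustrative assumptions, not taken from this commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mistralai/Mistral-7B-v0.1"

# Assumed 4-bit settings; requires the bitsandbytes package and a GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Quantized weights cut memory use roughly 4x versus float16.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)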
|
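Once the Space builds, the /generate/ endpoint takes an Item in the request body. The handler in the diff reads item.prompt, item.system_prompt, and item.history, so a request might look like the sketch below; the exact field types on Item and the values used are assumptions:

import requests

# Hypothetical payload: only "prompt" is visible in the Item model shown
# in the diff; system_prompt and history are inferred from the handler.
payload = {
    "prompt": "Write a tweet on the future of AI",
    "system_prompt": "You are a helpful assistant.",
    "history": [],
}

# 7860 is the default port for a Hugging Face Space; adjust for local runs.
resp = requests.post("http://localhost:7860/generate/", json=payload)
print(resp.json())  # the endpoint returns the decoded generation as a string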