Spaces:
Sleeping
Sleeping
toaster61
committed on
Commit
·
559ea97
1
Parent(s):
be3d3fd
not working commit
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
| 3 |
|
| 4 |
app = Quart(__name__)
|
| 5 |
|
|
@@ -8,19 +8,20 @@ with open('system.prompt', 'r', encoding='utf-8') as f:
|
|
| 8 |
|
| 9 |
@app.post("/request")
|
| 10 |
async def echo():
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
output_ids = model.generate(
|
| 17 |
-
input_ids=input_ids,
|
| 18 |
-
do_sample=random.choice([True, False]), temperature=float(random.randint(7,20)) / 10.0,
|
| 19 |
-
max_new_tokens=data.get("max_tokens") or random.randomint(200,500),
|
| 20 |
-
eos_token_id=tokenizer.eos_token_id, return_full_text = False)
|
| 21 |
-
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 22 |
return {"output": output}
|
| 23 |
|
| 24 |
@app.get("/")
|
| 25 |
async def get():
|
| 26 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from quart import Quart, request
|
| 2 |
+
from llama_cpp import Llama
|
| 3 |
|
| 4 |
app = Quart(__name__)
|
| 5 |
|
|
|
|
| 8 |
|
| 9 |
@app.post("/request")
|
| 10 |
async def echo():
|
| 11 |
+
try:
|
| 12 |
+
data = await request.get_json()
|
| 13 |
+
if data.get("max_tokens") != None and data.get("max_tokens") > 500: data['max_tokens'] = 500
|
| 14 |
+
userPrompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
|
| 15 |
+
except: return {"error": "Not enough data"}, 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
return {"output": output}
|
| 17 |
|
| 18 |
@app.get("/")
|
| 19 |
async def get():
|
| 20 |
+
return '''<h1>Hello, world!</h1>
|
| 21 |
+
This is showcase how to make own server with OpenBuddy's model.<br>
|
| 22 |
+
I'm using here 3b model just for example. Also here's only CPU power.<br>
|
| 23 |
+
But you can use GPU power as well!<br>
|
| 24 |
+
<br>
|
| 25 |
+
<h1>How to GPU?</h1>
|
| 26 |
+
|
| 27 |
+
'''
|