toaster61 committed
Commit 559ea97
1 Parent(s): be3d3fd

not working commit

Files changed (1)
  app.py +15 -14
app.py CHANGED
@@ -1,5 +1,5 @@
-import random
-from quart import Quart
+from quart import Quart, request
+from llama_cpp import Llama
 
 app = Quart(__name__)
 
@@ -8,19 +8,20 @@ with open('system.prompt', 'r', encoding='utf-8') as f:
 
 @app.post("/request")
 async def echo():
-    data = await request.get_json()
-    if data.get("max_tokens") != None and data.get("max_tokens") > 500: data['max_tokens'] = 500
-    userPrompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
-    input_ids = tokenizer.encode(userPrompt, return_tensors='pt')
-    with torch.no_grad():
-        output_ids = model.generate(
-            input_ids=input_ids,
-            do_sample=random.choice([True, False]), temperature=float(random.randint(7,20)) / 10.0,
-            max_new_tokens=data.get("max_tokens") or random.randomint(200,500),
-            eos_token_id=tokenizer.eos_token_id, return_full_text = False)
-    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    try:
+        data = await request.get_json()
+        if data.get("max_tokens") != None and data.get("max_tokens") > 500: data['max_tokens'] = 500
+        userPrompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
+    except: return {"error": "Not enough data"}, 400
     return {"output": output}
 
 @app.get("/")
 async def get():
-    return "better to run it on own container"
+    return '''<h1>Hello, world!</h1>
+This is showcase how to make own server with OpenBuddy's model.<br>
+I'm using here 3b model just for example. Also here's only CPU power.<br>
+But you can use GPU power as well!<br>
+<br>
+<h1>How to GPU?</h1>
+
+'''
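
As committed, the POST handler can never succeed: Llama is imported from llama_cpp but no model is ever instantiated, the generation code that should define output is gone, and return {"output": output} therefore raises a NameError (hence the commit message "not working commit"). The removed transformers-based version also had a typo of its own (random.randomint should be random.randint). Below is a minimal sketch of how the new llama-cpp-python version might be completed. The model path "model.bin", the prompt = f.read() line, and the stop=["User:"] setting are assumptions, not part of the commit; only Llama(model_path=...) and the OpenAI-style result["choices"][0]["text"] return shape come from llama-cpp-python's high-level API.

# Hypothetical completion of app.py -- NOT part of this commit.
from quart import Quart, request
from llama_cpp import Llama

app = Quart(__name__)

# The collapsed hunk context shows system.prompt is read at startup;
# assigning its contents to `prompt` (used below) is an assumption.
with open('system.prompt', 'r', encoding='utf-8') as f:
    prompt = f.read()

# "model.bin" is a placeholder; point it at a real OpenBuddy GGML/GGUF file.
llm = Llama(model_path="model.bin")

@app.post("/request")
async def echo():
    try:
        data = await request.get_json()
        # Cap max_tokens at 500, as the committed code does.
        max_tokens = min(data.get("max_tokens") or 200, 500)
        user_prompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
    except Exception:
        return {"error": "Not enough data"}, 400
    # llama-cpp-python's high-level call returns an OpenAI-style completion dict.
    result = llm(user_prompt, max_tokens=max_tokens, stop=["User:"])
    output = result["choices"][0]["text"]
    return {"output": output}

Note that llm(...) is a blocking call; in a real Quart app it would be better to run it in an executor so the event loop stays responsive, but it is kept inline here to stay close to the commit's structure.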