A minimal inference server built with Quart and llama-cpp-python:

```python
from quart import Quart, request
from llama_cpp import Llama

app = Quart(__name__)

# Model path is an assumption; point it at your downloaded OpenBuddy model file.
llm = Llama(model_path="./model.bin")

# The system prompt is prepended to every user request.
with open('system.prompt', 'r', encoding='utf-8') as f:
    prompt = f.read()

@app.post("/api")  # route name is an assumption
async def echo():
    try:
        data = await request.get_json()
        # Cap max_tokens at 500 to keep CPU inference times manageable.
        if data.get("max_tokens") is not None and data.get("max_tokens") > 500:
            data["max_tokens"] = 500
        userPrompt = prompt + "\n\nUser: " + data["request"] + "\nAssistant: "
    except (TypeError, KeyError):
        return {"error": "Not enough data"}, 400
    # Generate a completion, stopping before the model starts a new "User:" turn.
    output = llm(userPrompt, max_tokens=data.get("max_tokens", 256), stop=["User:"], echo=False)
    return {"output": output}
```
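Once the server is running (for example with `hypercorn app:app` or `app.run()`), the endpoint can be exercised from any HTTP client. A minimal sketch, assuming the hypothetical `/api` route above and Quart's default port 5000:

```python
# Hypothetical client call against the server sketched above.
import requests

resp = requests.post(
    "http://localhost:5000/api",
    json={"request": "Hello! Who are you?", "max_tokens": 128},
)
# llama-cpp-python returns an OpenAI-style completion dict.
print(resp.json()["output"]["choices"][0]["text"])
```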
The index route serves a small HTML page describing the demo:

```python
@app.get("/")
async def get():
    return '''<h1>Hello, world!</h1>
This showcases how to build your own server with OpenBuddy's model.<br>
I'm using the 3B model here just as an example, and it runs on CPU power alone.<br>
But you can use GPU power as well!<br>
<br>
<h1>How to GPU?</h1>
'''
```
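For GPU acceleration, llama-cpp-python can offload model layers to the GPU through the `n_gpu_layers` argument, provided the package was built with GPU support. A hedged sketch (the exact CMake flag varies between releases, and the model path is an assumption):

```python
# First rebuild llama-cpp-python with CUDA enabled, e.g. on older releases:
#   CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install --force-reinstall llama-cpp-python
from llama_cpp import Llama

# n_gpu_layers=-1 offloads all layers to the GPU; use a smaller number
# if the model does not fit in VRAM.
llm = Llama(model_path="./model.bin", n_gpu_layers=-1)
```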