Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	
		toaster61
		
	committed on
		
		
					Commit 
							
							·
						
						559ea97
	
1
								Parent(s):
							
							be3d3fd
								
not working commit
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 | 
            -
            import  | 
| 2 | 
            -
            from  | 
| 3 |  | 
| 4 | 
             
            app = Quart(__name__)
         | 
| 5 |  | 
| @@ -8,19 +8,20 @@ with open('system.prompt', 'r', encoding='utf-8') as f: | |
| 8 |  | 
| 9 | 
             
            @app.post("/request")
         | 
| 10 | 
             
            async def echo():
         | 
| 11 | 
            -
                 | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
                 | 
| 16 | 
            -
                    output_ids = model.generate(
         | 
| 17 | 
            -
                        input_ids=input_ids, 
         | 
| 18 | 
            -
                        do_sample=random.choice([True, False]), temperature=float(random.randint(7,20)) / 10.0,
         | 
| 19 | 
            -
                        max_new_tokens=data.get("max_tokens") or random.randomint(200,500), 
         | 
| 20 | 
            -
                        eos_token_id=tokenizer.eos_token_id, return_full_text = False)
         | 
| 21 | 
            -
                    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
         | 
| 22 | 
             
                return {"output": output}
         | 
| 23 |  | 
| 24 | 
             
            @app.get("/")
         | 
| 25 | 
             
            async def get():
         | 
| 26 | 
            -
                return  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from quart import Quart, request
         | 
| 2 | 
            +
            from llama_cpp import Llama
         | 
| 3 |  | 
| 4 | 
             
            app = Quart(__name__)
         | 
| 5 |  | 
|  | |
| 8 |  | 
| 9 | 
             
            @app.post("/request")
         | 
| 10 | 
             
            async def echo():
         | 
| 11 | 
            +
                try:
         | 
| 12 | 
            +
                    data = await request.get_json()
         | 
| 13 | 
            +
                    if data.get("max_tokens") != None and data.get("max_tokens") > 500: data['max_tokens'] = 500
         | 
| 14 | 
            +
                    userPrompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
         | 
| 15 | 
            +
                except: return {"error": "Not enough data"}, 400
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 16 | 
             
                return {"output": output}
         | 
| 17 |  | 
| 18 | 
             
            @app.get("/")
         | 
| 19 | 
             
            async def get():
         | 
| 20 | 
            +
                return '''<h1>Hello, world!</h1>
         | 
| 21 | 
            +
            This is showcase how to make own server with OpenBuddy's model.<br>
         | 
| 22 | 
            +
            I'm using here 3b model just for example. Also here's only CPU power.<br>
         | 
| 23 | 
            +
            But you can use GPU power as well!<br>
         | 
| 24 | 
            +
            <br>
         | 
| 25 | 
            +
            <h1>How to GPU?</h1>
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            '''
         |