Update app.py
Browse files
app.py
CHANGED
@@ -3,16 +3,13 @@ from llama_cpp import Llama
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
import os
|
5 |
import requests
|
6 |
-
from llama_cpp.server.app import create_app
|
7 |
-
from llama_cpp.server.settings import Settings
|
8 |
-
|
9 |
|
10 |
os.system("ulimit -l unlimited")
|
11 |
|
12 |
app = FastAPI()
|
13 |
|
14 |
hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF", "deepseek-coder-1.3b-base.Q5_K_M.gguf", local_dir="./")
|
15 |
-
|
16 |
model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=16192, n_gpu_layers=0, n_threads=2, use_mlock=True)
|
17 |
|
18 |
|
@@ -39,13 +36,6 @@ async def completion(request: Request):
|
|
39 |
except:
|
40 |
return {"responses": "Error!"}
|
41 |
|
42 |
-
'''
|
43 |
-
app = create_app(
|
44 |
-
Settings(
|
45 |
-
n_threads=2, # set to number of cpu cores
|
46 |
-
model="./deepseek-coder-1.3b-base.Q5_K_M.gguf",
|
47 |
-
embedding=True
|
48 |
-
))
|
49 |
|
50 |
if __name__ == "__main__":
|
51 |
import uvicorn
|
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
import os
|
5 |
import requests
|
|
|
|
|
|
|
6 |
|
7 |
os.system("ulimit -l unlimited")
|
8 |
|
9 |
app = FastAPI()
|
10 |
|
11 |
hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF", "deepseek-coder-1.3b-base.Q5_K_M.gguf", local_dir="./")
|
12 |
+
|
13 |
model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=16192, n_gpu_layers=0, n_threads=2, use_mlock=True)
|
14 |
|
15 |
|
|
|
36 |
except:
|
37 |
return {"responses": "Error!"}
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
if __name__ == "__main__":
|
41 |
import uvicorn
|