Sergidev committed on
Commit
c901280
1 Parent(s): ad3c61d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -2,18 +2,19 @@ from fastapi import FastAPI, Request
2
  from fastapi.responses import HTMLResponse, StreamingResponse
3
  from fastapi.staticfiles import StaticFiles
4
  from modules.pmbl import PMBL
5
- from llama_cpp import Llama
6
 
7
- # Check CUDA availability
8
- llm = Llama(model_path="./PMB-7b.Q6_K.gguf", n_gpu_layers=-1)
9
- print(f"CUDA available: {llm.model.context_params.n_gpu_layers > 0}")
 
10
 
11
  app = FastAPI(docs_url=None, redoc_url=None)
12
 
13
  app.mount("/static", StaticFiles(directory="static"), name="static")
14
  app.mount("/templates", StaticFiles(directory="templates"), name="templates")
15
 
16
- pmbl = PMBL("./PMB-7b.Q6_K.gguf") # Path to your .gguf model
17
 
18
  @app.head("/")
19
  @app.get("/")
 
2
  from fastapi.responses import HTMLResponse, StreamingResponse
3
  from fastapi.staticfiles import StaticFiles
4
  from modules.pmbl import PMBL
5
+ import torch
6
 
7
+ print(f"CUDA available: {torch.cuda.is_available()}")
8
+ print(f"CUDA device count: {torch.cuda.device_count()}")
9
+ if torch.cuda.is_available():
10
+     print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
11
 
12
  app = FastAPI(docs_url=None, redoc_url=None)
13
 
14
  app.mount("/static", StaticFiles(directory="static"), name="static")
15
  app.mount("/templates", StaticFiles(directory="templates"), name="templates")
16
 
17
+ pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)
18
 
19
  @app.head("/")
20
  @app.get("/")