Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,6 @@ import asyncio
|
|
10 |
import random
|
11 |
from io import BytesIO
|
12 |
import requests
|
13 |
-
import tempfile
|
14 |
|
15 |
app = FastAPI()
|
16 |
load_dotenv()
|
@@ -34,21 +33,24 @@ class ModelManager:
|
|
34 |
def load_unified_model(self):
|
35 |
model_configs = [
|
36 |
{
|
37 |
-
"repo_id": "
|
38 |
-
"filename": "
|
39 |
-
"name": "Llama-3.2-3B-Instruct-GGUF"
|
40 |
},
|
41 |
]
|
42 |
|
43 |
models = []
|
44 |
for config in model_configs:
|
45 |
-
with BytesIO() as model_data:
|
46 |
download_url = f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}"
|
47 |
-
response = requests.get(download_url, headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"})
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
52 |
models.append(model)
|
53 |
|
54 |
self.params["tokens"] = models[0].tokenize(b"Hello")
|
|
|
10 |
import random
|
11 |
from io import BytesIO
|
12 |
import requests
|
|
|
13 |
|
14 |
app = FastAPI()
|
15 |
load_dotenv()
|
|
|
33 |
def load_unified_model(self):
|
34 |
model_configs = [
|
35 |
{
|
36 |
+
"repo_id": "TheBloke/Llama-2-7B-Chat-GGUF",
|
37 |
+
"filename": "llama-2-7b-chat.Q4_K_M.gguf",
|
|
|
38 |
},
|
39 |
]
|
40 |
|
41 |
models = []
|
42 |
for config in model_configs:
|
43 |
+
with BytesIO() as model_data:
|
44 |
download_url = f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}"
|
45 |
+
response = requests.get(download_url, headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}", "stream": True})
|
46 |
+
|
47 |
+
for chunk in response.iter_content(chunk_size=1024*1024):
|
48 |
+
if chunk:
|
49 |
+
model_data.write(chunk)
|
50 |
+
|
51 |
+
model_data.seek(0)
|
52 |
+
|
53 |
+
model = Llama(model_path="", model_data=model_data.read(), **self.params)
|
54 |
models.append(model)
|
55 |
|
56 |
self.params["tokens"] = models[0].tokenize(b"Hello")
|