Update app.py
app.py CHANGED
@@ -4,12 +4,13 @@ import os
 import gradio as gr
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
-from fastapi.responses import
+from fastapi.responses import StreamingResponse
 import spaces
 import asyncio
 import random
 from io import BytesIO
 import requests
+import tempfile
 
 app = FastAPI()
 load_dotenv()
@@ -27,28 +28,26 @@ class ModelManager:
             "seed": -1,
             "stop": ["</s>"],
             "tokens": [],
-            "eos_token": None,
-            "pad_token": None,
         }
         self.unified_model = self.load_unified_model()
 
     def load_unified_model(self):
         model_configs = [
-            {"repo_id": "unsloth/Llama-3.2-3B-Instruct-GGUF", "filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf", "name": "Llama-3.2-3B-Instruct-GGUF"},
+            {"repo_id": "unsloth/Llama-3.2-3B-Instruct-GGUF", "filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf", "name": "Llama-3.2-3B-Instruct-GGUF"},
         ]
 
         models = []
         for config in model_configs:
-
-
-
+            with tempfile.TemporaryDirectory() as tmpdir:
+                model_path = os.path.join(tmpdir, config['filename'])
+                response = requests.get(f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}", headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"})
+                with open(model_path, 'wb') as f:
+                    f.write(response.content)
+                model = Llama.from_pretrained(model_path, repo_id=config['repo_id'], filename=config['filename'], **self.params)
+                models.append(model)
 
         self.params["tokens"] = models[0].tokenize(b"Hello")
-        self.params["eos_token"] = models[0].eos_token
-        self.params["pad_token"] = models[0].pad_token
-
-        # Placeholder: Replace with your model combination logic
-        self.unified_model = models[0]
+        self.unified_model = models[0]
         return self.unified_model
 
 model_manager = ModelManager()
@@ -95,4 +94,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
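
The loading hunk leaves a few things implicit: llama_cpp's Llama is never imported in the shown lines, and Llama.from_pretrained fetches the GGUF file from the Hub on its own (via huggingface_hub), which makes the manual requests.get into a tempfile.TemporaryDirectory redundant; passing model_path positionally would also collide with the repo_id keyword, since the documented signature is from_pretrained(repo_id, filename, ...). A minimal sketch of the same load using only the classmethod, where n_ctx is an illustrative setting not taken from the commit:

from llama_cpp import Llama

# Hedged sketch: from_pretrained downloads the checkpoint itself,
# so no manual requests.get / temporary-directory step is needed.
llm = Llama.from_pretrained(
    repo_id="unsloth/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
    n_ctx=2048,  # illustrative context size, not part of the commit
)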
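The other half of the commit completes the truncated from fastapi.responses import line with StreamingResponse, which suggests a token-streaming endpoint elsewhere in the file. A hypothetical sketch of how that import is typically wired to llama-cpp-python's streaming API; the /generate route and prompt field are illustrative and not shown in this diff:

from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.post("/generate")
async def generate(request: Request):
    body = await request.json()

    def token_stream():
        # llama-cpp-python yields completion chunks when stream=True
        for chunk in model_manager.unified_model(body["prompt"], stream=True):
            yield chunk["choices"][0]["text"]

    return StreamingResponse(token_stream(), media_type="text/plain")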