Yhhxhfh committed
Commit 5fead4a
1 Parent(s): d1487c9

Update app.py

Files changed (1): app.py +12 -13
app.py CHANGED
@@ -4,12 +4,13 @@ import os
 import gradio as gr
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.responses import StreamingResponse
 import spaces
 import asyncio
 import random
 from io import BytesIO
 import requests
+import tempfile
 
 app = FastAPI()
 load_dotenv()
@@ -27,28 +28,26 @@ class ModelManager:
             "seed": -1,
             "stop": ["</s>"],
             "tokens": [],
-            "eos_token": None,
-            "pad_token": None,
         }
         self.unified_model = self.load_unified_model()
 
     def load_unified_model(self):
         model_configs = [
-            {"repo_id": "unsloth/Llama-3.2-3B-Instruct-GGUF", "filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf", "name": "Llama-3.2-3B-Instruct-GGUF"},
+            {"repo_id": "unsloth/Llama-3.2-3B-Instruct-GGUF", "filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf", "name": "Llama-3.2-3B-Instruct-GGUF"},
         ]
 
         models = []
         for config in model_configs:
-            model_data = BytesIO(requests.get(f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}", headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}).content)
-            model = Llama.from_pretrained(model=model_data, repo_id=config['repo_id'], filename=config['filename'], **self.params)
-            models.append(model)
+            with tempfile.TemporaryDirectory() as tmpdir:
+                model_path = os.path.join(tmpdir, config['filename'])
+                response = requests.get(f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}", headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"})
+                with open(model_path, 'wb') as f:
+                    f.write(response.content)
+                model = Llama.from_pretrained(model_path, repo_id=config['repo_id'], filename=config['filename'], **self.params)
+                models.append(model)
 
         self.params["tokens"] = models[0].tokenize(b"Hello")
-        self.params["eos_token"] = models[0].eos_token
-        self.params["pad_token"] = models[0].pad_token
-
-        # Placeholder: Replace with your model combination logic
-        self.unified_model = models[0]
+        self.unified_model = models[0]
         return self.unified_model
 
 model_manager = ModelManager()
@@ -95,4 +94,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
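For reference, a minimal sketch of the download-and-load flow the new hunk implements, assuming llama-cpp-python's `Llama` API. Two caveats grounded in that API: as committed, `Llama.from_pretrained(model_path, repo_id=..., ...)` passes `repo_id` twice (positionally and by keyword), and `response.content` buffers the entire multi-gigabyte GGUF file in RAM. The sketch streams to disk instead and loads the local file with `Llama(model_path=...)`; the `load_gguf` helper name is hypothetical, and the token handling mirrors the surrounding app.

import os
import tempfile

import requests
from llama_cpp import Llama


def load_gguf(repo_id: str, filename: str, token: str) -> Llama:
    # Hypothetical helper mirroring the hunk above: fetch the GGUF file over
    # HTTP, write it into a temporary directory, and load it from local disk.
    url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
    with tempfile.TemporaryDirectory() as tmpdir:
        model_path = os.path.join(tmpdir, filename)
        # Stream the download in chunks rather than buffering it in memory.
        with requests.get(url, headers={"Authorization": f"Bearer {token}"},
                          stream=True, timeout=600) as resp:
            resp.raise_for_status()
            with open(model_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        # use_mmap=False copies the weights into memory, so the model stays
        # usable after the temporary directory is cleaned up on exit.
        return Llama(model_path=model_path, use_mmap=False)


model = load_gguf("unsloth/Llama-3.2-3B-Instruct-GGUF",
                  "Llama-3.2-3B-Instruct-Q4_K_M.gguf",
                  os.getenv("HUGGINGFACE_TOKEN", ""))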
 
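A possible simplification, not what this commit does: llama-cpp-python's `Llama.from_pretrained` already downloads and caches GGUF files through huggingface_hub, which would remove the manual requests/tempfile round trip entirely. A sketch with the same repo and filename, assuming a Hub token is available to huggingface_hub:

from llama_cpp import Llama

# from_pretrained resolves the file on the Hub via huggingface_hub and
# caches it locally; authentication comes from the huggingface_hub token.
model = Llama.from_pretrained(
    repo_id="unsloth/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
)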