Spaces:
Running
Running
Update src/app.py
Browse files — added access to custom embedding and small language models
- src/app.py +51 -3
src/app.py
CHANGED
|
@@ -12,12 +12,14 @@ import doc_loader
|
|
| 12 |
import modules.admin_panel as admin_panel
|
| 13 |
|
| 14 |
from openai import OpenAI
|
| 15 |
-
from google import genai
|
| 16 |
-
from google.genai import types
|
| 17 |
from datetime import datetime
|
| 18 |
from test_integration import run_tests
|
| 19 |
from core.QuizEngine import QuizEngine
|
| 20 |
from core.PineconeManager import PineconeManager
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# --- CONFIGURATION ---
|
| 23 |
st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
|
|
@@ -156,7 +158,52 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
|
| 156 |
except Exception as e:
|
| 157 |
return f"[OpenAI Error: {e}]", None
|
| 158 |
|
| 159 |
-
# --- ROUTE 3: LOCAL/OPEN SOURCE ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
else:
|
| 161 |
model_map = {
|
| 162 |
"Granite 4 (IBM)": "granite4:latest",
|
|
@@ -264,6 +311,7 @@ with st.sidebar:
|
|
| 264 |
"Standard (All-MiniLM, 384d)": "sentence-transformers/all-MiniLM-L6-v2",
|
| 265 |
"High-Perf (MPNet, 768d)": "sentence-transformers/all-mpnet-base-v2",
|
| 266 |
"OpenAI Small (1536d)": "text-embedding-3-small"
|
|
|
|
| 267 |
}
|
| 268 |
embed_choice_label = st.selectbox("Select Embedding Model", list(embed_options.keys()))
|
| 269 |
st.session_state.active_embed_model = embed_options[embed_choice_label]
|
|
|
|
| 12 |
import modules.admin_panel as admin_panel
|
| 13 |
|
| 14 |
from openai import OpenAI
|
| 15 |
+
from google import genai
|
| 16 |
+
from google.genai import types
|
| 17 |
from datetime import datetime
|
| 18 |
from test_integration import run_tests
|
| 19 |
from core.QuizEngine import QuizEngine
|
| 20 |
from core.PineconeManager import PineconeManager
|
| 21 |
+
from huggingface_hub import hf_hub_download
|
| 22 |
+
from llama_cpp import Llama
|
| 23 |
|
| 24 |
# --- CONFIGURATION ---
|
| 25 |
st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
|
|
|
|
| 158 |
except Exception as e:
|
| 159 |
return f"[OpenAI Error: {e}]", None
|
| 160 |
|
| 161 |
+
# --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
|
| 162 |
+
elif "Custom Gemma" in model_choice:
|
| 163 |
+
try:
|
| 164 |
+
# 1. Download Model (Cached automatically)
|
| 165 |
+
repo_id = "NavyDevilDoc/navy-custom-models"
|
| 166 |
+
filename = "gemma-2-9b-it.Q4_K_M.gguf"
|
| 167 |
+
|
| 168 |
+
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 169 |
+
|
| 170 |
+
# 2. Initialize Llama (The Engine)
|
| 171 |
+
# n_ctx=8192 matches Gemma 2's window. n_threads=8 utilizes your CPU Upgrade.
|
| 172 |
+
llm = Llama(
|
| 173 |
+
model_path=model_path,
|
| 174 |
+
n_ctx=8192,
|
| 175 |
+
n_threads=8,
|
| 176 |
+
verbose=False
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
# 3. Format Prompt for Gemma 2 (it is picky about its own turn-based instruction format, which is not ChatML)
|
| 180 |
+
# Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
|
| 181 |
+
full_prompt = ""
|
| 182 |
+
for m in messages:
|
| 183 |
+
role = "model" if m["role"] == "assistant" else "user"
|
| 184 |
+
full_prompt += f"<start_of_turn>{role}\n{m['content']}<end_of_turn>\n"
|
| 185 |
+
full_prompt += "<start_of_turn>model\n"
|
| 186 |
+
|
| 187 |
+
# 4. Generate
|
| 188 |
+
output = llm(
|
| 189 |
+
full_prompt,
|
| 190 |
+
max_tokens=max_tokens,
|
| 191 |
+
stop=["<end_of_turn>"],
|
| 192 |
+
temperature=0.3
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
response_text = output['choices'][0]['text']
|
| 196 |
+
usage = {
|
| 197 |
+
"input": output['usage']['prompt_tokens'],
|
| 198 |
+
"output": output['usage']['completion_tokens']
|
| 199 |
+
}
|
| 200 |
+
return response_text, usage
|
| 201 |
+
|
| 202 |
+
except Exception as e:
|
| 203 |
+
return f"[GGUF Error: {e}]", None
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
# --- ROUTE 4: LOCAL/OPEN SOURCE ---
|
| 207 |
else:
|
| 208 |
model_map = {
|
| 209 |
"Granite 4 (IBM)": "granite4:latest",
|
|
|
|
| 311 |
"Standard (All-MiniLM, 384d)": "sentence-transformers/all-MiniLM-L6-v2",
|
| 312 |
"High-Perf (MPNet, 768d)": "sentence-transformers/all-mpnet-base-v2",
|
| 313 |
"OpenAI Small (1536d)": "text-embedding-3-small"
|
| 314 |
+
"Custom Navy (BGE, 768d)": "NavyDevilDoc/navy-custom-models/bge-finetuned" # NEW
|
| 315 |
}
|
| 316 |
embed_choice_label = st.selectbox("Select Embedding Model", list(embed_options.keys()))
|
| 317 |
st.session_state.active_embed_model = embed_options[embed_choice_label]
|