Update app.py
app.py CHANGED
@@ -156,6 +156,13 @@ OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma2:9b")
 OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "300"))
 
+# --- CPU speed-tuning parameters ---
+FAST_MODE = True
+LLM_THREADS = int(os.getenv("LLM_THREADS", str(os.cpu_count() or 8)))  # override via ENV if needed
+LLM_NUM_PREDICT = 80 if FAST_MODE else 200  # generation token cap (80~128 is comfortable for 9B on CPU)
+LLM_NUM_CTX = 1024 if FAST_MODE else 2048   # context window (smaller is faster)
+HISTORY_WINDOW = 4                          # pass only the most recent N messages to the LLM
+
 
 KOREAN_SYSTEM_PROMPT = """당신은 한국어 어시스턴트입니다. 항상 한국어로 답하세요."""
 
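The block above is the whole tuning surface: FAST_MODE flips both budget knobs at once, and LLM_THREADS can be changed per deployment without touching the code. A quick hypothetical check of how the override resolves (constant names copied from the diff; the simulated ENV value is made up):

```python
import os

# Hypothetical ENV override, as the diff's own comment suggests.
os.environ["LLM_THREADS"] = "4"

FAST_MODE = True
LLM_THREADS = int(os.getenv("LLM_THREADS", str(os.cpu_count() or 8)))
LLM_NUM_PREDICT = 80 if FAST_MODE else 200
LLM_NUM_CTX = 1024 if FAST_MODE else 2048

print(LLM_THREADS, LLM_NUM_PREDICT, LLM_NUM_CTX)  # -> 4 80 1024
```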
@@ -250,16 +257,11 @@ def _call_ollama_chat(messages, model=OLLAMA_MODEL, temperature=0.8, top_p=0.9,
 
 
 def call_ollama_stream(messages, *, model: str = OLLAMA_MODEL,
-                       temperature: float = 0.
-                       top_k: int =
-                       num_predict: int =
+                       temperature: float = 0.7, top_p: float = 0.9,
+                       top_k: int = 20, repeat_penalty: float = 1.1,
+                       num_predict: int = LLM_NUM_PREDICT, num_ctx: int = LLM_NUM_CTX,
                        system_prompt: str | None = None):
-    """
-    Ollama /api/chat streaming generator.
-    Can be consumed directly with st.write_stream(...) in Streamlit.
-    """
     url = f"{OLLAMA_HOST}/api/chat"
-
     _msgs = []
     if system_prompt:
         _msgs.append({"role": "system", "content": system_prompt})
@@ -273,10 +275,12 @@ def call_ollama_stream(messages, *, model: str = OLLAMA_MODEL,
             "top_p": top_p,
             "top_k": top_k,
             "repeat_penalty": repeat_penalty,
-            "num_predict": num_predict,
-            "num_ctx": num_ctx
+            "num_predict": num_predict,
+            "num_ctx": num_ctx,
+            "num_thread": LLM_THREADS,  # ✅ CPU thread count
+            # "use_mmap": True,         # (optional) try to reduce first-token latency
         },
-        "stream": True,
+        "stream": True,
     }
 
     with requests.post(url, json=payload, stream=True, timeout=OLLAMA_TIMEOUT) as resp:
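The hunks only touch the signature and the request payload; the streaming loop itself is unchanged and never shown here. For orientation, a minimal self-contained sketch of such a generator, assuming the standard NDJSON stream of Ollama's /api/chat endpoint (one JSON object per line, each carrying a message.content chunk until a final done flag). This is an illustration, not the Space's actual code:

```python
import json
import os

import requests

OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "300"))


def ollama_chat_stream(messages, *, model, system_prompt=None, **options):
    """Hedged sketch mirroring call_ollama_stream: yields assistant text chunks."""
    msgs = [{"role": "system", "content": system_prompt}] if system_prompt else []
    msgs += list(messages)
    payload = {
        "model": model,
        "messages": msgs,
        "options": options,  # e.g. num_predict, num_ctx, num_thread
        "stream": True,
    }
    with requests.post(f"{OLLAMA_HOST}/api/chat", json=payload,
                       stream=True, timeout=OLLAMA_TIMEOUT) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():  # NDJSON: one JSON object per line
            if not line:
                continue
            chunk = json.loads(line)
            if chunk.get("done"):  # the final bookkeeping object carries no content
                break
            yield chunk.get("message", {}).get("content", "")
```

Because it yields plain strings, a generator like this is exactly what st.write_stream(...) expects on the Streamlit side.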
@@ -339,18 +343,21 @@ def render_llm_followup(chat_container, inline=False):
     st.session_state.setdefault("llm_msgs", [])
     st.session_state["llm_msgs"].append({"role": "user", "content": text})
 
+    # ✅ history slice (send only the most recent N messages)
+    def _last_msgs(n=HISTORY_WINDOW):
+        hist = st.session_state["llm_msgs"]
+        return hist[-n:] if len(hist) > n else hist
+
     # ✅ switched to a streaming call
     try:
         with st.chat_message("assistant"):
-
-            msgs = st.session_state["llm_msgs"]
+            msgs = _last_msgs()  # ⬅️ here!
             full_text = st.write_stream(
                 call_ollama_stream(
                     msgs,
                     model=OLLAMA_MODEL,
                     system_prompt=KOREAN_SYSTEM_PROMPT,
-
-                    num_ctx=2048
+                    # num_predict/num_ctx use the defaults (constants above)
                 )
             )
         st.session_state["llm_msgs"].append({"role": "assistant", "content": full_text})
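Trimming history is the other CPU lever: prompt prefill cost grows with the number of tokens sent, so capping the transcript at HISTORY_WINDOW messages keeps time-to-first-token roughly constant as the chat grows. The slice semantics, shown with hypothetical messages (same logic as _last_msgs, minus the Streamlit session state):

```python
HISTORY_WINDOW = 4


def last_msgs(history, n=HISTORY_WINDOW):
    # Same logic as the diff's _last_msgs helper.
    return history[-n:] if len(history) > n else history


history = [{"role": "user", "content": f"turn {i}"} for i in range(6)]
print([m["content"] for m in last_msgs(history)])
# -> ['turn 2', 'turn 3', 'turn 4', 'turn 5']
```

The length check is harmless but redundant: history[-n:] already returns the whole list when it holds fewer than n items. One caveat worth knowing is that a plain tail slice can start on an assistant turn, which some chat templates handle less gracefully than a user-first window.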