code-slicer committed on
Commit
38845c7
·
verified ·
1 Parent(s): 8662746

Update app.py

Files changed (1)
  1. app.py +22 -15
app.py CHANGED
@@ -156,6 +156,13 @@ OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma2:9b")
 OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "300"))
 
+# --- CPU speed tuning parameters ---
+FAST_MODE = True
+LLM_THREADS = int(os.getenv("LLM_THREADS", str(os.cpu_count() or 8)))  # override via the LLM_THREADS env var if needed
+LLM_NUM_PREDICT = 80 if FAST_MODE else 200  # generation token cap (80-128 feels responsive for a 9B model on CPU)
+LLM_NUM_CTX = 1024 if FAST_MODE else 2048   # context window (smaller is faster)
+HISTORY_WINDOW = 4                          # pass only the most recent N messages to the LLM
+
 
 KOREAN_SYSTEM_PROMPT = """당신은 한국어 어시스턴트입니다. 항상 한국어로 답하세요."""
 
@@ -250,16 +257,11 @@ def _call_ollama_chat(messages, model=OLLAMA_MODEL, temperature=0.8, top_p=0.9,
 
 
 def call_ollama_stream(messages, *, model: str = OLLAMA_MODEL,
-                       temperature: float = 0.8, top_p: float = 0.9,
-                       top_k: int = 40, repeat_penalty: float = 1.1,
-                       num_predict: int = 200, num_ctx: int = 2048,
+                       temperature: float = 0.7, top_p: float = 0.9,
+                       top_k: int = 20, repeat_penalty: float = 1.1,
+                       num_predict: int = LLM_NUM_PREDICT, num_ctx: int = LLM_NUM_CTX,
                        system_prompt: str | None = None):
-    """
-    Streaming generator for Ollama /api/chat.
-    In Streamlit it can be passed straight to st.write_stream(...).
-    """
     url = f"{OLLAMA_HOST}/api/chat"
-
     _msgs = []
     if system_prompt:
         _msgs.append({"role": "system", "content": system_prompt})
@@ -273,10 +275,12 @@ def call_ollama_stream(messages, *, model: str = OLLAMA_MODEL,
             "top_p": top_p,
             "top_k": top_k,
             "repeat_penalty": repeat_penalty,
-            "num_predict": num_predict,  # 128-256 recommended for CPU + 9B
-            "num_ctx": num_ctx  # 2048-4096
+            "num_predict": num_predict,
+            "num_ctx": num_ctx,
+            "num_thread": LLM_THREADS,  # ✅ number of CPU threads
+            # "use_mmap": True,  # (optional) attempt to reduce first-token latency
         },
-        "stream": True,  # ✅ the key part
+        "stream": True,
     }
 
     with requests.post(url, json=payload, stream=True, timeout=OLLAMA_TIMEOUT) as resp:
@@ -339,18 +343,21 @@ def render_llm_followup(chat_container, inline=False):
     st.session_state.setdefault("llm_msgs", [])
     st.session_state["llm_msgs"].append({"role": "user", "content": text})
 
+    # ✅ history slice (send only the most recent N messages)
+    def _last_msgs(n=HISTORY_WINDOW):
+        hist = st.session_state["llm_msgs"]
+        return hist[-n:] if len(hist) > n else hist
+
     # ✅ switched to a streaming call
     try:
         with st.chat_message("assistant"):
-            # send the system prompt plus the full history
-            msgs = st.session_state["llm_msgs"]
+            msgs = _last_msgs()  # ⬅️ the windowed history goes here
             full_text = st.write_stream(
                 call_ollama_stream(
                     msgs,
                     model=OLLAMA_MODEL,
                     system_prompt=KOREAN_SYSTEM_PROMPT,
-                    num_predict=200,  # adjust to 128-256 if needed
-                    num_ctx=2048
+                    # num_predict/num_ctx fall back to the module-level defaults
                 )
             )
             st.session_state["llm_msgs"].append({"role": "assistant", "content": full_text})
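The last hunk of call_ollama_stream shown above ends at the requests.post(...) context line, so the loop that actually parses the stream sits outside the diff. For reference, a generator meant for st.write_stream typically consumes Ollama's /api/chat stream, which is newline-delimited JSON where each object carries a message.content fragment and the final object has done set to true. The sketch below only illustrates that pattern; stream_chat is a hypothetical stand-in, not code from this commit.

import json
import requests

def stream_chat(url: str, payload: dict, timeout: int = 300):
    # Yield assistant text fragments from a streaming Ollama /api/chat call.
    # Assumes payload contains "stream": True, as set in the diff above.
    with requests.post(url, json=payload, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():  # one JSON object per line
            if not line:
                continue
            chunk = json.loads(line)
            piece = chunk.get("message", {}).get("content", "")
            if piece:
                yield piece
            if chunk.get("done"):  # final object carries stats, no content
                break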
 
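Since the commit is aimed at CPU latency (a smaller context window and token cap, an explicit num_thread, and a four-message history window), the effect is easiest to check by timing a direct call outside Streamlit. Below is a hypothetical harness, not part of this commit; it assumes app.py's module-level code is import-safe outside Streamlit and that an Ollama server is reachable at OLLAMA_HOST. KOREAN_SYSTEM_PROMPT says, roughly, "You are a Korean-language assistant. Always answer in Korean."

import time
from app import call_ollama_stream, OLLAMA_MODEL, KOREAN_SYSTEM_PROMPT

# Time one short exchange: time-to-first-fragment and total wall time.
msgs = [{"role": "user", "content": "안녕하세요, 자기소개를 해주세요."}]  # "Hello, please introduce yourself."

start = time.perf_counter()
pieces = []
for piece in call_ollama_stream(msgs, model=OLLAMA_MODEL,
                                system_prompt=KOREAN_SYSTEM_PROMPT):
    if not pieces:
        print(f"first fragment after {time.perf_counter() - start:.2f}s")
    pieces.append(piece)

total = time.perf_counter() - start
print(f"{len(''.join(pieces))} chars in {total:.2f}s")

Re-running the same prompt with FAST_MODE flipped, or with LLM_THREADS overridden in the environment, gives a rough before/after comparison.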