gbrabbit committed
Commit dd1d3d2 · 1 Parent(s): cbf7778

Auto commit at 24-2025-08 9:25:04

lily_llm_api/api/routers/document_router.py CHANGED
@@ -5,11 +5,14 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Form
 from typing import Optional, List
 import logging
 import time
+import os
+import uuid
 
 from ...models.schemas import (
     DocumentUploadResponse, RAGQueryRequest, RAGQueryResponse,
     DocumentProcessResponse, MultimodalRAGResponse
 )
+from ...services.session_registry import set_user_for_room
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -24,27 +27,36 @@ async def upload_document(
     try:
         start_time = time.time()
 
-        # ํŒŒ์ผ ์ฝ๊ธฐ
+        # ํŒŒ์ผ ์ฝ๊ธฐ ๋ฐ ์ž„์‹œ ์ €์žฅ (ํŒŒ์ผ ๊ฒฝ๋กœ ๊ธฐ๋ฐ˜ ์ฒ˜๋ฆฌ๊ธฐ ํ˜ธํ™˜)
         content = await file.read()
         filename = file.filename
+        temp_dir = os.path.join("data", "uploads")
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+        temp_path = os.path.join(temp_dir, temp_name)
+        with open(temp_path, "wb") as f:
+            f.write(content)
 
-        # ๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์‚ฌ์šฉ
+        # ๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์‚ฌ์šฉ (์šฐ์„  RAG์— ์ €์žฅ ํฌํ•จ ๊ฒฝ๋กœ)
         try:
-            from lily_llm_core.document_processor import document_processor
-
-            # ๋ฌธ์„œ ์ฒ˜๋ฆฌ
-            result = document_processor.process_document(
-                content=content,
-                filename=filename,
+            from lily_llm_core.rag_processor import rag_processor
+            document_id = f"doc_{int(time.time()*1000)}_{uuid.uuid4().hex}"
+            result = rag_processor.process_and_store_document(
                 user_id=user_id,
-                room_id=room_id
+                document_id=document_id,
+                file_path=temp_path,
             )
+            # ์—…๋กœ๋“œ ์‹œ ๋ฐฉ-์‚ฌ์šฉ์ž ๋งคํ•‘ ์ €์žฅ (ํ›„์† ์ƒ์„ฑ์—์„œ ์ž๋™ ๋ณด์ •)
+            try:
+                set_user_for_room(room_id, user_id)
+            except Exception:
+                pass
 
             if result.get("success"):
                 processing_time = time.time() - start_time
                 return DocumentUploadResponse(
                     success=True,
-                    document_id=result.get("document_id", "unknown"),
+                    document_id=result.get("document_id", document_id),
                     message="๋ฌธ์„œ ์—…๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ ์™„๋ฃŒ",
                     chunks=result.get("chunks", 0),
                     latex_count=result.get("latex_count", 0),
@@ -59,12 +71,26 @@ async def upload_document(
                 )
 
         except ImportError:
-            return DocumentUploadResponse(
-                success=False,
-                document_id="",
-                message="๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ import ์‹คํŒจ",
-                error="Document processor not available"
-            )
+            # ํด๋ฐฑ: ์ˆœ์ˆ˜ ๋ฌธ์„œ ํŒŒ์„œ๋กœ ์ฒ˜๋ฆฌ๋งŒ ์ˆ˜ํ–‰
+            try:
+                from lily_llm_core.document_processor import document_processor
+                docs = document_processor.process_document(temp_path)
+                processing_time = time.time() - start_time
+                return DocumentUploadResponse(
+                    success=True,
+                    document_id="",
+                    message="๋ฌธ์„œ ์—…๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ ์™„๋ฃŒ (๋ฒกํ„ฐ ์ €์žฅ ๋ฏธ์ˆ˜ํ–‰)",
+                    chunks=len(docs) if docs else 0,
+                    latex_count=0,
+                    auto_response=None
+                )
+            except Exception as e:
+                return DocumentUploadResponse(
+                    success=False,
+                    document_id="",
+                    message="๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ import ์‹คํŒจ",
+                    error=str(e)
+                )
 
     except Exception as e:
         logger.error(f"๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
@@ -269,23 +295,29 @@ async def batch_process_documents(
     results = []
 
     try:
-        from lily_llm_core.document_processor import document_processor
+        from lily_llm_core.rag_processor import rag_processor
 
         for file in files:
             content = await file.read()
             filename = file.filename
-
-            result = document_processor.process_document(
-                content=content,
-                filename=filename,
+            # ์ž„์‹œ ์ €์žฅ ํ›„ RAG์— ์ €์žฅ ํฌํ•จ ์ฒ˜๋ฆฌ
+            temp_dir = os.path.join("data", "uploads")
+            os.makedirs(temp_dir, exist_ok=True)
+            temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+            temp_path = os.path.join(temp_dir, temp_name)
+            with open(temp_path, "wb") as f:
+                f.write(content)
+            document_id = f"doc_{int(time.time()*1000)}_{uuid.uuid4().hex}"
+            result = rag_processor.process_and_store_document(
                 user_id=user_id,
-                room_id=room_id
+                document_id=document_id,
+                file_path=temp_path,
             )
 
             results.append({
                 "filename": filename,
                 "success": result.get("success", False),
-                "document_id": result.get("document_id", ""),
+                "document_id": result.get("document_id", document_id),
                 "chunks": result.get("chunks", 0),
                 "error": result.get("error")
             })
@@ -299,7 +331,35 @@ async def batch_process_documents(
         }
 
     except ImportError:
-        raise HTTPException(status_code=500, detail="Document processor not available")
+        # ํด๋ฐฑ: ์ €์žฅ ์—†์ด ์ฒ˜๋ฆฌ๋งŒ ์ˆ˜ํ–‰
+        try:
+            from lily_llm_core.document_processor import document_processor
+            for file in files:
+                content = await file.read()
+                filename = file.filename
+                temp_dir = os.path.join("data", "uploads")
+                os.makedirs(temp_dir, exist_ok=True)
+                temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+                temp_path = os.path.join(temp_dir, temp_name)
+                with open(temp_path, "wb") as f:
+                    f.write(content)
+                docs = document_processor.process_document(temp_path)
+                results.append({
+                    "filename": filename,
+                    "success": bool(docs),
+                    "document_id": "",
+                    "chunks": len(docs) if docs else 0,
+                    "error": None if docs else "processing failed"
+                })
+            processing_time = time.time() - start_time
+            return {
+                "status": "success",
+                "results": results,
+                "total_files": len(files),
+                "processing_time": processing_time
+            }
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
 
     except Exception as e:
         logger.error(f"์ผ๊ด„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
@@ -350,11 +410,15 @@ async def upload_multimodal_document(
 
     try:
         from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
-
-        # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ฌธ์„œ ์ฒ˜๋ฆฌ
+        # ์ž„์‹œ ์ €์žฅ ํ›„ ํŒŒ์ผ ๊ฒฝ๋กœ ๊ธฐ๋ฐ˜ ์ฒ˜๋ฆฌ
+        temp_dir = os.path.join("data", "uploads")
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+        temp_path = os.path.join(temp_dir, temp_name)
+        with open(temp_path, "wb") as f:
+            f.write(content)
         result = hybrid_rag_processor.process_document(
-            content=content,
-            filename=filename,
+            file_path=temp_path,
             user_id=user_id,
             room_id=room_id
         )
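Both upload paths above stage the raw upload under data/uploads with a millisecond timestamp plus uuid prefix before handing a file path to the processor. A small standalone illustration of that naming scheme (the sample filename and the helper function are placeholders for illustration, not part of the commit):

import os
import time
import uuid

def staged_paths(filename: str, base_dir: str = os.path.join("data", "uploads")) -> tuple[str, str]:
    # Mirrors the naming used in document_router: timestamp + uuid hex keeps temp names and ids collision-free
    os.makedirs(base_dir, exist_ok=True)
    stamp = int(time.time() * 1000)
    temp_path = os.path.join(base_dir, f"{stamp}_{uuid.uuid4().hex}_{filename}")
    document_id = f"doc_{stamp}_{uuid.uuid4().hex}"
    return temp_path, document_id

print(staged_paths("lecture_notes.pdf"))  # placeholder filename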
lily_llm_api/api/routers/generation_router.py CHANGED
@@ -10,6 +10,7 @@ from ...models.schemas import GenerateResponse, MultimodalGenerateResponse
 from ...services.generation_service import generate_sync
 from ...services.model_service import is_model_loaded
 from ...utils.system_utils import select_model_interactive
+from ...services.session_registry import get_user_for_room, set_user_for_room, set_user_for_session
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -31,12 +32,27 @@ async def generate(request: Request,
 
     start_time = time.time()
 
+    # ์‚ฌ์šฉ์ž๊ฐ€ ๋น„์–ด์žˆ๊ฑฐ๋‚˜ anonymous๋ฉด ๋ฃธ ๊ธฐ๋ฐ˜ ์ตœ๊ทผ ์‚ฌ์šฉ์ž ๋ณด์ •
+    if not user_id or user_id == "anonymous":
+        try:
+            recovered_user = get_user_for_room(room_id)
+            if recovered_user:
+                print(f"🔍 [DEBUG] ๋ฃธ ๊ธฐ๋ฐ˜ ์‚ฌ์šฉ์ž ๋ณด์ •: {user_id} -> {recovered_user} (room={room_id})")
+                user_id = recovered_user
+        except Exception:
+            pass
+
     # ์„ธ์…˜ ID๊ฐ€ ์—†์œผ๋ฉด ์ž๋™ ์ƒ์„ฑ (์ฑ„ํŒ…๋ฐฉ๋ณ„ ๊ณ ์œ  ์„ธ์…˜)
     if not session_id:
-        # ์ฑ„ํŒ…๋ฐฉ + ์‚ฌ์šฉ์ž + ํƒ€์ž„์Šคํƒฌํ”„ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ณ ์œ ํ•œ ์„ธ์…˜ ์ƒ์„ฑ
        timestamp = int(time.time())
        session_id = f"room_{room_id}_user_{user_id}_{timestamp}"
        print(f"🔍 [DEBUG] ์ž๋™ ์„ธ์…˜ ID ์ƒ์„ฑ: {session_id} (์ฑ„ํŒ…๋ฐฉ: {room_id}, ์‚ฌ์šฉ์ž: {user_id})")
+    else:
+        # ์ œ๊ณต๋œ ์„ธ์…˜์—๋„ ์‚ฌ์šฉ์ž ๋งคํ•‘ ์ €์žฅ
+        try:
+            set_user_for_session(session_id, user_id)
+        except Exception:
+            pass
 
     if use_context:
         try:
lily_llm_api/app.py CHANGED
@@ -1490,7 +1490,7 @@ async def manual_cleanup_all_sessions():
     except Exception as e:
         return {"status": "error", "message": str(e)}
 
-@app.post("/generate", response_model=GenerateResponse)
+@app.post("/api/v2/generate", response_model=GenerateResponse)
 async def generate(request: Request,
                    prompt: str = Form(...),
                    image1: UploadFile = File(None),
@@ -1634,7 +1634,7 @@ async def generate_multimodal(prompt: str = Form(...),
 
 
 
-@app.get("/models")
+@app.get("/api/v2/models")
 async def list_models():
     """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก"""
     return {
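The only functional change in lily_llm_api/app.py is the route prefix: /generate and /models now live under /api/v2/. A minimal client sketch against the renamed endpoints (the base URL, timeouts, and the sample prompt are assumptions for illustration, not part of the commit):

import requests

BASE_URL = "http://localhost:8000"  # assumed local dev server; adjust to your deployment

# List available models via the renamed endpoint
print(requests.get(f"{BASE_URL}/api/v2/models", timeout=30).json())

# Generate text; prompt is a form field, image1 is an optional file upload for multimodal prompts
resp = requests.post(
    f"{BASE_URL}/api/v2/generate",
    data={"prompt": "์•ˆ๋…•ํ•˜์„ธ์š”"},
    # files={"image1": open("example.png", "rb")},  # uncomment for an image-grounded prompt
    timeout=300,
)
print(resp.json())

Clients that still call the old /generate and /models paths will no longer reach these handlers and need the same prefix update.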
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -246,7 +246,7 @@ class Kanana15V3bInstructProfile:
             "<|-im_end|>",  # 🔄 ์ž˜๋ชป๋œ ํ† ํฐ ์ถ”๊ฐ€
             "<image>",
             "user\n",
-            "assistant\n"
+            "assistant\n"
         ]
 
         for pattern in patterns_to_remove:
lily_llm_api/services/generation_service.py CHANGED
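The diff below is large; its main additions to generate_sync are (1) recovering the user id from the session/room registry, (2) normalizing one-off session ids to a stable room/user key, and (3) an in-process cache of the last images seen per session so a follow-up turn without attachments can reuse them. A stripped-down sketch of the cache and normalization pattern, kept separate from the real function (only _session_image_cache, the id format, and the 4-image cap come from the commit; the helper functions here are illustrative):

from typing import Dict, List, Optional

_session_image_cache: Dict[str, List[bytes]] = {}  # in-process only; cleared on restart
MAX_CACHED_IMAGES = 4  # the commit keeps at most 4 images per session


def normalize_session_id(session_id: Optional[str], room_id: Optional[str], user_id: Optional[str]) -> str:
    # One-off ids like "room_default_user_anonymous_1755..." collapse to a stable per-room/per-user key
    if not session_id or (session_id.startswith("room_") and session_id.count("_") >= 3):
        return f"room_{room_id or 'default'}_{user_id or 'anonymous'}"
    return session_id


def remember_images(session_id: str, images: List[bytes]) -> None:
    # Called after a turn that actually carried images
    _session_image_cache[session_id] = [b for b in images if b][:MAX_CACHED_IMAGES]


def recall_images(session_id: str) -> List[bytes]:
    # Called when the current request has no images of its own
    return _session_image_cache.get(session_id, [])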
@@ -3,13 +3,26 @@ Generation service for Lily LLM API
3
  """
4
  import logging
5
  import time
6
- from typing import Optional, List
 
 
7
  from PIL import Image
8
  import io
9
  import torch
10
 
11
  logger = logging.getLogger(__name__)
12
 
 
 
 
 
 
 
 
 
 
 
 
13
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
14
  temperature: Optional[float] = None, top_p: Optional[float] = None,
15
  do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
@@ -21,6 +34,31 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
21
 
22
  current_profile = get_current_profile()
23
  current_model = get_current_model()
 
 
 
 
 
 
24
 
25
  print(f"๐Ÿ” [DEBUG] generate_sync ์‹œ์ž‘ - prompt ๊ธธ์ด: {len(prompt)}")
26
  print(f"๐Ÿ” [DEBUG] ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ชจ๋ธ: {current_profile.display_name if current_profile else 'None'}")
@@ -47,16 +85,63 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
47
  if image_data_list and len([img for img in image_data_list if img]) > 0:
48
  all_image_data.extend(image_data_list)
49
  print(f"๐Ÿ” [DEBUG] ์ง์ ‘ ์ „๋‹ฌ๋œ ์ด๋ฏธ์ง€ {len(image_data_list)}๊ฐœ ์ถ”๊ฐ€")
 
 
 
 
 
 
 
 
50
 
 
 
 
51
  if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
52
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹œ์ž‘ - ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len([img for img in all_image_data if img])}")
53
 
54
  # ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: ๊ฐ„๋‹จํ•œ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
55
- max_images = min(len(all_image_data), 4)
56
  logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘... (์ด๋ฏธ์ง€ {max_images}๊ฐœ)")
57
 
58
  try:
59
  metas_list = []
 
60
  for idx, image_bytes in enumerate(all_image_data[:max_images]):
61
  if image_bytes:
62
  try:
@@ -64,7 +149,6 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
64
  # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
65
  if processor and hasattr(processor, 'image_processor'):
66
  processed = processor.image_processor(pil_image)
67
- all_pixel_values.append(processed["pixel_values"])
68
  metas_list.append(processed.get("image_meta", {}))
69
  else:
70
  logger.warning(f"โš ๏ธ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")
@@ -79,6 +163,28 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
79
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {combined_image_metas}")
80
  else:
81
  combined_image_metas = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  except Exception as e:
83
  logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
84
  combined_image_metas = {}
@@ -103,7 +209,17 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
103
  except Exception as e:
104
  print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e}")
105
  context_prompt = ""
106
-
 
 
 
 
 
 
 
 
 
 
107
  except Exception as e:
108
  print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e} (์„ธ์…˜: {session_id})")
109
  context_prompt = ""
@@ -113,9 +229,13 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
113
 
114
  # ๐Ÿ”„ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ๋ฐฉ์‹)
115
  if all_pixel_values and len(all_pixel_values) > 0:
116
- # ๐Ÿ”„ ๊ณต์‹ Kanana ํ˜•์‹: Human: <image> ํ…์ŠคํŠธ
117
- formatted_prompt = f"Human: <image>{prompt}"
 
 
 
118
  print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ํ˜•์‹): {formatted_prompt}")
 
119
  image_processed = True
120
  else:
121
  image_processed = False
@@ -180,32 +300,106 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
180
 
181
  if hasattr(tokenizer, 'encode_prompt'):
182
  print(f"๐Ÿ” [DEBUG] encode_prompt ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ")
 
 
 
 
 
 
 
 
183
 
184
- # ์•ˆ์ „ํ•œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ƒ์„ฑ
185
- safe_image_meta = {
186
- 'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
187
- 'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
188
- }
189
 
 
190
  try:
 
 
 
 
 
 
 
 
 
 
191
  inputs = tokenizer.encode_prompt(
192
  prompt=formatted_prompt,
193
- max_length=2048,
194
- image_meta=safe_image_meta
195
  )
 
196
 
 
197
  if 'seq_length' in inputs:
 
198
  del inputs['seq_length']
199
 
200
- input_ids = inputs['input_ids']
201
- attention_mask = inputs['attention_mask']
 
 
 
 
 
202
 
203
- # ํŠœํ”Œ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
204
- if isinstance(input_ids, tuple):
205
- input_ids = input_ids[0]
206
- if isinstance(attention_mask, tuple):
207
- attention_mask = attention_mask[0]
208
-
 
 
 
 
 
209
  except Exception as e:
210
  print(f"โŒ [DEBUG] encode_prompt ์‹คํŒจ: {e}, ํด๋ฐฑ ์‚ฌ์šฉ")
211
  # ํด๋ฐฑ: ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ
@@ -257,6 +451,11 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
257
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids shape: {input_ids.shape}")
258
  print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {input_ids.shape[1]}")
259
 
 
 
 
 
 
260
  # --- 4. ์ƒ์„ฑ ์„ค์ • ---
261
  print(f"๐Ÿ” [DEBUG] ์ƒ์„ฑ ์„ค์ • ๊ตฌ์„ฑ ์‹œ์ž‘")
262
  gen_config = current_profile.get_generation_config()
@@ -316,9 +515,20 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
316
  print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ถ”๋ก  ์‹คํ–‰")
317
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ ๊ฐœ์ˆ˜: {len(all_pixel_values)}")
318
 
319
- # ์ด๋ฏธ์ง€ ํ…์„œ๋„ ๋””๋ฐ”์ด์Šค ํ™•์ธ
320
  pixel_values = torch.cat(all_pixel_values, dim=0)
321
  print(f"๐Ÿ” [DEBUG] ๊ฒฐํ•ฉ๋œ ์ด๋ฏธ์ง€ ํ…์„œ shape: {pixel_values.shape}")
 
 
 
 
 
 
 
 
 
 
 
322
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ dtype: {pixel_values.dtype}")
323
 
324
  # ๐Ÿ”„ ๋ชจ๋ธ๊ณผ ๋™์ผํ•œ dtype์œผ๋กœ ๋ณ€ํ™˜ (์„ฑ๋Šฅ ์ตœ์ ํ™”)
@@ -342,6 +552,82 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
342
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ด๋ฏธ์ง€ ํ…์„œ dtype: {pixel_values.dtype}")
343
  print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘ - ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ")
344
 
 
 
 
 
 
 
 
 
 
345
  # LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋œ ๋ชจ๋ธ์ธ์ง€ ํ™•์ธ
346
  try:
347
  from lily_llm_core.lora_manager import lora_manager
@@ -351,79 +637,69 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
351
  lora_model = lora_manager.get_model()
352
  if lora_model:
353
  print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ๋กœ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํ–‰")
354
- # ๐Ÿ”„ image_metas ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€ (๊ณต์‹ ๋ฐฉ์‹)
355
- processed_image_metas = {}
356
-
357
- # ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: vision_grid_thw๋ฅผ ํ…์„œ๋กœ ๋ณ€ํ™˜
358
- if 'vision_grid_thw' in combined_image_metas:
359
- vision_grid = combined_image_metas['vision_grid_thw']
360
- if isinstance(vision_grid, list):
361
- # ๐Ÿ”„ Kanana ๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ: ๋ฐฐ์น˜ ์ฐจ์›์„ ๋งž์ถค
362
- if len(vision_grid) == 1 and len(vision_grid[0]) == 3:
363
- # [(1, 34, 52)] -> (1, 1, 34, 52) ํ…์„œ๋กœ ๋ณ€ํ™˜ (๋ฐฐ์น˜ ์ฐจ์› ์ถ”๊ฐ€)
364
- t, h, w = vision_grid[0]
365
- # ๐Ÿ”„ 4์ฐจ์› ํ…์„œ๋กœ ๋ณ€ํ™˜: (batch_size, T, H, W) ํ˜•ํƒœ
366
- processed_image_metas['vision_grid_thw'] = torch.tensor([[[t, h, w]]], dtype=torch.long)
367
- print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜: {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
368
- else:
369
- # ๐Ÿ”„ ๋‹ค๋ฅธ ํ˜•ํƒœ์˜ ๊ฒฝ์šฐ ๋ฐฐ์น˜ ์ฐจ์› ์ถ”๊ฐ€
370
- processed_image_metas['vision_grid_thw'] = torch.tensor([vision_grid], dtype=torch.long)
371
- print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜ (๊ธฐ๋ณธ): {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
372
- else:
373
- # ํ…์„œ์ธ ๊ฒฝ์šฐ ๋ฐฐ์น˜ ์ฐจ์› ํ™•์ธ ๋ฐ ์ถ”๊ฐ€
374
- if len(vision_grid.shape) == 3:
375
- processed_image_metas['vision_grid_thw'] = vision_grid.unsqueeze(0)
376
- else:
377
- processed_image_metas['vision_grid_thw'] = vision_grid
378
-
379
- # ๐Ÿ”„ ๋‹ค๋ฅธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋„ ๋ฐฐ์น˜ ์ฐจ์› ๋งž์ถค
380
- for key, value in combined_image_metas.items():
381
- if key != 'vision_grid_thw':
382
- if isinstance(value, list):
383
- # ๋ฆฌ์ŠคํŠธ์ธ ๊ฒฝ์šฐ ๋ฐฐ์น˜ ์ฐจ์› ์ถ”๊ฐ€
384
- processed_image_metas[key] = [value]
385
- elif isinstance(value, torch.Tensor) and len(value.shape) == 2:
386
- # 2์ฐจ์› ํ…์„œ์ธ ๊ฒฝ์šฐ ๋ฐฐ์น˜ ์ฐจ์› ์ถ”๊ฐ€
387
- processed_image_metas[key] = value.unsqueeze(0)
388
- else:
389
- processed_image_metas[key] = value
390
 
391
  generate_kwargs = {
392
  'input_ids': input_ids,
393
  'attention_mask': attention_mask,
394
  'pixel_values': pixel_values,
395
- 'image_metas': processed_image_metas, # ๐Ÿ”„ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
396
  **gen_config
397
  }
398
  print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
399
  print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
400
  print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘... (ํƒ€์ž„์•„์›ƒ ์—†์Œ)")
401
-
402
- generated_ids = lora_model.generate(**generate_kwargs)
 
 
 
 
 
403
  else:
404
  print(f"โš ๏ธ [DEBUG] LoRA ๋ชจ๋ธ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Œ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
405
- generated_ids = current_model.generate(
406
- input_ids=input_ids,
407
- attention_mask=attention_mask,
408
- pixel_values=pixel_values,
 
 
409
  **gen_config
410
- )
 
 
 
 
 
411
  else:
412
  print(f"๐Ÿ” [DEBUG] LoRA ์–ด๋Œ‘ํ„ฐ ์—†์Œ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ), ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
413
- generated_ids = current_model.generate(
414
- input_ids=input_ids,
415
- attention_mask=attention_mask,
416
- pixel_values=pixel_values,
 
 
417
  **gen_config
418
- )
 
 
 
 
 
419
  except ImportError:
420
  print(f"๐Ÿ” [DEBUG] LoRA ์ง€์› ์•ˆ๋จ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
421
- generated_ids = current_model.generate(
422
- input_ids=input_ids,
423
- attention_mask=attention_mask,
424
- pixel_values=pixel_values,
 
 
425
  **gen_config
426
- )
 
 
 
 
 
427
 
428
  else:
429
  # ํ…์ŠคํŠธ-only: ๊ธฐ์กด ๋ฐฉ์‹
@@ -574,10 +850,40 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
574
  traceback.print_exc()
575
  return {"error": f"Response extraction failed: {str(e)}"}
576
 
577
- # --- 7. ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜ ---
578
  total_time = time.time() - t_tok_start
579
  print(f"๐Ÿ” [DEBUG] ์ „์ฒด ์ฒ˜๋ฆฌ ์™„๋ฃŒ - ์ด ์†Œ์š”์‹œ๊ฐ„: {total_time:.3f}์ดˆ")
580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
  return {
582
  "generated_text": response,
583
  "processing_time": total_time,
 
3
  """
4
  import logging
5
  import time
6
+ from typing import Optional, List, Dict
7
+ from pathlib import Path
8
+ from .session_registry import get_user_for_room, get_user_for_session, set_user_for_session
9
  from PIL import Image
10
  import io
11
  import torch
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
+ # ์„ธ์…˜๋ณ„ ์ตœ๊ทผ ์ด๋ฏธ์ง€ ์บ์‹œ (๊ฐ„๋‹จํ•œ ์ธ๋ฉ”๋ชจ๋ฆฌ)
16
+ # ์ฃผ์˜: ํ”„๋กœ์„ธ์Šค ์žฌ์‹œ์ž‘ ์‹œ ์ดˆ๊ธฐํ™”๋จ. ์ตœ๋Œ€ 4์žฅ ๋ณด๊ด€.
17
+ _session_image_cache: Dict[str, List[bytes]] = {}
18
+
19
+ # ์„ ํƒ์ : ๋ฒกํ„ฐ ์Šคํ† ์–ด์—์„œ ์ตœ๊ทผ ๋ฌธ์„œ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์ง€์›
20
+ try:
21
+ from lily_llm_core.vector_store_manager import vector_store_manager, SimpleVectorStore
22
+ except Exception:
23
+ vector_store_manager = None
24
+ SimpleVectorStore = None
25
+
26
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
27
  temperature: Optional[float] = None, top_p: Optional[float] = None,
28
  do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
 
34
 
35
  current_profile = get_current_profile()
36
  current_model = get_current_model()
37
+
38
+ # ์‚ฌ์šฉ์ž ๋ณด์ •: session/room ๊ธฐ๋ฐ˜ ์ตœ๊ทผ ์‚ฌ์šฉ์ž ๋ณต๊ตฌ
39
+ try:
40
+ if (not user_id) or (user_id == "anonymous"):
41
+ recovered = get_user_for_session(session_id) or get_user_for_room(room_id)
42
+ if recovered:
43
+ print(f"๐Ÿ” [DEBUG] ์‚ฌ์šฉ์ž ๋ณด์ •: {user_id} -> {recovered} (room={room_id}, session={session_id})")
44
+ user_id = recovered
45
+ except Exception:
46
+ pass
47
+
48
+ # ์„ธ์…˜ ID ์ •๊ทœํ™”: ์ œ๊ณต๋˜์ง€ ์•Š๊ฑฐ๋‚˜ ์ผํšŒ์„ฑ์œผ๋กœ ๋ณด์ด๋Š” ๊ฒฝ์šฐ ๋ฃธ/์‚ฌ์šฉ์ž ๊ธฐ๋ฐ˜์œผ๋กœ ๊ณ ์ •
49
+ original_session_id = session_id
50
+ if not session_id or (isinstance(session_id, str) and session_id.startswith("room_") and session_id.count("_") >= 3):
51
+ # ์˜ˆ: room_default_user_anonymous_17559... ํ˜•ํƒœ๋ฅผ ์•ˆ์ •์ ์ธ ํ‚ค๋กœ ์น˜ํ™˜
52
+ stable_user = user_id or "anonymous"
53
+ stable_room = room_id or "default"
54
+ session_id = f"room_{stable_room}_{stable_user}"
55
+ if original_session_id and original_session_id != session_id:
56
+ print(f"๐Ÿ” [DEBUG] ์„ธ์…˜ ID ์ •๊ทœํ™”: {original_session_id} -> {session_id}")
57
+ # ์„ธ์…˜-์‚ฌ์šฉ์ž ๋งคํ•‘ ์ €์žฅ
58
+ try:
59
+ set_user_for_session(session_id, user_id)
60
+ except Exception:
61
+ pass
62
 
63
  print(f"๐Ÿ” [DEBUG] generate_sync ์‹œ์ž‘ - prompt ๊ธธ์ด: {len(prompt)}")
64
  print(f"๐Ÿ” [DEBUG] ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ชจ๋ธ: {current_profile.display_name if current_profile else 'None'}")
 
85
  if image_data_list and len([img for img in image_data_list if img]) > 0:
86
  all_image_data.extend(image_data_list)
87
  print(f"๐Ÿ” [DEBUG] ์ง์ ‘ ์ „๋‹ฌ๋œ ์ด๋ฏธ์ง€ {len(image_data_list)}๊ฐœ ์ถ”๊ฐ€")
88
+ else:
89
+ # ํ˜„์žฌ ์š”์ฒญ์— ์ด๋ฏธ์ง€๊ฐ€ ์—†์œผ๋ฉด ์„ธ์…˜ ์บ์‹œ์—์„œ ๋ณต๊ตฌ ์‹œ๋„
90
+ if session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
91
+ cached_imgs = _session_image_cache[session_id]
92
+ all_image_data.extend(cached_imgs)
93
+ print(f"๐Ÿ” [DEBUG] ์„ธ์…˜ ์บ์‹œ์—์„œ ์ด์ „ ์ด๋ฏธ์ง€ {len(cached_imgs)}๊ฐœ ๋ณต๊ตฌ (์„ธ์…˜: {session_id})")
94
+
95
+ # ์ถ”๊ฐ€ ๋ณต๊ตฌ: ์—ฌ์ „ํžˆ ์ด๋ฏธ์ง€๊ฐ€ ์—†๊ณ  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ์ด๋ฉด, ์ตœ๊ทผ RAG ๋ฌธ์„œ์—์„œ ์ด๋ฏธ์ง€ ๋ฐ”์ดํŠธ ๋ณต์›
96
+ if (not all_image_data or len([img for img in all_image_data if img]) == 0) and getattr(current_profile, 'multimodal', False):
97
+ try:
98
+ if vector_store_manager is not None:
99
+ # ์‚ฌ์šฉ์ž ๋ฌธ์„œ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ (์ตœ์‹ ์ˆœ ์ •๋ ฌ)
100
+ user_docs = vector_store_manager.get_user_documents(user_id)
101
+ if user_docs:
102
+ # last_updated > created_at ์šฐ์„  ์‚ฌ์šฉ
103
+ def _ts(d: Dict):
104
+ return d.get('last_updated') or d.get('created_at') or 0
105
+ user_docs.sort(key=_ts, reverse=True)
106
+ latest_doc_id = user_docs[0].get('document_id')
107
+ base_path = getattr(vector_store_manager, 'base_path', Path('./vector_stores'))
108
+ store_path = Path(base_path) / user_id / latest_doc_id
109
+ if SimpleVectorStore is not None:
110
+ store = SimpleVectorStore.load_local(str(store_path))
111
+ recovered = []
112
+ for doc in getattr(store, 'documents', []) or []:
113
+ try:
114
+ meta = getattr(doc, 'metadata', {}) or {}
115
+ imgs = meta.get('image_data_list')
116
+ if imgs and isinstance(imgs, list):
117
+ # bytes ๋งŒ ํ•„ํ„ฐ๋ง
118
+ recovered.extend([b for b in imgs if isinstance(b, (bytes, bytearray)) and len(b) > 0])
119
+ except Exception:
120
+ continue
121
+ if recovered:
122
+ all_image_data.extend(recovered[:4])
123
+ print(f"๐Ÿ” [DEBUG] RAG์—์„œ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ: {len(recovered)}๊ฐœ (์‚ฌ์šฉ: {len(all_image_data)})")
124
+ else:
125
+ print("โš ๏ธ [DEBUG] SimpleVectorStore ์‚ฌ์šฉ ๋ถˆ๊ฐ€ - ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์ƒ๋žต")
126
+ else:
127
+ print("โš ๏ธ [DEBUG] ์‚ฌ์šฉ์ž ๋ฌธ์„œ๊ฐ€ ์—†์–ด ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ๋ถˆ๊ฐ€")
128
+ else:
129
+ print("โš ๏ธ [DEBUG] vector_store_manager ๋ฏธ์‚ฌ์šฉ - ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ๋น„ํ™œ์„ฑํ™”")
130
+ except Exception as e:
131
+ print(f"โš ๏ธ [DEBUG] RAG ๊ธฐ๋ฐ˜ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์‹คํŒจ: {e}")
132
 
133
+ # ํ•ญ์ƒ ์ฐธ์กฐ ๊ฐ€๋Šฅํ•œ max_images ์ •์˜ (์ด๋ฏธ์ง€ ์—†์œผ๋ฉด 0)
134
+ max_images = min(len([img for img in all_image_data if img]) if all_image_data else 0, 4)
135
+
136
  if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
137
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹œ์ž‘ - ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len([img for img in all_image_data if img])}")
138
 
139
  # ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: ๊ฐ„๋‹จํ•œ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
 
140
  logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘... (์ด๋ฏธ์ง€ {max_images}๊ฐœ)")
141
 
142
  try:
143
  metas_list = []
144
+ # ๋จผ์ € ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋งŒ ์ˆ˜์ง‘
145
  for idx, image_bytes in enumerate(all_image_data[:max_images]):
146
  if image_bytes:
147
  try:
 
149
  # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
150
  if processor and hasattr(processor, 'image_processor'):
151
  processed = processor.image_processor(pil_image)
 
152
  metas_list.append(processed.get("image_meta", {}))
153
  else:
154
  logger.warning(f"โš ๏ธ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")
 
163
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {combined_image_metas}")
164
  else:
165
  combined_image_metas = {}
166
+
167
+ # ์ด์ œ ์ด๋ฏธ์ง€ ํฌ๊ธฐ๋ฅผ ์กฐ์ •ํ•˜์—ฌ pixel_values ์ƒ์„ฑ
168
+ for idx, image_bytes in enumerate(all_image_data[:max_images]):
169
+ if image_bytes:
170
+ try:
171
+ pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
172
+ # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
173
+ if processor and hasattr(processor, 'image_processor'):
174
+ # KananaVImageProcessor๋Š” ๊ธฐ๋ณธ ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ์ง€์›
175
+ processed = processor.image_processor(pil_image)
176
+ pixel_values = processed["pixel_values"]
177
+ # NOTE: pixel_values ๋ฐ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋Š” ํ”„๋กœ์„ธ์„œ ์‚ฐ์ถœ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉ (์ž„์˜ ์กฐ์ • ๊ธˆ์ง€)
178
+ # ๋ชจ๋ธ ๋‚ด๋ถ€ prepare_mm_inputs๊ฐ€ ์ผ๊ด€์„ฑ ์žˆ๊ฒŒ ์ฒ˜๋ฆฌํ•˜๋„๋ก ๋งก๊น€
179
+
180
+ all_pixel_values.append(pixel_values)
181
+ metas_list.append(processed.get("image_meta", {}))
182
+
183
+ # ์ค‘๋ณต ์ฝ”๋“œ ์ œ๊ฑฐ - ์ด๋ฏธ ์œ„์—์„œ ์ฒ˜๋ฆฌ๋จ
184
+ else:
185
+ logger.warning(f"โš ๏ธ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")
186
+ except Exception as e:
187
+ logger.warning(f"โš ๏ธ ์ด๋ฏธ์ง€ {idx} ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
188
  except Exception as e:
189
  logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
190
  combined_image_metas = {}
 
209
  except Exception as e:
210
  print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e}")
211
  context_prompt = ""
212
+ # ์ด๋ฏธ์ง€ ๋ฐ”์ดํŠธ๋ฅผ ์„ธ์…˜ ์บ์‹œ์— ๋ณด๊ด€ (๏ฟฝ๏ฟฝ๏ฟฝ์Œ ํ„ด์— ์žฌ์‚ฌ์šฉ)
213
+ if session_id:
214
+ # ์›๋ณธ ์š”์ฒญ์— ์ด๋ฏธ์ง€๊ฐ€ ์žˆ์—ˆ๋‹ค๋ฉด ๊ทธ๊ฑธ ์šฐ์„  ๋ณด๊ด€, ์—†์œผ๋ฉด ๋ณต๊ตฌ๋œ ์ด๋ฏธ์ง€ ์œ ์ง€
215
+ if image_data_list and len([img for img in image_data_list if img]) > 0:
216
+ _session_image_cache[session_id] = list(image_data_list[:max_images])
217
+ elif session_id not in _session_image_cache:
218
+ _session_image_cache[session_id] = list(all_image_data[:max_images])
219
+ # ์ตœ๋Œ€ 4์žฅ ์ œํ•œ
220
+ _session_image_cache[session_id] = _session_image_cache[session_id][:4]
221
+ print(f"๐Ÿ” [DEBUG] ์„ธ์…˜ ์บ์‹œ ์—…๋ฐ์ดํŠธ: {len(_session_image_cache[session_id])}์žฅ ์ €์žฅ (์„ธ์…˜: {session_id})")
222
+
223
  except Exception as e:
224
  print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e} (์„ธ์…˜: {session_id})")
225
  context_prompt = ""
 
229
 
230
  # ๐Ÿ”„ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ๋ฐฉ์‹)
231
  if all_pixel_values and len(all_pixel_values) > 0:
232
+ # ๐Ÿ”„ ๊ณต์‹ Kanana ํ˜•์‹: ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜์— ๋งž๊ฒŒ <image> ํ† ํฐ ์ƒ์„ฑ
233
+ num_images = len(all_pixel_values)
234
+ image_tokens = "<image>" * num_images # ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜๋งŒํผ <image> ํ† ํฐ ์ƒ์„ฑ
235
+ # ๋‹ต๋ณ€ ์œ ๋„๋ฅผ ์œ„ํ•ด Assistant ํ”„๋ฆฌํ”ฝ์Šค ์ถ”๊ฐ€
236
+ formatted_prompt = f"Human: {image_tokens}{prompt}\nAssistant:"
237
  print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ํ˜•์‹): {formatted_prompt}")
238
+ print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ† ํฐ ์ƒ์„ฑ: {num_images}๊ฐœ ์ด๋ฏธ์ง€ -> {image_tokens}")
239
  image_processed = True
240
  else:
241
  image_processed = False
 
300
 
301
  if hasattr(tokenizer, 'encode_prompt'):
302
  print(f"๐Ÿ” [DEBUG] encode_prompt ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ")
303
+ print(f"๐Ÿ” [DEBUG] combined_image_metas: {combined_image_metas}")
304
+ print(f"๐Ÿ” [DEBUG] ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len(all_image_data)}")
305
+
306
+ # ๐Ÿ”„ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๊ฒ€์ฆ ๋ฐ ์•ˆ์ „ํ™”
307
+ safe_image_meta = {}
308
+ if combined_image_metas:
309
+ # image_token_thw ๋ฐฐ์—ด ๊ธธ์ด ๊ฒ€์ฆ
310
+ if 'image_token_thw' in combined_image_metas:
311
+ image_token_thw = combined_image_metas['image_token_thw']
312
+ if isinstance(image_token_thw, list) and len(image_token_thw) > 0:
313
+ # ๋ฐฐ์—ด ๊ธธ์ด๊ฐ€ ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜์™€ ์ผ์น˜ํ•˜๋Š”์ง€ ํ™•์ธ
314
+ if len(image_token_thw) == len(all_pixel_values):
315
+ # ๐Ÿ”„ ์ถ”๊ฐ€ ๊ฒ€์ฆ: ๊ฐ ๋ฐฐ์—ด ์š”์†Œ๊ฐ€ ์œ ํšจํ•œ์ง€ ํ™•์ธ
316
+ valid_meta = True
317
+ for i, thw in enumerate(image_token_thw):
318
+ if not isinstance(thw, (list, tuple)) or len(thw) != 3:
319
+ print(f"โš ๏ธ [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์š”์†Œ {i}๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š์Œ: {thw}")
320
+ valid_meta = False
321
+ break
322
+
323
+ if valid_meta:
324
+ safe_image_meta = combined_image_metas
325
+ print(f"๐Ÿ” [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๊ฒ€์ฆ ํ†ต๊ณผ: {len(image_token_thw)}๊ฐœ ์ด๋ฏธ์ง€")
326
+ else:
327
+ print(f"โš ๏ธ [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์š”์†Œ ๊ฒ€์ฆ ์‹คํŒจ, ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ")
328
+ safe_image_meta = {
329
+ 'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
330
+ 'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
331
+ }
332
+ else:
333
+ print(f"โš ๏ธ [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋ถˆ์ผ์น˜: ์ด๋ฏธ์ง€ {len(all_pixel_values)}๊ฐœ, ๋ฉ”ํƒ€ {len(image_token_thw)}๊ฐœ")
334
+ # ์•ˆ์ „ํ•œ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
335
+ safe_image_meta = {
336
+ 'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
337
+ 'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
338
+ }
339
+ else:
340
+ print(f"โš ๏ธ [DEBUG] image_token_thw๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š์Œ, ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ")
341
+ safe_image_meta = {
342
+ 'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
343
+ 'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
344
+ }
345
+ else:
346
+ print(f"โš ๏ธ [DEBUG] image_token_thw ์—†์Œ, ๊ธฐ๋ณธ๊ฐ’ ์ƒ์„ฑ")
347
+ safe_image_meta = {
348
+ 'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
349
+ 'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
350
+ }
351
+ else:
352
+ print(f"โš ๏ธ [DEBUG] combined_image_metas ์—†์Œ, ๊ธฐ๋ณธ๊ฐ’ ์ƒ์„ฑ")
353
+ safe_image_meta = {
354
+ 'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
355
+ 'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
356
+ }
357
 
358
+ print(f"๐Ÿ” [DEBUG] ์•ˆ์ „ํ™”๋œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {safe_image_meta}")
 
 
 
 
359
 
360
+ # ๐Ÿ”„ ์•ˆ์ „ํ•œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋กœ encode_prompt ํ˜ธ์ถœ
361
  try:
362
+ # ๐Ÿ”„ ์ถ”๊ฐ€ ์•ˆ์ „์žฅ์น˜: ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋ณต์‚ฌ๋ณธ ์ƒ์„ฑ
363
+ final_meta = {}
364
+ for key, value in safe_image_meta.items():
365
+ if isinstance(value, list):
366
+ final_meta[key] = value.copy() # ๋ณต์‚ฌ๋ณธ ์ƒ์„ฑ
367
+ else:
368
+ final_meta[key] = value
369
+
370
+ print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {final_meta}")
371
+ # ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: max_length ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€
372
  inputs = tokenizer.encode_prompt(
373
  prompt=formatted_prompt,
374
+ max_length=2048, # ๊ณต์‹ ์ฝ”๋“œ์™€ ๋™์ผ
375
+ image_meta=final_meta
376
  )
377
+ print(f"๐Ÿ” [DEBUG] encode_prompt ์ถœ๋ ฅ: {list(inputs.keys())}")
378
 
379
+ # ๐Ÿ”„ encode_prompt ์ถœ๋ ฅ ์ •๊ทœํ™” (seq_length ์ œ๊ฑฐ)
380
  if 'seq_length' in inputs:
381
+ print(f"๐Ÿ” [DEBUG] seq_length ์ œ๊ฑฐ๋จ")
382
  del inputs['seq_length']
383
 
384
+ # ๐Ÿ”„ input_ids ์•ˆ์ „ํ•˜๊ฒŒ ์ถ”์ถœ (๊ณต์‹ ๋ฐฉ์‹)
385
+ if isinstance(inputs['input_ids'], tuple):
386
+ print(f"๐Ÿ” [DEBUG] input_ids๊ฐ€ ํŠœํ”Œ์ž„: {len(inputs['input_ids'])}๊ฐœ ์š”์†Œ")
387
+ input_ids = inputs['input_ids'][0] # ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
388
+ print(f"๐Ÿ” [DEBUG] input_ids ํŠœํ”Œ์—์„œ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์ถ”์ถœ: {input_ids.shape}")
389
+ else:
390
+ input_ids = inputs['input_ids']
391
 
392
+ # ๐Ÿ”„ attention_mask๋„ ์•ˆ์ „ํ•˜๊ฒŒ ์ถ”์ถœ
393
+ if isinstance(inputs['attention_mask'], tuple):
394
+ print(f"๐Ÿ” [DEBUG] attention_mask๊ฐ€ ํŠœํ”Œ์ž„: {len(inputs['attention_mask'])}๊ฐœ ์š”์†Œ")
395
+ attention_mask = inputs['attention_mask'][0] # ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
396
+ print(f"๐Ÿ” [DEBUG] attention_mask ํŠœํ”Œ์—์„œ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์ถ”์ถœ: {attention_mask.shape}")
397
+ else:
398
+ attention_mask = inputs['attention_mask']
399
+
400
+ # ๐Ÿ”„ ์ตœ์ข… ๊ฒ€์ฆ
401
+ print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids ํƒ€์ž…: {type(input_ids)}, shape: {input_ids.shape}")
402
+ print(f"๐Ÿ” [DEBUG] ์ตœ์ข… attention_mask ํƒ€์ž…: {type(attention_mask)}, shape: {attention_mask.shape}")
403
  except Exception as e:
404
  print(f"โŒ [DEBUG] encode_prompt ์‹คํŒจ: {e}, ํด๋ฐฑ ์‚ฌ์šฉ")
405
  # ํด๋ฐฑ: ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ
 
451
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids shape: {input_ids.shape}")
452
  print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {input_ids.shape[1]}")
453
 
454
+ # ๐Ÿ”„ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ: -1 ํ† ํฐ์€ ๋ชจ๋ธ ๋‚ด๋ถ€์—์„œ ์‹œ๊ฐ ์ž„๋ฒ ๋”ฉ์œผ๋กœ ๋Œ€์ฒด๋˜๋ฏ€๋กœ ์œ ์ง€
455
+ negative_mask = input_ids < 0
456
+ if negative_mask.any():
457
+ print(f"๐Ÿ” [DEBUG] -1 ํ† ํฐ ์œ ์ง€: {negative_mask.sum().item()}๊ฐœ")
458
+
459
  # --- 4. ์ƒ์„ฑ ์„ค์ • ---
460
  print(f"๐Ÿ” [DEBUG] ์ƒ์„ฑ ์„ค์ • ๊ตฌ์„ฑ ์‹œ์ž‘")
461
  gen_config = current_profile.get_generation_config()
 
515
  print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ถ”๋ก  ์‹คํ–‰")
516
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ ๊ฐœ์ˆ˜: {len(all_pixel_values)}")
517
 
518
+ # ์ด๋ฏธ์ง€ ํ…์„œ๋Š” ํ”„๋กœ์„ธ์„œ ์‚ฐ์ถœ๊ฐ’์„ ๊ทธ๋Œ€๋กœ ๊ฒฐํ•ฉ (์ž„์˜ ์กฐ์ • ๊ธˆ์ง€)
519
  pixel_values = torch.cat(all_pixel_values, dim=0)
520
  print(f"๐Ÿ” [DEBUG] ๊ฒฐํ•ฉ๋œ ์ด๋ฏธ์ง€ ํ…์„œ shape: {pixel_values.shape}")
521
+ # ๋””๋ฒ„๊น…: ์ด๋ฏธ์ง€๋ณ„ ํ† ํฐ ์˜คํ”„์…‹ ๋ฒ”์œ„ ์ถœ๋ ฅ
522
+ try:
523
+ offsets = []
524
+ start = 0
525
+ for i, img_t in enumerate(all_pixel_values):
526
+ end = start + img_t.shape[0]
527
+ offsets.append((start, end))
528
+ start = end
529
+ print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€๋ณ„ ํ† ํฐ ๋ฒ”์œ„: {offsets}")
530
+ except Exception as _e:
531
+ print(f"โš ๏ธ [DEBUG] ์ด๋ฏธ์ง€ ์˜คํ”„์…‹ ๊ณ„์‚ฐ ์‹คํŒจ: {_e}")
532
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ dtype: {pixel_values.dtype}")
533
 
534
  # ๐Ÿ”„ ๋ชจ๋ธ๊ณผ ๋™์ผํ•œ dtype์œผ๋กœ ๋ณ€ํ™˜ (์„ฑ๋Šฅ ์ตœ์ ํ™”)
 
552
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ด๋ฏธ์ง€ ํ…์„œ dtype: {pixel_values.dtype}")
553
  print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘ - ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ")
554
 
555
+ # ๐Ÿ”„ ๊ณตํ†ต ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ (๋ชจ๋“  ๋ชจ๋ธ ๊ฒฝ๋กœ์—์„œ ๋™์ผํ•˜๊ฒŒ)
556
+ processed_image_metas = {}
557
+
558
+ # ๐Ÿ”„ vision_grid_thw๋ฅผ ํ…์„œ๋กœ ๋ณ€ํ™˜ (๋ชจ๋ธ ๋‚ด๋ถ€ ์Šฌ๋ผ์ด์‹ฑ ํ˜ธํ™˜์„ฑ)
559
+ if 'vision_grid_thw' in combined_image_metas:
560
+ vision_grid = combined_image_metas['vision_grid_thw']
561
+ if isinstance(vision_grid, list):
562
+ # ๋ฆฌ์ŠคํŠธ๋ฅผ ํ…์„œ๋กœ ๋ณ€ํ™˜: [(1, 34, 52), (1, 14, 36)] -> tensor([[1, 34, 52], [1, 14, 36]])
563
+ _vg = torch.tensor(vision_grid, dtype=torch.long)
564
+ # rot_pos_emb๋Š” [N,3] ๋˜๋Š” ๋ฆฌ์ŠคํŠธ[(t,h,w)]๋ฅผ ๊ธฐ๋Œ€ํ•˜๋ฏ€๋กœ ๋ฐฐ์น˜ ์ฐจ์› ์—†์ด ์ „๋‹ฌ
565
+ processed_image_metas['vision_grid_thw'] = _vg # [N, 3]
566
+ print(f"๐Ÿ” [DEBUG] ๊ณตํ†ต - vision_grid_thw ํ…์„œ๋กœ ๋ณ€ํ™˜: {processed_image_metas['vision_grid_thw'].shape}")
567
+ else:
568
+ processed_image_metas['vision_grid_thw'] = vision_grid
569
+ print(f"๐Ÿ” [DEBUG] ๊ณตํ†ต - vision_grid_thw ์›๋ณธ ํ˜•ํƒœ ์œ ์ง€: {vision_grid}")
570
+
571
+ # ๐Ÿ”„ ๋‹ค๋ฅธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋„ ๋ฐฐ์น˜ ์ฐจ์› ๋งž์ถค
572
+ for key, value in combined_image_metas.items():
573
+ if key != 'vision_grid_thw':
574
+ if isinstance(value, list):
575
+ # ๋ฆฌ์ŠคํŠธ์ธ ๊ฒฝ์šฐ ์˜ฌ๋ฐ”๋ฅธ ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜
576
+ if key == 'image_token_thw':
577
+ # image_token_thw๋Š” [1, N, 3]๋กœ ์ „๋‹ฌ (๋ชจ๋ธ ๊ธฐ๋Œ€ ํ˜•ํƒœ)
578
+ _thw = torch.tensor(value, dtype=torch.long)
579
+ processed_image_metas[key] = _thw.unsqueeze(0)
580
+ else:
581
+ # ๋‹ค๋ฅธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋Š” ๊ธฐ์กด ๋ฐฉ์‹
582
+ processed_image_metas[key] = [value]
583
+ elif isinstance(value, torch.Tensor) and len(value.shape) == 2:
584
+ # 2์ฐจ์› ํ…์„œ์ธ ๊ฒฝ์šฐ ๋ฐฐ์น˜ ์ฐจ์› ์ถ”๊ฐ€
585
+ processed_image_metas[key] = value.unsqueeze(0)
586
+ else:
587
+ processed_image_metas[key] = value
588
+
589
+ # ๐Ÿ”„ ์ฐธ๊ณ  ๋กœ๊ทธ๋งŒ ์ถœ๋ ฅ: ์ด๋ฏธ์ง€ ํ† ํฐ ์ˆ˜ ์ถ”์ • (์กฐ์ •์€ ํ•˜์ง€ ์•Š์Œ)
590
+ if 'image_token_thw' in processed_image_metas:
591
+ image_token_thw = processed_image_metas['image_token_thw']
592
+ if isinstance(image_token_thw, torch.Tensor):
593
+ total_image_tokens = 0
594
+ print(f"๐Ÿ” [DEBUG] image_token_thw shape: {image_token_thw.shape}")
595
+ print(f"๐Ÿ” [DEBUG] image_token_thw ๋‚ด์šฉ: {image_token_thw}")
596
+ for i in range(image_token_thw.shape[0]):
597
+ token_info = image_token_thw[i]
598
+ if len(token_info) == 3:
599
+ t, h, w = token_info
600
+ total_image_tokens += t * h * w
601
+ elif len(token_info) == 2:
602
+ h, w = token_info
603
+ total_image_tokens += h * w
604
+ print(f"๐Ÿ” [DEBUG] ๊ณ„์‚ฐ๋œ ์ด ์ด๋ฏธ์ง€ ํ† ํฐ ์ˆ˜(์ฐธ๊ณ ): {total_image_tokens}")
605
+ if isinstance(total_image_tokens, torch.Tensor):
606
+ total_image_tokens = total_image_tokens.sum().item()
607
+ print(f"๐Ÿ” [DEBUG] pixel_values ๊ธธ์ด: {pixel_values.shape[0]}, ์˜ˆ์ƒ: {total_image_tokens} (์กฐ์ • ์•ˆํ•จ)")
608
+
609
+ # ์•ˆ์ „ ๊ฐ€๋“œ: vision_grid_thw๊ฐ€ [1, N, 3]๋กœ ์˜ค๋ฉด [N, 3]๋กœ ๋ณ€ํ™˜
610
+ try:
611
+ if isinstance(processed_image_metas.get('vision_grid_thw', None), torch.Tensor):
612
+ _vg = processed_image_metas['vision_grid_thw']
613
+ if _vg.dim() == 3 and _vg.shape[0] == 1 and _vg.shape[-1] == 3:
614
+ processed_image_metas['vision_grid_thw'] = _vg.squeeze(0)
615
+ print(f"๐Ÿ” [DEBUG] vision_grid_thw ๋ฐฐ์น˜ ์ฐจ์› ์ œ๊ฑฐ: {processed_image_metas['vision_grid_thw'].shape}")
616
+ except Exception as _e:
617
+ print(f"โš ๏ธ [DEBUG] vision_grid_thw ์ •๊ทœํ™” ์‹คํŒจ: {_e}")
618
+
619
+ # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ฒฝ๋กœ๋„ ์บ์‹œ ์‚ฌ์šฉ ํ™œ์„ฑํ™”
620
+ try:
621
+ gen_config['use_cache'] = True
622
+ except Exception:
623
+ pass
624
+
625
+ # ๋ชจ๋ธ eval ๋ชจ๋“œ ์ „ํ™˜ (์„ฑ๋Šฅ/์ผ๊ด€์„ฑ)
626
+ try:
627
+ current_model.eval()
628
+ except Exception:
629
+ pass
630
+
631
  # LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋œ ๋ชจ๋ธ์ธ์ง€ ํ™•์ธ
632
  try:
633
  from lily_llm_core.lora_manager import lora_manager
 
637
  lora_model = lora_manager.get_model()
638
  if lora_model:
639
  print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ๋กœ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํ–‰")
 
 
 
 
 
 
 
 
640
 
641
  generate_kwargs = {
642
  'input_ids': input_ids,
643
  'attention_mask': attention_mask,
644
  'pixel_values': pixel_values,
645
+ 'image_metas': processed_image_metas, # ๐Ÿ”„ ๊ณตํ†ต์œผ๋กœ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
646
  **gen_config
647
  }
648
  print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
649
  print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
650
  print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘... (ํƒ€์ž„์•„์›ƒ ์—†์Œ)")
651
+ try:
652
+ lora_model.eval()
653
+ except Exception:
654
+ pass
655
+ import torch as _torch
656
+ with _torch.inference_mode():
657
+ generated_ids = lora_model.generate(**generate_kwargs)
658
  else:
659
  print(f"โš ๏ธ [DEBUG] LoRA ๋ชจ๋ธ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Œ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
660
+ # ๐Ÿ”„ LoRA ๋ชจ๋ธ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์„ ๋•Œ๋„ ๋™์ผํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ ๊ตฌ์กฐ ์‚ฌ์šฉ (ํ†ต์ผ์„ฑ)
661
+ generate_kwargs = {
662
+ 'input_ids': input_ids,
663
+ 'attention_mask': attention_mask,
664
+ 'pixel_values': pixel_values,
665
+ 'image_metas': processed_image_metas, # ๐Ÿ”„ ๊ณตํ†ต์œผ๋กœ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
666
  **gen_config
667
+ }
668
+ print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ ์—†์Œ ์‹œ ๊ธฐ๋ณธ ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
669
+ print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
670
+ import torch as _torch
671
+ with _torch.inference_mode():
672
+ generated_ids = current_model.generate(**generate_kwargs)
673
  else:
674
  print(f"๐Ÿ” [DEBUG] LoRA ์–ด๋Œ‘ํ„ฐ ์—†์Œ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ), ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
675
+ # ๐Ÿ”„ ๊ธฐ๋ณธ ๋ชจ๋ธ๋„ ๋™์ผํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ ๊ตฌ์กฐ ์‚ฌ์šฉ (ํ†ต์ผ์„ฑ)
676
+ generate_kwargs = {
677
+ 'input_ids': input_ids,
678
+ 'attention_mask': attention_mask,
679
+ 'pixel_values': pixel_values,
680
+ 'image_metas': processed_image_metas, # ๐Ÿ”„ ๊ณตํ†ต์œผ๋กœ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
681
  **gen_config
682
+ }
683
+ print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
684
+ print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
685
+ import torch as _torch
686
+ with _torch.inference_mode():
687
+ generated_ids = current_model.generate(**generate_kwargs)
688
  except ImportError:
689
  print(f"๐Ÿ” [DEBUG] LoRA ์ง€์› ์•ˆ๋จ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
690
+ # ๐Ÿ”„ ImportError ๋ฐœ์ƒ ์‹œ์—๋„ ๋™์ผํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ ๊ตฌ์กฐ ์‚ฌ์šฉ (ํ†ต์ผ์„ฑ)
691
+ generate_kwargs = {
692
+ 'input_ids': input_ids,
693
+ 'attention_mask': attention_mask,
694
+ 'pixel_values': pixel_values,
695
+ 'image_metas': processed_image_metas, # ๐Ÿ”„ ๊ณตํ†ต์œผ๋กœ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
696
  **gen_config
697
+ }
698
+ print(f"๐Ÿ” [DEBUG] ImportError ์‹œ ๊ธฐ๋ณธ ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
699
+ print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
700
+ import torch as _torch
701
+ with _torch.inference_mode():
702
+ generated_ids = current_model.generate(**generate_kwargs)
703
 
704
  else:
705
  # ํ…์ŠคํŠธ-only: ๊ธฐ์กด ๋ฐฉ์‹
 
850
  traceback.print_exc()
851
  return {"error": f"Response extraction failed: {str(e)}"}
852
 
853
+ # --- 7. ์ปจํ…์ŠคํŠธ ์ €์žฅ ๋ฐ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜ ---
854
  total_time = time.time() - t_tok_start
855
  print(f"๐Ÿ” [DEBUG] ์ „์ฒด ์ฒ˜๋ฆฌ ์™„๋ฃŒ - ์ด ์†Œ์š”์‹œ๊ฐ„: {total_time:.3f}์ดˆ")
856
 
857
+ # ์ปจํ…์ŠคํŠธ ๋ˆ„์  ์ €์žฅ (์„ธ์…˜/๋ฃธ ๋‹จ์œ„)
858
+ try:
859
+ if use_context and session_id:
860
+ try:
861
+ from lily_llm_core.context_manager import context_manager
862
+ if context_manager:
863
+ # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ €์žฅ (์ด๋ฏธ์ง€ ์—ฌ๋ถ€ ๋ฉ”ํƒ€ ํฌํ•จ)
864
+ context_manager.add_user_message(
865
+ prompt,
866
+ metadata={
867
+ "session_id": session_id,
868
+ "room_id": room_id,
869
+ "images_used": bool(all_image_data and len([img for img in all_image_data if img]) > 0),
870
+ "num_images": len([img for img in all_image_data if img]) if all_image_data else 0,
871
+ },
872
+ )
873
+ # ์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ €์žฅ
874
+ context_manager.add_assistant_message(
875
+ response,
876
+ metadata={
877
+ "session_id": session_id,
878
+ "room_id": room_id,
879
+ },
880
+ )
881
+ print(f"๐Ÿ” [DEBUG] ์ปจํ…์ŠคํŠธ ์ €์žฅ ์™„๋ฃŒ (์„ธ์…˜: {session_id}, ๋ฃธ: {room_id})")
882
+ except Exception as _ctx_e:
883
+ print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ์ €์žฅ ์‹คํŒจ: {_ctx_e}")
884
+ except Exception:
885
+ pass
886
+
887
  return {
888
  "generated_text": response,
889
  "processing_time": total_time,
lily_llm_api/services/session_registry.py ADDED
@@ -0,0 +1,53 @@
+"""
+๊ฐ„๋‹จํ•œ ์„ธ์…˜/๋ฃธ ↔ ์‚ฌ์šฉ์ž ๋งคํ•‘ ๋ ˆ์ง€์ŠคํŠธ๋ฆฌ
+- ํ”„๋กœ์„ธ์Šค ๋ฉ”๋ชจ๋ฆฌ ๊ธฐ๋ฐ˜ (์„œ๋น„์Šค ์žฌ์‹œ์ž‘ ์‹œ ์ดˆ๊ธฐํ™”)
+- ์—…๋กœ๋“œ/์ƒ์„ฑ ๊ฐ„ user_id ๋ถˆ์ผ์น˜ ๋ณด์ •์šฉ
+"""
+from typing import Optional, Dict
+import time
+
+_room_to_user: Dict[str, str] = {}
+_session_to_user: Dict[str, str] = {}
+_last_user: Optional[str] = None
+_last_updated_at: float = 0.0
+
+def set_user_for_room(room_id: Optional[str], user_id: Optional[str]) -> None:
+    if not room_id or not user_id:
+        return
+    _room_to_user[str(room_id)] = str(user_id)
+    set_last_user(user_id)
+
+def get_user_for_room(room_id: Optional[str]) -> Optional[str]:
+    if not room_id:
+        return None
+    return _room_to_user.get(str(room_id))
+
+def set_user_for_session(session_id: Optional[str], user_id: Optional[str]) -> None:
+    if not session_id or not user_id:
+        return
+    _session_to_user[str(session_id)] = str(user_id)
+    set_last_user(user_id)
+
+def get_user_for_session(session_id: Optional[str]) -> Optional[str]:
+    if not session_id:
+        return None
+    return _session_to_user.get(str(session_id))
+
+def set_last_user(user_id: Optional[str]) -> None:
+    global _last_user, _last_updated_at
+    if not user_id:
+        return
+    _last_user = str(user_id)
+    _last_updated_at = time.time()
+
+def get_last_user() -> Optional[str]:
+    return _last_user
+
+def clear() -> None:
+    _room_to_user.clear()
+    _session_to_user.clear()
+    global _last_user, _last_updated_at
+    _last_user = None
+    _last_updated_at = 0.0
+
+
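The routers use this registry to patch up a missing or anonymous user id across the upload and generate flow. A minimal usage sketch (the room and user ids are placeholders; the import path follows the file location above):

from lily_llm_api.services.session_registry import (
    set_user_for_room, get_user_for_room,
    set_user_for_session, get_user_for_session,
)

# document_router records the uploader when a document arrives in a room
set_user_for_room("room_42", "user_abc")

# generation_router later receives an "anonymous" request for the same room
user_id = "anonymous"
if not user_id or user_id == "anonymous":
    user_id = get_user_for_room("room_42") or user_id
print(user_id)  # -> user_abc

# explicit session ids can be pinned the same way
set_user_for_session("room_42_user_abc", user_id)
assert get_user_for_session("room_42_user_abc") == "user_abc"

Because the maps live in process memory, the mapping is lost on restart and is not shared across worker processes; as the module docstring notes, it is only a best-effort correction.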
lily_llm_core/document_processor.py CHANGED
@@ -436,80 +436,69 @@ class DocumentProcessor:
                         img_pil = Image.open(io.BytesIO(img_data))
 
                         if self._is_valid_image(img_pil):
-                            # ์ด๋ฏธ์ง€์˜ ์œ„์น˜ ์ •๋ณด ์ถ”์ถœ (์ค‘์š”!)
+                            # ์ด๋ฏธ์ง€์˜ ์œ„์น˜ ์ •๋ณด ์ถ”์ถœ (์ค‘์š”!)
                             img_rect = self._get_image_rect(page, xref)
-
-                            if img_rect:
-                                bbox = BoundingBox(
-                                    x0=img_rect.x0,
-                                    y0=img_rect.y0,
-                                    x1=img_rect.x1,
-                                    y1=img_rect.y1
-                                )
-
-                                image_block = PDFBlock(
-                                    block_id=f"page_{page_num + 1}_image_{img_idx}",
-                                    block_type="image",
-                                    bbox=bbox,
-                                    content=img_data,  # ๋ฐ”์ด๋„ˆ๋ฆฌ ๋ฐ์ดํ„ฐ
-                                    page_num=page_num + 1,
-                                    metadata={
-                                        "image_size": img_pil.size,
-                                        "image_format": "PNG",
-                                        "image_mode": img_pil.mode,
-                                        "xref": xref,
-                                        "is_embedded": True
-                                    }
-                                )
-                                image_blocks.append(image_block)
-
-                                logger.debug(f"🖼️ ์ด๋ฏธ์ง€ ๋ธ”๋ก ์ถ”์ถœ: ํŽ˜์ด์ง€ {page_num + 1}, "
-                                           f"์œ„์น˜ ({bbox.x0:.1f}, {bbox.y0:.1f}, {bbox.x1:.1f}, {bbox.y1:.1f}), "
-                                           f"ํฌ๊ธฐ {img_pil.size}")
+                            if img_rect:
+                                bbox = BoundingBox(
+                                    x0=img_rect.x0,
+                                    y0=img_rect.y0,
+                                    x1=img_rect.x1,
+                                    y1=img_rect.y1
+                                )
+
+                                image_block = PDFBlock(
+                                    block_id=f"page_{page_num + 1}_image_{img_idx}",
+                                    block_type="image",
+                                    bbox=bbox,
+                                    content=img_data,  # ๋ฐ”์ด๋„ˆ๋ฆฌ ๋ฐ์ดํ„ฐ
+                                    page_num=page_num + 1,
+                                    metadata={
+                                        "image_size": img_pil.size,
+                                        "image_format": "PNG",
+                                        "image_mode": img_pil.mode,
+                                        "xref": xref,
+                                        "is_embedded": True
+                                    }
+                                )
+                                image_blocks.append(image_block)
+
+                                logger.debug(f"🖼️ ์ด๋ฏธ์ง€ ๋ธ”๋ก ์ถ”์ถœ: ํŽ˜์ด์ง€ {page_num + 1}, "
+                                           f"์œ„์น˜ ({bbox.x0:.1f}, {bbox.y0:.1f}, {bbox.x1:.1f}, {bbox.y1:.1f}), "
+                                           f"ํฌ๊ธฐ {img_pil.size}")
 
                         pix = None
 
                     except Exception as e:
                         logger.warning(f"⚠️ ์ด๋ฏธ์ง€ {img_idx} ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
 
-                # 2. ์ด๋ฏธ์ง€๊ฐ€ ์—†์œผ๋ฉด ์ „์ฒด ํŽ˜์ด์ง€ ๋ Œ๋”๋ง (fallback)
+                # 2. ์ด๋ฏธ์ง€๊ฐ€ ์—†์œผ๋ฉด ์ „์ฒด ํŽ˜์ด์ง€ ๋ Œ๋”๋ง (fallback - ํ•ญ์ƒ ์ˆ˜ํ–‰)
                 if not image_blocks:
-                    # ํ˜„์žฌ ๋ชจ๋ธ์ด ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ์ธ์ง€ ํ™•์ธ
-                    try:
-                        from lily_llm_api.app_v2 import current_profile
-                        is_multimodal = hasattr(current_profile, 'multimodal') and current_profile.multimodal
-
-                        if is_multimodal:
-                            # ์ „์ฒด ํŽ˜์ด์ง€๋ฅผ ์ด๋ฏธ์ง€๋กœ ๋ Œ๋”๋ง
-                            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2๋ฐฐ ํ•ด์ƒ๋„
-                            img_data = pix.tobytes("png")
-
-                            bbox = BoundingBox(
-                                x0=0, y0=0,
-                                x1=page.rect.width,
-                                y1=page.rect.height
-                            )
-
-                            image_block = PDFBlock(
-                                block_id=f"page_{page_num + 1}_fullpage",
-                                block_type="image",
-                                bbox=bbox,
-                                content=img_data,
-                                page_num=page_num + 1,
-                                metadata={
-                                    "image_size": (pix.width, pix.height),
-                                    "image_format": "PNG",
-                                    "is_embedded": False,
-                                    "is_full_page_render": True
-                                }
-                            )
-                            image_blocks.append(image_block)
-
-                            logger.debug(f"📄 ์ „์ฒด ํŽ˜์ด์ง€ ๋ Œ๋”๋ง: ํŽ˜์ด์ง€ {page_num + 1}")
-                            pix = None
+                    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2๋ฐฐ ํ•ด์ƒ๋„
+                    img_data = pix.tobytes("png")
+
+                    bbox = BoundingBox(
+                        x0=0, y0=0,
+                        x1=page.rect.width,
+                        y1=page.rect.height
+                    )
+
+                    image_block = PDFBlock(
+                        block_id=f"page_{page_num + 1}_fullpage",
+                        block_type="image",
+                        bbox=bbox,
+                        content=img_data,
+                        page_num=page_num + 1,
+                        metadata={
+                            "image_size": (pix.width, pix.height),
+                            "image_format": "PNG",
+                            "is_embedded": False,
+                            "is_full_page_render": True
+                        }
+                    )
+                    image_blocks.append(image_block)
 
-                    except ImportError:
-                        pass  # app_v2 import ์‹คํŒจ ์‹œ ๋ฌด์‹œ
+                    logger.debug(f"📄 ์ „์ฒด ํŽ˜์ด์ง€ ๋ Œ๋”๋ง: ํŽ˜์ด์ง€ {page_num + 1}")
+                    pix = None
 
             except Exception as e:
                 logger.warning(f"⚠️ ํŽ˜์ด์ง€ {page_num + 1} ์ด๋ฏธ์ง€ ๋ธ”๋ก ์ถ”์ถœ ์‹คํŒจ: {e}")