Spaces:
Running
Running
Auto commit at 24-2025-08 9:25:04
Browse files- lily_llm_api/api/routers/document_router.py +92 -28
- lily_llm_api/api/routers/generation_router.py +17 -1
- lily_llm_api/app.py +2 -2
- lily_llm_api/models/kanana_1_5_v_3b_instruct.py +1 -1
- lily_llm_api/services/generation_service.py +383 -77
- lily_llm_api/services/session_registry.py +53 -0
- lily_llm_core/document_processor.py +54 -65
lily_llm_api/api/routers/document_router.py
CHANGED
@@ -5,11 +5,14 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
|
5 |
from typing import Optional, List
|
6 |
import logging
|
7 |
import time
|
|
|
|
|
8 |
|
9 |
from ...models.schemas import (
|
10 |
DocumentUploadResponse, RAGQueryRequest, RAGQueryResponse,
|
11 |
DocumentProcessResponse, MultimodalRAGResponse
|
12 |
)
|
|
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
15 |
router = APIRouter()
|
@@ -24,27 +27,36 @@ async def upload_document(
|
|
24 |
try:
|
25 |
start_time = time.time()
|
26 |
|
27 |
-
# 파일 읽기
|
28 |
content = await file.read()
|
29 |
filename = file.filename
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
# 문서 처리기 사용
|
32 |
try:
|
33 |
-
from lily_llm_core.
|
34 |
-
|
35 |
-
|
36 |
-
result = document_processor.process_document(
|
37 |
-
content=content,
|
38 |
-
filename=filename,
|
39 |
user_id=user_id,
|
40 |
-
|
|
|
41 |
)
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
if result.get("success"):
|
44 |
processing_time = time.time() - start_time
|
45 |
return DocumentUploadResponse(
|
46 |
success=True,
|
47 |
-
document_id=result.get("document_id",
|
48 |
message="문서 업로드 및 처리 완료",
|
49 |
chunks=result.get("chunks", 0),
|
50 |
latex_count=result.get("latex_count", 0),
|
@@ -59,12 +71,26 @@ async def upload_document(
|
|
59 |
)
|
60 |
|
61 |
except ImportError:
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
except Exception as e:
|
70 |
logger.error(f"문서 업로드 실패: {e}")
|
@@ -269,23 +295,29 @@ async def batch_process_documents(
|
|
269 |
results = []
|
270 |
|
271 |
try:
|
272 |
-
from lily_llm_core.
|
273 |
|
274 |
for file in files:
|
275 |
content = await file.read()
|
276 |
filename = file.filename
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
|
|
281 |
user_id=user_id,
|
282 |
-
|
|
|
283 |
)
|
284 |
|
285 |
results.append({
|
286 |
"filename": filename,
|
287 |
"success": result.get("success", False),
|
288 |
-
"document_id": result.get("document_id",
|
289 |
"chunks": result.get("chunks", 0),
|
290 |
"error": result.get("error")
|
291 |
})
|
@@ -299,7 +331,35 @@ async def batch_process_documents(
|
|
299 |
}
|
300 |
|
301 |
except ImportError:
|
302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
except Exception as e:
|
305 |
logger.error(f"일괄 문서 처리 실패: {e}")
|
@@ -350,11 +410,15 @@ async def upload_multimodal_document(
|
|
350 |
|
351 |
try:
|
352 |
from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
|
353 |
-
|
354 |
-
|
|
|
|
|
|
|
|
|
|
|
355 |
result = hybrid_rag_processor.process_document(
|
356 |
-
|
357 |
-
filename=filename,
|
358 |
user_id=user_id,
|
359 |
room_id=room_id
|
360 |
)
|
|
|
5 |
from typing import Optional, List
|
6 |
import logging
|
7 |
import time
|
8 |
+
import os
|
9 |
+
import uuid
|
10 |
|
11 |
from ...models.schemas import (
|
12 |
DocumentUploadResponse, RAGQueryRequest, RAGQueryResponse,
|
13 |
DocumentProcessResponse, MultimodalRAGResponse
|
14 |
)
|
15 |
+
from ...services.session_registry import set_user_for_room
|
16 |
|
17 |
logger = logging.getLogger(__name__)
|
18 |
router = APIRouter()
|
|
|
27 |
try:
|
28 |
start_time = time.time()
|
29 |
|
30 |
+
# 파일 읽기 및 임시 저장 (파일 경로 기반 처리기 호환)
|
31 |
content = await file.read()
|
32 |
filename = file.filename
|
33 |
+
temp_dir = os.path.join("data", "uploads")
|
34 |
+
os.makedirs(temp_dir, exist_ok=True)
|
35 |
+
temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
|
36 |
+
temp_path = os.path.join(temp_dir, temp_name)
|
37 |
+
with open(temp_path, "wb") as f:
|
38 |
+
f.write(content)
|
39 |
|
40 |
+
# ๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ฌ์ฉ (์ฐ์ RAG์ ์ ์ฅ ํฌํจ ๊ฒฝ๋ก)
|
41 |
try:
|
42 |
+
from lily_llm_core.rag_processor import rag_processor
|
43 |
+
document_id = f"doc_{int(time.time()*1000)}_{uuid.uuid4().hex}"
|
44 |
+
result = rag_processor.process_and_store_document(
|
|
|
|
|
|
|
45 |
user_id=user_id,
|
46 |
+
document_id=document_id,
|
47 |
+
file_path=temp_path,
|
48 |
)
|
49 |
+
# 업로드 시 방-사용자 매핑 저장 (후속 생성에서 자동 보정)
|
50 |
+
try:
|
51 |
+
set_user_for_room(room_id, user_id)
|
52 |
+
except Exception:
|
53 |
+
pass
|
54 |
|
55 |
if result.get("success"):
|
56 |
processing_time = time.time() - start_time
|
57 |
return DocumentUploadResponse(
|
58 |
success=True,
|
59 |
+
document_id=result.get("document_id", document_id),
|
60 |
message="문서 업로드 및 처리 완료",
|
61 |
chunks=result.get("chunks", 0),
|
62 |
latex_count=result.get("latex_count", 0),
|
|
|
71 |
)
|
72 |
|
73 |
except ImportError:
|
74 |
+
# 폴백: 순수 문서 파서로 처리만 수행
|
75 |
+
try:
|
76 |
+
from lily_llm_core.document_processor import document_processor
|
77 |
+
docs = document_processor.process_document(temp_path)
|
78 |
+
processing_time = time.time() - start_time
|
79 |
+
return DocumentUploadResponse(
|
80 |
+
success=True,
|
81 |
+
document_id="",
|
82 |
+
message="문서 업로드 및 처리 완료 (벡터 저장 미수행)",
|
83 |
+
chunks=len(docs) if docs else 0,
|
84 |
+
latex_count=0,
|
85 |
+
auto_response=None
|
86 |
+
)
|
87 |
+
except Exception as e:
|
88 |
+
return DocumentUploadResponse(
|
89 |
+
success=False,
|
90 |
+
document_id="",
|
91 |
+
message="문서 처리기 import 실패",
|
92 |
+
error=str(e)
|
93 |
+
)
|
94 |
|
95 |
except Exception as e:
|
96 |
logger.error(f"문서 업로드 실패: {e}")
|
|
|
295 |
results = []
|
296 |
|
297 |
try:
|
298 |
+
from lily_llm_core.rag_processor import rag_processor
|
299 |
|
300 |
for file in files:
|
301 |
content = await file.read()
|
302 |
filename = file.filename
|
303 |
+
# ์์ ์ ์ฅ ํ RAG์ ์ ์ฅ ํฌํจ ์ฒ๋ฆฌ
|
304 |
+
temp_dir = os.path.join("data", "uploads")
|
305 |
+
os.makedirs(temp_dir, exist_ok=True)
|
306 |
+
temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
|
307 |
+
temp_path = os.path.join(temp_dir, temp_name)
|
308 |
+
with open(temp_path, "wb") as f:
|
309 |
+
f.write(content)
|
310 |
+
document_id = f"doc_{int(time.time()*1000)}_{uuid.uuid4().hex}"
|
311 |
+
result = rag_processor.process_and_store_document(
|
312 |
user_id=user_id,
|
313 |
+
document_id=document_id,
|
314 |
+
file_path=temp_path,
|
315 |
)
|
316 |
|
317 |
results.append({
|
318 |
"filename": filename,
|
319 |
"success": result.get("success", False),
|
320 |
+
"document_id": result.get("document_id", document_id),
|
321 |
"chunks": result.get("chunks", 0),
|
322 |
"error": result.get("error")
|
323 |
})
|
|
|
331 |
}
|
332 |
|
333 |
except ImportError:
|
334 |
+
# 폴백: 저장 없이 처리만 수행
|
335 |
+
try:
|
336 |
+
from lily_llm_core.document_processor import document_processor
|
337 |
+
for file in files:
|
338 |
+
content = await file.read()
|
339 |
+
filename = file.filename
|
340 |
+
temp_dir = os.path.join("data", "uploads")
|
341 |
+
os.makedirs(temp_dir, exist_ok=True)
|
342 |
+
temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
|
343 |
+
temp_path = os.path.join(temp_dir, temp_name)
|
344 |
+
with open(temp_path, "wb") as f:
|
345 |
+
f.write(content)
|
346 |
+
docs = document_processor.process_document(temp_path)
|
347 |
+
results.append({
|
348 |
+
"filename": filename,
|
349 |
+
"success": bool(docs),
|
350 |
+
"document_id": "",
|
351 |
+
"chunks": len(docs) if docs else 0,
|
352 |
+
"error": None if docs else "processing failed"
|
353 |
+
})
|
354 |
+
processing_time = time.time() - start_time
|
355 |
+
return {
|
356 |
+
"status": "success",
|
357 |
+
"results": results,
|
358 |
+
"total_files": len(files),
|
359 |
+
"processing_time": processing_time
|
360 |
+
}
|
361 |
+
except Exception as e:
|
362 |
+
raise HTTPException(status_code=500, detail=str(e))
|
363 |
|
364 |
except Exception as e:
|
365 |
logger.error(f"일괄 문서 처리 실패: {e}")
|
|
|
410 |
|
411 |
try:
|
412 |
from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
|
413 |
+
# 임시 저장 후 파일 경로 기반 처리
|
414 |
+
temp_dir = os.path.join("data", "uploads")
|
415 |
+
os.makedirs(temp_dir, exist_ok=True)
|
416 |
+
temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
|
417 |
+
temp_path = os.path.join(temp_dir, temp_name)
|
418 |
+
with open(temp_path, "wb") as f:
|
419 |
+
f.write(content)
|
420 |
result = hybrid_rag_processor.process_document(
|
421 |
+
file_path=temp_path,
|
|
|
422 |
user_id=user_id,
|
423 |
room_id=room_id
|
424 |
)
|
lily_llm_api/api/routers/generation_router.py
CHANGED
@@ -10,6 +10,7 @@ from ...models.schemas import GenerateResponse, MultimodalGenerateResponse
|
|
10 |
from ...services.generation_service import generate_sync
|
11 |
from ...services.model_service import is_model_loaded
|
12 |
from ...utils.system_utils import select_model_interactive
|
|
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
15 |
router = APIRouter()
|
@@ -31,12 +32,27 @@ async def generate(request: Request,
|
|
31 |
|
32 |
start_time = time.time()
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# ์ธ์
ID๊ฐ ์์ผ๋ฉด ์๋ ์์ฑ (์ฑํ
๋ฐฉ๋ณ ๊ณ ์ ์ธ์
)
|
35 |
if not session_id:
|
36 |
-
# 채팅방 + 사용자 + 타임스탬프 기반으로 고유한 세션 생성
|
37 |
timestamp = int(time.time())
|
38 |
session_id = f"room_{room_id}_user_{user_id}_{timestamp}"
|
39 |
print(f"๐ [DEBUG] ์๋ ์ธ์
ID ์์ฑ: {session_id} (์ฑํ
๋ฐฉ: {room_id}, ์ฌ์ฉ์: {user_id})")
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
if use_context:
|
42 |
try:
|
|
|
10 |
from ...services.generation_service import generate_sync
|
11 |
from ...services.model_service import is_model_loaded
|
12 |
from ...utils.system_utils import select_model_interactive
|
13 |
+
from ...services.session_registry import get_user_for_room, set_user_for_room, set_user_for_session
|
14 |
|
15 |
logger = logging.getLogger(__name__)
|
16 |
router = APIRouter()
|
|
|
32 |
|
33 |
start_time = time.time()
|
34 |
|
35 |
+
# ์ฌ์ฉ์๊ฐ ๋น์ด์๊ฑฐ๋ anonymous๋ฉด ๋ฃธ ๊ธฐ๋ฐ ์ต๊ทผ ์ฌ์ฉ์ ๋ณด์
|
36 |
+
if not user_id or user_id == "anonymous":
|
37 |
+
try:
|
38 |
+
recovered_user = get_user_for_room(room_id)
|
39 |
+
if recovered_user:
|
40 |
+
print(f"๐ [DEBUG] ๋ฃธ ๊ธฐ๋ฐ ์ฌ์ฉ์ ๋ณด์ : {user_id} -> {recovered_user} (room={room_id})")
|
41 |
+
user_id = recovered_user
|
42 |
+
except Exception:
|
43 |
+
pass
|
44 |
+
|
45 |
# ์ธ์
ID๊ฐ ์์ผ๋ฉด ์๋ ์์ฑ (์ฑํ
๋ฐฉ๋ณ ๊ณ ์ ์ธ์
)
|
46 |
if not session_id:
|
|
|
47 |
timestamp = int(time.time())
|
48 |
session_id = f"room_{room_id}_user_{user_id}_{timestamp}"
|
49 |
print(f"๐ [DEBUG] ์๋ ์ธ์
ID ์์ฑ: {session_id} (์ฑํ
๋ฐฉ: {room_id}, ์ฌ์ฉ์: {user_id})")
|
50 |
+
else:
|
51 |
+
# 제공된 세션에도 사용자 매핑 저장
|
52 |
+
try:
|
53 |
+
set_user_for_session(session_id, user_id)
|
54 |
+
except Exception:
|
55 |
+
pass
|
56 |
|
57 |
if use_context:
|
58 |
try:
|
lily_llm_api/app.py
CHANGED
@@ -1490,7 +1490,7 @@ async def manual_cleanup_all_sessions():
|
|
1490 |
except Exception as e:
|
1491 |
return {"status": "error", "message": str(e)}
|
1492 |
|
1493 |
-
@app.post("/generate", response_model=GenerateResponse)
|
1494 |
async def generate(request: Request,
|
1495 |
prompt: str = Form(...),
|
1496 |
image1: UploadFile = File(None),
|
@@ -1634,7 +1634,7 @@ async def generate_multimodal(prompt: str = Form(...),
|
|
1634 |
|
1635 |
|
1636 |
|
1637 |
-
@app.get("/models")
|
1638 |
async def list_models():
|
1639 |
"""์ฌ์ฉ ๊ฐ๋ฅํ ๋ชจ๋ธ ๋ชฉ๋ก"""
|
1640 |
return {
|
|
|
1490 |
except Exception as e:
|
1491 |
return {"status": "error", "message": str(e)}
|
1492 |
|
1493 |
+
@app.post("/api/v2/generate", response_model=GenerateResponse)
|
1494 |
async def generate(request: Request,
|
1495 |
prompt: str = Form(...),
|
1496 |
image1: UploadFile = File(None),
|
|
|
1634 |
|
1635 |
|
1636 |
|
1637 |
+
@app.get("/api/v2/models")
|
1638 |
async def list_models():
|
1639 |
"""์ฌ์ฉ ๊ฐ๋ฅํ ๋ชจ๋ธ ๋ชฉ๋ก"""
|
1640 |
return {
|
lily_llm_api/models/kanana_1_5_v_3b_instruct.py
CHANGED
@@ -246,7 +246,7 @@ class Kanana15V3bInstructProfile:
|
|
246 |
"<|-im_end|>", # ๐ ์๋ชป๋ ํ ํฐ ์ถ๊ฐ
|
247 |
"<image>",
|
248 |
"user\n",
|
249 |
-
"assistant\n"
|
250 |
]
|
251 |
|
252 |
for pattern in patterns_to_remove:
|
|
|
246 |
"<|-im_end|>", # ๐ ์๋ชป๋ ํ ํฐ ์ถ๊ฐ
|
247 |
"<image>",
|
248 |
"user\n",
|
249 |
+
"assistant\n"
|
250 |
]
|
251 |
|
252 |
for pattern in patterns_to_remove:
|
lily_llm_api/services/generation_service.py
CHANGED
@@ -3,13 +3,26 @@ Generation service for Lily LLM API
|
|
3 |
"""
|
4 |
import logging
|
5 |
import time
|
6 |
-
from typing import Optional, List
|
|
|
|
|
7 |
from PIL import Image
|
8 |
import io
|
9 |
import torch
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
|
14 |
temperature: Optional[float] = None, top_p: Optional[float] = None,
|
15 |
do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
|
@@ -21,6 +34,31 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
21 |
|
22 |
current_profile = get_current_profile()
|
23 |
current_model = get_current_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
print(f"๐ [DEBUG] generate_sync ์์ - prompt ๊ธธ์ด: {len(prompt)}")
|
26 |
print(f"๐ [DEBUG] ํ์ฌ ๋ก๋๋ ๋ชจ๋ธ: {current_profile.display_name if current_profile else 'None'}")
|
@@ -47,16 +85,63 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
47 |
if image_data_list and len([img for img in image_data_list if img]) > 0:
|
48 |
all_image_data.extend(image_data_list)
|
49 |
print(f"๐ [DEBUG] ์ง์ ์ ๋ฌ๋ ์ด๋ฏธ์ง {len(image_data_list)}๊ฐ ์ถ๊ฐ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
|
|
|
|
|
|
51 |
if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
|
52 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ์ฒ๋ฆฌ ์์ - ์ด ์ด๋ฏธ์ง ๊ฐ์: {len([img for img in all_image_data if img])}")
|
53 |
|
54 |
# ๐ ๊ณต์ ๋ฐฉ์: ๊ฐ๋จํ ์ด๋ฏธ์ง ์ฒ๋ฆฌ
|
55 |
-
max_images = min(len(all_image_data), 4)
|
56 |
logger.info(f"๐ผ๏ธ ๋ฉํฐ๋ชจ๋ฌ ์ฒ๋ฆฌ ์์... (์ด๋ฏธ์ง {max_images}๊ฐ)")
|
57 |
|
58 |
try:
|
59 |
metas_list = []
|
|
|
60 |
for idx, image_bytes in enumerate(all_image_data[:max_images]):
|
61 |
if image_bytes:
|
62 |
try:
|
@@ -64,7 +149,6 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
64 |
# ๐ ๊ณต์ ์ด๋ฏธ์ง ํ๋ก์ธ์ ์ฌ์ฉ
|
65 |
if processor and hasattr(processor, 'image_processor'):
|
66 |
processed = processor.image_processor(pil_image)
|
67 |
-
all_pixel_values.append(processed["pixel_values"])
|
68 |
metas_list.append(processed.get("image_meta", {}))
|
69 |
else:
|
70 |
logger.warning(f"โ ๏ธ ์ด๋ฏธ์ง ํ๋ก์ธ์๋ฅผ ์ฐพ์ ์ ์์")
|
@@ -79,6 +163,28 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
79 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ: {combined_image_metas}")
|
80 |
else:
|
81 |
combined_image_metas = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
except Exception as e:
|
83 |
logger.error(f"โ ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ ์คํจ: {e}")
|
84 |
combined_image_metas = {}
|
@@ -103,7 +209,17 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
103 |
except Exception as e:
|
104 |
print(f"โ ๏ธ [DEBUG] ์ปจํ
์คํธ ๋ก๋ ์คํจ: {e}")
|
105 |
context_prompt = ""
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
except Exception as e:
|
108 |
print(f"โ ๏ธ [DEBUG] ์ปจํ
์คํธ ๋ก๋ ์คํจ: {e} (์ธ์
: {session_id})")
|
109 |
context_prompt = ""
|
@@ -113,9 +229,13 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
113 |
|
114 |
# ๐ ๋ฉํฐ๋ชจ๋ฌ ํ๋กฌํํธ ๊ตฌ์ฑ (๊ณต์ ๋ฐฉ์)
|
115 |
if all_pixel_values and len(all_pixel_values) > 0:
|
116 |
-
# ๐ ๊ณต์ Kanana ํ์:
|
117 |
-
|
|
|
|
|
|
|
118 |
print(f"๐ [DEBUG] ๋ฉํฐ๋ชจ๋ฌ ํ๋กฌํํธ ๊ตฌ์ฑ (๊ณต์ ํ์): {formatted_prompt}")
|
|
|
119 |
image_processed = True
|
120 |
else:
|
121 |
image_processed = False
|
@@ -180,32 +300,106 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
180 |
|
181 |
if hasattr(tokenizer, 'encode_prompt'):
|
182 |
print(f"๐ [DEBUG] encode_prompt ๋ฉ์๋ ์ฌ์ฉ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
-
|
185 |
-
safe_image_meta = {
|
186 |
-
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
|
187 |
-
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
|
188 |
-
}
|
189 |
|
|
|
190 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
inputs = tokenizer.encode_prompt(
|
192 |
prompt=formatted_prompt,
|
193 |
-
max_length=2048,
|
194 |
-
image_meta=
|
195 |
)
|
|
|
196 |
|
|
|
197 |
if 'seq_length' in inputs:
|
|
|
198 |
del inputs['seq_length']
|
199 |
|
200 |
-
input_ids
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
-
#
|
204 |
-
if isinstance(
|
205 |
-
|
206 |
-
|
207 |
-
attention_mask
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
209 |
except Exception as e:
|
210 |
print(f"โ [DEBUG] encode_prompt ์คํจ: {e}, ํด๋ฐฑ ์ฌ์ฉ")
|
211 |
# 폴백: 기본 토크나이저 사용
|
@@ -257,6 +451,11 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
257 |
print(f"๐ [DEBUG] ์ต์ข
input_ids shape: {input_ids.shape}")
|
258 |
print(f"๐ [DEBUG] ์
๋ ฅ ํ ํฐ ์: {input_ids.shape[1]}")
|
259 |
|
|
|
|
|
|
|
|
|
|
|
260 |
# --- 4. 생성 설정 ---
|
261 |
print(f"๐ [DEBUG] ์์ฑ ์ค์ ๊ตฌ์ฑ ์์")
|
262 |
gen_config = current_profile.get_generation_config()
|
@@ -316,9 +515,20 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
316 |
print(f"๐ [DEBUG] ๋ฉํฐ๋ชจ๋ฌ ์ถ๋ก ์คํ")
|
317 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ํ
์ ๊ฐ์: {len(all_pixel_values)}")
|
318 |
|
319 |
-
# ์ด๋ฏธ์ง
|
320 |
pixel_values = torch.cat(all_pixel_values, dim=0)
|
321 |
print(f"๐ [DEBUG] ๊ฒฐํฉ๋ ์ด๋ฏธ์ง ํ
์ shape: {pixel_values.shape}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ํ
์ dtype: {pixel_values.dtype}")
|
323 |
|
324 |
# ๐ ๋ชจ๋ธ๊ณผ ๋์ผํ dtype์ผ๋ก ๋ณํ (์ฑ๋ฅ ์ต์ ํ)
|
@@ -342,6 +552,82 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
342 |
print(f"๐ [DEBUG] ์ต์ข
์ด๋ฏธ์ง ํ
์ dtype: {pixel_values.dtype}")
|
343 |
print(f"๐ [DEBUG] ๋ชจ๋ธ ์์ฑ ์์ - ๋ฉํฐ๋ชจ๋ฌ")
|
344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
# LoRA 어댑터가 적용된 모델인지 확인
|
346 |
try:
|
347 |
from lily_llm_core.lora_manager import lora_manager
|
@@ -351,79 +637,69 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
351 |
lora_model = lora_manager.get_model()
|
352 |
if lora_model:
|
353 |
print(f"๐ [DEBUG] LoRA ๋ชจ๋ธ๋ก ๋ฉํฐ๋ชจ๋ฌ ์์ฑ ์คํ")
|
354 |
-
# ๐ image_metas ํ๋ผ๋ฏธํฐ ์ถ๊ฐ (๊ณต์ ๋ฐฉ์)
|
355 |
-
processed_image_metas = {}
|
356 |
-
|
357 |
-
# ๐ ๊ณต์ ๋ฐฉ์: vision_grid_thw๋ฅผ ํ
์๋ก ๋ณํ
|
358 |
-
if 'vision_grid_thw' in combined_image_metas:
|
359 |
-
vision_grid = combined_image_metas['vision_grid_thw']
|
360 |
-
if isinstance(vision_grid, list):
|
361 |
-
# ๐ Kanana ๋ชจ๋ธ ์๊ตฌ์ฌํญ: ๋ฐฐ์น ์ฐจ์์ ๋ง์ถค
|
362 |
-
if len(vision_grid) == 1 and len(vision_grid[0]) == 3:
|
363 |
-
# [(1, 34, 52)] -> (1, 1, 34, 52) ํ
์๋ก ๋ณํ (๋ฐฐ์น ์ฐจ์ ์ถ๊ฐ)
|
364 |
-
t, h, w = vision_grid[0]
|
365 |
-
# ๐ 4์ฐจ์ ํ
์๋ก ๋ณํ: (batch_size, T, H, W) ํํ
|
366 |
-
processed_image_metas['vision_grid_thw'] = torch.tensor([[[t, h, w]]], dtype=torch.long)
|
367 |
-
print(f"๐ [DEBUG] vision_grid_thw ํ
์ ๋ณํ: {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
|
368 |
-
else:
|
369 |
-
# ๐ ๋ค๋ฅธ ํํ์ ๊ฒฝ์ฐ ๋ฐฐ์น ์ฐจ์ ์ถ๊ฐ
|
370 |
-
processed_image_metas['vision_grid_thw'] = torch.tensor([vision_grid], dtype=torch.long)
|
371 |
-
print(f"๐ [DEBUG] vision_grid_thw ํ
์ ๋ณํ (๊ธฐ๋ณธ): {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
|
372 |
-
else:
|
373 |
-
# ํ
์์ธ ๊ฒฝ์ฐ ๋ฐฐ์น ์ฐจ์ ํ์ธ ๋ฐ ์ถ๊ฐ
|
374 |
-
if len(vision_grid.shape) == 3:
|
375 |
-
processed_image_metas['vision_grid_thw'] = vision_grid.unsqueeze(0)
|
376 |
-
else:
|
377 |
-
processed_image_metas['vision_grid_thw'] = vision_grid
|
378 |
-
|
379 |
-
# ๐ ๋ค๋ฅธ ๋ฉํ๋ฐ์ดํฐ๋ ๋ฐฐ์น ์ฐจ์ ๋ง์ถค
|
380 |
-
for key, value in combined_image_metas.items():
|
381 |
-
if key != 'vision_grid_thw':
|
382 |
-
if isinstance(value, list):
|
383 |
-
# ๋ฆฌ์คํธ์ธ ๊ฒฝ์ฐ ๋ฐฐ์น ์ฐจ์ ์ถ๊ฐ
|
384 |
-
processed_image_metas[key] = [value]
|
385 |
-
elif isinstance(value, torch.Tensor) and len(value.shape) == 2:
|
386 |
-
# 2์ฐจ์ ํ
์์ธ ๊ฒฝ์ฐ ๋ฐฐ์น ์ฐจ์ ์ถ๊ฐ
|
387 |
-
processed_image_metas[key] = value.unsqueeze(0)
|
388 |
-
else:
|
389 |
-
processed_image_metas[key] = value
|
390 |
|
391 |
generate_kwargs = {
|
392 |
'input_ids': input_ids,
|
393 |
'attention_mask': attention_mask,
|
394 |
'pixel_values': pixel_values,
|
395 |
-
'image_metas': processed_image_metas, # ๐ ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ
|
396 |
**gen_config
|
397 |
}
|
398 |
print(f"๐ [DEBUG] LoRA ๋ชจ๋ธ ์์ฑ ํ๋ผ๋ฏธํฐ: {list(generate_kwargs.keys())}")
|
399 |
print(f"๐ [DEBUG] ์ฒ๋ฆฌ๋ image_metas: {list(processed_image_metas.keys())}")
|
400 |
print(f"๐ [DEBUG] ๋ชจ๋ธ ์์ฑ ์์... (ํ์์์ ์์)")
|
401 |
-
|
402 |
-
|
|
|
|
|
|
|
|
|
|
|
403 |
else:
|
404 |
print(f"โ ๏ธ [DEBUG] LoRA ๋ชจ๋ธ์ ๊ฐ์ ธ์ฌ ์ ์์, ๊ธฐ๋ณธ ๋ชจ๋ธ ์ฌ์ฉ")
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
|
|
409 |
**gen_config
|
410 |
-
|
|
|
|
|
|
|
|
|
|
|
411 |
else:
|
412 |
print(f"๐ [DEBUG] LoRA ์ด๋ํฐ ์์ (๋ฉํฐ๋ชจ๋ฌ), ๊ธฐ๋ณธ ๋ชจ๋ธ ์ฌ์ฉ")
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
|
|
|
|
417 |
**gen_config
|
418 |
-
|
|
|
|
|
|
|
|
|
|
|
419 |
except ImportError:
|
420 |
print(f"๐ [DEBUG] LoRA ์ง์ ์๋จ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์ฌ์ฉ")
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
|
|
|
|
425 |
**gen_config
|
426 |
-
|
|
|
|
|
|
|
|
|
|
|
427 |
|
428 |
else:
|
429 |
# 텍스트-only: 기존 방식
|
@@ -574,10 +850,40 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
574 |
traceback.print_exc()
|
575 |
return {"error": f"Response extraction failed: {str(e)}"}
|
576 |
|
577 |
-
# --- 7. 결과 반환 ---
|
578 |
total_time = time.time() - t_tok_start
|
579 |
print(f"๐ [DEBUG] ์ ์ฒด ์ฒ๋ฆฌ ์๋ฃ - ์ด ์์์๊ฐ: {total_time:.3f}์ด")
|
580 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
return {
|
582 |
"generated_text": response,
|
583 |
"processing_time": total_time,
|
|
|
3 |
"""
|
4 |
import logging
|
5 |
import time
|
6 |
+
from typing import Optional, List, Dict
|
7 |
+
from pathlib import Path
|
8 |
+
from .session_registry import get_user_for_room, get_user_for_session, set_user_for_session
|
9 |
from PIL import Image
|
10 |
import io
|
11 |
import torch
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
+
# 세션별 최근 이미지 캐시 (간단한 인메모리)
|
16 |
+
# 주의: 프로세스 재시작 시 초기화됨. 최대 4장 보관.
|
17 |
+
_session_image_cache: Dict[str, List[bytes]] = {}
|
18 |
+
|
19 |
+
# 선택적: 벡터 스토어에서 최근 문서 이미지 복구 지원
|
20 |
+
try:
|
21 |
+
from lily_llm_core.vector_store_manager import vector_store_manager, SimpleVectorStore
|
22 |
+
except Exception:
|
23 |
+
vector_store_manager = None
|
24 |
+
SimpleVectorStore = None
|
25 |
+
|
26 |
def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
|
27 |
temperature: Optional[float] = None, top_p: Optional[float] = None,
|
28 |
do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
|
|
|
34 |
|
35 |
current_profile = get_current_profile()
|
36 |
current_model = get_current_model()
|
37 |
+
|
38 |
+
# 사용자 보정: session/room 기반 최근 사용자 복구
|
39 |
+
try:
|
40 |
+
if (not user_id) or (user_id == "anonymous"):
|
41 |
+
recovered = get_user_for_session(session_id) or get_user_for_room(room_id)
|
42 |
+
if recovered:
|
43 |
+
print(f"๐ [DEBUG] ์ฌ์ฉ์ ๋ณด์ : {user_id} -> {recovered} (room={room_id}, session={session_id})")
|
44 |
+
user_id = recovered
|
45 |
+
except Exception:
|
46 |
+
pass
|
47 |
+
|
48 |
+
# 세션 ID 정규화: 제공되지 않거나 일회성으로 보이는 경우 룸/사용자 기반으로 고정
|
49 |
+
original_session_id = session_id
|
50 |
+
if not session_id or (isinstance(session_id, str) and session_id.startswith("room_") and session_id.count("_") >= 3):
|
51 |
+
# 예: room_default_user_anonymous_17559... 형태를 안정적인 키로 치환
|
52 |
+
stable_user = user_id or "anonymous"
|
53 |
+
stable_room = room_id or "default"
|
54 |
+
session_id = f"room_{stable_room}_{stable_user}"
|
55 |
+
if original_session_id and original_session_id != session_id:
|
56 |
+
print(f"๐ [DEBUG] ์ธ์
ID ์ ๊ทํ: {original_session_id} -> {session_id}")
|
57 |
+
# 세션-사용자 매핑 저장
|
58 |
+
try:
|
59 |
+
set_user_for_session(session_id, user_id)
|
60 |
+
except Exception:
|
61 |
+
pass
|
62 |
|
63 |
print(f"๐ [DEBUG] generate_sync ์์ - prompt ๊ธธ์ด: {len(prompt)}")
|
64 |
print(f"๐ [DEBUG] ํ์ฌ ๋ก๋๋ ๋ชจ๋ธ: {current_profile.display_name if current_profile else 'None'}")
|
|
|
85 |
if image_data_list and len([img for img in image_data_list if img]) > 0:
|
86 |
all_image_data.extend(image_data_list)
|
87 |
print(f"๐ [DEBUG] ์ง์ ์ ๋ฌ๋ ์ด๋ฏธ์ง {len(image_data_list)}๊ฐ ์ถ๊ฐ")
|
88 |
+
else:
|
89 |
+
# 현재 요청에 이미지가 없으면 세션 캐시에서 복구 시도
|
90 |
+
if session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
|
91 |
+
cached_imgs = _session_image_cache[session_id]
|
92 |
+
all_image_data.extend(cached_imgs)
|
93 |
+
print(f"๐ [DEBUG] ์ธ์
์บ์์์ ์ด์ ์ด๋ฏธ์ง {len(cached_imgs)}๊ฐ ๋ณต๊ตฌ (์ธ์
: {session_id})")
|
94 |
+
|
95 |
+
# 추가 복구: 여전히 이미지가 없고 멀티모달이면, 최근 RAG 문서에서 이미지 바이트 복원
|
96 |
+
if (not all_image_data or len([img for img in all_image_data if img]) == 0) and getattr(current_profile, 'multimodal', False):
|
97 |
+
try:
|
98 |
+
if vector_store_manager is not None:
|
99 |
+
# 사용자 문서 목록 가져오기 (최신순 정렬)
|
100 |
+
user_docs = vector_store_manager.get_user_documents(user_id)
|
101 |
+
if user_docs:
|
102 |
+
# last_updated > created_at 우선 사용
|
103 |
+
def _ts(d: Dict):
|
104 |
+
return d.get('last_updated') or d.get('created_at') or 0
|
105 |
+
user_docs.sort(key=_ts, reverse=True)
|
106 |
+
latest_doc_id = user_docs[0].get('document_id')
|
107 |
+
base_path = getattr(vector_store_manager, 'base_path', Path('./vector_stores'))
|
108 |
+
store_path = Path(base_path) / user_id / latest_doc_id
|
109 |
+
if SimpleVectorStore is not None:
|
110 |
+
store = SimpleVectorStore.load_local(str(store_path))
|
111 |
+
recovered = []
|
112 |
+
for doc in getattr(store, 'documents', []) or []:
|
113 |
+
try:
|
114 |
+
meta = getattr(doc, 'metadata', {}) or {}
|
115 |
+
imgs = meta.get('image_data_list')
|
116 |
+
if imgs and isinstance(imgs, list):
|
117 |
+
# bytes ๋ง ํํฐ๋ง
|
118 |
+
recovered.extend([b for b in imgs if isinstance(b, (bytes, bytearray)) and len(b) > 0])
|
119 |
+
except Exception:
|
120 |
+
continue
|
121 |
+
if recovered:
|
122 |
+
all_image_data.extend(recovered[:4])
|
123 |
+
print(f"๐ [DEBUG] RAG์์ ์ด๋ฏธ์ง ๋ณต๊ตฌ: {len(recovered)}๊ฐ (์ฌ์ฉ: {len(all_image_data)})")
|
124 |
+
else:
|
125 |
+
print("โ ๏ธ [DEBUG] SimpleVectorStore ์ฌ์ฉ ๋ถ๊ฐ - ์ด๋ฏธ์ง ๋ณต๊ตฌ ์๋ต")
|
126 |
+
else:
|
127 |
+
print("โ ๏ธ [DEBUG] ์ฌ์ฉ์ ๋ฌธ์๊ฐ ์์ด ์ด๋ฏธ์ง ๋ณต๊ตฌ ๋ถ๊ฐ")
|
128 |
+
else:
|
129 |
+
print("โ ๏ธ [DEBUG] vector_store_manager ๋ฏธ์ฌ์ฉ - ์ด๋ฏธ์ง ๋ณต๊ตฌ ๋นํ์ฑํ")
|
130 |
+
except Exception as e:
|
131 |
+
print(f"โ ๏ธ [DEBUG] RAG ๊ธฐ๋ฐ ์ด๋ฏธ์ง ๋ณต๊ตฌ ์คํจ: {e}")
|
132 |
|
133 |
+
# ํญ์ ์ฐธ์กฐ ๊ฐ๋ฅํ max_images ์ ์ (์ด๋ฏธ์ง ์์ผ๋ฉด 0)
|
134 |
+
max_images = min(len([img for img in all_image_data if img]) if all_image_data else 0, 4)
|
135 |
+
|
136 |
if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
|
137 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ์ฒ๋ฆฌ ์์ - ์ด ์ด๋ฏธ์ง ๊ฐ์: {len([img for img in all_image_data if img])}")
|
138 |
|
139 |
# ๐ ๊ณต์ ๋ฐฉ์: ๊ฐ๋จํ ์ด๋ฏธ์ง ์ฒ๋ฆฌ
|
|
|
140 |
logger.info(f"๐ผ๏ธ ๋ฉํฐ๋ชจ๋ฌ ์ฒ๋ฆฌ ์์... (์ด๋ฏธ์ง {max_images}๊ฐ)")
|
141 |
|
142 |
try:
|
143 |
metas_list = []
|
144 |
+
# ๋จผ์ ๋ฉํ๋ฐ์ดํฐ๋ง ์์ง
|
145 |
for idx, image_bytes in enumerate(all_image_data[:max_images]):
|
146 |
if image_bytes:
|
147 |
try:
|
|
|
149 |
# ๐ ๊ณต์ ์ด๋ฏธ์ง ํ๋ก์ธ์ ์ฌ์ฉ
|
150 |
if processor and hasattr(processor, 'image_processor'):
|
151 |
processed = processor.image_processor(pil_image)
|
|
|
152 |
metas_list.append(processed.get("image_meta", {}))
|
153 |
else:
|
154 |
logger.warning(f"โ ๏ธ ์ด๋ฏธ์ง ํ๋ก์ธ์๋ฅผ ์ฐพ์ ์ ์์")
|
|
|
163 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ: {combined_image_metas}")
|
164 |
else:
|
165 |
combined_image_metas = {}
|
166 |
+
|
167 |
+
# ์ด์ ์ด๋ฏธ์ง ํฌ๊ธฐ๋ฅผ ์กฐ์ ํ์ฌ pixel_values ์์ฑ
|
168 |
+
for idx, image_bytes in enumerate(all_image_data[:max_images]):
|
169 |
+
if image_bytes:
|
170 |
+
try:
|
171 |
+
pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
172 |
+
# ๐ ๊ณต์ ์ด๋ฏธ์ง ํ๋ก์ธ์ ์ฌ์ฉ
|
173 |
+
if processor and hasattr(processor, 'image_processor'):
|
174 |
+
# KananaVImageProcessor๋ ๊ธฐ๋ณธ ํ๋ผ๋ฏธํฐ๋ง ์ง์
|
175 |
+
processed = processor.image_processor(pil_image)
|
176 |
+
pixel_values = processed["pixel_values"]
|
177 |
+
# NOTE: pixel_values ๋ฐ ๋ฉํ๋ฐ์ดํฐ๋ ํ๋ก์ธ์ ์ฐ์ถ ๊ทธ๋๋ก ์ฌ์ฉ (์์ ์กฐ์ ๊ธ์ง)
|
178 |
+
# ๋ชจ๋ธ ๋ด๋ถ prepare_mm_inputs๊ฐ ์ผ๊ด์ฑ ์๊ฒ ์ฒ๋ฆฌํ๋๋ก ๋งก๊น
|
179 |
+
|
180 |
+
all_pixel_values.append(pixel_values)
|
181 |
+
metas_list.append(processed.get("image_meta", {}))
|
182 |
+
|
183 |
+
# ์ค๋ณต ์ฝ๋ ์ ๊ฑฐ - ์ด๋ฏธ ์์์ ์ฒ๋ฆฌ๋จ
|
184 |
+
else:
|
185 |
+
logger.warning(f"โ ๏ธ ์ด๋ฏธ์ง ํ๋ก์ธ์๋ฅผ ์ฐพ์ ์ ์์")
|
186 |
+
except Exception as e:
|
187 |
+
logger.warning(f"โ ๏ธ ์ด๋ฏธ์ง {idx} ์ฒ๋ฆฌ ์คํจ: {e}")
|
188 |
except Exception as e:
|
189 |
logger.error(f"โ ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ ์คํจ: {e}")
|
190 |
combined_image_metas = {}
|
|
|
209 |
except Exception as e:
|
210 |
print(f"โ ๏ธ [DEBUG] ์ปจํ
์คํธ ๋ก๋ ์คํจ: {e}")
|
211 |
context_prompt = ""
|
212 |
+
# ์ด๋ฏธ์ง ๋ฐ์ดํธ๋ฅผ ์ธ์
์บ์์ ๋ณด๊ด (๏ฟฝ๏ฟฝ๏ฟฝ์ ํด์ ์ฌ์ฌ์ฉ)
|
213 |
+
if session_id:
|
214 |
+
# ์๋ณธ ์์ฒญ์ ์ด๋ฏธ์ง๊ฐ ์์๋ค๋ฉด ๊ทธ๊ฑธ ์ฐ์ ๋ณด๊ด, ์์ผ๋ฉด ๋ณต๊ตฌ๋ ์ด๋ฏธ์ง ์ ์ง
|
215 |
+
if image_data_list and len([img for img in image_data_list if img]) > 0:
|
216 |
+
_session_image_cache[session_id] = list(image_data_list[:max_images])
|
217 |
+
elif session_id not in _session_image_cache:
|
218 |
+
_session_image_cache[session_id] = list(all_image_data[:max_images])
|
219 |
+
# ์ต๋ 4์ฅ ์ ํ
|
220 |
+
_session_image_cache[session_id] = _session_image_cache[session_id][:4]
|
221 |
+
print(f"๐ [DEBUG] ์ธ์
์บ์ ์
๋ฐ์ดํธ: {len(_session_image_cache[session_id])}์ฅ ์ ์ฅ (์ธ์
: {session_id})")
|
222 |
+
|
223 |
except Exception as e:
|
224 |
print(f"โ ๏ธ [DEBUG] ์ปจํ
์คํธ ๋ก๋ ์คํจ: {e} (์ธ์
: {session_id})")
|
225 |
context_prompt = ""
|
|
|
229 |
|
230 |
# ๐ ๋ฉํฐ๋ชจ๋ฌ ํ๋กฌํํธ ๊ตฌ์ฑ (๊ณต์ ๋ฐฉ์)
|
231 |
if all_pixel_values and len(all_pixel_values) > 0:
|
232 |
+
# ๐ ๊ณต์ Kanana ํ์: ์ด๋ฏธ์ง ๊ฐ์์ ๋ง๊ฒ <image> ํ ํฐ ์์ฑ
|
233 |
+
num_images = len(all_pixel_values)
|
234 |
+
image_tokens = "<image>" * num_images # ์ด๋ฏธ์ง ๊ฐ์๋งํผ <image> ํ ํฐ ์์ฑ
|
235 |
+
# ๋ต๋ณ ์ ๋๋ฅผ ์ํด Assistant ํ๋ฆฌํฝ์ค ์ถ๊ฐ
|
236 |
+
formatted_prompt = f"Human: {image_tokens}{prompt}\nAssistant:"
|
237 |
print(f"๐ [DEBUG] ๋ฉํฐ๋ชจ๋ฌ ํ๋กฌํํธ ๊ตฌ์ฑ (๊ณต์ ํ์): {formatted_prompt}")
|
238 |
+
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ํ ํฐ ์์ฑ: {num_images}๊ฐ ์ด๋ฏธ์ง -> {image_tokens}")
|
239 |
image_processed = True
|
240 |
else:
|
241 |
image_processed = False
|
|
|
300 |
|
301 |
if hasattr(tokenizer, 'encode_prompt'):
|
302 |
print(f"๐ [DEBUG] encode_prompt ๋ฉ์๋ ์ฌ์ฉ")
|
303 |
+
print(f"๐ [DEBUG] combined_image_metas: {combined_image_metas}")
|
304 |
+
print(f"๐ [DEBUG] ์ด ์ด๋ฏธ์ง ๊ฐ์: {len(all_image_data)}")
|
305 |
+
|
306 |
+
# ๐ ๋ฉํ๋ฐ์ดํฐ ๊ฒ์ฆ ๋ฐ ์์ ํ
|
307 |
+
safe_image_meta = {}
|
308 |
+
if combined_image_metas:
|
309 |
+
# image_token_thw ๋ฐฐ์ด ๊ธธ์ด ๊ฒ์ฆ
|
310 |
+
if 'image_token_thw' in combined_image_metas:
|
311 |
+
image_token_thw = combined_image_metas['image_token_thw']
|
312 |
+
if isinstance(image_token_thw, list) and len(image_token_thw) > 0:
|
313 |
+
# ๋ฐฐ์ด ๊ธธ์ด๊ฐ ์ด๋ฏธ์ง ๊ฐ์์ ์ผ์นํ๋์ง ํ์ธ
|
314 |
+
if len(image_token_thw) == len(all_pixel_values):
|
315 |
+
# ๐ ์ถ๊ฐ ๊ฒ์ฆ: ๊ฐ ๋ฐฐ์ด ์์๊ฐ ์ ํจํ์ง ํ์ธ
|
316 |
+
valid_meta = True
|
317 |
+
for i, thw in enumerate(image_token_thw):
|
318 |
+
if not isinstance(thw, (list, tuple)) or len(thw) != 3:
|
319 |
+
print(f"โ ๏ธ [DEBUG] ๋ฉํ๋ฐ์ดํฐ ์์ {i}๊ฐ ์ ํจํ์ง ์์: {thw}")
|
320 |
+
valid_meta = False
|
321 |
+
break
|
322 |
+
|
323 |
+
if valid_meta:
|
324 |
+
safe_image_meta = combined_image_metas
|
325 |
+
print(f"๐ [DEBUG] ๋ฉํ๋ฐ์ดํฐ ๊ฒ์ฆ ํต๊ณผ: {len(image_token_thw)}๊ฐ ์ด๋ฏธ์ง")
|
326 |
+
else:
|
327 |
+
print(f"โ ๏ธ [DEBUG] ๋ฉํ๋ฐ์ดํฐ ์์ ๊ฒ์ฆ ์คํจ, ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ")
|
328 |
+
safe_image_meta = {
|
329 |
+
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
|
330 |
+
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
|
331 |
+
}
|
332 |
+
else:
|
333 |
+
print(f"โ ๏ธ [DEBUG] ๋ฉํ๋ฐ์ดํฐ ๋ถ์ผ์น: ์ด๋ฏธ์ง {len(all_pixel_values)}๊ฐ, ๋ฉํ {len(image_token_thw)}๊ฐ")
|
334 |
+
# ์์ ํ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
335 |
+
safe_image_meta = {
|
336 |
+
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
|
337 |
+
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
|
338 |
+
}
|
339 |
+
else:
|
340 |
+
print(f"โ ๏ธ [DEBUG] image_token_thw๊ฐ ์ ํจํ์ง ์์, ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ")
|
341 |
+
safe_image_meta = {
|
342 |
+
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
|
343 |
+
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
|
344 |
+
}
|
345 |
+
else:
|
346 |
+
print(f"โ ๏ธ [DEBUG] image_token_thw ์์, ๊ธฐ๋ณธ๊ฐ ์์ฑ")
|
347 |
+
safe_image_meta = {
|
348 |
+
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
|
349 |
+
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
|
350 |
+
}
|
351 |
+
else:
|
352 |
+
print(f"โ ๏ธ [DEBUG] combined_image_metas ์์, ๊ธฐ๋ณธ๊ฐ ์์ฑ")
|
353 |
+
safe_image_meta = {
|
354 |
+
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
|
355 |
+
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
|
356 |
+
}
|
357 |
|
358 |
+
print(f"๐ [DEBUG] ์์ ํ๋ ๋ฉํ๋ฐ์ดํฐ: {safe_image_meta}")
|
|
|
|
|
|
|
|
|
359 |
|
360 |
+
# ๐ ์์ ํ ๋ฉํ๋ฐ์ดํฐ๋ก encode_prompt ํธ์ถ
|
361 |
try:
|
362 |
+
# ๐ ์ถ๊ฐ ์์ ์ฅ์น: ๋ฉํ๋ฐ์ดํฐ ๋ณต์ฌ๋ณธ ์์ฑ
|
363 |
+
final_meta = {}
|
364 |
+
for key, value in safe_image_meta.items():
|
365 |
+
if isinstance(value, list):
|
366 |
+
final_meta[key] = value.copy() # ๋ณต์ฌ๋ณธ ์์ฑ
|
367 |
+
else:
|
368 |
+
final_meta[key] = value
|
369 |
+
|
370 |
+
print(f"๐ [DEBUG] ์ต์ข
๋ฉํ๋ฐ์ดํฐ: {final_meta}")
|
371 |
+
# ๐ ๊ณต์ ๋ฐฉ์: max_length ํ๋ผ๋ฏธํฐ ์ถ๊ฐ
|
372 |
inputs = tokenizer.encode_prompt(
|
373 |
prompt=formatted_prompt,
|
374 |
+
max_length=2048, # ๊ณต์ ์ฝ๋์ ๋์ผ
|
375 |
+
image_meta=final_meta
|
376 |
)
|
377 |
+
print(f"๐ [DEBUG] encode_prompt ์ถ๋ ฅ: {list(inputs.keys())}")
|
378 |
|
379 |
+
# ๐ encode_prompt ์ถ๋ ฅ ์ ๊ทํ (seq_length ์ ๊ฑฐ)
|
380 |
if 'seq_length' in inputs:
|
381 |
+
print(f"๐ [DEBUG] seq_length ์ ๊ฑฐ๋จ")
|
382 |
del inputs['seq_length']
|
383 |
|
384 |
+
# ๐ input_ids ์์ ํ๊ฒ ์ถ์ถ (๊ณต์ ๋ฐฉ์)
|
385 |
+
if isinstance(inputs['input_ids'], tuple):
|
386 |
+
print(f"๐ [DEBUG] input_ids๊ฐ ํํ์: {len(inputs['input_ids'])}๊ฐ ์์")
|
387 |
+
input_ids = inputs['input_ids'][0] # ์ฒซ ๋ฒ์งธ ์์ ์ฌ์ฉ
|
388 |
+
print(f"๐ [DEBUG] input_ids ํํ์์ ์ฒซ ๋ฒ์งธ ์์ ์ถ์ถ: {input_ids.shape}")
|
389 |
+
else:
|
390 |
+
input_ids = inputs['input_ids']
|
391 |
|
392 |
+
# ๐ attention_mask๋ ์์ ํ๊ฒ ์ถ์ถ
|
393 |
+
if isinstance(inputs['attention_mask'], tuple):
|
394 |
+
print(f"๐ [DEBUG] attention_mask๊ฐ ํํ์: {len(inputs['attention_mask'])}๊ฐ ์์")
|
395 |
+
attention_mask = inputs['attention_mask'][0] # ์ฒซ ๋ฒ์งธ ์์ ์ฌ์ฉ
|
396 |
+
print(f"๐ [DEBUG] attention_mask ํํ์์ ์ฒซ ๋ฒ์งธ ์์ ์ถ์ถ: {attention_mask.shape}")
|
397 |
+
else:
|
398 |
+
attention_mask = inputs['attention_mask']
|
399 |
+
|
400 |
+
# ๐ ์ต์ข
๊ฒ์ฆ
|
401 |
+
print(f"๐ [DEBUG] ์ต์ข
input_ids ํ์
: {type(input_ids)}, shape: {input_ids.shape}")
|
402 |
+
print(f"๐ [DEBUG] ์ต์ข
attention_mask ํ์
: {type(attention_mask)}, shape: {attention_mask.shape}")
|
403 |
except Exception as e:
|
404 |
print(f"โ [DEBUG] encode_prompt ์คํจ: {e}, ํด๋ฐฑ ์ฌ์ฉ")
|
405 |
# ํด๋ฐฑ: ๊ธฐ๋ณธ ํ ํฌ๋์ด์ ์ฌ์ฉ
|
|
|
451 |
print(f"๐ [DEBUG] ์ต์ข
input_ids shape: {input_ids.shape}")
|
452 |
print(f"๐ [DEBUG] ์
๋ ฅ ํ ํฐ ์: {input_ids.shape[1]}")
|
453 |
|
454 |
+
# ๐ ๋ฉํฐ๋ชจ๋ฌ: -1 ํ ํฐ์ ๋ชจ๋ธ ๋ด๋ถ์์ ์๊ฐ ์๋ฒ ๋ฉ์ผ๋ก ๋์ฒด๋๋ฏ๋ก ์ ์ง
|
455 |
+
negative_mask = input_ids < 0
|
456 |
+
if negative_mask.any():
|
457 |
+
print(f"๐ [DEBUG] -1 ํ ํฐ ์ ์ง: {negative_mask.sum().item()}๊ฐ")
|
458 |
+
|
459 |
# --- 4. ์์ฑ ์ค์ ---
|
460 |
print(f"๐ [DEBUG] ์์ฑ ์ค์ ๊ตฌ์ฑ ์์")
|
461 |
gen_config = current_profile.get_generation_config()
|
|
|
515 |
print(f"๐ [DEBUG] ๋ฉํฐ๋ชจ๋ฌ ์ถ๋ก ์คํ")
|
516 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ํ
์ ๊ฐ์: {len(all_pixel_values)}")
|
517 |
|
518 |
+
# ์ด๋ฏธ์ง ํ
์๋ ํ๋ก์ธ์ ์ฐ์ถ๊ฐ์ ๊ทธ๋๋ก ๊ฒฐํฉ (์์ ์กฐ์ ๊ธ์ง)
|
519 |
pixel_values = torch.cat(all_pixel_values, dim=0)
|
520 |
print(f"๐ [DEBUG] ๊ฒฐํฉ๋ ์ด๋ฏธ์ง ํ
์ shape: {pixel_values.shape}")
|
521 |
+
# ๋๋ฒ๊น
: ์ด๋ฏธ์ง๋ณ ํ ํฐ ์คํ์
๋ฒ์ ์ถ๋ ฅ
|
522 |
+
try:
|
523 |
+
offsets = []
|
524 |
+
start = 0
|
525 |
+
for i, img_t in enumerate(all_pixel_values):
|
526 |
+
end = start + img_t.shape[0]
|
527 |
+
offsets.append((start, end))
|
528 |
+
start = end
|
529 |
+
print(f"๐ [DEBUG] ์ด๋ฏธ์ง๋ณ ํ ํฐ ๋ฒ์: {offsets}")
|
530 |
+
except Exception as _e:
|
531 |
+
print(f"โ ๏ธ [DEBUG] ์ด๋ฏธ์ง ์คํ์
๊ณ์ฐ ์คํจ: {_e}")
|
532 |
print(f"๐ [DEBUG] ์ด๋ฏธ์ง ํ
์ dtype: {pixel_values.dtype}")
|
533 |
|
534 |
# ๐ ๋ชจ๋ธ๊ณผ ๋์ผํ dtype์ผ๋ก ๋ณํ (์ฑ๋ฅ ์ต์ ํ)
|
|
|
552 |
print(f"๐ [DEBUG] ์ต์ข
์ด๋ฏธ์ง ํ
์ dtype: {pixel_values.dtype}")
|
553 |
print(f"๐ [DEBUG] ๋ชจ๋ธ ์์ฑ ์์ - ๋ฉํฐ๋ชจ๋ฌ")
|
554 |
|
555 |
+
# ๐ ๊ณตํต ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ ์ฒ๋ฆฌ (๋ชจ๋ ๋ชจ๋ธ ๊ฒฝ๋ก์์ ๋์ผํ๊ฒ)
|
556 |
+
processed_image_metas = {}
|
557 |
+
|
558 |
+
# ๐ vision_grid_thw๋ฅผ ํ
์๋ก ๋ณํ (๋ชจ๋ธ ๋ด๋ถ ์ฌ๋ผ์ด์ฑ ํธํ์ฑ)
|
559 |
+
if 'vision_grid_thw' in combined_image_metas:
|
560 |
+
vision_grid = combined_image_metas['vision_grid_thw']
|
561 |
+
if isinstance(vision_grid, list):
|
562 |
+
# ๋ฆฌ์คํธ๋ฅผ ํ
์๋ก ๋ณํ: [(1, 34, 52), (1, 14, 36)] -> tensor([[1, 34, 52], [1, 14, 36]])
|
563 |
+
_vg = torch.tensor(vision_grid, dtype=torch.long)
|
564 |
+
# rot_pos_emb๋ [N,3] ๋๋ ๋ฆฌ์คํธ[(t,h,w)]๋ฅผ ๊ธฐ๋ํ๋ฏ๋ก ๋ฐฐ์น ์ฐจ์ ์์ด ์ ๋ฌ
|
565 |
+
processed_image_metas['vision_grid_thw'] = _vg # [N, 3]
|
566 |
+
print(f"๐ [DEBUG] ๊ณตํต - vision_grid_thw ํ
์๋ก ๋ณํ: {processed_image_metas['vision_grid_thw'].shape}")
|
567 |
+
else:
|
568 |
+
processed_image_metas['vision_grid_thw'] = vision_grid
|
569 |
+
print(f"๐ [DEBUG] ๊ณตํต - vision_grid_thw ์๋ณธ ํํ ์ ์ง: {vision_grid}")
|
570 |
+
|
571 |
+
# ๐ ๋ค๋ฅธ ๋ฉํ๋ฐ์ดํฐ๋ ๋ฐฐ์น ์ฐจ์ ๋ง์ถค
|
572 |
+
for key, value in combined_image_metas.items():
|
573 |
+
if key != 'vision_grid_thw':
|
574 |
+
if isinstance(value, list):
|
575 |
+
# ๋ฆฌ์คํธ์ธ ๊ฒฝ์ฐ ์ฌ๋ฐ๋ฅธ ํํ๋ก ๋ณํ
|
576 |
+
if key == 'image_token_thw':
|
577 |
+
# image_token_thw๋ [1, N, 3]๋ก ์ ๋ฌ (๋ชจ๋ธ ๊ธฐ๋ ํํ)
|
578 |
+
_thw = torch.tensor(value, dtype=torch.long)
|
579 |
+
processed_image_metas[key] = _thw.unsqueeze(0)
|
580 |
+
else:
|
581 |
+
# ๋ค๋ฅธ ๋ฉํ๋ฐ์ดํฐ๋ ๊ธฐ์กด ๋ฐฉ์
|
582 |
+
processed_image_metas[key] = [value]
|
583 |
+
elif isinstance(value, torch.Tensor) and len(value.shape) == 2:
|
584 |
+
# 2์ฐจ์ ํ
์์ธ ๊ฒฝ์ฐ ๋ฐฐ์น ์ฐจ์ ์ถ๊ฐ
|
585 |
+
processed_image_metas[key] = value.unsqueeze(0)
|
586 |
+
else:
|
587 |
+
processed_image_metas[key] = value
|
588 |
+
|
589 |
+
# ๐ ์ฐธ๊ณ ๋ก๊ทธ๋ง ์ถ๋ ฅ: ์ด๋ฏธ์ง ํ ํฐ ์ ์ถ์ (์กฐ์ ์ ํ์ง ์์)
|
590 |
+
if 'image_token_thw' in processed_image_metas:
|
591 |
+
image_token_thw = processed_image_metas['image_token_thw']
|
592 |
+
if isinstance(image_token_thw, torch.Tensor):
|
593 |
+
total_image_tokens = 0
|
594 |
+
print(f"๐ [DEBUG] image_token_thw shape: {image_token_thw.shape}")
|
595 |
+
print(f"๐ [DEBUG] image_token_thw ๋ด์ฉ: {image_token_thw}")
|
596 |
+
for i in range(image_token_thw.shape[0]):
|
597 |
+
token_info = image_token_thw[i]
|
598 |
+
if len(token_info) == 3:
|
599 |
+
t, h, w = token_info
|
600 |
+
total_image_tokens += t * h * w
|
601 |
+
elif len(token_info) == 2:
|
602 |
+
h, w = token_info
|
603 |
+
total_image_tokens += h * w
|
604 |
+
print(f"๐ [DEBUG] ๊ณ์ฐ๋ ์ด ์ด๋ฏธ์ง ํ ํฐ ์(์ฐธ๊ณ ): {total_image_tokens}")
|
605 |
+
if isinstance(total_image_tokens, torch.Tensor):
|
606 |
+
total_image_tokens = total_image_tokens.sum().item()
|
607 |
+
print(f"๐ [DEBUG] pixel_values ๊ธธ์ด: {pixel_values.shape[0]}, ์์: {total_image_tokens} (์กฐ์ ์ํจ)")
|
608 |
+
|
609 |
+
# ์์ ๊ฐ๋: vision_grid_thw๊ฐ [1, N, 3]๋ก ์ค๋ฉด [N, 3]๋ก ๋ณํ
|
610 |
+
try:
|
611 |
+
if isinstance(processed_image_metas.get('vision_grid_thw', None), torch.Tensor):
|
612 |
+
_vg = processed_image_metas['vision_grid_thw']
|
613 |
+
if _vg.dim() == 3 and _vg.shape[0] == 1 and _vg.shape[-1] == 3:
|
614 |
+
processed_image_metas['vision_grid_thw'] = _vg.squeeze(0)
|
615 |
+
print(f"๐ [DEBUG] vision_grid_thw ๋ฐฐ์น ์ฐจ์ ์ ๊ฑฐ: {processed_image_metas['vision_grid_thw'].shape}")
|
616 |
+
except Exception as _e:
|
617 |
+
print(f"โ ๏ธ [DEBUG] vision_grid_thw ์ ๊ทํ ์คํจ: {_e}")
|
618 |
+
|
619 |
+
# ๋ฉํฐ๋ชจ๋ฌ ๊ฒฝ๋ก๋ ์บ์ ์ฌ์ฉ ํ์ฑํ
|
620 |
+
try:
|
621 |
+
gen_config['use_cache'] = True
|
622 |
+
except Exception:
|
623 |
+
pass
|
624 |
+
|
625 |
+
# ๋ชจ๋ธ eval ๋ชจ๋ ์ ํ (์ฑ๋ฅ/์ผ๊ด์ฑ)
|
626 |
+
try:
|
627 |
+
current_model.eval()
|
628 |
+
except Exception:
|
629 |
+
pass
|
630 |
+
|
631 |
# LoRA ์ด๋ํฐ๊ฐ ์ ์ฉ๋ ๋ชจ๋ธ์ธ์ง ํ์ธ
|
632 |
try:
|
633 |
from lily_llm_core.lora_manager import lora_manager
|
|
|
637 |
lora_model = lora_manager.get_model()
|
638 |
if lora_model:
|
639 |
print(f"๐ [DEBUG] LoRA ๋ชจ๋ธ๋ก ๋ฉํฐ๋ชจ๋ฌ ์์ฑ ์คํ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
640 |
|
641 |
generate_kwargs = {
|
642 |
'input_ids': input_ids,
|
643 |
'attention_mask': attention_mask,
|
644 |
'pixel_values': pixel_values,
|
645 |
+
'image_metas': processed_image_metas, # ๐ ๊ณตํต์ผ๋ก ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ
|
646 |
**gen_config
|
647 |
}
|
648 |
print(f"๐ [DEBUG] LoRA ๋ชจ๋ธ ์์ฑ ํ๋ผ๋ฏธํฐ: {list(generate_kwargs.keys())}")
|
649 |
print(f"๐ [DEBUG] ์ฒ๋ฆฌ๋ image_metas: {list(processed_image_metas.keys())}")
|
650 |
print(f"๐ [DEBUG] ๋ชจ๋ธ ์์ฑ ์์... (ํ์์์ ์์)")
|
651 |
+
try:
|
652 |
+
lora_model.eval()
|
653 |
+
except Exception:
|
654 |
+
pass
|
655 |
+
import torch as _torch
|
656 |
+
with _torch.inference_mode():
|
657 |
+
generated_ids = lora_model.generate(**generate_kwargs)
|
658 |
else:
|
659 |
print(f"โ ๏ธ [DEBUG] LoRA ๋ชจ๋ธ์ ๊ฐ์ ธ์ฌ ์ ์์, ๊ธฐ๋ณธ ๋ชจ๋ธ ์ฌ์ฉ")
|
660 |
+
# ๐ LoRA ๋ชจ๋ธ์ ๊ฐ์ ธ์ฌ ์ ์์ ๋๋ ๋์ผํ ํ๋ผ๋ฏธํฐ ๊ตฌ์กฐ ์ฌ์ฉ (ํต์ผ์ฑ)
|
661 |
+
generate_kwargs = {
|
662 |
+
'input_ids': input_ids,
|
663 |
+
'attention_mask': attention_mask,
|
664 |
+
'pixel_values': pixel_values,
|
665 |
+
'image_metas': processed_image_metas, # ๐ ๊ณตํต์ผ๋ก ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ
|
666 |
**gen_config
|
667 |
+
}
|
668 |
+
print(f"๐ [DEBUG] LoRA ๋ชจ๋ธ ์์ ์ ๊ธฐ๋ณธ ๋ชจ๋ธ ์์ฑ ํ๋ผ๋ฏธํฐ: {list(generate_kwargs.keys())}")
|
669 |
+
print(f"๐ [DEBUG] ์ฒ๋ฆฌ๋ image_metas: {list(processed_image_metas.keys())}")
|
670 |
+
import torch as _torch
|
671 |
+
with _torch.inference_mode():
|
672 |
+
generated_ids = current_model.generate(**generate_kwargs)
|
673 |
else:
|
674 |
print(f"๐ [DEBUG] LoRA ์ด๋ํฐ ์์ (๋ฉํฐ๋ชจ๋ฌ), ๊ธฐ๋ณธ ๋ชจ๋ธ ์ฌ์ฉ")
|
675 |
+
# ๐ ๊ธฐ๋ณธ ๋ชจ๋ธ๋ ๋์ผํ ํ๋ผ๋ฏธํฐ ๊ตฌ์กฐ ์ฌ์ฉ (ํต์ผ์ฑ)
|
676 |
+
generate_kwargs = {
|
677 |
+
'input_ids': input_ids,
|
678 |
+
'attention_mask': attention_mask,
|
679 |
+
'pixel_values': pixel_values,
|
680 |
+
'image_metas': processed_image_metas, # ๐ ๊ณตํต์ผ๋ก ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ
|
681 |
**gen_config
|
682 |
+
}
|
683 |
+
print(f"๐ [DEBUG] ๊ธฐ๋ณธ ๋ชจ๋ธ ์์ฑ ํ๋ผ๋ฏธํฐ: {list(generate_kwargs.keys())}")
|
684 |
+
print(f"๐ [DEBUG] ์ฒ๋ฆฌ๋ image_metas: {list(processed_image_metas.keys())}")
|
685 |
+
import torch as _torch
|
686 |
+
with _torch.inference_mode():
|
687 |
+
generated_ids = current_model.generate(**generate_kwargs)
|
688 |
except ImportError:
|
689 |
print(f"๐ [DEBUG] LoRA ์ง์ ์๋จ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์ฌ์ฉ")
|
690 |
+
# ๐ ImportError ๋ฐ์ ์์๋ ๋์ผํ ํ๋ผ๋ฏธํฐ ๊ตฌ์กฐ ์ฌ์ฉ (ํต์ผ์ฑ)
|
691 |
+
generate_kwargs = {
|
692 |
+
'input_ids': input_ids,
|
693 |
+
'attention_mask': attention_mask,
|
694 |
+
'pixel_values': pixel_values,
|
695 |
+
'image_metas': processed_image_metas, # ๐ ๊ณตํต์ผ๋ก ์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง ๋ฉํ๋ฐ์ดํฐ
|
696 |
**gen_config
|
697 |
+
}
|
698 |
+
print(f"๐ [DEBUG] ImportError ์ ๊ธฐ๋ณธ ๋ชจ๋ธ ์์ฑ ํ๋ผ๋ฏธํฐ: {list(generate_kwargs.keys())}")
|
699 |
+
print(f"๐ [DEBUG] ์ฒ๋ฆฌ๋ image_metas: {list(processed_image_metas.keys())}")
|
700 |
+
import torch as _torch
|
701 |
+
with _torch.inference_mode():
|
702 |
+
generated_ids = current_model.generate(**generate_kwargs)
|
703 |
|
704 |
else:
|
705 |
# ํ
์คํธ-only: ๊ธฐ์กด ๋ฐฉ์
|
|
|
850 |
traceback.print_exc()
|
851 |
return {"error": f"Response extraction failed: {str(e)}"}
|
852 |
|
853 |
+
# --- 7. ์ปจํ
์คํธ ์ ์ฅ ๋ฐ ๊ฒฐ๊ณผ ๋ฐํ ---
|
854 |
total_time = time.time() - t_tok_start
|
855 |
print(f"๐ [DEBUG] ์ ์ฒด ์ฒ๋ฆฌ ์๋ฃ - ์ด ์์์๊ฐ: {total_time:.3f}์ด")
|
856 |
|
857 |
+
# ์ปจํ
์คํธ ๋์ ์ ์ฅ (์ธ์
/๋ฃธ ๋จ์)
|
858 |
+
try:
|
859 |
+
if use_context and session_id:
|
860 |
+
try:
|
861 |
+
from lily_llm_core.context_manager import context_manager
|
862 |
+
if context_manager:
|
863 |
+
# ์ฌ์ฉ์ ๋ฉ์์ง ์ ์ฅ (์ด๋ฏธ์ง ์ฌ๋ถ ๋ฉํ ํฌํจ)
|
864 |
+
context_manager.add_user_message(
|
865 |
+
prompt,
|
866 |
+
metadata={
|
867 |
+
"session_id": session_id,
|
868 |
+
"room_id": room_id,
|
869 |
+
"images_used": bool(all_image_data and len([img for img in all_image_data if img]) > 0),
|
870 |
+
"num_images": len([img for img in all_image_data if img]) if all_image_data else 0,
|
871 |
+
},
|
872 |
+
)
|
873 |
+
# ์ด์์คํดํธ ๋ฉ์์ง ์ ์ฅ
|
874 |
+
context_manager.add_assistant_message(
|
875 |
+
response,
|
876 |
+
metadata={
|
877 |
+
"session_id": session_id,
|
878 |
+
"room_id": room_id,
|
879 |
+
},
|
880 |
+
)
|
881 |
+
print(f"๐ [DEBUG] ์ปจํ
์คํธ ์ ์ฅ ์๋ฃ (์ธ์
: {session_id}, ๋ฃธ: {room_id})")
|
882 |
+
except Exception as _ctx_e:
|
883 |
+
print(f"โ ๏ธ [DEBUG] ์ปจํ
์คํธ ์ ์ฅ ์คํจ: {_ctx_e}")
|
884 |
+
except Exception:
|
885 |
+
pass
|
886 |
+
|
887 |
return {
|
888 |
"generated_text": response,
|
889 |
"processing_time": total_time,
|
lily_llm_api/services/session_registry.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Simple session/room → user mapping registry
|
3 |
+
- Backed by process memory (reset when the service restarts)
|
4 |
+
- Used to reconcile user_id mismatches between upload and generation
|
5 |
+
"""
|
6 |
+
from typing import Optional, Dict
|
7 |
+
import time
|
8 |
+
|
9 |
+
# In-process lookup tables. They are plain module globals, so their contents
# exist only for the lifetime of this interpreter and are emptied by clear().
# No locking is performed around them.
_room_to_user: Dict[str, str] = {}     # str(room_id) -> str(user_id)
_session_to_user: Dict[str, str] = {}  # str(session_id) -> str(user_id)
_last_user: Optional[str] = None       # user id passed to the latest successful set_* call
_last_updated_at: float = 0.0          # time.time() captured when _last_user was last set


def set_user_for_room(room_id: Optional[str], user_id: Optional[str]) -> None:
    """Record ``user_id`` as the owner of ``room_id``.

    Both values are stored as ``str``. The call is a silent no-op when either
    argument is falsy (``None``, ``""``, ...). On success the user also becomes
    the "last user" returned by :func:`get_last_user`.
    """
    if not room_id or not user_id:
        return
    _room_to_user[str(room_id)] = str(user_id)
    set_last_user(user_id)


def get_user_for_room(room_id: Optional[str]) -> Optional[str]:
    """Return the user id registered for ``room_id``, or ``None`` if unknown.

    A falsy ``room_id`` always yields ``None``. The lookup key is
    ``str(room_id)``, mirroring how :func:`set_user_for_room` stores it.
    """
    if not room_id:
        return None
    return _room_to_user.get(str(room_id))


def set_user_for_session(session_id: Optional[str], user_id: Optional[str]) -> None:
    """Record ``user_id`` as the owner of ``session_id``.

    Both values are stored as ``str``. The call is a silent no-op when either
    argument is falsy. On success the user also becomes the "last user"
    returned by :func:`get_last_user`.
    """
    if not session_id or not user_id:
        return
    _session_to_user[str(session_id)] = str(user_id)
    set_last_user(user_id)


def get_user_for_session(session_id: Optional[str]) -> Optional[str]:
    """Return the user id registered for ``session_id``, or ``None`` if unknown.

    A falsy ``session_id`` always yields ``None``. The lookup key is
    ``str(session_id)``, mirroring how :func:`set_user_for_session` stores it.
    """
    if not session_id:
        return None
    return _session_to_user.get(str(session_id))


def set_last_user(user_id: Optional[str]) -> None:
    """Remember ``user_id`` as the most recently seen user.

    A falsy ``user_id`` leaves the previous value (and its timestamp) intact.
    Otherwise the id is stored as ``str`` and the update time is refreshed
    from ``time.time()``.
    """
    global _last_user, _last_updated_at
    if not user_id:
        return
    _last_user = str(user_id)
    _last_updated_at = time.time()


def get_last_user() -> Optional[str]:
    """Return the most recently registered user id, or ``None`` if there is none."""
    return _last_user


def clear() -> None:
    """Forget every room/session mapping and reset the last-user bookkeeping."""
    # Declared first so the rebinding below is unmistakably module-level.
    global _last_user, _last_updated_at
    _room_to_user.clear()
    _session_to_user.clear()
    _last_user = None
    _last_updated_at = 0.0
|
52 |
+
|
53 |
+
|
lily_llm_core/document_processor.py
CHANGED
@@ -436,80 +436,69 @@ class DocumentProcessor:
|
|
436 |
img_pil = Image.open(io.BytesIO(img_data))
|
437 |
|
438 |
if self._is_valid_image(img_pil):
|
439 |
-
|
440 |
img_rect = self._get_image_rect(page, xref)
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
f"ํฌ๊ธฐ {img_pil.size}")
|
469 |
|
470 |
pix = None
|
471 |
|
472 |
except Exception as e:
|
473 |
logger.warning(f"โ ๏ธ ์ด๋ฏธ์ง {img_idx} ์ฒ๋ฆฌ ์คํจ: {e}")
|
474 |
|
475 |
-
# 2. ์ด๋ฏธ์ง๊ฐ ์์ผ๋ฉด ์ ์ฒด ํ์ด์ง ๋ ๋๋ง (fallback)
|
476 |
if not image_blocks:
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
"image_size": (pix.width, pix.height),
|
501 |
-
"image_format": "PNG",
|
502 |
-
"is_embedded": False,
|
503 |
-
"is_full_page_render": True
|
504 |
-
}
|
505 |
-
)
|
506 |
-
image_blocks.append(image_block)
|
507 |
-
|
508 |
-
logger.debug(f"๐ ์ ์ฒด ํ์ด์ง ๋ ๋๋ง: ํ์ด์ง {page_num + 1}")
|
509 |
-
pix = None
|
510 |
|
511 |
-
|
512 |
-
|
513 |
|
514 |
except Exception as e:
|
515 |
logger.warning(f"โ ๏ธ ํ์ด์ง {page_num + 1} ์ด๋ฏธ์ง ๋ธ๋ก ์ถ์ถ ์คํจ: {e}")
|
|
|
436 |
img_pil = Image.open(io.BytesIO(img_data))
|
437 |
|
438 |
if self._is_valid_image(img_pil):
|
439 |
+
# ์ด๋ฏธ์ง์ ์์น ์ ๋ณด ์ถ์ถ (์ค์!)
|
440 |
img_rect = self._get_image_rect(page, xref)
|
441 |
+
if img_rect:
|
442 |
+
bbox = BoundingBox(
|
443 |
+
x0=img_rect.x0,
|
444 |
+
y0=img_rect.y0,
|
445 |
+
x1=img_rect.x1,
|
446 |
+
y1=img_rect.y1
|
447 |
+
)
|
448 |
+
|
449 |
+
image_block = PDFBlock(
|
450 |
+
block_id=f"page_{page_num + 1}_image_{img_idx}",
|
451 |
+
block_type="image",
|
452 |
+
bbox=bbox,
|
453 |
+
content=img_data, # ๋ฐ์ด๋๋ฆฌ ๋ฐ์ดํฐ
|
454 |
+
page_num=page_num + 1,
|
455 |
+
metadata={
|
456 |
+
"image_size": img_pil.size,
|
457 |
+
"image_format": "PNG",
|
458 |
+
"image_mode": img_pil.mode,
|
459 |
+
"xref": xref,
|
460 |
+
"is_embedded": True
|
461 |
+
}
|
462 |
+
)
|
463 |
+
image_blocks.append(image_block)
|
464 |
+
|
465 |
+
logger.debug(f"๐ผ๏ธ ์ด๋ฏธ์ง ๋ธ๋ก ์ถ์ถ: ํ์ด์ง {page_num + 1}, "
|
466 |
+
f"์์น ({bbox.x0:.1f}, {bbox.y0:.1f}, {bbox.x1:.1f}, {bbox.y1:.1f}), "
|
467 |
+
f"ํฌ๊ธฐ {img_pil.size}")
|
|
|
468 |
|
469 |
pix = None
|
470 |
|
471 |
except Exception as e:
|
472 |
logger.warning(f"โ ๏ธ ์ด๋ฏธ์ง {img_idx} ์ฒ๋ฆฌ ์คํจ: {e}")
|
473 |
|
474 |
+
# 2. ์ด๋ฏธ์ง๊ฐ ์์ผ๋ฉด ์ ์ฒด ํ์ด์ง ๋ ๋๋ง (fallback - ํญ์ ์ํ)
|
475 |
if not image_blocks:
|
476 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # 2๋ฐฐ ํด์๋
|
477 |
+
img_data = pix.tobytes("png")
|
478 |
+
|
479 |
+
bbox = BoundingBox(
|
480 |
+
x0=0, y0=0,
|
481 |
+
x1=page.rect.width,
|
482 |
+
y1=page.rect.height
|
483 |
+
)
|
484 |
+
|
485 |
+
image_block = PDFBlock(
|
486 |
+
block_id=f"page_{page_num + 1}_fullpage",
|
487 |
+
block_type="image",
|
488 |
+
bbox=bbox,
|
489 |
+
content=img_data,
|
490 |
+
page_num=page_num + 1,
|
491 |
+
metadata={
|
492 |
+
"image_size": (pix.width, pix.height),
|
493 |
+
"image_format": "PNG",
|
494 |
+
"is_embedded": False,
|
495 |
+
"is_full_page_render": True
|
496 |
+
}
|
497 |
+
)
|
498 |
+
image_blocks.append(image_block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
499 |
|
500 |
+
logger.debug(f"๐ ์ ์ฒด ํ์ด์ง ๋ ๋๋ง: ํ์ด์ง {page_num + 1}")
|
501 |
+
pix = None
|
502 |
|
503 |
except Exception as e:
|
504 |
logger.warning(f"โ ๏ธ ํ์ด์ง {page_num + 1} ์ด๋ฏธ์ง ๋ธ๋ก ์ถ์ถ ์คํจ: {e}")
|