SOY NV AI
Add Gemini API integration with REST API support, improve error handling, and add markdown bold formatting for messages
665bcdc
from flask import Blueprint, render_template, request, jsonify, send_from_directory, redirect, url_for, flash
from flask_login import login_user, logout_user, login_required, current_user
from werkzeug.utils import secure_filename
from app.database import db, UploadedFile, User, ChatSession, ChatMessage, DocumentChunk, ParentChunk, SystemConfig
from app.vector_db import get_vector_db
from app.gemini_client import get_gemini_client
import requests
import os
from datetime import datetime
import uuid
import re
import json

main_bp = Blueprint('main', __name__)
def admin_required(f):
    """Decorator that requires admin privileges."""
    from functools import wraps
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not current_user.is_admin:
            # Return a JSON response for API requests
            if request.path.startswith('/api/'):
                return jsonify({'error': 'Admin privileges are required.'}), 403
            flash('Admin privileges are required.', 'error')
            return redirect(url_for('main.index'))
        return f(*args, **kwargs)
    return decorated_function
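# Usage sketch (illustrative; the route path and decorator ordering below are
# assumptions, not taken from this file). The decorator is meant to sit below
# the route and login decorators so it runs after authentication:
#
#   @main_bp.route('/api/admin/users')
#   @login_required
#   @admin_required
#   def some_admin_view():
#       ...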
# Ollama base URL (configurable via environment variable)
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')

# Upload settings
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'uploads')
ALLOWED_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'epub'}

# Print the upload folder path (for debugging)
print(f"[Upload config] Upload folder path: {UPLOAD_FOLDER}")
print(f"[Upload config] Upload folder exists: {os.path.exists(UPLOAD_FOLDER)}")
def allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def ensure_upload_folder():
    """Create the upload folder if it does not exist."""
    try:
        if not os.path.exists(UPLOAD_FOLDER):
            print(f"Creating upload folder: {UPLOAD_FOLDER}")
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
            if not os.path.exists(UPLOAD_FOLDER):
                raise Exception(f'Could not create the upload folder: {UPLOAD_FOLDER}')
        # Check write permission on the folder
        test_file = os.path.join(UPLOAD_FOLDER, '.write_test')
        try:
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            print(f"Upload folder write permission verified: {UPLOAD_FOLDER}")
        except PermissionError as e:
            raise Exception(f'No write permission for the upload folder: {UPLOAD_FOLDER} - {str(e)}')
        except Exception as e:
            raise Exception(f'Upload folder write test failed: {UPLOAD_FOLDER} - {str(e)}')
    except Exception as e:
        print(f"Upload folder creation error: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def split_text_into_chunks(text, min_chunk_size=200, max_chunk_size=1000, overlap=150):
    """Semantic text chunking (splits with sentence and paragraph boundaries in mind)."""
    if not text or len(text.strip()) == 0:
        return []
    # Step 1: split into paragraphs (on blank lines)
    paragraphs = re.split(r'\n\s*\n', text.strip())
    paragraphs = [p.strip() for p in paragraphs if p.strip()]
    if not paragraphs:
        return []
    # Step 2: split each paragraph into sentences
    # Sentence terminators: . ! ? (covers both Korean and English text)
    # Punctuation followed by whitespace or end of string marks a sentence boundary
    sentence_pattern = r'([.!?]+)(?=\s+|$)'
    all_sentences = []
    for para in paragraphs:
        # Split into sentences (keeping the punctuation)
        parts = re.split(sentence_pattern, para)
        combined_sentences = []
        current_sentence = ""
        for i, part in enumerate(parts):
            if part.strip():
                if re.match(r'^[.!?]+$', part):
                    # Punctuation: append it to the current sentence and finish the sentence
                    current_sentence += part
                    if current_sentence.strip():
                        combined_sentences.append(current_sentence.strip())
                    current_sentence = ""
                else:
                    # Text: append to the current sentence
                    current_sentence += part
        # Handle a trailing sentence (no terminal punctuation)
        if current_sentence.strip():
            combined_sentences.append(current_sentence.strip())
        # No sentences at all (a paragraph without any punctuation)
        if not combined_sentences and para.strip():
            combined_sentences.append(para.strip())
        all_sentences.extend(combined_sentences)
    if not all_sentences:
        # If sentence splitting failed, return the original text as-is
        return [text] if text.strip() else []
    # Step 3: accumulate sentences into meaningful chunks
    chunks = []
    current_chunk = []
    current_size = 0
    for sentence in all_sentences:
        sentence_size = len(sentence)
        # Adding this sentence would push the current chunk past the maximum size
        if current_size + sentence_size > max_chunk_size and current_chunk:
            # Save the current chunk (preserving line breaks)
            chunk_text = '\n'.join(current_chunk)
            if len(chunk_text.strip()) >= min_chunk_size:
                chunks.append(chunk_text)
            else:
                # If below the minimum size, merge into the previous chunk (overlap effect)
                if chunks:
                    chunks[-1] = chunks[-1] + '\n' + chunk_text
                else:
                    chunks.append(chunk_text)
            # Keep sentences for the overlap (carry the last few sentences into the next chunk)
            overlap_sentences = []
            overlap_size = 0
            for s in reversed(current_chunk):
                if overlap_size + len(s) <= overlap:
                    overlap_sentences.insert(0, s)
                    overlap_size += len(s) + 1  # account for the newline
                else:
                    break
            current_chunk = overlap_sentences + [sentence]
            current_size = overlap_size + sentence_size
        else:
            # Append the sentence to the current chunk
            current_chunk.append(sentence)
            current_size += sentence_size + 1  # account for the newline
    # Add the final chunk
    if current_chunk:
        chunk_text = '\n'.join(current_chunk)
        if chunks and len(chunk_text.strip()) < min_chunk_size:
            # If below the minimum size, merge into the previous chunk
            chunks[-1] = chunks[-1] + '\n' + chunk_text
        else:
            chunks.append(chunk_text)
    # Drop empty chunks and handle chunks below the minimum size
    final_chunks = []
    for chunk in chunks:
        chunk = chunk.strip()
        if chunk and len(chunk) >= min_chunk_size:
            final_chunks.append(chunk)
        elif chunk:
            # Merge undersized chunks into the previous chunk
            if final_chunks:
                final_chunks[-1] = final_chunks[-1] + '\n' + chunk
            else:
                final_chunks.append(chunk)
    return final_chunks if final_chunks else [text] if text.strip() else []
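# Illustrative example of the chunker's behaviour (sizes are demo values, not
# the app defaults of 200/1000/150):
#
#   text = "One. Two. Three. Four. Five."
#   split_text_into_chunks(text, min_chunk_size=5, max_chunk_size=12, overlap=6)
#
# Sentences are accumulated until max_chunk_size would be exceeded; each saved
# chunk joins its sentences with '\n', and up to ~overlap characters of
# trailing sentences are repeated at the start of the next chunk.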
def create_chunks_for_file(file_id, content):
    """Split file content into semantic chunks and store them (including in the vector DB)."""
    try:
        print(f"[Chunk creation] Starting chunk creation for file ID {file_id}")
        print(f"[Chunk creation] Source text length: {len(content)} chars")
        # Get the vector DB manager
        vector_db = get_vector_db()
        # Delete existing chunks (relational DB + vector DB)
        existing_chunks = DocumentChunk.query.filter_by(file_id=file_id).all()
        if existing_chunks:
            print(f"[Chunk creation] Deleting {len(existing_chunks)} existing chunks...")
            # Delete from the vector DB
            vector_db.delete_chunks_by_file_id(file_id)
            # Delete from the relational DB
            DocumentChunk.query.filter_by(file_id=file_id).delete()
            db.session.commit()
        # Semantic chunking (split on sentence and paragraph boundaries)
        # min_chunk_size: 200 chars minimum, max_chunk_size: 1000 chars maximum, overlap: 150 chars
        chunks = split_text_into_chunks(content, min_chunk_size=200, max_chunk_size=1000, overlap=150)
        print(f"[Chunk creation] Number of chunks: {len(chunks)}")
        if len(chunks) == 0:
            print(f"[Chunk creation] Warning: no chunks were created. The text may be too short or empty.")
            return 0
        # Save each chunk to the database and the vector DB
        saved_count = 0
        vector_saved_count = 0
        for idx, chunk_content in enumerate(chunks):
            try:
                # Save the chunk to the relational DB
                chunk = DocumentChunk(
                    file_id=file_id,
                    chunk_index=idx,
                    content=chunk_content
                )
                db.session.add(chunk)
                db.session.flush()  # generate the ID
                # Add the chunk to the vector DB
                if vector_db.add_chunk(
                    chunk_id=chunk.id,
                    chunk_content=chunk_content,
                    file_id=file_id,
                    chunk_index=idx
                ):
                    vector_saved_count += 1
                saved_count += 1
                # Progress output (every 10 chunks)
                if (idx + 1) % 10 == 0:
                    print(f"[Chunk creation] In progress: {idx + 1}/{len(chunks)} chunks saved... (DB: {saved_count}, vector DB: {vector_saved_count})")
            except Exception as e:
                print(f"[Chunk creation] Warning: error while saving chunk {idx}: {str(e)}")
                continue
        db.session.commit()
        print(f"[Chunk creation] Done: {saved_count} chunks saved to the database. (vector DB: {vector_saved_count})")
        # Verify the save
        verified_count = DocumentChunk.query.filter_by(file_id=file_id).count()
        if verified_count != saved_count:
            print(f"[Chunk creation] Warning: the saved chunk count ({saved_count}) does not match the verified count ({verified_count}).")
        else:
            print(f"[Chunk creation] Verification complete: {verified_count} chunks stored successfully.")
        return saved_count
    except Exception as e:
        db.session.rollback()
        print(f"[Chunk creation] Error: {str(e)}")
        import traceback
        traceback.print_exc()
        return 0
def create_parent_chunk_with_ai(file_id, content, model_name):
    """Create a Parent Chunk using AI (web novel analysis)."""
    try:
        print(f"[Parent Chunk creation] Starting Parent Chunk creation for file ID {file_id}")
        print(f"[Parent Chunk creation] Model in use: {model_name}")
        print(f"[Parent Chunk creation] Source text length: {len(content)} chars")
        # Handle a missing or empty model name
        if not model_name or not model_name.strip():
            print(f"[Parent Chunk creation] ❌ Error: no model name was provided.")
            return None
        # If the text is too long, use only a prefix (max 50,000 chars)
        content_preview = content[:50000] if len(content) > 50000 else content
        if len(content) > 50000:
            print(f"[Parent Chunk creation] Text is long; using a prefix: {len(content_preview)} chars (total: {len(content)} chars)")
        # Build the analysis prompt
        analysis_prompt = f"""Analyze the following web novel text and fill in the sections below. Write each section clearly and specifically.

Text content:
{content_preview}

Analyze the text above and answer in the following format:

## World View
[Write a detailed description of the world view, including background, setting, rules, etc.]

## Main Character Analysis
[Analyze the main characters: names, roles, personalities, and traits. Write a separate entry per character.]

## Main Story Analysis
[Analyze the overall story flow, major events, and conflict structure.]

## Main Episode Analysis
[Analyze the key episodes or per-chapter highlights, preferably in chronological order.]

## Others
[Write any important information or traits not covered by the categories above.]

Separate each section clearly."""
        # Determine the model type (Gemini or Ollama)
        # Gemini model name formats: "gemini:<model>" or "gemini-1.5-flash" (the prefix-less form is also supported)
        model_name_lower = model_name.lower().strip()
        is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
        print(f"[Parent Chunk creation] Model type check: is_gemini={is_gemini}, model_name={model_name}")
        if is_gemini:
            # Call the Gemini API
            # Strip the "gemini:" prefix from the model name (case-insensitive)
            gemini_model_name = model_name.strip()
            if gemini_model_name.lower().startswith('gemini:'):
                gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
            # Names that start with "gemini-" (e.g. "gemini-1.5-flash") are used as-is
            print(f"[Parent Chunk creation] Sending analysis request to the Gemini API... (model: {gemini_model_name})")
            print(f"[Parent Chunk creation] Original model name: {model_name} -> Gemini model name: {gemini_model_name}")
            gemini_client = get_gemini_client()
            if not gemini_client.is_configured():
                print(f"[Parent Chunk creation] ❌ Error: the Gemini API key is not configured.")
                print(f"[Parent Chunk creation] Debug: checking the Gemini client state...")
                # Re-check the API key state
                from app.gemini_client import get_gemini_api_key
                api_key = get_gemini_api_key()
                if api_key:
                    print(f"[Parent Chunk creation] Debug: an API key exists but the client is not configured. (length: {len(api_key)})")
                else:
                    print(f"[Parent Chunk creation] Debug: no API key in the database.")
                return None
            print(f"[Parent Chunk creation] Gemini API key verified. Starting the API call...")
            result = gemini_client.generate_response(
                prompt=analysis_prompt,
                model_name=gemini_model_name,
                temperature=0.7,
                max_output_tokens=8192
            )
            if result['error']:
                print(f"[Parent Chunk creation] ❌ Error: Gemini API call failed - {result['error']}")
                print(f"[Parent Chunk creation] Debug: result object contents: {result}")
                return None
            if not result.get('response'):
                print(f"[Parent Chunk creation] ❌ Error: the Gemini API response is empty.")
                print(f"[Parent Chunk creation] Debug: result object contents: {result}")
                return None
            analysis_result = result['response']
            print(f"[Parent Chunk creation] Gemini API response received: {len(analysis_result)} chars")
        else:
            # Call the Ollama API
            print(f"[Parent Chunk creation] Sending analysis request to the Ollama API... (model: {model_name})")
            try:
                ollama_response = requests.post(
                    f'{OLLAMA_BASE_URL}/api/chat',
                    json={
                        'model': model_name,
                        'messages': [
                            {
                                'role': 'user',
                                'content': analysis_prompt
                            }
                        ],
                        'stream': False
                    },
                    timeout=300  # 5-minute timeout
                )
                if ollama_response.status_code != 200:
                    error_detail = ollama_response.text if ollama_response.text else 'no details'
                    if ollama_response.status_code == 404:
                        error_msg = f'Ollama API error 404: model "{model_name}" was not found. Check that it is installed in Ollama.'
                        print(f"[Parent Chunk creation] ❌ Error: {error_msg}")
                        print(f"[Parent Chunk creation] Debug: to use a Gemini model, the model name must start with 'gemini:' or 'gemini-'.")
                    else:
                        error_msg = f'Ollama API error: {ollama_response.status_code} - {error_detail[:200]}'
                        print(f"[Parent Chunk creation] ❌ Error: {error_msg}")
                    return None
                response_data = ollama_response.json()
                analysis_result = response_data.get('message', {}).get('content', '')
                print(f"[Parent Chunk creation] Ollama API response received: {len(analysis_result)} chars")
            except requests.exceptions.RequestException as e:
                print(f"[Parent Chunk creation] ❌ Ollama API connection error: {str(e)}")
                print(f"[Parent Chunk creation] Debug: Ollama URL: {OLLAMA_BASE_URL}")
                raise
        if not analysis_result:
            print(f"[Parent Chunk creation] ⚠️ Warning: the analysis result is empty.")
            return None
        print(f"[Parent Chunk creation] Analysis result received: {len(analysis_result)} chars")
        # Parse the analysis result
        world_view = ""
        characters = ""
        story = ""
        episodes = ""
        others = ""
        # Header variants for each section (these must match the prompt above)
        sections = {
            'world_view': ['## World View', 'World View'],
            'characters': ['## Main Character Analysis', '## Main Characters', 'Main Character Analysis', '## Characters'],
            'story': ['## Main Story Analysis', '## Main Story', 'Main Story Analysis', '## Story'],
            'episodes': ['## Main Episode Analysis', '## Main Episodes', 'Main Episode Analysis', '## Episodes'],
            'others': ['## Others', 'Others']
        }
        lines = analysis_result.split('\n')
        current_section = None
        current_content = []
        for line in lines:
            line_stripped = line.strip()
            # Check for a section header
            section_found = False
            for section_key, section_headers in sections.items():
                for header in section_headers:
                    if header in line_stripped:
                        # Save the previous section
                        if current_section:
                            if current_section == 'world_view':
                                world_view = '\n'.join(current_content).strip()
                            elif current_section == 'characters':
                                characters = '\n'.join(current_content).strip()
                            elif current_section == 'story':
                                story = '\n'.join(current_content).strip()
                            elif current_section == 'episodes':
                                episodes = '\n'.join(current_content).strip()
                            elif current_section == 'others':
                                others = '\n'.join(current_content).strip()
                        current_section = section_key
                        current_content = []
                        section_found = True
                        break
                if section_found:
                    break
            if not section_found and current_section:
                # Append the line to the current section
                if line_stripped and not line_stripped.startswith('#'):
                    current_content.append(line)
        # Save the last section
        if current_section:
            if current_section == 'world_view':
                world_view = '\n'.join(current_content).strip()
            elif current_section == 'characters':
                characters = '\n'.join(current_content).strip()
            elif current_section == 'story':
                story = '\n'.join(current_content).strip()
            elif current_section == 'episodes':
                episodes = '\n'.join(current_content).strip()
            elif current_section == 'others':
                others = '\n'.join(current_content).strip()
        # If parsing failed entirely, store the whole output under "others"
        if not world_view and not characters and not story and not episodes:
            print(f"[Parent Chunk creation] Warning: section parsing failed. Storing the full output under 'others'.")
            others = analysis_result.strip()
        # Delete any existing Parent Chunk
        existing_parent = ParentChunk.query.filter_by(file_id=file_id).first()
        if existing_parent:
            db.session.delete(existing_parent)
            db.session.commit()
            print(f"[Parent Chunk creation] Existing Parent Chunk deleted")
        # Create and save the Parent Chunk
        parent_chunk = ParentChunk(
            file_id=file_id,
            world_view=world_view if world_view else None,
            characters=characters if characters else None,
            story=story if story else None,
            episodes=episodes if episodes else None,
            others=others if others else None
        )
        db.session.add(parent_chunk)
        db.session.commit()
        print(f"[Parent Chunk creation] ✅ Done: the Parent Chunk has been created.")
        print(f"[Parent Chunk creation] - World view: {len(world_view)} chars")
        print(f"[Parent Chunk creation] - Characters: {len(characters)} chars")
        print(f"[Parent Chunk creation] - Story: {len(story)} chars")
        print(f"[Parent Chunk creation] - Episodes: {len(episodes)} chars")
        print(f"[Parent Chunk creation] - Others: {len(others)} chars")
        return parent_chunk
    except requests.exceptions.RequestException as e:
        error_msg = f'Ollama API connection error: {str(e)}'
        print(f"[Parent Chunk creation] ❌ Error: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
    except Exception as e:
        db.session.rollback()
        error_msg = f'Error while creating the Parent Chunk: {str(e)}'
        print(f"[Parent Chunk creation] ❌ Error: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
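# Call sketch (the file ID and text below are hypothetical; only the
# signature and return contract come from the function above):
#
#   parent = create_parent_chunk_with_ai(file_id=42, content=novel_text,
#                                        model_name='gemini:gemini-1.5-flash')
#   if parent is None:
#       # model name missing, API key unset, or the analysis/parsing failed
#       ...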
def get_parent_chunks_for_files(file_ids):
    """Fetch the Parent Chunks for a list of file IDs (for overall context)."""
    try:
        if not file_ids:
            return []
        parent_chunks = []
        for file_id in file_ids:
            parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
            if parent_chunk:
                parent_chunks.append(parent_chunk)
        return parent_chunks
    except Exception as e:
        print(f"[Parent Chunk lookup] Error: {str(e)}")
        return []
def search_relevant_chunks(query, file_ids=None, model_name=None, top_k=5, min_score=1):
    """
    Search for chunks relevant to the question (vector search + re-ranking).
    1. Retrieve an initial 30 documents via vector search.
    2. Re-rank them with a Cross-Encoder.
    3. Return the top_k results (default 5).
    """
    try:
        # Get the vector DB manager
        vector_db = get_vector_db()
        # Expand the file IDs (include continuation uploads)
        expanded_file_ids = None
        if file_ids:
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                # For original files, also include their continuation uploads
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            # If an original file was selected, include its continuation uploads as well
            parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all()
            for parent_file in parent_files:
                child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all()
                expanded_file_ids.extend([child.id for child in child_files])
        # If model filtering is requested, filter the file IDs by model
        if model_name and expanded_file_ids:
            filtered_files = UploadedFile.query.filter(
                UploadedFile.id.in_(expanded_file_ids),
                UploadedFile.model_name == model_name
            ).all()
            expanded_file_ids = [f.id for f in filtered_files]
        elif model_name and not expanded_file_ids:
            # Without file IDs, filter by model name only
            filtered_files = UploadedFile.query.filter_by(model_name=model_name).all()
            expanded_file_ids = [f.id for f in filtered_files]
        # Step 1: retrieve the initial 30 documents via vector search
        print(f"[Vector search] Query: {query[:50]}..., file IDs: {expanded_file_ids if expanded_file_ids else 'all files'}")
        vector_results = vector_db.search_chunks(
            query=query,
            file_ids=expanded_file_ids,
            top_k=30
        )
        if not vector_results:
            print(f"[Vector search] No results; falling back to keyword-based search")
            # Without vector results, fall back to the legacy keyword-based search
            return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
        # Step 2: re-rank with the Cross-Encoder
        print(f"[Re-ranking] Starting re-rank of {len(vector_results)} chunks...")
        reranked_chunks = vector_db.rerank_chunks(
            query=query,
            chunks=vector_results,
            top_k=top_k
        )
        # Step 3: fetch the chunk objects from the DB
        final_chunks = []
        for reranked in reranked_chunks:
            chunk_id = reranked['chunk_id']
            chunk = DocumentChunk.query.get(chunk_id)
            if chunk:
                final_chunks.append(chunk)
        print(f"[Vector search + re-ranking] Returning {len(final_chunks)} final chunks")
        return final_chunks
    except Exception as e:
        print(f"[Vector search] Error: {str(e)}")
        import traceback
        traceback.print_exc()
        # On error, fall back to the legacy keyword-based search
        print(f"[Vector search] Falling back to keyword-based search")
        return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
def search_relevant_chunks_fallback(query, file_ids=None, model_name=None, top_k=25, min_score=1):
    """Legacy keyword-based search (fallback)."""
    try:
        # Prepare the search query - extract both Korean and English words
        query_words = set(re.findall(r'[가-힣]+|\w+', query.lower()))
        if not query_words:
            return []
        # Fetch chunks
        query_obj = DocumentChunk.query.join(UploadedFile)
        if file_ids:
            # Include the selected file IDs plus every file uploaded as a continuation of them
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                # For original files, also include their continuation uploads
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            # If an original file was selected, include its continuation uploads as well
            parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all()
            for parent_file in parent_files:
                child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            query_obj = query_obj.filter(UploadedFile.id.in_(expanded_file_ids))
        if model_name:
            query_obj = query_obj.filter(UploadedFile.model_name == model_name)
        all_chunks = query_obj.all()
        if not all_chunks:
            return []
        # Compute a relevance score for each chunk (improved algorithm)
        scored_chunks = []
        for chunk in all_chunks:
            chunk_content_lower = chunk.content.lower()
            chunk_words = set(re.findall(r'[가-힣]+|\w+', chunk_content_lower))
            # 1. Number of shared words (base score)
            common_words = query_words & chunk_words
            base_score = len(common_words)
            # 2. Query word frequency weight (more occurrences of query words score higher)
            frequency_score = 0
            for word in query_words:
                frequency_score += chunk_content_lower.count(word)
            # 3. Query word ratio (share of the chunk's words that are query words)
            if len(chunk_words) > 0:
                ratio_score = len(common_words) / len(chunk_words) * 10
            else:
                ratio_score = 0
            # Final score (weighted combination)
            final_score = base_score * 2 + frequency_score * 0.5 + ratio_score
            # Keep only chunks at or above the minimum score
            if final_score >= min_score:
                scored_chunks.append((final_score, chunk))
        # Sort by score and take the top k
        scored_chunks.sort(key=lambda x: x[0], reverse=True)
        # Select the top_k chunks
        top_chunks = [chunk for score, chunk in scored_chunks[:top_k]]
        return top_chunks
    except Exception as e:
        print(f"[Keyword search] Error: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
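# Worked scoring example (hypothetical numbers): for a query with 3 words, a
# chunk sharing 2 of them (base_score = 2), containing 6 total occurrences of
# query words (frequency_score = 6), and made of 40 distinct words
# (ratio_score = 2/40 * 10 = 0.5) scores
#
#   final_score = 2*2 + 6*0.5 + 0.5 = 7.5
#
# which passes the default min_score of 1.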
def login():
    """Login page."""
    if current_user.is_authenticated:
        # Redirect admins to the admin page
        if current_user.is_admin:
            return redirect(url_for('main.admin'))
        return redirect(url_for('main.index'))
    if request.method == 'POST':
        username = request.form.get('username', '').strip()
        password = request.form.get('password', '')
        if not username or not password:
            flash('Please enter a username and password.', 'error')
            return render_template('login.html')
        user = User.query.filter_by(username=username).first()
        if user and user.check_password(password) and user.is_active:
            login_user(user)
            user.last_login = datetime.utcnow()
            db.session.commit()
            next_page = request.args.get('next')
            # Redirect admins to the admin page
            if user.is_admin:
                return redirect(next_page) if next_page else redirect(url_for('main.admin'))
            return redirect(next_page) if next_page else redirect(url_for('main.index'))
        else:
            flash('Invalid username or password.', 'error')
    return render_template('login.html')

def logout():
    """Log the user out."""
    logout_user()
    flash('You have been logged out.', 'info')
    return redirect(url_for('main.login'))
def index():
    return render_template('index.html')

def admin():
    """Admin page."""
    users = User.query.order_by(User.created_at.desc()).all()
    return render_template('admin.html', users=users)

def admin_messages():
    """Admin message review page."""
    return render_template('admin_messages.html')

def admin_webnovels():
    """Web novel management page."""
    return render_template('admin_webnovels.html')
def get_users():
    """User list API."""
    try:
        users = User.query.order_by(User.created_at.desc()).all()
        return jsonify({
            'users': [user.to_dict() for user in users]
        }), 200
    except Exception as e:
        return jsonify({'error': f'An error occurred while fetching the user list: {str(e)}'}), 500

def create_user():
    """User creation API."""
    try:
        data = request.json
        username = data.get('username', '').strip()
        nickname = data.get('nickname', '').strip()
        password = data.get('password', '')
        is_admin = data.get('is_admin', False)
        if not username or not password:
            return jsonify({'error': 'Please enter a username and password.'}), 400
        if User.query.filter_by(username=username).first():
            return jsonify({'error': 'That username already exists.'}), 400
        user = User(username=username, nickname=nickname if nickname else None, is_admin=is_admin, is_active=True)
        user.set_password(password)
        db.session.add(user)
        db.session.commit()
        return jsonify({
            'message': 'The user was created successfully.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'An error occurred while creating the user: {str(e)}'}), 500
def update_user(user_id):
    """User update API."""
    try:
        user = User.query.get_or_404(user_id)
        data = request.json
        # Prevent removing your own admin privileges
        if user_id == current_user.id and data.get('is_admin') == False:
            return jsonify({'error': 'You cannot remove your own admin privileges.'}), 400
        if 'username' in data:
            new_username = data['username'].strip()
            if new_username != user.username:
                if User.query.filter_by(username=new_username).first():
                    return jsonify({'error': 'That username already exists.'}), 400
                user.username = new_username
        if 'nickname' in data:
            user.nickname = data['nickname'].strip() if data['nickname'] else None
        if 'password' in data and data['password']:
            user.set_password(data['password'])
        if 'is_admin' in data:
            user.is_admin = data['is_admin']
        if 'is_active' in data:
            user.is_active = data['is_active']
        db.session.commit()
        return jsonify({
            'message': 'The user was updated successfully.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'An error occurred while updating the user: {str(e)}'}), 500
def get_all_messages():
    """Fetch all messages (for admins)."""
    try:
        user_id = request.args.get('user_id', type=int)
        session_id = request.args.get('session_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        query = ChatMessage.query.join(ChatSession)
        if user_id:
            query = query.filter(ChatSession.user_id == user_id)
        if session_id:
            query = query.filter(ChatMessage.session_id == session_id)
        messages = query.order_by(ChatMessage.created_at.desc())\
            .paginate(page=page, per_page=per_page, error_out=False)
        return jsonify({
            'messages': [msg.to_dict() for msg in messages.items],
            'total': messages.total,
            'pages': messages.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'An error occurred while fetching messages: {str(e)}'}), 500

def get_all_sessions():
    """Fetch all chat sessions (for admins)."""
    try:
        user_id = request.args.get('user_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        query = ChatSession.query
        if user_id:
            query = query.filter(ChatSession.user_id == user_id)
        sessions = query.order_by(ChatSession.updated_at.desc())\
            .paginate(page=page, per_page=per_page, error_out=False)
        sessions_data = []
        for session in sessions.items:
            session_dict = session.to_dict()
            session_dict['username'] = session.user.username if session.user else 'Unknown'
            session_dict['nickname'] = session.user.nickname if session.user else None
            sessions_data.append(session_dict)
        return jsonify({
            'sessions': sessions_data,
            'total': sessions.total,
            'pages': sessions.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'An error occurred while fetching chat sessions: {str(e)}'}), 500
def delete_user(user_id):
    """User deletion API."""
    try:
        user = User.query.get_or_404(user_id)
        # Prevent deleting yourself
        if user_id == current_user.id:
            return jsonify({'error': 'You cannot delete yourself.'}), 400
        db.session.delete(user)
        db.session.commit()
        return jsonify({'message': 'The user was deleted successfully.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'An error occurred while deleting the user: {str(e)}'}), 500
def get_gemini_api_key():
    """Fetch the Gemini API key."""
    try:
        # Get the API key from SystemConfig (returns an empty string if the table is missing)
        api_key = SystemConfig.get_config('gemini_api_key', '')
        # Return a masked value for security (show only the first 8 characters)
        masked_key = api_key[:8] + '...' if api_key and len(api_key) > 8 else ''
        return jsonify({
            'has_api_key': bool(api_key),
            'masked_key': masked_key
        }), 200
    except Exception as e:
        print(f"[Gemini API key lookup] Error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'An error occurred while fetching the API key: {str(e)}'}), 500

def set_gemini_api_key():
    """Save/update the Gemini API key."""
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type is not application/json.'}), 400
        data = request.json
        if not data:
            return jsonify({'error': 'No request data.'}), 400
        api_key = data.get('api_key', '').strip()
        if not api_key:
            return jsonify({'error': 'Please enter an API key.'}), 400
        # Save the API key (SystemConfig.set_config handles table creation internally)
        SystemConfig.set_config(
            key='gemini_api_key',
            value=api_key,
            description='Google Gemini API key'
        )
        # Tell the Gemini client to reload the API key
        try:
            from app.gemini_client import reset_gemini_client
            reset_gemini_client()
            print(f"[Gemini] The API key was updated and the client has been reloaded.")
        except Exception as e:
            print(f"[Gemini] API key reload failed: {e}")
        return jsonify({
            'message': 'The Gemini API key was saved successfully.',
            'has_api_key': True
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[Gemini API key save] Error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'An error occurred while saving the API key: {str(e)}'}), 500
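# Client-side sketch for this endpoint (the URL path is an assumption; only
# the JSON body and response shape come from the handler above):
#
#   import requests
#   resp = requests.post('http://localhost:5000/api/gemini/api-key',
#                        json={'api_key': 'AIza...'},
#                        headers={'Content-Type': 'application/json'})
#   # -> {'message': 'The Gemini API key was saved successfully.', 'has_api_key': True}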
def get_ollama_models():
    """Fetch the available models from Ollama and Gemini."""
    try:
        all_models = []
        # 1. Fetch the Ollama model list
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                data = response.json()
                ollama_models = [{'name': model['name'], 'type': 'ollama'} for model in data.get('models', [])]
                all_models.extend(ollama_models)
                print(f"[Model list] Added {len(ollama_models)} Ollama models")
        except Exception as e:
            print(f"[Model list] Failed to fetch the Ollama model list: {e}")
        # 2. Fetch the Gemini model list
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = gemini_client.get_available_models()
                gemini_model_list = [{'name': f'gemini:{model_name}', 'type': 'gemini'} for model_name in gemini_models]
                all_models.extend(gemini_model_list)
                print(f"[Model list] Added {len(gemini_model_list)} Gemini models")
            else:
                print(f"[Model list] The Gemini API key is not configured, so Gemini models cannot be loaded.")
        except Exception as e:
            print(f"[Model list] Failed to fetch the Gemini model list: {e}")
        if all_models:
            return jsonify({'models': all_models})
        else:
            return jsonify({'error': 'No models are available. Check that Ollama is running or that the Gemini API key is configured.', 'models': []}), 500
    except Exception as e:
        return jsonify({'error': f'An error occurred while fetching the model list: {str(e)}', 'models': []}), 500
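# Response shape sketch for the model-list endpoint (derived from the code
# above; the model names shown are hypothetical):
#
#   {'models': [{'name': 'llama3:8b', 'type': 'ollama'},
#               {'name': 'gemini:gemini-1.5-flash', 'type': 'gemini'}]}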
def chat():
    """Chat API endpoint."""
    try:
        data = request.json
        message = data.get('message', '')
        model = data.get('model', '')
        file_ids = [int(fid) for fid in data.get('file_ids', []) if fid]  # selected web novel file IDs
        session_id = data.get('session_id', None)  # chat session ID
        if not message:
            return jsonify({'error': 'A message is required.'}), 400
        # If a model was selected, call it (Gemini or Ollama)
        if model:
            try:
                # RAG: search for chunks relevant to the question
                context = ""
                use_rag = True  # whether to use RAG
                if use_rag:
                    print(f"\n[RAG search] Model: {model}, question: {message[:50]}...")
                    print(f"[RAG search] Selected file IDs: {file_ids if file_ids else 'none (searching all files)'}")
                    # Step 1: grasp the overall context via Parent Chunks
                    parent_chunks = []
                    if file_ids:
                        print(f"[RAG search step 1] Fetching Parent Chunks...")
                        parent_chunks = get_parent_chunks_for_files(file_ids)
                        print(f"[RAG search step 1] Parent Chunks fetched: {len(parent_chunks)} files")
                    # Step 2: precise Child Chunk retrieval via vector search + re-ranking
                    print(f"[RAG search step 2] Starting vector search + re-ranking...")
                    relevant_chunks = search_relevant_chunks(
                        query=message,
                        file_ids=file_ids if file_ids else None,
                        model_name=model,
                        top_k=5,  # keep only the top 5 after re-ranking
                        min_score=0.5  # minimum score threshold
                    )
                    print(f"[RAG search step 2] Vector search + re-ranking done: {len(relevant_chunks)} chunks (top 5)")
                    # Build the context
                    context_parts = []
                    # Add Parent Chunk information (for overall context)
                    if parent_chunks:
                        parent_context_sections = []
                        for parent_chunk in parent_chunks:
                            file = parent_chunk.file
                            file_info = f"\n=== Overview of {file.original_filename} ===\n"
                            sections = []
                            if parent_chunk.world_view:
                                sections.append(f"[World view]\n{parent_chunk.world_view}")
                            if parent_chunk.characters:
                                sections.append(f"[Main characters]\n{parent_chunk.characters}")
                            if parent_chunk.story:
                                sections.append(f"[Main story]\n{parent_chunk.story}")
                            if parent_chunk.episodes:
                                sections.append(f"[Main episodes]\n{parent_chunk.episodes}")
                            if parent_chunk.others:
                                sections.append(f"[Other information]\n{parent_chunk.others}")
                            if sections:
                                parent_context_sections.append(file_info + "\n\n".join(sections))
                        if parent_context_sections:
                            parent_context = "\n\n".join(parent_context_sections)
                            context_parts.append(f"The following is the overall context and overview of the web novel:\n\n{parent_context}")
                            print(f"[RAG search] Parent Chunk context added: {len(parent_context)} chars")
                    # Add Child Chunk information (precise search results)
                    if relevant_chunks:
                        child_context_parts = []
                        seen_files = set()
                        for chunk in relevant_chunks:
                            file = chunk.file
                            if file.original_filename not in seen_files:
                                seen_files.add(file.original_filename)
                                print(f"[RAG search] File used: {file.original_filename} (model: {file.model_name})")
                            child_context_parts.append(f"[{file.original_filename} - chunk {chunk.chunk_index + 1}]\n{chunk.content}")
                        if child_context_parts:
                            # Check and cap the context length
                            full_child_context = "\n\n".join(child_context_parts)
                            child_context_length = len(full_child_context)
                            # If the Child Chunk context is too long, use only part of it (max 15,000 chars)
                            if child_context_length > 15000:
                                truncated_parts = []
                                current_length = 0
                                for part in child_context_parts:
                                    if current_length + len(part) > 15000:
                                        break
                                    truncated_parts.append(part)
                                    current_length += len(part)
                                full_child_context = "\n\n".join(truncated_parts)
                                print(f"[RAG search] Child Chunk context trimmed: {child_context_length} chars → {len(full_child_context)} chars")
                            context_parts.append(f"The following is specific content from the web novel relevant to the question (precise search results, {len(relevant_chunks)} chunks total):\n\n{full_child_context}")
                            print(f"[RAG search] Child Chunk context added: {len(full_child_context)} chars")
                    # Assemble the final context
                    if context_parts:
                        full_context = "\n\n" + "\n\n---\n\n".join(context_parts) + "\n\n"
                        # Both Parent Chunks and Child Chunks available
                        if parent_chunks and relevant_chunks:
                            context = f"""The following is web novel information for answering the question:
{full_context}
Answer with reference to the information above:
- First understand the overall context (Parent Chunk) to grasp the novel's background and setting.
- Then use the specific content (Child Chunks) to give an accurate answer to the question.
- Write a consistent answer that respects the novel's context and story.

Important: when answering the question, you must ground your answer in the provided [novel text].
After every sentence of your answer, append the supporting source sentence in the form [Evidence: "sentence..."].
If you cannot find evidence, answer "I cannot find this in the text" and do not make anything up.

Question:
"""
                        elif parent_chunks:
                            # Only Parent Chunks available
                            context = f"""The following is the overall context and overview of the web novel:
{full_context}
Answer the question with reference to the information above, taking the novel's background and setting into account.

Important: when answering the question, you must ground your answer in the provided [novel text].
After every sentence of your answer, append the supporting source sentence in the form [Evidence: "sentence..."].
If you cannot find evidence, answer "I cannot find this in the text" and do not make anything up.

Question:
"""
                        else:
                            # Only Child Chunks available
                            context = f"""The following is specific content from the web novel relevant to the question:
{full_context}
Drawing fully on the content above, answer the following question accurately and in detail, respecting the novel's context and story.

Important: when answering the question, you must ground your answer in the provided [novel text].
After every sentence of your answer, append the supporting source sentence in the form [Evidence: "sentence..."].
If you cannot find evidence, answer "I cannot find this in the text" and do not make anything up.

Question:
"""
                        # Note: the question itself is appended once when the final prompt is built below
                        print(f"[RAG search] Final context built (Parent Chunks: {len(parent_chunks)}, Child Chunks: {len(relevant_chunks)}, {len(context)} chars total)")
                    else:
                        # If RAG found nothing, fall back to the legacy approach
                        print(f"[RAG search] No relevant chunks found. Using full file contents")
                        use_rag = False
                # If RAG produced nothing or is disabled, use the legacy approach
                if not context and not use_rag:
                    if file_ids:
                        # Include continuation uploads of the selected files
                        expanded_file_ids = list(file_ids)
                        for file_id in file_ids:
                            # For original files, also include their continuation uploads
                            child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                            expanded_file_ids.extend([child.id for child in child_files])
                        uploaded_files = UploadedFile.query.filter(
                            UploadedFile.id.in_(expanded_file_ids),
                            UploadedFile.model_name == model
                        ).all()
                        print(f"[File usage] Queried by selected file IDs (including continuation uploads): {len(uploaded_files)} files")
                    else:
                        # With no file IDs, use all files for the model (originals and continuation uploads)
                        uploaded_files = UploadedFile.query.filter_by(model_name=model).all()
                        print(f"[File usage] Using all files for model '{model}': {len(uploaded_files)} files")
                    if uploaded_files:
                        print(f"[File usage] Files in use:")
                        for f in uploaded_files:
                            is_child = f.parent_file_id is not None
                            prefix = "  └─ " if is_child else "  - "
                            print(f"{prefix}{f.original_filename} (model: {f.model_name})")
                    context_parts = []
                    for file in uploaded_files:
                        try:
                            if os.path.exists(file.file_path):
                                encoding = 'utf-8'
                                try:
                                    with open(file.file_path, 'r', encoding=encoding) as f:
                                        file_content = f.read()
                                except UnicodeDecodeError:
                                    with open(file.file_path, 'r', encoding='cp949') as f:
                                        file_content = f.read()
                                # If the file content is too long, use only part of it (raised to max 20,000 chars)
                                if len(file_content) > 20000:
                                    file_content = file_content[:20000] + "..."
                                context_parts.append(f"[{file.original_filename}]\n{file_content}")
                        except Exception as e:
                            print(f"File read error ({file.original_filename}): {str(e)}")
                            continue
                    if context_parts:
                        context = "\n\n".join(context_parts)
                        context = f"""The following is the ingested web novel content:
{context}
Answer the following question with reference to the content above.

Important: when answering the question, you must ground your answer in the provided [novel text].
After every sentence of your answer, append the supporting source sentence in the form [Evidence: "sentence..."].
If you cannot find evidence, answer "I cannot find this in the text" and do not make anything up.

Question:
"""
                # Build the prompt
                full_prompt = context + message if context else message
                # Determine the model type (Gemini or Ollama)
                is_gemini = model.startswith('gemini:')
                if is_gemini:
                    # Call the Gemini API
                    gemini_model_name = model.replace('gemini:', '')
                    print(f"[Gemini] Model: {gemini_model_name}, question: {message[:50]}...")
                    gemini_client = get_gemini_client()
                    if not gemini_client.is_configured():
                        return jsonify({'error': 'The Gemini API key is not configured. Set the GEMINI_API_KEY environment variable.'}), 500
                    result = gemini_client.generate_response(
                        prompt=full_prompt,
                        model_name=gemini_model_name,
                        temperature=0.7,
                        max_output_tokens=8192
                    )
                    if result['error']:
                        return jsonify({'error': result['error']}), 500
                    response_text = result['response']
                else:
                    # Call the Ollama API
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model,
                            'prompt': full_prompt,
                            'stream': False
                        },
                        timeout=120  # longer timeout, since many files may be involved
                    )
                    if ollama_response.status_code != 200:
                        # Extract error details
                        try:
                            error_detail = ollama_response.json().get('error', ollama_response.text[:200])
                        except Exception:
                            error_detail = ollama_response.text[:200] if ollama_response.text else 'no details'
                        if ollama_response.status_code == 404:
                            error_msg = f'Model "{model}" was not found. Check that it is installed in Ollama. (error: {error_detail})'
                        else:
                            error_msg = f'Ollama server error: {ollama_response.status_code} (error: {error_detail})'
                        return jsonify({'error': error_msg}), ollama_response.status_code
                    ollama_data = ollama_response.json()
                    response_text = ollama_data.get('response', 'Could not generate a response.')
                # Save the messages to the chat session (shared by Gemini and Ollama)
                session_id = data.get('session_id')
                session_dict = None
                if session_id:
                    try:
                        session = ChatSession.query.filter_by(
                            id=session_id,
                            user_id=current_user.id
                        ).first()
                        if session:
                            # Check whether the user message is already saved (duplicate prevention)
                            # Look at the most recent message to avoid double-saving
                            latest_user_msg = ChatMessage.query.filter_by(
                                session_id=session_id,
                                role='user'
                            ).order_by(ChatMessage.created_at.desc()).first()
                            # Save only if no identical message exists within the last 10 seconds
                            should_save = True
                            if latest_user_msg:
                                time_diff = (datetime.utcnow() - latest_user_msg.created_at).total_seconds()
                                if latest_user_msg.content == message and time_diff < 10:
                                    should_save = False
                                    print(f"[Dedup] An identical message was saved {time_diff:.2f}s ago. Skipping the save.")
                            if should_save:
                                user_msg = ChatMessage(
                                    session_id=session_id,
                                    role='user',
                                    content=message
                                )
                                db.session.add(user_msg)
                                print(f"[Message save] User message saved: {message[:50]}...")
                                # Update the session title (if this is the first user message)
                                title_needs_update = (
                                    not session.title or
                                    session.title.strip() == '' or
                                    session.title == 'New chat'
                                )
                                if title_needs_update and message.strip():
                                    # Use the message content as the title (max 30 chars)
                                    title = message.strip()[:30]
                                    if len(message.strip()) > 30:
                                        title += '...'
                                    session.title = title
                                    print(f"[Session title] Updated: '{title}' (original length: {len(message.strip())} chars)")
                                elif title_needs_update:
                                    print(f"[Session title] The message is empty; not updating the title.")
                            else:
                                print(f"[Message save] Skipping the save because the message is a duplicate.")
                            # Save the AI response
                            ai_msg = ChatMessage(
                                session_id=session_id,
                                role='ai',
                                content=response_text
                            )
                            db.session.add(ai_msg)
                            session.updated_at = datetime.utcnow()
                            db.session.commit()
                            # Include the session info in the response (reflects the title update)
                            session_dict = session.to_dict()
                    except Exception as e:
                        print(f"Message save error: {str(e)}")
                        db.session.rollback()
                        session_dict = None
                response_data = {'response': response_text, 'session_id': session_id}
                if session_dict:
                    response_data['session'] = session_dict
                return jsonify(response_data)
            except requests.exceptions.ConnectionError:
                return jsonify({'error': 'Cannot connect to the Ollama server. Check that Ollama is running.'}), 503
            except requests.exceptions.Timeout:
                return jsonify({'error': 'The response timed out. Try a shorter message.'}), 504
            except Exception as e:
                return jsonify({'error': f'An error occurred while communicating with Ollama: {str(e)}'}), 500
        else:
            # Default response when no model is selected
            response_text = f"Hello! I'm preparing an answer to '{message}'.\n\nSelect a local AI model at the bottom left for a more accurate answer."
            return jsonify({'response': response_text})
    except Exception as e:
        return jsonify({'error': f'An error occurred while processing the chat: {str(e)}'}), 500
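# Request payload sketch for the chat endpoint (the URL is an assumption; the
# field names come from the handler above):
#
#   import requests
#   payload = {
#       'message': 'Who is the protagonist?',
#       'model': 'gemini:gemini-1.5-flash',  # or an Ollama model name
#       'file_ids': [1, 2],                  # optional: restrict RAG to these files
#       'session_id': 7                      # optional: persist the exchange
#   }
#   resp = requests.post('http://localhost:5000/api/chat', json=payload)
#   # -> {'response': '...', 'session_id': 7, 'session': {...}}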
| def upload_file(): | |
| """μΉμμ€ νμΌ μ λ‘λ""" | |
| import sys | |
| import traceback | |
| # λͺ¨λ μΆλ ₯μ μ¦μ νλ¬μνμ¬ λ‘κ·Έκ° λ°λ‘ 보μ΄λλ‘ | |
| def log_print(*args, **kwargs): | |
| print(*args, **kwargs) | |
| sys.stdout.flush() | |
| try: | |
| log_print(f"\n{'='*60}") | |
| log_print(f"=== νμΌ μ λ‘λ μμ² μμ ===") | |
| log_print(f"μμ² λ©μλ: {request.method}") | |
| log_print(f"Content-Type: {request.content_type}") | |
| log_print(f"Content-Length: {request.content_length}") | |
| log_print(f"Form λ°μ΄ν° ν€: {list(request.form.keys())}") | |
| log_print(f"Files ν€: {list(request.files.keys())}") | |
| log_print(f"μ¬μ©μ: {current_user.username if current_user else 'None'}") | |
| log_print(f"{'='*60}\n") | |
| # μ λ‘λ ν΄λ νμΈ λ° μμ± | |
| try: | |
| ensure_upload_folder() | |
| log_print(f"[1/8] μ λ‘λ ν΄λ νμΈ μλ£: {UPLOAD_FOLDER}") | |
| except Exception as e: | |
| error_msg = f'μ λ‘λ ν΄λλ₯Ό μ€λΉν μ μμ΅λλ€: {str(e)}' | |
| log_print(f"[ERROR] {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'folder_check'}), 500 | |
| if 'file' not in request.files: | |
| error_msg = 'νμΌμ΄ μμ΅λλ€.' | |
| log_print(f"[ERROR] {error_msg}") | |
| log_print(f"μ¬μ© κ°λ₯ν ν€: {list(request.files.keys())}") | |
| return jsonify({'error': error_msg, 'step': 'file_check'}), 400 | |
| file = request.files['file'] | |
| model_name = request.form.get('model_name', '').strip() | |
| parent_file_id = request.form.get('parent_file_id', None) # μ΄μ΄μ μ λ‘λν κ²½μ° μλ³Έ νμΌ ID | |
| log_print(f"[2/8] νμΌ μμ : {file.filename if file else 'None'}") | |
| log_print(f"[2/8] λͺ¨λΈλͺ : {model_name if model_name else 'None (λΉμ΄μμ)'}") | |
| log_print(f"[2/8] μ΄μ΄μ μ λ‘λ: {parent_file_id if parent_file_id else 'μλμ€'}") | |
| if file.filename == '': | |
| error_msg = 'νμΌλͺ μ΄ μμ΅λλ€.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'filename_check'}), 400 | |
| # λͺ¨λΈλͺ κ²μ¦ | |
| if not model_name: | |
| error_msg = 'AI λͺ¨λΈμ μ νν΄μ£ΌμΈμ.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'model_check'}), 400 | |
| # parent_file_id κ²μ¦ (μ΄μ΄μ μ λ‘λμΈ κ²½μ°) | |
| parent_file = None | |
| if parent_file_id: | |
| try: | |
| parent_file_id = int(parent_file_id) | |
| parent_file = UploadedFile.query.filter_by( | |
| id=parent_file_id, | |
| uploaded_by=current_user.id | |
| ).first() | |
| if not parent_file: | |
| error_msg = 'μλ³Έ νμΌμ μ°Ύμ μ μμ΅λλ€.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'parent_file_check'}), 404 | |
| # κ°μ λͺ¨λΈμΈμ§ νμΈ | |
| if parent_file.model_name != model_name: | |
| error_msg = 'κ°μ λͺ¨λΈμ νμΌμλ§ μ΄μ΄μ μ λ‘λν μ μμ΅λλ€.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'model_mismatch'}), 400 | |
| log_print(f"[μ΄μ΄μ μ λ‘λ] μλ³Έ νμΌ: {parent_file.original_filename} (ID: {parent_file_id})") | |
| except (ValueError, TypeError): | |
| parent_file_id = None | |
| log_print(f"[κ²½κ³ ] μλͺ»λ parent_file_id: {parent_file_id}") | |
| log_print(f"[3/8] μ λ‘λ μλ: {file.filename}, λͺ¨λΈ: {model_name}") | |
| if not allowed_file(file.filename): | |
| error_msg = f'νμ©λμ§ μμ νμΌ νμμ λλ€. νμ© νμ: {", ".join(ALLOWED_EXTENSIONS)}' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'file_type_check'}), 400 | |
| log_print(f"[4/8] νμΌ νμ νμΈ μλ£: {file.filename}") | |
| # νμΌ ν¬κΈ° νμΈ (Content-Length ν€λ μ¬μ©) | |
| file_size = 0 | |
| try: | |
| # Content-Length ν€λ νμΈ | |
| if request.content_length: | |
| file_size = request.content_length | |
| print(f"Content-Lengthλ‘ νμΌ ν¬κΈ° νμΈ: {file_size} bytes") | |
| else: | |
| # Content-Lengthκ° μμΌλ©΄ νμΌ μ€νΈλ¦Όμμ ν¬κΈ° νμΈ μλ | |
| try: | |
| # νμΌ μ€νΈλ¦Όμ νμ¬ μμΉ μ μ₯ | |
| current_pos = file.tell() | |
| # νμΌ λμΌλ‘ μ΄λ | |
| file.seek(0, os.SEEK_END) | |
| file_size = file.tell() | |
| # μλ μμΉλ‘ 볡μ | |
| file.seek(current_pos, os.SEEK_SET) | |
| print(f"νμΌ μ€νΈλ¦ΌμΌλ‘ ν¬κΈ° νμΈ: {file_size} bytes") | |
| except (AttributeError, IOError, OSError) as e: | |
| print(f"νμΌ ν¬κΈ° νμΈ μ€ν¨ (μ μ₯ ν νμΈ): {str(e)}") | |
| file_size = 0 # μ μ₯ ν νμΈνλλ‘ 0μΌλ‘ μ€μ | |
| except Exception as e: | |
| print(f"νμΌ ν¬κΈ° νμΈ μ€λ₯: {str(e)}") | |
| file_size = 0 # μ μ₯ ν νμΈνλλ‘ 0μΌλ‘ μ€μ | |
| # νμΌ ν¬κΈ° μ¬μ μ²΄ν¬ (κ°λ₯ν κ²½μ°μλ§) | |
| if file_size > 0: | |
| if file_size > 100 * 1024 * 1024: # 100MB | |
| print(f"νμΌ ν¬κΈ° μ΄κ³Ό: {file_size} bytes") | |
| return jsonify({'error': 'νμΌ ν¬κΈ°κ° λ무 ν½λλ€. μ΅λ 100MBκΉμ§ μ λ‘λ κ°λ₯ν©λλ€.'}), 400 | |
| if file_size == 0: | |
| print("λΉ νμΌ μ λ‘λ μλ") | |
| return jsonify({'error': 'λΉ νμΌμ μ λ‘λν μ μμ΅λλ€.'}), 400 | |
| # μμ ν νμΌλͺ μμ± | |
| original_filename = file.filename | |
| filename = secure_filename(original_filename) | |
| if not filename: | |
| return jsonify({'error': 'μ ν¨νμ§ μμ νμΌλͺ μ λλ€.'}), 400 | |
| unique_filename = f"{uuid.uuid4().hex}_{filename}" | |
| file_path = os.path.join(UPLOAD_FOLDER, unique_filename) | |
| # νμΌ μ μ₯ | |
| try: | |
| log_print(f"[6/8] νμΌ μ μ₯ μλ: {file_path}") | |
| file.save(file_path) | |
| log_print(f"[6/8] νμΌ μ μ₯ μλ£: {file_path}") | |
| except IOError as e: | |
| error_msg = f'νμΌ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}' | |
| log_print(f"[ERROR] νμΌ μ μ₯ IOError: {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'file_save'}), 500 | |
| except PermissionError as e: | |
| error_msg = f'νμΌ μ μ₯ κΆν μ€λ₯: {str(e)}' | |
| log_print(f"[ERROR] νμΌ μ μ₯ PermissionError: {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'file_save_permission'}), 500 | |
| except Exception as e: | |
| error_msg = f'νμΌ μ μ₯ μ€ν¨: {str(e)}' | |
| log_print(f"[ERROR] νμΌ μ μ₯ Exception: {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'file_save'}), 500 | |
| # μ μ₯λ νμΌ ν¬κΈ° νμΈ | |
| if not os.path.exists(file_path): | |
| error_msg = 'νμΌμ΄ μ μ₯λμ§ μμμ΅λλ€.' | |
| print(f"νμΌ μ‘΄μ¬ νμΈ μ€ν¨: {file_path}") | |
| return jsonify({'error': error_msg}), 500 | |
| saved_file_size = os.path.getsize(file_path) | |
| if saved_file_size == 0: | |
| os.remove(file_path) # λΉ νμΌ μμ | |
| error_msg = 'νμΌμ΄ μ λλ‘ μ μ₯λμ§ μμμ΅λλ€.' | |
| print(f"λΉ νμΌ μμ : {file_path}") | |
| return jsonify({'error': error_msg}), 500 | |
| print(f"μ μ₯λ νμΌ ν¬κΈ°: {saved_file_size} bytes") | |
| # λ°μ΄ν°λ² μ΄μ€μ μ μ₯ | |
| try: | |
| log_print(f"[7/8] λ°μ΄ν°λ² μ΄μ€ μ μ₯ μλ: {original_filename}") | |
| uploaded_file = UploadedFile( | |
| filename=unique_filename, | |
| original_filename=original_filename, | |
| file_path=file_path, | |
| file_size=saved_file_size, | |
| model_name=model_name, # μ΄λ―Έ κ²μ¦λ¨ | |
| uploaded_by=current_user.id, | |
| parent_file_id=parent_file_id if parent_file else None # μ΄μ΄μ μ λ‘λμΈ κ²½μ° | |
| ) | |
| db.session.add(uploaded_file) | |
| db.session.flush() # IDλ₯Ό μ»κΈ° μν΄ flush | |
| log_print(f"[7/8] λ°μ΄ν°λ² μ΄μ€ flush μλ£, νμΌ ID: {uploaded_file.id}") | |
| # ν μ€νΈ νμΌμΈ κ²½μ° μ²ν¬λ‘ λΆν νμ¬ μ μ₯ (RAGμ©) | |
| if original_filename.lower().endswith(('.txt', '.md')): | |
| try: | |
| log_print(f"[7/8] μ²ν¬ μμ± μμ: {original_filename}") | |
| log_print(f"[7/8] νμΌ ID: {uploaded_file.id}") | |
| # νμΌ λ΄μ© μ½κΈ° | |
| encoding = 'utf-8' | |
| try: | |
| with open(file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| log_print(f"[7/8] UTF-8 μΈμ½λ©μΌλ‘ νμΌ μ½κΈ° μ±κ³΅: {len(content)}μ") | |
| except UnicodeDecodeError: | |
| log_print(f"[7/8] UTF-8 μΈμ½λ© μ€ν¨, CP949 μλ: {original_filename}") | |
| with open(file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| log_print(f"[7/8] CP949 μΈμ½λ©μΌλ‘ νμΌ μ½κΈ° μ±κ³΅: {len(content)}μ") | |
| # μ²ν¬ μμ± λ° μ μ₯ | |
| log_print(f"[7/8] μ²ν¬ μμ± ν¨μ νΈμΆ μ€...") | |
| chunk_count = create_chunks_for_file(uploaded_file.id, content) | |
| if chunk_count > 0: | |
| log_print(f"[7/8] β μ±κ³΅: νμΌ {original_filename}μ {chunk_count}κ°μ μ²ν¬λ‘ λΆν νμ΅λλ€.") | |
| print(f"νμΌ {original_filename}μ {chunk_count}κ°μ μ²ν¬λ‘ λΆν νμ΅λλ€.") | |
| else: | |
| log_print(f"[7/8] β οΈ κ²½κ³ : μ²ν¬κ° μμ±λμ§ μμμ΅λλ€. (νμΌμ΄ λ무 μ§§κ±°λ λΉμ΄μμ μ μμ΅λλ€.)") | |
| print(f"κ²½κ³ : νμΌ {original_filename}μ λν μ²ν¬κ° μμ±λμ§ μμμ΅λλ€.") | |
| # Parent Chunk μμ± (AI λΆμ) | |
| log_print(f"[7/8] Parent Chunk μμ± μμ (AI λΆμ)...") | |
| parent_chunk = create_parent_chunk_with_ai(uploaded_file.id, content, model_name) | |
| if parent_chunk: | |
| log_print(f"[7/8] β Parent Chunk μμ± μλ£: {original_filename}") | |
| print(f"Parent Chunkκ° μμ±λμμ΅λλ€: {original_filename}") | |
| else: | |
| log_print(f"[7/8] β οΈ κ²½κ³ : Parent Chunk μμ± μ€ν¨: {original_filename}") | |
| print(f"κ²½κ³ : Parent Chunk μμ±μ μ€ν¨νμ΅λλ€: {original_filename}") | |
| except Exception as e: | |
| error_msg = f"μ²ν¬ μμ± μ€ μ€λ₯: {str(e)}" | |
| log_print(f"[7/8] β μ€λ₯: {error_msg}") | |
| print(error_msg) | |
| traceback.print_exc() | |
| # μ΅μ’ μ²ν¬ κ°μ νμΈ λ° μ μ₯ | |
| chunk_count = 0 | |
| if original_filename.lower().endswith(('.txt', '.md')): | |
| chunk_count = DocumentChunk.query.filter_by(file_id=uploaded_file.id).count() | |
| log_print(f"[8/8] μ΅μ’ μ²ν¬ κ°μ νμΈ: {chunk_count}κ°") | |
| db.session.commit() | |
| log_print(f"[8/8] λ°μ΄ν°λ² μ΄μ€ μ»€λ° μλ£: {original_filename}") | |
| log_print(f"[8/8] μ°κ²°λ λͺ¨λΈ: {model_name}") | |
| log_print(f"[8/8] μμ±λ μ²ν¬ μ: {chunk_count}") | |
| # νμ΅ μν μμ½ | |
| if chunk_count > 0: | |
| log_print(f"[8/8] β AI νμ΅ μ€λΉ μλ£: {chunk_count}κ° μ²ν¬κ° μ μ₯λμ΄ RAG κ²μμ μ¬μ© κ°λ₯ν©λλ€.") | |
| else: | |
| log_print(f"[8/8] β οΈ κ²½κ³ : μ²ν¬κ° μμ±λμ§ μμ RAG κ²μμ΄ λΆκ°λ₯ν©λλ€.") | |
| log_print(f"{'='*60}") | |
| log_print(f"=== νμΌ μ λ‘λ μ±κ³΅ ===") | |
| log_print(f"{'='*60}\n") | |
| except Exception as e: | |
| db.session.rollback() | |
| error_msg = f'λ°μ΄ν°λ² μ΄μ€ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}' | |
| log_print(f"[ERROR] λ°μ΄ν°λ² μ΄μ€ μ μ₯ μ€λ₯: {error_msg}") | |
| traceback.print_exc() | |
| # λ°μ΄ν°λ² μ΄μ€ μ μ₯ μ€ν¨ μ νμΌλ μμ | |
| if os.path.exists(file_path): | |
| try: | |
| os.remove(file_path) | |
| log_print(f"μ€λ₯λ‘ μΈν νμΌ μμ : {file_path}") | |
| except Exception as del_e: | |
| log_print(f"νμΌ μμ μ€ν¨: {str(del_e)}") | |
| return jsonify({'error': error_msg, 'step': 'database_save'}), 500 | |
| log_print(f"[8/8] μ λ‘λ μλ£ - νμΌ: {original_filename}, λͺ¨λΈ: {model_name}, ν¬κΈ°: {saved_file_size} bytes") | |
| return jsonify({ | |
| 'message': f'νμΌμ΄ μ±κ³΅μ μΌλ‘ μ λ‘λλμμ΅λλ€. (λͺ¨λΈ: {model_name})', | |
| 'file': uploaded_file.to_dict(), | |
| 'model_name': model_name, | |
| 'chunk_count': chunk_count | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| error_msg = str(e) | |
| error_type = type(e).__name__ | |
| log_print(f"\n{'='*60}") | |
| log_print(f"=== μ λ‘λ μ²λ¦¬ μ€ μμΈ λ°μ ===") | |
| log_print(f"μμΈ νμ : {error_type}") | |
| log_print(f"μλ¬ λ©μμ§: {error_msg}") | |
| traceback.print_exc() | |
| log_print(f"{'='*60}\n") | |
| # νμΌ ν¬κΈ° μ΄κ³Ό μ€λ₯ μ²λ¦¬ | |
| if '413' in error_msg or 'Request Entity Too Large' in error_msg or error_type == 'RequestEntityTooLarge': | |
| return jsonify({'error': 'νμΌ ν¬κΈ°κ° λ무 ν½λλ€. μ΅λ 100MBκΉμ§ μ λ‘λ κ°λ₯ν©λλ€.', 'step': 'file_size'}), 413 | |
| return jsonify({'error': f'νμΌ μ λ‘λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {error_type}: {error_msg}', 'step': 'exception'}), 500 | |
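| # Usage sketch (illustrative only): a minimal client-side call against the | |
| # upload handler above, using the requests library already imported in this | |
| # module. The route URL and the multipart field names ('file', 'model_name') | |
| # are assumptions inferred from this handler, not confirmed route definitions. | |
| def _example_upload_client(path, model_name, base_url='http://localhost:5000'): | |
|     """Hypothetical helper; returns the JSON body produced above.""" | |
|     with open(path, 'rb') as fh: | |
|         resp = requests.post(f'{base_url}/api/upload',  # assumed route | |
|                              files={'file': fh}, | |
|                              data={'model_name': model_name}) | |
|     resp.raise_for_status() | |
|     return resp.json()  # includes 'file', 'model_name', 'chunk_count' | |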
| def get_files(): | |
| """μ λ‘λλ νμΌ λͺ©λ‘ μ‘°ν""" | |
| try: | |
| model_name = request.args.get('model_name', None) | |
| # μλ³Έ νμΌλ§ μ‘°ν (parent_file_idκ° NoneμΈ νμΌ) | |
| query = UploadedFile.query.filter_by(parent_file_id=None) | |
| if model_name: | |
| query = query.filter_by(model_name=model_name) | |
| print(f"[νμΌ μ‘°ν] λͺ¨λΈ '{model_name}' νν°λ§") | |
| files = query.order_by(UploadedFile.uploaded_at.desc()).all() | |
| # κ° μλ³Έ νμΌμ λν΄ μ΄μ΄μ μ λ‘λλ νμΌλ ν¬ν¨ | |
| files_with_children = [] | |
| for file in files: | |
| file_dict = file.to_dict() | |
| # μ²ν¬ κ°μ μΆκ° | |
| chunk_count = DocumentChunk.query.filter_by(file_id=file.id).count() | |
| file_dict['chunk_count'] = chunk_count | |
| # μ΄μ΄μ μ λ‘λλ νμΌλ€λ μ‘°ν | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file.id).order_by(UploadedFile.uploaded_at.asc()).all() | |
| child_files_dict = [] | |
| for child in child_files: | |
| child_dict = child.to_dict() | |
| child_chunk_count = DocumentChunk.query.filter_by(file_id=child.id).count() | |
| child_dict['chunk_count'] = child_chunk_count | |
| child_files_dict.append(child_dict) | |
| file_dict['child_files'] = child_files_dict | |
| files_with_children.append(file_dict) | |
| # λͺ¨λΈλ³ ν΅κ³ μ 보 μΆκ° (μλ³Έ νμΌλ§ μΉ΄μ΄νΈ) | |
| model_stats = {} | |
| if not model_name: | |
| # λͺ¨λ λͺ¨λΈμ ν΅κ³ (μλ³Έ νμΌλ§) | |
| all_files = UploadedFile.query.filter_by(parent_file_id=None).all() | |
| for file in all_files: | |
| model = file.model_name or 'λ―Έμ§μ ' | |
| if model not in model_stats: | |
| model_stats[model] = {'count': 0, 'total_size': 0} | |
| model_stats[model]['count'] += 1 | |
| model_stats[model]['total_size'] += file.file_size | |
| else: | |
| # νΉμ λͺ¨λΈμ ν΅κ³ | |
| model_stats[model_name] = { | |
| 'count': len(files), | |
| 'total_size': sum(f.file_size for f in files) | |
| } | |
| print(f"[νμΌ μ‘°ν] μ‘°νλ μλ³Έ νμΌ μ: {len(files)}κ°") | |
| return jsonify({ | |
| 'files': files_with_children, | |
| 'model_stats': model_stats, | |
| 'filtered_model': model_name | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
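| # Design note: get_files() builds model_stats by iterating over every row in | |
| # Python. For larger tables, a grouped aggregate query is the usual | |
| # alternative. A minimal sketch producing the same dict shape (the helper | |
| # name is hypothetical): | |
| def _model_stats_via_group_by(): | |
|     from sqlalchemy import func | |
|     rows = (db.session.query(UploadedFile.model_name, | |
|                              func.count(UploadedFile.id), | |
|                              func.coalesce(func.sum(UploadedFile.file_size), 0)) | |
|             .filter(UploadedFile.parent_file_id.is_(None)) | |
|             .group_by(UploadedFile.model_name) | |
|             .all()) | |
|     return {(model or 'λ―Έμ§μ '): {'count': count, 'total_size': total} | |
|             for model, count, total in rows} | |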
| def get_file_chunks(file_id): | |
| """νμΌμ μ²ν¬ μ 보 μ‘°ν (νμ΅ μν νμΈμ©)""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all() | |
| total_chunks = len(chunks) | |
| # μν μ²ν¬ (μ²μ 3κ°) | |
| sample_chunks = [] | |
| for chunk in chunks[:3]: | |
| sample_chunks.append({ | |
| 'index': chunk.chunk_index, | |
| 'content_preview': chunk.content[:100] + '...' if len(chunk.content) > 100 else chunk.content, | |
| 'content_length': len(chunk.content) | |
| }) | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'model_name': file.model_name, | |
| 'total_chunks': total_chunks, | |
| 'sample_chunks': sample_chunks, | |
| 'learning_status': 'ready' if total_chunks > 0 else 'not_ready', | |
| 'message': f'{total_chunks}κ° μ²ν¬κ° μ μ₯λμ΄ RAG κ²μμ μ¬μ© κ°λ₯ν©λλ€.' if total_chunks > 0 else 'μ²ν¬κ° μμ±λμ§ μμ RAG κ²μμ΄ λΆκ°λ₯ν©λλ€.' | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'μ²ν¬ μ 보 μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_file_parent_chunk(file_id): | |
| """νμΌμ Parent Chunk μ‘°ν""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| if not parent_chunk: | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'has_parent_chunk': False, | |
| 'message': 'Parent Chunkκ° μμ±λμ§ μμμ΅λλ€.' | |
| }), 200 | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'has_parent_chunk': True, | |
| 'parent_chunk': parent_chunk.to_dict(), | |
| 'message': 'Parent Chunkκ° μ‘΄μ¬ν©λλ€.' | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'Parent Chunk μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def delete_file(file_id): | |
| """μ λ‘λλ νμΌ μμ (μ°κ΄λ λͺ¨λ νμΌλ ν¨κ» μμ )""" | |
| try: | |
| file = UploadedFile.query.get_or_404(file_id) | |
| # μλ³Έ νμΌμΈ κ²½μ° (parent_file_idκ° NoneμΈ κ²½μ°) | |
| # μ΄μ΄μ μ λ‘λλ λͺ¨λ νμΌλ ν¨κ» μμ | |
| files_to_delete = [] | |
| if file.parent_file_id is None: | |
| # μλ³Έ νμΌμ΄λ©΄, μ΄μ΄μ μ λ‘λλ λͺ¨λ νμΌλ μ°Ύμμ μμ | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all() | |
| files_to_delete = [file] + child_files | |
| print(f"[νμΌ μμ ] μλ³Έ νμΌ μμ : {file.original_filename}, μ°κ΄ νμΌ {len(child_files)}κ°λ ν¨κ» μμ ") | |
| else: | |
| # μ΄μ΄μ μ λ‘λλ νμΌμ΄λ©΄ μλ³Έ νμΌλ ν¨κ» μμ | |
| parent_file = UploadedFile.query.get(file.parent_file_id) | |
| if parent_file: | |
| # μλ³Έ νμΌκ³Ό λͺ¨λ μ°κ΄ νμΌ μμ | |
| all_child_files = UploadedFile.query.filter_by(parent_file_id=file.parent_file_id).all() | |
| files_to_delete = [parent_file] + all_child_files | |
| print(f"[νμΌ μμ ] μ΄μ΄μ μ λ‘λλ νμΌ μμ : {file.original_filename}, μλ³Έ λ° μ°κ΄ νμΌ {len(all_child_files)}κ°λ ν¨κ» μμ ") | |
| else: | |
| files_to_delete = [file] | |
| deleted_count = 0 | |
| deleted_files = [] | |
| for file_to_delete in files_to_delete: | |
| try: | |
| # νμΌ μμ€ν μμ μμ | |
| if os.path.exists(file_to_delete.file_path): | |
| os.remove(file_to_delete.file_path) | |
| print(f"[νμΌ μμ ] νμΌ μμ€ν μμ μμ : {file_to_delete.file_path}") | |
| # κ΄λ ¨ Child Chunk (DocumentChunk) μμ | |
| child_chunk_count = DocumentChunk.query.filter_by(file_id=file_to_delete.id).count() | |
| if child_chunk_count > 0: | |
| DocumentChunk.query.filter_by(file_id=file_to_delete.id).delete() | |
| print(f"[νμΌ μμ ] Child Chunk {child_chunk_count}κ° μμ μλ£") | |
| # λ²‘ν° DBμμλ ν΄λΉ νμΌμ μ²ν¬ μμ | |
| try: | |
| vector_db = get_vector_db() | |
| vector_db.delete_chunks_by_file_id(file_to_delete.id) | |
| print(f"[νμΌ μμ ] λ²‘ν° DBμμ μ²ν¬ μμ μλ£") | |
| except Exception as vector_e: | |
| print(f"[νμΌ μμ ] λ²‘ν° DB μμ μ€λ₯ (무μ): {str(vector_e)}") | |
| # κ΄λ ¨ Parent Chunk μμ | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_to_delete.id).first() | |
| if parent_chunk: | |
| db.session.delete(parent_chunk) | |
| print(f"[νμΌ μμ ] Parent Chunk μμ μλ£") | |
| deleted_files.append(file_to_delete.original_filename) | |
| db.session.delete(file_to_delete) | |
| deleted_count += 1 | |
| print(f"[νμΌ μμ ] λ°μ΄ν°λ² μ΄μ€μμ νμΌ μμ μλ£: {file_to_delete.original_filename}") | |
| except Exception as e: | |
| print(f"[νμΌ μμ μ€λ₯] {file_to_delete.original_filename}: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| db.session.commit() | |
| message = 'νμΌμ΄ μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.' | |
| if deleted_count > 1: | |
| message = f'νμΌ {deleted_count}κ°κ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€. (μλ³Έ λ° μ°κ΄ νμΌ ν¬ν¨)' | |
| return jsonify({ | |
| 'message': message, | |
| 'deleted_count': deleted_count, | |
| 'deleted_files': deleted_files | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'νμΌ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
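| # Design note: delete_file() removes DocumentChunk and ParentChunk rows by | |
| # hand for each file. If the models in app.database declared the reverse | |
| # relationships with a delete cascade, SQLAlchemy would handle that part | |
| # automatically; a sketch of the assumed wiring (not the actual models): | |
| # | |
| #     chunks = db.relationship('DocumentChunk', backref='file', | |
| #                              cascade='all, delete-orphan') | |
| # | |
| # The vector-DB cleanup would still need the explicit call either way, since | |
| # it lives outside the ORM session. | |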
| def get_file_content(file_id): | |
| """μ λ‘λλ νμΌ λ΄μ© μ‘°ν""" | |
| try: | |
| file = UploadedFile.query.get_or_404(file_id) | |
| if not os.path.exists(file.file_path): | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| # ν μ€νΈ νμΌ μ½κΈ° | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| # UTF-8λ‘ μ½μ μ μμΌλ©΄ λ€λ₯Έ μΈμ½λ© μλ | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| return jsonify({ | |
| 'content': content, | |
| 'filename': file.original_filename | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌ λ΄μ© μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
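| # The UTF-8 -> CP949 fallback above duplicates the read logic in the upload | |
| # path. A small shared helper would remove the duplication; a minimal sketch | |
| # (the name and encoding order are illustrative): | |
| def _read_text_with_fallback(path, encodings=('utf-8', 'cp949')): | |
|     """Try each encoding in turn; re-raise the last decode failure.""" | |
|     last_err = None | |
|     for enc in encodings: | |
|         try: | |
|             with open(path, 'r', encoding=enc) as f: | |
|                 return f.read() | |
|         except UnicodeDecodeError as e: | |
|             last_err = e | |
|     raise last_err | |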
| def get_chat_sessions(): | |
| """μ¬μ©μμ λν μΈμ λͺ©λ‘ μ‘°ν (μ΅κ·Ό 20κ°λ§ νμ)""" | |
| try: | |
| sessions = ChatSession.query.filter_by(user_id=current_user.id)\ | |
| .order_by(ChatSession.updated_at.desc())\ | |
| .limit(20).all() | |
| return jsonify({ | |
| 'sessions': [session.to_dict() for session in sessions] | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def create_chat_session(): | |
| """μ λν μΈμ μμ±""" | |
| try: | |
| data = request.get_json(silent=True) or {} | |
| title = data.get('title', 'μ λν') | |
| model_name = data.get('model_name', None) | |
| session = ChatSession( | |
| user_id=current_user.id, | |
| title=title, | |
| model_name=model_name | |
| ) | |
| db.session.add(session) | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'λν μΈμ μ΄ μμ±λμμ΅λλ€.', | |
| 'session': session.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λν μΈμ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_chat_session(session_id): | |
| """λν μΈμ μμΈ μ‘°ν (λ©μμ§ ν¬ν¨)""" | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| session_dict = session.to_dict() | |
| session_dict['messages'] = [msg.to_dict() for msg in session.messages] | |
| return jsonify({'session': session_dict}), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def update_chat_session(session_id): | |
| """λν μΈμ μμ (μ λͺ© λ±)""" | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| data = request.get_json(silent=True) or {} | |
| if 'title' in data: | |
| session.title = data['title'] | |
| session.updated_at = datetime.utcnow() | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'λν μΈμ μ΄ μμ λμμ΅λλ€.', | |
| 'session': session.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λν μΈμ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def delete_chat_session(session_id): | |
| """λν μΈμ μμ """ | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| db.session.delete(session) | |
| db.session.commit() | |
| return jsonify({'message': 'λν μΈμ μ΄ μμ λμμ΅λλ€.'}), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λν μΈμ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def add_chat_message(session_id): | |
| """λν λ©μμ§ μΆκ°""" | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| data = request.get_json(silent=True) or {} | |
| role = data.get('role', 'user') | |
| content = data.get('content', '') | |
| if not content: | |
| return jsonify({'error': 'λ©μμ§ λ΄μ©μ΄ νμν©λλ€.'}), 400 | |
| message = ChatMessage( | |
| session_id=session_id, | |
| role=role, | |
| content=content | |
| ) | |
| db.session.add(message) | |
| # μΈμ μ λͺ© μ λ°μ΄νΈ (첫 μ¬μ©μ λ©μμ§μΈ κ²½μ°) | |
| if not session.title or session.title == 'μ λν': | |
| if role == 'user': | |
| title = content[:30] + '...' if len(content) > 30 else content | |
| session.title = title | |
| session.updated_at = datetime.utcnow() | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'λ©μμ§κ° μΆκ°λμμ΅λλ€.', | |
| 'chat_message': message.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λ©μμ§ μΆκ° μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
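| # Usage sketch (illustrative only): creating a session and posting its first | |
| # message through the two handlers above. The route URLs and the model name | |
| # are assumptions; the JSON keys match what the handlers read ('title', | |
| # 'model_name', 'role', 'content'). | |
| def _example_chat_roundtrip(base_url='http://localhost:5000'): | |
|     session = requests.post(f'{base_url}/api/chat/sessions',  # assumed route | |
|                             json={'title': 'μ λν', 'model_name': 'gemma2'} | |
|                             ).json()['session'] | |
|     msg = requests.post(  # 'id' key assumed present in ChatSession.to_dict() | |
|         f"{base_url}/api/chat/sessions/{session['id']}/messages", | |
|         json={'role': 'user', 'content': 'μλ νμΈμ'}) | |
|     return session, msg.json()['chat_message'] | |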