"""
RAG chatbot app with voice-recognition (STT) support.
"""

import os
import time
import tempfile
from typing import List, Dict, Tuple, Any, Optional
import hashlib
import pickle
import json

from config import PDF_DIRECTORY, CHUNK_SIZE, CHUNK_OVERLAP, LLM_MODEL
from optimized_document_processor import OptimizedDocumentProcessor
from vector_store import VectorStore
from langchain.schema import Document

from clova_stt import ClovaSTT
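
# The RAG chain import is optional: if its dependencies are missing, the app
# still starts and reports the problem instead of crashing at import time.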
try:
    from rag_chain import RAGChain

    RAG_CHAIN_AVAILABLE = True
except ImportError:
    print("Could not load the RAG chain module.")
    RAG_CHAIN_AVAILABLE = False


class VoiceRAGChatApp:
    """
    RAG chatbot application with voice-recognition (STT) support.
    """

    def __init__(self):
        """
        Initialize the voice-enabled RAG chatbot application.
        """
        self.pdf_directory = PDF_DIRECTORY
        self.cache_directory = "cached_data"
        self.index_file = os.path.join(self.cache_directory, "file_index.json")
        self.chunks_dir = os.path.join(self.cache_directory, "chunks")
        self.vector_index_dir = os.path.join(self.cache_directory, "vector_index")

        os.makedirs(self.pdf_directory, exist_ok=True)
        os.makedirs(self.cache_directory, exist_ok=True)
        os.makedirs(self.chunks_dir, exist_ok=True)
        os.makedirs(self.vector_index_dir, exist_ok=True)

        print(f"PDF document directory: '{self.pdf_directory}'")
        print(f"Cache directory: '{self.cache_directory}'")

        self.document_processor = OptimizedDocumentProcessor(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )

        self.vector_store = VectorStore(use_milvus=False)

        self.file_index = self._load_file_index()

        self.documents = []
        self.processed_files = []
        self.is_initialized = False

        self.stt_client = ClovaSTT()
        print("Speech-recognition (STT) client initialized.")

        print("Starting automatic document loading and processing...")
        self.auto_process_documents()

    def _load_file_index(self) -> Dict[str, Dict[str, Any]]:
        """
        Load the file index.

        Returns:
            Mapping of file path -> metadata.
        """
        if os.path.exists(self.index_file):
            try:
                with open(self.index_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Failed to load the index file: {e}")
                return {}
        return {}

    def _save_file_index(self) -> None:
        """
        Save the file index.
        """
        with open(self.index_file, 'w', encoding='utf-8') as f:
            json.dump(self.file_index, f, ensure_ascii=False, indent=2)
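
    # Note: MD5 is used here only to detect file changes, not for security.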
    def _calculate_file_hash(self, file_path: str) -> str:
        """
        Compute a file's hash.

        Args:
            file_path: Path to the file.

        Returns:
            MD5 hex digest.
        """
        hasher = hashlib.md5()
        with open(file_path, 'rb') as f:
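            # Read in 64 KB blocks so large files need not be held in memory.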
            buf = f.read(65536)
            while buf:
                hasher.update(buf)
                buf = f.read(65536)
        return hasher.hexdigest()

    def _is_file_processed(self, file_path: str) -> bool:
        """
        Check whether a file has already been processed and is unchanged.

        Args:
            file_path: Path to the file.

        Returns:
            True if the cached result can be reused.
        """
        if file_path not in self.file_index:
            return False

        current_hash = self._calculate_file_hash(file_path)

        if self.file_index[file_path]['hash'] != current_hash:
            print(f"File change detected: {file_path}")
            return False

        chunks_path = self.file_index[file_path]['chunks_path']
        if not os.path.exists(chunks_path):
            return False

        return True

    def _get_chunks_path(self, file_hash: str) -> str:
        """
        Build the path of a chunk cache file.

        Args:
            file_hash: The file's hash value.

        Returns:
            Path to the chunk pickle file.
        """
        return os.path.join(self.chunks_dir, f"{file_hash}.pkl")

    def _save_chunks(self, file_path: str, chunks: List[Document]) -> None:
        """
        Save chunk data to the cache.

        Args:
            file_path: Path to the source file.
            chunks: List of document chunks.
        """
        file_hash = self._calculate_file_hash(file_path)

        chunks_path = self._get_chunks_path(file_hash)

        with open(chunks_path, 'wb') as f:
            pickle.dump(chunks, f)

        # Record the file in the index so unchanged files can be skipped later.
        self.file_index[file_path] = {
            'hash': file_hash,
            'chunks_path': chunks_path,
            'last_processed': time.time(),
            'chunks_count': len(chunks)
        }

        self._save_file_index()

        print(f"Chunks saved: {file_path} ({len(chunks)} chunks)")

    def _load_chunks(self, file_path: str) -> List[Document]:
        """
        Load cached chunk data.

        Args:
            file_path: Path to the source file.

        Returns:
            List of document chunks.
        """
        chunks_path = self.file_index[file_path]['chunks_path']
        with open(chunks_path, 'rb') as f:
            chunks = pickle.load(f)

        print(f"Chunks loaded: {file_path} ({len(chunks)} chunks)")
        return chunks

    def _process_pdf_file(self, file_path: str) -> List[Document]:
        """
        Process a PDF file, falling back to PyPDFLoader when docling fails.

        Args:
            file_path: Path to the PDF file to process.

        Returns:
            List of processed document chunks.
        """
        try:
            print(f"Attempting to process with docling: {file_path}")

            try:
                import signal

                def timeout_handler(signum, frame):
                    raise TimeoutError("docling processing timed out")
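
                # SIGALRM exists only on Unix and can only be set from the
                # main thread, so failure to install the timeout is tolerated.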
                try:
                    signal.signal(signal.SIGALRM, timeout_handler)
                    signal.alarm(60)
                except Exception:
                    pass

                chunks = self.document_processor.process_pdf(file_path, use_docling=True)
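
                # Cancel the pending alarm now that processing has finished.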
                try:
                    signal.alarm(0)
                except Exception:
                    pass

                return chunks

            except Exception as e:
                error_str = str(e)
                if "Invalid code point" in error_str or "RuntimeError" in error_str:
                    print(f"docling error (invalid code point): {error_str}")
                else:
                    print(f"docling error: {error_str}")
                print("Falling back to PyPDFLoader.")

                try:
                    return self.document_processor.process_pdf(file_path, use_docling=False)
                except Exception as inner_e:
                    print(f"PyPDFLoader error: {inner_e}")
                    raise

        except Exception as e:
            print(f"Fatal error while processing PDF: {e}")
            return []

    def auto_process_documents(self) -> str:
        """
        Automatically process the PDF files in the documents folder.

        Returns:
            Status message describing the result.
        """
        try:
            start_time = time.time()

            pdf_files = []
            for filename in os.listdir(self.pdf_directory):
                if filename.lower().endswith('.pdf'):
                    pdf_files.append(os.path.join(self.pdf_directory, filename))

            if not pdf_files:
                return f"No PDF files found in the '{self.pdf_directory}' folder."

            print(f"Found {len(pdf_files)} PDF file(s)")

            new_files = []
            updated_files = []
            cached_files = []
            failed_files = []
            all_chunks = []
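
            # Reuse cached chunks for unchanged files; (re)process the rest
            # and refresh their cache entries.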
            for file_path in pdf_files:
                if self._is_file_processed(file_path):
                    chunks = self._load_chunks(file_path)
                    all_chunks.extend(chunks)
                    cached_files.append(file_path)
                    self.processed_files.append(os.path.basename(file_path))
                else:
                    print(f"Processing: {file_path}")

                    try:
                        chunks = self._process_pdf_file(file_path)

                        if chunks:
                            self._save_chunks(file_path, chunks)

                            all_chunks.extend(chunks)
                            if file_path in self.file_index:
                                updated_files.append(file_path)
                            else:
                                new_files.append(file_path)

                            self.processed_files.append(os.path.basename(file_path))
                        else:
                            print(f"Failed to process '{file_path}': no chunks extracted")
                            failed_files.append(file_path)
                    except Exception as e:
                        print(f"Error while processing '{file_path}': {e}")
                        failed_files.append(file_path)

            self.documents = all_chunks

            processing_time = time.time() - start_time
            print(f"Document processing complete: {len(all_chunks)} chunks in {processing_time:.2f}s")
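
            # Load a previously saved vector index when one exists; otherwise
            # build a fresh index from the current chunks.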
            if os.path.exists(self.vector_index_dir) and any(os.listdir(self.vector_index_dir)):
                try:
                    print("Loading saved vector index...")
                    self.vector_store.load_local(self.vector_index_dir)

                    if self.vector_store.vector_store is not None:
                        if new_files or updated_files:
                            print("Updating vector index...")
                            self.vector_store.add_documents(self.documents)

                        print("Vector index loaded")
                    else:
                        print("Loaded vector index is invalid; creating a new one.")
                        self.vector_store.create_or_load(self.documents)

                except Exception as e:
                    print(f"Failed to load vector index, rebuilding: {e}")

                    import traceback
                    traceback.print_exc()

                    self.vector_store.create_or_load(self.documents)
            else:
                print("Creating new vector index...")
                self.vector_store.create_or_load(self.documents)
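
            # Persist the (possibly updated) index so the next run can load it
            # instead of rebuilding.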
            if self.vector_store and self.vector_store.vector_store is not None:
                try:
                    print(f"Saving vector index to: {self.vector_index_dir}")
                    self.vector_store.save_local(self.vector_index_dir)
                    print(f"Vector index saved: {self.vector_index_dir}")
                except Exception as e:
                    print(f"Failed to save vector index: {e}")

                    import traceback
                    traceback.print_exc()
            else:
                print("Vector index was not initialized; skipping save.")

            if RAG_CHAIN_AVAILABLE:
                self.rag_chain = RAGChain(self.vector_store)
                self.is_initialized = True

                total_time = time.time() - start_time

                status_message = (
                    "Document processing complete!\n"
                    f"- Processed files: {len(self.processed_files)}\n"
                    f"- Cached files: {len(cached_files)}\n"
                    f"- New files: {len(new_files)}\n"
                    f"- Updated files: {len(updated_files)}\n"
                    f"- Failed files: {len(failed_files)}\n"
                    f"- Total chunks: {len(self.documents)}\n"
                    f"- Processing time: {total_time:.2f}s\n"
                    "Ready to answer questions!"
                )

                print(status_message)
                return status_message
            else:
                return "Could not initialize the RAG chain. Check that the required libraries are installed."

        except Exception as e:
            error_message = f"Error during document processing: {str(e)}"
            print(error_message)
            import traceback
            traceback.print_exc()
            return error_message

    def reset_cache(self) -> str:
        """
        Clear all cached data.

        Returns:
            Result message.
        """
        try:
            # Delete the pickled chunk files.
            for filename in os.listdir(self.chunks_dir):
                file_path = os.path.join(self.chunks_dir, filename)
                if os.path.isfile(file_path):
                    os.remove(file_path)

            # Reset and persist an empty file index.
            self.file_index = {}
            self._save_file_index()

            # Delete the saved vector index files.
            for filename in os.listdir(self.vector_index_dir):
                file_path = os.path.join(self.vector_index_dir, filename)
                if os.path.isfile(file_path):
                    os.remove(file_path)

            self.documents = []
            self.processed_files = []
            self.is_initialized = False

            return "Cache cleared. All documents will be reprocessed on the next run."
        except Exception as e:
            return f"Error while clearing cache: {str(e)}"

    def process_query(self, query: str, chat_history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        """
        Process a user query.

        Args:
            query: The user's question.
            chat_history: Conversation history.

        Returns:
            Response and the updated conversation history.
        """
        if not query:
            return "", chat_history

        if not self.is_initialized:
            response = "Documents have not been initialized. Attempting automatic loading."
            chat_history.append((query, response))
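
            # Try to (re)load the documents on demand before answering.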
            try:
                self.auto_process_documents()
                if not self.is_initialized:
                    response = "Could not load documents. Check that there are PDF files in the 'documents' folder."
                    chat_history.append((query, response))
                    return "", chat_history
            except Exception as e:
                response = f"Error while loading documents: {str(e)}"
                chat_history.append((query, response))
                return "", chat_history

        try:
            start_time = time.time()
            response = self.rag_chain.run(query)
            end_time = time.time()

            query_time = end_time - start_time
            print(f"Query processing time: {query_time:.2f}s")

            chat_history.append((query, response))
            return "", chat_history
        except Exception as e:
            error_msg = f"Error occurred: {str(e)}"
            chat_history.append((query, error_msg))
            return "", chat_history

    def process_voice_query(self, audio, chat_history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        """
        Process a voice query.

        Args:
            audio: Recorded audio data.
            chat_history: Conversation history.

        Returns:
            Response and the updated conversation history.
        """
        if audio is None:
            return "", chat_history
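
        # The CLOVA STT client works on files, so the recorded audio is
        # written to a temporary WAV file first.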
        try:
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio)

            print(f"[STT] Temporary audio file created: {temp_path}")

            result = self.stt_client.recognize_file(temp_path)

            # Remove the temporary file once recognition is done.
            try:
                os.unlink(temp_path)
                print("[STT] Temporary audio file deleted")
            except Exception as e:
                print(f"[STT] Failed to delete temporary file: {e}")

            if "error" in result:
                error_msg = f"Speech recognition error: {result.get('error')}"
                print(f"[STT] {error_msg}")
                chat_history.append(("Voice message", error_msg))
                return "", chat_history

            recognized_text = result.get("text", "")
            if not recognized_text:
                error_msg = "Could not recognize any speech. Please try again."
                print("[STT] No recognized text")
                chat_history.append(("Voice message", error_msg))
                return "", chat_history

            print(f"[STT] Recognized text: {recognized_text}")

            # Route the transcribed text through the normal text-query path.
            return self.process_query(f"🎤 {recognized_text}", chat_history)

        except Exception as e:
            error_msg = f"Error during voice processing: {str(e)}"
            print(f"[STT] {error_msg}")
            chat_history.append(("Voice message", error_msg))
            return "", chat_history

    def launch_app(self) -> None:
        """
        Launch the Gradio app with voice-recognition support.
        """
        import gradio as gr

        with gr.Blocks(title="PDF-based RAG chatbot with voice recognition") as app:
            gr.Markdown("# PDF-based RAG chatbot with voice recognition")
            gr.Markdown(f"* LLM model in use: **{LLM_MODEL}**")
            gr.Markdown(f"* PDF document folder: **{self.pdf_directory}**")
            gr.Markdown("* Integrated with the Naver CLOVA speech-recognition API")

            with gr.Row():
                with gr.Column(scale=1):
                    status_box = gr.Textbox(
                        label="Document processing status",
                        value=f"Processed documents ({len(self.processed_files)}): {', '.join(self.processed_files)}",
                        lines=5,
                        interactive=False
                    )

                    refresh_button = gr.Button("Reload documents", variant="primary")
                    reset_button = gr.Button("Reset cache", variant="stop")

                    with gr.Accordion("Cache details", open=False):
                        file_info = ""
                        for file_path, info in self.file_index.items():
                            file_info += f"- {os.path.basename(file_path)}: {info['chunks_count']} chunks\n"

                        cache_info = gr.Textbox(
                            label="Cached file info",
                            value=file_info or "No cached files.",
                            lines=5,
                            interactive=False
                        )

                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        bubble_full_width=False,
                        height=500,
                        show_copy_button=True
                    )

                    with gr.Tabs() as input_tabs:
                        with gr.Tab("Text input"):
                            with gr.Row():
                                query_box = gr.Textbox(
                                    label="Question",
                                    placeholder="Ask about the processed documents...",
                                    lines=2,
                                    scale=4
                                )
                                submit_btn = gr.Button("Send", variant="primary", scale=1)

                        with gr.Tab("Voice input"):
                            audio_input = gr.Audio(
                                label="Microphone input",
                                sources=["microphone"],
                                type="bytes",
                                format="wav"
                            )
                            voice_submit_btn = gr.Button("Send voice question", variant="primary")

                    clear_chat_button = gr.Button("Clear chat")
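
            # Wire UI events to their handlers.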
            refresh_button.click(
                fn=self.auto_process_documents,
                inputs=[],
                outputs=[status_box]
            )

            def reset_and_reload():
                # Clear the cache, reprocess everything, and return a single
                # combined message for the status box.
                reset_msg = self.reset_cache()
                process_msg = self.auto_process_documents()
                return f"{reset_msg}\n{process_msg}"

            reset_button.click(
                fn=reset_and_reload,
                inputs=[],
                outputs=[status_box]
            )

            submit_btn.click(
                fn=self.process_query,
                inputs=[query_box, chatbot],
                outputs=[query_box, chatbot]
            )

            # Pressing Enter in the question box submits as well.
            query_box.submit(
                fn=self.process_query,
                inputs=[query_box, chatbot],
                outputs=[query_box, chatbot]
            )

            voice_submit_btn.click(
                fn=self.process_voice_query,
                inputs=[audio_input, chatbot],
                outputs=[audio_input, chatbot]
            )

            clear_chat_button.click(
                fn=lambda: [],
                outputs=[chatbot]
            )

        app.launch(share=False)


if __name__ == "__main__":
    app = VoiceRAGChatApp()
    app.launch_app()