diff --git "a/TransateKRtoEN.py" "b/TransateKRtoEN.py" new file mode 100644--- /dev/null +++ "b/TransateKRtoEN.py" @@ -0,0 +1,11749 @@ +# TransateKRtoEN.py +# -*- coding: utf-8 -*- +import json +import logging +import shutil +import threading +import queue +import uuid +import inspect +import os, sys, io, zipfile, time, re, mimetypes, subprocess, tiktoken +import builtins +import ebooklib +from ebooklib import epub +from bs4 import BeautifulSoup +try: + from bs4 import XMLParsedAsHTMLWarning + import warnings + # Suppress the warning since we handle both HTML and XHTML content + warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning) +except ImportError: + # Older versions of BeautifulSoup might not have this warning + pass +from collections import Counter +from unified_api_client import UnifiedClient, UnifiedClientError +import hashlib +import tempfile +import unicodedata +from difflib import SequenceMatcher +import unicodedata +import re +import time +from history_manager import HistoryManager +from chapter_splitter import ChapterSplitter +from image_translator import ImageTranslator +from typing import Dict, List, Tuple +from txt_processor import TextFileProcessor +from ai_hunter_enhanced import ImprovedAIHunterDetection +import csv +from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed + +# Module-level functions for ProcessPoolExecutor compatibility +def _check_sentence_batch_for_terms(args): + """Check a batch of sentences for term matches - used by ProcessPoolExecutor""" + batch_sentences, terms = args + filtered = [] + + # Use pre-compiled term list for fast checking + for sentence in batch_sentences: + # Quick check using any() - stops at first match + if any(term in sentence for term in terms): + filtered.append(sentence) + + return filtered + +def _process_sentence_batch_for_extraction(args): + """Process sentences to extract terms - used by ProcessPoolExecutor""" + batch_sentences, batch_idx, combined_pattern, exclude_check_data = args + from collections import Counter + import re + + local_word_freq = Counter() + local_important = [] + local_seen = set() + + # Rebuild the exclusion check function from data + honorifics_to_exclude, title_patterns_str, common_words, chinese_nums = exclude_check_data + title_patterns = [re.compile(p) for p in title_patterns_str] + + def should_exclude_term(term): + term_lower = term.lower() + + # Check if it's a common word + if term in common_words or term_lower in common_words: + return True + + # Check if it contains honorifics + for honorific in honorifics_to_exclude: + if honorific in term or (honorific.startswith('-') and term.endswith(honorific[1:])): + return True + + # Check if it matches title patterns + for pattern in title_patterns: + if pattern.search(term): + return True + + # Check if it's a number + if term in chinese_nums or term.isdigit(): + return True + + return False + + for sentence in batch_sentences: + sentence = sentence.strip() + if len(sentence) < 10 or len(sentence) > 500: + continue + + # Find all potential terms in this sentence + matches = re.findall(combined_pattern, sentence) + + if matches: + # Filter out excluded terms + filtered_matches = [] + for match in matches: + if not should_exclude_term(match): + local_word_freq[match] += 1 + filtered_matches.append(match) + + # Keep sentences with valid potential terms + if filtered_matches: + sentence_key = ' '.join(sorted(filtered_matches)) + if sentence_key not in local_seen: + local_important.append(sentence) + 
local_seen.add(sentence_key) + + return local_word_freq, local_important, local_seen, batch_idx +from tqdm import tqdm + +def is_traditional_translation_api(model: str) -> bool: + """Check if the model is a traditional translation API""" + return model in ['deepl', 'google-translate', 'google-translate-free'] or model.startswith('deepl/') or model.startswith('google-translate/') + +def get_chapter_terminology(is_text_file, chapter_data=None): + """Get appropriate terminology (Chapter/Section) based on source type""" + if is_text_file: + return "Section" + if chapter_data: + if chapter_data.get('filename', '').endswith('.txt') or chapter_data.get('is_chunk', False): + return "Section" + return "Chapter" +# ===================================================== +# CONFIGURATION AND ENVIRONMENT MANAGEMENT +# ===================================================== +class TranslationConfig: + """Centralized configuration management""" + def __init__(self): + self.MODEL = os.getenv("MODEL", "gemini-1.5-flash") + self.input_path = os.getenv("input_path", "default.epub") + self.PROFILE_NAME = os.getenv("PROFILE_NAME", "korean").lower() + self.CONTEXTUAL = os.getenv("CONTEXTUAL", "1") == "1" + self.DELAY = float(os.getenv("SEND_INTERVAL_SECONDS", "1")) + self.SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "").strip() + self.REMOVE_AI_ARTIFACTS = os.getenv("REMOVE_AI_ARTIFACTS", "0") == "1" + self.TEMP = float(os.getenv("TRANSLATION_TEMPERATURE", "0.3")) + self.HIST_LIMIT = int(os.getenv("TRANSLATION_HISTORY_LIMIT", "20")) + self.MAX_OUTPUT_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "8192")) + self.EMERGENCY_RESTORE = os.getenv("EMERGENCY_PARAGRAPH_RESTORE", "1") == "1" + self.BATCH_TRANSLATION = os.getenv("BATCH_TRANSLATION", "0") == "1" + self.BATCH_SIZE = int(os.getenv("BATCH_SIZE", "10")) + self.ENABLE_IMAGE_TRANSLATION = os.getenv("ENABLE_IMAGE_TRANSLATION", "1") == "1" + self.TRANSLATE_BOOK_TITLE = os.getenv("TRANSLATE_BOOK_TITLE", "1") == "1" + self.DISABLE_ZERO_DETECTION = os.getenv("DISABLE_ZERO_DETECTION", "0") == "1" + self.ENABLE_AUTO_GLOSSARY = os.getenv("ENABLE_AUTO_GLOSSARY", "0") == "1" + self.COMPREHENSIVE_EXTRACTION = os.getenv("COMPREHENSIVE_EXTRACTION", "0") == "1" + self.MANUAL_GLOSSARY = os.getenv("MANUAL_GLOSSARY") + self.RETRY_TRUNCATED = os.getenv("RETRY_TRUNCATED", "0") == "1" + self.RETRY_DUPLICATE_BODIES = os.getenv("RETRY_DUPLICATE_BODIES", "1") == "1" + self.RETRY_TIMEOUT = os.getenv("RETRY_TIMEOUT", "0") == "1" + self.CHUNK_TIMEOUT = int(os.getenv("CHUNK_TIMEOUT", "900")) + self.MAX_RETRY_TOKENS = int(os.getenv("MAX_RETRY_TOKENS", "16384")) + self.DUPLICATE_LOOKBACK_CHAPTERS = int(os.getenv("DUPLICATE_LOOKBACK_CHAPTERS", "3")) + self.USE_ROLLING_SUMMARY = os.getenv("USE_ROLLING_SUMMARY", "0") == "1" + self.ROLLING_SUMMARY_EXCHANGES = int(os.getenv("ROLLING_SUMMARY_EXCHANGES", "5")) + self.ROLLING_SUMMARY_MODE = os.getenv("ROLLING_SUMMARY_MODE", "replace") + # New: maximum number of rolling summary entries to retain when in append mode (0 = unlimited) + self.ROLLING_SUMMARY_MAX_ENTRIES = int(os.getenv("ROLLING_SUMMARY_MAX_ENTRIES", "10")) + self.DUPLICATE_DETECTION_MODE = os.getenv("DUPLICATE_DETECTION_MODE", "basic") + self.AI_HUNTER_THRESHOLD = int(os.getenv("AI_HUNTER_THRESHOLD", "75")) + self.TRANSLATION_HISTORY_ROLLING = os.getenv("TRANSLATION_HISTORY_ROLLING", "0") == "1" + self.API_KEY = (os.getenv("API_KEY") or + os.getenv("OPENAI_API_KEY") or + os.getenv("OPENAI_OR_Gemini_API_KEY") or + os.getenv("GEMINI_API_KEY")) + # NEW: Simple chapter number offset + 
self.CHAPTER_NUMBER_OFFSET = int(os.getenv("CHAPTER_NUMBER_OFFSET", "0")) + self.ENABLE_WATERMARK_REMOVAL = os.getenv("ENABLE_WATERMARK_REMOVAL", "1") == "1" + self.SAVE_CLEANED_IMAGES = os.getenv("SAVE_CLEANED_IMAGES", "1") == "1" + self.WATERMARK_PATTERN_THRESHOLD = int(os.getenv("WATERMARK_PATTERN_THRESHOLD", "10")) + self.WATERMARK_CLAHE_LIMIT = float(os.getenv("WATERMARK_CLAHE_LIMIT", "3.0")) + self.COMPRESSION_FACTOR = float(os.getenv("COMPRESSION_FACTOR", "1.0")) + + # Multi API key support + self.use_multi_api_keys = os.environ.get('USE_MULTI_API_KEYS', '0') == '1' + self.multi_api_keys = [] + + if self.use_multi_api_keys: + multi_keys_json = os.environ.get('MULTI_API_KEYS', '[]') + try: + self.multi_api_keys = json.loads(multi_keys_json) + print(f"Loaded {len(self.multi_api_keys)} API keys for multi-key mode") + except Exception as e: + print(f"Failed to load multi API keys: {e}") + self.use_multi_api_keys = False + + +# ===================================================== +# UNIFIED PATTERNS AND CONSTANTS +# ===================================================== +class PatternManager: + """Centralized pattern management""" + + CHAPTER_PATTERNS = [ + # English patterns + (r'chapter[\s_-]*(\d+)', re.IGNORECASE, 'english_chapter'), + (r'\bch\.?\s*(\d+)\b', re.IGNORECASE, 'english_ch'), + (r'part[\s_-]*(\d+)', re.IGNORECASE, 'english_part'), + (r'episode[\s_-]*(\d+)', re.IGNORECASE, 'english_episode'), + # Chinese patterns + (r'第\s*(\d+)\s*[章节話话回]', 0, 'chinese_chapter'), + (r'第\s*([一二三四五六七八九十百千万]+)\s*[章节話话回]', 0, 'chinese_chapter_cn'), + (r'(\d+)[章节話话回]', 0, 'chinese_short'), + # Japanese patterns + (r'第\s*(\d+)\s*話', 0, 'japanese_wa'), + (r'第\s*(\d+)\s*章', 0, 'japanese_chapter'), + (r'その\s*(\d+)', 0, 'japanese_sono'), + (r'(\d+)話目', 0, 'japanese_wame'), + # Korean patterns + (r'제\s*(\d+)\s*[장화권부편]', 0, 'korean_chapter'), + (r'(\d+)\s*[장화권부편]', 0, 'korean_short'), + (r'에피소드\s*(\d+)', 0, 'korean_episode'), + # Generic numeric patterns + (r'^\s*(\d+)\s*[-–—.\:]', re.MULTILINE, 'generic_numbered'), + (r'_(\d+)\.x?html?$', re.IGNORECASE, 'filename_number'), + (r'/(\d+)\.x?html?$', re.IGNORECASE, 'path_number'), + (r'(\d+)', 0, 'any_number'), + ] + + FILENAME_EXTRACT_PATTERNS = [ + # IMPORTANT: More specific patterns MUST come first + r'^\d{3}(\d)_(\d{2})_\.x?html?$', # Captures both parts for decimal: group1.group2 + r'^\d{4}_(\d+)\.x?html?$', # "0000_1.xhtml" - extracts 1, not 0000 + r'^\d+_(\d+)[_\.]', # Any digits followed by underscore then capture next digits + r'^(\d+)[_\.]', # Standard: "0249_" or "0249." + r'response_(\d+)_', # Standard pattern: response_001_ + r'response_(\d+)\.', # Pattern: response_001. 
+ r'(\d{3,5})[_\.]', # 3-5 digit pattern with padding + r'[Cc]hapter[_\s]*(\d+)', # Chapter word pattern + r'[Cc]h[_\s]*(\d+)', # Ch abbreviation + r'No(\d+)Chapter', # No prefix with Chapter - matches "No00013Chapter.xhtml" + r'No(\d+)Section', # No prefix with Section - matches "No00013Section.xhtml" + r'No(\d+)(?=\.|_|$)', # No prefix followed by end, dot, or underscore (not followed by text) + r'第(\d+)[章话回]', # Chinese chapter markers + r'_(\d+)(?:_|\.|$)', # Number between underscores or at end + r'^(\d+)(?:_|\.|$)', # Starting with number + r'(\d+)', # Any number (fallback) + ] + + CJK_HONORIFICS = { + 'korean': [ + # Modern honorifics + '님', '씨', '선배', '후배', '동기', '형', '누나', '언니', '오빠', '동생', + '선생님', '교수님', '박사님', '사장님', '회장님', '부장님', '과장님', '대리님', + '팀장님', '실장님', '이사님', '전무님', '상무님', '부사장님', '고문님', + + # Classical/formal honorifics + '공', '옹', '군', '양', '낭', '랑', '생', '자', '부', '모', '시', '제', '족하', + + # Royal/noble address forms + '마마', '마노라', '대감', '영감', '나리', '도령', '낭자', '아씨', '규수', + '각하', '전하', '폐하', '저하', '합하', '대비', '대왕', '왕자', '공주', + + # Buddhist/religious + '스님', '사부님', '조사님', '큰스님', '화상', '대덕', '대사', '법사', + '선사', '율사', '보살님', '거사님', '신부님', '목사님', '장로님', '집사님', + + # Confucian/scholarly + '부자', '선생', '대인', '어른', '어르신', '존자', '현자', '군자', '대부', + '학사', '진사', '문하생', '제자', + + # Kinship honorifics + '어르신', '할아버님', '할머님', '아버님', '어머님', '형님', '누님', + '아주버님', '아주머님', '삼촌', '이모님', '고모님', '외삼촌', '장인어른', + '장모님', '시아버님', '시어머님', '처남', '처형', '매형', '손님', + + # Verb-based honorific endings and speech levels + '습니다', 'ㅂ니다', '습니까', 'ㅂ니까', '시다', '세요', '셔요', '십시오', '시오', + '이에요', '예요', '이예요', '에요', '어요', '아요', '여요', '해요', '이세요', '으세요', + '으시', '시', '으십니다', '십니다', '으십니까', '십니까', '으셨', '셨', + '드립니다', '드려요', '드릴게요', '드리겠습니다', '올립니다', '올려요', + '사옵니다', '사뢰', '여쭙니다', '여쭤요', '아뢰', '뵙니다', '뵈요', '모십니다', + '시지요', '시죠', '시네요', '시는군요', '시는구나', '으실', '실', + '드시다', '잡수시다', '주무시다', '계시다', '가시다', '오시다', + + # Common verb endings with 있다/없다/하다 + '있어요', '있습니다', '있으세요', '있으십니까', '없어요', '없습니다', '없으세요', + '해요', '합니다', '하세요', '하십시오', '하시죠', '하시네요', '했어요', '했습니다', + '되세요', '되셨어요', '되십니다', '됩니다', '되요', '돼요', + '이야', '이네', '이구나', '이군', '이네요', '인가요', '인가', '일까요', '일까', + '거예요', '거에요', '겁니다', '건가요', '게요', '을게요', '을까요', '었어요', '었습니다', + '겠습니다', '겠어요', '겠네요', '을겁니다', '을거예요', '을거에요', + + # Common endings + '요', '죠', '네요', '는데요', '거든요', '니까', '으니까', '는걸요', '군요', '구나', + '는구나', '는군요', '더라고요', '더군요', '던데요', '나요', '가요', '까요', + '라고요', '다고요', '냐고요', '자고요', '란다', '단다', '냔다', '잔다', + + # Formal archaic endings + '나이다', '사옵나이다', '옵니다', '오', '소서', '으오', '으옵소서', '사이다', + '으시옵니다', '시옵니다', '으시옵니까', '시옵니까', '나이까', '리이까', '리이다', + '옵소서', '으소서', '소이다', '로소이다', '이옵니다', '이올시다', '하옵니다' + ], + 'japanese': [ + # Modern honorifics + 'さん', 'ちゃん', '君', 'くん', '様', 'さま', '先生', 'せんせい', '殿', 'どの', '先輩', 'せんぱい', + # Classical/historical + '氏', 'し', '朝臣', 'あそん', '宿禰', 'すくね', '連', 'むらじ', '臣', 'おみ', '君', 'きみ', + '真人', 'まひと', '道師', 'みちのし', '稲置', 'いなぎ', '直', 'あたい', '造', 'みやつこ', + # Court titles + '卿', 'きょう', '大夫', 'たいふ', '郎', 'ろう', '史', 'し', '主典', 'さかん', + # Buddhist titles + '和尚', 'おしょう', '禅師', 'ぜんじ', '上人', 'しょうにん', '聖人', 'しょうにん', + '法師', 'ほうし', '阿闍梨', 'あじゃり', '大和尚', 'だいおしょう', + # Shinto titles + '大宮司', 'だいぐうじ', '宮司', 'ぐうじ', '禰宜', 'ねぎ', '祝', 'はふり', + # Samurai era + '守', 'かみ', '介', 'すけ', '掾', 'じょう', '目', 'さかん', '丞', 'じょう', + # Keigo (honorific language) verb forms + 'です', 'ます', 'ございます', 'いらっしゃる', 'いらっしゃいます', 'おっしゃる', 'おっしゃいます', + 'なさる', 'なさいます', 'くださる', 'くださいます', 'いただく', 'いただきます', + 'おります', 'でございます', 'ございません', 'いたします', 'いたしました', + '申す', '申します', 
'申し上げる', '申し上げます', '存じる', '存じます', '存じ上げる', + '伺う', '伺います', '参る', '参ります', 'お目にかかる', 'お目にかかります', + '拝見', '拝見します', '拝聴', '拝聴します', '承る', '承ります', + # Respectful prefixes/suffixes + 'お', 'ご', '御', 'み', '美', '貴', '尊' + ], + 'chinese': [ + # Modern forms + '先生', '小姐', '夫人', '公子', '大人', '老师', '师父', '师傅', '同志', '同学', + # Ancient/classical forms + '子', '丈', '翁', '公', '侯', '伯', '叔', '仲', '季', '父', '甫', '卿', '君', '生', + # Imperial court + '陛下', '殿下', '千岁', '万岁', '圣上', '皇上', '天子', '至尊', '御前', '爷', + # Nobility/officials + '阁下', '大人', '老爷', '相公', '官人', '郎君', '娘子', '夫子', '足下', + # Religious titles + '上人', '法师', '禅师', '大师', '高僧', '圣僧', '神僧', '活佛', '仁波切', + '真人', '天师', '道长', '道友', '仙长', '上仙', '祖师', '掌教', + # Scholarly/Confucian + '夫子', '圣人', '贤人', '君子', '大儒', '鸿儒', '宗师', '泰斗', '巨擘', + # Martial arts + '侠士', '大侠', '少侠', '女侠', '英雄', '豪杰', '壮士', '义士', + # Family/kinship + '令尊', '令堂', '令郎', '令爱', '贤弟', '贤侄', '愚兄', '小弟', '家父', '家母', + # Humble forms + '在下', '小人', '鄙人', '不才', '愚', '某', '仆', '妾', '奴', '婢', + # Polite verbal markers + '请', '请问', '敢问', '恭请', '敬请', '烦请', '有请', '请教', '赐教', + '惠顾', '惠赐', '惠存', '笑纳', '雅正', '指正', '斧正', '垂询', + '拜', '拜见', '拜访', '拜读', '拜托', '拜谢', '敬上', '谨上', '顿首' + ], + 'english': [ + # Modern Korean romanizations (Revised Romanization of Korean - 2000) + '-nim', '-ssi', '-seonbae', '-hubae', '-donggi', '-hyeong', '-nuna', + '-eonni', '-oppa', '-dongsaeng', '-seonsaengnim', '-gyosunim', + '-baksanim', '-sajangnim', '-hoejangnim', '-bujangnim', '-gwajangnim', + '-daerim', '-timjangnim', '-siljangnim', '-isanim', '-jeonmunim', + '-sangmunim', '-busajangnim', '-gomunnim', + + # Classical/formal Korean romanizations + '-gong', '-ong', '-gun', '-yang', '-nang', '-rang', '-saeng', '-ja', + '-bu', '-mo', '-si', '-je', '-jokha', + + # Royal/noble Korean romanizations + '-mama', '-manora', '-daegam', '-yeonggam', '-nari', '-doryeong', + '-nangja', '-assi', '-gyusu', '-gakha', '-jeonha', '-pyeha', '-jeoha', + '-hapka', '-daebi', '-daewang', '-wangja', '-gongju', + + # Buddhist/religious Korean romanizations + '-seunim', '-sabunim', '-josanim', '-keunseunim', '-hwasang', + '-daedeok', '-daesa', '-beopsa', '-seonsa', '-yulsa', '-bosalnim', + '-geosanim', '-sinbunim', '-moksanim', '-jangnonim', '-jipsanim', + + # Confucian/scholarly Korean romanizations + '-buja', '-seonsaeng', '-daein', '-eoreun', '-eoreusin', '-jonja', + '-hyeonja', '-gunja', '-daebu', '-haksa', '-jinsa', '-munhasaeng', '-jeja', + + # Kinship Korean romanizations + '-harabeonim', '-halmeonim', '-abeonim', '-eomeonim', '-hyeongnim', + '-nunim', '-ajubeonim', '-ajumeonim', '-samchon', '-imonim', '-gomonim', + '-oesamchon', '-jangineoreun', '-jangmonim', '-siabeonim', '-sieomeonim', + '-cheonam', '-cheohyeong', '-maehyeong', '-sonnim', + + # Korean verb endings romanized (Revised Romanization) + '-seumnida', '-mnida', '-seumnikka', '-mnikka', '-sida', '-seyo', + '-syeoyo', '-sipsio', '-sio', '-ieyo', '-yeyo', '-iyeyo', '-eyo', + '-eoyo', '-ayo', '-yeoyo', '-haeyo', '-iseyo', '-euseyo', + '-eusi', '-si', '-eusimnida', '-simnida', '-eusimnikka', '-simnikka', + '-eusyeot', '-syeot', '-deurimnida', '-deuryeoyo', '-deurilgeyo', + '-deurigesseumnida', '-ollimnida', '-ollyeoyo', '-saomnida', '-saroe', + '-yeojjumnida', '-yeojjwoyo', '-aroe', '-boemnida', '-boeyo', '-mosimnida', + '-sijiyo', '-sijyo', '-sineyo', '-sineungunyo', '-sineunguna', '-eusil', '-sil', + '-deusida', '-japsusida', '-jumusida', '-gyesida', '-gasida', '-osida', + + # Common Korean verb endings romanized + '-isseoyo', '-isseumnida', '-isseuseyo', '-isseusimnikka', + 
'-eopseoyo', '-eopseumnida', '-eopseuseyo', '-hamnida', '-haseyo', + '-hasipsio', '-hasijyo', '-hasineyo', '-haesseoyo', '-haesseumnida', + '-doeseyo', '-doesyeosseoyo', '-doesimnida', '-doemnida', '-doeyo', '-dwaeyo', + '-iya', '-ine', '-iguna', '-igun', '-ineyo', '-ingayo', '-inga', + '-ilkkayo', '-ilkka', '-geoyeyo', '-geoeyo', '-geomnida', '-geongayo', + '-geyo', '-eulgeyo', '-eulkkayo', '-eosseoyo', '-eosseumnida', + '-gesseumnida', '-gesseoyo', '-genneyo', '-eulgeommida', '-eulgeoyeyo', '-eulgeoeyo', + + # Common Korean endings romanized + '-yo', '-jyo', '-neyo', '-neundeyo', '-geodeunyo', '-nikka', + '-eunikka', '-neungeolyo', '-gunyo', '-guna', '-neunguna', '-neungunyo', + '-deoragoyo', '-deogunyo', '-deondeyo', '-nayo', '-gayo', '-kkayo', + '-ragoyo', '-dagoyo', '-nyagoyo', '-jagoyo', '-randa', '-danda', + '-nyanda', '-janda', + + # Formal archaic Korean romanized + '-naida', '-saomnaida', '-omnida', '-o', '-soseo', '-euo', + '-euopsoseo', '-saida', '-eusiomnida', '-siomnida', '-eusiomnikka', + '-siomnikka', '-naikka', '-riikka', '-riida', '-opsoseo', '-eusoseo', + '-soida', '-rosoida', '-iomnida', '-iolsida', '-haomnida', + + # Japanese keigo romanized (keeping existing) + '-san', '-chan', '-kun', '-sama', '-sensei', '-senpai', '-dono', + '-shi', '-tan', '-chin', '-desu', '-masu', '-gozaimasu', + '-irassharu', '-irasshaimasu', '-ossharu', '-osshaimasu', + '-nasaru', '-nasaimasu', '-kudasaru', '-kudasaimasu', '-itadaku', + '-itadakimasu', '-orimasu', '-degozaimasu', '-gozaimasen', + '-itashimasu', '-itashimashita', '-mousu', '-moushimasu', + '-moushiageru', '-moushiagemasu', '-zonjiru', '-zonjimasu', + '-ukagau', '-ukagaimasu', '-mairu', '-mairimasu', '-haiken', + '-haikenshimasu', + + # Chinese romanizations (keeping existing) + '-xiong', '-di', '-ge', '-gege', '-didi', '-jie', '-jiejie', + '-meimei', '-shixiong', '-shidi', '-shijie', '-shimei', '-gongzi', + '-guniang', '-xiaojie', '-daren', '-qianbei', '-daoyou', '-zhanglao', + '-shibo', '-shishu', '-shifu', '-laoshi', '-xiansheng', '-daxia', + '-shaoxia', '-nvxia', '-jushi', '-shanren', '-dazhang', '-zhenren', + + # Ancient Chinese romanizations + '-zi', '-gong', '-hou', '-bo', '-jun', '-qing', '-weng', '-fu', + '-sheng', '-lang', '-langjun', '-niangzi', '-furen', '-gege', + '-jiejie', '-yeye', '-nainai', + + # Chinese politeness markers romanized + '-qing', '-jing', '-gong', '-hui', '-ci', '-bai', '-gan', '-chui', + 'qingwen', 'ganwen', 'gongjing', 'jingjing', 'baijian', 'baifang', + 'baituo' + ] + } + + TITLE_PATTERNS = { + 'korean': [ + # Modern titles + r'\b(왕|여왕|왕자|공주|황제|황후|대왕|대공|공작|백작|자작|남작|기사|장군|대장|원수|제독|함장|대신|재상|총리|대통령|시장|지사|검사|판사|변호사|의사|박사|교수|신부|목사|스님|도사)\b', + r'\b(폐하|전하|각하|예하|님|대감|영감|나리|도련님|아가씨|부인|선생)\b', + # Historical/classical titles + r'\b(대왕|태왕|왕비|왕후|세자|세자빈|대군|군|옹주|공주|부마|원자|원손)\b', + r'\b(영의정|좌의정|우의정|판서|참판|참의|정승|판사|사또|현령|군수|목사|부사)\b', + r'\b(대제학|제학|대사간|사간|대사헌|사헌|도승지|승지|한림|사관|내시|환관)\b', + r'\b(병조판서|이조판서|호조판서|예조판서|형조판서|공조판서)\b', + r'\b(도원수|부원수|병마절도사|수군절도사|첨절제사|만호|천호|백호)\b', + r'\b(정일품|종일품|정이품|종이품|정삼품|종삼품|정사품|종사품|정오품|종오품)\b', + # Korean honorific verb endings patterns + r'(습니다|ㅂ니다|습니까|ㅂ니까|세요|셔요|십시오|시오)$', + r'(이에요|예요|이예요|에요|어요|아요|여요|해요)$', + r'(으시|시)(었|겠|ㄹ|을|는|던)*(습니다|ㅂ니다|어요|아요|세요)', + r'(드립니다|드려요|드릴게요|드리겠습니다|올립니다|올려요)$', + r'(사옵니다|여쭙니다|여쭤요|뵙니다|뵈요|모십니다)$', + r'(나이다|사옵나이다|옵니다|으오|으옵소서|사이다)$' + ], + 'japanese': [ + # Modern titles + r'\b(王|女王|王子|姫|皇帝|皇后|天皇|皇太子|大王|大公|公爵|伯爵|子爵|男爵|騎士|将軍|大将|元帥|提督|艦長|大臣|宰相|総理|大統領|市長|知事|検事|裁判官|弁護士|医者|博士|教授|神父|牧師|僧侶|道士)\b', + r'\b(陛下|殿下|閣下|猊下|様|大人|殿|卿|君|氏)\b', + # Historical titles + 
r'\b(天皇|皇后|皇太子|親王|内親王|王|女王|太政大臣|左大臣|右大臣|内大臣|大納言|中納言|参議)\b', + r'\b(関白|摂政|征夷大将軍|管領|執権|守護|地頭|代官|奉行|与力|同心)\b', + r'\b(太政官|神祇官|式部省|治部省|民部省|兵部省|刑部省|大蔵省|宮内省)\b', + r'\b(大僧正|僧正|大僧都|僧都|律師|大法師|法師|大禅師|禅師)\b', + r'\b(正一位|従一位|正二位|従二位|正三位|従三位|正四位|従四位|正五位|従五位)\b', + r'\b(大和守|山城守|摂津守|河内守|和泉守|伊賀守|伊勢守|尾張守|三河守|遠江守)\b', + # Japanese keigo (honorific language) patterns + r'(です|ます|ございます)$', + r'(いらっしゃ|おっしゃ|なさ|くださ)(います|いました|る|った)$', + r'(いただ|お|ご|御)(き|きます|きました|く|ける|けます)', + r'(申し上げ|申し|存じ上げ|存じ|伺い|参り)(ます|ました|る)$', + r'(拝見|拝聴|承り|承)(します|しました|いたします|いたしました)$', + r'お[^あ-ん]+[になる|になります|くださる|くださいます]' + ], + 'chinese': [ + # Modern titles + r'\b(王|女王|王子|公主|皇帝|皇后|大王|大公|公爵|伯爵|子爵|男爵|骑士|将军|大将|元帅|提督|舰长|大臣|宰相|总理|大总统|市长|知事|检察官|法官|律师|医生|博士|教授|神父|牧师|和尚|道士)\b', + r'\b(陛下|殿下|阁下|大人|老爷|夫人|小姐|公子|少爷|姑娘|先生)\b', + # Imperial titles + r'\b(天子|圣上|皇上|万岁|万岁爷|太上皇|皇太后|太后|皇后|贵妃|妃|嫔|贵人|常在|答应)\b', + r'\b(太子|皇子|皇孙|亲王|郡王|贝勒|贝子|公主|格格|郡主|县主|郡君|县君)\b', + # Ancient official titles + r'\b(丞相|相国|太师|太傅|太保|太尉|司徒|司空|大司马|大司农|大司寇)\b', + r'\b(尚书|侍郎|郎中|员外郎|主事|知府|知州|知县|同知|通判|推官|巡抚|总督)\b', + r'\b(御史大夫|御史中丞|监察御史|给事中|都察院|翰林院|国子监|钦天监)\b', + r'\b(大学士|学士|侍读|侍讲|编修|检讨|庶吉士|举人|进士|状元|榜眼|探花)\b', + # Military ranks + r'\b(大元帅|元帅|大将军|将军|都督|都指挥使|指挥使|千户|百户|总兵|副将|参将|游击|都司|守备)\b', + r'\b(提督|总兵官|副总兵|参将|游击将军|都司|守备|千总|把总|外委)\b', + # Religious titles + r'\b(国师|帝师|法王|活佛|堪布|仁波切|大和尚|方丈|住持|首座|维那|知客)\b', + r'\b(天师|真人|道长|掌教|监院|高功|都讲|总理|提点|知观)\b', + # Nobility ranks + r'\b(公|侯|伯|子|男|开国公|郡公|国公|郡侯|县侯|郡伯|县伯|县子|县男)\b', + r'\b(一品|二品|三品|四品|五品|六品|七品|八品|九品|正一品|从一品|正二品|从二品)\b', + # Chinese politeness markers + r'(请|敢|恭|敬|烦|有)(问|请|赐|教|告|示)', + r'(拜|惠|赐|垂|雅|笑)(见|访|读|托|谢|顾|赐|存|纳|正|询)', + r'(敬|谨|顿)(上|呈|启|白|首)' + ], + 'english': [ + # Western titles + r'\b(King|Queen|Prince|Princess|Emperor|Empress|Duke|Duchess|Marquis|Marquess|Earl|Count|Countess|Viscount|Viscountess|Baron|Baroness|Knight|Lord|Lady|Sir|Dame|General|Admiral|Captain|Major|Colonel|Commander|Lieutenant|Sergeant|Minister|Chancellor|President|Mayor|Governor|Judge|Doctor|Professor|Father|Reverend|Master|Mistress)\b', + r'\b(His|Her|Your|Their)\s+(Majesty|Highness|Grace|Excellency|Honor|Worship|Lordship|Ladyship)\b', + # Romanized historical titles + r'\b(Tianzi|Huangdi|Huanghou|Taizi|Qinwang|Junwang|Beile|Beizi|Gongzhu|Gege)\b', + r'\b(Chengxiang|Zaixiang|Taishi|Taifu|Taibao|Taiwei|Situ|Sikong|Dasima)\b', + r'\b(Shogun|Daimyo|Samurai|Ronin|Ninja|Tenno|Mikado|Kampaku|Sessho)\b', + r'\b(Taewang|Wangbi|Wanghu|Seja|Daegun|Gun|Ongju|Gongju|Buma)\b' + ] + } + + # Expanded Chinese numbers including classical forms + CHINESE_NUMS = { + # Basic numbers + '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10, + '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15, + '十六': 16, '十七': 17, '十八': 18, '十九': 19, '二十': 20, + '二十一': 21, '二十二': 22, '二十三': 23, '二十四': 24, '二十五': 25, + '三十': 30, '四十': 40, '五十': 50, '六十': 60, + '七十': 70, '八十': 80, '九十': 90, '百': 100, + # Classical/formal numbers + '壹': 1, '贰': 2, '叁': 3, '肆': 4, '伍': 5, + '陆': 6, '柒': 7, '捌': 8, '玖': 9, '拾': 10, + '佰': 100, '仟': 1000, '萬': 10000, '万': 10000, + # Ordinal indicators + '第一': 1, '第二': 2, '第三': 3, '第四': 4, '第五': 5, + '首': 1, '次': 2, '初': 1, '末': -1, + } + + # Common words - keeping the same for filtering + COMMON_WORDS = { + '이', '그', '저', '우리', '너희', '자기', '당신', '여기', '거기', '저기', + '오늘', '내일', '어제', '지금', '아까', '나중', '먼저', '다음', '마지막', + '모든', '어떤', '무슨', '이런', '그런', '저런', '같은', '다른', '새로운', + '하다', '있다', '없다', '되다', '하는', '있는', '없는', '되는', + '것', '수', '때', '년', '월', '일', '시', '분', '초', + '은', '는', '이', '가', '을', '를', '에', '의', '와', '과', '도', '만', + '에서', '으로', '로', 
'까지', '부터', '에게', '한테', '께', '께서', + 'この', 'その', 'あの', 'どの', 'これ', 'それ', 'あれ', 'どれ', + 'わたし', 'あなた', 'かれ', 'かのじょ', 'わたしたち', 'あなたたち', + 'きょう', 'あした', 'きのう', 'いま', 'あとで', 'まえ', 'つぎ', + 'の', 'は', 'が', 'を', 'に', 'で', 'と', 'も', 'や', 'から', 'まで', + '这', '那', '哪', '这个', '那个', '哪个', '这里', '那里', '哪里', + '我', '你', '他', '她', '它', '我们', '你们', '他们', '她们', + '今天', '明天', '昨天', '现在', '刚才', '以后', '以前', '后来', + '的', '了', '在', '是', '有', '和', '与', '或', '但', '因为', '所以', + '一', '二', '三', '四', '五', '六', '七', '八', '九', '十', + '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', + } +# ===================================================== +# CHUNK CONTEXT MANAGER (unchanged - already optimal) +# ===================================================== +class ChunkContextManager: + """Manage context within a chapter separate from history""" + def __init__(self): + self.current_chunks = [] + self.chapter_num = None + self.chapter_title = None + + def start_chapter(self, chapter_num, chapter_title): + """Start a new chapter context""" + self.current_chunks = [] + self.chapter_num = chapter_num + self.chapter_title = chapter_title + + def add_chunk(self, user_content, assistant_content, chunk_idx, total_chunks): + """Add a chunk to the current chapter context""" + self.current_chunks.append({ + "user": user_content, + "assistant": assistant_content, + "chunk_idx": chunk_idx, + "total_chunks": total_chunks + }) + + def get_context_messages(self, limit=3): + """Get last N chunks as messages for API context""" + context = [] + for chunk in self.current_chunks[-limit:]: + context.extend([ + {"role": "user", "content": chunk["user"]}, + {"role": "assistant", "content": chunk["assistant"]} + ]) + return context + + def get_summary_for_history(self): + """Create a summary representation for the history""" + if not self.current_chunks: + return None, None + + total_chunks = len(self.current_chunks) + + user_summary = f"[Chapter {self.chapter_num}: {self.chapter_title}]\n" + user_summary += f"[{total_chunks} chunks processed]\n" + if self.current_chunks: + first_chunk = self.current_chunks[0]['user'] + if len(first_chunk) > 500: + user_summary += first_chunk[:500] + "..." + else: + user_summary += first_chunk + + assistant_summary = f"[Chapter {self.chapter_num} Translation Complete]\n" + assistant_summary += f"[Translated in {total_chunks} chunks]\n" + if self.current_chunks: + samples = [] + first_trans = self.current_chunks[0]['assistant'] + samples.append(f"Beginning: {first_trans[:200]}..." if len(first_trans) > 200 else f"Beginning: {first_trans}") + + if total_chunks > 2: + mid_idx = total_chunks // 2 + mid_trans = self.current_chunks[mid_idx]['assistant'] + samples.append(f"Middle: {mid_trans[:200]}..." if len(mid_trans) > 200 else f"Middle: {mid_trans}") + + if total_chunks > 1: + last_trans = self.current_chunks[-1]['assistant'] + samples.append(f"End: {last_trans[:200]}..." 
if len(last_trans) > 200 else f"End: {last_trans}") + + assistant_summary += "\n".join(samples) + + return user_summary, assistant_summary + + def clear(self): + """Clear the current chapter context""" + self.current_chunks = [] + self.chapter_num = None + self.chapter_title = None + +# ===================================================== +# UNIFIED UTILITIES +# ===================================================== +class FileUtilities: + """Utilities for file and path operations""" + + @staticmethod + def extract_actual_chapter_number(chapter, patterns=None, config=None): + """Extract actual chapter number from filename using improved logic""" + + # IMPORTANT: Check if this is a pre-split TEXT FILE chunk first + if (chapter.get('is_chunk', False) and + 'num' in chapter and + isinstance(chapter['num'], float) and + chapter.get('filename', '').endswith('.txt')): + # For text file chunks only, preserve the decimal number + return chapter['num'] # This will be 1.1, 1.2, etc. + + # Get filename for extraction + filename = chapter.get('original_basename') or chapter.get('filename', '') + + # Use our improved extraction function + # Note: We don't have opf_spine_position here, so pass None + actual_num, method = extract_chapter_number_from_filename(filename, opf_spine_position=None) + + # If extraction succeeded, return the result + if actual_num is not None: + #print(f"[DEBUG] Extracted {actual_num} from '{filename}' using method: {method}") + return actual_num + + # Fallback to original complex logic for edge cases + actual_num = None + + if patterns is None: + patterns = PatternManager.FILENAME_EXTRACT_PATTERNS + + # Try to extract from original basename first + if chapter.get('original_basename'): + basename = chapter['original_basename'] + + # Check if decimal chapters are enabled for EPUBs + enable_decimal = os.getenv('ENABLE_DECIMAL_CHAPTERS', '0') == '1' + + # For EPUBs, only check decimal patterns if the toggle is enabled + if enable_decimal: + # Check for standard decimal chapter numbers (e.g., Chapter_1.1, 1.2.html) + decimal_match = re.search(r'(\d+)\.(\d+)', basename) + if decimal_match: + actual_num = float(f"{decimal_match.group(1)}.{decimal_match.group(2)}") + return actual_num + + # Check for the XXXX_YY pattern where it represents X.YY decimal chapters + decimal_prefix_match = re.match(r'^(\d{4})_(\d{1,2})(?:_|\.)?(?:x?html?)?$', basename) + if decimal_prefix_match: + first_part = decimal_prefix_match.group(1) + second_part = decimal_prefix_match.group(2) + + if len(second_part) == 2 and int(second_part) > 9: + chapter_num = int(first_part[-1]) + decimal_part = second_part + actual_num = float(f"{chapter_num}.{decimal_part}") + return actual_num + + # Standard XXXX_Y format handling (existing logic) + prefix_suffix_match = re.match(r'^(\d+)_(\d+)', basename) + if prefix_suffix_match: + second_part = prefix_suffix_match.group(2) + + if not enable_decimal: + actual_num = int(second_part) + return actual_num + else: + if len(second_part) == 1 or (len(second_part) == 2 and int(second_part) <= 9): + actual_num = int(second_part) + return actual_num + + # Check other patterns if no match yet + for pattern in patterns: + if pattern in [r'^(\d+)[_\.]', r'(\d{3,5})[_\.]', r'^(\d+)_']: + continue + match = re.search(pattern, basename, re.IGNORECASE) + if match: + actual_num = int(match.group(1)) + break + + # Final fallback to chapter num + if actual_num is None: + actual_num = chapter.get("num", 0) + print(f"[DEBUG] No pattern matched, using chapter num: {actual_num}") + + return 
actual_num + + @staticmethod + def create_chapter_filename(chapter, actual_num=None): + """Create consistent chapter filename""" + # Check if we should use header as output name + use_header_output = os.getenv("USE_HEADER_AS_OUTPUT", "0") == "1" + + # Check if this is for a text file + is_text_file = chapter.get('filename', '').endswith('.txt') or chapter.get('is_chunk', False) + + # Respect toggle: retain source extension and remove 'response_' prefix + retain = should_retain_source_extension() + + # Helper to compute full original extension chain (e.g., '.html.xhtml') + def _full_ext_from_original(ch): + fn = ch.get('original_filename') + if not fn: + return '.html' + bn = os.path.basename(fn) + root, ext = os.path.splitext(bn) + if not ext: + return '.html' + full_ext = '' + while ext: + full_ext = ext + full_ext + root, ext = os.path.splitext(root) + return full_ext or '.html' + + if use_header_output and chapter.get('title'): + safe_title = make_safe_filename(chapter['title'], actual_num or chapter.get('num', 0)) + if safe_title and safe_title != f"chapter_{actual_num or chapter.get('num', 0):03d}": + if is_text_file: + return f"{safe_title}.txt" if retain else f"response_{safe_title}.txt" + else: + # If retaining, use full original ext chain; else default .html + if retain: + return f"{safe_title}{_full_ext_from_original(chapter)}" + return f"response_{safe_title}.html" + + # Check if decimal chapters are enabled + enable_decimal = os.getenv('ENABLE_DECIMAL_CHAPTERS', '0') == '1' + + # For EPUBs with decimal detection enabled + if enable_decimal and 'original_basename' in chapter and chapter['original_basename']: + basename = chapter['original_basename'] + + # Check for standard decimal pattern (e.g., Chapter_1.1) + decimal_match = re.search(r'(\d+)\.(\d+)', basename) + if decimal_match: + # Create a modified basename that preserves the decimal + base = os.path.splitext(basename)[0] + # Replace dots with underscores for filesystem compatibility + base = base.replace('.', '_') + # Use .txt extension for text files + if is_text_file: + return f"{base}.txt" if retain else f"response_{base}.txt" + else: + if retain: + return f"{base}{_full_ext_from_original(chapter)}" + return f"response_{base}.html" + + # Check for the special XXXX_YY decimal pattern + decimal_prefix_match = re.match(r'^(\d{4})_(\d{1,2})(?:_|\.)?(?:x?html?)?$', basename) + if decimal_prefix_match: + first_part = decimal_prefix_match.group(1) + second_part = decimal_prefix_match.group(2) + + # If this matches our decimal pattern (e.g., 0002_33 -> 2.33) + if len(second_part) == 2 and int(second_part) > 9: + chapter_num = int(first_part[-1]) + decimal_part = second_part + # Create filename reflecting the decimal interpretation + if is_text_file: + return f"{chapter_num:04d}_{decimal_part}.txt" if retain else f"response_{chapter_num:04d}_{decimal_part}.txt" + else: + return f"{chapter_num:04d}_{decimal_part}{_full_ext_from_original(chapter)}" if retain else f"response_{chapter_num:04d}_{decimal_part}.html" + + # Standard EPUB handling - use original basename + if 'original_basename' in chapter and chapter['original_basename']: + base = os.path.splitext(chapter['original_basename'])[0] + # Use .txt extension for text files + if is_text_file: + return f"{base}.txt" if retain else f"response_{base}.txt" + else: + if retain: + # Preserve the full original extension chain + return f"{base}{_full_ext_from_original(chapter)}" + return f"response_{base}.html" + else: + # Text file handling (no original basename) + if actual_num is 
None: + actual_num = chapter.get('actual_chapter_num', chapter.get('num', 0)) + + # Handle decimal chapter numbers from text file splitting + if isinstance(actual_num, float): + major = int(actual_num) + minor = int(round((actual_num - major) * 10)) + if is_text_file: + return f"{major:04d}_{minor}.txt" if retain else f"response_{major:04d}_{minor}.txt" + else: + return f"{major:04d}_{minor}.html" if retain else f"response_{major:04d}_{minor}.html" + else: + if is_text_file: + return f"{actual_num:04d}.txt" if retain else f"response_{actual_num:04d}.txt" + else: + return f"{actual_num:04d}.html" if retain else f"response_{actual_num:04d}.html" + +# ===================================================== +# UNIFIED PROGRESS MANAGER +# ===================================================== +class ProgressManager: + """Unified progress management""" + + def __init__(self, payloads_dir): + self.payloads_dir = payloads_dir + self.PROGRESS_FILE = os.path.join(payloads_dir, "translation_progress.json") + self.prog = self._init_or_load() + + def _init_or_load(self): + """Initialize or load progress tracking with improved structure""" + if os.path.exists(self.PROGRESS_FILE): + try: + with open(self.PROGRESS_FILE, "r", encoding="utf-8") as pf: + prog = json.load(pf) + except json.JSONDecodeError as e: + print(f"⚠️ Warning: Progress file is corrupted: {e}") + print("🔧 Attempting to fix JSON syntax...") + + try: + with open(self.PROGRESS_FILE, "r", encoding="utf-8") as pf: + content = pf.read() + + content = re.sub(r',\s*\]', ']', content) + content = re.sub(r',\s*\}', '}', content) + + prog = json.loads(content) + + with open(self.PROGRESS_FILE, "w", encoding="utf-8") as pf: + json.dump(prog, pf, ensure_ascii=False, indent=2) + print("✅ Successfully fixed and saved progress file") + + except Exception as fix_error: + print(f"❌ Could not fix progress file: {fix_error}") + print("🔄 Creating backup and starting fresh...") + + backup_name = f"translation_progress_backup_{int(time.time())}.json" + backup_path = os.path.join(self.payloads_dir, backup_name) + try: + shutil.copy(self.PROGRESS_FILE, backup_path) + print(f"📁 Backup saved to: {backup_name}") + except: + pass + + prog = { + "chapters": {}, + "chapter_chunks": {}, + "version": "2.0" + } + + if "chapters" not in prog: + prog["chapters"] = {} + + for idx in prog.get("completed", []): + prog["chapters"][str(idx)] = { + "status": "completed", + "timestamp": None + } + + if "chapter_chunks" not in prog: + prog["chapter_chunks"] = {} + + else: + prog = { + "chapters": {}, + "chapter_chunks": {}, + "image_chunks": {}, + "version": "2.1" + } + + return prog + + def save(self): + """Save progress to file""" + try: + self.prog["completed_list"] = [] + for chapter_key, chapter_info in self.prog.get("chapters", {}).items(): + if chapter_info.get("status") == "completed" and chapter_info.get("output_file"): + self.prog["completed_list"].append({ + "num": chapter_info.get("chapter_num", 0), + "idx": chapter_info.get("chapter_idx", 0), + "title": f"Chapter {chapter_info.get('chapter_num', 0)}", + "file": chapter_info.get("output_file", ""), + "key": chapter_key + }) + + if self.prog.get("completed_list"): + self.prog["completed_list"].sort(key=lambda x: x["num"]) + + temp_file = self.PROGRESS_FILE + '.tmp' + with open(temp_file, "w", encoding="utf-8") as pf: + json.dump(self.prog, pf, ensure_ascii=False, indent=2) + + if os.path.exists(self.PROGRESS_FILE): + os.remove(self.PROGRESS_FILE) + os.rename(temp_file, self.PROGRESS_FILE) + except Exception as e: + 
print(f"⚠️ Warning: Failed to save progress: {e}") + temp_file = self.PROGRESS_FILE + '.tmp' + if os.path.exists(temp_file): + try: + os.remove(temp_file) + except: + pass + + def update(self, idx, actual_num, content_hash, output_file, status="in_progress", ai_features=None, raw_num=None): + """Update progress for a chapter""" + # CHANGE THIS LINE - Use actual_num instead of idx + chapter_key = str(actual_num) # WAS: chapter_key = str(idx) + + chapter_info = { + "actual_num": actual_num, + "content_hash": content_hash, + "output_file": output_file, + "status": status, + "last_updated": time.time() + } + + # Add raw number tracking + if raw_num is not None: + chapter_info["raw_chapter_num"] = raw_num + + # Check if zero detection was disabled + if hasattr(builtins, '_DISABLE_ZERO_DETECTION') and builtins._DISABLE_ZERO_DETECTION: + chapter_info["zero_adjusted"] = False + else: + chapter_info["zero_adjusted"] = (raw_num != actual_num) if raw_num is not None else False + + # FIXED: Store AI features if provided + if ai_features is not None: + chapter_info["ai_features"] = ai_features + + # Preserve existing AI features if not overwriting + elif chapter_key in self.prog["chapters"] and "ai_features" in self.prog["chapters"][chapter_key]: + chapter_info["ai_features"] = self.prog["chapters"][chapter_key]["ai_features"] + + self.prog["chapters"][chapter_key] = chapter_info + + def check_chapter_status(self, chapter_idx, actual_num, content_hash, output_dir, chapter_obj=None): + """Check if a chapter needs translation""" + + chapter_key = str(actual_num) + + # Check if we have tracking for this chapter + if chapter_key in self.prog["chapters"]: + chapter_info = self.prog["chapters"][chapter_key] + status = chapter_info.get("status") + + # Failed statuses ALWAYS trigger retranslation + if status in ["qa_failed", "failed", "error", "file_missing"]: + return True, None, None + + # Completed - check file exists + if status in ["completed", "completed_empty", "completed_image_only"]: + output_file = chapter_info.get("output_file") + if output_file: + output_path = os.path.join(output_dir, output_file) + if os.path.exists(output_path): + return False, f"Chapter {actual_num} already translated: {output_file}", output_file + + # File missing - retranslate + del self.prog["chapters"][chapter_key] + if chapter_key in self.prog.get("chapter_chunks", {}): + del self.prog["chapter_chunks"][chapter_key] + self.save() + return True, None, None + + # Any other status - retranslate + return True, None, None + + # BEFORE auto-discovery, check if ANY entry exists for this chapter's file + if chapter_obj: + from TransateKRtoEN import FileUtilities + output_filename = FileUtilities.create_chapter_filename(chapter_obj, actual_num) + + # Check if ANY entry has this output file + for key, info in self.prog["chapters"].items(): + if info.get("output_file") == output_filename: + # Entry exists somewhere else - don't auto-discover + return True, None, None + + # NOW check if file exists for auto-discovery + output_path = os.path.join(output_dir, output_filename) + if os.path.exists(output_path): + print(f"📁 Found existing file for chapter {actual_num}: {output_filename}") + + self.prog["chapters"][chapter_key] = { + "actual_num": actual_num, + "content_hash": content_hash, + "output_file": output_filename, + "status": "completed", + "last_updated": os.path.getmtime(output_path), + "auto_discovered": True + } + + self.save() + return False, f"Chapter {actual_num} already exists: {output_filename}", output_filename + + # No 
entry and no file - needs translation + return True, None, None + + def cleanup_missing_files(self, output_dir): + """Remove missing files and duplicates - NO RESTORATION BULLSHIT""" + cleaned_count = 0 + + # Remove entries for missing files + for chapter_key, chapter_info in list(self.prog["chapters"].items()): + output_file = chapter_info.get("output_file") + + if output_file: + output_path = os.path.join(output_dir, output_file) + if not os.path.exists(output_path): + print(f"🗑️ Removing entry for missing file: {output_file}") + + # Delete the entry + del self.prog["chapters"][chapter_key] + + # Remove chunk data + if chapter_key in self.prog.get("chapter_chunks", {}): + del self.prog["chapter_chunks"][chapter_key] + + cleaned_count += 1 + + if cleaned_count > 0: + print(f"🔄 Removed {cleaned_count} entries - will retranslate") + + def migrate_to_content_hash(self, chapters): + """Change keys to match actual_num values for proper mapping and sort by chapter number""" + + new_chapters = {} + migrated_count = 0 + + for old_key, chapter_info in self.prog["chapters"].items(): + actual_num = chapter_info.get("actual_num") + + if actual_num is not None: + new_key = str(actual_num) + + # If key needs to change + if old_key != new_key: + print(f" Migrating: key '{old_key}' → '{new_key}' (actual_num: {actual_num})") + migrated_count += 1 + + # Check for collision + if new_key in new_chapters: + print(f" ⚠️ Warning: Key '{new_key}' already exists, keeping newer entry") + if chapter_info.get("last_updated", 0) > new_chapters[new_key].get("last_updated", 0): + new_chapters[new_key] = chapter_info + else: + new_chapters[new_key] = chapter_info + else: + # Key already matches actual_num + new_chapters[old_key] = chapter_info + else: + # No actual_num, keep as-is + print(f" ⚠️ Warning: No actual_num for key '{old_key}', keeping as-is") + new_chapters[old_key] = chapter_info + + # Sort chapters by actual_num field, then by key as fallback + def sort_key(item): + key, chapter_info = item + actual_num = chapter_info.get("actual_num") + if actual_num is not None: + return actual_num + else: + # Fallback to key if no actual_num + try: + return int(key) + except ValueError: + # For non-numeric keys, sort them at the end + return float('inf') + + sorted_chapters = dict(sorted(new_chapters.items(), key=sort_key)) + + if migrated_count > 0: + # Also migrate and sort chapter_chunks if they exist + if "chapter_chunks" in self.prog: + new_chunks = {} + for old_key, chunk_data in self.prog["chapter_chunks"].items(): + if old_key in self.prog["chapters"] and "actual_num" in self.prog["chapters"][old_key]: + new_key = str(self.prog["chapters"][old_key]["actual_num"]) + new_chunks[new_key] = chunk_data + else: + new_chunks[old_key] = chunk_data + + # Sort chapter_chunks using the same sorting logic + sorted_chunks = dict(sorted(new_chunks.items(), key=sort_key)) + self.prog["chapter_chunks"] = sorted_chunks + + self.prog["chapters"] = sorted_chapters + self.save() + print(f"✅ Migrated {migrated_count} entries to use actual_num as key and sorted by chapter number") + else: + # Even if no migration occurred, still apply sorting + self.prog["chapters"] = sorted_chapters + if "chapter_chunks" in self.prog: + sorted_chunks = dict(sorted(self.prog["chapter_chunks"].items(), key=sort_key)) + self.prog["chapter_chunks"] = sorted_chunks + self.save() + print("✅ Sorted chapters by chapter number") + + def get_stats(self, output_dir): + """Get statistics about translation progress""" + stats = { + "total_tracked": 
len(self.prog["chapters"]), + "completed": 0, + "missing_files": 0, + "in_progress": 0 + } + + for chapter_info in self.prog["chapters"].values(): + status = chapter_info.get("status") + output_file = chapter_info.get("output_file") + + if status == "completed" and output_file: + output_path = os.path.join(output_dir, output_file) + if os.path.exists(output_path): + stats["completed"] += 1 + else: + stats["missing_files"] += 1 + elif status == "in_progress": + stats["in_progress"] += 1 + elif status == "file_missing": + stats["missing_files"] += 1 + + return stats + +# ===================================================== +# UNIFIED CONTENT PROCESSOR +# ===================================================== +class ContentProcessor: + """Unified content processing""" + + @staticmethod + def clean_ai_artifacts(text, remove_artifacts=True): + """Remove AI response artifacts from text - but ONLY when enabled""" + if not remove_artifacts: + return text + + # First, remove thinking tags if they exist + text = ContentProcessor._remove_thinking_tags(text) + + # After removing thinking tags, re-analyze the text structure + # to catch AI artifacts that may now be at the beginning + lines = text.split('\n') + + # Clean up empty lines at the beginning + while lines and not lines[0].strip(): + lines.pop(0) + + if not lines: + return text + + # Check the first non-empty line for AI artifacts + first_line = lines[0].strip() + + ai_patterns = [ + r'^(?:Sure|Okay|Understood|Of course|Got it|Alright|Certainly|Here\'s|Here is)', + r'^(?:I\'ll|I will|Let me) (?:translate|help|assist)', + r'^(?:System|Assistant|AI|User|Human|Model)\s*:', + r'^\[PART\s+\d+/\d+\]', + r'^(?:Translation note|Note|Here\'s the translation|I\'ve translated)', + r'^```(?:html|xml|text)?\s*$', # Enhanced code block detection + r'^', remaining_text, re.IGNORECASE) or + len(remaining_text.strip()) > 50): # Reduced from 100 to 50 + + print(f"✂️ Removed AI artifact: {first_line[:50]}...") + return remaining_text.lstrip() + + if first_line.lower() in ['html', 'text', 'content', 'translation', 'output']: + remaining_lines = lines[1:] + remaining_text = '\n'.join(remaining_lines) + if remaining_text.strip(): + print(f"✂️ Removed single word artifact: {first_line}") + return remaining_text.lstrip() + + return '\n'.join(lines) + + @staticmethod + def _remove_thinking_tags(text): + """Remove thinking tags that some AI models produce""" + if not text: + return text + + # Common thinking tag patterns used by various AI models + thinking_patterns = [ + # XML-style thinking tags + (r'.*?', 'thinking'), + (r'.*?', 'think'), + (r'.*?', 'thoughts'), + (r'.*?', 'reasoning'), + (r'.*?', 'analysis'), + (r'.*?', 'reflection'), + # OpenAI o1-style reasoning blocks - fix the regex escaping + (r'<\|thinking\|>.*?', 'o1-thinking'), + # Claude-style thinking blocks + (r'\[thinking\].*?\[/thinking\]', 'claude-thinking'), + # Generic bracketed thinking patterns + (r'\[THINKING\].*?\[/THINKING\]', 'bracketed-thinking'), + (r'\[ANALYSIS\].*?\[/ANALYSIS\]', 'bracketed-analysis'), + ] + + original_text = text + removed_count = 0 + + for pattern, tag_type in thinking_patterns: + # Use DOTALL flag to match across newlines + matches = re.findall(pattern, text, re.DOTALL | re.IGNORECASE) + if matches: + text = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE) + removed_count += len(matches) + + # Also remove standalone code block markers that might be artifacts + # But preserve all actual content - only remove the ``` markers themselves + code_block_removed = 0 + 
code_block_patterns = [ + (r'^```\w*\s*\n', '\n'), # Opening code blocks - replace with newline + (r'\n```\s*$', ''), # Closing code blocks at end - remove entirely + (r'^```\w*\s*$', ''), # Standalone ``` on its own line - remove entirely + ] + + for pattern, replacement in code_block_patterns: + matches = re.findall(pattern, text, re.MULTILINE) + if matches: + text = re.sub(pattern, replacement, text, flags=re.MULTILINE) + code_block_removed += len(matches) + + # Clean up any extra whitespace or empty lines left after removing thinking tags + total_removed = removed_count + code_block_removed + if total_removed > 0: + # Remove multiple consecutive newlines + text = re.sub(r'\n\s*\n\s*\n', '\n\n', text) + # Remove leading/trailing whitespace + text = text.strip() + if removed_count > 0 and code_block_removed > 0: + print(f"🧠 Removed {removed_count} thinking tag(s) and {code_block_removed} code block marker(s)") + elif removed_count > 0: + print(f"🧠 Removed {removed_count} thinking tag(s)") + elif code_block_removed > 0: + print(f"📝 Removed {code_block_removed} code block marker(s)") + + return text + + @staticmethod + def clean_memory_artifacts(text): + """Remove any memory/summary artifacts that leaked into the translation""" + text = re.sub(r'\[MEMORY\].*?\[END MEMORY\]', '', text, flags=re.DOTALL) + + lines = text.split('\n') + cleaned_lines = [] + skip_next = False + + for line in lines: + if any(marker in line for marker in ['[MEMORY]', '[END MEMORY]', 'Previous context summary:', + 'memory summary', 'context summary', '[Context]']): + skip_next = True + continue + + if skip_next and line.strip() == '': + skip_next = False + continue + + skip_next = False + cleaned_lines.append(line) + + return '\n'.join(cleaned_lines) + + @staticmethod + def emergency_restore_paragraphs(text, original_html=None, verbose=True): + """Emergency restoration when AI returns wall of text without proper paragraph tags""" + def log(message): + if verbose: + print(message) + + if text.count('

') >= 3: + return text + + if original_html: + original_para_count = original_html.count('

') + current_para_count = text.count('

') + + if current_para_count < original_para_count / 2: + log(f"⚠️ Paragraph mismatch! Original: {original_para_count}, Current: {current_para_count}") + log("🔧 Attempting emergency paragraph restoration...") + + if '

' not in text and len(text) > 300: + log("❌ No paragraph tags found - applying emergency restoration") + + if '\n\n' in text: + parts = text.split('\n\n') + paragraphs = ['

' + part.strip() + '

' for part in parts if part.strip()] + return '\n'.join(paragraphs) + + dialogue_pattern = r'(?<=[.!?])\s+(?=[""\u201c\u201d])' + if re.search(dialogue_pattern, text): + parts = re.split(dialogue_pattern, text) + paragraphs = [] + for part in parts: + part = part.strip() + if part: + if not part.startswith('

'): + part = '

' + part + if not part.endswith('

'): + part = part + '

' + paragraphs.append(part) + return '\n'.join(paragraphs) + + sentence_boundary = r'(?<=[.!?])\s+(?=[A-Z\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af])' + sentences = re.split(sentence_boundary, text) + + if len(sentences) > 1: + paragraphs = [] + current_para = [] + + for sentence in sentences: + sentence = sentence.strip() + if not sentence: + continue + + current_para.append(sentence) + + should_break = ( + len(current_para) >= 3 or + sentence.rstrip().endswith(('"', '"', '"')) or + '* * *' in sentence or + '***' in sentence or + '---' in sentence + ) + + if should_break: + para_text = ' '.join(current_para) + if not para_text.startswith('

'): + para_text = '

' + para_text + if not para_text.endswith('

'): + para_text = para_text + '

' + paragraphs.append(para_text) + current_para = [] + + if current_para: + para_text = ' '.join(current_para) + if not para_text.startswith('

'): + para_text = '

' + para_text + if not para_text.endswith('

'): + para_text = para_text + '

' + paragraphs.append(para_text) + + result = '\n'.join(paragraphs) + log(f"✅ Restored {len(paragraphs)} paragraphs from wall of text") + return result + + words = text.split() + if len(words) > 100: + paragraphs = [] + words_per_para = max(100, len(words) // 10) + + for i in range(0, len(words), words_per_para): + chunk = ' '.join(words[i:i + words_per_para]) + if chunk.strip(): + paragraphs.append('

' + chunk.strip() + '

') + + return '\n'.join(paragraphs) + + elif '

' in text and text.count('

') < 3 and len(text) > 1000: + log("⚠️ Very few paragraphs for long text - checking if more breaks needed") + + soup = BeautifulSoup(text, 'html.parser') + existing_paras = soup.find_all('p') + + new_paragraphs = [] + for para in existing_paras: + para_text = para.get_text() + if len(para_text) > 500: + sentences = re.split(r'(?<=[.!?])\s+', para_text) + if len(sentences) > 5: + chunks = [] + current = [] + for sent in sentences: + current.append(sent) + if len(current) >= 3: + chunks.append('

' + ' '.join(current) + '

') + current = [] + if current: + chunks.append('

' + ' '.join(current) + '

') + new_paragraphs.extend(chunks) + else: + new_paragraphs.append(str(para)) + else: + new_paragraphs.append(str(para)) + + return '\n'.join(new_paragraphs) + + return text + + @staticmethod + def get_content_hash(html_content): + """Create a stable hash of content""" + try: + soup = BeautifulSoup(html_content, 'html.parser') + + for tag in soup(['script', 'style', 'meta', 'link']): + tag.decompose() + + text_content = soup.get_text(separator=' ', strip=True) + text_content = ' '.join(text_content.split()) + + return hashlib.md5(text_content.encode('utf-8')).hexdigest() + + except Exception as e: + print(f"[WARNING] Failed to create hash: {e}") + return hashlib.md5(html_content.encode('utf-8')).hexdigest() + + @staticmethod + def is_meaningful_text_content(html_content): + """Check if chapter has meaningful text beyond just structure""" + try: + # Check if this is plain text from enhanced extraction (html2text output) + # html2text output characteristics: + # - Often starts with # for headers + # - Contains markdown-style formatting + # - Doesn't have HTML tags + content_stripped = html_content.strip() + + # Quick check for plain text/markdown content + is_plain_text = False + if content_stripped and ( + not content_stripped.startswith('<') or # Doesn't start with HTML tag + content_stripped.startswith('#') or # Markdown header + '\n\n' in content_stripped[:500] or # Markdown paragraphs + not '

' in content_stripped[:500] and not '

' in content_stripped[:500] # No common HTML tags + ): + # This looks like plain text or markdown from html2text + is_plain_text = True + + if is_plain_text: + # For plain text, just check the length + text_length = len(content_stripped) + # Be more lenient with plain text since it's already extracted + return text_length > 50 # Much lower threshold for plain text + + # Original HTML parsing logic + soup = BeautifulSoup(html_content, 'html.parser') + + soup_copy = BeautifulSoup(str(soup), 'html.parser') + + for img in soup_copy.find_all('img'): + img.decompose() + + text_elements = soup_copy.find_all(['p', 'div', 'span']) + text_content = ' '.join(elem.get_text(strip=True) for elem in text_elements) + + headers = soup_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) + header_text = ' '.join(h.get_text(strip=True) for h in headers) + + if headers and len(text_content.strip()) > 1: + return True + + if len(text_content.strip()) > 200: + return True + + if len(header_text.strip()) > 100: + return True + + return False + + except Exception as e: + print(f"Warning: Error checking text content: {e}") + return True + +# ===================================================== +# UNIFIED CHAPTER EXTRACTOR +# ===================================================== +class ChapterExtractor: + """Unified chapter extraction with three modes: Smart, Comprehensive, and Full""" + + def __init__(self, progress_callback=None): + self.pattern_manager = PatternManager() + self.progress_callback = progress_callback # Add progress callback + self.parser = self._get_best_parser() # Determine best parser on init + + def _get_best_parser(self): + """Determine the best parser available, preferring lxml for CJK text""" + try: + import lxml + return 'lxml' + except ImportError: + return 'html.parser' + + def _sort_by_opf_spine(self, chapters, opf_path): + """Sort chapters according to OPF spine order""" + try: + import xml.etree.ElementTree as ET + + # Read OPF file + with open(opf_path, 'r', encoding='utf-8') as f: + opf_content = f.read() + + # Parse OPF + root = ET.fromstring(opf_content) + + # Find namespaces + ns = {'opf': 'http://www.idpf.org/2007/opf'} + if root.tag.startswith('{'): + default_ns = root.tag[1:root.tag.index('}')] + ns = {'opf': default_ns} + + # Build manifest map (id -> href) + manifest = {} + for item in root.findall('.//opf:manifest/opf:item', ns): + item_id = item.get('id') + href = item.get('href') + if item_id and href: + manifest[item_id] = href + + # Get spine order + spine_order = [] + spine = root.find('.//opf:spine', ns) + if spine is not None: + for itemref in spine.findall('opf:itemref', ns): + idref = itemref.get('idref') + if idref and idref in manifest: + href = manifest[idref] + spine_order.append(href) + + if not spine_order: + print("⚠️ No spine order found in OPF, keeping original order") + return chapters + + # Create a mapping of filenames to spine position + spine_map = {} + for idx, href in enumerate(spine_order): + # Try different matching strategies + basename = os.path.basename(href) + spine_map[basename] = idx + spine_map[href] = idx + # Also store without extension for flexible matching + name_no_ext = os.path.splitext(basename)[0] + spine_map[name_no_ext] = idx + + print(f"📋 OPF spine contains {len(spine_order)} items") + + # Sort chapters based on spine order + def get_spine_position(chapter): + # Try to match chapter to spine + filename = chapter.get('filename', '') + basename = chapter.get('original_basename', '') + + # Try exact filename match + if filename in 
spine_map: + return spine_map[filename] + + # Try basename match + if basename in spine_map: + return spine_map[basename] + + # Try basename of filename + if filename: + fname_base = os.path.basename(filename) + if fname_base in spine_map: + return spine_map[fname_base] + + # Try without extension + if basename: + if basename + '.html' in spine_map: + return spine_map[basename + '.html'] + if basename + '.xhtml' in spine_map: + return spine_map[basename + '.xhtml'] + + # Fallback: offset by 1,000,000 so unmatched chapters sort after every spine item + return 1000000 + chapter.get('num', 0) + + # Sort chapters + sorted_chapters = sorted(chapters, key=get_spine_position) + + # Renumber chapters based on new order + for idx, chapter in enumerate(sorted_chapters, 1): + chapter['spine_order'] = idx + # Optionally update chapter numbers to match spine order + # chapter['num'] = idx # Uncomment if you want to renumber + + # Log reordering info + reordered_count = 0 + for idx, chapter in enumerate(sorted_chapters): + original_idx = chapters.index(chapter) + if original_idx != idx: + reordered_count += 1 + + if reordered_count > 0: + print(f"🔄 Reordered {reordered_count} chapters to match OPF spine") + else: + print("✅ Chapter order already matches OPF spine") + + return sorted_chapters + + except Exception as e: + print(f"⚠️ Could not sort by OPF spine: {e}") + import traceback + traceback.print_exc() + return chapters + + + def protect_angle_brackets_with_korean(self, text: str) -> str: + """Protect CJK text in angle brackets from HTML parsing""" + if text is None: + return "" + + import re + # Extended pattern to include Korean, Chinese, and Japanese characters + cjk_pattern = r'[가-힣ㄱ-ㅎㅏ-ㅣ一-龿ぁ-ゟァ-ヿ]' + bracket_pattern = rf'<([^<>]*{cjk_pattern}[^<>]*)>' + + def replace_brackets(match): + content = match.group(1) + # Escape as HTML entities so the parser treats CJK pseudo-tags as text, not markup + return f'&lt;{content}&gt;' + + return re.sub(bracket_pattern, replace_brackets, text) + + def ensure_all_opf_chapters_extracted(zf, chapters, out): + """Ensure ALL chapters from OPF spine are extracted, not just what ChapterExtractor found""" + + # Parse OPF to get ALL chapters in spine + opf_chapters = [] + + try: + # Find content.opf + opf_content = None + for name in zf.namelist(): + if name.endswith('content.opf'): + opf_content = zf.read(name) + break + + if not opf_content: + return chapters # No OPF, return original + + import xml.etree.ElementTree as ET + root = ET.fromstring(opf_content) + + # Handle namespaces + ns = {'opf': 'http://www.idpf.org/2007/opf'} + if root.tag.startswith('{'): + default_ns = root.tag[1:root.tag.index('}')] + ns = {'opf': default_ns} + + # Get manifest + manifest = {} + for item in root.findall('.//opf:manifest/opf:item', ns): + item_id = item.get('id') + href = item.get('href') + media_type = item.get('media-type', '') + + if item_id and href and ('html' in media_type.lower() or href.endswith(('.html', '.xhtml', '.htm'))): + manifest[item_id] = href + + # Get spine order + spine = root.find('.//opf:spine', ns) + if spine is not None: + for itemref in spine.findall('opf:itemref', ns): + idref = itemref.get('idref') + if idref and idref in manifest: + href = manifest[idref] + filename = os.path.basename(href) + + # Skip nav, toc, cover + if any(skip in filename.lower() for skip in ['nav', 'toc', 'cover']): + continue + + opf_chapters.append(href) + + print(f"📚 OPF spine contains {len(opf_chapters)} chapters") + + # Check which OPF chapters are missing from extraction + extracted_files = set() + for c in chapters: + if 'filename' in c: + extracted_files.add(c['filename']) + if 
'original_basename' in c: + extracted_files.add(c['original_basename']) + + missing_chapters = [] + for opf_chapter in opf_chapters: + basename = os.path.basename(opf_chapter) + if basename not in extracted_files and opf_chapter not in extracted_files: + missing_chapters.append(opf_chapter) + + if missing_chapters: + print(f"⚠️ {len(missing_chapters)} chapters in OPF but not extracted!") + print(f" Missing: {missing_chapters[:5]}{'...' if len(missing_chapters) > 5 else ''}") + + # Extract the missing chapters + for href in missing_chapters: + try: + # Read the chapter content + content = zf.read(href).decode('utf-8') + + # Extract chapter number + import re + basename = os.path.basename(href) + matches = re.findall(r'(\d+)', basename) + if matches: + chapter_num = int(matches[-1]) + else: + chapter_num = len(chapters) + 1 + + # Create chapter entry + from bs4 import BeautifulSoup + parser = 'lxml' if 'lxml' in sys.modules else 'html.parser' + soup = BeautifulSoup(content, parser) + + # Get title + title = "Chapter " + str(chapter_num) + title_tag = soup.find('title') + if title_tag: + title = title_tag.get_text().strip() or title + else: + for tag in ['h1', 'h2', 'h3']: + header = soup.find(tag) + if header: + title = header.get_text().strip() or title + break + + # Save the chapter file + output_filename = f"chapter_{chapter_num:04d}_{basename}" + output_path = os.path.join(out, output_filename) + with open(output_path, 'w', encoding='utf-8') as f: + f.write(content) + + # Add to chapters list + new_chapter = { + 'num': chapter_num, + 'title': title, + 'body': content, + 'filename': href, + 'original_basename': basename, + 'file_size': len(content), + 'has_images': bool(soup.find_all('img')), + 'detection_method': 'opf_recovery', + 'content_hash': None # Will be calculated later + } + + chapters.append(new_chapter) + print(f" ✅ Recovered chapter {chapter_num}: {basename}") + + except Exception as e: + print(f" ❌ Failed to extract {href}: {e}") + + # Re-sort chapters by number + chapters.sort(key=lambda x: x['num']) + print(f"✅ Total chapters after OPF recovery: {len(chapters)}") + + except Exception as e: + print(f"⚠️ Error checking OPF chapters: {e}") + import traceback + traceback.print_exc() + + return chapters + + def extract_chapters(self, zf, output_dir): + """Extract chapters and all resources from EPUB using ThreadPoolExecutor""" + import time + + # Check stop at the very beginning + if is_stop_requested(): + print("❌ Extraction stopped by user") + return [] + + print("🚀 Starting EPUB extraction with ThreadPoolExecutor...") + print(f"📄 Using parser: {self.parser} {'(optimized for CJK)' if self.parser == 'lxml' else '(standard)'}") + + # Initial progress + if self.progress_callback: + self.progress_callback("Starting EPUB extraction...") + + # First, extract and save content.opf for reference + for name in zf.namelist(): + if name.endswith('.opf'): + try: + opf_content = zf.read(name).decode('utf-8', errors='ignore') + opf_output_path = os.path.join(output_dir, 'content.opf') + with open(opf_output_path, 'w', encoding='utf-8') as f: + f.write(opf_content) + print(f"📋 Saved OPF file: {name} → content.opf") + break + except Exception as e: + print(f"⚠️ Could not save OPF file: {e}") + + # Get extraction mode from environment + extraction_mode = os.getenv("EXTRACTION_MODE", "smart").lower() + print(f"✅ Using {extraction_mode.capitalize()} extraction mode") + + # Get number of workers from environment or use default + max_workers = int(os.getenv("EXTRACTION_WORKERS", "2")) + print(f"🔧 
Using {max_workers} workers for parallel processing") + + extracted_resources = self._extract_all_resources(zf, output_dir) + + # Check stop after resource extraction + if is_stop_requested(): + print("❌ Extraction stopped by user") + return [] + + metadata_path = os.path.join(output_dir, 'metadata.json') + if os.path.exists(metadata_path): + print("📋 Loading existing metadata...") + with open(metadata_path, 'r', encoding='utf-8') as f: + metadata = json.load(f) + else: + print("📋 Extracting fresh metadata...") + metadata = self._extract_epub_metadata(zf) + print(f"📋 Extracted metadata: {list(metadata.keys())}") + + chapters, detected_language = self._extract_chapters_universal(zf, extraction_mode) + + # Sort chapters according to OPF spine order if available + opf_path = os.path.join(output_dir, 'content.opf') + if os.path.exists(opf_path) and chapters: + print("📋 Sorting chapters according to OPF spine order...") + chapters = self._sort_by_opf_spine(chapters, opf_path) + print(f"✅ Chapters sorted according to OPF reading order") + + # Check stop after chapter extraction + if is_stop_requested(): + print("❌ Extraction stopped by user") + return [] + + if not chapters: + print("❌ No chapters could be extracted!") + return [] + + chapters_info_path = os.path.join(output_dir, 'chapters_info.json') + chapters_info = [] + chapters_info_lock = threading.Lock() + + def process_chapter(chapter): + """Process a single chapter""" + # Check stop in worker + if is_stop_requested(): + return None + + info = { + 'num': chapter['num'], + 'title': chapter['title'], + 'original_filename': chapter.get('filename', ''), + 'has_images': chapter.get('has_images', False), + 'image_count': chapter.get('image_count', 0), + 'text_length': chapter.get('file_size', len(chapter.get('body', ''))), + 'detection_method': chapter.get('detection_method', 'unknown'), + 'content_hash': chapter.get('content_hash', '') + } + + if chapter.get('has_images'): + try: + soup = BeautifulSoup(chapter.get('body', ''), self.parser) + images = soup.find_all('img') + info['images'] = [img.get('src', '') for img in images] + except: + info['images'] = [] + + return info + + # Process chapters in parallel + print(f"🔄 Processing {len(chapters)} chapters in parallel...") + + if self.progress_callback: + self.progress_callback(f"Processing {len(chapters)} chapters...") + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_chapter = { + executor.submit(process_chapter, chapter): chapter + for chapter in chapters + } + + # Process completed tasks + completed = 0 + for future in as_completed(future_to_chapter): + if is_stop_requested(): + print("❌ Extraction stopped by user") + # Cancel remaining futures + for f in future_to_chapter: + f.cancel() + return [] + + try: + result = future.result() + if result: + with chapters_info_lock: + chapters_info.append(result) + completed += 1 + + # Yield to GUI periodically (can be disabled for max speed) + if completed % 5 == 0 and os.getenv("ENABLE_GUI_YIELD", "1") == "1": + time.sleep(0.001) + + # Progress updates + if completed % 10 == 0 or completed == len(chapters): + progress_msg = f"Processed {completed}/{len(chapters)} chapters" + print(f" 📊 {progress_msg}") + if self.progress_callback: + self.progress_callback(progress_msg) + except Exception as e: + chapter = future_to_chapter[future] + print(f" ❌ Error processing chapter {chapter['num']}: {e}") + + # Sort chapters_info by chapter number to maintain order + chapters_info.sort(key=lambda x: x['num']) + 
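+ # For orientation, each entry written to chapters_info.json below is shaped
+ # roughly like this (a hedged sketch; the values are illustrative placeholders,
+ # only the keys come from process_chapter above):
+ # {
+ #     "num": 12,
+ #     "title": "Chapter 12",
+ #     "original_filename": "OEBPS/chapter012.xhtml",
+ #     "has_images": false,
+ #     "image_count": 0,
+ #     "text_length": 5431,
+ #     "detection_method": "filename_chapter_number",
+ #     "content_hash": "9a0364b9e99bb480dd25e1f0284c8555",
+ #     "images": ["images/illust01.png"]  # present only when has_images is true
+ # }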
+ print(f"✅ Successfully processed {len(chapters_info)} chapters") + + with open(chapters_info_path, 'w', encoding='utf-8') as f: + json.dump(chapters_info, f, ensure_ascii=False, indent=2) + + print(f"💾 Saved detailed chapter info to: chapters_info.json") + + metadata.update({ + 'chapter_count': len(chapters), + 'detected_language': detected_language, + 'extracted_resources': extracted_resources, + 'extraction_mode': extraction_mode, + 'extraction_summary': { + 'total_chapters': len(chapters), + 'chapter_range': f"{chapters[0]['num']}-{chapters[-1]['num']}", + 'resources_extracted': sum(len(files) for files in extracted_resources.values()) + } + }) + + metadata['chapter_titles'] = { + str(c['num']): c['title'] for c in chapters + } + + with open(metadata_path, 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=2) + + print(f"💾 Saved comprehensive metadata to: {metadata_path}") + + self._create_extraction_report(output_dir, metadata, chapters, extracted_resources) + self._log_extraction_summary(chapters, extracted_resources, detected_language) + + print(f"🔍 VERIFICATION: {extraction_mode.capitalize()} chapter extraction completed successfully") + print(f"⚡ Used {max_workers} workers for parallel processing") + + return chapters + + def _extract_all_resources(self, zf, output_dir): + """Extract all resources with parallel processing""" + import time + + extracted_resources = { + 'css': [], + 'fonts': [], + 'images': [], + 'epub_structure': [], + 'other': [] + } + + # Check if already extracted + extraction_marker = os.path.join(output_dir, '.resources_extracted') + if os.path.exists(extraction_marker): + print("📦 Resources already extracted, skipping...") + return self._count_existing_resources(output_dir, extracted_resources) + + self._cleanup_old_resources(output_dir) + + # Create directories + for resource_type in ['css', 'fonts', 'images']: + os.makedirs(os.path.join(output_dir, resource_type), exist_ok=True) + + print(f"📦 Extracting resources in parallel...") + + # Get list of files to process + file_list = [f for f in zf.namelist() if not f.endswith('/') and os.path.basename(f)] + + # Thread-safe lock for extracted_resources + resource_lock = threading.Lock() + + def extract_single_resource(file_path): + if is_stop_requested(): + return None + + try: + file_data = zf.read(file_path) + resource_info = self._categorize_resource(file_path, os.path.basename(file_path)) + + if resource_info: + resource_type, target_dir, safe_filename = resource_info + target_path = os.path.join(output_dir, target_dir, safe_filename) if target_dir else os.path.join(output_dir, safe_filename) + + with open(target_path, 'wb') as f: + f.write(file_data) + + # Thread-safe update + with resource_lock: + extracted_resources[resource_type].append(safe_filename) + + return (resource_type, safe_filename) + except Exception as e: + print(f"[WARNING] Failed to extract {file_path}: {e}") + return None + + # Process files in parallel + total_resources = len(file_list) + extracted_count = 0 + + # Use same worker count as chapter processing + resource_workers = int(os.getenv("EXTRACTION_WORKERS", "2")) + + with ThreadPoolExecutor(max_workers=resource_workers) as executor: + futures = {executor.submit(extract_single_resource, file_path): file_path + for file_path in file_list} + + for future in as_completed(futures): + if is_stop_requested(): + executor.shutdown(wait=False) + break + + extracted_count += 1 + + # Progress update every 20 files + if extracted_count % 20 == 0 and 
self.progress_callback: + self.progress_callback(f"Extracting resources: {extracted_count}/{total_resources}") + + # Yield to GUI periodically (can be disabled for max speed) + if extracted_count % 10 == 0 and os.getenv("ENABLE_GUI_YIELD", "1") == "1": + time.sleep(0.001) + + result = future.result() + if result: + resource_type, filename = result + # Only print for important resources + if extracted_count < 10 or resource_type in ['css', 'fonts']: + print(f" 📄 Extracted {resource_type}: {filename}") + + # Mark as complete + with open(extraction_marker, 'w') as f: + f.write(f"Resources extracted at {time.time()}") + + self._validate_critical_files(output_dir, extracted_resources) + return extracted_resources + + def _extract_chapters_universal(self, zf, extraction_mode="smart"): + """Universal chapter extraction with four modes: smart, comprehensive, full, enhanced + + All modes now properly merge Section/Chapter pairs + Enhanced mode uses html2text for superior text processing + Now with parallel processing for improved performance + """ + # Check stop at the beginning + if is_stop_requested(): + print("❌ Chapter extraction stopped by user") + return [], 'unknown' + + # Import time for yielding + import time + + # Initialize enhanced extractor if using enhanced mode + enhanced_extractor = None + enhanced_filtering = extraction_mode # Default fallback + preserve_structure = True + + # Independent control: translate cover.html when requested + translate_cover_html = os.getenv("TRANSLATE_COVER_HTML", "0") == "1" + + if extraction_mode == "enhanced": + print("🚀 Initializing Enhanced extraction mode with html2text...") + + # Get enhanced mode configuration from environment + enhanced_filtering = os.getenv("ENHANCED_FILTERING", "smart") + # Avoid 'full' with html2text to prevent XML declaration artifacts; use 'comprehensive' instead + if str(enhanced_filtering).lower() == 'full': + enhanced_filtering = 'comprehensive' + preserve_structure = os.getenv("ENHANCED_PRESERVE_STRUCTURE", "1") == "1" + + print(f" • Enhanced filtering level: {enhanced_filtering}") + print(f" • Preserve structure: {preserve_structure}") + + # Try to initialize enhanced extractor + try: + # Import our enhanced extractor (assume it's in the same directory or importable) + from enhanced_text_extractor import EnhancedTextExtractor + enhanced_extractor = EnhancedTextExtractor( + filtering_mode=enhanced_filtering, + preserve_structure=preserve_structure + ) + print("✅ Enhanced text extractor initialized successfully") + + except ImportError as e: + print(f"❌ Enhanced text extractor module not found: {e}") + print(f"❌ Cannot use enhanced extraction mode. Please install enhanced_text_extractor or select a different extraction mode.") + raise e + except Exception as e: + print(f"❌ Enhanced extractor initialization failed: {e}") + print(f"❌ Cannot use enhanced extraction mode. 
Please select a different extraction mode.") + raise e + + chapters = [] + sample_texts = [] + + # First phase: Collect HTML files + html_files = [] + file_list = zf.namelist() + total_files = len(file_list) + + # Update progress for file collection + if self.progress_callback and total_files > 100: + self.progress_callback(f"Scanning {total_files} files in EPUB...") + + for idx, name in enumerate(file_list): + # Check stop while collecting files + if is_stop_requested(): + print("❌ Chapter extraction stopped by user") + return [], 'unknown' + + # Yield to GUI every 50 files (can be disabled for max speed) + if idx % 50 == 0 and idx > 0: + if os.getenv("ENABLE_GUI_YIELD", "1") == "1": + time.sleep(0.001) # Brief yield to GUI + if self.progress_callback and total_files > 100: + self.progress_callback(f"Scanning files: {idx}/{total_files}") + + if name.lower().endswith(('.xhtml', '.html', '.htm')): + # Skip cover files by default unless override is enabled + basename = os.path.basename(name).lower() + if basename in ['cover.html', 'cover.xhtml', 'cover.htm'] and not translate_cover_html: + print(f"[SKIP] Cover file excluded from all modes: {name}") + continue + + # Apply filtering based on the actual extraction mode (or enhanced_filtering for enhanced mode) + current_filtering = enhanced_filtering if extraction_mode == "enhanced" else extraction_mode + + if current_filtering == "smart": + # Smart mode: aggressive filtering + lower_name = name.lower() + if any(skip in lower_name for skip in [ + 'nav', 'toc', 'contents', 'title', 'index', + 'copyright', 'acknowledgment', 'dedication' + ]): + continue + elif current_filtering == "comprehensive": + # Comprehensive mode: moderate filtering + skip_keywords = ['nav.', 'toc.', 'contents.', 'copyright.'] + basename = os.path.basename(name.lower()) + should_skip = False + for skip in skip_keywords: + if basename == skip + 'xhtml' or basename == skip + 'html' or basename == skip + 'htm': + should_skip = True + break + if should_skip: + print(f"[SKIP] Navigation/TOC file: {name}") + continue + # else: full mode - no filtering at all (except cover which is filtered above) + + html_files.append(name) + + # Update mode description to include enhanced mode + mode_description = { + "smart": "potential content files", + "comprehensive": "HTML files", + "full": "ALL HTML/XHTML files (no filtering)", + "enhanced": f"files (enhanced with {enhanced_filtering} filtering)" + } + print(f"📚 Found {len(html_files)} {mode_description.get(extraction_mode, 'files')} in EPUB") + + # Sort files to ensure proper order + html_files.sort() + + # Check if merging is disabled via environment variable + disable_merging = os.getenv("DISABLE_CHAPTER_MERGING", "0") == "1" + + processed_files = set() + merge_candidates = {} # Store potential merges without reading files yet + + if disable_merging: + print("📌 Chapter merging is DISABLED - processing all files independently") + else: + print("📌 Chapter merging is ENABLED") + + # Only do merging logic if not disabled + file_groups = {} + + # Group files by their base number to detect Section/Chapter pairs + for file_path in html_files: + filename = os.path.basename(file_path) + + # Try different patterns to extract base number + base_num = None + + # Pattern 1: "No00014" from "No00014Section.xhtml" + match = re.match(r'(No\d+)', filename) + if match: + base_num = match.group(1) + else: + # Pattern 2: "0014" from "0014_section.html" or "0014_chapter.html" + match = re.match(r'^(\d+)[_\-]', filename) + if match: + base_num = 
match.group(1) + else: + # Pattern 3: Just numbers at the start + match = re.match(r'^(\d+)', filename) + if match: + base_num = match.group(1) + + if base_num: + if base_num not in file_groups: + file_groups[base_num] = [] + file_groups[base_num].append(file_path) + + # Identify merge candidates WITHOUT reading files yet + for base_num, group_files in sorted(file_groups.items()): + if len(group_files) == 2: + # Check if we have a Section/Chapter pair based on filenames only + section_file = None + chapter_file = None + + for file_path in group_files: + basename = os.path.basename(file_path) + # More strict detection - must have 'section' or 'chapter' in the filename + if 'section' in basename.lower() and 'chapter' not in basename.lower(): + section_file = file_path + elif 'chapter' in basename.lower() and 'section' not in basename.lower(): + chapter_file = file_path + + if section_file and chapter_file: + # Store as potential merge candidate + merge_candidates[chapter_file] = section_file + processed_files.add(section_file) + print(f"[DEBUG] Potential merge candidate: {base_num}") + print(f" Section: {os.path.basename(section_file)}") + print(f" Chapter: {os.path.basename(chapter_file)}") + + # Filter out section files that were marked for merging + files_to_process = [] + for file_path in html_files: + if not disable_merging and file_path in processed_files: + print(f"[DEBUG] Skipping section file: {file_path}") + continue + files_to_process.append(file_path) + + print(f"📚 Processing {len(files_to_process)} files after merge analysis") + + # Thread-safe collections + sample_texts_lock = threading.Lock() + file_size_groups_lock = threading.Lock() + h1_count_lock = threading.Lock() + h2_count_lock = threading.Lock() + + # Initialize counters + file_size_groups = {} + h1_count = 0 + h2_count = 0 + processed_count = 0 + processed_count_lock = threading.Lock() + + # Progress tracking + total_files = len(files_to_process) + + # Function to process a single HTML file + def process_single_html_file(file_path, file_index): + nonlocal h1_count, h2_count, processed_count + + # Check stop + if is_stop_requested(): + return None + + # Update progress + with processed_count_lock: + processed_count += 1 + current_count = processed_count + if self.progress_callback and current_count % 5 == 0: + progress_msg = f"Processing chapters: {current_count}/{total_files} ({current_count*100//total_files}%)" + self.progress_callback(progress_msg) + + try: + # Read file data + file_data = zf.read(file_path) + + # Decode the file data + html_content = None + detected_encoding = None + for encoding in ['utf-8', 'utf-16', 'gb18030', 'shift_jis', 'euc-kr', 'gbk', 'big5']: + try: + html_content = file_data.decode(encoding) + detected_encoding = encoding + break + except UnicodeDecodeError: + continue + + if not html_content: + print(f"[WARNING] Could not decode {file_path}") + return None + + # Check if this file needs merging + if not disable_merging and file_path in merge_candidates: + section_file = merge_candidates[file_path] + print(f"[DEBUG] Processing merge for: {file_path}") + + try: + # Read section file + section_data = zf.read(section_file) + section_html = None + for encoding in ['utf-8', 'utf-16', 'gb18030', 'shift_jis', 'euc-kr', 'gbk', 'big5']: + try: + section_html = section_data.decode(encoding) + break + except UnicodeDecodeError: + continue + + if section_html: + # Quick check if section is small enough to merge + section_soup = BeautifulSoup(section_html, self.parser) + section_text = 
section_soup.get_text(strip=True) + + if len(section_text) < 200: # Merge if section is small + # Extract body content + chapter_soup = BeautifulSoup(html_content, self.parser) + + if section_soup.body: + section_body_content = ''.join(str(child) for child in section_soup.body.children) + else: + section_body_content = section_html + + if chapter_soup.body: + chapter_body_content = ''.join(str(child) for child in chapter_soup.body.children) + else: + chapter_body_content = html_content + + # Merge content + html_content = section_body_content + "\n
\n" + chapter_body_content + print(f" → MERGED: Section ({len(section_text)} chars) + Chapter") + else: + print(f" → NOT MERGED: Section too large ({len(section_text)} chars)") + # Remove from processed files so it gets processed separately + processed_files.discard(section_file) + + except Exception as e: + print(f"[WARNING] Failed to merge {file_path}: {e}") + + # === ENHANCED EXTRACTION POINT === + # Initialize variables that will be set by extraction + content_html = None + content_text = None + chapter_title = None + enhanced_extraction_used = False + + # Determine whether to use enhanced extractor based on toggle and provider + use_enhanced = enhanced_extractor and extraction_mode == "enhanced" + force_bs_traditional = False + try: + force_bs = os.getenv('FORCE_BS_FOR_TRADITIONAL', '0') == '1' + model_env = os.getenv('MODEL', '') + if force_bs and is_traditional_translation_api(model_env): + use_enhanced = False + force_bs_traditional = True + except Exception: + pass + + # Use enhanced extractor if available and allowed + if use_enhanced: + print(f"🚀 Using enhanced extraction for: {os.path.basename(file_path)}") + # Get clean text from html2text + clean_content, _, chapter_title = enhanced_extractor.extract_chapter_content( + html_content, enhanced_filtering + ) + enhanced_extraction_used = True + print(f"✅ Enhanced extraction complete: {len(clean_content)} chars") + + # For enhanced mode, store the markdown/plain text + # This will be sent to the translation API as-is + content_html = clean_content # This is MARKDOWN/PLAIN TEXT from html2text + content_text = clean_content # Same clean text for analysis + + # BeautifulSoup method (only for non-enhanced modes) + if not enhanced_extraction_used: + if extraction_mode == "enhanced" and not force_bs_traditional: + # Enhanced mode failed - skip this file + print(f"❌ Skipping {file_path} - enhanced extraction required but not available") + return None + # Parse the (possibly merged) content + protected_html = self.protect_angle_brackets_with_korean(html_content) + + # Use lxml parser which handles both HTML and XHTML well + soup = BeautifulSoup(protected_html, self.parser) + + # Get effective mode for filtering + effective_filtering = enhanced_filtering if extraction_mode == "enhanced" else extraction_mode + + # In full mode, keep the entire HTML structure + if effective_filtering == "full": + content_html = html_content # Keep EVERYTHING + content_text = soup.get_text(strip=True) + else: + # Smart and comprehensive modes extract body content + if soup.body: + content_html = str(soup.body) + content_text = soup.body.get_text(strip=True) + else: + content_html = html_content + content_text = soup.get_text(strip=True) + + # Extract title (with ignore settings support) + chapter_title = None + + # Check ignore settings for batch translation + batch_translate_active = os.getenv('BATCH_TRANSLATE_HEADERS', '0') == '1' + ignore_title_tag = os.getenv('IGNORE_TITLE', '0') == '1' and batch_translate_active + ignore_header_tags = os.getenv('IGNORE_HEADER', '0') == '1' and batch_translate_active + + # Extract from title tag if not ignored + if not ignore_title_tag and soup.title and soup.title.string: + chapter_title = soup.title.string.strip() + + # Extract from header tags if not ignored and no title found + if not chapter_title and not ignore_header_tags: + for header_tag in ['h1', 'h2', 'h3']: + header = soup.find(header_tag) + if header: + chapter_title = header.get_text(strip=True) + break + + # Fallback to filename if nothing found + if not 
chapter_title: + chapter_title = os.path.splitext(os.path.basename(file_path))[0] + + # Get the effective extraction mode for processing logic + effective_mode = enhanced_filtering if extraction_mode == "enhanced" else extraction_mode + + # Skip truly empty files in smart mode + # BUT: Never skip anything when merging is disabled (to ensure section files are processed) + if effective_mode == "smart" and not disable_merging and len(content_text.strip()) < 10: + print(f"[SKIP] Nearly empty file: {file_path} ({len(content_text)} chars)") + return None + + # Get actual chapter number based on original position + actual_chapter_num = files_to_process.index(file_path) + 1 + + # Mode-specific logic + if effective_mode == "comprehensive" or effective_mode == "full": + # For comprehensive/full mode, use sequential numbering + chapter_num = actual_chapter_num + + if not chapter_title: + chapter_title = os.path.splitext(os.path.basename(file_path))[0] + + detection_method = f"{extraction_mode}_sequential" if extraction_mode == "enhanced" else f"{effective_mode}_sequential" + + elif effective_mode == "smart": + # For smart mode, when merging is disabled, use sequential numbering + if disable_merging: + chapter_num = actual_chapter_num + + if not chapter_title: + chapter_title = os.path.splitext(os.path.basename(file_path))[0] + + detection_method = f"{extraction_mode}_sequential_no_merge" if extraction_mode == "enhanced" else "sequential_no_merge" + else: + # When merging is enabled, try to extract chapter info + protected_html = self.protect_angle_brackets_with_korean(html_content) + soup = BeautifulSoup(protected_html, self.parser) + + # Count headers (thread-safe) + h1_tags = soup.find_all('h1') + h2_tags = soup.find_all('h2') + if h1_tags: + with h1_count_lock: + h1_count += 1 + if h2_tags: + with h2_count_lock: + h2_count += 1 + + # Try to extract chapter number and title + chapter_num, extracted_title, detection_method = self._extract_chapter_info( + soup, file_path, content_text, html_content + ) + + # Use extracted title if we don't have one + if extracted_title and not chapter_title: + chapter_title = extracted_title + + # For hash-based filenames, chapter_num might be None + if chapter_num is None: + chapter_num = actual_chapter_num # Use actual chapter count + detection_method = f"{extraction_mode}_sequential_fallback" if extraction_mode == "enhanced" else "sequential_fallback" + print(f"[DEBUG] No chapter number found in {file_path}, assigning: {chapter_num}") + + # Filter content_html for ignore settings (before processing) + batch_translate_active = os.getenv('BATCH_TRANSLATE_HEADERS', '0') == '1' + ignore_title_tag = os.getenv('IGNORE_TITLE', '0') == '1' and batch_translate_active + ignore_header_tags = os.getenv('IGNORE_HEADER', '0') == '1' and batch_translate_active + + if (ignore_title_tag or ignore_header_tags) and content_html and not enhanced_extraction_used: + # Parse the content HTML to remove ignored tags + content_soup = BeautifulSoup(content_html, self.parser) + + # Remove title tags if ignored + if ignore_title_tag: + for title_tag in content_soup.find_all('title'): + title_tag.decompose() + + # Remove header tags if ignored + if ignore_header_tags: + for header_tag in content_soup.find_all(['h1', 'h2', 'h3']): + header_tag.decompose() + + # Update content_html with filtered version + content_html = str(content_soup) + + # Process images and metadata (same for all modes) + protected_html = self.protect_angle_brackets_with_korean(html_content) + soup = 
BeautifulSoup(protected_html, self.parser) + images = soup.find_all('img') + has_images = len(images) > 0 + is_image_only_chapter = has_images and len(content_text.strip()) < 500 + + if is_image_only_chapter: + print(f"[DEBUG] Image-only chapter detected: {file_path} ({len(images)} images, {len(content_text)} chars)") + + content_hash = ContentProcessor.get_content_hash(content_html) + + # Collect file size groups for smart mode (thread-safe) + if effective_mode == "smart": + file_size = len(content_text) + with file_size_groups_lock: + if file_size not in file_size_groups: + file_size_groups[file_size] = [] + file_size_groups[file_size].append(file_path) + + # Collect sample texts (thread-safe) + with sample_texts_lock: + if len(sample_texts) < 5: + sample_texts.append(content_text[:1000]) + + # Ensure chapter_num is always an integer + if isinstance(chapter_num, float): + chapter_num = int(chapter_num) + + # Create chapter info + chapter_info = { + "num": chapter_num, # Now guaranteed to have a value + "title": chapter_title or f"Chapter {chapter_num}", + "body": content_html, + "filename": file_path, + "original_filename": os.path.basename(file_path), + "original_basename": os.path.splitext(os.path.basename(file_path))[0], + "content_hash": content_hash, + "detection_method": detection_method if detection_method else "pending", + "file_size": len(content_text), + "has_images": has_images, + "image_count": len(images), + "is_empty": len(content_text.strip()) == 0, + "is_image_only": is_image_only_chapter, + "extraction_mode": extraction_mode, + "file_index": file_index # Store original file index for sorting + } + + # Add enhanced extraction info if used + if enhanced_extraction_used: + chapter_info["enhanced_extraction"] = True + chapter_info["enhanced_filtering"] = enhanced_filtering + chapter_info["preserve_structure"] = preserve_structure + + # Add merge info if applicable + if not disable_merging and file_path in merge_candidates: + chapter_info["was_merged"] = True + chapter_info["merged_with"] = merge_candidates[file_path] + + if effective_mode == "smart": + chapter_info["language_sample"] = content_text[:500] + # Debug for section files + if 'section' in chapter_info['original_basename'].lower(): + print(f"[DEBUG] Added section file to candidates: {chapter_info['original_basename']} (size: {chapter_info['file_size']})") + + return chapter_info + + except Exception as e: + print(f"[ERROR] Failed to process {file_path}: {e}") + import traceback + traceback.print_exc() + return None + + # Process files in parallel or sequentially based on file count + print(f"🚀 Processing {len(files_to_process)} HTML files...") + + # Initial progress + if self.progress_callback: + self.progress_callback(f"Processing {len(files_to_process)} chapters...") + + candidate_chapters = [] # For smart mode + chapters_direct = [] # For other modes + + # Decide whether to use parallel processing + use_parallel = len(files_to_process) > 10 + + if use_parallel: + # Get worker count from environment variable + max_workers = int(os.getenv("EXTRACTION_WORKERS", "2")) + print(f"📦 Using parallel processing with {max_workers} workers...") + + # Process files in parallel + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all files for processing + future_to_file = { + executor.submit(process_single_html_file, file_path, idx): (file_path, idx) + for idx, file_path in enumerate(files_to_process) + } + + # Collect results as they complete + for future in as_completed(future_to_file): + if 
is_stop_requested(): + print("❌ Chapter processing stopped by user") + executor.shutdown(wait=False) + return [], 'unknown' + + try: + chapter_info = future.result() + if chapter_info: + effective_mode = enhanced_filtering if extraction_mode == "enhanced" else extraction_mode + + # For smart mode when merging is enabled, collect candidates + # Otherwise, add directly to chapters + if effective_mode == "smart" and not disable_merging: + candidate_chapters.append(chapter_info) + else: + chapters_direct.append(chapter_info) + except Exception as e: + file_path, idx = future_to_file[future] + print(f"[ERROR] Thread error processing {file_path}: {e}") + else: + print("📦 Using sequential processing (small file count)...") + + # Process files sequentially for small EPUBs + for idx, file_path in enumerate(files_to_process): + if is_stop_requested(): + print("❌ Chapter processing stopped by user") + return [], 'unknown' + + chapter_info = process_single_html_file(file_path, idx) + if chapter_info: + effective_mode = enhanced_filtering if extraction_mode == "enhanced" else extraction_mode + + # For smart mode when merging is enabled, collect candidates + # Otherwise, add directly to chapters + if effective_mode == "smart" and not disable_merging: + candidate_chapters.append(chapter_info) + else: + chapters_direct.append(chapter_info) + + # Final progress update + if self.progress_callback: + self.progress_callback(f"Chapter processing complete: {len(candidate_chapters) + len(chapters_direct)} chapters") + + # Sort direct chapters by file index to maintain order + chapters_direct.sort(key=lambda x: x["file_index"]) + + # Post-process smart mode candidates (only when merging is enabled) + effective_mode = enhanced_filtering if extraction_mode == "enhanced" else extraction_mode + if effective_mode == "smart" and candidate_chapters and not disable_merging: + # Check stop before post-processing + if is_stop_requested(): + print("❌ Chapter post-processing stopped by user") + return chapters, 'unknown' + + print(f"\n[SMART MODE] Processing {len(candidate_chapters)} candidate files...") + + # Sort candidates by file index to maintain order + candidate_chapters.sort(key=lambda x: x["file_index"]) + + # Debug: Show what files we have + section_files = [c for c in candidate_chapters if 'section' in c['original_basename'].lower()] + chapter_files = [c for c in candidate_chapters if 'chapter' in c['original_basename'].lower() and 'section' not in c['original_basename'].lower()] + other_files = [c for c in candidate_chapters if c not in section_files and c not in chapter_files] + + print(f" 📊 File breakdown:") + print(f" • Section files: {len(section_files)}") + print(f" • Chapter files: {len(chapter_files)}") + print(f" • Other files: {len(other_files)}") + + # Original smart mode logic when merging is enabled + # First, separate files with detected chapter numbers from those without + numbered_chapters = [] + unnumbered_chapters = [] + + for idx, chapter in enumerate(candidate_chapters): + # Yield periodically during categorization (can be disabled for max speed) + if idx % 10 == 0 and idx > 0 and os.getenv("ENABLE_GUI_YIELD", "1") == "1": + time.sleep(0.001) + + if chapter["num"] is not None: + numbered_chapters.append(chapter) + else: + unnumbered_chapters.append(chapter) + + print(f" • Files with chapter numbers: {len(numbered_chapters)}") + print(f" • Files without chapter numbers: {len(unnumbered_chapters)}") + + # Check if we have hash-based filenames (no numbered chapters found) + if not 
numbered_chapters and unnumbered_chapters: + print(" ⚠️ No chapter numbers found - likely hash-based filenames") + print(" → Using file order as chapter sequence") + + # Sort by file index to maintain order + unnumbered_chapters.sort(key=lambda x: x["file_index"]) + + # Assign sequential numbers + for i, chapter in enumerate(unnumbered_chapters, 1): + chapter["num"] = i + chapter["detection_method"] = f"{extraction_mode}_hash_filename_sequential" if extraction_mode == "enhanced" else "hash_filename_sequential" + if not chapter["title"] or chapter["title"] == chapter["original_basename"]: + chapter["title"] = f"Chapter {i}" + + chapters = unnumbered_chapters + else: + # We have some numbered chapters + chapters = numbered_chapters + + # For unnumbered files, check if they might be duplicates or appendices + if unnumbered_chapters: + print(f" → Analyzing {len(unnumbered_chapters)} unnumbered files...") + + # Get the max chapter number + max_num = max(c["num"] for c in numbered_chapters) + + # Check each unnumbered file + for chapter in unnumbered_chapters: + # Check stop in post-processing loop + if is_stop_requested(): + print("❌ Chapter post-processing stopped by user") + return chapters, 'unknown' + + # Check if it's very small (might be a separator or note) + if chapter["file_size"] < 200: + print(f" [SKIP] Very small file: {chapter['filename']} ({chapter['file_size']} chars)") + continue + + # Check if it has similar size to existing chapters (might be duplicate) + size = chapter["file_size"] + similar_chapters = [c for c in numbered_chapters + if abs(c["file_size"] - size) < 50] + + if similar_chapters: + # Might be a duplicate, skip it + print(f" [SKIP] Possible duplicate: {chapter['filename']} (similar size to {len(similar_chapters)} chapters)") + continue + + # Otherwise, add as appendix + max_num += 1 + chapter["num"] = max_num + chapter["detection_method"] = f"{extraction_mode}_appendix_sequential" if extraction_mode == "enhanced" else "appendix_sequential" + if not chapter["title"] or chapter["title"] == chapter["original_basename"]: + chapter["title"] = f"Appendix {max_num}" + chapters.append(chapter) + print(f" [ADD] Added as chapter {max_num}: {chapter['filename']}") + else: + # For other modes or smart mode with merging disabled + chapters = chapters_direct + + # Sort chapters by number + chapters.sort(key=lambda x: x["num"]) + + # Ensure chapter numbers are integers + # When merging is disabled, all chapters should have integer numbers anyway + for chapter in chapters: + if isinstance(chapter["num"], float): + chapter["num"] = int(chapter["num"]) + + # Final validation + if chapters: + print(f"\n✅ Final chapter count: {len(chapters)}") + print(f" • Chapter range: {chapters[0]['num']} - {chapters[-1]['num']}") + + # Enhanced mode summary + if extraction_mode == "enhanced": + enhanced_count = sum(1 for c in chapters if c.get('enhanced_extraction', False)) + print(f" 🚀 Enhanced extraction used: {enhanced_count}/{len(chapters)} chapters") + + # Check for gaps + chapter_nums = [c["num"] for c in chapters] + expected_nums = list(range(min(chapter_nums), max(chapter_nums) + 1)) + missing = set(expected_nums) - set(chapter_nums) + if missing: + print(f" ⚠️ Missing chapter numbers: {sorted(missing)}") + + # Language detection + combined_sample = ' '.join(sample_texts) if effective_mode == "smart" else '' + detected_language = self._detect_content_language(combined_sample) if combined_sample else 'unknown' + + if chapters: + self._print_extraction_summary(chapters, detected_language, 
extraction_mode, + h1_count if effective_mode == "smart" else 0, + h2_count if effective_mode == "smart" else 0, + file_size_groups if effective_mode == "smart" else {}) + + return chapters, detected_language + + def _extract_chapter_info(self, soup, file_path, content_text, html_content): + """Extract chapter number and title from various sources with parallel pattern matching""" + chapter_num = None + chapter_title = None + detection_method = None + + # SPECIAL HANDLING: When we have Section/Chapter pairs, differentiate them + filename = os.path.basename(file_path) + + # Handle different naming patterns for Section/Chapter files + if ('section' in filename.lower() or '_section' in filename.lower()) and 'chapter' not in filename.lower(): + # For Section files, add 0.1 to the base number + # Try different patterns + match = re.search(r'No(\d+)', filename) + if not match: + match = re.search(r'^(\d+)[_\-]', filename) + if not match: + match = re.search(r'^(\d+)', filename) + + if match: + base_num = int(match.group(1)) + chapter_num = base_num + 0.1 # Section gets .1 + detection_method = "filename_section_special" + + elif ('chapter' in filename.lower() or '_chapter' in filename.lower()) and 'section' not in filename.lower(): + # For Chapter files, use the base number + # Try different patterns + match = re.search(r'No(\d+)', filename) + if not match: + match = re.search(r'^(\d+)[_\-]', filename) + if not match: + match = re.search(r'^(\d+)', filename) + + if match: + chapter_num = int(match.group(1)) + detection_method = "filename_chapter_special" + + # If not handled by special logic, continue with normal extraction + if not chapter_num: + # Try filename first - use parallel pattern matching for better performance + chapter_patterns = [(pattern, flags, method) for pattern, flags, method in self.pattern_manager.CHAPTER_PATTERNS + if method.endswith('_number')] + + if len(chapter_patterns) > 3: # Only parallelize if we have enough patterns + # Parallel pattern matching for filename + with ThreadPoolExecutor(max_workers=min(4, len(chapter_patterns))) as executor: + def try_pattern(pattern_info): + pattern, flags, method = pattern_info + match = re.search(pattern, file_path, flags) + if match: + try: + num_str = match.group(1) + if num_str.isdigit(): + return int(num_str), f"filename_{method}" + elif method == 'chinese_chapter_cn': + converted = self._convert_chinese_number(num_str) + if converted: + return converted, f"filename_{method}" + except (ValueError, IndexError): + pass + return None, None + + # Submit all patterns + futures = [executor.submit(try_pattern, pattern_info) for pattern_info in chapter_patterns] + + # Check results as they complete + for future in as_completed(futures): + try: + num, method = future.result() + if num: + chapter_num = num + detection_method = method + # Cancel remaining futures + for f in futures: + f.cancel() + break + except Exception: + continue + else: + # Sequential processing for small pattern sets + for pattern, flags, method in chapter_patterns: + match = re.search(pattern, file_path, flags) + if match: + try: + num_str = match.group(1) + if num_str.isdigit(): + chapter_num = int(num_str) + detection_method = f"filename_{method}" + break + elif method == 'chinese_chapter_cn': + converted = self._convert_chinese_number(num_str) + if converted: + chapter_num = converted + detection_method = f"filename_{method}" + break + except (ValueError, IndexError): + continue + + # Try content if not found in filename + if not chapter_num: + # Check ignore 
settings for batch translation + batch_translate_active = os.getenv('BATCH_TRANSLATE_HEADERS', '0') == '1' + ignore_title_tag = os.getenv('IGNORE_TITLE', '0') == '1' and batch_translate_active + ignore_header_tags = os.getenv('IGNORE_HEADER', '0') == '1' and batch_translate_active + + # Prepare all text sources to check in parallel + text_sources = [] + + # Add title tag if not ignored + if not ignore_title_tag and soup.title and soup.title.string: + title_text = soup.title.string.strip() + text_sources.append(("title", title_text, True)) # True means this can be chapter_title + + # Add headers if not ignored + if not ignore_header_tags: + for header_tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: + headers = soup.find_all(header_tag) + for header in headers[:3]: # Limit to first 3 of each type + header_text = header.get_text(strip=True) + if header_text: + text_sources.append((f"header_{header_tag}", header_text, True)) + + # Add first paragraphs + first_elements = soup.find_all(['p', 'div'])[:5] + for elem in first_elements: + elem_text = elem.get_text(strip=True) + if elem_text: + text_sources.append(("content", elem_text, False)) # False means don't use as chapter_title + + # Process text sources in parallel if we have many + if len(text_sources) > 5: + with ThreadPoolExecutor(max_workers=min(6, len(text_sources))) as executor: + def extract_from_source(source_info): + source_type, text, can_be_title = source_info + num, method = self._extract_from_text(text, source_type) + return num, method, text if (num and can_be_title) else None + + # Submit all text sources + future_to_source = {executor.submit(extract_from_source, source): source + for source in text_sources} + + # Process results as they complete + for future in as_completed(future_to_source): + try: + num, method, title = future.result() + if num: + chapter_num = num + detection_method = method + if title and not chapter_title: + chapter_title = title + # Cancel remaining futures + for f in future_to_source: + f.cancel() + break + except Exception: + continue + else: + # Sequential processing for small text sets + for source_type, text, can_be_title in text_sources: + num, method = self._extract_from_text(text, source_type) + if num: + chapter_num = num + detection_method = method + if can_be_title and not chapter_title: + chapter_title = text + break + + # Final fallback to filename patterns + if not chapter_num: + filename_base = os.path.basename(file_path) + # Parallel pattern matching for filename extraction + if len(self.pattern_manager.FILENAME_EXTRACT_PATTERNS) > 3: + with ThreadPoolExecutor(max_workers=min(4, len(self.pattern_manager.FILENAME_EXTRACT_PATTERNS))) as executor: + def try_filename_pattern(pattern): + match = re.search(pattern, filename_base, re.IGNORECASE) + if match: + try: + return int(match.group(1)) + except (ValueError, IndexError): + pass + return None + + futures = [executor.submit(try_filename_pattern, pattern) + for pattern in self.pattern_manager.FILENAME_EXTRACT_PATTERNS] + + for future in as_completed(futures): + try: + num = future.result() + if num: + chapter_num = num + detection_method = "filename_number" + for f in futures: + f.cancel() + break + except Exception: + continue + else: + # Sequential for small pattern sets + for pattern in self.pattern_manager.FILENAME_EXTRACT_PATTERNS: + match = re.search(pattern, filename_base, re.IGNORECASE) + if match: + chapter_num = int(match.group(1)) + detection_method = "filename_number" + break + + # Extract title if not already found (with ignore 
settings support) + if not chapter_title: + # Check ignore settings for batch translation + batch_translate_active = os.getenv('BATCH_TRANSLATE_HEADERS', '0') == '1' + ignore_title_tag = os.getenv('IGNORE_TITLE', '0') == '1' and batch_translate_active + ignore_header_tags = os.getenv('IGNORE_HEADER', '0') == '1' and batch_translate_active + + # Try title tag if not ignored + if not ignore_title_tag and soup.title and soup.title.string: + chapter_title = soup.title.string.strip() + + # Try header tags if not ignored and no title found + if not chapter_title and not ignore_header_tags: + for header_tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: + header = soup.find(header_tag) + if header: + chapter_title = header.get_text(strip=True) + break + + # Final fallback + if not chapter_title: + chapter_title = f"Chapter {chapter_num}" if chapter_num else None + + chapter_title = re.sub(r'\s+', ' ', chapter_title).strip() if chapter_title else None + + return chapter_num, chapter_title, detection_method + + + def _extract_from_text(self, text, source_type): + """Extract chapter number from text using patterns with parallel matching for large pattern sets""" + # Get patterns that don't end with '_number' + text_patterns = [(pattern, flags, method) for pattern, flags, method in self.pattern_manager.CHAPTER_PATTERNS + if not method.endswith('_number')] + + # Only use parallel processing if we have many patterns + if len(text_patterns) > 5: + with ThreadPoolExecutor(max_workers=min(4, len(text_patterns))) as executor: + def try_text_pattern(pattern_info): + pattern, flags, method = pattern_info + match = re.search(pattern, text, flags) + if match: + try: + num_str = match.group(1) + if num_str.isdigit(): + return int(num_str), f"{source_type}_{method}" + elif method == 'chinese_chapter_cn': + converted = self._convert_chinese_number(num_str) + if converted: + return converted, f"{source_type}_{method}" + except (ValueError, IndexError): + pass + return None, None + + # Submit all patterns + futures = [executor.submit(try_text_pattern, pattern_info) for pattern_info in text_patterns] + + # Check results as they complete + for future in as_completed(futures): + try: + num, method = future.result() + if num: + # Cancel remaining futures + for f in futures: + f.cancel() + return num, method + except Exception: + continue + else: + # Sequential processing for small pattern sets + for pattern, flags, method in text_patterns: + match = re.search(pattern, text, flags) + if match: + try: + num_str = match.group(1) + if num_str.isdigit(): + return int(num_str), f"{source_type}_{method}" + elif method == 'chinese_chapter_cn': + converted = self._convert_chinese_number(num_str) + if converted: + return converted, f"{source_type}_{method}" + except (ValueError, IndexError): + continue + + return None, None + + def _convert_chinese_number(self, cn_num): + """Convert Chinese number to integer""" + if cn_num in self.pattern_manager.CHINESE_NUMS: + return self.pattern_manager.CHINESE_NUMS[cn_num] + + if '十' in cn_num: + parts = cn_num.split('十') + if len(parts) == 2: + tens = self.pattern_manager.CHINESE_NUMS.get(parts[0], 1) if parts[0] else 1 + ones = self.pattern_manager.CHINESE_NUMS.get(parts[1], 0) if parts[1] else 0 + return tens * 10 + ones + + return None + + def _detect_content_language(self, text_sample): + """Detect the primary language of content with parallel processing for large texts""" + + # For very short texts, use sequential processing + if len(text_sample) < 1000: + scripts = { + 'korean': 0, + 
'japanese_hiragana': 0, + 'japanese_katakana': 0, + 'chinese': 0, + 'latin': 0 + } + + for char in text_sample: + code = ord(char) + if 0xAC00 <= code <= 0xD7AF: + scripts['korean'] += 1 + elif 0x3040 <= code <= 0x309F: + scripts['japanese_hiragana'] += 1 + elif 0x30A0 <= code <= 0x30FF: + scripts['japanese_katakana'] += 1 + elif 0x4E00 <= code <= 0x9FFF: + scripts['chinese'] += 1 + elif 0x0020 <= code <= 0x007F: + scripts['latin'] += 1 + else: + # For longer texts, use parallel processing + # Split text into chunks for parallel processing + chunk_size = max(500, len(text_sample) // (os.cpu_count() or 4)) + chunks = [text_sample[i:i + chunk_size] for i in range(0, len(text_sample), chunk_size)] + + # Thread-safe accumulator + scripts_lock = threading.Lock() + scripts = { + 'korean': 0, + 'japanese_hiragana': 0, + 'japanese_katakana': 0, + 'chinese': 0, + 'latin': 0 + } + + def process_chunk(text_chunk): + """Process a chunk of text and return script counts""" + local_scripts = { + 'korean': 0, + 'japanese_hiragana': 0, + 'japanese_katakana': 0, + 'chinese': 0, + 'latin': 0 + } + + for char in text_chunk: + code = ord(char) + if 0xAC00 <= code <= 0xD7AF: + local_scripts['korean'] += 1 + elif 0x3040 <= code <= 0x309F: + local_scripts['japanese_hiragana'] += 1 + elif 0x30A0 <= code <= 0x30FF: + local_scripts['japanese_katakana'] += 1 + elif 0x4E00 <= code <= 0x9FFF: + local_scripts['chinese'] += 1 + elif 0x0020 <= code <= 0x007F: + local_scripts['latin'] += 1 + + return local_scripts + + # Process chunks in parallel + with ThreadPoolExecutor(max_workers=min(os.cpu_count() or 4, len(chunks))) as executor: + # Submit all chunks + futures = [executor.submit(process_chunk, chunk) for chunk in chunks] + + # Collect results + for future in as_completed(futures): + try: + chunk_scripts = future.result() + # Thread-safe accumulation + with scripts_lock: + for script, count in chunk_scripts.items(): + scripts[script] += count + except Exception as e: + print(f"[WARNING] Error processing chunk in language detection: {e}") + + # Language determination logic (same as original) + total_cjk = scripts['korean'] + scripts['japanese_hiragana'] + scripts['japanese_katakana'] + scripts['chinese'] + + if scripts['korean'] > total_cjk * 0.3: + return 'korean' + elif scripts['japanese_hiragana'] + scripts['japanese_katakana'] > total_cjk * 0.2: + return 'japanese' + elif scripts['chinese'] > total_cjk * 0.3: + return 'chinese' + elif scripts['latin'] > len(text_sample) * 0.7: + return 'english' + else: + return 'unknown' + + def _print_extraction_summary(self, chapters, detected_language, extraction_mode, h1_count, h2_count, file_size_groups): + """Print extraction summary""" + print(f"\n📊 Chapter Extraction Summary ({extraction_mode.capitalize()} Mode):") + print(f" • Total chapters extracted: {len(chapters)}") + + # Format chapter range handling both int and float + first_num = chapters[0]['num'] + last_num = chapters[-1]['num'] + + print(f" • Chapter range: {first_num} to {last_num}") + print(f" • Detected language: {detected_language}") + + if extraction_mode == "smart": + print(f" • Primary header type: {'
<h2>' if h2_count > h1_count else '<h1>
'}") + + image_only_count = sum(1 for c in chapters if c.get('is_image_only', False)) + text_only_count = sum(1 for c in chapters if not c.get('has_images', False) and c.get('file_size', 0) >= 500) + mixed_count = sum(1 for c in chapters if c.get('has_images', False) and c.get('file_size', 0) >= 500) + empty_count = sum(1 for c in chapters if c.get('file_size', 0) < 50) + + print(f" • Text-only chapters: {text_only_count}") + print(f" • Image-only chapters: {image_only_count}") + print(f" • Mixed content chapters: {mixed_count}") + print(f" • Empty/minimal content: {empty_count}") + + # Check for merged chapters + merged_count = sum(1 for c in chapters if c.get('was_merged', False)) + if merged_count > 0: + print(f" • Merged chapters: {merged_count}") + + # Check for missing chapters (only for integer sequences) + expected_chapters = set(range(chapters[0]['num'], chapters[-1]['num'] + 1)) + actual_chapters = set(c['num'] for c in chapters) + missing = expected_chapters - actual_chapters + if missing: + print(f" ⚠️ Missing chapter numbers: {sorted(missing)}") + + if extraction_mode == "smart": + method_stats = Counter(c['detection_method'] for c in chapters) + print(f" 📈 Detection methods used:") + for method, count in method_stats.most_common(): + print(f" • {method}: {count} chapters") + + large_groups = [size for size, files in file_size_groups.items() if len(files) > 1] + if large_groups: + print(f" ⚠️ Found {len(large_groups)} file size groups with potential duplicates") + else: + print(f" • Empty/placeholder: {empty_count}") + + if extraction_mode == "full": + print(f" 🔍 Full extraction preserved all HTML structure and tags") + + def _extract_epub_metadata(self, zf): + """Extract comprehensive metadata from EPUB file including all custom fields""" + meta = {} + # Use lxml for XML if available + xml_parser = 'lxml-xml' if self.parser == 'lxml' else 'xml' + try: + for name in zf.namelist(): + if name.lower().endswith('.opf'): + opf_content = zf.read(name) + soup = BeautifulSoup(opf_content, xml_parser) + + # Extract ALL Dublin Core elements (expanded list) + dc_elements = ['title', 'creator', 'subject', 'description', + 'publisher', 'contributor', 'date', 'type', + 'format', 'identifier', 'source', 'language', + 'relation', 'coverage', 'rights'] + + for element in dc_elements: + tag = soup.find(element) + if tag and tag.get_text(strip=True): + meta[element] = tag.get_text(strip=True) + + # Extract ALL meta tags (not just series) + meta_tags = soup.find_all('meta') + for meta_tag in meta_tags: + # Try different attribute names for the metadata name + name = meta_tag.get('name') or meta_tag.get('property', '') + content = meta_tag.get('content', '') + + if name and content: + # Store original name for debugging + original_name = name + + # Clean up common prefixes + if name.startswith('calibre:'): + name = name[8:] # Remove 'calibre:' prefix + elif name.startswith('dc:'): + name = name[3:] # Remove 'dc:' prefix + elif name.startswith('opf:'): + name = name[4:] # Remove 'opf:' prefix + + # Normalize the field name - replace hyphens with underscores + name = name.replace('-', '_') + + # Don't overwrite if already exists (prefer direct tags over meta tags) + if name not in meta: + meta[name] = content + + # Debug output for custom fields + if original_name != name: + print(f" • Found custom field: {original_name} → {name}") + + # Special handling for series information (maintain compatibility) + if 'series' not in meta: + series_tags = soup.find_all('meta', attrs={'name': lambda x: x and 
'series' in x.lower()}) + for series_tag in series_tags: + series_name = series_tag.get('content', '') + if series_name: + meta['series'] = series_name + break + + # Extract refines metadata (used by some EPUB creators) + refines_metas = soup.find_all('meta', attrs={'refines': True}) + for refine in refines_metas: + property_name = refine.get('property', '') + content = refine.get_text(strip=True) or refine.get('content', '') + + if property_name and content: + # Clean property name + if ':' in property_name: + property_name = property_name.split(':')[-1] + property_name = property_name.replace('-', '_') + + if property_name not in meta: + meta[property_name] = content + + # Log extraction summary + print(f"📋 Extracted {len(meta)} metadata fields") + + # Show standard vs custom fields + standard_keys = {'title', 'creator', 'language', 'subject', 'description', + 'publisher', 'date', 'identifier', 'source', 'rights', + 'contributor', 'type', 'format', 'relation', 'coverage'} + custom_keys = set(meta.keys()) - standard_keys + + if custom_keys: + print(f"📋 Standard fields: {len(standard_keys & set(meta.keys()))}") + print(f"📋 Custom fields found: {sorted(custom_keys)}") + + # Show sample values for custom fields (truncated) + for key in sorted(custom_keys)[:5]: # Show first 5 custom fields + value = str(meta[key]) + if len(value) > 50: + value = value[:47] + "..." + print(f" • {key}: {value}") + + if len(custom_keys) > 5: + print(f" • ... and {len(custom_keys) - 5} more custom fields") + + break + + except Exception as e: + print(f"[WARNING] Failed to extract metadata: {e}") + import traceback + traceback.print_exc() + + return meta + + def _categorize_resource(self, file_path, file_name): + """Categorize a file and return (resource_type, target_dir, safe_filename)""" + file_path_lower = file_path.lower() + file_name_lower = file_name.lower() + + if file_path_lower.endswith('.css'): + return 'css', 'css', sanitize_resource_filename(file_name) + elif file_path_lower.endswith(('.ttf', '.otf', '.woff', '.woff2', '.eot')): + return 'fonts', 'fonts', sanitize_resource_filename(file_name) + elif file_path_lower.endswith(('.jpg', '.jpeg', '.png', '.gif', '.svg', '.bmp', '.webp')): + return 'images', 'images', sanitize_resource_filename(file_name) + elif (file_path_lower.endswith(('.opf', '.ncx')) or + file_name_lower == 'container.xml' or + 'container.xml' in file_path_lower): + if 'container.xml' in file_path_lower: + safe_filename = 'container.xml' + else: + safe_filename = file_name + return 'epub_structure', None, safe_filename + elif file_path_lower.endswith(('.js', '.xml', '.txt')): + return 'other', None, sanitize_resource_filename(file_name) + + return None + + def _cleanup_old_resources(self, output_dir): + """Clean up old resource directories and EPUB structure files""" + print("🧹 Cleaning up any existing resource directories...") + + cleanup_success = True + + for resource_type in ['css', 'fonts', 'images']: + resource_dir = os.path.join(output_dir, resource_type) + if os.path.exists(resource_dir): + try: + shutil.rmtree(resource_dir) + print(f" 🗑️ Removed old {resource_type} directory") + except PermissionError as e: + print(f" ⚠️ Cannot remove {resource_type} directory (permission denied) - will merge with existing files") + cleanup_success = False + except Exception as e: + print(f" ⚠️ Error removing {resource_type} directory: {e} - will merge with existing files") + cleanup_success = False + + epub_structure_files = ['container.xml', 'content.opf', 'toc.ncx'] + for epub_file in 
epub_structure_files: + input_path = os.path.join(output_dir, epub_file) + if os.path.exists(input_path): + try: + os.remove(input_path) + print(f" 🗑️ Removed old {epub_file}") + except PermissionError: + print(f" ⚠️ Cannot remove {epub_file} (permission denied) - will use existing file") + except Exception as e: + print(f" ⚠️ Error removing {epub_file}: {e}") + + try: + for file in os.listdir(output_dir): + if file.lower().endswith(('.opf', '.ncx')): + file_path = os.path.join(output_dir, file) + try: + os.remove(file_path) + print(f" 🗑️ Removed old EPUB file: {file}") + except PermissionError: + print(f" ⚠️ Cannot remove {file} (permission denied)") + except Exception as e: + print(f" ⚠️ Error removing {file}: {e}") + except Exception as e: + print(f"⚠️ Error scanning for EPUB files: {e}") + + if not cleanup_success: + print("⚠️ Some cleanup operations failed due to file permissions") + print(" The program will continue and merge with existing files") + + return cleanup_success + + def _count_existing_resources(self, output_dir, extracted_resources): + """Count existing resources when skipping extraction""" + for resource_type in ['css', 'fonts', 'images', 'epub_structure']: + if resource_type == 'epub_structure': + epub_files = [] + for file in ['container.xml', 'content.opf', 'toc.ncx']: + if os.path.exists(os.path.join(output_dir, file)): + epub_files.append(file) + try: + for file in os.listdir(output_dir): + if file.lower().endswith(('.opf', '.ncx')) and file not in epub_files: + epub_files.append(file) + except: + pass + extracted_resources[resource_type] = epub_files + else: + resource_dir = os.path.join(output_dir, resource_type) + if os.path.exists(resource_dir): + try: + files = [f for f in os.listdir(resource_dir) if os.path.isfile(os.path.join(resource_dir, f))] + extracted_resources[resource_type] = files + except: + extracted_resources[resource_type] = [] + + total_existing = sum(len(files) for files in extracted_resources.values()) + print(f"✅ Found {total_existing} existing resource files") + return extracted_resources + + def _validate_critical_files(self, output_dir, extracted_resources): + """Validate that critical EPUB files were extracted""" + total_extracted = sum(len(files) for files in extracted_resources.values()) + print(f"✅ Extracted {total_extracted} resource files:") + + for resource_type, files in extracted_resources.items(): + if files: + if resource_type == 'epub_structure': + print(f" • EPUB Structure: {len(files)} files") + for file in files: + print(f" - {file}") + else: + print(f" • {resource_type.title()}: {len(files)} files") + + critical_files = ['container.xml'] + missing_critical = [f for f in critical_files if not os.path.exists(os.path.join(output_dir, f))] + + if missing_critical: + print(f"⚠️ WARNING: Missing critical EPUB files: {missing_critical}") + print(" This may prevent proper EPUB reconstruction!") + else: + print("✅ All critical EPUB structure files extracted successfully") + + opf_files = [f for f in extracted_resources['epub_structure'] if f.lower().endswith('.opf')] + if not opf_files: + print("⚠️ WARNING: No OPF file found! 
This will prevent EPUB reconstruction.") + else: + print(f"✅ Found OPF file(s): {opf_files}") + + def _create_extraction_report(self, output_dir, metadata, chapters, extracted_resources): + """Create comprehensive extraction report with HTML file tracking""" + report_path = os.path.join(output_dir, 'extraction_report.txt') + with open(report_path, 'w', encoding='utf-8') as f: + f.write("EPUB Extraction Report\n") + f.write("=" * 50 + "\n\n") + + f.write(f"EXTRACTION MODE: {metadata.get('extraction_mode', 'unknown').upper()}\n\n") + + f.write("METADATA:\n") + for key, value in metadata.items(): + if key not in ['chapter_titles', 'extracted_resources', 'extraction_mode']: + f.write(f" {key}: {value}\n") + + f.write(f"\nCHAPTERS ({len(chapters)}):\n") + + text_chapters = [] + image_only_chapters = [] + mixed_chapters = [] + + for chapter in chapters: + if chapter.get('has_images') and chapter.get('file_size', 0) < 500: + image_only_chapters.append(chapter) + elif chapter.get('has_images') and chapter.get('file_size', 0) >= 500: + mixed_chapters.append(chapter) + else: + text_chapters.append(chapter) + + if text_chapters: + f.write(f"\n TEXT CHAPTERS ({len(text_chapters)}):\n") + for c in text_chapters: + f.write(f" {c['num']:3d}. {c['title']} ({c['detection_method']})\n") + if c.get('original_html_file'): + f.write(f" → {c['original_html_file']}\n") + + if image_only_chapters: + f.write(f"\n IMAGE-ONLY CHAPTERS ({len(image_only_chapters)}):\n") + for c in image_only_chapters: + f.write(f" {c['num']:3d}. {c['title']} (images: {c.get('image_count', 0)})\n") + if c.get('original_html_file'): + f.write(f" → {c['original_html_file']}\n") + if 'body' in c: + try: + soup = BeautifulSoup(c['body'], 'html.parser') + images = soup.find_all('img') + for img in images[:3]: + src = img.get('src', 'unknown') + f.write(f" • Image: {src}\n") + if len(images) > 3: + f.write(f" • ... and {len(images) - 3} more images\n") + except: + pass + + if mixed_chapters: + f.write(f"\n MIXED CONTENT CHAPTERS ({len(mixed_chapters)}):\n") + for c in mixed_chapters: + f.write(f" {c['num']:3d}. {c['title']} (text: {c.get('file_size', 0)} chars, images: {c.get('image_count', 0)})\n") + if c.get('original_html_file'): + f.write(f" → {c['original_html_file']}\n") + + f.write(f"\nRESOURCES EXTRACTED:\n") + for resource_type, files in extracted_resources.items(): + if files: + if resource_type == 'epub_structure': + f.write(f" EPUB Structure: {len(files)} files\n") + for file in files: + f.write(f" - {file}\n") + else: + f.write(f" {resource_type.title()}: {len(files)} files\n") + for file in files[:5]: + f.write(f" - {file}\n") + if len(files) > 5: + f.write(f" ... 
and {len(files) - 5} more\n") + + f.write(f"\nHTML FILES WRITTEN:\n") + html_files_written = metadata.get('html_files_written', 0) + f.write(f" Total: {html_files_written} files\n") + f.write(f" Location: Main directory and 'originals' subdirectory\n") + + f.write(f"\nPOTENTIAL ISSUES:\n") + issues = [] + + if image_only_chapters: + issues.append(f" • {len(image_only_chapters)} chapters contain only images (may need OCR)") + + missing_html = sum(1 for c in chapters if not c.get('original_html_file')) + if missing_html > 0: + issues.append(f" • {missing_html} chapters failed to write HTML files") + + if not extracted_resources.get('epub_structure'): + issues.append(" • No EPUB structure files found (may affect reconstruction)") + + if not issues: + f.write(" None detected - extraction appears successful!\n") + else: + for issue in issues: + f.write(issue + "\n") + + print(f"📄 Saved extraction report to: {report_path}") + + def _log_extraction_summary(self, chapters, extracted_resources, detected_language, html_files_written=0): + """Log final extraction summary with HTML file information""" + extraction_mode = chapters[0].get('extraction_mode', 'unknown') if chapters else 'unknown' + + print(f"\n✅ {extraction_mode.capitalize()} extraction complete!") + print(f" 📚 Chapters: {len(chapters)}") + print(f" 📄 HTML files written: {html_files_written}") + print(f" 🎨 Resources: {sum(len(files) for files in extracted_resources.values())}") + print(f" 🌍 Language: {detected_language}") + + image_only_count = sum(1 for c in chapters if c.get('has_images') and c.get('file_size', 0) < 500) + if image_only_count > 0: + print(f" 📸 Image-only chapters: {image_only_count}") + + epub_files = extracted_resources.get('epub_structure', []) + if epub_files: + print(f" 📋 EPUB Structure: {len(epub_files)} files ({', '.join(epub_files)})") + else: + print(f" ⚠️ No EPUB structure files extracted!") + + print(f"\n🔍 Pre-flight check readiness:") + print(f" ✅ HTML files: {'READY' if html_files_written > 0 else 'NOT READY'}") + print(f" ✅ Metadata: READY") + print(f" ✅ Resources: READY") + +# ===================================================== +# UNIFIED TRANSLATION PROCESSOR +# ===================================================== + +class TranslationProcessor: + """Handles the translation of individual chapters""" + + def __init__(self, config, client, out_dir, log_callback=None, stop_callback=None, uses_zero_based=False, is_text_file=False): + self.config = config + self.client = client + self.out_dir = out_dir + self.log_callback = log_callback + self.stop_callback = stop_callback + self.chapter_splitter = ChapterSplitter(model_name=config.MODEL) + self.uses_zero_based = uses_zero_based + self.is_text_file = is_text_file + + # Check and log multi-key status + if hasattr(self.client, 'use_multi_keys') and self.client.use_multi_keys: + stats = self.client.get_stats() + self._log(f"🔑 Multi-key mode active: {stats.get('total_keys', 0)} keys") + self._log(f" Active keys: {stats.get('active_keys', 0)}") + + def _log(self, message): + """Log a message""" + if self.log_callback: + self.log_callback(message) + else: + print(message) + + def report_key_status(self): + """Report multi-key status if available""" + if hasattr(self.client, 'get_stats'): + stats = self.client.get_stats() + if stats.get('multi_key_mode', False): + self._log(f"\n📊 API Key Status:") + self._log(f" Active Keys: {stats.get('active_keys', 0)}/{stats.get('total_keys', 0)}") + self._log(f" Success Rate: {stats.get('success_rate', 0):.1%}") + self._log(f" 
Total Requests: {stats.get('total_requests', 0)}\n") + + def check_stop(self): + """Check if translation should stop""" + if self.stop_callback and self.stop_callback(): + print("❌ Translation stopped by user request.") + return True + + def check_duplicate_content(self, result, idx, prog, out, actual_num=None): + """Check if translated content is duplicate - with mode selection""" + + # Get detection mode from config + detection_mode = getattr(self.config, 'DUPLICATE_DETECTION_MODE', 'basic') + print(f" 🔍 DEBUG: Detection mode = '{detection_mode}'") + print(f" 🔍 DEBUG: Lookback chapters = {self.config.DUPLICATE_LOOKBACK_CHAPTERS}") + + # Extract content_hash if available from progress + content_hash = None + if detection_mode == 'ai-hunter': + # Try to get content_hash from the current chapter info + # Use actual_num if provided, otherwise fallback to idx+1 + if actual_num is not None: + chapter_key = str(actual_num) + else: + chapter_key = str(idx + 1) + if chapter_key in prog.get("chapters", {}): + chapter_info = prog["chapters"][chapter_key] + content_hash = chapter_info.get("content_hash") + print(f" 🔍 DEBUG: Found content_hash for chapter {idx}: {content_hash}") + + if detection_mode == 'ai-hunter': + print(" 🤖 DEBUG: Routing to AI Hunter detection...") + # Check if AI Hunter method is available (injected by the wrapper) + if hasattr(self, '_check_duplicate_ai_hunter'): + return self._check_duplicate_ai_hunter(result, idx, prog, out, content_hash) + else: + print(" ⚠️ AI Hunter method not available, falling back to basic detection") + return self._check_duplicate_basic(result, idx, prog, out) + elif detection_mode == 'cascading': + print(" 🔄 DEBUG: Routing to Cascading detection...") + return self._check_duplicate_cascading(result, idx, prog, out) + else: + print(" 📋 DEBUG: Routing to Basic detection...") + return self._check_duplicate_basic(result, idx, prog, out) + + def _check_duplicate_basic(self, result, idx, prog, out): + """Original basic duplicate detection""" + try: + result_clean = re.sub(r'<[^>]+>', '', result).strip().lower() + result_sample = result_clean[:1000] + + lookback_chapters = self.config.DUPLICATE_LOOKBACK_CHAPTERS + + for prev_idx in range(max(0, idx - lookback_chapters), idx): + prev_key = str(prev_idx) + if prev_key in prog["chapters"] and prog["chapters"][prev_key].get("output_file"): + prev_file = prog["chapters"][prev_key]["output_file"] + prev_path = os.path.join(out, prev_file) + + if os.path.exists(prev_path): + try: + with open(prev_path, 'r', encoding='utf-8') as f: + prev_content = f.read() + prev_clean = re.sub(r'<[^>]+>', '', prev_content).strip().lower() + prev_sample = prev_clean[:1000] + + # Use SequenceMatcher for similarity comparison + similarity = SequenceMatcher(None, result_sample, prev_sample).ratio() + + if similarity >= 0.85: # 85% threshold + print(f" 🚀 Basic detection: Duplicate found ({int(similarity*100)}%)") + return True, int(similarity * 100) + + except Exception as e: + print(f" Warning: Failed to read {prev_path}: {e}") + continue + + return False, 0 + + except Exception as e: + print(f" Warning: Failed to check duplicate content: {e}") + return False, 0 + + + def _check_duplicate_cascading(self, result, idx, prog, out): + """Cascading detection - basic first, then AI Hunter for borderline cases""" + # Step 1: Basic + is_duplicate_basic, similarity_basic = self._check_duplicate_basic(result, idx, prog, out) + + if is_duplicate_basic: + return True, similarity_basic + + # Step 2: If basic detection finds moderate similarity, 
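+
+# --- Illustrative sketch (not part of the original module; names are hypothetical) ---
+# The basic detector above strips tags, lowercases, keeps the first 1,000
+# characters, and flags a duplicate when SequenceMatcher reports >= 85%.
+# The same idea in isolation:
+import re
+from difflib import SequenceMatcher
+
+def looks_like_duplicate(html_a, html_b, threshold=0.85):
+    strip = lambda h: re.sub(r'<[^>]+>', '', h).strip().lower()[:1000]
+    return SequenceMatcher(None, strip(html_a), strip(html_b)).ratio() >= threshold
+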
use AI Hunter + if similarity_basic >= 60: # Configurable threshold + print(f" 🤖 Moderate similarity ({similarity_basic}%) - running AI Hunter analysis...") + if hasattr(self, '_check_duplicate_ai_hunter'): + is_duplicate_ai, similarity_ai = self._check_duplicate_ai_hunter(result, idx, prog, out) + if is_duplicate_ai: + return True, similarity_ai + else: + print(" ⚠️ AI Hunter method not available for cascading analysis") + + return False, max(similarity_basic, 0) + + def _extract_text_features(self, text): + """Extract multiple features from text for AI Hunter analysis""" + features = { + 'semantic': {}, + 'structural': {}, + 'characters': [], + 'patterns': {} + } + + # Semantic fingerprint + lines = text.split('\n') + + # Character extraction (names that appear 3+ times) + words = re.findall(r'\b[A-Z][a-z]+\b', text) + word_freq = Counter(words) + features['characters'] = [name for name, count in word_freq.items() if count >= 3] + + # Dialogue patterns + dialogue_patterns = re.findall(r'"([^"]+)"', text) + features['semantic']['dialogue_count'] = len(dialogue_patterns) + features['semantic']['dialogue_lengths'] = [len(d) for d in dialogue_patterns[:10]] + + # Speaker patterns + speaker_patterns = re.findall(r'(\w+)\s+(?:said|asked|replied|shouted|whispered)', text.lower()) + features['semantic']['speakers'] = list(set(speaker_patterns[:20])) + + # Number extraction + numbers = re.findall(r'\b\d+\b', text) + features['patterns']['numbers'] = numbers[:20] + + # Structural signature + para_lengths = [] + dialogue_count = 0 + for para in text.split('\n\n'): + if para.strip(): + para_lengths.append(len(para)) + if '"' in para: + dialogue_count += 1 + + features['structural']['para_count'] = len(para_lengths) + features['structural']['avg_para_length'] = sum(para_lengths) / max(1, len(para_lengths)) + features['structural']['dialogue_ratio'] = dialogue_count / max(1, len(para_lengths)) + + # Create structural pattern string + pattern = [] + for para in text.split('\n\n')[:20]: # First 20 paragraphs + if para.strip(): + if '"' in para: + pattern.append('D') # Dialogue + elif len(para) > 300: + pattern.append('L') # Long + elif len(para) < 100: + pattern.append('S') # Short + else: + pattern.append('M') # Medium + features['structural']['pattern'] = ''.join(pattern) + + return features + + def _calculate_exact_similarity(self, text1, text2): + """Calculate exact text similarity""" + return SequenceMatcher(None, text1.lower(), text2.lower()).ratio() + + def _calculate_smart_similarity(self, text1, text2): + """Smart similarity with length-aware sampling""" + # Check length ratio first + len_ratio = len(text1) / max(1, len(text2)) + if len_ratio < 0.7 or len_ratio > 1.3: + return 0.0 + + # Smart sampling for large texts + if len(text1) > 10000: + sample_size = 3000 + samples1 = [ + text1[:sample_size], + text1[len(text1)//2 - sample_size//2:len(text1)//2 + sample_size//2], + text1[-sample_size:] + ] + samples2 = [ + text2[:sample_size], + text2[len(text2)//2 - sample_size//2:len(text2)//2 + sample_size//2], + text2[-sample_size:] + ] + similarities = [SequenceMatcher(None, s1.lower(), s2.lower()).ratio() + for s1, s2 in zip(samples1, samples2)] + return sum(similarities) / len(similarities) + else: + # Use first 2000 chars for smaller texts + return SequenceMatcher(None, text1[:2000].lower(), text2[:2000].lower()).ratio() + + def _calculate_semantic_similarity(self, sem1, sem2): + """Calculate semantic fingerprint similarity""" + score = 0.0 + max_score = 0.0 + + # Compare dialogue counts + if 
'dialogue_count' in sem1 and 'dialogue_count' in sem2: + max_score += 1.0 + ratio = min(sem1['dialogue_count'], sem2['dialogue_count']) / max(1, max(sem1['dialogue_count'], sem2['dialogue_count'])) + score += ratio * 0.3 + + # Compare speakers + if 'speakers' in sem1 and 'speakers' in sem2: + max_score += 1.0 + if sem1['speakers'] and sem2['speakers']: + overlap = len(set(sem1['speakers']) & set(sem2['speakers'])) + total = len(set(sem1['speakers']) | set(sem2['speakers'])) + score += (overlap / max(1, total)) * 0.4 + + # Compare dialogue lengths pattern + if 'dialogue_lengths' in sem1 and 'dialogue_lengths' in sem2: + max_score += 1.0 + if sem1['dialogue_lengths'] and sem2['dialogue_lengths']: + # Compare dialogue length patterns + len1 = sem1['dialogue_lengths'][:10] + len2 = sem2['dialogue_lengths'][:10] + if len1 and len2: + avg1 = sum(len1) / len(len1) + avg2 = sum(len2) / len(len2) + ratio = min(avg1, avg2) / max(1, max(avg1, avg2)) + score += ratio * 0.3 + + return score / max(1, max_score) + + def _calculate_structural_similarity(self, struct1, struct2): + """Calculate structural signature similarity""" + score = 0.0 + + # Compare paragraph patterns + if 'pattern' in struct1 and 'pattern' in struct2: + pattern_sim = SequenceMatcher(None, struct1['pattern'], struct2['pattern']).ratio() + score += pattern_sim * 0.4 + + # Compare paragraph statistics + if all(k in struct1 for k in ['para_count', 'avg_para_length', 'dialogue_ratio']) and \ + all(k in struct2 for k in ['para_count', 'avg_para_length', 'dialogue_ratio']): + + # Paragraph count ratio + para_ratio = min(struct1['para_count'], struct2['para_count']) / max(1, max(struct1['para_count'], struct2['para_count'])) + score += para_ratio * 0.2 + + # Average length ratio + avg_ratio = min(struct1['avg_para_length'], struct2['avg_para_length']) / max(1, max(struct1['avg_para_length'], struct2['avg_para_length'])) + score += avg_ratio * 0.2 + + # Dialogue ratio similarity + dialogue_diff = abs(struct1['dialogue_ratio'] - struct2['dialogue_ratio']) + score += (1 - dialogue_diff) * 0.2 + + return score + + def _calculate_character_similarity(self, chars1, chars2): + """Calculate character name similarity""" + if not chars1 or not chars2: + return 0.0 + + # Find overlapping characters + set1 = set(chars1) + set2 = set(chars2) + overlap = len(set1 & set2) + total = len(set1 | set2) + + return overlap / max(1, total) + + def _calculate_pattern_similarity(self, pat1, pat2): + """Calculate pattern-based similarity""" + score = 0.0 + + # Compare numbers (they rarely change in translations) + if 'numbers' in pat1 and 'numbers' in pat2: + nums1 = set(pat1['numbers']) + nums2 = set(pat2['numbers']) + if nums1 and nums2: + overlap = len(nums1 & nums2) + total = len(nums1 | nums2) + score = overlap / max(1, total) + + return score + + def generate_rolling_summary(self, history_manager, chapter_num, base_system_content=None, source_text=None): + """Generate rolling summary after a chapter for context continuity. + Uses a dedicated summary system prompt (with glossary) distinct from translation. + Writes the summary to rolling_summary.txt and returns the summary string. 
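+
+# --- Illustrative sketch (not part of the original module; the weights are hypothetical) ---
+# Each _calculate_*_similarity helper above returns a score in [0, 1]. An
+# AI Hunter-style verdict typically blends them into a single number, e.g.:
+def blend_similarity_scores(scores):
+    weights = {'exact': 0.3, 'semantic': 0.25, 'structural': 0.2,
+               'characters': 0.15, 'patterns': 0.1}
+    return sum(weights[k] * scores.get(k, 0.0) for k in weights)
+
+# blend_similarity_scores({'exact': 0.9, 'semantic': 0.8}) -> 0.47
+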
+ """ + if not self.config.USE_ROLLING_SUMMARY: + return None + + + current_history = history_manager.load_history() + messages_to_include = self.config.ROLLING_SUMMARY_EXCHANGES * 2 + + # Prefer directly provided source text (e.g., just-translated chapter) when available + assistant_responses = [] + if source_text and isinstance(source_text, str) and source_text.strip(): + assistant_responses = [source_text] + else: + if len(current_history) >= 2: + recent_messages = current_history[-messages_to_include:] if messages_to_include > 0 else current_history + for h in recent_messages: + if h.get("role") == "assistant": + assistant_responses.append(h["content"]) + + # If still empty, skip quietly + if not assistant_responses: + return None + + # Build a dedicated summary system prompt (do NOT reuse main translation system prompt) + # Append glossary to keep terminology consistent + summary_system_template = os.getenv("ROLLING_SUMMARY_SYSTEM_PROMPT", "You create concise summaries for continuity.").strip() + try: + glossary_path = find_glossary_file(self.out_dir) + except Exception: + glossary_path = None + system_prompt = build_system_prompt(summary_system_template, glossary_path) + # Add explicit instruction for clarity + system_prompt += "\n\n[Instruction: Generate a concise rolling summary of the previous chapter. Use glossary terms consistently. Do not include warnings or explanations.]" + + user_prompt_template = os.getenv( + "ROLLING_SUMMARY_USER_PROMPT", + "Summarize the key events, characters, tone, and important details from these translations. " + "Focus on: character names/relationships, plot developments, and any special terminology used.\n\n" + "{translations}" + ) + + translations_text = "\n---\n".join(assistant_responses) + user_prompt = user_prompt_template.replace("{translations}", translations_text) + + summary_msgs = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": f"[Rolling Summary of Chapter {chapter_num}]\n" + user_prompt} + ] + + + try: + summary_resp, _ = send_with_interrupt( + summary_msgs, self.client, self.config.TEMP, + min(2000, self.config.MAX_OUTPUT_TOKENS), + self.check_stop, + context='summary' + ) + + # Save the summary to the output folder + summary_file = os.path.join(self.out_dir, "rolling_summary.txt") + header = f"=== Rolling Summary of Chapter {chapter_num} ===\n(This is a summary of the previous chapter for context)\n" + + mode = "a" if self.config.ROLLING_SUMMARY_MODE == "append" else "w" + with open(summary_file, mode, encoding="utf-8") as sf: + if mode == "a": + sf.write("\n\n") + sf.write(header) + sf.write(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}]\n") + sf.write(summary_resp.strip()) + + # If in append mode, trim to retain only the last N entries if configured + try: + if self.config.ROLLING_SUMMARY_MODE == "append": + max_entries = int(getattr(self.config, "ROLLING_SUMMARY_MAX_ENTRIES", 0) or 0) + if max_entries > 0: + with open(summary_file, 'r', encoding='utf-8') as rf: + content = rf.read() + # Find the start of each summary block by header line + headers = [m.start() for m in re.finditer(r"(?m)^===\s*Rolling Summary.*$", content)] + if len(headers) > max_entries: + # Keep only the last max_entries blocks + keep_starts = headers[-max_entries:] + blocks = [] + for i, s in enumerate(keep_starts): + e = keep_starts[i + 1] if i + 1 < len(keep_starts) else len(content) + block = content[s:e].strip() + if block: + blocks.append(block) + trimmed_content = ("\n\n".join(blocks) + "\n") if blocks else "" + with 
open(summary_file, 'w', encoding='utf-8') as wf: + wf.write(trimmed_content) + # Optional log showing retained count + try: + self._log(f"📚 Total summaries in memory: {len(blocks)} (trimmed to last {max_entries})") + except Exception: + pass + except Exception as _trim_err: + try: + self._log(f"⚠️ Failed to trim rolling summaries: {_trim_err}") + except Exception: + pass + + # Log to GUI if available, otherwise console + try: + self._log(f"📝 Generated rolling summary for Chapter {chapter_num} ({'append' if mode=='a' else 'replace'} mode)") + self._log(f" ➜ Saved to: {summary_file} ({len(summary_resp.strip())} chars)") + except Exception: + print(f"📝 Generated rolling summary for Chapter {chapter_num} ({'append' if mode=='a' else 'replace'} mode)") + print(f" ➜ Saved to: {summary_file} ({len(summary_resp.strip())} chars)") + return summary_resp.strip() + + except Exception as e: + try: + self._log(f"⚠️ Failed to generate rolling summary: {e}") + except Exception: + print(f"⚠️ Failed to generate rolling summary: {e}") + return None + + def translate_with_retry(self, msgs, chunk_html, c, chunk_idx, total_chunks): + """Handle translation with retry logic""" + + # CRITICAL FIX: Reset client state for each chunk + if hasattr(self.client, 'reset_cleanup_state'): + self.client.reset_cleanup_state() + + # Also ensure we're not in cleanup mode from previous operations + if hasattr(self.client, '_in_cleanup'): + self.client._in_cleanup = False + if hasattr(self.client, '_cancelled'): + self.client._cancelled = False + + + retry_count = 0 + + # Get retry attempts from AI Hunter config if available + ai_config = {} + try: + # Try to get AI Hunter config from environment variable first + ai_hunter_config_str = os.getenv('AI_HUNTER_CONFIG') + if ai_hunter_config_str: + ai_config = json.loads(ai_hunter_config_str) + else: + # Fallback to config attribute + ai_config = getattr(self.config, 'ai_hunter_config', {}) + except (json.JSONDecodeError, AttributeError): + ai_config = {} + + if isinstance(ai_config, dict): + max_retries = ai_config.get('retry_attempts', 3) + max_duplicate_retries = ai_config.get('retry_attempts', 6) # Use same setting for duplicate retries + else: + max_retries = 3 + max_duplicate_retries = 6 + + duplicate_retry_count = 0 + timeout_retry_count = 0 + max_timeout_retries = 2 + history_purged = False + + original_max_tokens = self.config.MAX_OUTPUT_TOKENS + original_temp = self.config.TEMP + original_user_prompt = msgs[-1]["content"] + + chunk_timeout = None + if self.config.RETRY_TIMEOUT: + chunk_timeout = self.config.CHUNK_TIMEOUT + + result = None + finish_reason = None + + while True: + if self.check_stop(): + return None, None + + try: + current_max_tokens = self.config.MAX_OUTPUT_TOKENS + current_temp = self.config.TEMP + + total_tokens = sum(self.chapter_splitter.count_tokens(m["content"]) for m in msgs) + # Determine file reference + if c.get('is_chunk', False): + file_ref = f"Section_{c['num']}" + else: + # Check if this is a text file - need to access from self + is_text_source = self.is_text_file or c.get('filename', '').endswith('.txt') + terminology = "Section" if is_text_source else "Chapter" + file_ref = c.get('original_basename', f'{terminology}_{c["num"]}') + + print(f"[DEBUG] Chunk {chunk_idx}/{total_chunks} tokens = {total_tokens:,} / {self.get_token_budget_str()} [File: {file_ref}]") + + self.client.context = 'translation' + + # Generate filename for chunks + if chunk_idx and total_chunks > 1: + # This is a chunk - use chunk naming format + fname = 
f"response_{c['num']:03d}_chunk_{chunk_idx}.html" + else: + # Not a chunk - use regular naming + fname = FileUtilities.create_chapter_filename(c, c.get('actual_chapter_num', c['num'])) + + # Set output filename BEFORE the API call + if hasattr(self.client, 'set_output_filename'): + self.client.set_output_filename(fname) + + # Track the filename so truncation logs know which file this is + if hasattr(self.client, '_current_output_file'): + self.client._current_output_file = fname + + # Generate unique request ID for this chunk + #request_id = f"{c['num']:03d}_chunk{chunk_idx}_{uuid.uuid4().hex[:8]}" + + result, finish_reason = send_with_interrupt( + msgs, self.client, current_temp, current_max_tokens, + self.check_stop, chunk_timeout + ) + # Enhanced mode workflow: + # 1. Original HTML -> html2text -> Markdown/plain text (during extraction) + # 2. Markdown sent to translation API (better for translation quality) + # 3. Translated markdown -> HTML conversion (here) + if result and c.get("enhanced_extraction", False): + print(f"🔄 Converting translated markdown back to HTML...") + result = convert_enhanced_text_to_html(result, c) + retry_needed = False + retry_reason = "" + is_duplicate_retry = False + + # ENHANCED: Force re-read environment variable for latest setting + retry_truncated_enabled = os.getenv("RETRY_TRUNCATED", "0") == "1" + + # Debug logging to verify the toggle state + #print(f" DEBUG: finish_reason='{finish_reason}', RETRY_TRUNCATED={retry_truncated_enabled}, config.RETRY_TRUNCATED={self.config.RETRY_TRUNCATED}") + #print(f" DEBUG: Current tokens={self.config.MAX_OUTPUT_TOKENS}, Min retry tokens={self.config.MAX_RETRY_TOKENS}, retry_count={retry_count}") + + if finish_reason == "length" and (retry_truncated_enabled or self.config.RETRY_TRUNCATED): + if retry_count < max_retries: + # For truncated responses, ensure we never go below the minimum retry tokens + proposed_limit = self.config.MAX_OUTPUT_TOKENS * 2 + + # Always enforce minimum - never retry with tokens below the constraint + new_token_limit = max(proposed_limit, self.config.MAX_RETRY_TOKENS) + + if new_token_limit != self.config.MAX_OUTPUT_TOKENS: + retry_needed = True + retry_reason = "truncated output" + old_limit = self.config.MAX_OUTPUT_TOKENS + self.config.MAX_OUTPUT_TOKENS = new_token_limit + retry_count += 1 + + if old_limit < self.config.MAX_RETRY_TOKENS: + print(f" 🔄 TRUNCATION RETRY: Boosting tokens {old_limit} → {new_token_limit} (enforcing minimum: {self.config.MAX_RETRY_TOKENS})") + else: + print(f" 🔄 TRUNCATION RETRY: Doubling tokens {old_limit} → {new_token_limit} (above minimum: {self.config.MAX_RETRY_TOKENS})") + else: + print(f" ⚠️ TRUNCATION DETECTED: Token adjustment not needed - already at maximum {self.config.MAX_OUTPUT_TOKENS}") + else: + print(f" ⚠️ TRUNCATION DETECTED: Max retries ({max_retries}) reached - accepting truncated response") + elif finish_reason == "length" and not (retry_truncated_enabled or self.config.RETRY_TRUNCATED): + print(f" ⏭️ TRUNCATION DETECTED: Auto-retry is DISABLED - accepting truncated response") + elif finish_reason == "length": + print(f" ⚠️ TRUNCATION DETECTED: Unexpected condition - check logic") + + if not retry_needed: + # Force re-read the environment variable to ensure we have current setting + duplicate_enabled = os.getenv("RETRY_DUPLICATE_BODIES", "0") == "1" + + if duplicate_enabled and duplicate_retry_count < max_duplicate_retries: + idx = c.get('__index', 0) + prog = c.get('__progress', {}) + print(f" 🔍 Checking for duplicate content...") + # Get actual 
chapter number for duplicate detection + actual_num = c.get('actual_chapter_num', c.get('num', idx + 1)) + is_duplicate, similarity = self.check_duplicate_content(result, idx, prog, self.out_dir, actual_num) + + if is_duplicate: + retry_needed = True + is_duplicate_retry = True + retry_reason = f"duplicate content (similarity: {similarity}%)" + duplicate_retry_count += 1 + + # Check if temperature change is disabled + disable_temp_change = ai_config.get('disable_temperature_change', False) if isinstance(ai_config, dict) else False + + if duplicate_retry_count >= 3 and not history_purged: + print(f" 🧹 Clearing history after 3 attempts...") + if 'history_manager' in c: + c['history_manager'].save_history([]) + history_purged = True + if not disable_temp_change: + self.config.TEMP = original_temp + else: + print(f" 🌡️ Temperature change disabled - keeping current temp: {self.config.TEMP}") + + elif duplicate_retry_count == 1: + if disable_temp_change: + print(f" 🔄 First duplicate retry - temperature change disabled") + else: + print(f" 🔄 First duplicate retry - same temperature") + + elif history_purged: + if not disable_temp_change: + attempts_since_purge = duplicate_retry_count - 3 + self.config.TEMP = min(original_temp + (0.1 * attempts_since_purge), 1.0) + print(f" 🌡️ Post-purge temp: {self.config.TEMP}") + else: + print(f" 🌡️ Temperature change disabled - keeping temp: {self.config.TEMP}") + + else: + if not disable_temp_change: + self.config.TEMP = min(original_temp + (0.1 * (duplicate_retry_count - 1)), 1.0) + print(f" 🌡️ Gradual temp increase: {self.config.TEMP}") + else: + print(f" 🌡️ Temperature change disabled - keeping temp: {self.config.TEMP}") + + if duplicate_retry_count == 1: + user_prompt = f"[RETRY] Chapter {c['num']}: Ensure unique translation.\n{chunk_html}" + elif duplicate_retry_count <= 3: + user_prompt = f"[ATTEMPT {duplicate_retry_count}] Translate uniquely:\n{chunk_html}" + else: + user_prompt = f"Chapter {c['num']}:\n{chunk_html}" + + msgs[-1] = {"role": "user", "content": user_prompt} + elif not duplicate_enabled: + print(f" ⏭️ Duplicate detection is DISABLED - skipping check") + + if retry_needed: + if is_duplicate_retry: + print(f" 🔄 Duplicate retry {duplicate_retry_count}/{max_duplicate_retries}") + else: + print(f" 🔄 Retry {retry_count}/{max_retries}: {retry_reason}") + + time.sleep(2) + continue + + break + + except UnifiedClientError as e: + error_msg = str(e) + + if "stopped by user" in error_msg: + print("❌ Translation stopped by user during API call") + return None, None + + if "took" in error_msg and "timeout:" in error_msg: + if timeout_retry_count < max_timeout_retries: + timeout_retry_count += 1 + print(f" ⏱️ Chunk took too long, retry {timeout_retry_count}/{max_timeout_retries}") + print(f" 🔄 Retrying") + time.sleep(2) + continue + else: + print(f" ❌ Max timeout retries reached") + raise UnifiedClientError("Translation failed after timeout retries") + + elif "timed out" in error_msg and "timeout:" not in error_msg: + print(f"⚠️ {error_msg}, retrying...") + time.sleep(5) + continue + + elif getattr(e, "error_type", None) == "rate_limit" or getattr(e, "http_status", None) == 429: + # Rate limit errors - clean handling without traceback + print("⚠️ Rate limited, sleeping 60s…") + for i in range(60): + if self.check_stop(): + print("❌ Translation stopped during rate limit wait") + return None, None + time.sleep(1) + continue + + else: + # For unexpected errors, show the error message but suppress traceback in most cases + if getattr(e, "error_type", None) 
in ["api_error", "validation", "prohibited_content"]: + print(f"❌ API Error: {error_msg}") + raise UnifiedClientError(f"API Error: {error_msg}") + else: + raise + + except Exception as e: + print(f"❌ Unexpected error during API call: {e}") + raise + + self.config.MAX_OUTPUT_TOKENS = original_max_tokens + self.config.TEMP = original_temp + + if retry_count > 0 or duplicate_retry_count > 0 or timeout_retry_count > 0: + if duplicate_retry_count > 0: + print(f" 🔄 Restored original temperature: {self.config.TEMP} (after {duplicate_retry_count} duplicate retries)") + elif timeout_retry_count > 0: + print(f" 🔄 Restored original settings after {timeout_retry_count} timeout retries") + elif retry_count > 0: + print(f" 🔄 Restored original settings after {retry_count} retries") + + if duplicate_retry_count >= max_duplicate_retries: + print(f" ⚠️ WARNING: Duplicate content issue persists after {max_duplicate_retries} attempts") + + return result, finish_reason + + def get_token_budget_str(self): + """Get token budget as string""" + _tok_env = os.getenv("MAX_INPUT_TOKENS", "1000000").strip() + max_tokens_limit, budget_str = parse_token_limit(_tok_env) + return budget_str + +# ===================================================== +# BATCH TRANSLATION PROCESSOR +# ===================================================== +class BatchTranslationProcessor: + """Handles batch/parallel translation processing""" + + def __init__(self, config, client, base_msg, out_dir, progress_lock, + save_progress_fn, update_progress_fn, check_stop_fn, + image_translator=None, is_text_file=False): + self.config = config + self.client = client + self.base_msg = base_msg + self.out_dir = out_dir + self.progress_lock = progress_lock + self.save_progress_fn = save_progress_fn + self.update_progress_fn = update_progress_fn + self.check_stop_fn = check_stop_fn + self.image_translator = image_translator + self.chapters_completed = 0 + self.chunks_completed = 0 + self.is_text_file = is_text_file + + # Optionally log multi-key status + if hasattr(self.client, 'use_multi_keys') and self.client.use_multi_keys: + stats = self.client.get_stats() + print(f"🔑 Batch processor using multi-key mode: {stats.get('total_keys', 0)} keys") + + def process_single_chapter(self, chapter_data): + """Process a single chapter (runs in thread)""" + # APPLY INTERRUPTIBLE THREADING DELAY FIRST + thread_delay = float(os.getenv("THREAD_SUBMISSION_DELAY_SECONDS", "0.5")) + if thread_delay > 0: + # Check if we need to wait (same logic as unified_api_client) + if hasattr(self.client, '_thread_submission_lock') and hasattr(self.client, '_last_thread_submission_time'): + with self.client._thread_submission_lock: + current_time = time.time() + time_since_last = current_time - self.client._last_thread_submission_time + + if time_since_last < thread_delay: + sleep_time = thread_delay - time_since_last + thread_name = threading.current_thread().name + + # PRINT BEFORE THE DELAY STARTS + idx, chapter = chapter_data # Extract chapter info for better logging + print(f"🧵 [{thread_name}] Applying thread delay: {sleep_time:.1f}s for Chapter {idx+1}") + + # Interruptible sleep - check stop flag every 0.1 seconds + elapsed = 0 + check_interval = 0.1 + while elapsed < sleep_time: + if self.check_stop_fn(): + print(f"🛑 Threading delay interrupted by stop flag") + raise Exception("Translation stopped by user during threading delay") + + sleep_chunk = min(check_interval, sleep_time - elapsed) + time.sleep(sleep_chunk) + elapsed += sleep_chunk + + 
self.client._last_thread_submission_time = time.time() + if not hasattr(self.client, '_thread_submission_count'): + self.client._thread_submission_count = 0 + self.client._thread_submission_count += 1 + + idx, chapter = chapter_data + chap_num = chapter["num"] + + # Use the pre-calculated actual_chapter_num from the main loop + actual_num = chapter.get('actual_chapter_num') + + # Fallback if not set (common in batch mode where first pass might be skipped) + if actual_num is None: + # Try to extract it using the same logic as non-batch mode + raw_num = FileUtilities.extract_actual_chapter_number(chapter, patterns=None, config=self.config) + + # Apply offset if configured + offset = self.config.CHAPTER_NUMBER_OFFSET if hasattr(self.config, 'CHAPTER_NUMBER_OFFSET') else 0 + raw_num += offset + + # Check if zero detection is disabled + if hasattr(self.config, 'DISABLE_ZERO_DETECTION') and self.config.DISABLE_ZERO_DETECTION: + actual_num = raw_num + elif hasattr(self.config, '_uses_zero_based') and self.config._uses_zero_based: + # This is a 0-based novel, adjust the number + actual_num = raw_num + 1 + else: + # Default to raw number (1-based or unknown) + actual_num = raw_num + + print(f" 📖 Extracted actual chapter number: {actual_num} (from raw: {raw_num})") + + try: + # Check if this is from a text file + ai_features = None + is_text_source = self.is_text_file or chapter.get('filename', '').endswith('.txt') or chapter.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + print(f"🔄 Starting #{idx+1} (Internal: {terminology} {chap_num}, Actual: {terminology} {actual_num}) (thread: {threading.current_thread().name}) [File: {chapter.get('original_basename', f'{terminology}_{chap_num}')}]") + + content_hash = chapter.get("content_hash") or ContentProcessor.get_content_hash(chapter["body"]) + with self.progress_lock: + self.update_progress_fn(idx, actual_num, content_hash, None, status="in_progress") + self.save_progress_fn() + + chapter_body = chapter["body"] + if chapter.get('has_images') and self.image_translator and self.config.ENABLE_IMAGE_TRANSLATION: + print(f"🖼️ Processing images for Chapter {actual_num}...") + self.image_translator.set_current_chapter(actual_num) + chapter_body, image_translations = process_chapter_images( + chapter_body, + actual_num, + self.image_translator, + self.check_stop_fn + ) + if image_translations: + # Create a copy of the processed body + from bs4 import BeautifulSoup + c = chapter + soup_for_text = BeautifulSoup(c["body"], 'html.parser') + + # Remove all translated content + for trans_div in soup_for_text.find_all('div', class_='translated-text-only'): + trans_div.decompose() + + # Use this cleaned version for text translation + text_to_translate = str(soup_for_text) + final_body_with_images = c["body"] + else: + # 'c' is only bound in the branch above, so refer to chapter directly here + text_to_translate = chapter["body"] + image_translations = {} + print(f"✅ Processed {len(image_translations)} images for Chapter {actual_num}") + + # (text_to_translate is prepared above, but the request below still sends chapter_body) + chapter_msgs = self.base_msg + [{"role": "user", "content": chapter_body}] + + # Generate filename before API call + fname = FileUtilities.create_chapter_filename(chapter, actual_num) + self.client.set_output_filename(fname) + + if hasattr(self.client, '_current_output_file'): + self.client._current_output_file = fname + + print(f"📤 Sending Chapter {actual_num} to API...") + result, finish_reason = send_with_interrupt( + chapter_msgs, self.client, self.config.TEMP, + self.config.MAX_OUTPUT_TOKENS, self.check_stop_fn + )
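+
+# --- Illustrative sketch (not part of the original module) ---
+# The image branch above strips already-translated
+# <div class="translated-text-only"> blocks before the text pass (so they are
+# not translated twice) and merges them back after saving. The split step alone:
+from bs4 import BeautifulSoup
+
+def split_out_image_translations(html):
+    soup = BeautifulSoup(html, 'html.parser')
+    divs = [d.extract() for d in soup.find_all('div', class_='translated-text-only')]
+    return str(soup), divs  # (text-only HTML, detached translation blocks)
+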
print(f"📥 Received Chapter {actual_num} response, finish_reason: {finish_reason}") + + # Enhanced mode workflow (same as non-batch): + # 1. Original HTML -> html2text -> Markdown/plain text (during extraction) + # 2. Markdown sent to translation API (better for translation quality) + # 3. Translated markdown -> HTML conversion (here) + if result and chapter.get("enhanced_extraction", False): + print(f"🔄 Converting translated markdown back to HTML...") + result = convert_enhanced_text_to_html(result, chapter) + + if finish_reason in ["length", "max_tokens"]: + print(f"⚠️ Chapter {actual_num} response was TRUNCATED!") + + if self.config.REMOVE_AI_ARTIFACTS: + result = ContentProcessor.clean_ai_artifacts(result, True) + + result = ContentProcessor.clean_memory_artifacts(result) + + cleaned = re.sub(r"^```(?:html)?\s*\n?", "", result, count=1, flags=re.MULTILINE) + cleaned = re.sub(r"\n?```\s*$", "", cleaned, count=1, flags=re.MULTILINE) + cleaned = ContentProcessor.clean_ai_artifacts(cleaned, remove_artifacts=self.config.REMOVE_AI_ARTIFACTS) + + fname = FileUtilities.create_chapter_filename(chapter, actual_num) + + if self.is_text_file: + # For text files, save as plain text + fname_txt = fname.replace('.html', '.txt') if fname.endswith('.html') else fname + + # Extract text from HTML + from bs4 import BeautifulSoup + soup = BeautifulSoup(cleaned, 'html.parser') + text_content = soup.get_text(strip=True) + + # Merge image translations back with text translation + if 'final_body_with_images' in locals() and image_translations: + # Parse both versions + soup_with_images = BeautifulSoup(final_body_with_images, 'html.parser') + soup_with_text = BeautifulSoup(cleaned, 'html.parser') + + # Get the translated text content (without images) + body_content = soup_with_text.body + + # Add image translations to the translated content + for trans_div in soup_with_images.find_all('div', class_='translated-text-only'): + body_content.insert(0, trans_div) + + final_html = str(soup_with_text) + cleaned = final_html + + with open(os.path.join(self.out_dir, fname), 'w', encoding='utf-8') as f: + f.write(cleaned) + + # Update with .txt filename + with self.progress_lock: + self.update_progress_fn(idx, actual_num, content_hash, fname_txt, status="completed", ai_features=ai_features) + self.save_progress_fn() + else: + # Original code for EPUB files + with open(os.path.join(self.out_dir, fname), 'w', encoding='utf-8') as f: + f.write(cleaned) + + print(f"💾 Saved Chapter {actual_num}: {fname} ({len(cleaned)} chars)") + + # ai_features was initialized to None at the top of the try block; batch mode leaves it unset + + # Extract and save AI features for future duplicate detection + if (self.config.RETRY_DUPLICATE_BODIES and + hasattr(self.config, 'DUPLICATE_DETECTION_MODE') and + self.config.DUPLICATE_DETECTION_MODE in ['ai-hunter', 'cascading']): + try: + # Extract features from the translated content + cleaned_text = re.sub(r'<[^>]+>', '', cleaned).strip() + # Note: self.translator doesn't exist, so we can't extract features here + # The features will need to be extracted during regular processing + print(f" ⚠️ AI features extraction not available in batch mode") + except Exception as e: + print(f" ⚠️ Failed to extract AI features: {e}") + + with self.progress_lock: + # Check for QA failures with comprehensive detection + if is_qa_failed_response(cleaned): + chapter_status = "qa_failed" + failure_reason = get_failure_reason(cleaned) + print(f"⚠️ Batch: Chapter {actual_num} marked as qa_failed: {failure_reason}") + # Update progress to qa_failed status + self.update_progress_fn(idx, actual_num, content_hash, fname, status=chapter_status, ai_features=ai_features) + self.save_progress_fn() + # DO NOT increment chapters_completed for qa_failed + # Return False to indicate failure + return False, actual_num + else: + chapter_status = "completed" + # Update progress to completed status + self.update_progress_fn(idx, actual_num, content_hash, fname, status=chapter_status, ai_features=ai_features) + self.save_progress_fn() + # Only increment chapters_completed for successful chapters + self.chapters_completed += 1 + self.chunks_completed += 1 + + print(f"✅ Chapter {actual_num} completed successfully") + return True, actual_num + + except Exception as e: + print(f"❌ Chapter {actual_num} failed: {e}") + with self.progress_lock: + # content_hash may not be bound yet if the failure happened before hashing + self.update_progress_fn(idx, actual_num, chapter.get("content_hash"), None, status="failed") + self.save_progress_fn() + return False, actual_num
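+
+# --- Illustrative sketch (not part of the original module; is_qa_failed_response
+# is defined elsewhere in this file) ---
+# process_single_chapter routes every result to exactly one status: "qa_failed"
+# chapters are recorded but never counted as completed, so they can be retried:
+def route_batch_result(cleaned_html, qa_check):
+    """Return (status, counts_toward_completion) for a translated chapter."""
+    if qa_check(cleaned_html):
+        return "qa_failed", False  # recorded for retry, not counted as done
+    return "completed", True
+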
# ===================================================== + # GLOSSARY MANAGER - TRUE CSV FORMAT WITH FUZZY MATCHING + # ===================================================== + + class GlossaryManager: + """Unified glossary management with true CSV format, fuzzy matching, and parallel processing""" + + # Class-level shared lock for API submission timing + _api_submission_lock = threading.Lock() + _last_api_submission_time = 0 + + def __init__(self): + self.pattern_manager = PatternManager() + self._results_lock = threading.Lock() # Thread lock for collecting results + self._file_write_lock = threading.Lock() # Thread lock for file operations + + def _atomic_write_file(self, filepath, content, encoding='utf-8'): + """Atomically write to a file to prevent corruption from concurrent writes""" + + # Create temp file in same directory to ensure same filesystem + dir_path = os.path.dirname(filepath) + + with self._file_write_lock: + try: + # Write to temporary file first + with tempfile.NamedTemporaryFile(mode='w', encoding=encoding, + dir=dir_path, delete=False) as tmp_file: + tmp_file.write(content) + tmp_path = tmp_file.name + + # os.replace overwrites the target atomically on both Windows and POSIX + # (plain os.rename raises on Windows when the target exists) + os.replace(tmp_path, filepath) + + return True + + except Exception as e: + print(f"⚠️ Atomic write failed: {e}") + # Cleanup temp file if it exists + if 'tmp_path' in locals() and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + except Exception: + pass + + # Fallback to direct write with lock + try: + with open(filepath, 'w', encoding=encoding) as f: + f.write(content) + return True + except Exception as e2: + print(f"⚠️ Fallback write also failed: {e2}") + return False + + def save_glossary(self, output_dir, chapters, instructions, language="korean"): + """Targeted glossary generator with true CSV format output and parallel processing""" + print("📑 Targeted Glossary Generator v6.0 (CSV Format + Parallel)") + + # Ensure output directory exists + try: + os.makedirs(output_dir, exist_ok=True) + except Exception as _e: + print(f"⚠️ Could not ensure output directory exists: {output_dir} ({_e})") + # Check stop flag at start + if is_stop_requested(): + print("📑 ❌ Glossary generation stopped by user") + return {} + + # Check if glossary already exists; if so, we'll MERGE it later (do not return early) + glossary_path = os.path.join(output_dir, "glossary.csv") + existing_glossary_content = None + if
os.path.exists(glossary_path): + print(f"📑 Existing glossary detected (will merge): {glossary_path}") + try: + with open(glossary_path, 'r', encoding='utf-8') as f: + existing_glossary_content = f.read() + except Exception as e: + print(f"⚠️ Could not read existing glossary: {e}") + + # Rest of the method continues as before... + print("📑 Extracting names and terms with configurable options") + + # Check stop flag before processing + if is_stop_requested(): + print("📑 ❌ Glossary generation stopped by user") + return {} + + # Check for manual glossary first (CSV only) + manual_glossary_path = os.getenv("MANUAL_GLOSSARY") + existing_glossary = None + if manual_glossary_path and os.path.exists(manual_glossary_path): + print(f"📑 Manual glossary detected: {os.path.basename(manual_glossary_path)}") + try: + with open(manual_glossary_path, 'r', encoding='utf-8') as f: + content = f.read() + # Treat as CSV text and stage it for merge; also copy to output for visibility + target_path = os.path.join(output_dir, "glossary.csv") + with open(target_path, 'w', encoding='utf-8') as f: + f.write(content) + print(f"📑 ✅ Manual CSV glossary copied to: {target_path}") + existing_glossary = content + except Exception as e: + print(f"⚠️ Could not copy manual glossary: {e}") + print(f"📑 Proceeding with automatic generation...") + + # Check for existing glossary from manual extraction + glossary_folder_path = os.path.join(output_dir, "Glossary") + # existing_glossary may already be set by MANUAL_GLOSSARY above + + if os.path.exists(glossary_folder_path): + for file in os.listdir(glossary_folder_path): + if file.endswith("_glossary.json"): + existing_path = os.path.join(glossary_folder_path, file) + try: + with open(existing_path, 'r', encoding='utf-8') as f: + existing_content = f.read() + existing_glossary = existing_content + print(f"📑 Found existing glossary from manual extraction: {file}") + break + except Exception as e: + print(f"⚠️ Could not load existing glossary: {e}") + + # Get configuration from environment variables + min_frequency = int(os.getenv("GLOSSARY_MIN_FREQUENCY", "2")) + max_names = int(os.getenv("GLOSSARY_MAX_NAMES", "50")) + max_titles = int(os.getenv("GLOSSARY_MAX_TITLES", "30")) + batch_size = int(os.getenv("GLOSSARY_BATCH_SIZE", "50")) + strip_honorifics = os.getenv("GLOSSARY_STRIP_HONORIFICS", "1") == "1" + fuzzy_threshold = float(os.getenv("GLOSSARY_FUZZY_THRESHOLD", "0.90")) + max_text_size = int(os.getenv("GLOSSARY_MAX_TEXT_SIZE", "50000")) + + print(f"📑 Settings: Min frequency: {min_frequency}, Max names: {max_names}, Max titles: {max_titles}") + print(f"📑 Strip honorifics: {'✅ Yes' if strip_honorifics else '❌ No'}") + print(f"📑 Fuzzy matching threshold: {fuzzy_threshold}") + + # Get custom prompt from environment + custom_prompt = os.getenv("AUTO_GLOSSARY_PROMPT", "").strip() + + def clean_html(html_text): + """Remove HTML tags to get clean text""" + soup = BeautifulSoup(html_text, 'html.parser') + return soup.get_text() + + # Check stop before processing chapters + if is_stop_requested(): + print("📑 ❌ Glossary generation stopped by user") + return {} + + # Get chapter split threshold and filter mode + chapter_split_threshold = int(os.getenv("GLOSSARY_CHAPTER_SPLIT_THRESHOLD", "100000")) + filter_mode = os.getenv("GLOSSARY_FILTER_MODE", "all") # all, only_with_honorifics, only_without_honorifics + + # Check if parallel extraction is enabled for automatic glossary + extraction_workers = int(os.getenv("EXTRACTION_WORKERS", "1")) + batch_translation = os.getenv("BATCH_TRANSLATION", 
"0") == "1" + api_batch_size = int(os.getenv("BATCH_SIZE", "5")) + + # Log the settings + print(f"📑 Filter mode: {filter_mode}") + if extraction_workers > 1: + print(f"📑 Parallel extraction enabled: {extraction_workers} workers") + if batch_translation: + print(f"📑 Batch API calls enabled: {api_batch_size} chunks per batch") + + all_text = ' '.join(clean_html(chapter["body"]) for chapter in chapters) + print(f"📑 Processing {len(all_text):,} characters of text") + + # Apply smart filtering FIRST to check actual size needed + use_smart_filter = os.getenv("GLOSSARY_USE_SMART_FILTER", "1") == "1" + effective_text_size = len(all_text) + + filtered_text_cache = None + if use_smart_filter and custom_prompt: # Only apply for AI extraction + print(f"📑 Smart filtering enabled - checking effective text size after filtering...") + # Perform filtering ONCE and reuse for chunking + filtered_sample, _ = self._filter_text_for_glossary(all_text, min_frequency) + filtered_text_cache = filtered_sample + effective_text_size = len(filtered_sample) + print(f"📑 Effective text size after filtering: {effective_text_size:,} chars (from {len(all_text):,})") + + # Check if we need to split into chunks based on EFFECTIVE size after filtering + if chapter_split_threshold > 0 and effective_text_size > chapter_split_threshold: + print(f"📑 Effective text exceeds {chapter_split_threshold:,} chars, will process in chunks...") + + # If using smart filter, we need to split the FILTERED text, not raw text + if use_smart_filter and custom_prompt: + # Split the filtered text into chunks (reuse cached filtered text) + filtered_text = filtered_text_cache if filtered_text_cache is not None else self._filter_text_for_glossary(all_text, min_frequency)[0] + chunks_to_process = [] + + # Split filtered text into chunks of appropriate size + chunk_size = chapter_split_threshold + for i in range(0, len(filtered_text), chunk_size): + chunk_text = filtered_text[i:i + chunk_size] + chunks_to_process.append((len(chunks_to_process) + 1, chunk_text)) + + print(f"📑 Split filtered text into {len(chunks_to_process)} chunks") + all_glossary_entries = [] + else: + # Original logic for unfiltered text + all_glossary_entries = [] + chunk_size = 0 + chunk_chapters = [] + chunks_to_process = [] + + for idx, chapter in enumerate(chapters): + if is_stop_requested(): + print("📑 ❌ Glossary generation stopped by user") + return all_glossary_entries + + chapter_text = clean_html(chapter["body"]) + chunk_size += len(chapter_text) + chunk_chapters.append(chapter) + + # Process chunk when it reaches threshold or last chapter + if chunk_size >= chapter_split_threshold or idx == len(chapters) - 1: + chunk_text = ' '.join(clean_html(ch["body"]) for ch in chunk_chapters) + chunks_to_process.append((len(chunks_to_process) + 1, chunk_text)) + + # Reset for next chunk + chunk_size = 0 + chunk_chapters = [] + + print(f"📑 Split into {len(chunks_to_process)} chunks for processing") + + # Batch toggle decides concurrency: ON => parallel API calls; OFF => strict sequential + if batch_translation and custom_prompt and len(chunks_to_process) > 1: + print(f"📑 Processing chunks in batch mode with {api_batch_size} chunks per batch...") + # Set fast mode for batch processing + os.environ["GLOSSARY_SKIP_ALL_VALIDATION"] = "1" + + # Use batch API calls for AI extraction + all_csv_lines = self._process_chunks_batch_api( + chunks_to_process, custom_prompt, language, + min_frequency, max_names, max_titles, + output_dir, strip_honorifics, fuzzy_threshold, + filter_mode, api_batch_size, 
extraction_workers + ) + + # Reset validation mode + os.environ["GLOSSARY_SKIP_ALL_VALIDATION"] = "0" + + print(f"📑 All chunks completed. Aggregated raw lines: {len(all_csv_lines)}") + + # Process all collected entries at once (even if empty) + # Add header so downstream steps can work uniformly + all_csv_lines.insert(0, "type,raw_name,translated_name") + + # Merge with any on-disk glossary first (to avoid overwriting user edits) + on_disk_path = os.path.join(output_dir, "glossary.csv") + if os.path.exists(on_disk_path): + try: + with open(on_disk_path, 'r', encoding='utf-8') as f: + on_disk_content = f.read() + all_csv_lines = self._merge_csv_entries(all_csv_lines, on_disk_content, strip_honorifics, language) + print("📑 Merged with existing on-disk glossary") + except Exception as e: + print(f"⚠️ Failed to merge with existing on-disk glossary: {e}") + + # Apply filter mode if needed + if filter_mode == "only_with_honorifics": + filtered = [all_csv_lines[0]] # Keep header + for line in all_csv_lines[1:]: + parts = line.split(',', 2) + if len(parts) >= 3 and parts[0] == "character": + filtered.append(line) + all_csv_lines = filtered + print(f"📑 Filter applied: {len(all_csv_lines)-1} character entries with honorifics kept") + + # Apply fuzzy deduplication (deferred until after all chunks) + try: + print(f"📑 Applying fuzzy deduplication (threshold: {fuzzy_threshold})...") + all_csv_lines = self._deduplicate_glossary_with_fuzzy(all_csv_lines, fuzzy_threshold) + except Exception as e: + print(f"⚠️ Deduplication error: {e} — continuing without dedup") + + # Sort by type and name + print(f"📑 Sorting glossary by type and name...") + header = all_csv_lines[0] + entries = all_csv_lines[1:] + if entries: + entries.sort(key=lambda x: (0 if x.startswith('character,') else 1, x.split(',')[1].lower() if ',' in x else x.lower())) + all_csv_lines = [header] + entries + + # Save + # Check format preference + use_legacy_format = os.getenv('GLOSSARY_USE_LEGACY_CSV', '0') == '1' + + if not use_legacy_format: + # Convert to token-efficient format + all_csv_lines = self._convert_to_token_efficient_format(all_csv_lines) + + # Final sanitize to prevent stray headers + all_csv_lines = self._sanitize_final_glossary_lines(all_csv_lines, use_legacy_format) + + # Save + csv_content = '\n'.join(all_csv_lines) + glossary_path = os.path.join(output_dir, "glossary.csv") + self._atomic_write_file(glossary_path, csv_content) + + # Verify file exists; fallback direct write if needed + if not os.path.exists(glossary_path): + try: + with open(glossary_path, 'w', encoding='utf-8') as f: + f.write(csv_content) + print("📑 Fallback write succeeded for glossary.csv") + except Exception as e: + print(f"❌ Failed to write glossary.csv: {e}") + + print(f"\n📑 ✅ GLOSSARY SAVED!") + print(f"📑 ✅ AI GLOSSARY SAVED!") + c_count, t_count, total = self._count_glossary_entries(all_csv_lines, use_legacy_format) + print(f"📑 Character entries: {c_count}") + print(f"📑 Term entries: {t_count}") + print(f"📑 Total entries: {total}") + + return self._parse_csv_to_dict(csv_content) + else: + # Strict sequential processing (one API call at a time) + _prev_defer = os.getenv("GLOSSARY_DEFER_SAVE") + _prev_filtered = os.getenv("_CHUNK_ALREADY_FILTERED") + _prev_force_disable = os.getenv("GLOSSARY_FORCE_DISABLE_SMART_FILTER") + os.environ["GLOSSARY_DEFER_SAVE"] = "1" + # Tell the extractor each chunk is already filtered to avoid re-running smart filter per chunk + os.environ["_CHUNK_ALREADY_FILTERED"] = "1" + os.environ["GLOSSARY_FORCE_DISABLE_SMART_FILTER"] = "1" + try: + for
chunk_idx, chunk_text in chunks_to_process: + if is_stop_requested(): + break + + print(f"📑 Processing chunk {chunk_idx}/{len(chunks_to_process)} ({len(chunk_text):,} chars)...") + + if custom_prompt: + chunk_glossary = self._extract_with_custom_prompt( + custom_prompt, chunk_text, language, + min_frequency, max_names, max_titles, + None, output_dir, # Don't pass existing glossary to chunks + strip_honorifics, fuzzy_threshold, filter_mode + ) + else: + chunk_glossary = self._extract_with_patterns( + chunk_text, language, min_frequency, + max_names, max_titles, batch_size, + None, output_dir, # Don't pass existing glossary to chunks + strip_honorifics, fuzzy_threshold, filter_mode + ) + + # Normalize to CSV lines and aggregate + chunk_lines = [] + if isinstance(chunk_glossary, list): + for line in chunk_glossary: + if line and not line.startswith('type,'): + all_glossary_entries.append(line) + chunk_lines.append(line) + else: + for raw_name, translated_name in chunk_glossary.items(): + entry_type = "character" if self._has_honorific(raw_name) else "term" + line = f"{entry_type},{raw_name},{translated_name}" + all_glossary_entries.append(line) + chunk_lines.append(line) + + # Incremental update + try: + self._incremental_update_glossary(output_dir, chunk_lines, strip_honorifics, language, filter_mode) + print(f"📑 Incremental write: +{len(chunk_lines)} entries") + except Exception as e2: + print(f"⚠️ Incremental write failed: {e2}") + finally: + if _prev_defer is None: + if "GLOSSARY_DEFER_SAVE" in os.environ: + del os.environ["GLOSSARY_DEFER_SAVE"] + else: + os.environ["GLOSSARY_DEFER_SAVE"] = _prev_defer + if _prev_filtered is None: + os.environ.pop("_CHUNK_ALREADY_FILTERED", None) + else: + os.environ["_CHUNK_ALREADY_FILTERED"] = _prev_filtered + if _prev_force_disable is None: + os.environ.pop("GLOSSARY_FORCE_DISABLE_SMART_FILTER", None) + else: + os.environ["GLOSSARY_FORCE_DISABLE_SMART_FILTER"] = _prev_force_disable + + # Build CSV from aggregated entries + csv_lines = ["type,raw_name,translated_name"] + all_glossary_entries + + # Merge with any provided existing glossary AND on-disk glossary to avoid overwriting + on_disk_path = os.path.join(output_dir, "glossary.csv") + merge_sources = [] + if existing_glossary: + merge_sources.append(existing_glossary) + if os.path.exists(on_disk_path): + try: + with open(on_disk_path, 'r', encoding='utf-8') as f: + merge_sources.append(f.read()) + print("📑 Found existing on-disk glossary to merge") + except Exception as e: + print(f"⚠️ Failed to read on-disk glossary for merging: {e}") + # Also merge the main on-disk glossary if it was present at start + if existing_glossary_content: + csv_lines = self._merge_csv_entries(csv_lines, existing_glossary_content, strip_honorifics, language) + for src in merge_sources: + csv_lines = self._merge_csv_entries(csv_lines, src, strip_honorifics, language) + + # Apply filter mode to final results + csv_lines = self._filter_csv_by_mode(csv_lines, filter_mode) + + # Apply fuzzy deduplication (deferred until after all chunks) + print(f"📑 Applying fuzzy deduplication (threshold: {fuzzy_threshold})...") + original_count = len(csv_lines) - 1 + csv_lines = self._deduplicate_glossary_with_fuzzy(csv_lines, fuzzy_threshold) + deduped_count = len(csv_lines) - 1 + if original_count > deduped_count: + print(f"📑 Removed {original_count - deduped_count} duplicate entries") + + # Sort by type and name + print(f"📑 Sorting glossary by type and name...") + header = csv_lines[0] + entries = csv_lines[1:] + entries.sort(key=lambda 
x: (0 if x.startswith('character,') else 1, x.split(',')[1].lower() if ',' in x else x.lower())) + csv_lines = [header] + entries + + # Token-efficient format if enabled + use_legacy_format = os.getenv('GLOSSARY_USE_LEGACY_CSV', '0') == '1' + if not use_legacy_format: + csv_lines = self._convert_to_token_efficient_format(csv_lines) + + # Final sanitize to prevent stray headers and section titles at end + csv_lines = self._sanitize_final_glossary_lines(csv_lines, use_legacy_format) + + try: + # Save + csv_content = '\n'.join(csv_lines) + glossary_path = os.path.join(output_dir, "glossary.csv") + self._atomic_write_file(glossary_path, csv_content) + + # Verify file exists; fallback direct write if needed + if not os.path.exists(glossary_path): + try: + with open(glossary_path, 'w', encoding='utf-8') as f: + f.write(csv_content) + print("📑 Fallback write succeeded for glossary.csv") + except Exception as e: + print(f"❌ Failed to write glossary.csv: {e}") + finally: + print(f"\n📑 ✅ CHUNKED GLOSSARY SAVED!") + print(f"📑 ✅ AI GLOSSARY SAVED!") + print(f"📑 File: {glossary_path}") + c_count, t_count, total = self._count_glossary_entries(csv_lines, use_legacy_format) + print(f"📑 Character entries: {c_count}") + print(f"📑 Term entries: {t_count}") + print(f"📑 Total entries: {total}") + + return self._parse_csv_to_dict(csv_content) + + # Original single-text processing + if custom_prompt: + return self._extract_with_custom_prompt(custom_prompt, all_text, language, + min_frequency, max_names, max_titles, + existing_glossary, output_dir, + strip_honorifics, fuzzy_threshold, filter_mode) + else: + return self._extract_with_patterns(all_text, language, min_frequency, + max_names, max_titles, batch_size, + existing_glossary, output_dir, + strip_honorifics, fuzzy_threshold, filter_mode) + + def _convert_to_token_efficient_format(self, csv_lines): + """Convert CSV lines to token-efficient format with sections and asterisks""" + if len(csv_lines) <= 1: + return csv_lines + + header = csv_lines[0] + entries = csv_lines[1:] + + # Group by type (only from valid CSV lines) + import re as _re + grouped = {} + for line in entries: + if not line.strip(): + continue + # Only accept proper CSV rows: at least 3 fields and a sane type token + parts_full = [p.strip() for p in line.split(',')] + if len(parts_full) < 3: + continue + entry_type = parts_full[0].lower() + if not _re.match(r'^[a-z_]+$', entry_type): + continue + if entry_type not in grouped: + grouped[entry_type] = [] + grouped[entry_type].append(line) + + # Rebuild with token-efficient format + result = [] + result.append("Glossary: Characters, Terms, and Important Elements\n") + + # Process in order: character first, then term, then others + type_order = ['character', 'term'] + [t for t in grouped.keys() if t not in ['character', 'term']] + + for entry_type in type_order: + if entry_type not in grouped: +
continue + + entries = grouped[entry_type] + + # Add section header + section_name = entry_type.upper() + 'S' if not entry_type.upper().endswith('S') else entry_type.upper() + result.append(f"=== {section_name} ===") + + # Add entries in new format + for line in entries: + parts = [p.strip() for p in line.split(',')] + if len(parts) >= 3: + raw_name = parts[1] + translated_name = parts[2] + + # Format: * TranslatedName (RawName) + entry_line = f"* {translated_name} ({raw_name})" + + # Add gender if present and not Unknown + if len(parts) > 3 and parts[3] and parts[3] != 'Unknown': + entry_line += f" [{parts[3]}]" + + # Add any additional fields as description + if len(parts) > 4: + description = ', '.join(parts[4:]) + if description.strip(): + entry_line += f": {description}" + + result.append(entry_line) + + result.append("") # Blank line between sections + + return result + + def _count_glossary_entries(self, lines, use_legacy_format=False): + """Return (char_count, term_count, total_count) for either format.""" + if not lines: + return 0, 0, 0 + if use_legacy_format: + data = lines[1:] if lines and lines[0].lower().startswith('type,raw_name') else lines + char_count = sum(1 for ln in data if ln.startswith('character,')) + term_count = sum(1 for ln in data if ln.startswith('term,')) + total = sum(1 for ln in data if ln and ',' in ln) + return char_count, term_count, total + # token-efficient + current = None + char_count = term_count = total = 0 + for ln in lines: + s = ln.strip() + if s.startswith('=== ') and 'CHARACTER' in s.upper(): + current = 'character' + continue + if s.startswith('=== ') and 'TERM' in s.upper(): + current = 'term' + continue + if s.startswith('* '): + total += 1 + if current == 'character': + char_count += 1 + elif current == 'term': + term_count += 1 + return char_count, term_count, total + + def _sanitize_final_glossary_lines(self, lines, use_legacy_format=False): + """Remove stray CSV headers and normalize header placement before saving. + - In legacy CSV mode, ensure exactly one header at the very top. + - In token-efficient mode, remove any CSV header lines entirely. + """ + header_norm = "type,raw_name,translated_name" + if not lines: + return lines + + if use_legacy_format: + sanitized = [] + header_seen = False + for ln in lines: + txt = ln.strip() + if txt.lower().startswith("type,raw_name"): + if not header_seen: + sanitized.append(header_norm) + header_seen = True + # skip duplicates + else: + sanitized.append(ln) + # ensure header at top + if sanitized and not sanitized[0].strip().lower().startswith("type,raw_name"): + sanitized.insert(0, header_norm) + return sanitized + else: + # remove any CSV header lines anywhere and duplicate top headers/sections + cleaned = [] + glossary_header_seen = False + for i, ln in enumerate(lines): + txt = ln.strip() + low = txt.lower() + # Drop CSV headers + if low.startswith("type,raw_name"): + continue + # Keep only the first main glossary header + if low.startswith("glossary:"): + if glossary_header_seen: + continue + glossary_header_seen = True + cleaned.append(ln) + continue + # Remove bogus section like '=== GLOSSARY: ... 
===' + if low.startswith("=== glossary:"): + continue + cleaned.append(ln) + return cleaned + + def _process_chunks_batch_api(self, chunks_to_process, custom_prompt, language, + min_frequency, max_names, max_titles, + output_dir, strip_honorifics, fuzzy_threshold, + filter_mode, api_batch_size, extraction_workers): + """Process chunks using batch API calls for AI extraction with thread delay""" + + print(f"📑 Using batch API mode with {api_batch_size} chunks per batch") + + # Ensure we defer saving and heavy merging when processing chunks + _prev_defer = os.getenv("GLOSSARY_DEFER_SAVE") + os.environ["GLOSSARY_DEFER_SAVE"] = "1" + + # Get thread submission delay + thread_delay = float(os.getenv("THREAD_SUBMISSION_DELAY_SECONDS", "0.5")) + if thread_delay > 0: + print(f"📑 Thread submission delay: {thread_delay}s between parallel calls") + + # CHANGE: Collect raw CSV lines instead of dictionary + all_csv_lines = [] # Collect all entries as CSV lines + total_chunks = len(chunks_to_process) + completed_chunks = 0 + + # Ensure per-chunk smart filtering is disabled globally during batch processing + _prev_filtered = os.getenv("_CHUNK_ALREADY_FILTERED") + _prev_force_disable = os.getenv("GLOSSARY_FORCE_DISABLE_SMART_FILTER") + os.environ["_CHUNK_ALREADY_FILTERED"] = "1" + os.environ["GLOSSARY_FORCE_DISABLE_SMART_FILTER"] = "1" + + # Process in API batches + for batch_start in range(0, len(chunks_to_process), api_batch_size): + if is_stop_requested(): + break + + batch_end = min(batch_start + api_batch_size, len(chunks_to_process)) + batch_chunks = chunks_to_process[batch_start:batch_end] + + print(f"📑 Processing API batch {batch_start//api_batch_size + 1}: chunks {batch_start+1}-{batch_end}") + + # Use ThreadPoolExecutor for parallel API calls within batch + # Batch mode: issue multiple API calls in parallel within each batch (one worker per chunk) + with ThreadPoolExecutor(max_workers=len(batch_chunks)) as executor: + futures = {} + last_submission_time = 0 + + for chunk_idx, chunk_text in batch_chunks: + if is_stop_requested(): + break + + # Apply thread submission delay + if thread_delay > 0 and last_submission_time > 0: + time_since_last = time.time() - last_submission_time + if time_since_last < thread_delay: + sleep_time = thread_delay - time_since_last + print(f"🧵 Thread delay: {sleep_time:.1f}s for chunk {chunk_idx}") + time.sleep(sleep_time) + + future = executor.submit( + self._extract_with_custom_prompt, + custom_prompt, chunk_text, language, + min_frequency, max_names, max_titles, + None, output_dir, strip_honorifics, + fuzzy_threshold, filter_mode + ) + futures[future] = chunk_idx + last_submission_time = time.time() + + # Collect results + for future in as_completed(futures): + if is_stop_requested(): + break + + try: + chunk_glossary = future.result() + print(f"📑 DEBUG: Chunk {futures[future]} returned type={type(chunk_glossary)}, len={len(chunk_glossary)}") + + # Normalize to CSV lines (without header) + chunk_lines = [] + if isinstance(chunk_glossary, dict): + for raw_name, translated_name in chunk_glossary.items(): + entry_type = "character" if self._has_honorific(raw_name) else "term" + chunk_lines.append(f"{entry_type},{raw_name},{translated_name}") + elif isinstance(chunk_glossary, list): + for line in chunk_glossary: + if line and not line.startswith('type,'): + chunk_lines.append(line) + + # Aggregate for end-of-run + all_csv_lines.extend(chunk_lines) + + # Incremental update of glossary.csv in token-efficient format + try: + self._incremental_update_glossary(output_dir, 
chunk_lines, strip_honorifics, language, filter_mode) + print(f"📑 Incremental write: +{len(chunk_lines)} entries") + except Exception as e2: + print(f"⚠️ Incremental write failed: {e2}") + + completed_chunks += 1 + + # Print progress for GUI + progress_percent = (completed_chunks / total_chunks) * 100 + print(f"📑 Progress: {completed_chunks}/{total_chunks} chunks ({progress_percent:.0f}%)") + print(f"📑 Chunk {futures[future]} completed and aggregated") + + except Exception as e: + print(f"⚠️ API call for chunk {futures[future]} failed: {e}") + completed_chunks += 1 + progress_percent = (completed_chunks / total_chunks) * 100 + print(f"📑 Progress: {completed_chunks}/{total_chunks} chunks ({progress_percent:.0f}%)") + + # Add delay between API batches + if batch_end < len(chunks_to_process): + api_delay = float(os.getenv("SEND_INTERVAL_SECONDS", "2")) + print(f"⏱️ Waiting {api_delay}s before next API batch...") + time.sleep(api_delay) + + # CHANGE: Return CSV lines instead of dictionary + + # Restore per-chunk filter disabling envs + if _prev_filtered is None: + os.environ.pop("_CHUNK_ALREADY_FILTERED", None) + else: + os.environ["_CHUNK_ALREADY_FILTERED"] = _prev_filtered + if _prev_force_disable is None: + os.environ.pop("GLOSSARY_FORCE_DISABLE_SMART_FILTER", None) + else: + os.environ["GLOSSARY_FORCE_DISABLE_SMART_FILTER"] = _prev_force_disable + + # Restore previous defer setting + if _prev_defer is None: + # Default back to not deferring if it wasn't set + if "GLOSSARY_DEFER_SAVE" in os.environ: + del os.environ["GLOSSARY_DEFER_SAVE"] + else: + os.environ["GLOSSARY_DEFER_SAVE"] = _prev_defer + + return all_csv_lines + + def _incremental_update_glossary(self, output_dir, chunk_lines, strip_honorifics, language, filter_mode): + """Incrementally update glossary.csv (token-efficient) using an on-disk CSV aggregator. + This keeps glossary.csv present and growing after each chunk while preserving + token-efficient format for the visible file. 
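+ Flow: each chunk's lines are merged into glossary.incremental.csv (a plain CSV aggregator), the filter mode is applied, and the merged result is re-rendered as the token-efficient glossary.csv.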
+ """ + if not chunk_lines: + return + # Paths + agg_path = os.path.join(output_dir, "glossary.incremental.csv") + vis_path = os.path.join(output_dir, "glossary.csv") + # Ensure output dir + os.makedirs(output_dir, exist_ok=True) + # Compose CSV with header for merging + new_csv_lines = ["type,raw_name,translated_name"] + chunk_lines + # Load existing aggregator content, if any + existing_csv = None + if os.path.exists(agg_path): + try: + with open(agg_path, 'r', encoding='utf-8') as f: + existing_csv = f.read() + except Exception as e: + print(f"⚠️ Incremental: cannot read aggregator: {e}") + # Merge (exact merge, no fuzzy to keep this fast) + merged_csv_lines = self._merge_csv_entries(new_csv_lines, existing_csv or "", strip_honorifics, language) + # Optional filter mode + merged_csv_lines = self._filter_csv_by_mode(merged_csv_lines, filter_mode) + # Save aggregator (CSV) + self._atomic_write_file(agg_path, "\n".join(merged_csv_lines)) + # Convert to token-efficient format for visible glossary.csv + token_lines = self._convert_to_token_efficient_format(merged_csv_lines) + token_lines = self._sanitize_final_glossary_lines(token_lines, use_legacy_format=False) + self._atomic_write_file(vis_path, "\n".join(token_lines)) + if not os.path.exists(vis_path): + with open(vis_path, 'w', encoding='utf-8') as f: + f.write("\n".join(token_lines)) + + def _process_single_chunk(self, chunk_idx, chunk_text, custom_prompt, language, + min_frequency, max_names, max_titles, batch_size, + output_dir, strip_honorifics, fuzzy_threshold, filter_mode, + already_filtered=False): + """Process a single chunk - wrapper for parallel execution""" + print(f"📑 Worker processing chunk {chunk_idx} ({len(chunk_text):,} chars)...") + + if custom_prompt: + # Pass flag to indicate if text is already filtered + os.environ["_CHUNK_ALREADY_FILTERED"] = "1" if already_filtered else "0" + _prev_defer = os.getenv("GLOSSARY_DEFER_SAVE") + os.environ["GLOSSARY_DEFER_SAVE"] = "1" + try: + result = self._extract_with_custom_prompt( + custom_prompt, chunk_text, language, + min_frequency, max_names, max_titles, + None, output_dir, + strip_honorifics, fuzzy_threshold, filter_mode + ) + finally: + os.environ["_CHUNK_ALREADY_FILTERED"] = "0" # Reset + if _prev_defer is None: + if "GLOSSARY_DEFER_SAVE" in os.environ: + del os.environ["GLOSSARY_DEFER_SAVE"] + else: + os.environ["GLOSSARY_DEFER_SAVE"] = _prev_defer + return result + else: + return self._extract_with_patterns( + chunk_text, language, min_frequency, + max_names, max_titles, batch_size, + None, output_dir, + strip_honorifics, fuzzy_threshold, filter_mode + ) + + def _apply_final_filter(self, entries, filter_mode): + """Apply final filtering based on mode to ensure only requested types are included""" + if filter_mode == "only_with_honorifics": + # Filter to keep only entries that look like they have honorifics + filtered = {} + for key, value in entries.items(): + # Check if the key contains known honorific patterns + if self._has_honorific(key): + filtered[key] = value + print(f"📑 Final filter: Kept {len(filtered)} entries with honorifics (from {len(entries)} total)") + return filtered + elif filter_mode == "only_without_honorifics": + # Filter to keep only entries without honorifics + filtered = {} + for key, value in entries.items(): + if not self._has_honorific(key): + filtered[key] = value + print(f"📑 Final filter: Kept {len(filtered)} entries without honorifics (from {len(entries)} total)") + return filtered + else: + return entries + + def _looks_like_name(self, text): + 
"""Check if text looks like a character name""" + if not text: + return False + + # Check for various name patterns + # Korean names (2-4 hangul characters) + if all(0xAC00 <= ord(char) <= 0xD7AF for char in text) and 2 <= len(text) <= 4: + return True + + # Japanese names (mix of kanji/kana, 2-6 chars) + has_kanji = any(0x4E00 <= ord(char) <= 0x9FFF for char in text) + has_kana = any((0x3040 <= ord(char) <= 0x309F) or (0x30A0 <= ord(char) <= 0x30FF) for char in text) + if (has_kanji or has_kana) and 2 <= len(text) <= 6: + return True + + # Chinese names (2-4 Chinese characters) + if all(0x4E00 <= ord(char) <= 0x9FFF for char in text) and 2 <= len(text) <= 4: + return True + + # English names (starts with capital, mostly letters) + if text[0].isupper() and sum(1 for c in text if c.isalpha()) >= len(text) * 0.8: + return True + + return False + + def _has_honorific(self, term): + """Check if a term contains an honorific using PatternManager's comprehensive list""" + if not term: + return False + + term_lower = term.lower() + + # Check all language honorifics from PatternManager + for language, honorifics_list in self.pattern_manager.CJK_HONORIFICS.items(): + for honorific in honorifics_list: + # For romanized/English honorifics with spaces or dashes + if honorific.startswith(' ') or honorific.startswith('-'): + if term_lower.endswith(honorific.lower()): + return True + # For CJK honorifics (no separator) + else: + if honorific in term: + return True + + return False + + def _strip_all_honorifics(self, term, language='korean'): + """Strip all honorifics from a term using PatternManager's lists""" + if not term: + return term + + result = term + + # Get honorifics for the specific language and English romanizations + honorifics_to_strip = [] + if language in self.pattern_manager.CJK_HONORIFICS: + honorifics_to_strip.extend(self.pattern_manager.CJK_HONORIFICS[language]) + honorifics_to_strip.extend(self.pattern_manager.CJK_HONORIFICS.get('english', [])) + + # Sort by length (longest first) to avoid partial matches + honorifics_to_strip.sort(key=len, reverse=True) + + # Strip honorifics + for honorific in honorifics_to_strip: + if honorific.startswith(' ') or honorific.startswith('-'): + # For romanized honorifics with separators + if result.lower().endswith(honorific.lower()): + result = result[:-len(honorific)] + else: + # For CJK honorifics (no separator) + if result.endswith(honorific): + result = result[:-len(honorific)] + + return result.strip() + + def _convert_to_csv_format(self, data): + """Convert various glossary formats to CSV string format with enforced 3 columns""" + csv_lines = ["type,raw_name,translated_name"] + + if isinstance(data, str): + # Already CSV string + if data.strip().startswith('type,raw_name'): + return data + # Try to parse as JSON + try: + data = json.loads(data) + except: + return data + + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if 'type' in item and 'raw_name' in item: + # Already in correct format + line = f"{item['type']},{item['raw_name']},{item.get('translated_name', item['raw_name'])}" + csv_lines.append(line) + else: + # Old format - default to 'term' type + entry_type = 'term' + raw_name = item.get('original_name', '') + translated_name = item.get('name', raw_name) + if raw_name and translated_name: + csv_lines.append(f"{entry_type},{raw_name},{translated_name}") + + elif isinstance(data, dict): + if 'entries' in data: + # Has metadata wrapper, extract entries + for original, translated in data['entries'].items(): + 
csv_lines.append(f"term,{original},{translated}") + else: + # Plain dictionary - default to 'term' type + for original, translated in data.items(): + csv_lines.append(f"term,{original},{translated}") + + return '\n'.join(csv_lines) + + def _parse_csv_to_dict(self, csv_content): + """Parse CSV content to dictionary for backward compatibility""" + result = {} + lines = csv_content.strip().split('\n') + + for line in lines[1:]: # Skip header + if not line.strip(): + continue + parts = [p.strip() for p in line.split(',')] + if len(parts) >= 3: + result[parts[1]] = parts[2] # raw_name -> translated_name + + return result + + def _fuzzy_match(self, term1, term2, threshold=0.90): + """Check if two terms match using fuzzy matching""" + ratio = SequenceMatcher(None, term1.lower(), term2.lower()).ratio() + return ratio >= threshold + + def _fuzzy_match_rapidfuzz(self, term_lower, text_lower, threshold, term_len): + """Use rapidfuzz library for MUCH faster fuzzy matching""" + from rapidfuzz import fuzz + + print(f"📑 Using RapidFuzz (C++ speed)...") + start_time = time.time() + + matches_count = 0 + threshold_percent = threshold * 100 # rapidfuzz uses 0-100 scale + + # Can use smaller step because rapidfuzz is so fast + step = 1 # Check every position - rapidfuzz can handle it + + # Process text + for i in range(0, len(text_lower) - term_len + 1, step): + # Check stop flag every 10000 positions + if i > 0 and i % 10000 == 0: + if is_stop_requested(): + print(f"📑 RapidFuzz stopped at position {i}") + return matches_count + + window = text_lower[i:i + term_len] + + # rapidfuzz is fast enough we can check every position + if fuzz.ratio(term_lower, window) >= threshold_percent: + matches_count += 1 + + elapsed = time.time() - start_time + print(f"📑 RapidFuzz found {matches_count} matches in {elapsed:.2f}s") + return matches_count + + def _batch_compute_frequencies(self, terms, all_text, fuzzy_threshold=0.90, min_frequency=2): + """Compute frequencies for all terms at once - MUCH faster than individual checking""" + print(f"📑 Computing frequencies for {len(terms)} terms in batch mode...") + start_time = time.time() + + # Result dictionary + term_frequencies = {} + + # First pass: exact matching (very fast) + print(f"📑 Phase 1: Exact matching...") + text_lower = all_text.lower() + for term in terms: + if is_stop_requested(): + return term_frequencies + term_lower = term.lower() + count = text_lower.count(term_lower) + term_frequencies[term] = count + + exact_time = time.time() - start_time + high_freq_terms = sum(1 for count in term_frequencies.values() if count >= min_frequency) + print(f"📑 Exact matching complete: {high_freq_terms}/{len(terms)} terms meet threshold ({exact_time:.1f}s)") + + # If fuzzy matching is disabled, we're done + if fuzzy_threshold >= 1.0: + return term_frequencies + + # Second pass: fuzzy matching ONLY for low-frequency terms + low_freq_terms = [term for term, count in term_frequencies.items() if count < min_frequency] + + if low_freq_terms: + print(f"📑 Phase 2: Fuzzy matching for {len(low_freq_terms)} low-frequency terms...") + + # Try to use RapidFuzz batch processing + try: + from rapidfuzz import process, fuzz + + # For very large texts, sample it for fuzzy matching + if len(text_lower) > 500000: + print(f"📑 Text too large ({len(text_lower):,} chars), sampling for fuzzy matching...") + # Sample every Nth character to reduce size + sample_rate = max(1, len(text_lower) // 100000) + sampled_text = text_lower[::sample_rate] + else: + sampled_text = text_lower + + # Create chunks of 
text for fuzzy matching + chunk_size = 1000 # Process text in chunks + text_chunks = [sampled_text[i:i+chunk_size] for i in range(0, len(sampled_text), chunk_size//2)] # Overlapping chunks + + print(f"📑 Processing {len(text_chunks)} text chunks...") + threshold_percent = fuzzy_threshold * 100 + + # Process in batches to avoid memory issues + batch_size = 100 # Process 100 terms at a time + for batch_start in range(0, len(low_freq_terms), batch_size): + if is_stop_requested(): + break + + batch_end = min(batch_start + batch_size, len(low_freq_terms)) + batch_terms = low_freq_terms[batch_start:batch_end] + + for term in batch_terms: + if is_stop_requested(): + break + + # Quick fuzzy search in chunks + fuzzy_count = 0 + for chunk in text_chunks[:50]: # Limit to first 50 chunks for speed + if fuzz.partial_ratio(term.lower(), chunk) >= threshold_percent: + fuzzy_count += 1 + + if fuzzy_count > 0: + # Scale up based on sampling + if len(text_lower) > 500000: + fuzzy_count *= (len(text_lower) // len(sampled_text)) + term_frequencies[term] += fuzzy_count + + if (batch_end % 500 == 0) or (batch_end == len(low_freq_terms)): + elapsed = time.time() - start_time + print(f"📑 Processed {batch_end}/{len(low_freq_terms)} terms ({elapsed:.1f}s)") + + except ImportError: + print("📑 RapidFuzz not available, skipping fuzzy matching") + + total_time = time.time() - start_time + final_high_freq = sum(1 for count in term_frequencies.values() if count >= min_frequency) + print(f"📑 Batch frequency computation complete: {final_high_freq}/{len(terms)} terms accepted ({total_time:.1f}s)") + + return term_frequencies + + def _find_fuzzy_matches(self, term, text, threshold=0.90): + """Find fuzzy matches of a term in text using efficient method with parallel processing""" + start_time = time.time() + + term_lower = term.lower() + text_lower = text.lower() + term_len = len(term) + + # Only log for debugging if explicitly enabled + debug_search = os.getenv("GLOSSARY_DEBUG_SEARCH", "0") == "1" + if debug_search and len(text) > 100000: + print(f"📑 Searching for '{term}' in {len(text):,} chars (threshold: {threshold})") + + # Strategy 1: Use exact matching first for efficiency + exact_start = time.time() + matches_count = text_lower.count(term_lower) + exact_time = time.time() - exact_start + + if matches_count > 0: + if debug_search and len(text) > 100000: + print(f"📑 Found {matches_count} exact matches in {exact_time:.3f}s") + return matches_count + + # Strategy 2: Try rapidfuzz if available (much faster) + if matches_count == 0 and threshold < 1.0: + try: + from rapidfuzz import fuzz + return self._fuzzy_match_rapidfuzz(term_lower, text_lower, threshold, term_len) + except ImportError: + pass # Fall back to parallel/sequential + + # Strategy 3: Fall back to parallel/sequential if rapidfuzz not available + # Check if parallel processing is enabled + extraction_workers = int(os.getenv("EXTRACTION_WORKERS", "1")) + + if extraction_workers > 1 and len(text) > 50000: # Use parallel for large texts + return self._parallel_fuzzy_search(term_lower, text_lower, threshold, term_len, extraction_workers) + else: + return
self._sequential_fuzzy_search(term_lower, text_lower, threshold, term_len) + + def _parallel_fuzzy_search(self, term_lower, text_lower, threshold, term_len, num_workers): + """Parallel fuzzy search using ThreadPoolExecutor""" + print(f"📑 Starting parallel fuzzy search with {num_workers} workers...") + + text_len = len(text_lower) + matches_count = 0 + + # Split text into overlapping chunks for parallel processing + chunk_size = max(text_len // num_workers, term_len * 100) + chunks = [] + + for i in range(0, text_len, chunk_size): + # Add overlap to avoid missing matches at boundaries + end = min(i + chunk_size + term_len - 1, text_len) + chunks.append((i, text_lower[i:end])) + + print(f"📑 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each") + + # Process chunks in parallel + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [] + + for chunk_idx, (start_pos, chunk_text) in enumerate(chunks): + if is_stop_requested(): + return matches_count + + future = executor.submit( + self._fuzzy_search_chunk, + term_lower, chunk_text, threshold, term_len, chunk_idx, len(chunks) + ) + futures.append(future) + + # Collect results + for future in as_completed(futures): + if is_stop_requested(): + executor.shutdown(wait=False) + return matches_count + + try: + chunk_matches = future.result() + matches_count += chunk_matches + except Exception as e: + print(f"📑 ⚠️ Chunk processing error: {e}") + + print(f"📑 Parallel fuzzy search found {matches_count} matches") + return matches_count + + def _fuzzy_search_chunk(self, term_lower, chunk_text, threshold, term_len, chunk_idx, total_chunks): + """Process a single chunk for fuzzy matches""" + chunk_matches = 0 + + # Use a more efficient step size - no need to check every position + step = max(1, term_len // 3) # Check every third of term length + + for i in range(0, len(chunk_text) - term_len + 1, step): + # Check stop flag periodically + if i > 0 and i % 1000 == 0: + if is_stop_requested(): + return chunk_matches + + window = chunk_text[i:i + term_len] + + # Use SequenceMatcher for fuzzy matching + if SequenceMatcher(None, term_lower, window).ratio() >= threshold: + chunk_matches += 1 + + # Log progress for this chunk + if total_chunks > 1: + print(f"📑 Chunk {chunk_idx + 1}/{total_chunks} completed: {chunk_matches} matches") + + return chunk_matches + + def _sequential_fuzzy_search(self, term_lower, text_lower, threshold, term_len): + """Sequential fuzzy search (fallback for small texts or single worker)""" + print(f"📑 Starting sequential fuzzy search...") + fuzzy_start = time.time() + + matches_count = 0 + + # More efficient step size + step = max(1, term_len // 3) + total_windows = (len(text_lower) - term_len + 1) // step + + print(f"📑 Checking ~{total_windows:,} windows with step size {step}") + + windows_checked = 0 + for i in range(0, len(text_lower) - term_len + 1, step): + # Check stop flag frequently + if i > 0 and i % (step * 100) == 0: + if is_stop_requested(): + return matches_count + + # Progress log for very long operations + if windows_checked % 1000 == 0 and windows_checked > 0: + elapsed = time.time() - fuzzy_start + rate = windows_checked / elapsed if elapsed > 0 else 0 + eta = (total_windows - windows_checked) / rate if rate > 0 else 0 + print(f"📑 Progress: {windows_checked}/{total_windows} windows, {rate:.0f} w/s, ETA: {eta:.1f}s") + + window = text_lower[i:i + term_len] + if SequenceMatcher(None, term_lower, window).ratio() >= threshold: + matches_count += 1 + + windows_checked += 1 +
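+ # e.g. a 10-character term scanned over 100,000 characters gives step max(1, 10 // 3) = 3 and ~33,330 windows to score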
+ fuzzy_time = time.time() - fuzzy_start + print(f"📑 Sequential fuzzy search completed in {fuzzy_time:.2f}s, found {matches_count} matches") + + return matches_count + + def _strip_honorific(self, term, language_hint='unknown'): + """Strip honorific from a term if present""" + if not term: + return term + + # Get honorifics for the detected language + honorifics_to_check = [] + if language_hint in self.pattern_manager.CJK_HONORIFICS: + honorifics_to_check.extend(self.pattern_manager.CJK_HONORIFICS[language_hint]) + honorifics_to_check.extend(self.pattern_manager.CJK_HONORIFICS.get('english', [])) + + # Check and remove honorifics + for honorific in honorifics_to_check: + if honorific.startswith('-') or honorific.startswith(' '): + # English-style suffix + if term.endswith(honorific): + return term[:-len(honorific)].strip() + else: + # CJK-style suffix (no separator) + if term.endswith(honorific): + return term[:-len(honorific)] + + return term + + def _translate_chunk_traditional(self, chunk_text, chunk_index, total_chunks, chapter_title=""): + """Simplified translation for traditional APIs (DeepL, Google Translate)""" + + print(f"📝 Using traditional translation API for chunk {chunk_index}/{total_chunks}") + + # Traditional APIs don't use complex prompts, just need the text + messages = [] + + # Add minimal system context for language detection + profile = self.active_profile + if profile == 'korean': + lang_hint = "Translating from Korean to English" + elif profile == 'japanese': + lang_hint = "Translating from Japanese to English" + elif profile == 'chinese': + lang_hint = "Translating from Chinese to English" + else: + lang_hint = "Translating to English" + + messages.append({ + "role": "system", + "content": lang_hint + }) + + # For traditional APIs, we need to handle glossary differently + # Apply glossary terms as preprocessing if available + processed_text = chunk_text + + if hasattr(self, 'glossary_manager') and self.glossary_manager and self.glossary_manager.entries: + # Pre-process: Mark glossary terms with placeholders + glossary_placeholders = {} + placeholder_index = 0 + + for entry in self.glossary_manager.entries: + source = entry.get('source', '') + target = entry.get('target', '') + + if source and target and source in processed_text: + # Create unique placeholder + placeholder = f"[[GLOSS_{placeholder_index}]]" + glossary_placeholders[placeholder] = target + processed_text = processed_text.replace(source, placeholder) + placeholder_index += 1 + + print(f"📚 Applied {len(glossary_placeholders)} glossary placeholders") + + # Add the text to translate + messages.append({ + "role": "user", + "content": processed_text + }) + + # Send to API + try: + response = self.client.send(messages) + + if response and response.content: + translated_text = response.content + + # Post-process: Replace placeholders with glossary terms + if 'glossary_placeholders' in locals(): + for placeholder, target in glossary_placeholders.items(): + translated_text = translated_text.replace(placeholder, target) + print(f"✅ Restored {len(glossary_placeholders)} glossary terms") + + # Log detected language if available + if hasattr(response, 'usage') and response.usage: + detected_lang = response.usage.get('detected_source_lang') + if detected_lang: + print(f"🔍 Detected source language:
{detected_lang}") + + return translated_text + else: + print("❌ No translation received from traditional API") + return None + + except Exception as e: + print(f"❌ Traditional API translation error: {e}") + return None + + def _filter_text_for_glossary(self, text, min_frequency=2): + """Filter text to extract only meaningful content for glossary extraction""" + import re + from collections import Counter + from concurrent.futures import ThreadPoolExecutor, as_completed + import time + + filter_start_time = time.time() + print(f"📑 Starting smart text filtering...") + print(f"📑 Input text size: {len(text):,} characters") + + # Clean HTML if present + print(f"📑 Step 1/7: Cleaning HTML tags...") + from bs4 import BeautifulSoup + soup = BeautifulSoup(text, 'html.parser') + clean_text = soup.get_text() + print(f"📑 Clean text size: {len(clean_text):,} characters") + + # Detect primary language for better filtering + print(f"📑 Step 2/7: Detecting primary language...") + def detect_primary_language(text_sample): + sample = text_sample[:1000] + korean_chars = sum(1 for char in sample if 0xAC00 <= ord(char) <= 0xD7AF) + japanese_kana = sum(1 for char in sample if (0x3040 <= ord(char) <= 0x309F) or (0x30A0 <= ord(char) <= 0x30FF)) + chinese_chars = sum(1 for char in sample if 0x4E00 <= ord(char) <= 0x9FFF) + + if korean_chars > 50: + return 'korean' + elif japanese_kana > 20: + return 'japanese' + elif chinese_chars > 50 and japanese_kana < 10: + return 'chinese' + else: + return 'english' + + primary_lang = detect_primary_language(clean_text) + print(f"📑 Detected primary language: {primary_lang}") + + # Split into sentences for better context + print(f"📑 Step 3/7: Splitting text into sentences...") + sentences = re.split(r'[.!?。!?]+', clean_text) + print(f"📑 Found {len(sentences):,} sentences") + + # Extract potential terms (words/phrases that appear multiple times) + print(f"📑 Step 4/7: Setting up extraction patterns and exclusion rules...") + word_freq = Counter() + + # Pattern for detecting potential names/terms based on capitalization or special characters + # Korean names: 2-4 hangul characters WITHOUT honorifics + korean_pattern = r'[가-힣]{2,4}' + # Japanese names: kanji/hiragana/katakana combinations + japanese_pattern = r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]{2,6}' + # Chinese names: 2-4 Chinese characters + chinese_pattern = r'[\u4e00-\u9fff]{2,4}' + # English proper nouns: Capitalized words + english_pattern = r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b' + + # Combine patterns + combined_pattern = f'({korean_pattern}|{japanese_pattern}|{chinese_pattern}|{english_pattern})' + print(f"📑 Using combined regex pattern for {primary_lang} text") + + # Get honorifics and title patterns for the detected language + honorifics_to_exclude = set() + if primary_lang in self.pattern_manager.CJK_HONORIFICS: + honorifics_to_exclude.update(self.pattern_manager.CJK_HONORIFICS[primary_lang]) + # Also add English romanizations + honorifics_to_exclude.update(self.pattern_manager.CJK_HONORIFICS.get('english', [])) + + # Compile title patterns for the language + title_patterns = [] + if primary_lang in self.pattern_manager.TITLE_PATTERNS: + for pattern in self.pattern_manager.TITLE_PATTERNS[primary_lang]: + title_patterns.append(re.compile(pattern)) + + # Function to check if a term should be excluded + def should_exclude_term(term): + term_lower = term.lower() + + # Check if it's a common word + if term in self.pattern_manager.COMMON_WORDS or term_lower in self.pattern_manager.COMMON_WORDS: + return True + + # Check 
if it contains honorifics + for honorific in honorifics_to_exclude: + if honorific in term or (honorific.startswith('-') and term.endswith(honorific[1:])): + return True + + # Check if it matches title patterns + for pattern in title_patterns: + if pattern.search(term): + return True + + # Check if it's a number (including Chinese numbers) + if term in self.pattern_manager.CHINESE_NUMS: + return True + + # Check if it's just digits + if term.isdigit(): + return True + + return False + + # Extract potential terms from each sentence + print(f"📑 Step 5/7: Extracting and filtering terms from sentences...") + + # Check if we should use parallel processing + extraction_workers = int(os.getenv("EXTRACTION_WORKERS", "1")) + # Auto-detect optimal workers if not set + if extraction_workers == 1 and len(sentences) > 1000: + # Use more cores for better parallelization + cpu_count = os.cpu_count() or 4 + extraction_workers = min(cpu_count, 12) # Use up to 12 cores + print(f"📑 Auto-detected {cpu_count} CPU cores, using {extraction_workers} workers") + + use_parallel = extraction_workers > 1 and len(sentences) > 100 + + if use_parallel: + print(f"📑 Using parallel processing with {extraction_workers} workers") + print(f"📑 Estimated speedup: {extraction_workers}x faster") + + important_sentences = [] + seen_contexts = set() + processed_count = 0 + total_sentences = len(sentences) + last_progress_time = time.time() + + def process_sentence_batch(batch_sentences, batch_idx): + """Process a batch of sentences""" + local_word_freq = Counter() + local_important = [] + local_seen = set() + + for sentence in batch_sentences: + sentence = sentence.strip() + if len(sentence) < 10 or len(sentence) > 500: + continue + + # Find all potential terms in this sentence + matches = re.findall(combined_pattern, sentence) + + if matches: + # Filter out excluded terms + filtered_matches = [] + for match in matches: + if not should_exclude_term(match): + local_word_freq[match] += 1 + filtered_matches.append(match) + + # Keep sentences with valid potential terms + if filtered_matches: + sentence_key = ' '.join(sorted(filtered_matches)) + if sentence_key not in local_seen: + local_important.append(sentence) + local_seen.add(sentence_key) + + return local_word_freq, local_important, local_seen, batch_idx + + if use_parallel: + # Force SMALL batches for real parallelization + # We want MANY small batches, not few large ones! 
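+ # Worked example (assuming 8 workers and 30,000 sentences): the size table below picks 300; + # min_batches = 8 * 3 = 24 gives max_batch_size = max(50, 30000 // 24) = 1250, so 300 stands, + # yielding 100 batches of 300 sentences, roughly 12 batches per worker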
+ + # Calculate based on total sentences + total_sentences = len(sentences) + + if total_sentences < 1000: + # Small dataset: 50-100 sentences per batch + optimal_batch_size = 100 + elif total_sentences < 10000: + # Medium dataset: 200 sentences per batch + optimal_batch_size = 200 + elif total_sentences < 50000: + # Large dataset: 300 sentences per batch + optimal_batch_size = 300 + else: + # Very large dataset: 400 sentences per batch max + optimal_batch_size = 400 + + # Ensure we have enough batches for all workers + min_batches = extraction_workers * 3 # At least 3 batches per worker + max_batch_size = max(50, total_sentences // min_batches) + optimal_batch_size = min(optimal_batch_size, max_batch_size) + + print(f"📑 Total sentences: {total_sentences:,}") + print(f"📑 Target batch size: {optimal_batch_size} sentences") + + # Calculate expected number of batches + expected_batches = (total_sentences + optimal_batch_size - 1) // optimal_batch_size + print(f"📑 Expected batches: {expected_batches} (for {extraction_workers} workers)") + print(f"📑 Batches per worker: ~{expected_batches // extraction_workers} batches") + + batches = [sentences[i:i + optimal_batch_size] for i in range(0, len(sentences), optimal_batch_size)] + print(f"📑 Processing {len(batches)} batches of ~{optimal_batch_size} sentences each") + print(f"📑 Expected speedup: {min(extraction_workers, len(batches))}x (using {extraction_workers} workers)") + + # Decide between ThreadPoolExecutor and ProcessPoolExecutor + import multiprocessing + in_subprocess = multiprocessing.current_process().name != 'MainProcess' + + # Use ProcessPoolExecutor for better parallelism on larger datasets + use_process_pool = (not in_subprocess and len(sentences) > 5000) + + if use_process_pool: + print(f"📑 Using ProcessPoolExecutor for maximum performance (true parallelism)") + executor_class = ProcessPoolExecutor + else: + print(f"📑 Using ThreadPoolExecutor for sentence processing") + executor_class = ThreadPoolExecutor + + with executor_class(max_workers=extraction_workers) as executor: + futures = [] + + # Prepare data for ProcessPoolExecutor if needed + if use_process_pool: + # Serialize exclusion check data for process pool + exclude_check_data = ( + list(honorifics_to_exclude), + [p.pattern for p in title_patterns], # Convert regex to strings + self.pattern_manager.COMMON_WORDS, + self.pattern_manager.CHINESE_NUMS + ) + + for idx, batch in enumerate(batches): + if use_process_pool: + # Use module-level function for ProcessPoolExecutor + future = executor.submit(_process_sentence_batch_for_extraction, + (batch, idx, combined_pattern, exclude_check_data)) + else: + # Use local function for ThreadPoolExecutor + future = executor.submit(process_sentence_batch, batch, idx) + + futures.append(future) + # Yield to GUI when submitting futures + if idx % 10 == 0: + time.sleep(0.001) + + # Collect results with progress + completed_batches = 0 + batch_start_time = time.time() + for future in as_completed(futures): + # Get result without timeout - as_completed already handles waiting + local_word_freq, local_important, local_seen, batch_idx = future.result() + + # Merge results + word_freq.update(local_word_freq) + for sentence in local_important: + sentence_key = ' '.join(sorted(re.findall(combined_pattern, sentence))) + if sentence_key not in seen_contexts: + important_sentences.append(sentence) + seen_contexts.add(sentence_key) + + processed_count += len(batches[batch_idx]) + completed_batches += 1 + + # Show progress every 10 batches or at key 
milestones + if completed_batches % 10 == 0 or completed_batches == len(batches): + progress = (processed_count / total_sentences) * 100 + elapsed = time.time() - batch_start_time + rate = (processed_count / elapsed) if elapsed > 0 else 0 + print(f"📑 Progress: {processed_count:,}/{total_sentences:,} sentences ({progress:.1f}%) | Batch {completed_batches}/{len(batches)} | {rate:.0f} sent/sec") + + # Yield to GUI after each batch completes + time.sleep(0.001) + else: + # Sequential processing with progress + for idx, sentence in enumerate(sentences): + sentence = sentence.strip() + if len(sentence) < 10 or len(sentence) > 500: + continue + + # Find all potential terms in this sentence + matches = re.findall(combined_pattern, sentence) + + if matches: + # Filter out excluded terms + filtered_matches = [] + for match in matches: + if not should_exclude_term(match): + word_freq[match] += 1 + filtered_matches.append(match) + + # Keep sentences with valid potential terms + if filtered_matches: + sentence_key = ' '.join(sorted(filtered_matches)) + if sentence_key not in seen_contexts: + important_sentences.append(sentence) + seen_contexts.add(sentence_key) + + # Show progress every 1000 sentences or 2 seconds + if idx % 1000 == 0 or (time.time() - last_progress_time > 2): + progress = ((idx + 1) / total_sentences) * 100 + print(f"📑 Processing sentences: {idx + 1:,}/{total_sentences:,} ({progress:.1f}%)") + last_progress_time = time.time() + # Yield to GUI thread every 1000 sentences + time.sleep(0.001) # Tiny sleep to let GUI update + + print(f"📑 Found {len(important_sentences):,} sentences with potential glossary terms") + + # Step 6/7: Deduplicate and normalize terms + print(f"📑 Step 6/7: Normalizing and deduplicating {len(word_freq):,} unique terms...") + + # Since should_exclude_term already filters honorifics, we just need to deduplicate + # based on normalized forms (lowercase, etc.) + # Track the best original form per normalized key so the lookup key and the stored key match; + # e.g. 'Aria': 7 and 'ARIA': 2 both normalize to 'aria', and ('Aria', 7) wins + best_by_normalized = {} + term_count = 0 + + for term, count in word_freq.items(): + # Normalize term for deduplication (but keep original form) + normalized = term.lower().strip() + + # Keep the version with highest count + current = best_by_normalized.get(normalized) + if current is None or count > current[1]: + best_by_normalized[normalized] = (term, count) + + term_count += 1 + # Yield to GUI every 1000 terms + if term_count % 1000 == 0: + time.sleep(0.001) + + combined_freq = Counter(dict(best_by_normalized.values())) + + print(f"📑 Deduplicated to {len(combined_freq):,} unique terms") + + # Filter to keep only terms that appear at least min_frequency times + frequent_terms = {term: count for term, count in combined_freq.items() if count >= min_frequency} + + # Build filtered text focusing on sentences containing frequent terms + print(f"📑 Step 7/7: Building filtered text from relevant sentences...") + + # OPTIMIZATION: Skip sentences that already passed filtering in step 5 + # These sentences already contain glossary terms, no need to check again! + # We just need to limit the sample size + + filtered_sentences = important_sentences # Already filtered!
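+ # The only rescans below are the optional top-term narrowing and the final sampling; + # e.g. with 1,000 surviving sentences and GLOSSARY_MAX_SENTENCES=200, step = 1000 // 200 = 5, keeping every 5th sentence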
+ print(f"📑 Using {len(filtered_sentences):,} pre-filtered sentences (already contain glossary terms)") + + # For extremely large datasets, we can optionally do additional filtering + if len(filtered_sentences) > 10000 and len(frequent_terms) > 1000: + print(f"📑 Large dataset detected - applying frequency-based filtering...") + print(f"📑 Filtering {len(filtered_sentences):,} sentences for top frequent terms...") + + # Sort terms by frequency to prioritize high-frequency ones + sorted_terms = sorted(frequent_terms.items(), key=lambda x: x[1], reverse=True) + top_terms = dict(sorted_terms[:1000]) # Focus on top 1000 most frequent terms + + print(f"📑 Using top {len(top_terms):,} most frequent terms for final filtering") + + # Use parallel processing only if really needed + if use_parallel and len(filtered_sentences) > 5000: + import multiprocessing + in_subprocess = multiprocessing.current_process().name != 'MainProcess' + + # Create a simple set of terms for fast lookup (no variations needed) + term_set = set(top_terms.keys()) + + print(f"📑 Using parallel filtering with {extraction_workers} workers...") + + # Optimize batch size + check_batch_size = 500 # Larger batches since we're doing simpler checks + check_batches = [filtered_sentences[i:i + check_batch_size] + for i in range(0, len(filtered_sentences), check_batch_size)] + + print(f"📑 Processing {len(check_batches)} batches of ~{check_batch_size} sentences") + + # Simple function to check if sentence contains any top term + def check_batch_simple(batch): + result = [] + for sentence in batch: + # Simple substring check - much faster than regex + for term in term_set: + if term in sentence: + result.append(sentence) + break + return result + + new_filtered = [] + with ThreadPoolExecutor(max_workers=extraction_workers) as executor: + futures = [executor.submit(check_batch_simple, batch) for batch in check_batches] + + for future in as_completed(futures): + new_filtered.extend(future.result()) + + filtered_sentences = new_filtered + print(f"📑 Filtered to {len(filtered_sentences):,} sentences containing top terms") + else: + # For smaller datasets, simple sequential filtering + print(f"📑 Using sequential filtering...") + new_filtered = [] + for i, sentence in enumerate(filtered_sentences): + for term in top_terms: + if term in sentence: + new_filtered.append(sentence) + break + if i % 1000 == 0: + print(f"📑 Progress: {i:,}/{len(filtered_sentences):,} sentences") + time.sleep(0.001) + + filtered_sentences = new_filtered + print(f"📑 Filtered to {len(filtered_sentences):,} sentences containing top terms") + + print(f"📑 Selected {len(filtered_sentences):,} sentences containing frequent terms") + + # Limit the number of sentences to reduce token usage + max_sentences = int(os.getenv("GLOSSARY_MAX_SENTENCES", "200")) + if len(filtered_sentences) > max_sentences: + print(f"📑 Limiting to {max_sentences} representative sentences (from {len(filtered_sentences):,})") + # Take a representative sample + step = len(filtered_sentences) // max_sentences + filtered_sentences = filtered_sentences[::step][:max_sentences] + + filtered_text = ' '.join(filtered_sentences) + + # Calculate and display filtering statistics + filter_end_time = time.time() + filter_duration = filter_end_time - filter_start_time + + original_length = len(clean_text) + filtered_length = len(filtered_text) + reduction_percent = ((original_length - filtered_length) / original_length * 100) if original_length > 0 else 0 + + print(f"\n📑 === FILTERING COMPLETE ===") + print(f"📑 Duration: 
{filter_duration:.1f} seconds")
+        print(f"📑 Text reduction: {original_length:,} → {filtered_length:,} chars ({reduction_percent:.1f}% reduction)")
+        print(f"📑 Terms found: {len(frequent_terms):,} unique terms (min frequency: {min_frequency})")
+        print(f"📑 Final output: {len(filtered_sentences)} sentences, {filtered_length:,} characters")
+        print(f"📑 Performance: {(original_length / filter_duration / 1000):.1f}K chars/second")
+        print(f"📑 ========================\n")
+        
+        return filtered_text, frequent_terms
+    
+    def _extract_with_custom_prompt(self, custom_prompt, all_text, language,
+                                    min_frequency, max_names, max_titles,
+                                    existing_glossary, output_dir,
+                                    strip_honorifics=True, fuzzy_threshold=0.90, filter_mode='all'):
+        """Extract glossary using custom AI prompt with proper filtering"""
+        print("📑 Using custom automatic glossary prompt")
+        extraction_start = time.time()
+        
+        # Check stop flag
+        if is_stop_requested():
+            print("📑 ❌ Glossary extraction stopped by user")
+            return {}
+        
+        # Note: Filter mode can be controlled via the configurable prompt environment variable
+        # No hardcoded filter instructions are added here
+        
+        try:
+            MODEL = os.getenv("MODEL", "gemini-2.0-flash")
+            API_KEY = (os.getenv("API_KEY") or
+                       os.getenv("OPENAI_API_KEY") or
+                       os.getenv("OPENAI_OR_Gemini_API_KEY") or
+                       os.getenv("GEMINI_API_KEY"))
+            
+            if is_traditional_translation_api(MODEL):
+                # Traditional translation APIs cannot run prompt-based extraction,
+                # so fall back to pattern-based extraction instead
+                print(f"📑 {MODEL} is a traditional translation API - using pattern-based extraction")
+                return self._extract_with_patterns(all_text, language, min_frequency,
+                                                   max_names, max_titles, 50,
+                                                   existing_glossary, output_dir,
+                                                   strip_honorifics, fuzzy_threshold, filter_mode)
+            elif not API_KEY:
+                print(f"📑 No API key found, falling back to pattern-based extraction")
+                return self._extract_with_patterns(all_text, language, min_frequency,
+                                                   max_names, max_titles, 50,
+                                                   existing_glossary, output_dir,
+                                                   strip_honorifics, fuzzy_threshold, filter_mode)
+            else:
+                print(f"📑 Using AI-assisted extraction with custom prompt")
+                
+                from unified_api_client import UnifiedClient, UnifiedClientError
+                client = UnifiedClient(model=MODEL, api_key=API_KEY, output_dir=output_dir)
+                if hasattr(client, 'reset_cleanup_state'):
+                    client.reset_cleanup_state()
+                
+                # Apply thread submission delay using the client's method
+                thread_delay = float(os.getenv("THREAD_SUBMISSION_DELAY_SECONDS", "0.5"))
+                if thread_delay > 0:
+                    client._apply_thread_submission_delay()
+                
+                # Check if cancelled during delay
+                if hasattr(client, '_cancelled') and client._cancelled:
+                    print("📑 ❌ Glossary extraction stopped during delay")
+                    return {}
+                
+                # Check if text is already filtered (from chunking)
+                already_filtered = os.getenv("_CHUNK_ALREADY_FILTERED", "0") == "1"
+                
+                if already_filtered:
+                    print("📑 Text already filtered during chunking, skipping re-filtering")
+                    text_sample = all_text  # Use as-is since it's already filtered
+                    detected_terms = {}
+                else:
+                    # Apply smart filtering to reduce noise and focus on meaningful content
+                    force_disable = os.getenv("GLOSSARY_FORCE_DISABLE_SMART_FILTER", "0") == "1"
+                    use_smart_filter = (os.getenv("GLOSSARY_USE_SMART_FILTER", "1") == "1") and not force_disable
+                    
+                    if use_smart_filter:
+                        print("📑 Applying smart text filtering to reduce noise...")
+                        text_sample, detected_terms = self._filter_text_for_glossary(all_text, min_frequency)
+                    else:
+                        print("📑 Smart filter disabled - using raw text sample")
+                        # Fallback to simple truncation
+                        max_text_size = int(os.getenv("GLOSSARY_MAX_TEXT_SIZE", "50000"))
+                        text_sample = all_text[:max_text_size] if len(all_text) > max_text_size and max_text_size > 0 else all_text
+                        detected_terms = {}
+                
+                # Replace placeholders in prompt
+                prompt = custom_prompt.replace('{language}',
language) + prompt = prompt.replace('{min_frequency}', str(min_frequency)) + prompt = prompt.replace('{max_names}', str(max_names)) + prompt = prompt.replace('{max_titles}', str(max_titles)) + + # Get the format instructions from environment variable + format_instructions = os.getenv("GLOSSARY_FORMAT_INSTRUCTIONS", "") + + # If no format instructions are provided, use a default + if not format_instructions: + format_instructions = """ +Return the results in EXACT CSV format with this header: +type,raw_name,translated_name + +For example: +character,김상현,Kim Sang-hyu +character,갈편제,Gale Hardest +character,디히릿 아데,Dihirit Ade + +Only include entries that actually appear in the text. +Do not use quotes around values unless they contain commas. + +Text to analyze: +{text_sample}""" + + # Replace placeholders in format instructions + format_instructions = format_instructions.replace('{text_sample}', text_sample) + + # Combine the user's prompt with format instructions + enhanced_prompt = f"{prompt}\n\n{format_instructions}" + + messages = [ + {"role": "system", "content": "You are a glossary extraction assistant. Return ONLY CSV format with exactly 3 columns: type,raw_name,translated_name. The 'type' column should classify entries (e.g., character, term, location, etc.)."}, + {"role": "user", "content": enhanced_prompt} + ] + + # Check stop before API call + if is_stop_requested(): + print("📑 ❌ Glossary extraction stopped before API call") + return {} + + try: + temperature = float(os.getenv("TEMPERATURE", "0.3")) + max_tokens = int(os.getenv("MAX_OUTPUT_TOKENS", "4096")) + + # Use send_with_interrupt for interruptible API call + chunk_timeout = int(os.getenv("CHUNK_TIMEOUT", "900")) # 15 minute default for glossary + print(f"📑 Sending AI extraction request (timeout: {chunk_timeout}s, interruptible)...") + + # Before API call + api_start = time.time() + print(f"📑 Preparing API request (text size: {len(text_sample):,} chars)...") + print(f"📑 ⏳ Processing {len(text_sample):,} characters... 
Please wait, this may take 5-10 minutes")
+                    
+                    response = send_with_interrupt(
+                        messages=messages,
+                        client=client,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        stop_check_fn=is_stop_requested,
+                        chunk_timeout=chunk_timeout
+                    )
+                    api_time = time.time() - api_start
+                    print(f"📑 API call completed in {api_time:.1f}s")
+                    
+                    # Get the actual text from the response
+                    if hasattr(response, 'content'):
+                        response_text = response.content
+                    else:
+                        response_text = str(response)
+                    
+                    # Before processing response
+                    process_start = time.time()
+                    print(f"📑 Processing AI response...")
+                    
+                    # Process response and build CSV
+                    csv_lines = self._process_ai_response(response_text, all_text, min_frequency,
+                                                          strip_honorifics, fuzzy_threshold,
+                                                          language, filter_mode)
+                    
+                    print(f"📑 AI extracted {len(csv_lines) - 1} valid terms (header excluded)")
+                    
+                    process_time = time.time() - process_start
+                    print(f"📑 Response processing took {process_time:.1f}s")
+                    
+                    # If we're running per-chunk, defer all heavy work and saving
+                    if os.getenv("GLOSSARY_DEFER_SAVE", "0") == "1":
+                        return csv_lines
+                    
+                    # Check stop before merging
+                    if is_stop_requested():
+                        print("📑 ❌ Glossary generation stopped before merging")
+                        return {}
+                    
+                    # Merge with existing glossary if present
+                    if existing_glossary:
+                        csv_lines = self._merge_csv_entries(csv_lines, existing_glossary, strip_honorifics, language)
+                    
+                    # Fuzzy matching deduplication
+                    skip_frequency_check = os.getenv("GLOSSARY_SKIP_FREQUENCY_CHECK", "0") == "1"
+                    if not skip_frequency_check:  # Only dedupe if we're checking frequencies
+                        # Time the deduplication
+                        dedup_start = time.time()
+                        original_count = len(csv_lines) - 1  # Exclude header
+                        
+                        csv_lines = self._deduplicate_glossary_with_fuzzy(csv_lines, fuzzy_threshold)
+                        
+                        dedup_time = time.time() - dedup_start
+                        final_count = len(csv_lines) - 1  # Exclude header
+                        removed_count = original_count - final_count
+                        
+                        print(f"📑 Deduplication completed in {dedup_time:.1f}s")
+                        print(f"📑   - Original entries: {original_count}")
+                        print(f"📑   - Duplicates removed: {removed_count}")
+                        print(f"📑   - Final entries: {final_count}")
+                        
+                        # Store for summary statistics
+                        self._dedup_time = getattr(self, '_dedup_time', 0) + dedup_time
+                    else:
+                        print(f"📑 Skipping deduplication (frequency check disabled)")
+                    
+                    # Apply filter mode to final results
+                    csv_lines = self._filter_csv_by_mode(csv_lines, filter_mode)
+                    
+                    # Check if we should use token-efficient format
+                    use_legacy_format = os.getenv('GLOSSARY_USE_LEGACY_CSV', '0') == '1'
+                    
+                    if not use_legacy_format:
+                        # Convert to token-efficient format
+                        csv_lines = self._convert_to_token_efficient_format(csv_lines)
+                    
+                    # Final sanitize to prevent stray headers
+                    csv_lines = self._sanitize_final_glossary_lines(csv_lines, use_legacy_format)
+                    
+                    # Create final CSV content
+                    csv_content = '\n'.join(csv_lines)
+                    
+                    # Save glossary as CSV with proper extension
+                    glossary_path = os.path.join(output_dir, "glossary.csv")
+                    self._atomic_write_file(glossary_path, csv_content)
+                    
+                    print(f"\n📑 ✅ AI-ASSISTED GLOSSARY SAVED!")
+                    print(f"📑 File: {glossary_path}")
+                    c_count, t_count, total = self._count_glossary_entries(csv_lines, use_legacy_format)
+                    print(f"📑 Character entries: {c_count}")
+                    print(f"📑 Term entries: {t_count}")
+                    print(f"📑 Total entries: {total}")
+                    total_time = time.time() - extraction_start
+                    print(f"📑 Total extraction time: {total_time:.1f}s")
+                    return self._parse_csv_to_dict(csv_content)
+                    
+                except UnifiedClientError as e:
+                    if "stopped by user" in str(e).lower():
+                        print(f"📑 ❌ AI extraction interrupted by user")
+                        return {}
+                    else:
+                        print(f"⚠️ AI extraction failed: {e}")
+                        print("📑 Falling back to pattern-based extraction")
+                        return self._extract_with_patterns(all_text, language, min_frequency,
+                                                           max_names, max_titles, 50,
+                                                           existing_glossary, output_dir,
+                                                           strip_honorifics, fuzzy_threshold, filter_mode)
+                except Exception as e:
+                    print(f"⚠️ AI extraction failed: {e}")
+                    import traceback
+                    traceback.print_exc()
+                    print("📑 Falling back to pattern-based extraction")
+                    return self._extract_with_patterns(all_text, language, min_frequency,
+                                                       max_names, max_titles, 50,
+                                                       existing_glossary, output_dir,
+                                                       strip_honorifics, fuzzy_threshold, filter_mode)
+        
+        except Exception as e:
+            print(f"⚠️ Custom prompt processing failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return self._extract_with_patterns(all_text, language, min_frequency,
+                                               max_names, max_titles, 50,
+                                               existing_glossary, output_dir,
+                                               strip_honorifics, fuzzy_threshold, filter_mode)
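
`send_with_interrupt` is used for every long API call in this hunk but is defined elsewhere in the module. Under the assumption that it runs the blocking send in a worker thread and polls the stop flag while waiting, the general pattern looks roughly like this (a sketch only; the real helper's name, signature, and details may differ):

```python
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout

def send_with_polling(send_fn, stop_check_fn, chunk_timeout, poll_interval=0.5):
    """Run a blocking send in a worker thread and poll a stop flag while waiting."""
    deadline = time.time() + chunk_timeout
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(send_fn)
        while True:
            try:
                return future.result(timeout=poll_interval)
            except FutureTimeout:
                # cancel() cannot abort a call that is already running;
                # it only prevents a queued call from starting
                if stop_check_fn():
                    future.cancel()
                    raise RuntimeError("stopped by user")
                if time.time() > deadline:
                    future.cancel()
                    raise TimeoutError(f"no response within {chunk_timeout}s")
```
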
+    def _filter_csv_by_mode(self, csv_lines, filter_mode):
+        """Filter CSV lines based on the filter mode"""
+        if filter_mode == "all":
+            return csv_lines
+        
+        filtered = [csv_lines[0]]  # Keep header
+        
+        for line in csv_lines[1:]:
+            if not line.strip():
+                continue
+            
+            parts = [p.strip() for p in line.split(',')]
+            if len(parts) < 3:
+                continue
+            
+            entry_type = parts[0].lower()
+            raw_name = parts[1]
+            
+            if filter_mode == "only_with_honorifics":
+                # Only keep character entries with honorifics
+                if entry_type == "character" and self._has_honorific(raw_name):
+                    filtered.append(line)
+            elif filter_mode == "only_without_honorifics":
+                # Keep terms and characters without honorifics
+                if entry_type == "term" or (entry_type == "character" and not self._has_honorific(raw_name)):
+                    filtered.append(line)
+        
+        print(f"📑 Filter '{filter_mode}': {len(filtered)-1} entries kept from {len(csv_lines)-1}")
+        return filtered
+    
+    def _process_ai_response(self, response_text, all_text, min_frequency,
+                             strip_honorifics, fuzzy_threshold, language, filter_mode):
+        """Process AI response and return CSV lines"""
+        
+        # Clean response text
+        response_text = response_text.strip()
+        
+        # Remove string representation artifacts if they wrap the entire response
+        if response_text.startswith('("') and response_text.endswith('")'):
+            response_text = response_text[2:-2]
+        elif response_text.startswith('"') and response_text.endswith('"'):
+            response_text = response_text[1:-1]
+        elif response_text.startswith('(') and response_text.endswith(')'):
+            response_text = response_text[1:-1]
+        
+        # Unescape the string
+        response_text = response_text.replace('\\n', '\n')
+        response_text = response_text.replace('\\r', '')
+        response_text = response_text.replace('\\t', '\t')
+        response_text = response_text.replace('\\"', '"')
+        response_text = response_text.replace("\\'", "'")
+        response_text = response_text.replace('\\\\', '\\')
+        
+        # Clean up markdown code blocks if present
+        if '```' in response_text:
+            parts = response_text.split('```')
+            for part in parts:
+                if 'csv' in part[:10].lower():
+                    response_text = part[part.find('\n')+1:]
+                    break
+                elif part.strip() and ('type,raw_name' in part or 'character,' in part or 'term,' in part):
+                    response_text = part
+                    break
+        
+        # Normalize line endings
+        response_text = response_text.replace('\r\n', '\n').replace('\r', '\n')
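
Model replies often arrive wrapped in markdown fences or quoted string literals, which is why `_process_ai_response` spends its first lines unwrapping before it parses a single CSV row. The fence-stripping step in isolation (illustrative helper, not the module's code):

```python
def strip_markdown_fences(text: str) -> str:
    """Return the contents of a ```csv fenced block if present, else the text unchanged."""
    if '```' not in text:
        return text.strip()
    for part in text.split('```'):
        if part[:10].lower().startswith('csv'):
            return part[part.find('\n') + 1:].strip()  # drop the "csv" language-tag line
        if 'type,raw_name' in part:
            return part.strip()
    return text.strip()

assert strip_markdown_fences("```csv\ntype,raw_name,translated_name\n```") == "type,raw_name,translated_name"
```
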
+        lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
+        
+        csv_lines = []
+        
+        # Check if we should skip frequency check
+        skip_frequency_check = os.getenv("GLOSSARY_SKIP_FREQUENCY_CHECK", "0") == "1"
+        
+        # Option to completely skip ALL validation for maximum speed
+        skip_all_validation = os.getenv("GLOSSARY_SKIP_ALL_VALIDATION", "0") == "1"
+        
+        if skip_all_validation:
+            print("📑 ⚡ FAST MODE: Skipping all frequency validation (accepting all AI results)")
+            
+            # Always use the enforced 3-column header
+            csv_lines.append("type,raw_name,translated_name")
+            
+            # Process the AI response
+            for line in lines:
+                # Skip header lines
+                if 'type' in line.lower() and 'raw_name' in line.lower():
+                    continue
+                
+                # Parse CSV line
+                parts = [p.strip().strip('"') for p in line.split(',')]
+                
+                if len(parts) >= 3:
+                    # Has all 3 columns
+                    entry_type = parts[0]
+                    raw_name = parts[1]
+                    translated_name = parts[2]
+                    if raw_name and translated_name:
+                        csv_lines.append(f"{entry_type},{raw_name},{translated_name}")
+                elif len(parts) == 2:
+                    # Missing type, default to 'term'
+                    raw_name = parts[0]
+                    translated_name = parts[1]
+                    if raw_name and translated_name:
+                        csv_lines.append(f"term,{raw_name},{translated_name}")
+            
+            print(f"📑 Fast mode: Accepted {len(csv_lines) - 1} entries without validation")
+            return csv_lines
+        
+        # For "only_with_honorifics" mode, ALWAYS skip frequency check
+        if filter_mode == "only_with_honorifics":
+            skip_frequency_check = True
+            print("📑 Filter mode 'only_with_honorifics': Bypassing frequency checks")
+        
+        print(f"📑 Processing {len(lines)} lines from AI response...")
+        print(f"📑 Text corpus size: {len(all_text):,} chars")
+        print(f"📑 Frequency checking: {'DISABLED' if skip_frequency_check else f'ENABLED (min: {min_frequency})'}")
+        print(f"📑 Fuzzy threshold: {fuzzy_threshold}")
+        
+        # Collect all terms first for batch processing
+        all_terms_to_check = []
+        term_info_map = {}  # Map term to its full info
+        
+        if not skip_frequency_check:
+            # First pass: collect all terms that need frequency checking
+            for line in lines:
+                if 'type' in line.lower() and 'raw_name' in line.lower():
+                    continue  # Skip header
+                
+                parts = [p.strip().strip('"') for p in line.split(',')]
+                if len(parts) >= 3:
+                    entry_type = parts[0].lower()
+                    raw_name = parts[1]
+                    translated_name = parts[2]
+                elif len(parts) == 2:
+                    entry_type = 'term'
+                    raw_name = parts[0]
+                    translated_name = parts[1]
+                else:
+                    continue
+                
+                if raw_name and translated_name:
+                    # Store for batch processing
+                    original_raw = raw_name
+                    if strip_honorifics:
+                        raw_name = self._strip_honorific(raw_name, language)
+                    
+                    all_terms_to_check.append(raw_name)
+                    term_info_map[raw_name] = {
+                        'entry_type': entry_type,
+                        'original_raw': original_raw,
+                        'translated_name': translated_name,
+                        'line': line
+                    }
+            
+            # Batch compute all frequencies at once
+            if all_terms_to_check:
+                print(f"📑 Computing frequencies for {len(all_terms_to_check)} terms...")
+                term_frequencies = self._batch_compute_frequencies(
+                    all_terms_to_check, all_text, fuzzy_threshold, min_frequency
+                )
+            else:
+                term_frequencies = {}
+        
+        # Now process the results using pre-computed frequencies
+        entries_accepted = 0
+        # Process based on mode
+        if filter_mode == "only_with_honorifics" or skip_frequency_check:
+            # For these modes, accept all entries
+            csv_lines.append("type,raw_name,translated_name")  # Header
+            for line in lines:
+                if 'type' in line.lower() and 'raw_name' in line.lower():
+                    continue  # Skip header
+                parts = [p.strip().strip('"') for p in line.split(',')]
+                if len(parts) >= 3:
+                    entry_type = parts[0].lower()
+                    raw_name = parts[1]
+                    translated_name = parts[2]
+                elif len(parts) == 2:
+                    entry_type = 'term'
+                    raw_name = parts[0]
+                    translated_name = parts[1]
+                else:
+                    continue
+                
+                if raw_name and translated_name:
+                    csv_line = f"{entry_type},{raw_name},{translated_name}"
+                    csv_lines.append(csv_line)
+                    entries_accepted += 1
+            
+            print(f"📑 Accepted {entries_accepted} entries (frequency check disabled)")
+            
+        else:
+            # Use pre-computed frequencies
+            csv_lines.append("type,raw_name,translated_name")  # Header
+            
+            for term, info in term_info_map.items():
+                count = term_frequencies.get(term, 0)
+                
+                # Also check original form if it was stripped
+                if info['original_raw'] != term:
+                    count += term_frequencies.get(info['original_raw'], 0)
+                
+                if count >= min_frequency:
+                    csv_line = f"{info['entry_type']},{term},{info['translated_name']}"
+                    csv_lines.append(csv_line)
+                    entries_accepted += 1
+                    
+                    # Log first few examples
+                    if entries_accepted <= 5:
+                        print(f"📑 ✓ Example: {term} -> {info['translated_name']} (freq: {count})")
+            
+            print(f"📑 Frequency filtering complete: {entries_accepted}/{len(term_info_map)} terms accepted")
+        
+        # Ensure we have at least the header
+        if len(csv_lines) == 0:
+            csv_lines.append("type,raw_name,translated_name")
+        
+        # Print final summary
+        print(f"📑 Processing complete: {entries_accepted} terms accepted")
+        
+        return csv_lines
+    
+    def _deduplicate_glossary_with_fuzzy(self, csv_lines, fuzzy_threshold):
+        """Apply fuzzy matching to remove duplicate entries from the glossary with stop flag checks"""
+        from difflib import SequenceMatcher
+        
+        print(f"📑 Applying fuzzy deduplication (threshold: {fuzzy_threshold})...")
+        
+        # Check stop flag at start
+        if is_stop_requested():
+            print(f"📑 ❌ Deduplication stopped by user")
+            return csv_lines
+        
+        header_line = csv_lines[0]  # Keep header
+        entry_lines = csv_lines[1:]  # Data lines
+        
+        deduplicated = [header_line]
+        seen_entries = {}  # Use dict for O(1) lookups instead of list
+        seen_names_lower = set()  # Quick exact match check
+        removed_count = 0
+        total_entries = len(entry_lines)
+        
+        # Pre-process all entries for faster comparison
+        print(f"📑 Processing {total_entries} entries for deduplication...")
+        
+        for idx, line in enumerate(entry_lines):
+            # Check stop flag every 100 entries
+            if idx > 0 and idx % 100 == 0:
+                if is_stop_requested():
+                    print(f"📑 ❌ Deduplication stopped at entry {idx}/{total_entries}")
+                    return deduplicated
+            
+            # Show progress for large glossaries
+            if total_entries > 500 and idx % 200 == 0:
+                progress = (idx / total_entries) * 100
+                print(f"📑 Deduplication progress: {progress:.1f}% ({idx}/{total_entries})")
+            
+            if not line.strip():
+                continue
+            
+            parts = [p.strip() for p in line.split(',')]
+            if len(parts) < 3:
+                continue
+            
+            entry_type = parts[0]
+            raw_name = parts[1]
+            translated_name = parts[2]
+            raw_name_lower = raw_name.lower()
+            
+            # Fast exact duplicate check first
+            if raw_name_lower in seen_names_lower:
+                removed_count += 1
+                continue
+            
+            # For fuzzy matching, only check if threshold is less than 1.0
+            is_duplicate = False
+            if fuzzy_threshold < 1.0:
+                # Use a more efficient approach: only check similar length strings
+                name_len = len(raw_name)
+                min_len = int(name_len * 0.7)
+                max_len = int(name_len * 1.3)
+                
+                # Only compare with entries of similar length
+                candidates = []
+                for seen_name, (seen_type, seen_trans) in seen_entries.items():
+                    if min_len <= len(seen_name) <= max_len:
+                        candidates.append(seen_name)
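
Two cheap prefilters guard the expensive `SequenceMatcher` call: a length window collects candidates (above), and a character-overlap check discards most of them before the ratio is computed (below). The full cascade, condensed into an illustrative standalone helper:

```python
from difflib import SequenceMatcher

def is_fuzzy_duplicate(name, seen_names, threshold=0.90):
    """Length window, then character overlap, then SequenceMatcher ratio."""
    lo, hi = int(len(name) * 0.7), int(len(name) * 1.3)
    name_lower = name.lower()
    for candidate in seen_names:
        if not lo <= len(candidate) <= hi:
            continue  # length prefilter
        overlap = len(set(name_lower) & set(candidate.lower()))
        if overlap < len(name_lower) * 0.5:
            continue  # character-overlap prefilter
        if SequenceMatcher(None, name_lower, candidate.lower()).ratio() >= threshold:
            return True
    return False

assert is_fuzzy_duplicate("Kim Sanghyun", ["Kim Sang-hyun"]) is True
assert is_fuzzy_duplicate("Kim Sanghyun", ["Dihirit Ade"]) is False
```
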
+                # Check fuzzy similarity with candidates
+                for seen_name in candidates:
+                    # Quick character overlap check before expensive SequenceMatcher
+                    char_overlap = len(set(raw_name_lower) & set(seen_name.lower()))
+                    if char_overlap < len(raw_name_lower) * 0.5:
+                        continue  # Too different, skip
+                    
+                    raw_similarity = SequenceMatcher(None, raw_name_lower, seen_name.lower()).ratio()
+                    
+                    if raw_similarity >= fuzzy_threshold:
+                        if removed_count < 10:  # Only log first few
+                            print(f"📑 Removing duplicate: '{raw_name}' ~= '{seen_name}' (similarity: {raw_similarity:.2%})")
+                        removed_count += 1
+                        is_duplicate = True
+                        break
+            
+            if not is_duplicate:
+                seen_entries[raw_name] = (entry_type, translated_name)
+                seen_names_lower.add(raw_name_lower)
+                deduplicated.append(line)
+        
+        print(f"📑 ✅ Removed {removed_count} duplicates from glossary")
+        print(f"📑 Final glossary size: {len(deduplicated) - 1} unique entries")
+        
+        return deduplicated
+    
+    def _merge_csv_entries(self, new_csv_lines, existing_glossary, strip_honorifics, language):
+        """Merge CSV entries with existing glossary with stop flag checks"""
+        
+        # Check stop flag at start
+        if is_stop_requested():
+            print(f"📑 ❌ Glossary merge stopped by user")
+            return new_csv_lines
+        
+        # Parse existing glossary
+        existing_lines = []
+        existing_names = set()
+        
+        if isinstance(existing_glossary, str):
+            # Already CSV format
+            lines = existing_glossary.strip().split('\n')
+            total_lines = len(lines)
+            
+            for idx, line in enumerate(lines):
+                # Check stop flag every 50 lines
+                if idx > 0 and idx % 50 == 0:
+                    if is_stop_requested():
+                        print(f"📑 ❌ Merge stopped while processing existing glossary at line {idx}/{total_lines}")
+                        return new_csv_lines
+                
+                if total_lines > 200:
+                    progress = (idx / total_lines) * 100
+                    print(f"📑 Processing existing glossary: {progress:.1f}%")
+                
+                if 'type,raw_name' in line.lower():
+                    continue  # Skip header
+                
+                line_stripped = line.strip()
+                # Skip token-efficient lines and section/bullet markers
+                if not line_stripped or line_stripped.startswith('===') or line_stripped.startswith('*') or line_stripped.lower().startswith('glossary:'):
+                    continue
+                
+                parts = [p.strip() for p in line.split(',')]
+                # Require at least 3 fields (type, raw_name, translated_name)
+                if len(parts) < 3:
+                    continue
+                
+                entry_type = parts[0].strip().lower()
+                # Only accept reasonable type tokens (letters/underscores only)
+                import re as _re
+                if not _re.match(r'^[a-z_]+$', entry_type):
+                    continue
+                
+                raw_name = parts[1]
+                if strip_honorifics:
+                    raw_name = self._strip_honorific(raw_name, language)
+                    parts[1] = raw_name
+                if raw_name not in existing_names:
+                    existing_lines.append(','.join(parts))
+                    existing_names.add(raw_name)
+        
+        # Check stop flag before processing new names
+        if is_stop_requested():
+            print(f"📑 ❌ Merge stopped before processing new entries")
+            return new_csv_lines
+        
+        # Get new names
+        new_names = set()
+        final_lines = []
+        
+        for idx, line in enumerate(new_csv_lines):
+            # Check stop flag every 50 lines
+            if idx > 0 and idx % 50 == 0:
+                if is_stop_requested():
+                    print(f"📑 ❌ Merge stopped while processing new entries at line {idx}")
+                    return final_lines if final_lines else new_csv_lines
+            
+            if 'type,raw_name' in line.lower():
+                final_lines.append(line)  # Keep header
+                continue
+            parts = [p.strip() for p in line.split(',')]
+            if len(parts) >= 2:
+                new_names.add(parts[1])
+            final_lines.append(line)
+        
+        # Check stop flag before adding existing entries
+        if is_stop_requested():
+            print(f"📑 ❌ Merge stopped before combining entries")
+            return 
final_lines + + # Add non-duplicate existing entries + added_count = 0 + for idx, line in enumerate(existing_lines): + # Check stop flag every 50 additions + if idx > 0 and idx % 50 == 0: + if is_stop_requested(): + print(f"📑 ❌ Merge stopped while adding existing entries ({added_count} added)") + return final_lines + + parts = [p.strip() for p in line.split(',')] + if len(parts) >= 2 and parts[1] not in new_names: + final_lines.append(line) + added_count += 1 + + print(f"📑 Merged {added_count} entries from existing glossary") + return final_lines + + def _extract_with_patterns(self, all_text, language, min_frequency, + max_names, max_titles, batch_size, + existing_glossary, output_dir, + strip_honorifics=True, fuzzy_threshold=0.90, filter_mode='all'): + """Extract glossary using pattern matching with true CSV format output and stop flag checks""" + print("📑 Using pattern-based extraction") + + # Check stop flag at start + if is_stop_requested(): + print("📑 ❌ Pattern-based extraction stopped by user") + return {} + + def is_valid_name(name, language_hint='unknown'): + """Strict validation for proper names only""" + if not name or len(name.strip()) < 1: + return False + + name = name.strip() + + if name.lower() in self.pattern_manager.COMMON_WORDS or name in self.pattern_manager.COMMON_WORDS: + return False + + if language_hint == 'korean': + if not (2 <= len(name) <= 4): + return False + if not all(0xAC00 <= ord(char) <= 0xD7AF for char in name): + return False + if len(set(name)) == 1: + return False + + elif language_hint == 'japanese': + if not (2 <= len(name) <= 6): + return False + has_kanji = any(0x4E00 <= ord(char) <= 0x9FFF for char in name) + has_kana = any((0x3040 <= ord(char) <= 0x309F) or (0x30A0 <= ord(char) <= 0x30FF) for char in name) + if not (has_kanji or has_kana): + return False + + elif language_hint == 'chinese': + if not (2 <= len(name) <= 4): + return False + if not all(0x4E00 <= ord(char) <= 0x9FFF for char in name): + return False + + elif language_hint == 'english': + if not name[0].isupper(): + return False + if sum(1 for c in name if c.isalpha()) < len(name) * 0.8: + return False + if not (2 <= len(name) <= 20): + return False + + return True + + def detect_language_hint(text_sample): + """Quick language detection for validation purposes""" + sample = text_sample[:1000] + + korean_chars = sum(1 for char in sample if 0xAC00 <= ord(char) <= 0xD7AF) + japanese_kana = sum(1 for char in sample if (0x3040 <= ord(char) <= 0x309F) or (0x30A0 <= ord(char) <= 0x30FF)) + chinese_chars = sum(1 for char in sample if 0x4E00 <= ord(char) <= 0x9FFF) + latin_chars = sum(1 for char in sample if 0x0041 <= ord(char) <= 0x007A) + + if korean_chars > 50: + return 'korean' + elif japanese_kana > 20: + return 'japanese' + elif chinese_chars > 50 and japanese_kana < 10: + return 'chinese' + elif latin_chars > 100: + return 'english' + else: + return 'unknown' + + language_hint = detect_language_hint(all_text) + print(f"📑 Detected primary language: {language_hint}") + + # Check stop flag after language detection + if is_stop_requested(): + print("📑 ❌ Extraction stopped after language detection") + return {} + + honorifics_to_use = [] + if language_hint in self.pattern_manager.CJK_HONORIFICS: + honorifics_to_use.extend(self.pattern_manager.CJK_HONORIFICS[language_hint]) + honorifics_to_use.extend(self.pattern_manager.CJK_HONORIFICS.get('english', [])) + + print(f"📑 Using {len(honorifics_to_use)} honorifics for {language_hint}") + + names_with_honorifics = {} + standalone_names = {} + + # 
Check if parallel processing is enabled + extraction_workers = int(os.getenv("EXTRACTION_WORKERS", "1")) + + # PARALLEL HONORIFIC PROCESSING + if extraction_workers > 1 and len(honorifics_to_use) > 3: + print(f"📑 Scanning for names with honorifics (parallel with {extraction_workers} workers)...") + + # Create a wrapper function that can be called in parallel + def process_honorific(args): + """Process a single honorific in a worker thread""" + honorific, idx, total = args + + # Check stop flag + if is_stop_requested(): + return None, None + + print(f"📑 Worker processing honorific {idx}/{total}: '{honorific}'") + + # Local dictionaries for this worker + local_names_with = {} + local_standalone = {} + + # Call the extraction method + self._extract_names_for_honorific( + honorific, all_text, language_hint, + min_frequency, local_names_with, + local_standalone, is_valid_name, fuzzy_threshold + ) + + return local_names_with, local_standalone + + # Prepare arguments for parallel processing + honorific_args = [ + (honorific, idx + 1, len(honorifics_to_use)) + for idx, honorific in enumerate(honorifics_to_use) + ] + + # Process honorifics in parallel + with ThreadPoolExecutor(max_workers=min(extraction_workers, len(honorifics_to_use))) as executor: + futures = [] + + for args in honorific_args: + if is_stop_requested(): + executor.shutdown(wait=False) + return {} + + future = executor.submit(process_honorific, args) + futures.append(future) + + # Collect results as they complete + completed = 0 + for future in as_completed(futures): + if is_stop_requested(): + executor.shutdown(wait=False) + return {} + + try: + result = future.result() + if result and result[0] is not None: + local_names_with, local_standalone = result + + # Merge results (thread-safe since we're in main thread) + for name, count in local_names_with.items(): + if name not in names_with_honorifics: + names_with_honorifics[name] = count + else: + names_with_honorifics[name] = max(names_with_honorifics[name], count) + + for name, count in local_standalone.items(): + if name not in standalone_names: + standalone_names[name] = count + else: + standalone_names[name] = max(standalone_names[name], count) + + completed += 1 + if completed % 5 == 0 or completed == len(honorifics_to_use): + print(f"📑 Honorific processing: {completed}/{len(honorifics_to_use)} completed") + + except Exception as e: + print(f"⚠️ Failed to process honorific: {e}") + completed += 1 + + print(f"📑 Parallel honorific processing completed: found {len(names_with_honorifics)} names") + + else: + # SEQUENTIAL PROCESSING (fallback) + print("📑 Scanning for names with honorifics...") + + # Extract names with honorifics + total_honorifics = len(honorifics_to_use) + for idx, honorific in enumerate(honorifics_to_use): + # Check stop flag before each honorific + if is_stop_requested(): + print(f"📑 ❌ Extraction stopped at honorific {idx}/{total_honorifics}") + return {} + + print(f"📑 Processing honorific {idx + 1}/{total_honorifics}: '{honorific}'") + + self._extract_names_for_honorific(honorific, all_text, language_hint, + min_frequency, names_with_honorifics, + standalone_names, is_valid_name, fuzzy_threshold) + + # Check stop flag before processing terms + if is_stop_requested(): + print("📑 ❌ Extraction stopped before processing terms") + return {} + + # Apply filter mode + filtered_names = {} + if filter_mode == 'only_with_honorifics': + # Only keep names that have honorifics (no standalone names) + filtered_names = names_with_honorifics.copy() + print(f"📑 Filter: Keeping 
only names with honorifics ({len(filtered_names)} names)") + elif filter_mode == 'only_without_honorifics': + # Keep standalone names that were NOT found with honorifics + for name, count in standalone_names.items(): + # Check if this name also appears with honorifics + appears_with_honorific = False + for honorific_name in names_with_honorifics.keys(): + if self._strip_honorific(honorific_name, language_hint) == name: + appears_with_honorific = True + break + + # Only add if it doesn't appear with honorifics + if not appears_with_honorific: + filtered_names[name] = count + + print(f"📑 Filter: Keeping only names without honorifics ({len(filtered_names)} names)") + else: # 'all' mode + # Keep all names (both with and without honorifics) + filtered_names = names_with_honorifics.copy() + # Also add standalone names + for name, count in standalone_names.items(): + if name not in filtered_names and not any( + self._strip_honorific(n, language_hint) == name for n in filtered_names.keys() + ): + filtered_names[name] = count + print(f"📑 Filter: Keeping all names ({len(filtered_names)} names)") + + # Process extracted terms + final_terms = {} + + term_count = 0 + total_terms = len(filtered_names) + for term, count in filtered_names.items(): + term_count += 1 + + # Check stop flag every 20 terms + if term_count % 20 == 0: + if is_stop_requested(): + print(f"📑 ❌ Term processing stopped at {term_count}/{total_terms}") + return {} + + if strip_honorifics: + clean_term = self._strip_honorific(term, language_hint) + if clean_term in final_terms: + final_terms[clean_term] = final_terms[clean_term] + count + else: + final_terms[clean_term] = count + else: + final_terms[term] = count + + # Check stop flag before finding titles + if is_stop_requested(): + print("📑 ❌ Extraction stopped before finding titles") + return {} + + # Find titles (but respect filter mode) + print("📑 Scanning for titles...") + found_titles = {} + + # Extract titles for all modes EXCEPT "only_with_honorifics" + # (titles are included in "only_without_honorifics" since titles typically don't have honorifics) + if filter_mode != 'only_with_honorifics': + title_patterns_to_use = [] + if language_hint in self.pattern_manager.TITLE_PATTERNS: + title_patterns_to_use.extend(self.pattern_manager.TITLE_PATTERNS[language_hint]) + title_patterns_to_use.extend(self.pattern_manager.TITLE_PATTERNS.get('english', [])) + + total_patterns = len(title_patterns_to_use) + for pattern_idx, pattern in enumerate(title_patterns_to_use): + # Check stop flag before each pattern + if is_stop_requested(): + print(f"📑 ❌ Title extraction stopped at pattern {pattern_idx}/{total_patterns}") + return {} + + print(f"📑 Processing title pattern {pattern_idx + 1}/{total_patterns}") + + matches = list(re.finditer(pattern, all_text, re.IGNORECASE if 'english' in pattern else 0)) + + for match_idx, match in enumerate(matches): + # Check stop flag every 50 matches + if match_idx > 0 and match_idx % 50 == 0: + if is_stop_requested(): + print(f"📑 ❌ Title extraction stopped at match {match_idx}") + return {} + + title = match.group(0) + + # Skip if this title is already in names + if title in filtered_names or title in names_with_honorifics: + continue + + count = self._find_fuzzy_matches(title, all_text, fuzzy_threshold) + + # Check if stopped during fuzzy matching + if is_stop_requested(): + print(f"📑 ❌ Title extraction stopped during fuzzy matching") + return {} + + if count >= min_frequency: + if re.match(r'[A-Za-z]', title): + title = title.title() + + if strip_honorifics: 
+                        title = self._strip_honorific(title, language_hint)
+                    
+                    if title not in found_titles:
+                        found_titles[title] = count
+            
+            if filter_mode == 'only_without_honorifics':
+                print(f"📑 Found {len(found_titles)} titles (included in 'without honorifics' mode)")
+            else:
+                print(f"📑 Found {len(found_titles)} unique titles")
+        else:
+            print(f"📑 Skipping title extraction (filter mode: only_with_honorifics)")
+        
+        # Check stop flag before sorting and translation
+        if is_stop_requested():
+            print("📑 ❌ Extraction stopped before sorting terms")
+            return {}
+        
+        # Combine and sort
+        sorted_names = sorted(final_terms.items(), key=lambda x: x[1], reverse=True)[:max_names]
+        sorted_titles = sorted(found_titles.items(), key=lambda x: x[1], reverse=True)[:max_titles]
+        
+        all_terms = []
+        for name, count in sorted_names:
+            all_terms.append(name)
+        for title, count in sorted_titles:
+            all_terms.append(title)
+        
+        print(f"📑 Total terms to translate: {len(all_terms)}")
+        
+        # Check stop flag before translation
+        if is_stop_requested():
+            print("📑 ❌ Extraction stopped before translation")
+            return {}
+        
+        # Translate terms
+        if os.getenv("DISABLE_GLOSSARY_TRANSLATION", "0") == "1":
+            print("📑 Translation disabled - keeping original terms")
+            translations = {term: term for term in all_terms}
+        else:
+            print(f"📑 Translating {len(all_terms)} terms...")
+            translations = self._translate_terms_batch(all_terms, language_hint, batch_size, output_dir)
+        
+        # Check if translation was stopped
+        if is_stop_requested():
+            print("📑 ❌ Extraction stopped after translation")
+            return translations  # Return partial results
+        
+        # Build CSV lines
+        csv_lines = ["type,raw_name,translated_name"]
+        
+        for name, _ in sorted_names:
+            if name in translations:
+                csv_lines.append(f"character,{name},{translations[name]}")
+        
+        for title, _ in sorted_titles:
+            if title in translations:
+                csv_lines.append(f"term,{title},{translations[title]}")
+        
+        # Check stop flag before merging
+        if is_stop_requested():
+            print("📑 ❌ Extraction stopped before merging with existing glossary")
+            # Still save what we have (as CSV, matching the normal save path)
+            csv_content = '\n'.join(csv_lines)
+            glossary_path = os.path.join(output_dir, "glossary.csv")
+            self._atomic_write_file(glossary_path, csv_content)
+            return self._parse_csv_to_dict(csv_content)
+        
+        # Merge with existing glossary
+        if existing_glossary:
+            csv_lines = self._merge_csv_entries(csv_lines, existing_glossary, strip_honorifics, language_hint)
+        
+        # Check stop flag before deduplication
+        if is_stop_requested():
+            print("📑 ❌ Extraction stopped before deduplication")
+            csv_content = '\n'.join(csv_lines)
+            glossary_path = os.path.join(output_dir, "glossary.csv")
+            self._atomic_write_file(glossary_path, csv_content)
+            return self._parse_csv_to_dict(csv_content)
+        
+        # Fuzzy matching deduplication
+        csv_lines = self._deduplicate_glossary_with_fuzzy(csv_lines, fuzzy_threshold)
+        
+        # Create CSV content
+        csv_content = '\n'.join(csv_lines)
+        # Save glossary as CSV
+        glossary_path = os.path.join(output_dir, "glossary.csv")
+        self._atomic_write_file(glossary_path, csv_content)
+        
+        print(f"\n📑 ✅ TARGETED GLOSSARY SAVED!")
+        print(f"📑 File: {glossary_path}")
+        print(f"📑 Total entries: {len(csv_lines) - 1}")  # Exclude header
+        
+        return self._parse_csv_to_dict(csv_content)
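
`_translate_terms_batch` below sends each batch as a numbered list and `_parse_translation_response` (further down) maps numbered reply lines back onto the originals by index. The round-trip contract, reduced to a sketch (`parse_numbered_translations` is illustrative; the real parser also tolerates separators like '—' and '='):

```python
import re

def parse_numbered_translations(response_text, originals):
    """Map '1. 김상현 -> Kim Sang-hyun' style reply lines onto the original
    terms by their 1-based index."""
    translations = {}
    for line in response_text.strip().split('\n'):
        m = re.match(r'^(\d+)\.?\s*(.+)', line.strip())
        if not m:
            continue
        idx = int(m.group(1)) - 1
        if not 0 <= idx < len(originals):
            continue
        content = m.group(2).strip()
        for sep in ('->', '→', ':'):
            if sep in content:
                content = content.split(sep, 1)[1].strip()
                break
        if content and content != originals[idx]:
            translations[originals[idx]] = content
    return translations

assert parse_numbered_translations("1. 김상현 -> Kim Sang-hyun", ["김상현"]) == {"김상현": "Kim Sang-hyun"}
```
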
+    def _translate_terms_batch(self, term_list, profile_name, batch_size=50, output_dir=None):
+        """Use fully configurable prompts for translation with interrupt support"""
+        if not term_list or os.getenv("DISABLE_GLOSSARY_TRANSLATION", "0") == "1":
+            print(f"📑 Glossary translation disabled or no terms to translate")
+            return {term: term for term in term_list}
+        
+        # Check stop flag
+        if is_stop_requested():
+            print("📑 ❌ Glossary translation stopped by user")
+            return {term: term for term in term_list}
+        
+        try:
+            MODEL = os.getenv("MODEL", "gemini-1.5-flash")
+            API_KEY = (os.getenv("API_KEY") or
+                       os.getenv("OPENAI_API_KEY") or
+                       os.getenv("OPENAI_OR_Gemini_API_KEY") or
+                       os.getenv("GEMINI_API_KEY"))
+            
+            if is_traditional_translation_api(MODEL):
+                # Traditional translation APIs are handled by a separate code path;
+                # return the terms unchanged so callers always receive a dict
+                return {term: term for term in term_list}
+            
+            if not API_KEY:
+                print(f"📑 No API key found, skipping translation")
+                return {term: term for term in term_list}
+            
+            print(f"📑 Translating {len(term_list)} {profile_name} terms to English using batch size {batch_size}...")
+            
+            from unified_api_client import UnifiedClient, UnifiedClientError
+            client = UnifiedClient(model=MODEL, api_key=API_KEY, output_dir=output_dir)
+            if hasattr(client, 'reset_cleanup_state'):
+                client.reset_cleanup_state()
+            
+            # Get custom translation prompt from environment
+            translation_prompt_template = os.getenv("GLOSSARY_TRANSLATION_PROMPT", "")
+            
+            if not translation_prompt_template:
+                translation_prompt_template = """You are translating {language} character names and important terms to English.
+                For character names, provide English transliterations or keep as romanized.
+                Keep honorifics/suffixes only if they are integral to the name.
+                Respond with the same numbered format.
+
+                Terms to translate:
+                {terms_list}
+
+                Provide translations in the same numbered format."""
+            
+            all_translations = {}
+            chunk_timeout = int(os.getenv("CHUNK_TIMEOUT", "300"))  # 5 minute default
+            
+            for i in range(0, len(term_list), batch_size):
+                # Check stop flag before each batch
+                if is_stop_requested():
+                    print(f"📑 ❌ Translation stopped at batch {(i // batch_size) + 1}")
+                    # Return partial translations
+                    for term in term_list:
+                        if term not in all_translations:
+                            all_translations[term] = term
+                    return all_translations
+                
+                batch = term_list[i:i + batch_size]
+                batch_num = (i // batch_size) + 1
+                total_batches = (len(term_list) + batch_size - 1) // batch_size
+                
+                print(f"📑 Processing batch {batch_num}/{total_batches} ({len(batch)} terms)...")
+                
+                # Format terms list
+                terms_text = ""
+                for idx, term in enumerate(batch, 1):
+                    terms_text += f"{idx}. {term}\n"
+            
+            # Replace placeholders in prompt
+            prompt = translation_prompt_template.replace('{language}', profile_name)
+            prompt = prompt.replace('{terms_list}', terms_text.strip())
+            prompt = prompt.replace('{batch_size}', str(len(batch)))
+            
+            messages = [
+                {"role": "user", "content": prompt}
+            ]
+            
+            try:
+                temperature = float(os.getenv("TEMPERATURE", "0.3"))
+                max_tokens = int(os.getenv("MAX_OUTPUT_TOKENS", "4096"))
+                
+                # Use send_with_interrupt for interruptible API call
+                print(f"📑 Sending translation request for batch {batch_num} (interruptible)...")
+                
+                response = send_with_interrupt(
+                    messages=messages,
+                    client=client,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    stop_check_fn=is_stop_requested,
+                    chunk_timeout=chunk_timeout
+                )
+                
+                # Handle response properly
+                if hasattr(response, 'content'):
+                    response_text = response.content
+                else:
+                    response_text = str(response)
+                
+                batch_translations = self._parse_translation_response(response_text, batch)
+                all_translations.update(batch_translations)
+                
+                print(f"📑 Batch {batch_num} completed: {len(batch_translations)} translations")
+                
+                # Small delay between batches to avoid rate limiting (configurable)
+                if i + batch_size < len(term_list):
+                    # Check stop before sleep
+                    if is_stop_requested():
+                        print(f"📑 ❌ Translation stopped after batch {batch_num}")
+                        # Fill in missing translations
+                        for term in term_list:
+                            if term not in all_translations:
+                                all_translations[term] = term
+                        return all_translations
+                    # Use configurable batch delay (default: 0.001s)
+                    batch_delay = float(os.getenv("GLOSSARY_BATCH_DELAY", "0.001"))
+                    if batch_delay > 0:
+                        time.sleep(batch_delay)
+                
+            except UnifiedClientError as e:
+                if "stopped by user" in str(e).lower():
+                    print(f"📑 ❌ Translation interrupted by user at batch {batch_num}")
+                    # Fill in remaining terms with originals
+                    for term in term_list:
+                        if term not in all_translations:
+                            all_translations[term] = term
+                    return all_translations
+                else:
+                    print(f"⚠️ Translation failed for batch {batch_num}: {e}")
+                    for term in batch:
+                        all_translations[term] = term
+            except Exception as e:
+                print(f"⚠️ Translation failed for batch {batch_num}: {e}")
+                for term in batch:
+                    all_translations[term] = term
+        
+        # Ensure all terms have translations
+        for term in term_list:
+            if term not in all_translations:
+                all_translations[term] = term
+        
+        translated_count = sum(1 for term, translation in all_translations.items()
+                               if translation != term and translation.strip())
+        
+        print(f"📑 Successfully translated {translated_count}/{len(term_list)} terms")
+        return all_translations
+        
+        except Exception as e:
+            print(f"⚠️ Glossary translation failed: {e}")
+            return {term: term for term in term_list}
+    
+    
+    def _extract_names_for_honorific(self, honorific, all_text, language_hint,
+                                     min_frequency, names_with_honorifics,
+                                     standalone_names, is_valid_name, fuzzy_threshold=0.90):
+        """Extract names for a specific honorific with fuzzy matching and stop flag checks"""
+        
+        # Check stop flag at start
+        if is_stop_requested():
+            print(f"📑 ❌ Name extraction for '{honorific}' stopped by user")
+            return
+        
+        if language_hint == 'korean' and not honorific.startswith('-'):
+            pattern = r'([\uac00-\ud7af]{2,4})(?=' + re.escape(honorific) + r'(?:\s|[,.\!?]|$))'
+            
+            matches = list(re.finditer(pattern, all_text))
+            total_matches = len(matches)
+            
+            for idx, match in enumerate(matches):
+                # Check stop flag every 50 matches
+                if idx > 0 and idx % 50 == 0:
+                    if is_stop_requested():
+                        print(f"📑 ❌ Korean name extraction stopped at 
{idx}/{total_matches}") + return + + # Show progress for large sets + if total_matches > 500: + progress = (idx / total_matches) * 100 + print(f"📑 Processing Korean names: {progress:.1f}% ({idx}/{total_matches})") + + potential_name = match.group(1) + + if is_valid_name(potential_name, 'korean'): + full_form = potential_name + honorific + + # Use fuzzy matching for counting with stop check + count = self._find_fuzzy_matches(full_form, all_text, fuzzy_threshold) + + # Check if stopped during fuzzy matching + if is_stop_requested(): + print(f"📑 ❌ Name extraction stopped during fuzzy matching") + return + + if count >= min_frequency: + context_patterns = [ + full_form + r'[은는이가]', + full_form + r'[을를]', + full_form + r'[에게한테]', + r'["]' + full_form, + full_form + r'[,]', + ] + + context_count = 0 + for ctx_pattern in context_patterns: + context_count += len(re.findall(ctx_pattern, all_text)) + + if context_count > 0: + names_with_honorifics[full_form] = count + standalone_names[potential_name] = count + + elif language_hint == 'japanese' and not honorific.startswith('-'): + pattern = r'([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]{2,5})(?=' + re.escape(honorific) + r'(?:\s|[、。!?]|$))' + + matches = list(re.finditer(pattern, all_text)) + total_matches = len(matches) + + for idx, match in enumerate(matches): + # Check stop flag every 50 matches + if idx > 0 and idx % 50 == 0: + if is_stop_requested(): + print(f"📑 ❌ Japanese name extraction stopped at {idx}/{total_matches}") + return + + if total_matches > 500: + progress = (idx / total_matches) * 100 + print(f"📑 Processing Japanese names: {progress:.1f}% ({idx}/{total_matches})") + + potential_name = match.group(1) + + if is_valid_name(potential_name, 'japanese'): + full_form = potential_name + honorific + count = self._find_fuzzy_matches(full_form, all_text, fuzzy_threshold) + + if is_stop_requested(): + print(f"📑 ❌ Name extraction stopped during fuzzy matching") + return + + if count >= min_frequency: + names_with_honorifics[full_form] = count + standalone_names[potential_name] = count + + elif language_hint == 'chinese' and not honorific.startswith('-'): + pattern = r'([\u4e00-\u9fff]{2,4})(?=' + re.escape(honorific) + r'(?:\s|[,。!?]|$))' + + matches = list(re.finditer(pattern, all_text)) + total_matches = len(matches) + + for idx, match in enumerate(matches): + # Check stop flag every 50 matches + if idx > 0 and idx % 50 == 0: + if is_stop_requested(): + print(f"📑 ❌ Chinese name extraction stopped at {idx}/{total_matches}") + return + + if total_matches > 500: + progress = (idx / total_matches) * 100 + print(f"📑 Processing Chinese names: {progress:.1f}% ({idx}/{total_matches})") + + potential_name = match.group(1) + + if is_valid_name(potential_name, 'chinese'): + full_form = potential_name + honorific + count = self._find_fuzzy_matches(full_form, all_text, fuzzy_threshold) + + if is_stop_requested(): + print(f"📑 ❌ Name extraction stopped during fuzzy matching") + return + + if count >= min_frequency: + names_with_honorifics[full_form] = count + standalone_names[potential_name] = count + + elif honorific.startswith('-') or honorific.startswith(' '): + is_space_separated = honorific.startswith(' ') + + if is_space_separated: + pattern_english = r'\b([A-Z][a-zA-Z]+)' + re.escape(honorific) + r'(?=\s|[,.\!?]|$)' + else: + pattern_english = r'\b([A-Z][a-zA-Z]+)' + re.escape(honorific) + r'\b' + + matches = list(re.finditer(pattern_english, all_text)) + total_matches = len(matches) + + for idx, match in enumerate(matches): + # Check stop flag every 
50 matches + if idx > 0 and idx % 50 == 0: + if is_stop_requested(): + print(f"📑 ❌ English name extraction stopped at {idx}/{total_matches}") + return + + if total_matches > 500: + progress = (idx / total_matches) * 100 + print(f"📑 Processing English names: {progress:.1f}% ({idx}/{total_matches})") + + potential_name = match.group(1) + + if is_valid_name(potential_name, 'english'): + full_form = potential_name + honorific + count = self._find_fuzzy_matches(full_form, all_text, fuzzy_threshold) + + if is_stop_requested(): + print(f"📑 ❌ Name extraction stopped during fuzzy matching") + return + + if count >= min_frequency: + names_with_honorifics[full_form] = count + standalone_names[potential_name] = count + + def _parse_translation_response(self, response, original_terms): + """Parse translation response - handles numbered format""" + translations = {} + + # Handle UnifiedResponse object + if hasattr(response, 'content'): + response_text = response.content + else: + response_text = str(response) + + lines = response_text.strip().split('\n') + + for line in lines: + line = line.strip() + if not line or not line[0].isdigit(): + continue + + try: + number_match = re.match(r'^(\d+)\.?\s*(.+)', line) + if number_match: + num = int(number_match.group(1)) - 1 + content = number_match.group(2).strip() + + if 0 <= num < len(original_terms): + original_term = original_terms[num] + + for separator in ['->', '→', ':', '-', '—', '=']: + if separator in content: + parts = content.split(separator, 1) + if len(parts) == 2: + translation = parts[1].strip() + translation = translation.strip('"\'()[]') + if translation and translation != original_term: + translations[original_term] = translation + break + else: + if content != original_term: + translations[original_term] = content + + except (ValueError, IndexError): + continue + + return translations + +# ===================================================== +# UNIFIED UTILITIES +# ===================================================== +def sanitize_resource_filename(filename): + """Sanitize resource filenames for filesystem compatibility""" + filename = unicodedata.normalize('NFC', filename) + + replacements = { + '/': '_', '\\': '_', ':': '_', '*': '_', + '?': '_', '"': '_', '<': '_', '>': '_', + '|': '_', '\0': '', '\n': '_', '\r': '_' + } + + for old, new in replacements.items(): + filename = filename.replace(old, new) + + filename = ''.join(char for char in filename if ord(char) >= 32) + + name, ext = os.path.splitext(filename) + + if not name: + name = 'resource' + + return name + ext + +def should_retain_source_extension(): + """Read GUI toggle for retaining original extension and no 'response_' prefix. + This is stored in config or env by the GUI; we read env as bridge. 
+ """ + return os.getenv('RETAIN_SOURCE_EXTENSION', os.getenv('retain_source_extension', '0')) in ('1', 'true', 'True') + +def make_safe_filename(title, actual_num): + """Create a safe filename that works across different filesystems""" + if not title: + return f"chapter_{actual_num:03d}" + + title = unicodedata.normalize('NFC', str(title)) + + dangerous_chars = { + '/': '_', '\\': '_', ':': '_', '*': '_', '?': '_', + '"': '_', '<': '_', '>': '_', '|': '_', '\0': '', + '\n': ' ', '\r': ' ', '\t': ' ' + } + + for old, new in dangerous_chars.items(): + title = title.replace(old, new) + + title = ''.join(char for char in title if ord(char) >= 32) + title = re.sub(r'\s+', '_', title) + title = title.strip('_.• \t') + + if not title or title == '_' * len(title): + title = f"chapter_{actual_num:03d}" + + return title + +def get_content_hash(html_content): + """Create a stable hash of content""" + return ContentProcessor.get_content_hash(html_content) + +def clean_ai_artifacts(text, remove_artifacts=True): + """Remove AI response artifacts from text""" + return ContentProcessor.clean_ai_artifacts(text, remove_artifacts) + +def find_glossary_file(output_dir): + """Return path to glossary file preferring CSV over JSON, or None if not found""" + candidates = [ + os.path.join(output_dir, "glossary.csv"), + os.path.join(output_dir, "glossary.json"), + ] + for p in candidates: + if os.path.exists(p): + return p + return None + +def clean_memory_artifacts(text): + """Remove any memory/summary artifacts""" + return ContentProcessor.clean_memory_artifacts(text) + +def emergency_restore_paragraphs(text, original_html=None, verbose=True): + """Emergency restoration when AI returns wall of text""" + return ContentProcessor.emergency_restore_paragraphs(text, original_html, verbose) + +def is_meaningful_text_content(html_content): + """Check if chapter has meaningful text beyond just structure""" + return ContentProcessor.is_meaningful_text_content(html_content) + +# ===================================================== +# GLOBAL SETTINGS AND FLAGS +# ===================================================== +logging.basicConfig(level=logging.DEBUG) + +try: + if hasattr(sys.stdout, 'reconfigure'): + sys.stdout.reconfigure(encoding='utf-8', errors='ignore') +except AttributeError: + if sys.stdout is None: + devnull = open(os.devnull, "wb") + sys.stdout = io.TextIOWrapper(devnull, encoding='utf-8', errors='ignore') + elif hasattr(sys.stdout, 'buffer'): + try: + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore') + except: + pass + +_stop_requested = False + +def set_stop_flag(value): + """Set the global stop flag""" + global _stop_requested + _stop_requested = value + +def is_stop_requested(): + """Check if stop was requested""" + global _stop_requested + return _stop_requested + +def set_output_redirect(log_callback=None): + """Redirect print statements to a callback function for GUI integration""" + if log_callback: + class CallbackWriter: + def __init__(self, callback): + self.callback = callback + + def write(self, text): + if text.strip(): + self.callback(text.strip()) + + def flush(self): + pass + + sys.stdout = CallbackWriter(log_callback) + +# ===================================================== +# EPUB AND FILE PROCESSING +# ===================================================== +def extract_chapter_number_from_filename(filename, opf_spine_position=None, opf_spine_data=None): + """Extract chapter number from filename, prioritizing OPF spine order""" + + # Priority 1: Use OPF 
spine position if available + if opf_spine_position is not None: + # Handle special non-chapter files (always chapter 0) + filename_lower = filename.lower() + name_without_ext = os.path.splitext(filename)[0].lower() + + # Check for special keywords OR no numbers present + special_keywords = ['title', 'toc', 'cover', 'index', 'copyright', 'preface', 'nav'] + has_special_keyword = any(name in filename_lower for name in special_keywords) + has_no_numbers = not re.search(r'\d', name_without_ext) + + if has_special_keyword or has_no_numbers: + return 0, 'opf_special_file' + + # Use spine position for regular chapters (0, 1, 2, 3...) + return opf_spine_position, 'opf_spine_order' + + # Priority 2: Check if this looks like a special file (even without OPF) + name_without_ext = os.path.splitext(filename)[0].lower() + special_keywords = ['title', 'toc', 'cover', 'index', 'copyright', 'preface'] + has_special_keyword = any(name in name_without_ext for name in special_keywords) + has_no_numbers = not re.search(r'\d', name_without_ext) + + if has_special_keyword or has_no_numbers: + return 0, 'special_file' + + # Priority 3: Try to extract sequential numbers (000, 001, 002...) + name_without_ext = os.path.splitext(filename)[0] + + # Look for simple sequential patterns first + # Priority 3: Try to extract sequential numbers and decimals + sequential_patterns = [ + (r'^(\d+)\.(\d+)$', 'decimal_number'), # 1.5, 2.3 (NEW!) + (r'^(\d{3,4})$', 'sequential_number'), # 000, 001, 0001 + (r'^(\d+)$', 'direct_number'), # 0, 1, 2 + ] + + for pattern, method in sequential_patterns: + match = re.search(pattern, name_without_ext) + if match: + if method == 'decimal_number': + # Return as float for decimal chapters + return float(f"{match.group(1)}.{match.group(2)}"), method + else: + return int(match.group(1)), method + + # Priority 4: Fall back to existing filename parsing patterns + fallback_patterns = [ + (r'^response_(\d+)[_\.]', 'response_prefix'), + (r'[Cc]hapter[_\s]*(\d+)', 'chapter_word'), + (r'[Cc]h[_\s]*(\d+)', 'ch_abbreviation'), + (r'No(\d+)', 'no_prefix'), + (r'第(\d+)[章话回]', 'chinese_chapter'), + (r'-h-(\d+)', 'h_suffix'), # For your -h-16 pattern + (r'_(\d+)', 'underscore_suffix'), + (r'-(\d+)', 'dash_suffix'), + (r'(\d+)', 'trailing_number'), + ] + + for pattern, method in fallback_patterns: + match = re.search(pattern, name_without_ext, re.IGNORECASE) + if match: + return int(match.group(1)), method + + return None, None + +def process_chapter_images(chapter_html: str, actual_num: int, image_translator: ImageTranslator, + check_stop_fn=None) -> Tuple[str, Dict[str, str]]: + """Process and translate images in a chapter""" + from bs4 import BeautifulSoup + images = image_translator.extract_images_from_chapter(chapter_html) + + if not images: + return chapter_html, {} + + print(f"🖼️ Found {len(images)} images in chapter {actual_num}") + + soup = BeautifulSoup(chapter_html, 'html.parser') + + image_translations = {} + translated_count = 0 + + max_images_per_chapter = int(os.getenv('MAX_IMAGES_PER_CHAPTER', '10')) + if len(images) > max_images_per_chapter: + print(f" ⚠️ Chapter has {len(images)} images - processing first {max_images_per_chapter} only") + images = images[:max_images_per_chapter] + + for idx, img_info in enumerate(images, 1): + if check_stop_fn and check_stop_fn(): + print("❌ Image translation stopped by user") + break + + img_src = img_info['src'] + + if img_src.startswith('../'): + img_path = os.path.join(image_translator.output_dir, img_src[3:]) + elif img_src.startswith('./'): + 
img_path = os.path.join(image_translator.output_dir, img_src[2:])
+        elif img_src.startswith('/'):
+            img_path = os.path.join(image_translator.output_dir, img_src[1:])
+        else:
+            possible_paths = [
+                os.path.join(image_translator.images_dir, os.path.basename(img_src)),
+                os.path.join(image_translator.output_dir, img_src),
+                os.path.join(image_translator.output_dir, 'images', os.path.basename(img_src)),
+                os.path.join(image_translator.output_dir, os.path.basename(img_src)),
+                os.path.join(image_translator.output_dir, os.path.dirname(img_src), os.path.basename(img_src))
+            ]
+            
+            img_path = None
+            for path in possible_paths:
+                if os.path.exists(path):
+                    img_path = path
+                    print(f"   ✅ Found image at: {path}")
+                    break
+            
+            if not img_path:
+                print(f"   ❌ Image not found in any location for: {img_src}")
+                print(f"   Tried: {possible_paths}")
+                continue
+        
+        img_path = os.path.normpath(img_path)
+        
+        if not os.path.exists(img_path):
+            print(f"   ⚠️ Image not found: {img_path}")
+            print(f"   📁 Images directory: {image_translator.images_dir}")
+            print(f"   📁 Output directory: {image_translator.output_dir}")
+            print(f"   📁 Working directory: {os.getcwd()}")
+            
+            if os.path.exists(image_translator.images_dir):
+                files = os.listdir(image_translator.images_dir)
+                print(f"   📁 Files in images dir: {files[:5]}...")
+            continue
+        
+        print(f"   🔍 Processing image {idx}/{len(images)}: {os.path.basename(img_path)}")
+        
+        context = ""
+        if img_info.get('alt'):
+            context += f", Alt text: {img_info['alt']}"
+        
+        if translated_count > 0:
+            delay = float(os.getenv('IMAGE_API_DELAY', '1.0'))
+            time.sleep(delay)
+        
+        translation_result = image_translator.translate_image(img_path, context, check_stop_fn)
+        
+        print(f"\n🔍 DEBUG: Image {idx}/{len(images)}")
+        print(f"   Translation result: {'Success' if translation_result and '[Image Translation Error:' not in translation_result else 'Failed'}")
+        if translation_result and "[Image Translation Error:" in translation_result:
+            print(f"   Error message: {translation_result}")
+        
+        if translation_result:
+            img_tag = None
+            for img in soup.find_all('img'):
+                if img.get('src') == img_src:
+                    img_tag = img
+                    break
+            
+            if img_tag:
+                hide_label = os.getenv("HIDE_IMAGE_TRANSLATION_LABEL", "0") == "1"
+                
+                print(f"   🔍 DEBUG: Integration Phase")
+                print(f"   🏷️ Hide label mode: {hide_label}")
+                print(f"   📍 Found img tag: {img_tag.get('src')}")
+                
+                # Store the translation result in the dictionary FIRST
+                image_translations[img_path] = translation_result
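
The fallback chain above tries several conventional locations before giving up on an image. Condensed into an illustrative helper (not part of the module), it is essentially first-match-wins over a candidate list:

```python
import os

def resolve_image_path(src, images_dir, output_dir):
    """Return the first existing candidate location for an image, else None."""
    base = os.path.basename(src)
    candidates = [
        os.path.join(images_dir, base),            # canonical images dir
        os.path.join(output_dir, src),             # path as written in the HTML
        os.path.join(output_dir, 'images', base),  # conventional images/ subdir
        os.path.join(output_dir, base),            # flat layout
    ]
    for path in candidates:
        if os.path.exists(path):
            return os.path.normpath(path)
    return None
```
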
<div' in translation_result:
+                trans_soup = BeautifulSoup(translation_result, 'html.parser')
+
+                # Try to get the full container first
+                full_container = trans_soup.find('div', class_=['translated-text-only', 'image-with-translation'])
+
+                if full_container:
+                    # Clone the container to avoid issues
+                    new_container = BeautifulSoup(str(full_container), 'html.parser').find('div')
+                    img_tag.replace_with(new_container)
+                    print(f"      ✅ Replaced image with full translation container")
+                else:
+                    # Fallback: manually build the structure
+                    trans_div = trans_soup.find('div', class_='image-translation')
+                    if trans_div:
+                        container = soup.new_tag('div', **{'class': 'translated-text-only' if hide_label else 'image-with-translation'})
+                        img_tag.replace_with(container)
+
+                        if not hide_label:
+                            new_img = soup.new_tag('img', src=img_src)
+                            if img_info.get('alt'):
+                                new_img['alt'] = img_info.get('alt')
+                            container.append(new_img)
+
+                        # Clone the translation div content
+                        new_trans_div = soup.new_tag('div', **{'class': 'image-translation'})
+                        # Copy all children from trans_div to new_trans_div
+                        for child in trans_div.children:
+                            if hasattr(child, 'name'):
+                                new_trans_div.append(BeautifulSoup(str(child), 'html.parser'))
+                            else:
+                                new_trans_div.append(str(child))
+
+                        container.append(new_trans_div)
+                        print(f"      ✅ Built container with translation div")
+                    else:
+                        print(f"      ⚠️ No translation div found in result")
+                        continue
+            else:
+                # Plain text translation - build structure manually
+                container = soup.new_tag('div', **{'class': 'translated-text-only' if hide_label else 'image-with-translation'})
+                img_tag.replace_with(container)
+
+                if not hide_label:
+                    new_img = soup.new_tag('img', src=img_src)
+                    if img_info.get('alt'):
+                        new_img['alt'] = img_info.get('alt')
+                    container.append(new_img)
+
+                # Create translation div with content
+                translation_div = soup.new_tag('div', **{'class': 'image-translation'})
+                if not hide_label:
+                    label_p = soup.new_tag('p')
+                    label_em = soup.new_tag('em')
+                    #label_em.string = "[Image text translation:]"
+                    label_p.append(label_em)
+                    translation_div.append(label_p)
+
+                trans_p = soup.new_tag('p')
+                trans_p.string = translation_result
+                translation_div.append(trans_p)
+                container.append(translation_div)
+                print(f"      ✅ Created plain text translation structure")
+
+            translated_count += 1
+
+            # Save to translated_images folder
+            trans_filename = f"ch{actual_num:03d}_img{idx:02d}_translation.html"
+            trans_filepath = os.path.join(image_translator.translated_images_dir, trans_filename)
+
+            # Extract just the translation content for saving
+            save_soup = BeautifulSoup(translation_result, 'html.parser')
+            save_div = save_soup.find('div', class_='image-translation')
+            if not save_div:
+                # Create a simple div for plain text
+                save_div = f'<div class="image-translation"><p>{translation_result}</p></div>'
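+            # Each translated image is also written out as a standalone HTML file in
+            # translated_images_dir (e.g. ch003_img02_translation.html) for review.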
+            with open(trans_filepath, 'w', encoding='utf-8') as f:
+                f.write(f"""<html>
+<head>
+<title>Chapter {actual_num} - Image {idx} Translation</title>
+</head>
+<body>
+<h2>Chapter {actual_num} - Image {idx}</h2>
+<hr/>
+<p>Original: {os.path.basename(img_path)}</p>
+<hr/>
+{save_div}
+</body>
+</html>""")
+
+            print(f"      ✅ Saved translation to: {trans_filename}")
+        else:
+            print(f"      ⚠️ Could not find image tag in HTML for: {img_src}")
+
+    if translated_count > 0:
+        print(f"   🖼️ Successfully translated {translated_count} images")
+
+        # Debug output
+        final_html = str(soup)
+        trans_count = final_html.count('image-translation
') + print(f" 📊 Final HTML has {trans_count} translation divs") + print(f" 📊 image_translations dict has {len(image_translations)} entries") + + prog = image_translator.load_progress() + if "image_chunks" in prog: + completed_images = [] + for img_key, img_data in prog["image_chunks"].items(): + if len(img_data["completed"]) == img_data["total"]: + completed_images.append(img_key) + + for img_key in completed_images: + del prog["image_chunks"][img_key] + + if completed_images: + image_translator.save_progress(prog) + print(f" 🧹 Cleaned up progress for {len(completed_images)} completed images") + + image_translator.save_translation_log(actual_num, image_translations) + + return str(soup), image_translations + else: + print(f" ℹ️ No images were successfully translated") + + return chapter_html, {} + +def detect_novel_numbering(chapters): + """Detect if the novel uses 0-based or 1-based chapter numbering with improved accuracy""" + print("[DEBUG] Detecting novel numbering system...") + + if not chapters: + return False + + if isinstance(chapters[0], str): + print("[DEBUG] Text file detected, skipping numbering detection") + return False + + patterns = PatternManager.FILENAME_EXTRACT_PATTERNS + + # Special check for prefix_suffix pattern like "0000_1.xhtml" + prefix_suffix_pattern = r'^(\d+)_(\d+)[_\.]' + + # Track chapter numbers from different sources + filename_numbers = [] + content_numbers = [] + has_prefix_suffix = False + prefix_suffix_numbers = [] + + for idx, chapter in enumerate(chapters): + extracted_num = None + + # Check filename patterns + if 'original_basename' in chapter and chapter['original_basename']: + filename = chapter['original_basename'] + elif 'filename' in chapter: + filename = os.path.basename(chapter['filename']) + else: + continue + + # First check for prefix_suffix pattern + prefix_match = re.search(prefix_suffix_pattern, filename, re.IGNORECASE) + if prefix_match: + has_prefix_suffix = True + # Use the SECOND number (after underscore) + suffix_num = int(prefix_match.group(2)) + prefix_suffix_numbers.append(suffix_num) + extracted_num = suffix_num + print(f"[DEBUG] Prefix_suffix pattern matched: {filename} -> Chapter {suffix_num}") + else: + # Try other patterns + for pattern in patterns: + match = re.search(pattern, filename) + if match: + extracted_num = int(match.group(1)) + #print(f"[DEBUG] Pattern '{pattern}' matched: {filename} -> Chapter {extracted_num}") + break + + if extracted_num is not None: + filename_numbers.append(extracted_num) + + # Also check chapter content for chapter declarations + if 'body' in chapter: + # Look for "Chapter N" in the first 1000 characters + content_preview = chapter['body'][:1000] + content_match = re.search(r'Chapter\s+(\d+)', content_preview, re.IGNORECASE) + if content_match: + content_num = int(content_match.group(1)) + content_numbers.append(content_num) + print(f"[DEBUG] Found 'Chapter {content_num}' in content") + + # Decision logic with improved heuristics + + # 1. If using prefix_suffix pattern, trust those numbers exclusively + if has_prefix_suffix and prefix_suffix_numbers: + min_suffix = min(prefix_suffix_numbers) + if min_suffix >= 1: + print(f"[DEBUG] ✅ 1-based novel detected (prefix_suffix pattern starts at {min_suffix})") + return False + else: + print(f"[DEBUG] ✅ 0-based novel detected (prefix_suffix pattern starts at {min_suffix})") + return True + + # 2. 
If we have content numbers, prefer those over filename numbers + if content_numbers: + min_content = min(content_numbers) + # Check if we have a good sequence starting from 0 or 1 + if 0 in content_numbers and 1 in content_numbers: + print(f"[DEBUG] ✅ 0-based novel detected (found both Chapter 0 and Chapter 1 in content)") + return True + elif min_content == 1: + print(f"[DEBUG] ✅ 1-based novel detected (content chapters start at 1)") + return False + + # 3. Fall back to filename numbers + if filename_numbers: + min_filename = min(filename_numbers) + max_filename = max(filename_numbers) + + # Check for a proper sequence + # If we have 0,1,2,3... it's likely 0-based + # If we have 1,2,3,4... it's likely 1-based + + # Count how many chapters we have in sequence starting from 0 + zero_sequence_count = 0 + for i in range(len(chapters)): + if i in filename_numbers: + zero_sequence_count += 1 + else: + break + + # Count how many chapters we have in sequence starting from 1 + one_sequence_count = 0 + for i in range(1, len(chapters) + 1): + if i in filename_numbers: + one_sequence_count += 1 + else: + break + + print(f"[DEBUG] Zero-based sequence length: {zero_sequence_count}") + print(f"[DEBUG] One-based sequence length: {one_sequence_count}") + + # If we have a better sequence starting from 1, it's 1-based + if one_sequence_count > zero_sequence_count and min_filename >= 1: + print(f"[DEBUG] ✅ 1-based novel detected (better sequence match starting from 1)") + return False + + # If we have any 0 in filenames and it's part of a sequence + if 0 in filename_numbers and zero_sequence_count >= 3: + print(f"[DEBUG] ✅ 0-based novel detected (found 0 in sequence)") + return True + + # 4. Default to 1-based if uncertain + print(f"[DEBUG] ✅ Defaulting to 1-based novel (insufficient evidence for 0-based)") + return False + +def validate_chapter_continuity(chapters): + """Validate chapter continuity and warn about issues""" + if not chapters: + print("No chapters to translate") + return + + issues = [] + + # Get all chapter numbers + chapter_nums = [c['num'] for c in chapters] + actual_nums = [c.get('actual_chapter_num', c['num']) for c in chapters] + + # Check for duplicates + duplicates = [num for num in chapter_nums if chapter_nums.count(num) > 1] + if duplicates: + issues.append(f"Duplicate chapter numbers found: {set(duplicates)}") + + # Check for gaps in sequence + min_num = min(chapter_nums) + max_num = max(chapter_nums) + expected = set(range(min_num, max_num + 1)) + actual = set(chapter_nums) + missing = expected - actual + + if missing: + issues.append(f"Missing chapter numbers: {sorted(missing)}") + # Show gaps more clearly + gaps = [] + sorted_missing = sorted(missing) + if sorted_missing: + start = sorted_missing[0] + end = sorted_missing[0] + for num in sorted_missing[1:]: + if num == end + 1: + end = num + else: + gaps.append(f"{start}-{end}" if start != end else str(start)) + start = end = num + gaps.append(f"{start}-{end}" if start != end else str(start)) + issues.append(f"Gap ranges: {', '.join(gaps)}") + + # Check for duplicate titles + title_map = {} + for c in chapters: + title_lower = c['title'].lower().strip() + if title_lower in title_map: + title_map[title_lower].append(c['num']) + else: + title_map[title_lower] = [c['num']] + + for title, nums in title_map.items(): + if len(nums) > 1: + issues.append(f"Duplicate title '{title}' in chapters: {nums}") + + # Print summary + print("\n" + "="*60) + print("📚 CHAPTER VALIDATION SUMMARY") + print("="*60) + print(f"Total chapters: 
{len(chapters)}") + print(f"Chapter range: {min_num} to {max_num}") + print(f"Expected count: {max_num - min_num + 1}") + print(f"Actual count: {len(chapters)}") + + if len(chapters) != (max_num - min_num + 1): + print(f"⚠️ Chapter count mismatch - missing {(max_num - min_num + 1) - len(chapters)} chapters") + + if issues: + print("\n⚠️ Issues found:") + for issue in issues: + print(f" - {issue}") + else: + print("✅ No continuity issues detected") + + print("="*60 + "\n") + +def validate_epub_structure(output_dir): + """Validate that all necessary EPUB structure files are present""" + print("🔍 Validating EPUB structure...") + + required_files = { + 'container.xml': 'META-INF container file (critical)', + '*.opf': 'OPF package file (critical)', + '*.ncx': 'Navigation file (recommended)' + } + + found_files = {} + missing_files = [] + + container_path = os.path.join(output_dir, 'container.xml') + if os.path.exists(container_path): + found_files['container.xml'] = 'Found' + print(" ✅ container.xml - Found") + else: + missing_files.append('container.xml') + print(" ❌ container.xml - Missing (CRITICAL)") + + opf_files = [] + ncx_files = [] + + for file in os.listdir(output_dir): + if file.lower().endswith('.opf'): + opf_files.append(file) + elif file.lower().endswith('.ncx'): + ncx_files.append(file) + + if opf_files: + found_files['opf'] = opf_files + print(f" ✅ OPF file(s) - Found: {', '.join(opf_files)}") + else: + missing_files.append('*.opf') + print(" ❌ OPF file - Missing (CRITICAL)") + + if ncx_files: + found_files['ncx'] = ncx_files + print(f" ✅ NCX file(s) - Found: {', '.join(ncx_files)}") + else: + missing_files.append('*.ncx') + print(" ⚠️ NCX file - Missing (navigation may not work)") + + html_files = [f for f in os.listdir(output_dir) if f.lower().endswith('.html') and f.startswith('response_')] + if html_files: + print(f" ✅ Translated chapters - Found: {len(html_files)} files") + else: + print(" ⚠️ No translated chapter files found") + + critical_missing = [f for f in missing_files if f in ['container.xml', '*.opf']] + + if not critical_missing: + print("✅ EPUB structure validation PASSED") + print(" All critical files present for EPUB reconstruction") + return True + else: + print("❌ EPUB structure validation FAILED") + print(f" Missing critical files: {', '.join(critical_missing)}") + print(" EPUB reconstruction may fail without these files") + return False + +def check_epub_readiness(output_dir): + """Check if the output directory is ready for EPUB compilation""" + print("📋 Checking EPUB compilation readiness...") + + issues = [] + + if not validate_epub_structure(output_dir): + issues.append("Missing critical EPUB structure files") + + html_files = [f for f in os.listdir(output_dir) if f.lower().endswith('.html') and f.startswith('response_')] + if not html_files: + issues.append("No translated chapter files found") + else: + print(f" ✅ Found {len(html_files)} translated chapters") + + metadata_path = os.path.join(output_dir, 'metadata.json') + if os.path.exists(metadata_path): + print(" ✅ Metadata file present") + try: + with open(metadata_path, 'r', encoding='utf-8') as f: + metadata = json.load(f) + if 'title' not in metadata: + issues.append("Metadata missing title") + except Exception as e: + issues.append(f"Metadata file corrupted: {e}") + else: + issues.append("Missing metadata.json file") + + resource_dirs = ['css', 'fonts', 'images'] + found_resources = 0 + for res_dir in resource_dirs: + res_path = os.path.join(output_dir, res_dir) + if os.path.exists(res_path): + 
files = [f for f in os.listdir(res_path) if os.path.isfile(os.path.join(res_path, f))] + if files: + found_resources += len(files) + print(f" ✅ Found {len(files)} {res_dir} files") + + if found_resources > 0: + print(f" ✅ Total resources: {found_resources} files") + else: + print(" ⚠️ No resource files found (this may be normal)") + + if not issues: + print("🎉 EPUB compilation readiness: READY") + print(" All necessary files present for EPUB creation") + return True + else: + print("⚠️ EPUB compilation readiness: ISSUES FOUND") + for issue in issues: + print(f" • {issue}") + return False + +def cleanup_previous_extraction(output_dir): + """Clean up any files from previous extraction runs (preserves CSS files)""" + # Remove 'css' from cleanup_items to preserve CSS files + cleanup_items = [ + 'images', # Removed 'css' from this list + '.resources_extracted' + ] + + epub_structure_files = [ + 'container.xml', 'content.opf', 'toc.ncx' + ] + + cleaned_count = 0 + + # Clean up directories (except CSS) + for item in cleanup_items: + if item.startswith('.'): + continue + item_path = os.path.join(output_dir, item) + try: + if os.path.isdir(item_path): + shutil.rmtree(item_path) + print(f"🧹 Removed directory: {item}") + cleaned_count += 1 + except Exception as e: + print(f"⚠️ Could not remove directory {item}: {e}") + + # Clean up EPUB structure files + for epub_file in epub_structure_files: + file_path = os.path.join(output_dir, epub_file) + try: + if os.path.isfile(file_path): + os.remove(file_path) + print(f"🧹 Removed EPUB file: {epub_file}") + cleaned_count += 1 + except Exception as e: + print(f"⚠️ Could not remove {epub_file}: {e}") + + # Clean up any loose .opf and .ncx files + try: + for file in os.listdir(output_dir): + if file.lower().endswith(('.opf', '.ncx')): + file_path = os.path.join(output_dir, file) + if os.path.isfile(file_path): + os.remove(file_path) + print(f"🧹 Removed EPUB file: {file}") + cleaned_count += 1 + except Exception as e: + print(f"⚠️ Error scanning for EPUB files: {e}") + + # Remove extraction marker + marker_path = os.path.join(output_dir, '.resources_extracted') + try: + if os.path.isfile(marker_path): + os.remove(marker_path) + print(f"🧹 Removed extraction marker") + cleaned_count += 1 + except Exception as e: + print(f"⚠️ Could not remove extraction marker: {e}") + + # Check if CSS files exist and inform user they're being preserved + css_path = os.path.join(output_dir, 'css') + if os.path.exists(css_path): + try: + css_files = [f for f in os.listdir(css_path) if os.path.isfile(os.path.join(css_path, f))] + if css_files: + print(f"📚 Preserving {len(css_files)} CSS files") + except Exception: + pass + + if cleaned_count > 0: + print(f"🧹 Cleaned up {cleaned_count} items from previous runs (CSS files preserved)") + + return cleaned_count + +# ===================================================== +# API AND TRANSLATION UTILITIES +# ===================================================== +def send_with_interrupt(messages, client, temperature, max_tokens, stop_check_fn, chunk_timeout=None, request_id=None, context=None): + """Send API request with interrupt capability and optional timeout retry. + Optional context parameter is passed through to the client to improve payload labeling. 
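+    Usage sketch (illustrative; assumes a configured UnifiedClient and stop flag):
+        content, finish_reason = send_with_interrupt(
+            messages, client, temperature=0.3, max_tokens=8192,
+            stop_check_fn=lambda: False, chunk_timeout=900)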
+ """ + # Import UnifiedClientError at function level to avoid scoping issues + from unified_api_client import UnifiedClientError + + # The client.send() call will handle multi-key rotation automatically + + # Generate request_id if not provided + #if request_id is None: + # request_id = str(uuid.uuid4())[:8] + + result_queue = queue.Queue() + + def api_call(): + try: + start_time = time.time() + + # Check if client.send accepts request_id parameter + send_params = { + 'messages': messages, + 'temperature': temperature, + 'max_tokens': max_tokens + } + # Add context if supported + sig = inspect.signature(client.send) + if 'context' in sig.parameters and context is not None: + send_params['context'] = context + + # Add request_id if the client supports it + sig = inspect.signature(client.send) + #if 'request_id' in sig.parameters: + # send_params['request_id'] = request_id + + result = client.send(**send_params) + elapsed = time.time() - start_time + result_queue.put((result, elapsed)) + except Exception as e: + result_queue.put(e) + + api_thread = threading.Thread(target=api_call) + api_thread.daemon = True + api_thread.start() + + timeout = chunk_timeout if chunk_timeout is not None else 86400 + check_interval = 0.5 + elapsed = 0 + + while elapsed < timeout: + try: + result = result_queue.get(timeout=check_interval) + if isinstance(result, Exception): + # For expected errors like rate limits, preserve the error type without extra traceback + if hasattr(result, 'error_type') and result.error_type == "rate_limit": + raise result + elif "429" in str(result) or "rate limit" in str(result).lower(): + # Convert generic exceptions to UnifiedClientError for rate limits + raise UnifiedClientError(str(result), error_type="rate_limit") + else: + raise result + if isinstance(result, tuple): + api_result, api_time = result + if chunk_timeout and api_time > chunk_timeout: + # Set cleanup flag when chunk timeout occurs + if hasattr(client, '_in_cleanup'): + client._in_cleanup = True + if hasattr(client, 'cancel_current_operation'): + client.cancel_current_operation() + raise UnifiedClientError(f"API call took {api_time:.1f}s (timeout: {chunk_timeout}s)") + return api_result + return result + except queue.Empty: + if stop_check_fn(): + # Set cleanup flag when user stops + if hasattr(client, '_in_cleanup'): + client._in_cleanup = True + if hasattr(client, 'cancel_current_operation'): + client.cancel_current_operation() + raise UnifiedClientError("Translation stopped by user") + elapsed += check_interval + + # Set cleanup flag when timeout occurs + if hasattr(client, '_in_cleanup'): + client._in_cleanup = True + if hasattr(client, 'cancel_current_operation'): + client.cancel_current_operation() + raise UnifiedClientError(f"API call timed out after {timeout} seconds") + +def handle_api_error(processor, error, chunk_info=""): + """Handle API errors with multi-key support""" + error_str = str(error) + + # Check for rate limit + if "429" in error_str or "rate limit" in error_str.lower(): + if processor.config.use_multi_api_keys: + print(f"⚠️ Rate limit hit {chunk_info}, client should rotate to next key") + stats = processor.client.get_stats() + print(f"📊 API Stats - Active keys: {stats.get('active_keys', 0)}/{stats.get('total_keys', 0)}") + + if stats.get('active_keys', 0) == 0: + print("⏳ All API keys are cooling down - will wait and retry") + print(f"🔄 Multi-key error handling: Rate limit processed, preparing for key rotation...") + time.sleep(0.1) # Brief pause after rate limit detection for stability + 
return True # Always retry + else: + print(f"⚠️ Rate limit hit {chunk_info}, waiting before retry...") + time.sleep(60) + print(f"🔄 Single-key error handling: Rate limit wait completed, ready for retry...") + time.sleep(0.1) # Brief pause after rate limit wait for stability + return True # Always retry + + # Other errors + print(f"❌ API Error {chunk_info}: {error_str}") + return False + +def parse_token_limit(env_value): + """Parse token limit from environment variable""" + if not env_value or env_value.strip() == "": + return None, "unlimited" + + env_value = env_value.strip() + if env_value.lower() == "unlimited": + return None, "unlimited" + + if env_value.isdigit() and int(env_value) > 0: + limit = int(env_value) + return limit, str(limit) + + return 1000000, "1000000 (default)" + +def build_system_prompt(user_prompt, glossary_path=None): + """Build the system prompt with glossary - TRUE BRUTE FORCE VERSION""" + append_glossary = os.getenv("APPEND_GLOSSARY", "1") == "1" + actual_glossary_path = glossary_path + + + system = user_prompt if user_prompt else "" + + if append_glossary and actual_glossary_path and os.path.exists(actual_glossary_path): + try: + print(f"[DEBUG] ✅ Loading glossary from: {os.path.abspath(actual_glossary_path)}") + + # Try to load as JSON first + try: + with open(actual_glossary_path, "r", encoding="utf-8") as gf: + glossary_data = json.load(gf) + glossary_text = json.dumps(glossary_data, ensure_ascii=False, indent=2) + print(f"[DEBUG] Loaded as JSON") + except json.JSONDecodeError: + # If JSON fails, just read as raw text + #print(f"[DEBUG] JSON parse failed, reading as raw text") + with open(actual_glossary_path, "r", encoding="utf-8") as gf: + glossary_text = gf.read() + + if system: + system += "\n\n" + + custom_prompt = os.getenv("APPEND_GLOSSARY_PROMPT", "Character/Term Glossary (use these translations consistently):").strip() + if not custom_prompt: + custom_prompt = "Character/Term Glossary (use these translations consistently):" + + system += f"{custom_prompt}\n{glossary_text}" + + print(f"[DEBUG] ✅ Entire glossary appended!") + print(f"[DEBUG] Glossary text length: {len(glossary_text)} characters") + + except Exception as e: + print(f"[ERROR] Could not load glossary: {e}") + import traceback + print(f"[ERROR] Full traceback: {traceback.format_exc()}") + else: + if not append_glossary: + #print(f"[DEBUG] ❌ Glossary append disabled") + pass + elif not actual_glossary_path: + print(f"[DEBUG] ❌ No glossary path provided") + elif not os.path.exists(actual_glossary_path): + print(f"[DEBUG] ❌ Glossary file does not exist: {actual_glossary_path}") + + print(f"🎯 Final system prompt length: {len(system)} characters") + + return system + +def translate_title(title, client, system_prompt, user_prompt, temperature=0.3): + """Translate the book title using the configured settings""" + if not title or not title.strip(): + return title + + print(f"📚 Processing book title: {title}") + + try: + if os.getenv("TRANSLATE_BOOK_TITLE", "1") == "0": + print(f"📚 Book title translation disabled - keeping original") + return title + + # Check if we're using a translation service (not AI) + client_type = getattr(client, 'client_type', '') + is_translation_service = client_type in ['deepl', 'google_translate'] + + if is_translation_service: + # For translation services, send only the text without AI prompts + print(f"📚 Using translation service ({client_type}) - sending text directly") + messages = [ + {"role": "user", "content": title} + ] + max_tokens = 
int(os.getenv("MAX_OUTPUT_TOKENS", "8192")) + translated_title, _ = client.send(messages, temperature=temperature, max_tokens=max_tokens) + else: + # For AI services, use prompts as before + book_title_prompt = os.getenv("BOOK_TITLE_PROMPT", + "Translate this book title to English while retaining any acronyms:") + + # Get the system prompt for book titles, with fallback to default + book_title_system_prompt = os.getenv("BOOK_TITLE_SYSTEM_PROMPT", + "You are a translator. Respond with only the translated text, nothing else. Do not add any explanation or additional content.") + + messages = [ + {"role": "system", "content": book_title_system_prompt}, + {"role": "user", "content": f"{book_title_prompt}\n\n{title}"} + ] + max_tokens = int(os.getenv("MAX_OUTPUT_TOKENS", "8192")) + translated_title, _ = client.send(messages, temperature=temperature, max_tokens=max_tokens) + + print(f"[DEBUG] Raw API response: '{translated_title}'") + print(f"[DEBUG] Response length: {len(translated_title)} (original: {len(title)})") + newline = '\n' + print(f"[DEBUG] Has newlines: {repr(translated_title) if newline in translated_title else 'No'}") + + translated_title = translated_title.strip() + + if ((translated_title.startswith('"') and translated_title.endswith('"')) or + (translated_title.startswith("'") and translated_title.endswith("'"))): + translated_title = translated_title[1:-1].strip() + + if '\n' in translated_title: + print(f"⚠️ API returned multi-line content, keeping original title") + return title + + # Check for JSON-like structured content, but allow simple brackets like [END] + if (any(char in translated_title for char in ['{', '}']) or + '"role":' in translated_title or + '"content":' in translated_title or + ('[[' in translated_title and ']]' in translated_title)): # Only flag double brackets + print(f"⚠️ API returned structured content, keeping original title") + return title + + if any(tag in translated_title.lower() for tag in ['
<html', '<div', '<p>', '<br', '<span']):
+            print(f"⚠️ API returned HTML markup, keeping original title")
+            return title
+
+        return translated_title
+
+    except Exception as e:
+        print(f"⚠️ Title translation failed ({e}) - keeping original title")
+        return title
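+# (illustrative) usage sketch, matching the call made from main():
+#   new_title = translate_title(metadata["title"], client, None, None, config.TEMP)
+#   translate_title returns the original string whenever the response looks unusable.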
+def is_qa_failed_response(content):
+    """
+    Heuristic check for API responses that should be marked qa_failed.
+    Counts generic error indicators in the response text; get_failure_reason()
+    below reports the specific category for logging.
+    """
+    if not content:
+        return True
+
+    content_str = str(content).strip()
+    content_lower = content_str.lower()
+
+    # Generic indicators mirrored from the categories in get_failure_reason()
+    error_indicators = [
+        "translation failed", "api error", "content_filter", "timeout",
+        "rate limit", "quota exceeded", "i cannot", "unable to process"
+    ]
+    error_count = sum(1 for indicator in error_indicators if indicator in content_lower)
+
+    # Multiple error indicators almost always mean a failed response
+    if error_count >= 2:
+        return True
+
+    # Single strong error indicator in very short response
+    if len(content_str) < 50 and error_count >= 1:
+        return True
+
+    return False
+
+
+# Additional helper function for debugging
+def get_failure_reason(content):
+    """
+    Returns the specific reason why content was marked as qa_failed
+    Useful for debugging and logging
+    """
+    if not content:
+        return "Empty content"
+
+    content_str = str(content).strip()
+    content_lower = content_str.lower()
+
+    # Check each category and return the first match
+    failure_categories = {
+        "Explicit Failure Marker": [
+            "[TRANSLATION FAILED - ORIGINAL TEXT PRESERVED]",
+            "[IMAGE TRANSLATION FAILED]",
+            "API response unavailable",
+            "[]"
+        ],
+        "HTTP Error": [
+            "authentication_error", "rate_limit_error", "api_error"
+        ],
+        "Content Filter": [
+            "content_filter", "safety filter", "blocked by safety"
+        ],
+        "Timeout": [
+            "timeout", "timed out", "apitimeouterror"
+        ],
+        "Rate Limit": [
+            "rate limit exceeded", "quota exceeded", "too many requests"
+        ],
+        "Refusal Pattern": [
+            "i cannot", "i can't", "unable to process"
+        ],
+        "Empty Response": [
+            '"text": ""', "choices: [ { text: ''"
+        ]
+    }
+
+    for category, markers in failure_categories.items():
+        for marker in markers:
+            if marker in content_str or marker in content_lower:
+                return f"{category}: {marker}"
+
+    if len(content_str) < 50:
+        return f"Short response with error indicators: {content_str[:30]}..."
+
+    return "Unknown failure pattern"
+
+def convert_enhanced_text_to_html(plain_text, chapter_info=None):
+    """Convert markdown/plain text back to HTML after translation (for enhanced mode)
+
+    This function handles the conversion of translated markdown back to HTML.
+    The input is the TRANSLATED text that was originally extracted using html2text.
+    """
+    import re
+
+    preserve_structure = chapter_info.get('preserve_structure', False) if chapter_info else False
+
+    # First, try to use markdown2 for proper markdown conversion
+    try:
+        import markdown2
+
+        # Check if the text contains markdown patterns
+        has_markdown = any([
+            '##' in plain_text,  # Headers
+            '**' in plain_text,  # Bold
+            '*' in plain_text and not '**' in plain_text,  # Italic
+            '[' in plain_text and '](' in plain_text,  # Links
+            '```' in plain_text,  # Code blocks
+            '> ' in plain_text,  # Blockquotes
+            '- ' in plain_text or '* ' in plain_text or '1. ' in plain_text  # Lists
+        ])
+
+        if has_markdown or preserve_structure:
+            # Use markdown2 for proper conversion
+            html = markdown2.markdown(plain_text, extras=[
+                'cuddled-lists',  # Lists without blank lines
+                'fenced-code-blocks',  # Code blocks with ```
+                'break-on-newline',  # Treat single newlines as <br>
+                'smarty-pants',  # Smart quotes and dashes
+                'tables',  # Markdown tables
+            ])
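+            # (illustrative) a rough round-trip with these extras:
+            #   markdown2.markdown("# Title\n\n**bold**", extras=['break-on-newline'])
+            #   -> '<h1>Title</h1>\n\n<p><strong>bold</strong></p>\n'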
+
+            # Post-process to ensure proper paragraph structure
+            if '<p>' not in html:
+                # If markdown2 didn't create paragraphs, wrap content
+                lines = html.split('\n')
+                processed_lines = []
+                for line in lines:
+                    line = line.strip()
+                    if line and not line.startswith('<') and not line.endswith('>'):
+                        processed_lines.append(f'<p>{line}</p>')
+                    elif line:
+                        processed_lines.append(line)
+                html = '\n'.join(processed_lines)
+
+            return html
+
+    except ImportError:
+        print("⚠️ markdown2 not available, using fallback HTML conversion")
+
+    # Fallback: Manual markdown-to-HTML conversion
+    lines = plain_text.strip().split('\n')
+    html_parts = []
+    in_code_block = False
+    code_block_content = []
+
+    for line in lines:
+        # Handle code blocks
+        if line.strip().startswith('```'):
+            if in_code_block:
+                # End code block
+                html_parts.append('<pre><code>' + '\n'.join(code_block_content) + '</code></pre>')
+                code_block_content = []
+                in_code_block = False
+            else:
+                # Start code block
+                in_code_block = True
+            continue
+
+        if in_code_block:
+            code_block_content.append(line)
+            continue
+
+        line = line.strip()
+        if not line:
+            # Preserve empty lines as paragraph breaks
+            if html_parts and not html_parts[-1].endswith('</p>'):
+                # Only add break if not already after a closing tag
+                html_parts.append('<br>')
+            continue
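+        # (illustrative) the header branch below maps '#' depth to the tag level,
+        # e.g. "## Chapter Two" -> "<h2>Chapter Two</h2>", capped at <h6>.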
+        # Check for markdown headers
+        if line.startswith('#'):
+            match = re.match(r'^(#+)\s*(.+)$', line)
+            if match:
+                level = min(len(match.group(1)), 6)
+                header_text = match.group(2).strip()
+                html_parts.append(f'<h{level}>{header_text}</h{level}>')
+                continue
+
+        # Check for blockquotes
+        if line.startswith('> '):
+            quote_text = line[2:].strip()
+            html_parts.append(f'<blockquote>{quote_text}</blockquote>')
+            continue
+
+        # Check for lists
+        if re.match(r'^[*\-+]\s+', line):
+            list_text = re.sub(r'^[*\-+]\s+', '', line)
+            html_parts.append(f'<li>{list_text}</li>')
+            continue
+
+        if re.match(r'^\d+\.\s+', line):
+            list_text = re.sub(r'^\d+\.\s+', '', line)
+            html_parts.append(f'<li>{list_text}</li>')
+            continue
+
+        # Convert inline markdown
+        # Bold
+        line = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', line)
+        line = re.sub(r'__(.+?)__', r'<strong>\1</strong>', line)
+
+        # Italic
+        line = re.sub(r'\*(.+?)\*', r'<em>\1</em>', line)
+        line = re.sub(r'_(.+?)_', r'<em>\1</em>', line)
+
+        # Links
+        line = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', line)
+
+        # Code inline
+        line = re.sub(r'`([^`]+)`', r'<code>\1</code>', line)
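+        # (illustrative) after these passes, "**bold** [here](http://x)" becomes
+        # '<strong>bold</strong> <a href="http://x">here</a>'.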
+        # Regular paragraph
+        html_parts.append(f'<p>{line}</p>')
+
+    # Post-process lists to wrap in ul/ol tags
+    final_html = []
+    in_list = False
+    list_type = None
+
+    for part in html_parts:
+        if part.startswith('<li>'):
+            if not in_list:
+                # Determine list type based on context (simplified)
+                list_type = 'ul'  # Default to unordered
+                final_html.append(f'<{list_type}>')
+                in_list = True
+            final_html.append(part)
+        else:
+            if in_list:
+                final_html.append(f'</{list_type}>')
+                in_list = False
+            final_html.append(part)
+
+    # Close any open list
+    if in_list:
+        final_html.append(f'</{list_type}>')
+
+    return '\n'.join(final_html)
+# =====================================================
+# MAIN TRANSLATION FUNCTION
+# =====================================================
+def main(log_callback=None, stop_callback=None):
+    """Main translation function with enhanced duplicate detection and progress tracking"""
+
+    config = TranslationConfig()
+    builtins._DISABLE_ZERO_DETECTION = config.DISABLE_ZERO_DETECTION
+
+    if config.DISABLE_ZERO_DETECTION:
+        print("=" * 60)
+        print("⚠️ 0-BASED DETECTION DISABLED BY USER")
+        print("⚠️ All chapter numbers will be used exactly as found")
+        print("=" * 60)
+
+    args = None
+    chapters_completed = 0
+    chunks_completed = 0
+
+    input_path = config.input_path
+    if not input_path and len(sys.argv) > 1:
+        input_path = sys.argv[1]
+
+    is_text_file = input_path.lower().endswith('.txt')
+
+    if is_text_file:
+        os.environ["IS_TEXT_FILE_TRANSLATION"] = "1"
+
+    import json as _json
+    _original_load = _json.load
+
+    def debug_json_load(fp, *args, **kwargs):
+        result = _original_load(fp, *args, **kwargs)
+        if isinstance(result, list) and len(result) > 0:
+            if isinstance(result[0], dict) and 'original_name' in result[0]:
+                print(f"[DEBUG] Loaded glossary list with {len(result)} items from {fp.name if hasattr(fp, 'name') else 'unknown'}")
+        return result
+
+    _json.load = debug_json_load
+
+    if log_callback:
+        set_output_redirect(log_callback)
+
+    def check_stop():
+        if stop_callback and stop_callback():
+            print("❌ Translation stopped by user request.")
+            return True
+        return is_stop_requested()
+
+    if config.EMERGENCY_RESTORE:
+        print("✅ Emergency paragraph restoration is ENABLED")
+    else:
+        print("⚠️ Emergency paragraph restoration is DISABLED")
+
+    print(f"[DEBUG] REMOVE_AI_ARTIFACTS environment variable: {os.getenv('REMOVE_AI_ARTIFACTS', 'NOT SET')}")
+    print(f"[DEBUG] REMOVE_AI_ARTIFACTS parsed value: {config.REMOVE_AI_ARTIFACTS}")
+    if config.REMOVE_AI_ARTIFACTS:
+        print("⚠️ AI artifact removal is ENABLED - will clean AI response artifacts")
+    else:
+        print("✅ AI artifact removal is DISABLED - preserving all content as-is")
+
+    if '--epub' in sys.argv or (len(sys.argv) > 1 and sys.argv[1].endswith(('.epub', '.txt'))):
+        import argparse
+        parser = argparse.ArgumentParser()
+        parser.add_argument('epub', help='Input EPUB or text file')
+        args = parser.parse_args()
+        input_path = args.epub
+
+        is_text_file = input_path.lower().endswith('.txt')
+
+    if is_text_file:
+        file_base = os.path.splitext(os.path.basename(input_path))[0]
+    else:
+        epub_base = os.path.splitext(os.path.basename(input_path))[0]
+        file_base = epub_base
+
+    out = file_base
+    os.makedirs(out, exist_ok=True)
+    print(f"[DEBUG] Created output folder → {out}")
+
+    cleanup_previous_extraction(out)
+
+    os.environ["EPUB_OUTPUT_DIR"] = out
+    payloads_dir = out
+
+    # clear history if CONTEXTUAL is disabled
+    if not config.CONTEXTUAL:
+        history_file = os.path.join(payloads_dir, "translation_history.json")
+        if os.path.exists(history_file):
+            os.remove(history_file)
+            print("[DEBUG] CONTEXTUAL disabled - cleared translation history")
+
+    history_manager = HistoryManager(payloads_dir)
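+    # History and progress helpers keep their state files under payloads_dir
+    # (the output folder), alongside translation_history.json handled above.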
chapter_splitter = ChapterSplitter(model_name=config.MODEL) + chunk_context_manager = ChunkContextManager() + progress_manager = ProgressManager(payloads_dir) + + # Create ChapterExtractor with progress callback if available + chapter_progress_callback = None + if log_callback: + # Create a wrapper that formats progress messages for the log + def chapter_progress_callback(msg): + log_callback(f"📊 {msg}") + + chapter_extractor = ChapterExtractor(progress_callback=chapter_progress_callback) + glossary_manager = GlossaryManager() + + history_file = os.path.join(payloads_dir, "translation_history.json") + if os.path.exists(history_file): + os.remove(history_file) + print(f"[DEBUG] Purged translation history → {history_file}") + + print("🔍 Checking for deleted output files...") + progress_manager.cleanup_missing_files(out) + progress_manager.save() + + if check_stop(): + return + + if not config.API_KEY: + print("❌ Error: Set API_KEY, OPENAI_API_KEY, or OPENAI_OR_Gemini_API_KEY in your environment.") + return + + #print(f"[DEBUG] Found API key: {config.API_KEY[:10]}...") + print(f"[DEBUG] Using model = {config.MODEL}") + print(f"[DEBUG] Max output tokens = {config.MAX_OUTPUT_TOKENS}") + + client = UnifiedClient(model=config.MODEL, api_key=config.API_KEY, output_dir=out) + if hasattr(client, 'use_multi_keys') and client.use_multi_keys: + stats = client.get_stats() + print(f"🔑 Multi-key mode active: {stats.get('total_keys', 0)} keys loaded") + print(f" Active keys: {stats.get('active_keys', 0)}") + else: + print(f"🔑 Single-key mode: Using {config.MODEL}") + # Reset cleanup state when starting new translation + if hasattr(client, 'reset_cleanup_state'): + client.reset_cleanup_state() + + if is_text_file: + print("📄 Processing text file...") + try: + txt_processor = TextFileProcessor(input_path, out) + chapters = txt_processor.extract_chapters() + txt_processor.save_original_structure() + + metadata = { + "title": os.path.splitext(os.path.basename(input_path))[0], + "type": "text", + "chapter_count": len(chapters) + } + except ImportError as e: + print(f"❌ Error: Text file processor not available: {e}") + if log_callback: + log_callback(f"❌ Error: Text file processor not available: {e}") + return + except Exception as e: + print(f"❌ Error processing text file: {e}") + if log_callback: + log_callback(f"❌ Error processing text file: {e}") + return + else: + # Check if we should use async extraction (for GUI mode) + use_async_extraction = os.getenv("USE_ASYNC_CHAPTER_EXTRACTION", "0") == "1" + + if use_async_extraction and log_callback: + print("🚀 Using async chapter extraction (subprocess mode)...") + from chapter_extraction_manager import ChapterExtractionManager + + # Create manager with log callback + extraction_manager = ChapterExtractionManager(log_callback=log_callback) + + # Get extraction mode + extraction_mode = os.getenv("EXTRACTION_MODE", "smart").lower() + + # Define completion callback + extraction_result = {"completed": False, "result": None} + + def on_extraction_complete(result): + extraction_result["completed"] = True + extraction_result["result"] = result + + # Safety check for None result + if result is None: + log_callback("❌ Chapter extraction failed: No result returned") + return + + if result.get("success"): + log_callback(f"✅ Chapter extraction completed: {result.get('chapters', 0)} chapters") + else: + log_callback(f"❌ Chapter extraction failed: {result.get('error', 'Unknown error')}") + + # Start async extraction + extraction_manager.extract_chapters_async( + input_path, + 
out, + extraction_mode=extraction_mode, + progress_callback=lambda msg: log_callback(f"📊 {msg}"), + completion_callback=on_extraction_complete + ) + + # Wait for completion (with timeout) + timeout = 300 # 5 minutes timeout + start_time = time.time() + + while not extraction_result["completed"]: + if check_stop(): + extraction_manager.stop_extraction() + return + + if time.time() - start_time > timeout: + log_callback("⚠️ Chapter extraction timeout") + extraction_manager.stop_extraction() + return + + time.sleep(0.1) # Check every 100ms + + # Check if extraction was successful + if not extraction_result["result"] or not extraction_result["result"].get("success"): + log_callback("❌ Chapter extraction failed") + return + + # Load the extracted data + metadata_path = os.path.join(out, "metadata.json") + if os.path.exists(metadata_path): + with open(metadata_path, 'r', encoding='utf-8') as f: + metadata = json.load(f) + else: + metadata = extraction_result["result"].get("metadata", {}) + + # The async extraction should have saved chapters directly, similar to the sync version + # We need to reconstruct the chapters list with body content + + # Check if the extraction actually created a chapters.json file with full content + chapters_full_path = os.path.join(out, "chapters_full.json") + chapters_info_path = os.path.join(out, "chapters_info.json") + + chapters = [] + + # First try to load full chapters if saved + if os.path.exists(chapters_full_path): + log_callback("Loading full chapters data...") + with open(chapters_full_path, 'r', encoding='utf-8') as f: + chapters = json.load(f) + log_callback(f"✅ Loaded {len(chapters)} chapters with content") + + elif os.path.exists(chapters_info_path): + # Fall back to loading from individual files + log_callback("Loading chapter info and searching for content files...") + with open(chapters_info_path, 'r', encoding='utf-8') as f: + chapters_info = json.load(f) + + # List all files in the output directory + all_files = os.listdir(out) + log_callback(f"Found {len(all_files)} files in output directory") + + # Try to match chapter files + for info in chapters_info: + chapter_num = info['num'] + found = False + + # Try different naming patterns + patterns = [ + f"chapter_{chapter_num:04d}_", # With leading zeros + f"chapter_{chapter_num}_", # Without leading zeros + f"ch{chapter_num:04d}_", # Shortened with zeros + f"ch{chapter_num}_", # Shortened without zeros + f"{chapter_num:04d}_", # Just number with zeros + f"{chapter_num}_" # Just number + ] + + for pattern in patterns: + # Find files matching this pattern (any extension) + matching_files = [f for f in all_files if f.startswith(pattern)] + + if matching_files: + # Prefer HTML/XHTML files + html_files = [f for f in matching_files if f.endswith(('.html', '.xhtml', '.htm'))] + if html_files: + chapter_file = html_files[0] + else: + chapter_file = matching_files[0] + + chapter_path = os.path.join(out, chapter_file) + + try: + with open(chapter_path, 'r', encoding='utf-8') as f: + content = f.read() + + chapters.append({ + "num": chapter_num, + "title": info.get("title", f"Chapter {chapter_num}"), + "body": content, + "filename": info.get("original_filename", ""), + "has_images": info.get("has_images", False), + "file_size": len(content), + "content_hash": info.get("content_hash", "") + }) + found = True + break + except Exception as e: + log_callback(f"⚠️ Error reading {chapter_file}: {e}") + + if not found: + log_callback(f"⚠️ No file found for Chapter {chapter_num}") + # Log available files for debugging 
+ if len(all_files) < 50: + similar_files = [f for f in all_files if str(chapter_num) in f] + if similar_files: + log_callback(f" Similar files: {similar_files[:3]}") + + if not chapters: + log_callback("❌ No chapters could be loaded!") + log_callback(f"❌ Output directory: {out}") + log_callback(f"❌ Files in directory: {len(os.listdir(out))} files") + # Show first few files for debugging + sample_files = os.listdir(out)[:10] + log_callback(f"❌ Sample files: {sample_files}") + return + + # Sort chapters by OPF spine order if available + opf_path = os.path.join(out, 'content.opf') + if os.path.exists(opf_path) and chapters: + log_callback("📋 Sorting chapters according to OPF spine order...") + # Use the existing chapter_extractor instance to sort + chapters = chapter_extractor._sort_by_opf_spine(chapters, opf_path) + log_callback("✅ Chapters sorted according to OPF reading order") + else: + print("🚀 Using comprehensive chapter extraction with resource handling...") + with zipfile.ZipFile(input_path, 'r') as zf: + metadata = chapter_extractor._extract_epub_metadata(zf) + chapters = chapter_extractor.extract_chapters(zf, out) + + print(f"\n📚 Extraction Summary:") + print(f" Total chapters extracted: {len(chapters)}") + if chapters: + nums = [c.get('num', 0) for c in chapters] + print(f" Chapter range: {min(nums)} to {max(nums)}") + + # Check for gaps in the sequence + expected_count = max(nums) - min(nums) + 1 + if len(chapters) < expected_count: + print(f"\n⚠️ Potential missing chapters detected:") + print(f" Expected {expected_count} chapters (from {min(nums)} to {max(nums)})") + print(f" Actually found: {len(chapters)} chapters") + print(f" Potentially missing: {expected_count - len(chapters)} chapters") + + validate_chapter_continuity(chapters) + + print("\n" + "="*50) + validate_epub_structure(out) + print("="*50 + "\n") + + progress_manager.migrate_to_content_hash(chapters) + progress_manager.save() + + if check_stop(): + return + + metadata_path = os.path.join(out, "metadata.json") + if os.path.exists(metadata_path): + with open(metadata_path, 'r', encoding='utf-8') as mf: + metadata = json.load(mf) + + metadata["chapter_count"] = len(chapters) + metadata["chapter_titles"] = {str(c["num"]): c["title"] for c in chapters} + + print(f"[DEBUG] Initializing client with model = {config.MODEL}") + client = UnifiedClient(api_key=config.API_KEY, model=config.MODEL, output_dir=out) + if hasattr(client, 'use_multi_keys') and client.use_multi_keys: + stats = client.get_stats() + print(f"🔑 Multi-key mode active: {stats.get('total_keys', 0)} keys loaded") + print(f" Active keys: {stats.get('active_keys', 0)}") + else: + print(f"🔑 Single-key mode: Using {config.MODEL}") + + # Reset cleanup state when starting new translation + if hasattr(client, 'reset_cleanup_state'): + client.reset_cleanup_state() + + if "title" in metadata and config.TRANSLATE_BOOK_TITLE and not metadata.get("title_translated", False): + original_title = metadata["title"] + print(f"📚 Original title: {original_title}") + + if not check_stop(): + translated_title = translate_title( + original_title, + client, + None, + None, + config.TEMP + ) + + metadata["original_title"] = original_title + metadata["title"] = translated_title + metadata["title_translated"] = True + + print(f"📚 Translated title: {translated_title}") + else: + print("❌ Title translation skipped due to stop request") + + # Translate other metadata fields if configured + translate_metadata_fields_str = os.getenv('TRANSLATE_METADATA_FIELDS', '{}') + 
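+    # (illustrative) TRANSLATE_METADATA_FIELDS is a JSON object mapping metadata
+    # field names to booleans, e.g. TRANSLATE_METADATA_FIELDS='{"creator": true, "description": true}'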
metadata_translation_mode = os.getenv('METADATA_TRANSLATION_MODE', 'together') + + try: + translate_metadata_fields = json.loads(translate_metadata_fields_str) + + if translate_metadata_fields and any(translate_metadata_fields.values()): + # Filter out fields that should be translated (excluding already translated fields) + fields_to_translate = {} + skipped_fields = [] + + for field_name, should_translate in translate_metadata_fields.items(): + if should_translate and field_name != 'title' and field_name in metadata: + # Check if already translated + if metadata.get(f"{field_name}_translated", False): + skipped_fields.append(field_name) + print(f"✓ Skipping {field_name} - already translated") + else: + fields_to_translate[field_name] = should_translate + + if fields_to_translate: + print("\n" + "="*50) + print("📋 METADATA TRANSLATION PHASE") + print("="*50) + print(f"🌐 Translating {len(fields_to_translate)} metadata fields...") + + # Get ALL configuration from environment - NO DEFAULTS + system_prompt = os.getenv('BOOK_TITLE_SYSTEM_PROMPT', '') + if not system_prompt: + print("❌ No system prompt configured, skipping metadata translation") + else: + # Get field-specific prompts + field_prompts_str = os.getenv('METADATA_FIELD_PROMPTS', '{}') + try: + field_prompts = json.loads(field_prompts_str) + except: + field_prompts = {} + + if not field_prompts and not field_prompts.get('_default'): + print("❌ No field prompts configured, skipping metadata translation") + else: + # Get language configuration + lang_behavior = os.getenv('LANG_PROMPT_BEHAVIOR', 'auto') + forced_source_lang = os.getenv('FORCED_SOURCE_LANG', 'Korean') + output_language = os.getenv('OUTPUT_LANGUAGE', 'English') + + # Determine source language + source_lang = metadata.get('language', '').lower() + if lang_behavior == 'never': + lang_str = "" + elif lang_behavior == 'always': + lang_str = forced_source_lang + else: # auto + if 'zh' in source_lang or 'chinese' in source_lang: + lang_str = 'Chinese' + elif 'ja' in source_lang or 'japanese' in source_lang: + lang_str = 'Japanese' + elif 'ko' in source_lang or 'korean' in source_lang: + lang_str = 'Korean' + else: + lang_str = '' + + # Check if batch translation is enabled for parallel processing + batch_translate_enabled = os.getenv('BATCH_TRANSLATION', '0') == '1' + batch_size = int(os.getenv('BATCH_SIZE', '50')) # Default batch size + + if batch_translate_enabled and len(fields_to_translate) > 1: + print(f"⚡ Using parallel metadata translation mode ({len(fields_to_translate)} fields, batch size: {batch_size})...") + + # Import ThreadPoolExecutor for parallel processing + from concurrent.futures import ThreadPoolExecutor, as_completed + import threading + + # Thread-safe results storage + translation_results = {} + results_lock = threading.Lock() + + def translate_metadata_field(field_name, original_value): + """Translate a single metadata field""" + try: + print(f"\n📋 Translating {field_name}: {original_value[:100]}..." 
+ if len(str(original_value)) > 100 else f"\n📋 Translating {field_name}: {original_value}") + + # Get field-specific prompt + prompt_template = field_prompts.get(field_name, field_prompts.get('_default', '')) + + if not prompt_template: + print(f"⚠️ No prompt configured for field '{field_name}', skipping") + return None + + # Replace variables in prompt + field_prompt = prompt_template.replace('{source_lang}', lang_str) + field_prompt = field_prompt.replace('{output_lang}', output_language) + field_prompt = field_prompt.replace('English', output_language) + field_prompt = field_prompt.replace('{field_value}', str(original_value)) + + # Check if we're using a translation service (not AI) + client_type = getattr(client, 'client_type', '') + is_translation_service = client_type in ['deepl', 'google_translate'] + + if is_translation_service: + # For translation services, send only the field value without AI prompts + print(f"🌐 Using translation service ({client_type}) - sending field directly") + messages = [ + {"role": "user", "content": str(original_value)} + ] + else: + # For AI services, use prompts as before + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": f"{field_prompt}\n\n{original_value}"} + ] + + # Add delay for rate limiting + if config.DELAY > 0: + time.sleep(config.DELAY) + + # Make API call + content, finish_reason = client.send( + messages, + temperature=config.TEMP, + max_tokens=config.MAX_OUTPUT_TOKENS + ) + translated_value = content.strip() + + # Store result thread-safely + with results_lock: + translation_results[field_name] = { + 'original': original_value, + 'translated': translated_value, + 'success': True + } + + print(f"✅ Translated {field_name}: {translated_value}") + return translated_value + + except Exception as e: + print(f"❌ Failed to translate {field_name}: {e}") + with results_lock: + translation_results[field_name] = { + 'original': original_value, + 'translated': None, + 'success': False, + 'error': str(e) + } + return None + + # Execute parallel translations with limited workers + max_workers = min(len(fields_to_translate), batch_size) + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all translation tasks + futures = {} + for field_name in fields_to_translate: + if field_name in metadata and not check_stop(): + original_value = metadata[field_name] + future = executor.submit(translate_metadata_field, field_name, original_value) + futures[future] = field_name + + # Wait for completion + for future in as_completed(futures): + if check_stop(): + print("❌ Metadata translation stopped by user") + break + + # Apply results to metadata + for field_name, result in translation_results.items(): + if result['success'] and result['translated']: + metadata[f"original_{field_name}"] = result['original'] + metadata[field_name] = result['translated'] + metadata[f"{field_name}_translated"] = True + + else: + # Sequential translation mode (individual translation) + mode_desc = "sequential" if not batch_translate_enabled else "sequential (single field)" + print(f"📝 Using {mode_desc} translation mode...") + + for field_name in fields_to_translate: + if not check_stop() and field_name in metadata: + original_value = metadata[field_name] + print(f"\n📋 Translating {field_name}: {original_value[:100]}..." 
+ if len(str(original_value)) > 100 else f"\n📋 Translating {field_name}: {original_value}") + + # Get field-specific prompt + prompt_template = field_prompts.get(field_name, field_prompts.get('_default', '')) + + if not prompt_template: + print(f"⚠️ No prompt configured for field '{field_name}', skipping") + continue + + # Replace variables in prompt + field_prompt = prompt_template.replace('{source_lang}', lang_str) + field_prompt = field_prompt.replace('{output_lang}', output_language) + field_prompt = field_prompt.replace('English', output_language) + field_prompt = field_prompt.replace('{field_value}', str(original_value)) + + # Check if we're using a translation service (not AI) + client_type = getattr(client, 'client_type', '') + is_translation_service = client_type in ['deepl', 'google_translate'] + + if is_translation_service: + # For translation services, send only the field value without AI prompts + print(f"🌐 Using translation service ({client_type}) - sending field directly") + messages = [ + {"role": "user", "content": str(original_value)} + ] + else: + # For AI services, use prompts as before + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": f"{field_prompt}\n\n{original_value}"} + ] + + try: + # Add delay using the config instance from main() + if config.DELAY > 0: # ✅ FIXED - use config.DELAY instead of config.SEND_INTERVAL + time.sleep(config.DELAY) + + # Use the same client instance from main() + # ✅ FIXED - Properly unpack tuple response and provide max_tokens + content, finish_reason = client.send( + messages, + temperature=config.TEMP, + max_tokens=config.MAX_OUTPUT_TOKENS # ✅ FIXED - provide max_tokens to avoid NoneType error + ) + translated_value = content.strip() # ✅ FIXED - use content from unpacked tuple + + metadata[f"original_{field_name}"] = original_value + metadata[field_name] = translated_value + metadata[f"{field_name}_translated"] = True + + print(f"✅ Translated {field_name}: {translated_value}") + + except Exception as e: + print(f"❌ Failed to translate {field_name}: {e}") + + else: + if check_stop(): + print("❌ Metadata translation stopped by user") + break + else: + print("📋 No additional metadata fields to translate") + + except Exception as e: + print(f"⚠️ Error processing metadata translation settings: {e}") + import traceback + traceback.print_exc() + + with open(metadata_path, 'w', encoding='utf-8') as mf: + json.dump(metadata, mf, ensure_ascii=False, indent=2) + print(f"💾 Saved metadata with {'translated' if metadata.get('title_translated', False) else 'original'} title") + + print("\n" + "="*50) + print("📑 GLOSSARY GENERATION PHASE") + print("="*50) + + print(f"📑 DEBUG: ENABLE_AUTO_GLOSSARY = '{os.getenv('ENABLE_AUTO_GLOSSARY', 'NOT SET')}'") + print(f"📑 DEBUG: MANUAL_GLOSSARY = '{config.MANUAL_GLOSSARY}'") + print(f"📑 DEBUG: Manual glossary exists? {os.path.isfile(config.MANUAL_GLOSSARY) if config.MANUAL_GLOSSARY else False}") + + # Check if glossary.csv already exists in the source folder + existing_glossary_csv = os.path.join(out, "glossary.csv") + existing_glossary_json = os.path.join(out, "glossary.json") + print(f"📑 DEBUG: Existing glossary.csv? {os.path.exists(existing_glossary_csv)}") + print(f"📑 DEBUG: Existing glossary.json? 
{os.path.exists(existing_glossary_json)}") + + if config.MANUAL_GLOSSARY and os.path.isfile(config.MANUAL_GLOSSARY): + ext = os.path.splitext(config.MANUAL_GLOSSARY)[1].lower() + target_name = "glossary.csv" if ext == ".csv" else "glossary.json" + target_path = os.path.join(out, target_name) + if os.path.abspath(config.MANUAL_GLOSSARY) != os.path.abspath(target_path): + shutil.copy(config.MANUAL_GLOSSARY, target_path) + print("📑 Using manual glossary from:", config.MANUAL_GLOSSARY) + else: + print("📑 Using existing glossary:", config.MANUAL_GLOSSARY) + elif os.path.exists(existing_glossary_csv) or os.path.exists(existing_glossary_json): + print("📑 Existing glossary file detected in source folder - skipping automatic generation") + if os.path.exists(existing_glossary_csv): + print(f"📑 Using existing glossary.csv: {existing_glossary_csv}") + elif os.path.exists(existing_glossary_json): + print(f"📑 Using existing glossary.json: {existing_glossary_json}") + elif os.getenv("ENABLE_AUTO_GLOSSARY", "0") == "1": + model = os.getenv("MODEL", "gpt-4") + if is_traditional_translation_api(model): + print("📑 Automatic glossary generation disabled") + print(f" {model} does not support glossary extraction") + print(" Traditional translation APIs cannot identify character names/terms") + else: + print("📑 Starting automatic glossary generation...") + try: + # Use the new process-safe glossary worker + from glossary_process_worker import generate_glossary_in_process + import concurrent.futures + import multiprocessing + + instructions = "" + + # Get extraction workers setting + extraction_workers = int(os.getenv("EXTRACTION_WORKERS", "1")) + if extraction_workers == 1: + # Auto-detect for better performance + extraction_workers = min(os.cpu_count() or 4, 4) + print(f"📑 Using {extraction_workers} CPU cores for glossary generation") + + # Collect environment variables to pass to subprocess + env_vars = {} + important_vars = [ + 'EXTRACTION_WORKERS', 'GLOSSARY_MIN_FREQUENCY', 'GLOSSARY_MAX_NAMES', + 'GLOSSARY_MAX_TITLES', 'GLOSSARY_BATCH_SIZE', 'GLOSSARY_STRIP_HONORIFICS', + 'GLOSSARY_FUZZY_THRESHOLD', 'GLOSSARY_MAX_TEXT_SIZE', 'AUTO_GLOSSARY_PROMPT', + 'GLOSSARY_USE_SMART_FILTER', 'GLOSSARY_USE_LEGACY_CSV', 'GLOSSARY_PARALLEL_ENABLED', + 'GLOSSARY_FILTER_MODE', 'GLOSSARY_SKIP_FREQUENCY_CHECK', 'GLOSSARY_SKIP_ALL_VALIDATION', + 'MODEL', 'API_KEY', 'OPENAI_API_KEY', 'GEMINI_API_KEY', 'MAX_OUTPUT_TOKENS', + 'GLOSSARY_TEMPERATURE', 'MANUAL_GLOSSARY', 'ENABLE_AUTO_GLOSSARY' + ] + + for var in important_vars: + if var in os.environ: + env_vars[var] = os.environ[var] + + # Create a Queue for real-time log streaming + manager = multiprocessing.Manager() + log_queue = manager.Queue() + + # Use ProcessPoolExecutor for true parallelism (completely bypasses GIL) + print("📑 Starting glossary generation in separate process...") + with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor: + # Submit to separate process WITH log queue + future = executor.submit( + generate_glossary_in_process, + out, + chapters, + instructions, + env_vars, + log_queue # Pass the queue for real-time logs + ) + + # Poll for completion and stream logs in real-time + poll_count = 0 + while not future.done(): + poll_count += 1 + + # Check for logs from subprocess and print them immediately + try: + while not log_queue.empty(): + log_line = log_queue.get_nowait() + print(log_line) # Print to GUI + except: + pass + + # Super short sleep to yield to GUI + time.sleep(0.001) + + # Check for stop every 100 polls + if poll_count % 100 == 0: 
+                            if check_stop():
+                                print("📑 ❌ Glossary generation cancelled")
+                                executor.shutdown(wait=False, cancel_futures=True)
+                                return
+
+                    # Get any remaining logs from the queue
+                    try:
+                        while not log_queue.empty():
+                            log_line = log_queue.get_nowait()
+                            print(log_line)
+                    except queue.Empty:
+                        pass
+
+                    # Get the result (the polling loop above only exits once the future is done)
+                    try:
+                        result = future.result(timeout=0.1)
+                        if isinstance(result, dict):
+                            if result.get('success'):
+                                print("📑 ✅ Glossary generation completed successfully")
+                            else:
+                                print(f"📑 ❌ Glossary generation failed: {result.get('error')}")
+                                if result.get('traceback'):
+                                    print(f"📑 Error details:\n{result.get('traceback')}")
+                    except Exception as e:
+                        print(f"📑 ❌ Error retrieving glossary result: {e}")
+
+                print("✅ Automatic glossary generation COMPLETED")
+
+                # Handle deferred glossary appending
+                if os.getenv('DEFER_GLOSSARY_APPEND') == '1':
+                    print("📑 Processing deferred glossary append to system prompt...")
+
+                    glossary_path = find_glossary_file(out)
+                    if glossary_path and os.path.exists(glossary_path):
+                        try:
+                            glossary_block = None
+                            if glossary_path.lower().endswith('.csv'):
+                                with open(glossary_path, 'r', encoding='utf-8') as f:
+                                    glossary_block = f.read()
+                            else:
+                                with open(glossary_path, 'r', encoding='utf-8') as f:
+                                    glossary_data = json.load(f)
+
+                                formatted_entries = {}
+                                if isinstance(glossary_data, dict) and 'entries' in glossary_data:
+                                    formatted_entries = glossary_data['entries']
+                                elif isinstance(glossary_data, dict):
+                                    formatted_entries = {k: v for k, v in glossary_data.items() if k != "metadata"}
+
+                                if formatted_entries:
+                                    glossary_block = json.dumps(formatted_entries, ensure_ascii=False, indent=2)
+                                else:
+                                    glossary_block = None
+
+                            if glossary_block:
+                                glossary_prompt = os.getenv('GLOSSARY_APPEND_PROMPT',
+                                    "Character/Term Glossary (use these translations consistently):")
+
+                                current_prompt = config.PROMPT
+                                if current_prompt:
+                                    current_prompt += "\n\n"
+                                current_prompt += f"{glossary_prompt}\n{glossary_block}"
+
+                                config.PROMPT = current_prompt
+
+                                print(f"✅ Added auto-generated glossary to system prompt ({os.path.basename(glossary_path)})")
+
+                                if 'DEFER_GLOSSARY_APPEND' in os.environ:
+                                    del os.environ['DEFER_GLOSSARY_APPEND']
+                                if 'GLOSSARY_APPEND_PROMPT' in os.environ:
+                                    del os.environ['GLOSSARY_APPEND_PROMPT']
+                            else:
+                                print("⚠️ Auto-generated glossary has no entries - skipping append")
+                                if 'DEFER_GLOSSARY_APPEND' in os.environ:
+                                    del os.environ['DEFER_GLOSSARY_APPEND']
+                                if 'GLOSSARY_APPEND_PROMPT' in os.environ:
+                                    del os.environ['GLOSSARY_APPEND_PROMPT']
+                        except Exception as e:
+                            print(f"⚠️ Failed to append auto-generated glossary: {e}")
+                    else:
+                        print("⚠️ No glossary file found after automatic generation")
+
+            except Exception as e:
+                print(f"❌ Glossary generation failed: {e}")
+    else:
+        print("📑 Automatic glossary generation disabled")
+        # Don't create an empty glossary - let any existing manual glossary remain
+
+    glossary_file = find_glossary_file(out)
+    if glossary_file and os.path.exists(glossary_file):
+        try:
+            if glossary_file.lower().endswith('.csv'):
+                # Quick CSV stats
+                with open(glossary_file, 'r', encoding='utf-8') as f:
+                    lines = [ln.strip() for ln in f.readlines() if ln.strip()]
+                entry_count = max(0, len(lines) - 1) if lines and ',' in lines[0] else len(lines)
+                print(f"📑 Glossary ready (CSV) with {entry_count} entries")
+                print("📑 Sample glossary lines:")
+                for ln in lines[1:4]:
+                    print(f" • {ln}")
+            else:
+                with open(glossary_file, 'r', encoding='utf-8') as f:
+                    glossary_data = json.load(f)
+
+                if isinstance(glossary_data, dict):
+                    if 'entries' in glossary_data and isinstance(glossary_data['entries'], dict):
+                        entry_count = len(glossary_data['entries'])
+                        sample_items = list(glossary_data['entries'].items())[:3]
+                    else:
+                        entry_count = len(glossary_data)
+                        sample_items = list(glossary_data.items())[:3]
+
+                    print(f"📑 Glossary ready with {entry_count} entries")
+                    print("📑 Sample glossary entries:")
+                    for key, value in sample_items:
+                        print(f" • {key} → {value}")
+
+                elif isinstance(glossary_data, list):
+                    print(f"📑 Glossary ready with {len(glossary_data)} entries")
+                    print("📑 Sample glossary entries:")
+                    for entry in glossary_data[:3]:
+                        if isinstance(entry, dict):
+                            original = entry.get('original_name', '?')
+                            translated = entry.get('name', original)
+                            print(f" • {original} → {translated}")
+                else:
+                    print(f"⚠️ Unexpected glossary format: {type(glossary_data)}")
+
+        except Exception as e:
+            print(f"⚠️ Failed to inspect glossary file: {e}")
+    else:
+        print("📑 No glossary file found")
+
+    print("="*50)
+    print("🚀 STARTING MAIN TRANSLATION PHASE")
+    print("="*50 + "\n")
+
+    glossary_path = find_glossary_file(out)
+    if glossary_path and os.path.exists(glossary_path) and glossary_path.lower().endswith('.json'):
+        try:
+            with open(glossary_path, 'r', encoding='utf-8') as f:
+                g_data = json.load(f)
+
+            print(f"[DEBUG] Glossary type before translation: {type(g_data)}")
+            if isinstance(g_data, list):
+                print("[DEBUG] Glossary is a list")
+        except Exception as e:
+            print(f"[DEBUG] Error checking glossary: {e}")
+
+    system = build_system_prompt(config.SYSTEM_PROMPT, glossary_path)
+    base_msg = [{"role": "system", "content": system}]
+    # Preserve the original system prompt to avoid in-place mutations
+    original_system_prompt = system
+    last_summary_block_text = None  # Will hold the last rolling summary text for the NEXT chapter only
+
+    image_translator = None
+
+    if config.ENABLE_IMAGE_TRANSLATION:
+        print(f"🖼️ Image translation enabled for model: {config.MODEL}")
+        print("🖼️ Image translation will use your custom system prompt and glossary")
+        image_translator = ImageTranslator(
+            client,
+            out,
+            config.PROFILE_NAME,
+            system,
+            config.TEMP,
+            log_callback,
+            progress_manager,
+            history_manager,
+            chunk_context_manager
+        )
+
+        known_vision_models = [
+            'gemini-1.5-pro', 'gemini-1.5-flash', 'gemini-2.0-flash', 'gemini-2.5-flash', 'gemini-2.5-pro',
+            'gpt-4-turbo', 'gpt-4o', 'gpt-4.1-mini', 'gpt-4.1-nano', 'o4-mini'
+        ]
+
+        if config.MODEL.lower() not in known_vision_models:
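+            # Heuristic check: the configured model name is lowercased and matched
+            # against the static list above; an unknown model only triggers the
+            # warning below - image translation is still attempted.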
+            print(f"⚠️ Note: {config.MODEL} may not have vision capabilities. Image translation will be attempted anyway.")
+    else:
+        print("ℹ️ Image translation disabled by user")
+
+    total_chapters = len(chapters)
+
+    # Only detect numbering if the toggle is not disabled
+    if config.DISABLE_ZERO_DETECTION:
+        print("📊 0-based detection disabled by user setting")
+        uses_zero_based = False
+        # Important: set a flag that can be checked throughout the codebase
+        config._force_disable_zero_detection = True
+    else:
+        if chapters:
+            uses_zero_based = detect_novel_numbering(chapters)
+            print(f"📊 Novel numbering detected: {'0-based' if uses_zero_based else '1-based'}")
+        else:
+            uses_zero_based = False
+        config._force_disable_zero_detection = False
+
+    # Store this for later use
+    config._uses_zero_based = uses_zero_based
+
+    rng = os.getenv("CHAPTER_RANGE", "")
+    start = None
+    end = None
+    if rng and re.match(r"^\d+\s*-\s*\d+$", rng):
+        start, end = map(int, rng.split("-", 1))
+
+        # Only report how the range will be interpreted when one was actually given
+        if config.DISABLE_ZERO_DETECTION:
+            print(f"📊 0-based detection disabled - using range as specified: {start}-{end}")
+        elif uses_zero_based:
+            print("📊 0-based novel detected")
+            print(f"📊 User range {start}-{end} will be used as-is (chapters are already adjusted)")
+        else:
+            print("📊 1-based novel detected")
+            print(f"📊 Using range as specified: {start}-{end}")
+
+    print("📊 Calculating total chunks needed...")
+    total_chunks_needed = 0
+    chunks_per_chapter = {}
+    chapters_to_process = 0
+
+    # First pass: set actual chapter numbers and count the chunks each chapter needs
+    for idx, c in enumerate(chapters):
+        chap_num = c["num"]
+        content_hash = c.get("content_hash") or ContentProcessor.get_content_hash(c["body"])
+
+        # Extract the raw chapter number from the file
+        raw_num = FileUtilities.extract_actual_chapter_number(c, patterns=None, config=config)
+        #print(f"[DEBUG] Extracted raw_num={raw_num} from {c.get('original_basename', 'unknown')}")
+
+        # Apply the offset
+        offset = config.CHAPTER_NUMBER_OFFSET if hasattr(config, 'CHAPTER_NUMBER_OFFSET') else 0
+        raw_num += offset
+
+        # When the toggle is disabled, use raw numbers without any 0-based adjustment
+        if config.DISABLE_ZERO_DETECTION:
+            c['actual_chapter_num'] = raw_num
+            # Store raw number for consistency
+            c['raw_chapter_num'] = raw_num
+            c['zero_adjusted'] = False
+        else:
+            # Store raw number
+            c['raw_chapter_num'] = raw_num
+            # Apply adjustment only if this is a 0-based novel
+            if uses_zero_based:
+                c['actual_chapter_num'] = raw_num + 1
+                c['zero_adjusted'] = True
+            else:
+                c['actual_chapter_num'] = raw_num
+                c['zero_adjusted'] = False
+
+        # Now we can safely use actual_num
+        actual_num = c['actual_chapter_num']
+
+        if start is not None:
+            if not (start <= c['actual_chapter_num'] <= end):
+                #print(f"[SKIP] Chapter {c['actual_chapter_num']} outside range {start}-{end}")
+                continue
+
+        needs_translation, skip_reason, _ = progress_manager.check_chapter_status(
+            idx, actual_num, content_hash, out
+        )
+
+        if not needs_translation:
+            chunks_per_chapter[idx] = 0
+            continue
+
+        chapters_to_process += 1
+
+        # Calculate based on the OUTPUT limit only
+        max_output_tokens = config.MAX_OUTPUT_TOKENS
+        safety_margin_output = 500
+
+        # Korean to English typically compresses to 0.7-0.9x
+        compression_factor = config.COMPRESSION_FACTOR
+        available_tokens = int((max_output_tokens - safety_margin_output) / compression_factor)
+
+        # Ensure minimum
+        available_tokens = max(available_tokens, 1000)
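+        # Worked example of this budget, assuming the defaults
+        # (MAX_OUTPUT_TOKENS=8192, compression factor 0.9):
+        #     int((8192 - 500) / 0.9) = 8546 source tokens per chunk,
+        # i.e. chunks are sized so the translation still fits in one
+        # 8192-token completion after the expected 0.7-0.9x compression.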
+        #print(f"📊 Chunk size: {available_tokens:,} tokens (based on {max_output_tokens:,} output limit, compression: {compression_factor})")
+
+        # For mixed content chapters, calculate on the body as-is; don't modify
+        # c["body"] during chunk calculation - the count may be slightly off for
+        # image-bearing chapters, but that's OK
+        chunks = chapter_splitter.split_chapter(c["body"], available_tokens)
+
+        chapter_key_str = content_hash
+        old_key_str = str(idx)
+
+        if chapter_key_str not in progress_manager.prog.get("chapter_chunks", {}) and old_key_str in progress_manager.prog.get("chapter_chunks", {}):
+            progress_manager.prog["chapter_chunks"][chapter_key_str] = progress_manager.prog["chapter_chunks"][old_key_str]
+            del progress_manager.prog["chapter_chunks"][old_key_str]
+            #print(f"[PROGRESS] Migrated chunks for chapter {actual_num} to new tracking system")
+
+        # Always count actual chunks - ignore "completed" tracking
+        chunks_per_chapter[idx] = len(chunks)
+        total_chunks_needed += chunks_per_chapter[idx]
+
+    terminology = "Sections" if is_text_file else "Chapters"
+    print(f"📊 Total chunks to translate: {total_chunks_needed}")
+    print(f"📚 {terminology} to process: {chapters_to_process}")
+
+    multi_chunk_chapters = [(idx, count) for idx, count in chunks_per_chapter.items() if count > 1]
+    if multi_chunk_chapters:
+        print(f"📄 {terminology} requiring multiple chunks:")
+        for idx, chunk_count in multi_chunk_chapters:
+            chap = chapters[idx]
+            section_term = "Section" if is_text_file else "Chapter"
+            print(f" • {section_term} {idx+1} ({chap['title'][:30]}...): {chunk_count} chunks")
+
+    translation_start_time = time.time()
+    chunks_completed = 0
+    chapters_completed = 0
+
+    current_chunk_number = 0
+
+    if config.BATCH_TRANSLATION:
+        print("\n📦 PARALLEL TRANSLATION MODE ENABLED")
+        print(f"📦 Processing chapters with up to {config.BATCH_SIZE} concurrent API calls")
+
+        import concurrent.futures
+        from threading import Lock
+
+        progress_lock = Lock()
+
+        chapters_to_translate = []
+
+        # FIX: first pass to set actual chapter numbers for ALL chapters.
+        # This ensures batch mode has the same chapter numbering as non-batch mode
+        print("📊 Setting chapter numbers...")
+        for idx, c in enumerate(chapters):
+            raw_num = FileUtilities.extract_actual_chapter_number(c, patterns=None, config=config)
+
+            # Apply offset if configured
+            offset = config.CHAPTER_NUMBER_OFFSET if hasattr(config, 'CHAPTER_NUMBER_OFFSET') else 0
+            raw_num += offset
+
+            if config.DISABLE_ZERO_DETECTION:
+                # Use raw numbers without adjustment
+                c['actual_chapter_num'] = raw_num
+                c['raw_chapter_num'] = raw_num
+                c['zero_adjusted'] = False
+            else:
+                # Store raw number
+                c['raw_chapter_num'] = raw_num
+                # Apply 0-based adjustment if detected
+                if uses_zero_based:
+                    c['actual_chapter_num'] = raw_num + 1
+                    c['zero_adjusted'] = True
+                else:
+                    c['actual_chapter_num'] = raw_num
+                    c['zero_adjusted'] = False
+
+        for idx, c in enumerate(chapters):
+            chap_num = c["num"]
+            content_hash = c.get("content_hash") or ContentProcessor.get_content_hash(c["body"])
+
+            # Check if this is a pre-split text chunk with a decimal number
+            if (is_text_file and c.get('is_chunk', False) and isinstance(c['num'], float)):
+                actual_num = c['num']  # Preserve the decimal for text files only
+            else:
+                actual_num = c.get('actual_chapter_num', c['num'])  # Now this will exist!
+
+            # Skip chapters outside the range
+            if start is not None and not (start <= actual_num <= end):
+                continue
+
+            # Check if the chapter needs translation
+            needs_translation, skip_reason, existing_file = progress_manager.check_chapter_status(
+                idx, actual_num, content_hash, out, c  # Pass the chapter object
+            )
+            # Add an explicit file check for supposedly completed chapters
+            if not needs_translation and existing_file:
+                file_path = os.path.join(out, existing_file)
+                if not os.path.exists(file_path):
+                    print(f"⚠️ Output file missing for chapter {actual_num}: {existing_file}")
+                    needs_translation = True
+                    skip_reason = None
+                    # Update status to file_missing
+                    progress_manager.update(idx, actual_num, content_hash, None, status="file_missing")
+                    progress_manager.save()
+
+            if not needs_translation:
+                # Modify skip_reason to use the appropriate terminology
+                is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False)
+                terminology = "Section" if is_text_source else "Chapter"
+
+                # Replace "Chapter" with the appropriate terminology in skip_reason
+                skip_reason_modified = skip_reason.replace("Chapter", terminology)
+                print(f"[SKIP] {skip_reason_modified}")
+                chapters_completed += 1
+                continue
+
+            # Check for empty or image-only chapters
+            has_images = c.get('has_images', False)
+            has_meaningful_text = ContentProcessor.is_meaningful_text_content(c["body"])
+            text_size = c.get('file_size', 0)
+
+            is_empty_chapter = (not has_images and text_size < 10)
+            is_image_only_chapter = (has_images and not has_meaningful_text)
+
+            # Handle empty chapters
+            if is_empty_chapter:
+                print(f"📄 Empty chapter {chap_num} - will process individually")
+
+                # create_chapter_filename accepts the raw num (float or int) directly
+                fname = FileUtilities.create_chapter_filename(c, c['num'])
+                with open(os.path.join(out, fname), 'w', encoding='utf-8') as f:
+                    f.write(c["body"])
+                progress_manager.update(idx, actual_num, content_hash, fname, status="completed_empty")
+                progress_manager.save()
+                chapters_completed += 1
+                continue
+
+            # Add to chapters to translate
+            chapters_to_translate.append((idx, c))
+
+        print(f"📊 Found {len(chapters_to_translate)} chapters to translate in parallel")
+
+        # Continue with the rest of the existing batch processing code...
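+        # Concurrency note: BatchTranslationProcessor receives the shared
+        # progress_lock along with progress_manager's save/update callbacks,
+        # so progress writes coming from up to BATCH_SIZE worker threads can
+        # be serialized through a single lock rather than racing on the
+        # progress file.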
+ batch_processor = BatchTranslationProcessor( + config, client, base_msg, out, progress_lock, + progress_manager.save, + lambda idx, actual_num, content_hash, output_file=None, status="completed", **kwargs: progress_manager.update(idx, actual_num, content_hash, output_file, status, **kwargs), + check_stop, + image_translator, + is_text_file=is_text_file + ) + + total_to_process = len(chapters_to_translate) + processed = 0 + + # Apply conservative batching setting + batch_multiplier = 3 if os.getenv('CONSERVATIVE_BATCHING', '0') == '1' else 1 + batch_group_size = config.BATCH_SIZE * batch_multiplier + + if batch_multiplier > 1: + print(f"📦 Using conservative batching: {batch_group_size} chapters per group, {config.BATCH_SIZE} parallel") + else: + print(f"📦 Using direct batching (default): {batch_group_size} chapters per group, {config.BATCH_SIZE} parallel") + + with concurrent.futures.ThreadPoolExecutor(max_workers=config.BATCH_SIZE) as executor: + for batch_start in range(0, total_to_process, batch_group_size): + if check_stop(): + print("❌ Translation stopped during parallel processing") + executor.shutdown(wait=False) + return + + batch_end = min(batch_start + batch_group_size, total_to_process) + current_batch = chapters_to_translate[batch_start:batch_end] + + batch_number = (batch_start // batch_group_size) + 1 + print(f"\n📦 Submitting batch {batch_number}: {len(current_batch)} chapters") + + future_to_chapter = { + executor.submit(batch_processor.process_single_chapter, chapter_data): chapter_data + for chapter_data in current_batch + } + + active_count = 0 + completed_in_batch = 0 + failed_in_batch = 0 + + for future in concurrent.futures.as_completed(future_to_chapter): + if check_stop(): + print("❌ Translation stopped") + executor.shutdown(wait=False) + return + + chapter_data = future_to_chapter[future] + idx, chapter = chapter_data + + try: + success, chap_num = future.result() + if success: + completed_in_batch += 1 + print(f"✅ Chapter {chap_num} done ({completed_in_batch + failed_in_batch}/{len(current_batch)} in batch)") + else: + failed_in_batch += 1 + print(f"❌ Chapter {chap_num} failed ({completed_in_batch + failed_in_batch}/{len(current_batch)} in batch)") + except Exception as e: + failed_in_batch += 1 + print(f"❌ Chapter thread error: {e}") + + processed += 1 + + progress_percent = (processed / total_to_process) * 100 + print(f"📊 Overall Progress: {processed}/{total_to_process} ({progress_percent:.1f}%)") + + print(f"\n📦 Batch Summary:") + print(f" ✅ Successful: {completed_in_batch}") + print(f" ❌ Failed: {failed_in_batch}") + + if batch_end < total_to_process: + print(f"⏳ Waiting {config.DELAY}s before next batch...") + time.sleep(config.DELAY) + + chapters_completed = batch_processor.chapters_completed + chunks_completed = batch_processor.chunks_completed + + print(f"\n🎉 Parallel translation complete!") + print(f" Total chapters processed: {processed}") + + # Count qa_failed chapters correctly + qa_failed_count = 0 + actual_successful = 0 + + for idx, c in enumerate(chapters): + # Get the chapter's actual number + if (is_text_file and c.get('is_chunk', False) and isinstance(c['num'], float)): + actual_num = c['num'] + else: + actual_num = c.get('actual_chapter_num', c['num']) + + # Check if this chapter was processed and has qa_failed status + content_hash = c.get("content_hash") or ContentProcessor.get_content_hash(c["body"]) + + # Check if this chapter exists in progress + chapter_info = progress_manager.prog["chapters"].get(content_hash, {}) + status = 
chapter_info.get("status") + + if status == "qa_failed": + qa_failed_count += 1 + elif status == "completed": + actual_successful += 1 + + # Correct the displayed counts + print(f" Successful: {actual_successful}") + if qa_failed_count > 0: + print(f"\n⚠️ {qa_failed_count} chapters failed due to content policy violations:") + qa_failed_chapters = [] + for idx, c in enumerate(chapters): + if (is_text_file and c.get('is_chunk', False) and isinstance(c['num'], float)): + actual_num = c['num'] + else: + actual_num = c.get('actual_chapter_num', c['num']) + + content_hash = c.get("content_hash") or ContentProcessor.get_content_hash(c["body"]) + chapter_info = progress_manager.prog["chapters"].get(content_hash, {}) + if chapter_info.get("status") == "qa_failed": + qa_failed_chapters.append(actual_num) + + print(f" Failed chapters: {', '.join(map(str, sorted(qa_failed_chapters)))}") + + # Stop translation completely after batch mode + print("\n📌 Batch translation completed.") + + elif not config.BATCH_TRANSLATION: + translation_processor = TranslationProcessor(config, client, out, log_callback, check_stop, uses_zero_based, is_text_file) + + if config.DUPLICATE_DETECTION_MODE == 'ai-hunter': + # Build the main config from environment variables and config object + main_config = { + 'duplicate_lookback_chapters': config.DUPLICATE_LOOKBACK_CHAPTERS, + 'duplicate_detection_mode': config.DUPLICATE_DETECTION_MODE, + } + + # Check if AI Hunter config was passed via environment variable + ai_hunter_config_str = os.getenv('AI_HUNTER_CONFIG') + if ai_hunter_config_str: + try: + ai_hunter_config = json.loads(ai_hunter_config_str) + main_config['ai_hunter_config'] = ai_hunter_config + print("🤖 AI Hunter: Loaded configuration from environment") + except json.JSONDecodeError: + print("⚠️ AI Hunter: Failed to parse AI_HUNTER_CONFIG from environment") + + # If no AI Hunter config in environment, try to load from file as fallback + if 'ai_hunter_config' not in main_config: + # Try multiple locations for config.json + config_paths = [ + os.path.join(os.getcwd(), 'config.json'), + os.path.join(out, '..', 'config.json'), + ] + + if getattr(sys, 'frozen', False): + config_paths.append(os.path.join(os.path.dirname(sys.executable), 'config.json')) + else: + script_dir = os.path.dirname(os.path.abspath(__file__)) + config_paths.extend([ + os.path.join(script_dir, 'config.json'), + os.path.join(os.path.dirname(script_dir), 'config.json') + ]) + + for config_path in config_paths: + if os.path.exists(config_path): + try: + with open(config_path, 'r', encoding='utf-8') as f: + file_config = json.load(f) + if 'ai_hunter_config' in file_config: + main_config['ai_hunter_config'] = file_config['ai_hunter_config'] + print(f"🤖 AI Hunter: Loaded configuration from {config_path}") + break + except Exception as e: + print(f"⚠️ Failed to load config from {config_path}: {e}") + + # Always create and inject the improved AI Hunter when ai-hunter mode is selected + ai_hunter = ImprovedAIHunterDetection(main_config) + + # The TranslationProcessor class has a method that checks for duplicates + # We need to replace it with our enhanced AI Hunter + + # Create a wrapper to match the expected signature + def enhanced_duplicate_check(self, result, idx, prog, out, actual_num=None): + # If actual_num is not provided, try to get it from progress + if actual_num is None: + # Look for the chapter being processed + for ch_key, ch_info in prog.get("chapters", {}).items(): + if ch_info.get("chapter_idx") == idx: + actual_num = ch_info.get("actual_num", 
idx + 1) + break + + # Fallback to idx+1 if not found + if actual_num is None: + actual_num = idx + 1 + + return ai_hunter.detect_duplicate_ai_hunter_enhanced(result, idx, prog, out, actual_num) + + # Bind the enhanced method to the processor instance + translation_processor.check_duplicate_content = enhanced_duplicate_check.__get__(translation_processor, TranslationProcessor) + + print("🤖 AI Hunter: Using enhanced detection with configurable thresholds") + + # First pass: set actual chapter numbers respecting the config + for idx, c in enumerate(chapters): + raw_num = FileUtilities.extract_actual_chapter_number(c, patterns=None, config=config) + #print(f"[DEBUG] Extracted raw_num={raw_num} from {c.get('original_basename', 'unknown')}") + + + # Apply offset if configured + offset = config.CHAPTER_NUMBER_OFFSET if hasattr(config, 'CHAPTER_NUMBER_OFFSET') else 0 + raw_num += offset + + if config.DISABLE_ZERO_DETECTION: + # Use raw numbers without adjustment + c['actual_chapter_num'] = raw_num + c['raw_chapter_num'] = raw_num + c['zero_adjusted'] = False + else: + # Store raw number + c['raw_chapter_num'] = raw_num + # Apply 0-based adjustment if detected + if uses_zero_based: + c['actual_chapter_num'] = raw_num + 1 + c['zero_adjusted'] = True + else: + c['actual_chapter_num'] = raw_num + c['zero_adjusted'] = False + + # Second pass: process chapters + for idx, c in enumerate(chapters): + chap_num = c["num"] + + # Check if this is a pre-split text chunk with decimal number + if (is_text_file and c.get('is_chunk', False) and isinstance(c['num'], float)): + actual_num = c['num'] # Preserve the decimal for text files only + else: + actual_num = c.get('actual_chapter_num', c['num']) + content_hash = c.get("content_hash") or ContentProcessor.get_content_hash(c["body"]) + + if start is not None and not (start <= actual_num <= end): + #print(f"[SKIP] Chapter {actual_num} (file: {c.get('original_basename', 'unknown')}) outside range {start}-{end}") + continue + + needs_translation, skip_reason, existing_file = progress_manager.check_chapter_status( + idx, actual_num, content_hash, out, c # Pass the chapter object + ) + # Add explicit file check for supposedly completed chapters + if not needs_translation and existing_file: + file_path = os.path.join(out, existing_file) + if not os.path.exists(file_path): + print(f"⚠️ Output file missing for chapter {actual_num}: {existing_file}") + needs_translation = True + skip_reason = None + # Update status to file_missing + progress_manager.update(idx, actual_num, content_hash, None, status="file_missing") + progress_manager.save() + if not needs_translation: + # Modify skip_reason to use appropriate terminology + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + + # Replace "Chapter" with appropriate terminology in skip_reason + skip_reason_modified = skip_reason.replace("Chapter", terminology) + print(f"[SKIP] {skip_reason_modified}") + continue + + chapter_position = f"{chapters_completed + 1}/{chapters_to_process}" + + # Determine if this is a text file + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + + # Determine file reference based on type + if c.get('is_chunk', False): + file_ref = f"Section_{c['num']}" + else: + file_ref = c.get('original_basename', f'{terminology}_{actual_num}') + + print(f"\n🔄 Processing #{idx+1}/{total_chapters} 
(Actual: {terminology} {actual_num}) ({chapter_position} to translate): {c['title']} [File: {file_ref}]") + + chunk_context_manager.start_chapter(chap_num, c['title']) + + has_images = c.get('has_images', False) + has_meaningful_text = ContentProcessor.is_meaningful_text_content(c["body"]) + text_size = c.get('file_size', 0) + + is_empty_chapter = (not has_images and text_size < 10) + is_image_only_chapter = (has_images and not has_meaningful_text) + is_mixed_content = (has_images and has_meaningful_text) + is_text_only = (not has_images and has_meaningful_text) + + if is_empty_chapter: + print(f"📄 Empty chapter {actual_num} detected") + + # Create filename for empty chapter + if isinstance(c['num'], float): + fname = FileUtilities.create_chapter_filename(c, c['num']) + else: + fname = FileUtilities.create_chapter_filename(c, actual_num) + + # Save original content + with open(os.path.join(out, fname), 'w', encoding='utf-8') as f: + f.write(c["body"]) + + # Update progress tracking + progress_manager.update(idx, actual_num, content_hash, fname, status="completed_empty") + progress_manager.save() + chapters_completed += 1 + + # CRITICAL: Skip translation! + continue + + elif is_image_only_chapter: + print(f"📸 Image-only chapter: {c.get('image_count', 0)} images") + + translated_html = c["body"] + image_translations = {} + + # Step 1: Process images if image translation is enabled + if image_translator and config.ENABLE_IMAGE_TRANSLATION: + print(f"🖼️ Translating {c.get('image_count', 0)} images...") + image_translator.set_current_chapter(chap_num) + + translated_html, image_translations = process_chapter_images( + c["body"], + actual_num, + image_translator, + check_stop + ) + + if image_translations: + print(f"✅ Translated {len(image_translations)} images") + + # Step 2: Check for headers/titles that need translation + from bs4 import BeautifulSoup + soup = BeautifulSoup(c["body"], 'html.parser') + + # Look for headers + headers = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title']) + + # If we have headers, we should translate them even in "image-only" chapters + if headers and any(h.get_text(strip=True) for h in headers): + print(f"📝 Found headers to translate in image-only chapter") + + # Create a minimal HTML with just the headers for translation + headers_html = "" + for header in headers: + if header.get_text(strip=True): + headers_html += str(header) + "\n" + + if headers_html: + print(f"📤 Translating chapter headers...") + + # Send just the headers for translation + header_msgs = base_msg + [{"role": "user", "content": headers_html}] + + # Use the standard filename + fname = FileUtilities.create_chapter_filename(c, actual_num) + client.set_output_filename(fname) + + # Simple API call for headers + header_result, _ = client.send( + header_msgs, + temperature=config.TEMP, + max_tokens=config.MAX_OUTPUT_TOKENS + ) + + if header_result: + # Clean the result + header_result = re.sub(r"^```(?:html)?\s*\n?", "", header_result, count=1, flags=re.MULTILINE) + header_result = re.sub(r"\n?```\s*$", "", header_result, count=1, flags=re.MULTILINE) + + # Parse both the translated headers and the original body + soup_headers = BeautifulSoup(header_result, 'html.parser') + soup_body = BeautifulSoup(translated_html, 'html.parser') + + # Replace headers in the body with translated versions + translated_headers = soup_headers.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title']) + original_headers = soup_body.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title']) + + # Match and replace 
headers
+                            for orig, trans in zip(original_headers, translated_headers):
+                                if trans and trans.get_text(strip=True):
+                                    orig.string = trans.get_text(strip=True)
+
+                            translated_html = str(soup_body)
+                            print("✅ Headers translated successfully")
+                            status = "completed"
+                        else:
+                            print("⚠️ Failed to translate headers")
+                            status = "completed_image_only"
+                    else:
+                        status = "completed_image_only"
+                else:
+                    print("ℹ️ No headers found to translate")
+                    status = "completed_image_only"
+
+                # Step 3: Save with the correct filename
+                fname = FileUtilities.create_chapter_filename(c, actual_num)
+
+                with open(os.path.join(out, fname), 'w', encoding='utf-8') as f:
+                    f.write(translated_html)
+
+                print(f"[Chapter {idx+1}/{total_chapters}] ✅ Saved image-only chapter")
+                progress_manager.update(idx, actual_num, content_hash, fname, status=status)
+                progress_manager.save()
+                chapters_completed += 1
+                continue
+
+            else:
+                # Set default text to translate
+                text_to_translate = c["body"]
+                image_translations = {}
+                if is_mixed_content and image_translator and config.ENABLE_IMAGE_TRANSLATION:
+                    print(f"🖼️ Processing {c.get('image_count', 0)} images first...")
+
+                    print("[DEBUG] Content before image processing (first 200 chars):")
+                    print(c["body"][:200])
+                    print(f"[DEBUG] Has h1 tags: {'<h1' in c['body']}")
+                    print(f"[DEBUG] Has h2 tags: {'<h2' in c['body']}")
+
+                    image_translator.set_current_chapter(chap_num)
+
+                    # Store the original body before processing
+                    original_body = c["body"]
+
+                    # Calculate original chapter tokens before modification
+                    original_chapter_tokens = chapter_splitter.count_tokens(original_body)
+
+                    # Process images and get the body with translations
+                    body_with_images, image_translations = process_chapter_images(
+                        c["body"],
+                        actual_num,
+                        image_translator,
+                        check_stop
+                    )
+
+                    if image_translations:
+                        print(f"✅ Translated {len(image_translations)} images")
+
+                        # Store the body with images for later merging
+                        c["body_with_images"] = c["body"]
+
+                        # For chapters with only images and a title, we still need to translate the title.
+                        # Extract clean text for translation from the ORIGINAL body
+                        from bs4 import BeautifulSoup
+                        soup_clean = BeautifulSoup(original_body, 'html.parser')
+
+                        # Remove images from the original to get pure text
+                        for img in soup_clean.find_all('img'):
+                            img.decompose()
+
+                        # Set clean text for translation - use prettify() or str() on the full document
+                        c["body"] = str(soup_clean) if soup_clean.body else original_body
+
+                        # If there's no meaningful text content after removing images,
+                        # the text translation will just translate the title, which is correct
+                        print(f" 📝 Clean text for translation: {len(c['body'])} chars")
+
+                        # Update text_size to reflect the actual text to translate
+                        text_size = len(c["body"])
+
+                        # Recalculate the actual token count for the clean text
+                        actual_text_tokens = chapter_splitter.count_tokens(c["body"])
+                        print(f" 📊 Actual text tokens: {actual_text_tokens} (was counting {original_chapter_tokens} with images)")
+                    else:
+                        print("ℹ️ No translatable text found in images")
+                        # Keep the original body if no image translations
+                        c["body"] = original_body
+
+                print(f"📖 Translating text content ({text_size} characters)")
+                progress_manager.update(idx, actual_num, content_hash, output_file=None, status="in_progress")
+                progress_manager.save()
+
+                # Apply ignore filtering to the content before chunk splitting
+                batch_translate_active = os.getenv('BATCH_TRANSLATE_HEADERS', '0') == '1'
+                ignore_title_tag = os.getenv('IGNORE_TITLE', '0') == '1' and batch_translate_active
+                ignore_header_tags = os.getenv('IGNORE_HEADER', '0') == '1' and batch_translate_active
+
+                if (ignore_title_tag or ignore_header_tags) and c["body"]:
+                    from bs4 import BeautifulSoup
+                    content_soup = BeautifulSoup(c["body"], 'html.parser')
+
+                    # Remove title tags if ignored
+                    if ignore_title_tag:
+                        for title_tag in content_soup.find_all('title'):
+                            title_tag.decompose()
+
+                    # Remove header tags if ignored
+                    if ignore_header_tags:
+                        for header_tag in content_soup.find_all(['h1', 'h2', 'h3']):
+                            header_tag.decompose()
+
+                    c["body"] = str(content_soup)  # Update the chapter body
+
+                # Check if this chapter is already a chunk from text file splitting
+                if c.get('is_chunk', False):
+                    # This is already a pre-split chunk, but still check whether it needs further splitting.
+                    # Calculate based on the OUTPUT limit only
+                    max_output_tokens = config.MAX_OUTPUT_TOKENS
+                    safety_margin_output = 500
+
+                    # CJK to English typically compresses to 0.7-0.9x
+                    compression_factor = config.COMPRESSION_FACTOR
+                    available_tokens = int((max_output_tokens - safety_margin_output) / compression_factor)
+
+                    # Ensure minimum
+                    available_tokens = max(available_tokens, 1000)
+
+                    print(f"📊 Chunk size: {available_tokens:,} tokens (based on {max_output_tokens:,} output limit, compression: {compression_factor})")
+
+                    chapter_tokens =
chapter_splitter.count_tokens(c["body"]) + + if chapter_tokens > available_tokens: + # Even pre-split chunks might need further splitting + chunks = chapter_splitter.split_chapter(c["body"], available_tokens) + print(f"📄 Section {c['num']} (pre-split from text file) needs further splitting into {len(chunks)} chunks") + else: + chunks = [(c["body"], 1, 1)] + print(f"📄 Section {c['num']} (pre-split from text file)") + else: + # Normal splitting logic for non-text files + # Calculate based on OUTPUT limit only + max_output_tokens = config.MAX_OUTPUT_TOKENS + safety_margin_output = 500 + + # CJK to English typically compresses to 0.7-0.9x + compression_factor = config.COMPRESSION_FACTOR + available_tokens = int((max_output_tokens - safety_margin_output) / compression_factor) + + # Ensure minimum + available_tokens = max(available_tokens, 1000) + + print(f"📊 Chunk size: {available_tokens:,} tokens (based on {max_output_tokens:,} output limit, compression: {compression_factor})") + + chunks = chapter_splitter.split_chapter(c["body"], available_tokens) + + # Use consistent terminology + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + print(f"📄 {terminology} will be processed in {len(chunks)} chunk(s)") + + # Recalculate tokens on the actual text to be translated + actual_chapter_tokens = chapter_splitter.count_tokens(c["body"]) + + if len(chunks) > 1: + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + print(f" ℹ️ {terminology} size: {actual_chapter_tokens:,} tokens (limit: {available_tokens:,} tokens per chunk)") + else: + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + print(f" ℹ️ {terminology} size: {actual_chapter_tokens:,} tokens (within limit of {available_tokens:,} tokens)") + + chapter_key_str = str(idx) + if chapter_key_str not in progress_manager.prog["chapter_chunks"]: + progress_manager.prog["chapter_chunks"][chapter_key_str] = { + "total": len(chunks), + "completed": [], + "chunks": {} + } + + progress_manager.prog["chapter_chunks"][chapter_key_str]["total"] = len(chunks) + + translated_chunks = [] + + for chunk_html, chunk_idx, total_chunks in chunks: + chapter_key_str = content_hash + old_key_str = str(idx) + + if chapter_key_str not in progress_manager.prog.get("chapter_chunks", {}) and old_key_str in progress_manager.prog.get("chapter_chunks", {}): + progress_manager.prog["chapter_chunks"][chapter_key_str] = progress_manager.prog["chapter_chunks"][old_key_str] + del progress_manager.prog["chapter_chunks"][old_key_str] + #print(f"[PROGRESS] Migrated chunks for chapter {chap_num} to new tracking system") + + if chapter_key_str not in progress_manager.prog["chapter_chunks"]: + progress_manager.prog["chapter_chunks"][chapter_key_str] = { + "total": len(chunks), + "completed": [], + "chunks": {} + } + + progress_manager.prog["chapter_chunks"][chapter_key_str]["total"] = len(chunks) + + # Get chapter status to check for qa_failed + chapter_info = progress_manager.prog["chapters"].get(chapter_key_str, {}) + chapter_status = chapter_info.get("status") + + if chapter_status == "qa_failed": + # Force retranslation of qa_failed chapters + print(f" [RETRY] Chunk {chunk_idx}/{total_chunks} - retranslating due to QA failure") + + if config.CONTEXTUAL and 
history_manager.will_reset_on_next_append(config.HIST_LIMIT): + print(f" 📌 History will reset after this chunk (current: {len(history_manager.load_history())//2}/{config.HIST_LIMIT} exchanges)") + + if check_stop(): + print(f"❌ Translation stopped during chapter {actual_num}, chunk {chunk_idx}") + return + + current_chunk_number += 1 + + progress_percent = (current_chunk_number / total_chunks_needed) * 100 if total_chunks_needed > 0 else 0 + + if chunks_completed > 0: + elapsed_time = time.time() - translation_start_time + avg_time_per_chunk = elapsed_time / chunks_completed + remaining_chunks = total_chunks_needed - current_chunk_number + 1 + eta_seconds = remaining_chunks * avg_time_per_chunk + + eta_hours = int(eta_seconds // 3600) + eta_minutes = int((eta_seconds % 3600) // 60) + eta_str = f"{eta_hours}h {eta_minutes}m" if eta_hours > 0 else f"{eta_minutes}m" + else: + eta_str = "calculating..." + + if total_chunks > 1: + print(f" 🔄 Translating chunk {chunk_idx}/{total_chunks} for #{idx+1} (Overall: {current_chunk_number}/{total_chunks_needed} - {progress_percent:.1f}% - ETA: {eta_str})") + print(f" ⏳ Chunk size: {len(chunk_html):,} characters (~{chapter_splitter.count_tokens(chunk_html):,} tokens)") + else: + # Determine terminology and file reference + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + + # Consistent file reference + if c.get('is_chunk', False): + file_ref = f"Section_{c['num']}" + else: + file_ref = c.get('original_basename', f'{terminology}_{actual_num}') + + print(f" 📄 Translating {terminology.lower()} content (Overall: {current_chunk_number}/{total_chunks_needed} - {progress_percent:.1f}% - ETA: {eta_str}) [File: {file_ref}]") + print(f" 📊 {terminology} {actual_num} size: {len(chunk_html):,} characters (~{chapter_splitter.count_tokens(chunk_html):,} tokens)") + + print(f" ℹ️ This may take 30-60 seconds. 
Stop will take effect after completion.") + + if log_callback: + if hasattr(log_callback, '__self__') and hasattr(log_callback.__self__, 'append_chunk_progress'): + if total_chunks == 1: + # Determine terminology based on source type + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + + log_callback.__self__.append_chunk_progress( + 1, 1, "text", + f"{terminology} {actual_num}", + overall_current=current_chunk_number, + overall_total=total_chunks_needed, + extra_info=f"{len(chunk_html):,} chars" + ) + else: + log_callback.__self__.append_chunk_progress( + chunk_idx, + total_chunks, + "text", + f"{terminology} {actual_num}", + overall_current=current_chunk_number, + overall_total=total_chunks_needed + ) + else: + # Determine terminology based on source type + is_text_source = is_text_file or c.get('filename', '').endswith('.txt') or c.get('is_chunk', False) + terminology = "Section" if is_text_source else "Chapter" + terminology_lower = "section" if is_text_source else "chapter" + + if total_chunks == 1: + log_callback(f"📄 Processing {terminology} {actual_num} ({chapters_completed + 1}/{chapters_to_process}) - {progress_percent:.1f}% complete") + else: + log_callback(f"📄 processing chunk {chunk_idx}/{total_chunks} for {terminology_lower} {actual_num} - {progress_percent:.1f}% complete") + + # Get custom chunk prompt template from environment + chunk_prompt_template = os.getenv("TRANSLATION_CHUNK_PROMPT", "[PART {chunk_idx}/{total_chunks}]\n{chunk_html}") + + if total_chunks > 1: + user_prompt = chunk_prompt_template.format( + chunk_idx=chunk_idx, + total_chunks=total_chunks, + chunk_html=chunk_html + ) + else: + user_prompt = chunk_html + + if config.CONTEXTUAL: + history = history_manager.load_history() + trimmed = history[-config.HIST_LIMIT*2:] + chunk_context = chunk_context_manager.get_context_messages(limit=2) + else: + history = [] # Set empty history when not contextual + trimmed = [] + chunk_context = [] + + # Build the current system prompt from the original each time, and append the last summary block if present + current_system_content = original_system_prompt + if config.USE_ROLLING_SUMMARY and last_summary_block_text: + current_system_content = ( + current_system_content + + "\n\n[Rolling Summary of Previous Chapter]\n" + + "(For AI: Use as context only; do not include in output)\n" + + last_summary_block_text + + "\n[End of Rolling Summary]" + ) + current_base = [{"role": "system", "content": current_system_content}] + # If we have a prepared rolling summary from previous chapter, include it as a separate message (do NOT mutate system prompt) + summary_msgs_list = [] + if config.USE_ROLLING_SUMMARY and last_summary_block_text: + summary_msgs_list = [{ + "role": os.getenv("SUMMARY_ROLE", "user"), + "content": ( + "CONTEXT ONLY - DO NOT INCLUDE IN TRANSLATION:\n" + "[MEMORY] Previous context summary:\n\n" + f"{last_summary_block_text}\n\n" + "[END MEMORY]\n" + "END OF CONTEXT - BEGIN ACTUAL CONTENT TO TRANSLATE:" + ) + }] + msgs = current_base + summary_msgs_list + chunk_context + trimmed + [{"role": "user", "content": user_prompt}] + + c['__index'] = idx + c['__progress'] = progress_manager.prog + c['history_manager'] = history_manager + + result, finish_reason = translation_processor.translate_with_retry( + msgs, chunk_html, c, chunk_idx, total_chunks + ) + + if result is None: + progress_manager.update(idx, actual_num, content_hash, output_file=None, status="failed") 
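+                        # translate_with_retry returns None when a chunk could not be
+                        # translated (e.g. retries exhausted or a stop request); record
+                        # the failure so a later run can retry it, save, and move on to
+                        # the next chunk instead of aborting the whole job.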
+ progress_manager.save() + continue + + if config.REMOVE_AI_ARTIFACTS: + result = ContentProcessor.clean_ai_artifacts(result, True) + + if config.EMERGENCY_RESTORE: + result = ContentProcessor.emergency_restore_paragraphs(result, chunk_html) + + if config.REMOVE_AI_ARTIFACTS: + lines = result.split('\n') + + json_line_count = 0 + for i, line in enumerate(lines[:5]): + if line.strip() and any(pattern in line for pattern in [ + '"role":', '"content":', '"messages":', + '{"role"', '{"content"', '[{', '}]' + ]): + json_line_count = i + 1 + else: + break + + if json_line_count > 0 and json_line_count < len(lines): + remaining = '\n'.join(lines[json_line_count:]) + if remaining.strip() and len(remaining) > 100: + result = remaining + print(f"✂️ Removed {json_line_count} lines of JSON artifacts") + + result = re.sub(r'\[PART \d+/\d+\]\s*', '', result, flags=re.IGNORECASE) + + translated_chunks.append((result, chunk_idx, total_chunks)) + + chunk_context_manager.add_chunk(user_prompt, result, chunk_idx, total_chunks) + + progress_manager.prog["chapter_chunks"][chapter_key_str]["completed"].append(chunk_idx) + progress_manager.prog["chapter_chunks"][chapter_key_str]["chunks"][str(chunk_idx)] = result + progress_manager.save() + + chunks_completed += 1 + + will_reset = history_manager.will_reset_on_next_append( + config.HIST_LIMIT if config.CONTEXTUAL else 0, + config.TRANSLATION_HISTORY_ROLLING + ) + + + history = history_manager.append_to_history( + user_prompt, + result, + config.HIST_LIMIT if config.CONTEXTUAL else 0, + reset_on_limit=True, + rolling_window=config.TRANSLATION_HISTORY_ROLLING + ) + + if chunk_idx < total_chunks: + # Handle float delays while checking for stop + full_seconds = int(config.DELAY) + fractional_second = config.DELAY - full_seconds + + # Check stop signal every second for full seconds + for i in range(full_seconds): + if check_stop(): + print("❌ Translation stopped during delay") + return + time.sleep(1) + + # Handle the fractional part if any + if fractional_second > 0: + if check_stop(): + print("❌ Translation stopped during delay") + return + time.sleep(fractional_second) + + if check_stop(): + print(f"❌ Translation stopped before saving chapter {actual_num}") + return + + if len(translated_chunks) > 1: + print(f" 📎 Merging {len(translated_chunks)} chunks...") + translated_chunks.sort(key=lambda x: x[1]) + merged_result = chapter_splitter.merge_translated_chunks(translated_chunks) + else: + merged_result = translated_chunks[0][0] if translated_chunks else "" + + if config.CONTEXTUAL and len(translated_chunks) > 1: + user_summary, assistant_summary = chunk_context_manager.get_summary_for_history() + + if user_summary and assistant_summary: + history_manager.append_to_history( + user_summary, + assistant_summary, + config.HIST_LIMIT, + reset_on_limit=False, + rolling_window=config.TRANSLATION_HISTORY_ROLLING + ) + print(f" 📝 Added chapter summary to history") + + chunk_context_manager.clear() + + # For text file chunks, ensure we pass the decimal number + if is_text_file and c.get('is_chunk', False) and isinstance(c.get('num'), float): + fname = FileUtilities.create_chapter_filename(c, c['num']) # Use the decimal num directly + else: + fname = FileUtilities.create_chapter_filename(c, actual_num) + + client.set_output_filename(fname) + cleaned = re.sub(r"^```(?:html)?\s*\n?", "", merged_result, count=1, flags=re.MULTILINE) + cleaned = re.sub(r"\n?```\s*$", "", cleaned, count=1, flags=re.MULTILINE) + + cleaned = ContentProcessor.clean_ai_artifacts(cleaned, 
remove_artifacts=config.REMOVE_AI_ARTIFACTS)
+
+                if is_mixed_content and image_translations:
+                    print(f"🔀 Merging {len(image_translations)} image translations with text...")
+                    from bs4 import BeautifulSoup
+                    # Parse the translated text (which has the translated title/header)
+                    soup_translated = BeautifulSoup(cleaned, 'html.parser')
+
+                    # For each image translation, insert it into the document.
+                    # Minimal reconstruction: skip empty results and append each
+                    # translation block to the document; the original may have
+                    # anchored each block at its matching img tag instead.
+                    for img_path, translation_html in image_translations.items():
+                        if translation_html and '<' in translation_html:
+                            soup_translated.append(BeautifulSoup(translation_html, 'html.parser'))
+
+                    cleaned = str(soup_translated)
+
+                # Save the finished chapter and record progress (mirrors the
+                # empty- and image-only branches above)
+                with open(os.path.join(out, fname), 'w', encoding='utf-8') as f:
+                    f.write(cleaned)
+                progress_manager.update(idx, actual_num, content_hash, fname, status="completed")
+                progress_manager.save()
+                chapters_completed += 1
+
+    if is_text_file:
+        # Combine the translated sections into one plain-text file
+        try:
+            combined_path = os.path.join(out, "translated_combined.txt")  # output name assumed
+            with open(combined_path, 'w', encoding='utf-8') as combined:
+                current_main_chapter = None
+                # translated_sections (name assumed) holds (chapter_data, content,
+                # chunk_idx, total_chunks) tuples for the sections written above;
+                # the grouping below is a best-effort sketch of the original logic
+                for i, (chapter_data, content, chunk_idx, total_chunks) in enumerate(translated_sections):
+                    if chapter_data.get('is_chunk', False):
+                        # First chunk of a chapter: separator plus the chapter title
+                        if chunk_idx == 1:
+                            current_main_chapter = chapter_data['num']
+                            if i > 0:
+                                combined.write(f"\n\n{'='*50}\n\n")
+
+                            # Write the original chapter title (without Part X/Y suffix)
+                            original_title = chapter_data['title']
+                            # Remove the (Part X/Y) suffix if present
+                            if ' (Part ' in original_title:
+                                original_title = original_title.split(' (Part ')[0]
+
+                            combined.write(f"{original_title}\n\n")
+
+                        # Add the chunk content
+                        combined.write(content)
+
+                        # Add spacing between chunks of the same chapter
+                        if chunk_idx < total_chunks:
+                            combined.write("\n\n")
+                    else:
+                        # This is a standalone chapter
+                        current_main_chapter = chapter_data['num']
+
+                        # Add separator if not first chapter
+                        if i > 0:
+                            combined.write(f"\n\n{'='*50}\n\n")
+
+                        # Write the chapter title
+                        combined.write(f"{chapter_data['title']}\n\n")
+
+                        # Add the content
+                        combined.write(content)
+
+            print(f" • Combined file with preserved sections: {combined_path}")
+
+            total_time = time.time() - translation_start_time
+            hours = int(total_time // 3600)
+            minutes = int((total_time % 3600) // 60)
+            seconds = int(total_time % 60)
+
+            print(f"\n⏱️ Total translation time: {hours}h {minutes}m {seconds}s")
+            print(f"📊 Chapters completed: {chapters_completed}")
+            print("✅ Text file translation complete!")
+
+            if log_callback:
+                log_callback(f"✅ Text file translation complete! Created {combined_path}")
+
+        except Exception as e:
+            print(f"❌ Error creating combined text file: {e}")
+            if log_callback:
+                log_callback(f"❌ Error creating combined text file: {e}")
+    else:
+        print("🔍 Checking for translated chapters...")
+        # Respect the retain-extension toggle: if enabled, don't look for the response_ prefix
+        if should_retain_source_extension():
+            response_files = [f for f in os.listdir(out) if f.endswith('.html') and not f.startswith('chapter_')]
+        else:
+            response_files = [f for f in os.listdir(out) if f.startswith('response_') and f.endswith('.html')]
+        chapter_files = [f for f in os.listdir(out) if f.startswith('chapter_') and f.endswith('.html')]
+
+        if not response_files and chapter_files:
+            if should_retain_source_extension():
+                print(f"⚠️ No translated files found, but {len(chapter_files)} original chapters exist")
+                print("ℹ️ Retain-source-extension mode is ON: skipping placeholder creation and using original files for EPUB compilation.")
+            else:
+                print(f"⚠️ No translated files found, but {len(chapter_files)} original chapters exist")
+                print("📝 Creating placeholder response files for EPUB compilation...")
+
+                for chapter_file in chapter_files:
+                    response_file = chapter_file.replace('chapter_', 'response_', 1)
+                    src = os.path.join(out, chapter_file)
+                    dst = os.path.join(out, response_file)
+
+                    try:
+                        with open(src, 'r', encoding='utf-8') as f:
+                            content = f.read()
+
+                        soup = BeautifulSoup(content, 'html.parser')
+                        notice = soup.new_tag('p')
+                        notice.string = "[Note: This chapter could not be translated - showing original content]"
+                        notice['style'] = "color: red; font-style: italic;"
+
+                        if soup.body:
+                            soup.body.insert(0, notice)
+
+                        with open(dst, 'w', encoding='utf-8') as f:
+                            f.write(str(soup))
+
+                    except Exception as e:
+                        print(f"⚠️ Error processing {chapter_file}: {e}")
+                        try:
+                            shutil.copy2(src, dst)
+                        except Exception:
+                            # Fall back silently if even a plain copy fails
+                            pass
+
+                print(f"✅ Created {len(chapter_files)} placeholder response files")
+                print("⚠️ Note: The EPUB will contain untranslated content")
+
+        print("📘 Building final EPUB…")
+        try:
+            from epub_converter import fallback_compile_epub
+            fallback_compile_epub(out, log_callback=log_callback)
+            print("✅ All done: your final EPUB is in", out)
+
+            total_time = time.time() - translation_start_time
+            hours = int(total_time // 3600)
+            minutes = int((total_time % 3600) // 60)
+            seconds = int(total_time % 60)
+
+            print("\n📊 Translation Statistics:")
+            print(f" • Total chunks processed: {chunks_completed}")
+            print(f" • Total time: {hours}h {minutes}m {seconds}s")
+            if chunks_completed > 0:
+                avg_time = total_time / chunks_completed
+                print(f" • Average time per chunk: {avg_time:.1f} seconds")
+
+            stats = progress_manager.get_stats(out)
+            print("\n📊 Progress Tracking Summary:")
+            print(f" • Total chapters tracked: {stats['total_tracked']}")
+            print(f" • Successfully completed: {stats['completed']}")
+            print(f" • Missing files: {stats['missing_files']}")
+            print(f" • In progress: {stats['in_progress']}")
+
+        except Exception as e:
+            print("❌ EPUB build failed:", e)
+
+    print("TRANSLATION_COMPLETE_SIGNAL")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file