Spaces:

Norelad
/

coptic-translation-interface

Sleeping

coptic-translation-interface / apertus_ui.py

Rogaton

Display Prolog validation results in dependency parsing output

6d0a56b 24 days ago

40.6 kB

	import streamlit as st
	import os
	import xml.etree.ElementTree as ET
	import re
	import sys

	# Try importing transformers with detailed error handling
	try:
	import torch
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	except ImportError as e:
	st.error(f"""
	### ❌ Transformers Import Error

	Failed to import required transformers components: {e}

	Debug Info:
	- Python version: {sys.version}
	- Torch available: {('torch' in sys.modules)}

	This usually means:
	1. The Docker container is still rebuilding (wait 2-5 minutes)
	2. Dependencies weren't installed correctly
	3. There's a version conflict in requirements.txt

	Please check the HuggingFace Space build logs or try rebuilding the Space.
	""")
	st.stop()

	from huggingface_hub import InferenceClient
	from coptic_parser_core import CopticParserCore

	# ========================================
	# COPTIC TRANSLATOR PREPROCESSING FUNCTIONS
	# ========================================
	# These functions convert between Coptic Unicode and Greek transcription
	# Required for Coptic translator models (MarianMT-based)

	# Coptic letter -> transcription symbol (Greek letters, plus Latin/IPA
	# stand-ins for the Coptic-only letters). Read by greekify().
	# NOTE(review): greekify() lowercases each character before the lookup, so the
	# uppercase entries below are not reachable through it - confirm whether any
	# other caller relies on them.
	COPTIC_TO_GREEK = {
	"ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
	"ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
	"ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
	"ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ",
	"ⲱ": "ω",
	# Coptic-specific characters (must match model training)
	"ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ",
	"ϭ": "c", "ϯ": "ti",
	# Uppercase variants
	"Ⲁ": "Α", "Ⲃ": "Β", "Ⲅ": "Γ", "Ⲇ": "Δ", "Ⲉ": "Ε", "Ⲍ": "Ζ", "Ⲏ": "Η", "Ⲑ": "Θ",
	"Ⲓ": "Ι", "Ⲕ": "Κ", "Ⲗ": "Λ", "Ⲙ": "Μ", "Ⲛ": "Ν", "Ⲝ": "Ξ", "Ⲟ": "Ο", "Ⲡ": "Π",
	"Ⲣ": "Ρ", "Ⲥ": "Σ", "Ⲧ": "Τ", "Ⲩ": "Υ", "Ⲫ": "Φ", "Ⲭ": "Χ", "Ⲯ": "Ψ", "Ⲱ": "Ω",
	"Ϣ": "Ʃ", "Ϥ": "F", "Ϧ": "X", "Ϩ": "H", "Ϫ": "Ɉ", "Ϭ": "C", "Ϯ": "TI"
	}

	# Reverse table: transcription symbol -> Coptic letter. Read by degreekify().
	# Besides inverting COPTIC_TO_GREEK it also maps final sigma "ς" to "ⲥ".
	# The "ti" / "TI" keys are two characters long, so a plain per-character
	# lookup cannot hit them; degreekify() checks for that digraph explicitly.
	GREEK_TO_COPTIC = {
	"α": "ⲁ", "β": "ⲃ", "γ": "ⲅ", "δ": "ⲇ", "ε": "ⲉ", "ϛ": "ⲋ",
	"ζ": "ⲍ", "η": "ⲏ", "θ": "ⲑ", "ι": "ⲓ", "κ": "ⲕ", "λ": "ⲗ",
	"μ": "ⲙ", "ν": "ⲛ", "ξ": "ⲝ", "ο": "ⲟ", "π": "ⲡ", "ρ": "ⲣ",
	"σ": "ⲥ", "ς": "ⲥ", "τ": "ⲧ", "υ": "ⲩ", "φ": "ⲫ", "χ": "ⲭ", "ψ": "ⲯ",
	"ω": "ⲱ",
	# Coptic-specific characters (must match model training)
	"ʃ": "ϣ", "f": "ϥ", "x": "ϧ", "h": "ϩ", "ɟ": "ϫ",
	"c": "ϭ", "ti": "ϯ",
	# Uppercase variants
	"Α": "Ⲁ", "Β": "Ⲃ", "Γ": "Ⲅ", "Δ": "Ⲇ", "Ε": "Ⲉ", "Ζ": "Ⲍ", "Η": "Ⲏ", "Θ": "Ⲑ",
	"Ι": "Ⲓ", "Κ": "Ⲕ", "Λ": "Ⲗ", "Μ": "Ⲙ", "Ν": "Ⲛ", "Ξ": "Ⲝ", "Ο": "Ⲟ", "Π": "Ⲡ",
	"Ρ": "Ⲣ", "Σ": "Ⲥ", "Τ": "Ⲧ", "Υ": "Ⲩ", "Φ": "Ⲫ", "Χ": "Ⲭ", "Ψ": "Ⲯ", "Ω": "Ⲱ",
	"Ʃ": "Ϣ", "F": "Ϥ", "X": "Ϧ", "H": "Ϩ", "Ɉ": "Ϫ", "C": "Ϭ", "TI": "Ϯ"
	}

	def greekify(coptic_text):
	    """Transliterate Coptic Unicode into the transcription the translator models read.

	    Each character is lowercased first and then looked up in COPTIC_TO_GREEK;
	    characters without an entry pass through in their lowercased form.
	    """
	    lowered_chars = (ch.lower() for ch in coptic_text)
	    return "".join(COPTIC_TO_GREEK.get(ch, ch) for ch in lowered_chars)

	def degreekify(greek_text):
	    """Convert Greek transcription back to Coptic Unicode.

	    The two-character sequence 'ti' (any letter case) is converted to a single
	    Coptic letter before single-character lookups are tried. The pair is looked
	    up with its original case first, so 'TI' becomes the uppercase letter from
	    GREEK_TO_COPTIC; any other casing ('ti', 'Ti', 'tI') falls back to the
	    lowercase entry. Characters without a table entry are copied unchanged.

	    Args:
	        greek_text: Transcribed text, e.g. decoded model output.

	    Returns:
	        The text with every mapped symbol replaced by its Coptic letter.
	    """
	    result = []
	    i = 0
	    length = len(greek_text)
	    while i < length:
	        # Check for the two-character sequence first
	        pair = greek_text[i:i + 2]
	        if len(pair) == 2 and pair.lower() == 'ti':
	            # Exact-case hit keeps 'TI' uppercase; otherwise use the 'ti' entry.
	            fallback = GREEK_TO_COPTIC.get('ti', pair)
	            result.append(GREEK_TO_COPTIC.get(pair, fallback))
	            i += 2
	            continue
	        # Single character
	        symbol = greek_text[i]
	        result.append(GREEK_TO_COPTIC.get(symbol, symbol))
	        i += 1
	    return ''.join(result)

	# Coptic alphabet helper: uppercase letter -> letter name shown in the sidebar.
	# Ⲋ (U+2C8A) is Sou; Zeta is the separate letter Ⲍ (U+2C8C). Both are listed so
	# the reference agrees with the letters used in COPTIC_TO_GREEK.
	COPTIC_ALPHABET = {
	    'Ⲁ': 'Alpha', 'Ⲃ': 'Beta', 'Ⲅ': 'Gamma', 'Ⲇ': 'Delta', 'Ⲉ': 'Epsilon', 'Ⲋ': 'Sou',
	    'Ⲍ': 'Zeta',
	    'Ⲏ': 'Eta', 'Ⲑ': 'Theta', 'Ⲓ': 'Iota', 'Ⲕ': 'Kappa', 'Ⲗ': 'Lambda', 'Ⲙ': 'Mu',
	    'Ⲛ': 'Nu', 'Ⲝ': 'Xi', 'Ⲟ': 'Omicron', 'Ⲡ': 'Pi', 'Ⲣ': 'Rho', 'Ⲥ': 'Sigma',
	    'Ⲧ': 'Tau', 'Ⲩ': 'Upsilon', 'Ⲫ': 'Phi', 'Ⲭ': 'Chi', 'Ⲯ': 'Psi', 'Ⲱ': 'Omega',
	    'Ϣ': 'Shai', 'Ϥ': 'Fai', 'Ϧ': 'Khei', 'Ϩ': 'Hori', 'Ϫ': 'Gangia', 'Ϭ': 'Shima', 'Ϯ': 'Ti',
	}

	# Coptic linguistic prompts (will be formatted with target language)
	def get_coptic_prompts(target_language):
	    """Build the instruction prompts for the LLM-backed Coptic analysis tasks.

	    Returns a dict keyed by analysis type; each value is the instruction text
	    with `target_language` interpolated as the language to respond in.
	    """
	    translation_prompt = (
	        "You are a professional Coptic translator. "
	        f"Translate the following Coptic text to {target_language}.\n\n"
	        "IMPORTANT: Provide ONLY the direct translation. Do not include:\n"
	        "- The original Coptic text\n"
	        "- Explanations or commentary\n"
	        "- Notes about context or meaning\n"
	        f"- Any text other than the {target_language} translation\n\n"
	        "Coptic text to translate:"
	    )

	    prompts = {}
	    prompts['dialect_analysis'] = f"Analyze the Coptic dialect of this text and identify linguistic features. Respond in {target_language}:"
	    prompts['translation'] = translation_prompt
	    prompts['transcription'] = f"Provide a romanized transcription of this Coptic text. Respond in {target_language}:"
	    prompts['morphology'] = f"Analyze the morphological structure of these Coptic words. Respond in {target_language}:"
	    prompts['lexicon_lookup'] = f"Look up these Coptic words and provide definitions with Greek etymologies. Respond in {target_language}:"
	    return prompts

	# Lexicon loader
	@st.cache_data
	def load_coptic_lexicon(file_path=None):
	    """Load a Coptic lexicon from a TEI XML file or a delimited text file.

	    Args:
	        file_path: Path to the lexicon. Paths ending in ``.xml`` are parsed as
	            TEI (namespace http://www.tei-c.org/ns/1.0); anything else is read
	            as UTF-8 text with one ``word<sep>definition`` entry per line,
	            where ``<sep>`` is the first of TAB, ``|``, ``,`` or ``;`` that
	            occurs in the line.

	    Returns:
	        dict mapping headword -> definition. Empty when `file_path` is falsy
	        or does not exist. On a read/parse error the message is shown with
	        st.error and whatever was collected so far is returned.
	    """
	    if not file_path or not os.path.exists(file_path):
	        return {}

	    lexicon = {}

	    try:
	        # Handle XML format (TEI structure for Comprehensive Coptic Lexicon)
	        if file_path.endswith('.xml'):
	            ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
	            root = ET.parse(file_path).getroot()
	            entries = root.findall('.//tei:entry', ns)

	            for entry in entries[:100]:  # Limit to first 100 entries for performance
	                # Prefer the lemma form, falling back to any form. Compare with
	                # None explicitly: Element truth-testing is deprecated and is
	                # False for a childless element, so `a or b` is unreliable.
	                form = entry.find('.//tei:form[@type="lemma"]', ns)
	                if form is None:
	                    form = entry.find('.//tei:form', ns)

	                coptic_word = ""
	                if form is not None:
	                    orth = form.find('.//tei:orth', ns)
	                    if orth is not None and orth.text:
	                        coptic_word = orth.text.strip()

	                # Definition = text of the first two senses, joined with "; "
	                definitions = []
	                for sense in entry.findall('.//tei:sense', ns)[:2]:
	                    def_elem = sense.find('.//tei:def', ns)
	                    if def_elem is not None and def_elem.text:
	                        definitions.append(def_elem.text.strip())
	                definition = "; ".join(definitions)

	                if coptic_word and definition:
	                    # Clean Coptic word (preserve Coptic and Greek Unicode)
	                    coptic_word = re.sub(r'[^\u2C80-\u2CFF\u03B0-\u03FF\u1F00-\u1FFF\w\s\-]', '', coptic_word).strip()
	                    if coptic_word:
	                        lexicon[coptic_word] = definition[:200]  # Limit definition length

	        # Handle text formats
	        else:
	            with open(file_path, 'r', encoding='utf-8') as f:
	                for line in f:
	                    line = line.strip()
	                    if not line:
	                        continue

	                    # Support multiple separators; first one present wins.
	                    # The pipe is a plain '|' - this is substring matching,
	                    # not a regex, so it must not be backslash-escaped.
	                    separator = next(
	                        (sep for sep in ('\t', '|', ',', ';') if sep in line),
	                        None,
	                    )
	                    if separator is None:
	                        continue

	                    word, _, definition = line.partition(separator)
	                    lexicon[word.strip()] = definition.strip()

	    except Exception as e:
	        st.error(f"Error loading lexicon: {str(e)}")

	    return lexicon

	# ========================================
	# COPTIC TRANSLATOR MODEL LOADING
	# ========================================
	# Load and cache Coptic translation models

	@st.cache_resource
	def load_coptic_to_english_model():
	    """Load the Coptic → English model (Norelad/coptic-megalaa-finetuned).

	    Returns (tokenizer, model, device) with device being "cuda" or "cpu", or
	    (None, None, None) when loading raises; the error is shown via st.error.
	    """
	    # NOTE(review): the failure tuple is returned from inside the cached
	    # function, so st.cache_resource presumably memoizes it as well - confirm
	    # whether a transient download failure should stay cached.
	    try:
	        with st.spinner("📥 Loading Coptic→English model (first time only, ~600MB)..."):
	            checkpoint = "Norelad/coptic-megalaa-finetuned"
	            tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	            model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

	            # Prefer the GPU when torch can see one
	            if torch.cuda.is_available():
	                device = "cuda"
	            else:
	                device = "cpu"
	            model = model.to(device)

	            st.success(f"✅ Coptic→English model loaded on {device.upper()}")
	            return tokenizer, model, device
	    except Exception as e:
	        st.error(f"Failed to load Coptic→English model: {e}")
	        return None, None, None

	@st.cache_resource
	def load_english_to_coptic_model():
	    """Load the English → Coptic model (megalaa/english-coptic-translator).

	    Returns (tokenizer, model, device) with device being "cuda" or "cpu", or
	    (None, None, None) when loading raises; the error is shown via st.error.
	    """
	    # NOTE(review): the failure tuple is returned from inside the cached
	    # function, so st.cache_resource presumably memoizes it as well - confirm
	    # whether a transient download failure should stay cached.
	    try:
	        with st.spinner("📥 Loading English→Coptic model (first time only, ~600MB)..."):
	            checkpoint = "megalaa/english-coptic-translator"
	            tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	            model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

	            # Prefer the GPU when torch can see one
	            if torch.cuda.is_available():
	                device = "cuda"
	            else:
	                device = "cpu"
	            model = model.to(device)

	            st.success(f"✅ English→Coptic model loaded on {device.upper()}")
	            return tokenizer, model, device
	    except Exception as e:
	        st.error(f"Failed to load English→Coptic model: {e}")
	        return None, None, None

	def translate_coptic_to_english(text, dialect='cop-sa'):
	    """Translate Coptic text to English with the locally loaded translator.

	    Args:
	        text: Coptic text to translate
	        dialect: Coptic dialect code ('cop-sa' Sahidic, 'cop-bo' Bohairic);
	            'cop' and any unrecognised value are treated as Sahidic.

	    Returns:
	        The decoded translation, or a string starting with "Error:" /
	        "Translation error:" when the model is unavailable or generation fails.
	    """
	    tokenizer, model, device = load_coptic_to_english_model()
	    if tokenizer is None or model is None:
	        return "Error: Model not loaded. Please check your internet connection."

	    try:
	        # Dialect tags (required by the Norelad/coptic-megalaa-finetuned model)
	        tag_by_dialect = {
	            'cop-sa': 'з',  # Sahidic (Cyrillic 'з')
	            'cop-bo': 'б',  # Bohairic (Cyrillic 'б')
	            'cop': 'з',     # Default to Sahidic for generic Coptic
	        }
	        tag = tag_by_dialect.get(dialect, 'з')

	        # Model input is "<tag> <transcription>"
	        transcription = greekify(text.lower())
	        model_input = f"{tag} {transcription}"

	        encoded = tokenizer(model_input, return_tensors="pt", padding=True).to(device)
	        generation_options = dict(max_new_tokens=128, num_beams=5, early_stopping=True)
	        generated = model.generate(**encoded, **generation_options)

	        # Only the first returned sequence is decoded
	        return tokenizer.decode(generated[0], skip_special_tokens=True)
	    except Exception as e:
	        return f"Translation error: {e}"

	def translate_english_to_coptic(text):
	    """Translate English text to Coptic with the locally loaded translator.

	    The model output is a Greek-style transcription, which is converted to
	    Coptic Unicode with degreekify() before being returned. Returns a string
	    starting with "Error:" / "Translation error:" when the model is
	    unavailable or generation fails.
	    """
	    tokenizer, model, device = load_english_to_coptic_model()
	    if tokenizer is None or model is None:
	        return "Error: Model not loaded. Please check your internet connection."

	    try:
	        # Input is already English, so it is tokenized as-is
	        encoded = tokenizer(text, return_tensors="pt", padding=True).to(device)
	        generation_options = dict(max_new_tokens=128, num_beams=5, early_stopping=True)
	        generated = model.generate(**encoded, **generation_options)

	        # Only the first returned sequence is decoded, then mapped to Coptic
	        transcription = tokenizer.decode(generated[0], skip_special_tokens=True)
	        return degreekify(transcription)
	    except Exception as e:
	        return f"Translation error: {e}"

	# Language detection and UI
	# Language code -> label shown in the selectors. The three 'cop*' codes are
	# the ones the rest of the script tests for to enable the Coptic tools.
	LANGUAGES = {
	'en': 'English', 'es': 'Español', 'fr': 'Français', 'de': 'Deutsch',
	'zh': '中文', 'ja': '日本語', 'ar': 'العربية', 'hi': 'हिन्दी',
	'cop': 'Coptic (ⲘⲉⲧⲢⲉⲙ̀ⲛⲭⲏⲙⲓ)', 'cop-sa': 'Sahidic Coptic', 'cop-bo': 'Bohairic Coptic'
	}

	st.set_page_config(page_title="Apertus Chat", layout="wide")

	# Initialize variables (so they're accessible throughout the script)
	# analysis_type / target_lang are only reassigned further down when a Coptic
	# language is selected; otherwise they keep these defaults.
	analysis_type = None
	target_lang = None
	target_language_name = "English"

	# Language selector
	# Options are the LANGUAGES codes; format_func shows the label for each code.
	selected_lang = st.selectbox("Language / Langue / Idioma",
	options=list(LANGUAGES.keys()),
	format_func=lambda x: LANGUAGES[x])

	# Sidebar for Coptic tools
	with st.sidebar:
	st.header("Coptic Tools")

	# Translation Model Selection
	st.subheader("🤖 Translation Model")
	st.info("✨ NEW: Using specialized Coptic translator models (free, no API token needed!)")
	st.markdown("Models: `Norelad/coptic-megalaa-finetuned` & `megalaa/english-coptic-translator`")

	# Optional: HuggingFace API Token for advanced features
	with st.expander("⚙️ Advanced: Use Apertus-8B (optional)"):
	st.caption("For multi-language translation beyond English-Coptic")
	hf_token_input = st.text_input(
	"HuggingFace API Token",
	type="password",
	help="Optional: For Apertus-8B multi-language support"
	)
	use_apertus = st.checkbox("Use Apertus-8B instead of local Coptic translator", value=False)
	# hf_token_input doubles as the "API enabled" flag for the rest of the script:
	# it is reset to None whenever the checkbox is off, even if a token was typed.
	if hf_token_input and use_apertus:
	st.success("✅ Apertus-8B enabled")
	elif not use_apertus:
	hf_token_input = None # Disable API usage

	st.divider()

	# Lexicon file uploader
	st.subheader("📚 Lexicon Upload")
	lexicon_file = st.file_uploader(
	    "Upload Coptic Lexicon (optional)",
	    type=['txt', 'tsv', 'csv', 'xml'],
	    help="Supports: Text (TAB/pipe separated), XML (TEI format), CSV\nNote: Comprehensive lexicon is pre-loaded"
	)

	# Load lexicon. coptic_lexicon always ends up a dict (possibly empty).
	coptic_lexicon = {}
	if lexicon_file:
	    temp_path = None
	    try:
	        uploaded_bytes = lexicon_file.getvalue()
	        # Check file size (max 20MB)
	        if len(uploaded_bytes) > 20 * 1024 * 1024:
	            st.error("❌ File too large (max 20MB)")
	        else:
	            import hashlib
	            import tempfile

	            # load_coptic_lexicon is cached on its path argument. A fixed temp
	            # name would make every later upload with the same extension hit
	            # the first upload's cache entry, so the name is derived from the
	            # file content: same content -> cache hit, new content -> reload.
	            extension = lexicon_file.name.split('.')[-1]
	            digest = hashlib.sha256(uploaded_bytes).hexdigest()[:16]
	            temp_path = os.path.join(
	                tempfile.gettempdir(), f"temp_lexicon_{digest}.{extension}"
	            )
	            with open(temp_path, "wb") as f:
	                f.write(uploaded_bytes)

	            coptic_lexicon = load_coptic_lexicon(temp_path)

	            if coptic_lexicon:
	                st.success(f"✅ Loaded {len(coptic_lexicon)} lexicon entries from {lexicon_file.name}")
	            else:
	                st.warning("⚠️ File uploaded but no valid entries found")
	                coptic_lexicon = {}
	    except Exception as e:
	        st.error(f"❌ Error loading file: {str(e)}")
	        st.info("💡 Supported formats: Plain text (TAB/pipe separated), XML (TEI), CSV")
	        coptic_lexicon = {}
	    finally:
	        # Clean up temp file, also when loading raised
	        if temp_path and os.path.exists(temp_path):
	            os.remove(temp_path)
	else:
	    # Try to load the comprehensive lexicon if available
	    comprehensive_lexicon_path = "Comprehensive_Coptic_Lexicon-v1.2-2020.xml"
	    if os.path.exists(comprehensive_lexicon_path):
	        coptic_lexicon = load_coptic_lexicon(comprehensive_lexicon_path)
	        if coptic_lexicon:
	            st.info(f"📚 Loaded Comprehensive Coptic Lexicon: {len(coptic_lexicon)} entries")
	        else:
	            coptic_lexicon = {}

	# Coptic alphabet reference
	# The expander must be entered with `with` so the letters render inside it;
	# testing it with `if` only checks the container object and renders outside.
	with st.expander("Coptic Alphabet"):
	    for letter, name in COPTIC_ALPHABET.items():
	        st.text(f"{letter} - {name}")

	# Lexicon search (only offered when a lexicon was loaded)
	if coptic_lexicon:
	    st.subheader("Lexicon Search")

	    # The typed/clicked search text lives in session state so it survives reruns
	    if "search_term" not in st.session_state:
	        st.session_state.search_term = ""

	    # Virtual Coptic keyboard
	    st.write("Virtual Keyboard:")
	    coptic_letters = ['ⲁ', 'ⲃ', 'ⲅ', 'ⲇ', 'ⲉ', 'ⲍ', 'ⲏ', 'ⲑ', 'ⲓ', 'ⲕ', 'ⲗ', 'ⲙ', 'ⲛ', 'ⲝ', 'ⲟ', 'ⲡ', 'ⲣ', 'ⲥ', 'ⲧ', 'ⲩ', 'ⲫ', 'ⲭ', 'ⲯ', 'ⲱ', 'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϯ']

	    # Four rows of eight columns, created up front in row order
	    keyboard_rows = [st.columns(8) for _ in range(4)]

	    # Letter N goes to row N // 8, column N % 8; a click appends it and reruns
	    for position, letter in enumerate(coptic_letters):
	        row, column = divmod(position, 8)
	        if keyboard_rows[row][column].button(letter, key=f"key_{letter}"):
	            st.session_state.search_term += letter
	            st.rerun()

	    # Control buttons
	    col_space, col_back, col_clear = st.columns(3)
	    if col_space.button("Space"):
	        st.session_state.search_term += " "
	        st.rerun()
	    if col_back.button("⌫ Backspace"):
	        st.session_state.search_term = st.session_state.search_term[:-1]
	        st.rerun()
	    if col_clear.button("Clear"):
	        st.session_state.search_term = ""
	        st.rerun()

	    # Search input - no key parameter, the value is synced by hand below
	    search_term = st.text_input("Search Coptic word:", value=st.session_state.search_term)

	    # Keep session state in step when the user types directly
	    if st.session_state.search_term != search_term:
	        st.session_state.search_term = search_term

	    if search_term:
	        if search_term in coptic_lexicon:
	            st.write(f"{search_term}")
	            st.write(coptic_lexicon[search_term])
	        else:
	            # Substring matches on the headword; only the first five are shown
	            partial = [headword for headword in coptic_lexicon if search_term in headword]
	            if not partial:
	                st.write("No matches found")
	            else:
	                st.write("Partial matches:")
	                for headword in partial[:5]:
	                    st.write(f"{headword} → {coptic_lexicon[headword][:100]}...")

	# Test Corpus Examples
	# Shown only for the Coptic interface languages. Reads coptic_test_corpus.json
	# from the directory of this script and lets the user load an example as the
	# next prompt via st.session_state['example_text'].
	if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
	st.divider()
	st.subheader("📖 Example Texts")

	try:
	import json
	from pathlib import Path

	corpus_path = Path(__file__).parent / "coptic_test_corpus.json"
	if corpus_path.exists():
	with open(corpus_path, 'r', encoding='utf-8') as f:
	corpus = json.load(f)

	# Category selection
	# Keys are looked up in corpus['categories']; values are the display labels.
	categories = {
	"simple_sentences": "Simple Sentences",
	"complex_sentences": "Complex Sentences",
	"short_texts": "Short Texts (Paragraphs)",
	"grammar_patterns": "Grammar Patterns"
	}

	selected_category = st.selectbox(
	"Choose category:",
	options=list(categories.keys()),
	format_func=lambda x: categories[x],
	key="corpus_category"
	)

	if selected_category in corpus['categories']:
	category_data = corpus['categories'][selected_category]

	if selected_category == 'grammar_patterns':
	# Handle grammar patterns differently
	# This category nests its examples under category_data['patterns'][i]['examples'].
	pattern_names = [p['pattern'] for p in category_data['patterns']]
	selected_pattern = st.selectbox("Select pattern:", pattern_names, key="pattern_select")

	pattern_data = next(p for p in category_data['patterns'] if p['pattern'] == selected_pattern)
	st.caption(f"Structure: {pattern_data['structure']}")

	example_texts = [f"{ex['coptic']} → {ex['english']}" for ex in pattern_data['examples']]
	selected_example_idx = st.selectbox(
	"Select example:",
	range(len(pattern_data['examples'])),
	format_func=lambda i: example_texts[i],
	key="pattern_example"
	)

	example = pattern_data['examples'][selected_example_idx]

	else:
	# Handle regular examples
	examples = category_data['examples']
	example_labels = []
	for ex in examples:
	# Label is the 'title' if present, else the Coptic text cut to 30 chars plus '...'
	label = ex.get('title', ex['coptic'][:30] + '...' if len(ex['coptic']) > 30 else ex['coptic'])
	example_labels.append(label)

	selected_example_idx = st.selectbox(
	"Select example:",
	range(len(examples)),
	format_func=lambda i: example_labels[i],
	key="example_select"
	)

	example = examples[selected_example_idx]

	# Display example details
	with st.expander("📝 View Example", expanded=True):
	st.markdown(f"Coptic:")
	st.code(example['coptic'], language="")
	st.markdown(f"English:")
	st.write(example['english'])

	if 'grammar_notes' in example:
	st.caption(f"Grammar: {example['grammar_notes']}")
	elif 'analysis' in example:
	st.caption(f"Analysis: {example['analysis']}")

	if 'source' in example:
	st.caption(f"Source: {example['source']}")

	# Load button
	# The chat section picks 'example_text' up on the rerun and uses it as the prompt.
	if st.button("📥 Load This Example", key="load_example", use_container_width=True):
	st.session_state['example_text'] = example['coptic']
	st.success("✓ Example loaded! Scroll down to chat input.")
	st.rerun()

	# NOTE(review): every exception from the block above (missing keys, invalid
	# JSON, ...) ends up here and is reported only as "not available"; `e` is unused.
	except Exception as e:
	st.info("💡 Test corpus not available")

	# Linguistic analysis options for Coptic input
	# Sets analysis_type (and, for the translation modes, target_lang and
	# target_language_name), which the chat handling below branches on.
	if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
	st.subheader("Analysis Type")
	analysis_type = st.selectbox("Choose analysis:",
	options=['dependency_parse', 'translation', 'parse_and_translate', 'dialect_analysis', 'transcription', 'morphology', 'lexicon_lookup'],
	format_func=lambda x: x.replace('_', ' ').title())

	# Target language selector for translation
	if analysis_type in ['translation', 'parse_and_translate']:
	st.subheader("Target Language")
	# The Coptic codes are filtered out, so only non-Coptic targets are offered.
	target_lang = st.selectbox("Translate to:",
	options=[k for k in LANGUAGES.keys() if k not in ['cop', 'cop-sa', 'cop-bo']],
	format_func=lambda x: LANGUAGES[x],
	index=0) # Default to English
	target_language_name = LANGUAGES[target_lang]
	else:
	# For non-translation tasks, use English as default output language
	target_language_name = "English"

	# Get prompts for the target language (only for LLM-based tasks)
	# COPTIC_PROMPTS is left undefined for 'dependency_parse' and 'parse_and_translate'.
	if analysis_type not in ['dependency_parse', 'parse_and_translate']:
	COPTIC_PROMPTS = get_coptic_prompts(target_language_name)

	# Use HuggingFace Inference API instead of loading model locally
	# This is much faster and doesn't require GPU
	# Model id passed as `model=` to every inference_client.chat_completion call below.
	MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"

	def get_inference_client(token=None):
	    """Create a HuggingFace InferenceClient from `token` or the Space secret.

	    An explicit truthy `token` wins; otherwise the `HF_TOKEN` entry of
	    st.secrets is used when present. Returns None when no token is available
	    or when creating the client raises (the error is shown via st.error).
	    """
	    try:
	        if token:
	            return InferenceClient(token=token)

	        # Try to get token from Space secrets as fallback
	        secret_available = hasattr(st, 'secrets') and 'HF_TOKEN' in st.secrets
	        if secret_available:
	            return InferenceClient(token=st.secrets['HF_TOKEN'])

	        return None
	    except Exception as e:
	        st.error(f"Error initializing inference client: {e}")
	        return None

	# Coptic dependency parser (one cached instance)
	@st.cache_resource
	def get_parser():
	    """Create the CopticParserCore instance, or return None if that fails.

	    Only the constructor is called here; nothing is pre-loaded, so any model
	    loading is left to the parser's first use.
	    """
	    try:
	        core = CopticParserCore()
	    except Exception as e:
	        st.error(f"Failed to initialize parser: {e}")
	        return None
	    return core

	# Chat interface
	# st.session_state.messages holds {"role", "content"} dicts for the transcript.
	if "messages" not in st.session_state:
	st.session_state.messages = []

	# Display chat history
	for message in st.session_state.messages:
	with st.chat_message(message["role"]):
	st.markdown(message["content"])

	# Check if an example was loaded from the test corpus
	# 'example_text' is set by the sidebar "Load This Example" button.
	prompt = None
	if 'example_text' in st.session_state:
	prompt = st.session_state['example_text']
	del st.session_state['example_text'] # Clear after using

	# User input (or use loaded example)
	# The chat input is only created when no example was loaded on this run.
	if not prompt:
	prompt = st.chat_input("Type your message...")

	# Everything from here on runs only when there is a prompt for this run.
	if prompt:
	# Handle dependency parsing (doesn't need API token)
	if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type == 'dependency_parse':
	# The raw prompt is stored in the history; the "Parse this text:" prefix
	# is only part of what is displayed on this run.
	st.session_state.messages.append({"role": "user", "content": prompt})

	with st.chat_message("user"):
	st.markdown(f"Parse this text: {prompt}")

	with st.chat_message("assistant"):
	with st.spinner("🔍 Parsing Coptic text..."):
	parser = get_parser()
	if parser:
	try:
	parse_result = parser.parse_text(prompt)

	if parse_result:
	# Display parse results
	# parse_result is read as a mapping with 'total_sentences', 'total_tokens'
	# and an optional 'prolog_validation' entry.
	st.success(f"✅ Parsed {parse_result['total_sentences']} sentence(s), {parse_result['total_tokens']} tokens")

	# Show formatted table
	table_output = parser.format_table(parse_result)
	st.markdown(table_output)

	# Display Prolog validation results if available
	if 'prolog_validation' in parse_result and parse_result['prolog_validation']:
	validation = parse_result['prolog_validation']

	st.divider()
	st.subheader("🔍 Prolog Validation (Walter Till Grammar)")

	# Show detected patterns
	if validation.get('patterns_detected'):
	st.success("✓ Grammatical Patterns Detected:")
	for pattern in validation['patterns_detected']:
	# Dict patterns flagged 'is_tripartite' get a description line plus a code
	# block; every other pattern is written out as-is.
	if isinstance(pattern, dict):
	if pattern.get('is_tripartite'):
	st.write(f"- Tripartite Sentence: {pattern.get('description', '')}")
	st.code(pattern.get('pattern', ''), language="")
	else:
	st.write(f"- {pattern}")
	else:
	st.write(f"- {pattern}")

	# Show warnings if any
	if validation.get('warnings'):
	st.warning("⚠ Grammatical Warnings:")
	for warning in validation['warnings']:
	st.write(f"- {warning}")

	# Show if no issues found
	if not validation.get('warnings') and not validation.get('patterns_detected'):
	st.info("✓ No grammatical issues detected")

	# Offer CoNLL-U download
	conllu_output = parser.format_conllu(parse_result)
	st.download_button(
	label="📥 Download CoNLL-U",
	data=conllu_output,
	file_name="coptic_parse.conllu",
	mime="text/plain"
	)

	# Only this one-line summary is kept in the chat history, not the table.
	response = f"Parse complete. {parse_result['total_sentences']} sentences analyzed."
	st.session_state.messages.append({"role": "assistant", "content": response})
	else:
	st.error("Failed to parse text. Please check the input.")
	except Exception as e:
	st.error(f"Parsing error: {e}")
	else:
	st.error("Parser not available. Please check Stanza installation.")

	st.stop() # Don't continue to translation

	# Initialize inference client if API token is provided (optional for local translator)
	# NOTE(review): get_inference_client() is only called when hf_token_input is
	# truthy, so its st.secrets fallback is not reached from here - confirm intent.
	inference_client = None
	if hf_token_input:
	inference_client = get_inference_client(hf_token_input)

	# Handle parse_and_translate mode
	if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type == 'parse_and_translate':
	st.session_state.messages.append({"role": "user", "content": prompt})

	with st.chat_message("user"):
	st.markdown(f"Parse and translate: {prompt}")

	with st.chat_message("assistant"):
	# First, parse
	# NOTE(review): unlike the dependency_parse branch, parse_text() is not
	# wrapped in try/except here and nothing is shown when the parser is None.
	st.subheader("📊 Dependency Analysis")
	with st.spinner("🔍 Parsing..."):
	parser = get_parser()
	if parser:
	parse_result = parser.parse_text(prompt)
	if parse_result:
	table_output = parser.format_table(parse_result)
	st.markdown(table_output)

	# Then, translate
	st.divider()
	st.subheader(f"🌍 Translation to {LANGUAGES[target_lang]}")

	with st.spinner("🤖 Translating with local Coptic translator..."):
	try:
	# Use local Coptic translator for Coptic→English translation
	if target_lang == 'en':
	translation = translate_coptic_to_english(prompt, dialect=selected_lang)
	st.markdown(translation)

	combined_response = f"Parse complete. Translation: {translation}"
	st.session_state.messages.append({"role": "assistant", "content": combined_response})
	else:
	# For non-English targets, need Apertus or show message
	if inference_client and hf_token_input:
	COPTIC_PROMPTS_TRANSLATE = get_coptic_prompts(target_language_name)
	translate_prompt = f"{COPTIC_PROMPTS_TRANSLATE['translation']} {prompt}"

	messages = [
	{"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
	{"role": "user", "content": translate_prompt}
	]

	response_stream = inference_client.chat_completion(
	model=MODEL_NAME,
	messages=messages,
	max_tokens=512,
	temperature=0.5,
	top_p=0.9,
	stream=True
	)

	# Stream the translation
	# The placeholder is rewritten with the text so far plus a cursor glyph,
	# then once more without the cursor when the stream ends.
	response_placeholder = st.empty()
	full_response = ""

	for message in response_stream:
	if message.choices[0].delta.content:
	full_response += message.choices[0].delta.content
	response_placeholder.markdown(full_response + "▌")

	response_placeholder.markdown(full_response)

	combined_response = f"Parse complete. Translation: {full_response}"
	st.session_state.messages.append({"role": "assistant", "content": combined_response})
	else:
	st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B. Please enable it in the sidebar.")
	st.info("💡 Local Coptic translator currently supports English↔Coptic only.")

	except Exception as e:
	st.error(f"❌ Translation error: {e}")

	st.stop() # Special handling complete

	# Standard translation/analysis handling
	# Builds full_prompt: the raw prompt for 'translation' and for non-Coptic
	# interface languages, otherwise the task instruction followed by the prompt.
	if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type is not None:
	# For translation, use raw text without prompt template
	if analysis_type == 'translation':
	full_prompt = prompt
	else:
	full_prompt = f"{COPTIC_PROMPTS[analysis_type]} {prompt}"

	# Add lexicon context for lexicon lookup
	# Only whitespace-separated words that are exact lexicon keys are appended.
	if analysis_type == 'lexicon_lookup' and coptic_lexicon:
	words_in_prompt = prompt.split()
	lexicon_matches = []
	for word in words_in_prompt:
	if word in coptic_lexicon:
	lexicon_matches.append(f"{word} = {coptic_lexicon[word]}")

	if lexicon_matches:
	full_prompt += f"\n\nLexicon entries found: {'; '.join(lexicon_matches)}"
	else:
	full_prompt = prompt

	st.session_state.messages.append({"role": "user", "content": prompt})

	with st.chat_message("user"):
	st.markdown(prompt)

	# Generate response using local Coptic translator or Apertus API
	with st.chat_message("assistant"):
	try:
	# Check if this is a Coptic→English translation task
	if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type == 'translation':
	# Use local Coptic translator (Norelad/coptic-megalaa-finetuned)
	if target_lang == 'en':
	with st.spinner("🤖 Translating with local Coptic translator..."):
	translation = translate_coptic_to_english(prompt, dialect=selected_lang)
	st.markdown(translation)
	st.session_state.messages.append({"role": "assistant", "content": translation})
	else:
	# Non-English target: requires Apertus
	# NOTE(review): full_prompt is the raw Coptic text in this mode, so the target
	# language is not mentioned anywhere in the request - confirm this is intended.
	if inference_client and hf_token_input:
	with st.spinner("🤖 Translating with Apertus-8B..."):
	messages = [
	{"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
	{"role": "user", "content": full_prompt}
	]

	response_stream = inference_client.chat_completion(
	model=MODEL_NAME,
	messages=messages,
	max_tokens=512,
	temperature=0.5,
	top_p=0.9,
	stream=True
	)

	# Same streaming display as elsewhere: text so far plus a cursor glyph.
	response_placeholder = st.empty()
	full_response = ""

	for message in response_stream:
	if message.choices[0].delta.content:
	full_response += message.choices[0].delta.content
	response_placeholder.markdown(full_response + "▌")

	response_placeholder.markdown(full_response)
	st.session_state.messages.append({"role": "assistant", "content": full_response})
	else:
	st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B.")
	st.info("💡 Enable Apertus-8B in the sidebar for multi-language support.")
	st.info("💡 Local Coptic translator currently supports English↔Coptic only.")

	# For non-translation tasks or other languages
	# Sent as a single user message, without a system message.
	else:
	if inference_client and hf_token_input:
	with st.spinner("🤖 Generating response..."):
	messages = [{"role": "user", "content": full_prompt}]

	response_stream = inference_client.chat_completion(
	model=MODEL_NAME,
	messages=messages,
	max_tokens=512,
	temperature=0.5,
	top_p=0.9,
	stream=True
	)

	response_placeholder = st.empty()
	full_response = ""

	for message in response_stream:
	if message.choices[0].delta.content:
	full_response += message.choices[0].delta.content
	response_placeholder.markdown(full_response + "▌")

	response_placeholder.markdown(full_response)
	st.session_state.messages.append({"role": "assistant", "content": full_response})
	else:
	st.warning("⚠️ This feature requires Apertus-8B. Please enable it in the sidebar.")
	st.info("💡 Coptic→English translation works without API token using local Coptic translator.")

	# Any failure in the branches above is reported here rather than crashing the run.
	except Exception as e:
	st.error(f"❌ Error: {str(e)}")
	st.info("💡 If using Apertus-8B, please verify your API token is valid.")