import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import warnings
import os
import spaces

# Silence all Python warnings for the lifetime of the process.
warnings.filterwarnings(action="ignore")

# Module-level cache slots; load_model() fills them on first use.
model = tokenizer = None

@spaces.GPU
def load_model():
    """Load the MiniCPM-o model and tokenizer once and cache them in module globals.

    The first call downloads/loads ``openbmb/MiniCPM-o-2_6`` with only the
    vision tower enabled (audio and TTS are disabled). Subsequent calls
    return the cached objects.

    Returns:
        tuple: ``(model, tokenizer)``.

    Raises:
        Exception: Whatever ``from_pretrained`` raises; in that case the
            globals are left untouched so a later call retries from scratch.
    """
    global model, tokenizer
    if model is None:
        print("正在加载MiniCPM-o模型...")
        use_cuda = torch.cuda.is_available()
        device = "cuda" if use_cuda else "cpu"

        loaded_model = AutoModel.from_pretrained(
            'openbmb/MiniCPM-o-2_6',
            trust_remote_code=True,
            torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
            init_vision=True,
            init_audio=False,
            init_tts=False,
        ).eval()

        # With device_map="auto" the weights are already placed by accelerate;
        # calling .to() on a dispatched model is at best a no-op and can warn
        # or fail when modules were split/offloaded. Only move it explicitly
        # on the CPU path, where no device map was used.
        if not use_cuda:
            loaded_model = loaded_model.to(device)

        loaded_tokenizer = AutoTokenizer.from_pretrained(
            'openbmb/MiniCPM-o-2_6', trust_remote_code=True
        )

        # Publish both objects together so a tokenizer failure can never leave
        # the cache half-initialised (model set, tokenizer still None).
        model, tokenizer = loaded_model, loaded_tokenizer
        print("模型加载完成")
    return model, tokenizer

def clean_markdown_output(text):
    """Reduce model output to the first Markdown table it contains.

    A table starts at the first line containing ``|`` (code-fence lines are
    never table rows). Once started, following non-empty lines are kept as
    continuation rows, and the table ends at the first blank line or at the
    first code fence (e.g. the closing fence of a fenced ``markdown`` block),
    so prose placed after the fence is not mistaken for table content.

    If no table row is found, the text is returned with code-fence markers
    removed, blank lines dropped, and lines starting with common explanatory
    phrases filtered out.

    Args:
        text: Raw text generated by the model.

    Returns:
        str: The table rows (or the cleaned fallback text) joined by newlines.
    """
    table_rows = []
    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        is_fence = line.startswith('```')
        if '|' in line and not is_fence:
            table_rows.append(line)
        elif not table_rows:
            # Still before the table: skip preamble and the opening fence.
            continue
        elif line == '' or is_fence:
            # Blank line or a fence after the table began: the table is over.
            break
        else:
            # Pipe-less line directly under the table: keep as continuation.
            table_rows.append(line)

    if table_rows:
        return '\n'.join(table_rows)

    # Fallback: no table detected, return the de-fenced text minus commentary.
    explanation_prefixes = ('这个表格', '该表格', '表格显示')
    unfenced = text.replace('```markdown', '').replace('```', '').strip()
    stripped_lines = (entry.strip() for entry in unfenced.split('\n'))
    return '\n'.join(
        entry
        for entry in stripped_lines
        if entry and not entry.startswith(explanation_prefixes)
    )

def clean_formula_output(text):
    """Reduce model output to the lines that look like LaTeX / math.

    A line is kept when it does not start with a known explanatory phrase and
    contains at least one digit or one math/LaTeX character
    (``$ \\ { } ^ _ = + - * / ( ) [ ]``).

    If no line qualifies, the text is returned with code-fence markers
    removed, blank lines dropped, and explanatory lines filtered out. The same
    phrase list is used for both passes so a line rejected as commentary in
    the first pass cannot reappear in the fallback.

    Args:
        text: Raw text generated by the model.

    Returns:
        str: The kept lines joined by newlines.
    """
    explanation_prefixes = (
        '这个公式', '该公式', '公式表示', '根据图片', '图片中的', '识别结果',
    )
    math_chars = frozenset('$\\{}^_=+-*/()[]')

    def is_commentary(line):
        return line.startswith(explanation_prefixes)

    def looks_like_math(line):
        return any(ch.isdigit() or ch in math_chars for ch in line)

    stripped_lines = [entry.strip() for entry in text.strip().split('\n')]
    formula_lines = [
        entry
        for entry in stripped_lines
        if entry and not is_commentary(entry) and looks_like_math(entry)
    ]
    if formula_lines:
        return '\n'.join(formula_lines)

    # Fallback: nothing formula-like found, return de-fenced text minus commentary.
    unfenced = text.replace('```latex', '').replace('```', '').strip()
    return '\n'.join(
        entry
        for entry in (part.strip() for part in unfenced.split('\n'))
        if entry and not is_commentary(entry)
    )

def clean_text_output(text):
    """Reduce model output to the recognised text, dropping commentary and labels.

    Processing per line (after removing code-fence markers and blank lines):

    * Lines starting with an introductory phrase (e.g. "识别结果") are dropped.
    * Lines starting with an ASCII-colon label (e.g. "标题:") have the label
      removed and the remaining content kept; a label with nothing after it
      is dropped. This mirrors the full-width-colon handling instead of
      discarding the recognised text that follows the label.
    * Lines of the form "<label>：<content>", where the label is at most 10
      characters and contains a known label keyword, are reduced to
      ``<content>``.
    * All other lines are kept unchanged.

    Args:
        text: Raw text generated by the model.

    Returns:
        str: The kept lines joined by newlines.
    """
    intro_prefixes = (
        '图片中的文字', '识别结果', '文字内容', '根据图片', '这张图片', '该图片',
    )
    ascii_labels = ('标题:', '正文:', '内容:', '文本:', '题目:', '段落:', '文字:')
    label_keywords = (
        '标题', '正文', '内容', '文本', '题目', '段落', '文字', '主题', '副标题',
    )

    unfenced = text.replace('```text', '').replace('```', '').strip()

    kept_lines = []
    for raw_line in unfenced.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith(intro_prefixes):
            continue

        # ASCII-colon label: strip the label, keep whatever text follows it.
        matched_label = next(
            (label for label in ascii_labels if line.startswith(label)), None
        )
        if matched_label is not None:
            content = line[len(matched_label):].strip()
            if content:
                kept_lines.append(content)
            continue

        # Full-width-colon label: short prefix containing a label keyword.
        label, separator, content = line.partition('：')
        is_label = (
            bool(separator)
            and len(label) <= 10
            and any(keyword in label for keyword in label_keywords)
        )
        kept_lines.append(content.strip() if is_label else line)

    return '\n'.join(kept_lines)

@spaces.GPU
def parse_image(image, parse_type):
    """Run the model on an image and return the cleaned result plus a status line.

    Args:
        image: A PIL image, a file path string, or ``None``.
        parse_type: One of "表格解析", "公式解析", "文本解析". Any other value
            uses the table prompt and returns the raw (stripped) output.

    Returns:
        tuple[str, str]: ``(result_text, status_text)``. When no image is
        given, or when any step fails, the first element carries the
        user-facing message instead of a parse result.
    """
    # Reject empty submissions before loading the model, which is expensive.
    if image is None:
        return "请上传一张图片", ""

    try:
        model, tokenizer = load_model()

        # Normalise the input to an RGB PIL image.
        if isinstance(image, str):
            # Close the file handle as soon as the pixel data has been copied.
            with Image.open(image) as opened:
                image = opened.convert('RGB')
        elif hasattr(image, 'convert'):
            image = image.convert('RGB')

        # Prompt sent to the model for each parse mode.
        questions = {
            "表格解析": "解析一下这个表格为markdown格式,不需要任何解释和思考,直接输出markdown格式",
            "公式解析": "识别并提取图片中的数学公式，用LaTeX格式输出，不需要任何解释，直接输出公式",
            "文本解析": "识别并提取图片中的所有文字内容，保持原有格式，不需要任何解释，直接输出文字内容"
        }
        question = questions.get(parse_type, questions["表格解析"])
        msgs = [{'role': 'user', 'content': [image, question]}]

        # The model streams text chunks; collect them into one string.
        stream = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            sampling=True,
            stream=True
        )
        generated_text = "".join(stream)

        # Post-processor and output-format label for each parse mode.
        cleaners = {
            "表格解析": (clean_markdown_output, "Markdown"),
            "公式解析": (clean_formula_output, "LaTeX"),
            "文本解析": (clean_text_output, "纯文本"),
        }
        if parse_type in cleaners:
            cleaner, output_format = cleaners[parse_type]
            result = cleaner(generated_text)
        else:
            result = generated_text.strip()
            output_format = "原始输出"

        return result, f"解析完成 - 输出格式: {output_format}"

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the app.
        return f"解析失败: {str(e)}", "错误"

def create_interface():
    """Build the Gradio Blocks UI and return it without launching.

    Layout: an intro Markdown section, a two-column row (image upload, parse
    mode radio and parse button on the left; status and result text boxes on
    the right), a row of clickable examples, and a footer. The parse button
    calls ``parse_image`` with the image and parse mode and writes its two
    return values to the result and status boxes.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    
    # Custom CSS: base font for the app and a monospace style for the result box.
    css = """
    .gradio-container {
        font-family: 'Helvetica Neue', Arial, sans-serif;
    }
    .output-text {
        font-family: 'Courier New', monospace;
        font-size: 14px;
    }
    """
    
    with gr.Blocks(css=css, title="MiniCPM 多模态内容解析工具", analytics_enabled=False) as interface:
        gr.Markdown("""
        # 🚀 MiniCPM 多模态内容解析工具
        
        基于MiniCPM-o多模态模型的智能图片内容解析工具，支持表格、公式、文本三种解析模式。
        
        ## 📋 使用说明
        1. **上传图片**: 支持 PNG、JPG、JPEG 等格式
        2. **选择解析类型**: 根据图片内容选择相应的解析模式
        3. **获取结果**: 自动清理输出，获得纯净的解析结果
        
        ## 🎯 解析类型说明
        - **📊 表格解析**: 将表格图片转换为Markdown格式
        - **🧮 公式解析**: 识别数学公式并输出LaTeX格式  
        - **📝 文本解析**: 提取图片中的所有文字内容
        """)
        
        with gr.Row():
            with gr.Column(scale=1):
                # Input widgets. The image is delivered to the callback as a PIL image.
                image_input = gr.Image(
                    label="📷 上传图片",
                    type="pil",
                    height=400
                )
                
                # Choices must match the parse_type keys handled in parse_image.
                parse_type = gr.Radio(
                    choices=["表格解析", "公式解析", "文本解析"],
                    value="表格解析",
                    label="🎛️ 选择解析类型",
                    info="根据图片内容选择合适的解析模式"
                )
                
                parse_button = gr.Button(
                    "🔍 开始解析",
                    variant="primary",
                    size="lg"
                )
                
            with gr.Column(scale=1):
                # Output widgets. The status box is read-only.
                status_output = gr.Textbox(
                    label="📊 解析状态",
                    value="等待上传图片...",
                    interactive=False
                )
                
                # The result box is editable and uses the .output-text CSS class.
                result_output = gr.Textbox(
                    label="📄 解析结果",
                    lines=20,
                    max_lines=30,
                    show_copy_button=True,
                    elem_classes=["output-text"],
                    placeholder="解析结果将在这里显示...",
                    interactive=True
                )
        
        # Example images; paths are relative, each paired with its parse mode.
        # NOTE(review): presumably these files ship next to this script - confirm.
        gr.Markdown("## 📖 示例图片")
        with gr.Row():
            gr.Examples(
                examples=[
                    ["./table.png", "表格解析"],
                    ["./formulas.png", "公式解析"], 
                    ["./text.png", "文本解析"]
                ],
                inputs=[image_input, parse_type],
                label="点击示例快速体验",
                cache_examples=False
            )
        
        # Wire the button: parse_image returns (result, status) in that order.
        parse_button.click(
            fn=parse_image,
            inputs=[image_input, parse_type],
            outputs=[result_output, status_output]
        )
        
        # Footer with usage tips and technical information.
        gr.Markdown("""
        ---
        ### 💡 使用提示
        - 确保图片清晰，内容结构明显
        - 复杂表格建议分段处理
        - 公式图片建议使用高分辨率
        - 文字图片避免模糊、倾斜或光线不足
        
        ### 🔧 技术支持
        - 模型: MiniCPM-o-2.6
        - 框架: Gradio + Transformers
        - GPU: CUDA加速推理
        """)
    
    return interface

if __name__ == "__main__":
    # The model is not preloaded here; load_model() runs on the first request.
    for banner in (
        "🚀 启动MiniCPM多模态内容解析工具",
        "📝 模型将在首次使用时自动加载",
    ):
        print(banner)

    # Server settings, collected in one place and passed to launch().
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "quiet": False,
        "debug": False,
    }

    # Build the UI, then start serving it.
    interface = create_interface()
    interface.launch(**launch_options)