import os
import json
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import logging
import traceback
import sys
from audio_processing import AudioProcessor
import spaces
from chunkedTranscriber import ChunkedTranscriber
from system_message import SYSTEM_MESSAGE

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)


def load_qa_model():
    """Load question-answering model with long context support."""
    try:
        from transformers import AutoModelForCausalLM, AwqConfig

        model_id = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=os.getenv("HF_TOKEN"))

        quantization_config = AwqConfig(
            bits=4,
            fuse_max_seq_len=8192,  # Maximum sequence length for the fused AWQ modules (long inputs)
            do_fuse=True,
        )

        # Load the model with a simplified rope_scaling configuration
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            device_map="auto",
            rope_scaling={
                "type": "dynamic",  # Simplified type as expected by the model
                "factor": 8.0       # Scaling factor to support longer contexts
            },
            use_auth_token=os.getenv("HF_TOKEN"),
            quantization_config=quantization_config
        )

        # Initialize the pipeline
        qa_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=1024,  # Limit generation as needed
        )
        return qa_pipeline
    except Exception as e:
        logger.error(f"Failed to load Q&A model: {str(e)}")
        return None


# def load_qa_model():
#     """Load question-answering model"""
#     try:
#         model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
#         qa_pipeline = pipeline(
#             "text-generation",
#             model="hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
#             model_kwargs={"torch_dtype": torch.bfloat16},
#             device_map="auto",
#             use_auth_token=os.getenv("HF_TOKEN")
#         )
#         return qa_pipeline
#     except Exception as e:
#         logger.error(f"Failed to load Q&A model: {str(e)}")
#         return None


def load_summarization_model():
    """Load summarization model"""
    try:
        summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            device=0 if torch.cuda.is_available() else -1
        )
        return summarizer
    except Exception as e:
        logger.error(f"Failed to load summarization model: {str(e)}")
        return None


@spaces.GPU(duration=180)
def process_audio(audio_file, translate=False):
    """Process audio file with chunked transcription (translation is always requested here)."""
    transcriber = ChunkedTranscriber(chunk_size=5, overlap=1)
    _translation, _output = transcriber.transcribe_audio(audio_file, translate=True)
    return _translation, _output
    # try:
    #     processor = AudioProcessor()
    #     language_segments, final_segments = processor.process_audio(audio_file, translate)
    #
    #     # Format output
    #     transcription = ""
    #     full_text = ""
    #
    #     # Add language detection information
    #     for segment in language_segments:
    #         transcription += f"Language: {segment['language']}\n"
    #         transcription += f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n"
    #
    #     # Add transcription/translation information
    #     transcription += "Transcription with language detection:\n\n"
    #     for segment in final_segments:
    #         transcription += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}):\n"
    #         transcription += f"Original: {segment['text']}\n"
    #         if translate and 'translated' in segment:
    #             transcription += f"Translated: {segment['translated']}\n"
    #             full_text += segment['translated'] + " "
    #         else:
    #             full_text += segment['text'] + " "
    #         transcription += "\n"
    #
    #     return transcription, full_text
    # except Exception as e:
    #     logger.error(f"Audio processing failed: {str(e)}")
    #     raise gr.Error(f"Processing failed: {str(e)}")


# @spaces.GPU(duration=180)
# def summarize_text(text):
#     """Summarize text"""
#     try:
#         summarizer = load_summarization_model()
#         if summarizer is None:
#             return "Summarization model could not be loaded."
#         logger.info("Successfully loaded summarization model")
#         # logger.info(f"\n\n {text}\n")
#         summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
#         return summary
#     except Exception as e:
#         logger.error(f"Summarization failed: {str(e)}")
#         return "Error occurred during summarization."


@spaces.GPU(duration=180)
def answer_question(context, question):
    """Answer questions about the transcribed text."""
    try:
        qa_pipeline = load_qa_model()
        if qa_pipeline is None:
            return "Q&A model could not be loaded."
        if not question:
            return "Please enter your question."
        messages = [
            # {"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
            {"role": "system", "content": SYSTEM_MESSAGE},
            {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
        ]
        response = qa_pipeline(messages, max_new_tokens=256)[0]['generated_text']
        logger.info(response)
        return response[-1]['content']
    except Exception as e:
        logger.error(f"Q&A failed: {str(e)}")
        return f"Error occurred during Q&A process: {str(e)}"


# Create Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# Automatic Speech Recognition for Indic Languages")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath")
            translate_checkbox = gr.Checkbox(label="Enable Translation")
            process_button = gr.Button("Process Audio")

        with gr.Column():
            # ASR_RESULT = gr.Textbox(label="Output")
            full_text_output = gr.Textbox(label="Full Text", lines=5)
            translation_output = gr.Textbox(label="Transcription/Translation", lines=10)

    with gr.Row():
        # with gr.Column():
        #     summarize_button = gr.Button("Summarize")
        #     summary_output = gr.Textbox(label="Summary", lines=3)
        with gr.Column():
            question_input = gr.Textbox(label="Ask a question about the transcription")
            answer_button = gr.Button("Get Answer")
            answer_output = gr.Textbox(label="Answer", lines=3)

    # Set up event handlers
    process_button.click(
        process_audio,
        inputs=[audio_input, translate_checkbox],
        outputs=[translation_output, full_text_output]
        # outputs=[ASR_RESULT]
    )

    # translated_text = ''.join(item['translated'] for item in ASR_RESULT if 'translated' in item)
    # summarize_button.click(
    #     summarize_text,
    #     # inputs=[ASR_RESULT],
    #     inputs=[translation_output],
    #     outputs=[summary_output]
    # )

    answer_button.click(
        answer_question,
        inputs=[full_text_output, question_input],
        outputs=[answer_output]
    )

    # Add system information
    gr.Markdown(f"""
    ## System Information
    - Device: {"CUDA" if torch.cuda.is_available() else "CPU"}
    - CUDA Available: {"Yes" if torch.cuda.is_available() else "No"}

    ## Features
    - Automatic language detection
    - High-quality transcription using MMS
    - Optional translation to English
    - Text summarization
    - Question answering
    """)

if __name__ == "__main__":
    iface.launch(server_port=None)
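

# Note: answer_question() rebuilds the Llama pipeline on every "Get Answer" click.
# A minimal, optional sketch of memoizing it (not wired into the app above;
# get_qa_pipeline is a hypothetical helper, everything else reuses names defined
# in this file):
#
#     from functools import lru_cache
#
#     @lru_cache(maxsize=1)
#     def get_qa_pipeline():
#         """Build the Q&A pipeline once per process and reuse it afterwards."""
#         return load_qa_model()
#
# answer_question() could then call get_qa_pipeline() instead of load_qa_model().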