import os
import tempfile

import streamlit as st
from streamlit_mic_recorder import mic_recorder
from pydub import AudioSegment
from faster_whisper import WhisperModel

import outlines
from outlines.models import openai


# Load the Whisper model once and cache it across Streamlit reruns.
@st.cache_resource
def load_model():
    return WhisperModel("large-v3", device="auto", compute_type="int8")


whisper_model = load_model()

st.title("🗣 Chinese Speech Recognition (Whisper + Mic Recorder)")

mode = st.radio(
    "Choose an input method",
    ["🎤 Record with microphone", "📁 Upload a local audio file", "✍️ Manual text input"],
    horizontal=True,
)


def prompt_switch(model_name, input_text):
    """Return the intent-classification prompt for the selected model."""
    # The JSON examples use "unrelated" so they match the outlines choice
    # labels used in classify_intent() below.
    numbered_prompt = """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
1. "Reservation": the user input is related to or implies a restaurant reservation.
2. "unrelated": the user input is anything else.
Your response should be in JSON format, either {{"result": "Reservation"}} or {{"result": "unrelated"}}.
If the user input is related to a restaurant reservation, return {{"result": "Reservation"}};
if the user input is anything else, return {{"result": "unrelated"}}.
Here is the user input: {input}
""".strip()

    plain_prompt = """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
"Reservation": the user input is related to or implies a restaurant reservation.
"unrelated": the user input is anything else.
Your response should be in JSON format, either {{"result": "Reservation"}} or {{"result": "unrelated"}}.
Here is the user input: {input}
""".strip()

    # Four of the five models share the same numbered prompt; only the
    # Coder model uses the shorter variant.
    prompts = {
        "Qwen/Qwen2.5-7B-Instruct-Turbo": numbered_prompt,
        "Qwen/Qwen2.5-Coder-32B-Instruct": plain_prompt,
        "google/gemma-2b-it": numbered_prompt,
        "google/gemma-2-9b-it": numbered_prompt,
        "google/gemma-2-27b-it": numbered_prompt,
    }
    return prompts[model_name].format(input=input_text)


model_option = st.selectbox(
    "Which model do you want to use?",
    (
        "Qwen/Qwen2.5-7B-Instruct-Turbo",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "google/gemma-2b-it",
        "google/gemma-2-9b-it",
        "google/gemma-2-27b-it",
    ),
)
st.write("Selected model:", model_option)


def classify_intent(input_text):
    """Classify the input as a reservation request or unrelated text."""
    st.write("🧠 Classifying intent with the LLM...")
    labels = ["Reservation", "unrelated"]
    model = openai(
        model_option,
        api_key=os.environ["TOGETHER_API_KEY"],
        base_url="https://api.together.xyz/v1",
    )
    # Constrain generation so the model can only return one of the labels.
    generator = outlines.generate.choice(model, labels)
    prompt_message = prompt_switch(model_option, input_text)
    st.write(prompt_message)
    answer = generator(prompt_message)
    return answer


def convert_audio_to_wav(audio_bytes, target_sample_rate=16000):
    """Write raw audio bytes to disk and convert them to mono 16 kHz WAV."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_input:
        temp_input.write(audio_bytes)
        temp_input_path = temp_input.name
    audio = AudioSegment.from_file(temp_input_path)
    audio = audio.set_channels(1).set_frame_rate(target_sample_rate)
    converted_path = temp_input_path.replace(".wav", "_converted.wav")
    audio.export(converted_path, format="wav")
    os.remove(temp_input_path)
    return converted_path


def transcribe_audio(wav_path):
    """Transcribe the WAV file with Whisper, then run intent classification."""
    st.write("🧠 Whisper is transcribing the audio...")
    segments, info = whisper_model.transcribe(wav_path, language="zh")
    result_text = "".join(seg.text for seg in segments)
    st.text_area("📜 Transcription", result_text, height=200)
    os.remove(wav_path)
    intent_classification(result_text)


def intent_classification(input_text):
    st.write("🧠 Intent classification")
    intent = classify_intent(input_text)
    st.write(intent)


# --- Mode: Microphone ---
if mode == "🎤 Record with microphone":
    audio_data = mic_recorder(
        start_prompt="🎤 Click to start recording",
        stop_prompt="⏹️ Stop recording",
        just_once=True,
        use_container_width=True,
    )
    if audio_data:
        st.audio(audio_data["bytes"], format="audio/wav")
        wav_path = convert_audio_to_wav(audio_data["bytes"])
        transcribe_audio(wav_path)

# --- Mode: File Upload ---
elif mode == "📁 Upload a local audio file":
    uploaded_file = st.file_uploader(
        "Upload an audio file (wav, mp3, m4a, etc. supported)",
        type=["wav", "mp3", "m4a", "ogg", "flac"],
    )
    if uploaded_file is not None:
        st.audio(uploaded_file, format="audio/wav")
        wav_path = convert_audio_to_wav(uploaded_file.read())
        transcribe_audio(wav_path)

# --- Mode: Manual Text Input ---
elif mode == "✍️ Manual text input":
    manual_text = st.text_area("Enter your text", height=200, key="manual_input")
    if st.button("Confirm input"):
        st.success("✅ Input received!")
        st.text_area("📜 Input text", manual_text, height=200, key="manual_output")
        intent_classification(manual_text)
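
# --- Running the app ---
# A minimal sketch of how to launch this script, assuming it is saved as
# app.py (the filename is an assumption, not from the source) and that
# ffmpeg is available on PATH for pydub's audio decoding. The openai
# package is listed because outlines' OpenAI-compatible client typically
# depends on it:
#
#   pip install streamlit streamlit-mic-recorder pydub faster-whisper outlines openai
#   export TOGETHER_API_KEY=...   # Together AI key read by classify_intent()
#   streamlit run app.py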