"""Talking Duck: a Gradio app that answers spoken or typed coding questions.

Audio is sent to a Whisper model for transcription and the question is sent
to a Mixtral instruct model; both are called over HTTP at the
``api-inference.huggingface.co`` endpoints configured below.
"""

import json
import logging
import os

import gradio as gr
import requests

logger = logging.getLogger(__name__)

API_TOKEN = os.getenv("HF_API_TOKEN")
TRANSCRIBE_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-base.en"
LLM_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"

# Upper bound for a single API call so a stalled request cannot hang the UI.
REQUEST_TIMEOUT_SECONDS = 120

# Fallback texts shown in the UI when an API call yields no usable result.
TRANSCRIPTION_UNAVAILABLE = "Transcription not available"
ANSWER_UNAVAILABLE = "Answer not available"


def _auth_headers():
    """Return the Authorization header, or no headers when the token is unset."""
    if not API_TOKEN:
        # Avoid sending the literal string "Bearer None".
        return {}
    return {"Authorization": f"Bearer {API_TOKEN}"}


def _post_json(url, **request_kwargs):
    """POST to *url* and return the decoded JSON body, or None on failure.

    Network errors and non-JSON bodies are logged and reported as None so
    callers can fall back to a placeholder instead of crashing the UI.
    """
    try:
        response = requests.post(
            url,
            headers=_auth_headers(),
            timeout=REQUEST_TIMEOUT_SECONDS,
            **request_kwargs,
        )
    except requests.RequestException:
        logger.exception("Request to %s failed", url)
        return None

    try:
        body = json.loads(response.content.decode("utf-8"))
    except ValueError:
        # Covers both invalid UTF-8 and invalid JSON.
        logger.error(
            "Non-JSON response from %s (HTTP %s)", url, response.status_code
        )
        return None

    if isinstance(body, dict) and "error" in body:
        logger.error("API error from %s: %s", url, body["error"])
    return body


def _resolve_question(context, question):
    """Pick the question to ask: the typed one, else the transcription.

    Returns an empty string when neither source holds usable text.
    """
    if question and question.strip():
        # Returned unmodified so the prompt matches what the user typed.
        return question
    if context and context.strip() and context != TRANSCRIPTION_UNAVAILABLE:
        return context.strip()
    return ""


def transcribe_audio(audio_file):
    """Transcribe audio file to text.

    Args:
        audio_file: Path to the audio file, or None/empty when no audio
            was provided.

    Returns:
        The ``"text"`` field of the API response, or
        ``"Transcription not available"`` when there is no audio or the
        response carries no transcription.
    """
    if not audio_file:
        return TRANSCRIPTION_UNAVAILABLE

    with open(audio_file, "rb") as f:
        data = f.read()

    body = _post_json(TRANSCRIBE_API_URL, data=data)
    if not isinstance(body, dict):
        return TRANSCRIPTION_UNAVAILABLE
    return body.get("text", TRANSCRIPTION_UNAVAILABLE)


def get_answer(context, question):
    """Get an answer from the LLM based on the context and question.

    Args:
        context: Transcribed audio; used as the question when *question*
            is blank.
        question: Question typed by the user; takes precedence when set.

    Returns:
        The generated text, or ``"Answer not available"`` when there is
        nothing to ask or the API response has an unexpected shape.
    """
    question = _resolve_question(context, question)
    if not question:
        return ANSWER_UNAVAILABLE

    prompt = (
        "As an intelligent coding assistant, your task is to provide clear, concise, and accurate answers to coding-related questions. "
        "Below are examples of questions and the kind of direct answers expected:\n\n"
        "Example Question 1: How can I remove duplicates from a list in Python?\n"
        "Example Answer 1: Use the set() function to convert the list to a set, which removes duplicates, then convert it back to a list.\n\n"
        "Example Question 2: What's the difference between '==' and '===' in JavaScript?\n"
        "Example Answer 2: '==' checks for equality of values after type coercion, while '===' checks for both value and type equality without coercion.\n\n"
        "Example Question 3: How to check if a key exists in a dictionary in Python?\n"
        "Example Answer 3: Use the 'in' keyword, like 'if key in my_dict:'.\n\n"
        "Based on the above examples, answer the following question:\n\n"
        f"Question: {question}\n"
        "Answer:"
    )

    # Generation parameters tuned for focused, relevant responses.
    payload = {
        "inputs": prompt,
        "parameters": {
            "temperature": 0.3,  # More deterministic
            "top_p": 0.95,  # Nucleus sampling over the top 95% probability mass
            "repetition_penalty": 1.2,  # Discourage repetition
            "num_return_sequences": 1,  # Number of responses to generate
            "return_full_text": False,  # Return only generated text, not the full prompt
            "top_k": 50,
            "truncate": 24576,
            "max_new_tokens": 8192,
            # NOTE(review): an empty stop sequence looks like extraction
            # damage (possibly "</s>" originally) - confirm the intended value.
            "stop": [""],
        },
        "options": {
            "use_cache": True,  # Use cached responses when available
        },
    }

    body = _post_json(LLM_API_URL, json=payload)
    # A successful call returns a list of generations; error bodies are
    # dicts, which the original code crashed on when indexing [0].
    if isinstance(body, list) and body and isinstance(body[0], dict):
        return body[0].get("generated_text", ANSWER_UNAVAILABLE)
    return ANSWER_UNAVAILABLE


def transcribe_and_answer(audio_file, question):
    """Process the audio file for transcription and use the result to get an answer to a question.

    Returns:
        A ``(transcription, answer)`` tuple matching the two output boxes.
    """
    transcription = transcribe_audio(audio_file)
    answer = get_answer(transcription, question)
    return transcription, answer


# Create the Gradio app
with gr.Blocks() as app:
    gr.HTML("""

TALKING DUCK

An Audio to Text Q&A Chatbot

Your swift coding sidekick. Speak your code queries, and let the duck do the magic.

""")
    gr.Markdown("""

Models running on backend

mistralai/Mixtral-8x7B-Instruct-v0.1

Model Page

openai/whisper-base.en

Model Page

""")
    # NOTE(review): the original indentation was lost, so which widgets sit
    # inside each Row is inferred - confirm the intended layout.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload your audio question")
        question_input = gr.Textbox(label="Type your question here")

    answer_button = gr.Button("Get Answer")

    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription")
        answer_output = gr.Textbox(label="Answer")

    answer_button.click(
        transcribe_and_answer,
        inputs=[audio_input, question_input],
        outputs=[transcription_output, answer_output],
    )

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")