"""Voice-to-text converter and notes generator.

Transcribes an uploaded audio file with Groq's Whisper endpoint, asks a Groq
LLM to generate study questions and notes from the transcript, and returns
the result as a downloadable PDF (or an error-report PDF on failure).
"""

import os
import random
import tempfile

import gradio as gr
import nltk
import requests
from fpdf import FPDF
from groq import Groq
from nltk.tokenize import sent_tokenize

# Single source of truth for the Groq credential; never hard-code secrets.
api_key = os.environ.get("GROQ_API_KEY")

# Attempt to download punkt tokenizer data at startup. Narrowed from a bare
# `except:` so SystemExit/KeyboardInterrupt still propagate.
try:
    nltk.download("punkt")
except Exception:
    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")


def custom_sent_tokenize(text):
    """Naive fallback sentence splitter for when punkt data is unavailable."""
    return text.split(". ")


def transcribe(audio_path):
    """Transcribe the audio file at `audio_path` and return a PDF path.

    Sends the raw audio to Groq's Whisper transcription endpoint. On success,
    forwards the transcript to `generate_notes` and returns the resulting PDF
    path; on API failure, returns the path of an error-report PDF instead.
    """
    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)

    if response.status_code == 200:
        result = response.json()
        transcript = result.get("text", "No transcription available.")
        return generate_notes(transcript)

    error_msg = response.json().get("error", {}).get("message", "Unknown error.")
    print(f"API Error: {error_msg}")
    return create_error_pdf(f"API Error: {error_msg}")


def generate_notes(transcript):
    """Generate questions/notes for `transcript` via a Groq LLM; return a PDF path."""
    # FIX: use the environment-provided key instead of a hard-coded secret
    # that was previously embedded in source control.
    client = Groq(api_key=api_key)
    chat_completion = client.chat.completions.create(
        messages=[
            # System message sets the assistant's behavior for the conversation.
            {
                "role": "system",
                "content": "you are expert question generator from content. Generate one long question,possible number of short questions and mcqs.plz also provide the notes"
            },
            # User message carries the transcript to generate questions from.
            {
                "role": "user",
                "content": transcript,
            }
        ],
        model="llama3-8b-8192",
        # Lower temperature -> less random, more deterministic completions.
        temperature=0.5,
        # Cap on generated tokens (prompt + completion share the model window).
        max_tokens=1024,
        # Nucleus sampling; 1 considers all likelihood-weighted options.
        top_p=1,
        # No custom stop sequence; the model decides where to end.
        stop=None,
        # Full response in one payload rather than streamed deltas.
        stream=False,
    )
    res = chat_completion.choices[0].message.content

    # Render the LLM output plus the transcript into a structured PDF.
    pdf_path = create_pdf(res, transcript)
    return pdf_path


def create_pdf(question, transcript):
    """Render the transcript and generated questions into a temp PDF.

    Returns the filesystem path of the written PDF. Text is round-tripped
    through latin-1 with replacement because FPDF core fonts only support
    latin-1 characters.
    """
    pdf = FPDF()
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes and Questions", ln=True, align="C")

    # Transcription content
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")

    # Generated questions
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")

    # delete=False so the file survives for Gradio to serve after close.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        pdf_path = temp_pdf.name

    return pdf_path


def create_error_pdf(message):
    """Write `message` into an error-report PDF and return its path."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Error Report", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        error_pdf_path = temp_pdf.name

    return error_pdf_path


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes or Error Report"),
    title="Voice to Text Converter and Notes Generator",
    description="This app converts audio to text and generates academic questions including long, short, and multiple-choice questions."
)

iface.launch()