"""Gradio app that turns a YouTube video's transcript into a 10-question MCQ quiz.

Pipeline: fetch the transcript for the video, summarise it with a DistilBART
checkpoint, ask a Together-hosted Llama 3 model to write multiple-choice
questions about the summary, then parse the reply into three display strings.

Configuration:
    TOGETHER_API_KEY must be set in the environment before a quiz is
    requested. The key is deliberately NOT stored in this file: credentials
    committed to source control must be treated as leaked and rotated.
"""

import functools
import os
import re
from typing import List, Tuple

import gradio as gr
import pytube
from langchain import LLMChain
from langchain import PromptTemplate
from langchain_together import Together
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from youtube_transcript_api import YouTubeTranscriptApi as yt

API_KEY_ENV_VAR = "TOGETHER_API_KEY"
BART_CHECKPOINT = "sshleifer/distilbart-cnn-12-6"
LLM_MODEL_NAME = "meta-llama/Llama-3-70b-chat-hf"
MCQ_COUNT = 10
DISTRACTOR_COUNT = 3
FAILURE_MESSAGE = "Failed to generate 10 complete MCQs. Please try again."

# The doubled backslashes are intentional: the model is shown a literal "\n"
# inside the format specification rather than a real line break.
MCQ_TEMPLATE = """
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
Please provide the following for each question:
1. Question
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: \\nCorrect answer: \\nIncorrect answers: , , "
"""

# Compiled once at import time instead of on every request.
_MCQ_PATTERN = re.compile(
    r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)',
    re.DOTALL,
)


@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Load the summarisation tokenizer and model once and reuse them."""
    tokenizer = AutoTokenizer.from_pretrained(BART_CHECKPOINT)
    model = AutoModelForSeq2SeqLM.from_pretrained(BART_CHECKPOINT)
    return tokenizer, model


def Summary_BART(text: str) -> str:
    """Return a summary of *text* produced by the DistilBART checkpoint.

    The input is truncated to 1024 tokens before generation, so only the
    beginning of a long transcript contributes to the summary.
    """
    tokenizer, model = _load_summarizer()
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summaries = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summaries[0]


def _parse_mcqs(response_text: str) -> List[Tuple[str, str, List[str]]]:
    """Extract well-formed (question, correct answer, distractors) triples.

    Entries with fewer than three incorrect options are dropped rather than
    left to raise IndexError later, so a malformed reply ends up on the
    normal "failed to generate" path.
    """
    parsed = []
    for question, correct_answer, incorrect_answers in _MCQ_PATTERN.findall(response_text):
        # Split on ", " (not ",") so options such as "1,000" stay intact.
        distractors = [
            option.strip()
            for option in incorrect_answers.split(', ')
            if option.strip()
        ]
        if len(distractors) < DISTRACTOR_COUNT:
            continue
        parsed.append(
            (question.strip(), correct_answer.strip(), distractors[:DISTRACTOR_COUNT])
        )
    return parsed


def _format_mcqs(mcqs: List[Tuple[str, str, List[str]]]) -> Tuple[str, str, str]:
    """Render parsed MCQs as the three comma-separated display strings."""
    questions = []
    answers = []
    options = []
    for number, (question, correct_answer, distractors) in enumerate(mcqs, start=1):
        questions.append(f"Q{number}: {question}")
        answers.append(f"Q{number}: {correct_answer}")
        # NOTE: the correct answer is always listed as option A.
        options.append(
            f"Q{number}: A) {correct_answer}, B) {distractors[0]}, "
            f"C) {distractors[1]}, D) {distractors[2]}"
        )
    return ", ".join(questions), ", ".join(answers), ", ".join(options)


def YtToQuizz(link: str, difficulty_level: str) -> Tuple[str, str, str]:
    """Generate a 10-question quiz from the transcript of a YouTube video.

    Args:
        link: URL of the YouTube video.
        difficulty_level: Difficulty label inserted verbatim into the prompt.

    Returns:
        A ``(questions, correct_answers, options)`` tuple of strings. If the
        model's reply does not contain 10 complete MCQs, the first element is
        a failure message and the other two are empty strings.

    Raises:
        RuntimeError: If the TOGETHER_API_KEY environment variable is unset.
    """
    # Fail fast, before the slow transcript download and summarisation.
    if not os.environ.get(API_KEY_ENV_VAR):
        raise RuntimeError(
            f"The {API_KEY_ENV_VAR} environment variable must be set "
            "before generating a quiz."
        )

    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    transcript_text = " ".join(entry["text"] for entry in transcript)
    summary = Summary_BART(transcript_text)

    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=MCQ_TEMPLATE,
    )
    llama3 = Together(model=LLM_MODEL_NAME, max_tokens=2500)
    mcq_chain = LLMChain(llm=llama3, prompt=prompt)
    response = mcq_chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })

    mcqs = _parse_mcqs(response['text'])
    if len(mcqs) < MCQ_COUNT:
        return FAILURE_MESSAGE, "", ""
    return _format_mcqs(mcqs[:MCQ_COUNT])


def main(link: str, difficulty_level: str) -> Tuple[str, str, str]:
    """Gradio callback: build the quiz for *link* at *difficulty_level*."""
    return YtToQuizz(link, difficulty_level)


iface = gr.Interface(
    fn=main,
    inputs=[
        gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
        gr.components.Dropdown(
            ["Easy", "Medium", "Hard"],
            label="Select difficulty level:",
        ),
    ],
    outputs=[
        gr.components.Textbox(label="MCQs Statements", lines=20),
        gr.components.Textbox(label="Correct Answers", lines=10),
        gr.components.Textbox(label="Options", lines=30),
    ],
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
)


if __name__ == '__main__':
    iface.launch()