Rehman1603 committed on
Commit
f0c4fb1
1 Parent(s): 76e82bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -60
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pytube
3
  from youtube_transcript_api import YouTubeTranscriptApi as yt
4
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
  import os
6
  from langchain import PromptTemplate
7
  from langchain import LLMChain
@@ -20,24 +20,14 @@ def Summary_BART(text):
20
  summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
21
  return summary[0]
22
 
23
- def translate_text(text, target_language):
24
- translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
25
- translated_text = translator(text, max_length=512)
26
- return translated_text[0]['translation_text']
27
-
28
- def YtToQuizz(link, difficulty_level, language):
29
  video_id = pytube.extract.video_id(link)
30
  transcript = yt.get_transcript(video_id)
31
- data = " ".join([text['text'] for text in transcript])
32
-
 
33
  summary = Summary_BART(data)
34
 
35
- if language != "en":
36
- translated_data = translate_text(data, language)
37
- translated_summary = Summary_BART(translated_data)
38
- else:
39
- translated_summary = summary
40
-
41
  mcq_template = """
42
  Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
43
  The difficulty level of the questions should be: {difficulty_level}
@@ -46,81 +36,58 @@ def YtToQuizz(link, difficulty_level, language):
46
  2. Correct answer
47
  3. Three plausible incorrect answer options
48
  4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
49
- The language of the questions should be: {language}
50
  """
51
  prompt = PromptTemplate(
52
- input_variables=['summary', 'difficulty_level', 'language'],
53
  template=mcq_template
54
  )
55
  llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
56
  Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
57
 
58
- response_en = Generated_mcqs.invoke({
59
  "summary": summary,
60
- "difficulty_level": difficulty_level,
61
- "language": "English"
62
- })
63
-
64
- response_translated = Generated_mcqs.invoke({
65
- "summary": translated_summary,
66
- "difficulty_level": difficulty_level,
67
- "language": language
68
  })
69
 
70
- response_text_en = response_en['text']
71
- response_text_translated = response_translated['text']
72
 
73
  # Extract MCQs
74
  mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
75
- mcqs_en = re.findall(mcq_pattern, response_text_en, re.DOTALL)
76
- mcqs_translated = re.findall(mcq_pattern, response_text_translated, re.DOTALL)
77
-
78
- if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
79
- return ["Failed to generate 10 complete MCQs. Please try again."] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3
80
 
81
- questions_en, correct_answers_en, options_en = [], [], []
82
- questions_translated, correct_answers_translated, options_translated = [], [], []
83
 
84
- for idx, mcq in enumerate(mcqs_en[:10]):
85
- question, correct_answer, incorrect_answers = mcq
86
- incorrect_answers = incorrect_answers.split(', ')
87
- questions_en.append(f"Q{idx+1}: {question}")
88
- correct_answers_en.append(f"Q{idx+1}: {correct_answer}")
89
- options_en.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
90
 
91
- for idx, mcq in enumerate(mcqs_translated[:10]):
92
  question, correct_answer, incorrect_answers = mcq
93
  incorrect_answers = incorrect_answers.split(', ')
94
- questions_translated.append(f"Q{idx+1}: {question}")
95
- correct_answers_translated.append(f"Q{idx+1}: {correct_answer}")
96
- options_translated.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
97
 
98
- return questions_en, correct_answers_en, options_en, questions_translated, correct_answers_translated, options_translated
99
 
100
- def main(link, difficulty_level, language):
101
- return YtToQuizz(link, difficulty_level, language)
102
 
103
  iface = gr.Interface(
104
  fn=main,
105
  inputs=[
106
  gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
107
- gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
108
- gr.components.Dropdown(
109
- ["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
110
- label="Select language:"
111
- )
112
  ],
113
  outputs=[
114
- gr.components.Textbox(label="MCQs Statements (English)", lines=20),
115
- gr.components.Textbox(label="Correct Answers (English)", lines=10),
116
- gr.components.Textbox(label="Options (English)", lines=30),
117
- gr.components.Textbox(label="MCQs Statements (Translated)", lines=20),
118
- gr.components.Textbox(label="Correct Answers (Translated)", lines=10),
119
- gr.components.Textbox(label="Options (Translated)", lines=30)
120
  ],
121
  title="YouTube Video Subtitle to MCQs Quiz",
122
  description="Generate MCQs from YouTube video subtitles"
123
  )
124
 
125
  if __name__ == '__main__':
126
- iface.launch()
 
1
  import gradio as gr
2
  import pytube
3
  from youtube_transcript_api import YouTubeTranscriptApi as yt
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  import os
6
  from langchain import PromptTemplate
7
  from langchain import LLMChain
 
20
  summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
21
  return summary[0]
22
 
23
+ def YtToQuizz(link, difficulty_level):
 
 
 
 
 
24
  video_id = pytube.extract.video_id(link)
25
  transcript = yt.get_transcript(video_id)
26
+ data = ""
27
+ for text in transcript:
28
+ data += text.get('text') + " "
29
  summary = Summary_BART(data)
30
 
 
 
 
 
 
 
31
  mcq_template = """
32
  Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
33
  The difficulty level of the questions should be: {difficulty_level}
 
36
  2. Correct answer
37
  3. Three plausible incorrect answer options
38
  4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
 
39
  """
40
  prompt = PromptTemplate(
41
+ input_variables=['summary', 'difficulty_level'],
42
  template=mcq_template
43
  )
44
  llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
45
  Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
46
 
47
+ response = Generated_mcqs.invoke({
48
  "summary": summary,
49
+ "difficulty_level": difficulty_level
 
 
 
 
 
 
 
50
  })
51
 
52
+ response_text = response['text']
 
53
 
54
  # Extract MCQs
55
  mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
56
+ mcqs = re.findall(mcq_pattern, response_text, re.DOTALL)
 
 
 
 
57
 
58
+ if len(mcqs) < 10:
59
+ return ["Failed to generate 10 complete MCQs. Please try again."] * 3
60
 
61
+ questions = []
62
+ correct_answers = []
63
+ options = []
 
 
 
64
 
65
+ for idx, mcq in enumerate(mcqs[:10]):
66
  question, correct_answer, incorrect_answers = mcq
67
  incorrect_answers = incorrect_answers.split(', ')
68
+ questions.append(f"Q{idx+1}: {question}")
69
+ correct_answers.append(f"Q{idx+1}: {correct_answer}")
70
+ options.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
71
 
72
+ return questions, correct_answers, options
73
 
74
+ def main(link, difficulty_level):
75
+ return YtToQuizz(link, difficulty_level)
76
 
77
  iface = gr.Interface(
78
  fn=main,
79
  inputs=[
80
  gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
81
+ gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
 
 
 
 
82
  ],
83
  outputs=[
84
+ gr.components.Textbox(label="MCQs Statements", lines=20),
85
+ gr.components.Textbox(label="Correct Answers", lines=10),
86
+ gr.components.Textbox(label="Options", lines=30)
 
 
 
87
  ],
88
  title="YouTube Video Subtitle to MCQs Quiz",
89
  description="Generate MCQs from YouTube video subtitles"
90
  )
91
 
92
  if __name__ == '__main__':
93
+ iface.launch()