Imageye committed on
Commit
eec8d75
1 Parent(s): 101d171

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -141
app.py CHANGED
@@ -1,29 +1,97 @@
 
 
1
  import openai
2
  import streamlit as st
3
  from youtube_transcript_api import YouTubeTranscriptApi
4
  import re
5
- import tempfile
6
- import os
7
- from pydub import AudioSegment
8
  import logging
9
  import warnings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def convert_to_supported_format(file_path):
12
- audio = AudioSegment.from_file(file_path)
13
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp:
14
- audio.export(temp.name, format="wav")
15
- return temp.name
16
-
17
- def transcribe_audio(file_path):
18
- logging.info(f"Transcribing audio file: {file_path}")
19
- file_path = convert_to_supported_format(file_path)
20
- logging.info(f"Converted file path: {file_path}")
21
- with warnings.catch_warnings():
22
- warnings.simplefilter("ignore")
23
- with open(file_path, "rb") as audio_file:
24
- transcript = openai.Audio.transcribe("whisper-1", audio_file)
25
- os.remove(file_path) # Clean up temporary file
26
- return transcript["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def get_transcript(url):
29
  try:
@@ -39,59 +107,34 @@ def get_transcript(url):
39
  except Exception as e:
40
  return str(e)
41
 
42
- def summarize_text(text):
43
- response = openai.ChatCompletion.create(
44
- model="gpt-3.5-turbo",
45
- messages=[
46
- {"role": "system", "content": "You are a helpful assistant."},
47
- {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
48
- ],
49
- max_tokens=150
50
- )
51
- summary = response['choices'][0]['message']['content'].strip()
52
- return summary
53
-
54
- def generate_quiz_questions(text):
55
- response = openai.ChatCompletion.create(
56
- model="gpt-3.5-turbo",
57
- messages=[
58
- {"role": "system", "content": "You are a helpful assistant that generates quiz questions. Your task is to generate ten quiz questions and four multiple choice answers for each question from the given text. Make sure to mark the correct answer with an asterisk (*) at the beginning of the answer line. Use the following format for each question:\n\n1. Question\n a) Answer 1\n b) Answer 2\n c) Answer 3\n d) Answer 4\n\n2. Question\n a) Answer 1\n b) Answer 2\n c) Answer 3\n d) Answer 4\n\n..."},
59
- {"role": "user", "content": f"Generate quiz questions from the following text:\n\n{text}"}
60
- ],
61
- max_tokens=300
62
- )
63
- quiz_questions = response['choices'][0]['message']['content'].strip()
64
- return quiz_questions
65
-
66
  def parse_quiz_questions(quiz_text):
67
  questions = []
68
  question_blocks = quiz_text.split("\n\n")
69
  for block in question_blocks:
70
  lines = block.strip().split("\n")
71
  if len(lines) >= 5:
72
- question = lines[0].split(". ")[1]
73
- choices = [line.split(") ")[1].strip() for line in lines[1:5]]
74
- correct_answer_lines = [line for line in lines[1:5] if "*" in line]
75
- if correct_answer_lines:
76
- correct_answer = correct_answer_lines[0].split(") ")[1].replace("*", "").strip()
77
- else:
78
- correct_answer = "No correct answer provided"
79
- questions.append({"question": question, "choices": choices, "correct_answer": correct_answer})
 
 
 
 
 
 
 
 
 
 
 
 
80
  return questions
81
 
82
- def generate_explanation(question, correct_answer, user_answer):
83
- prompt = f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
84
- response = openai.ChatCompletion.create(
85
- model="gpt-3.5-turbo",
86
- messages=[
87
- {"role": "system", "content": "You are a helpful assistant."},
88
- {"role": "user", "content": prompt}
89
- ],
90
- max_tokens=150
91
- )
92
- explanation = response['choices'][0]['message']['content'].strip()
93
- return explanation
94
-
95
  def check_answers(questions, user_answers):
96
  feedback = []
97
  correct_count = 0
@@ -117,88 +160,95 @@ def check_answers(questions, user_answers):
117
  })
118
  return feedback
119
 
120
- def handle_uploaded_file(uploaded_file):
121
- with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
122
- tmp_file.write(uploaded_file.read())
123
- tmp_file_path = tmp_file.name
124
- return tmp_file_path
125
-
126
- st.title("YouTube Transcript Quiz Generator")
127
-
128
- st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")
129
-
130
- option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
131
-
132
- if "generated_quiz" not in st.session_state:
133
- st.session_state.generated_quiz = False
134
-
135
- if option == "YouTube URL":
136
- url = st.text_input("YouTube URL", value="")
137
- if url:
138
- if st.button("Generate Quiz"):
139
- transcript_text = get_transcript(url)
140
- if "Error" not in transcript_text:
141
- summary = summarize_text(transcript_text)
142
- quiz_text = generate_quiz_questions(transcript_text)
143
- questions = parse_quiz_questions(quiz_text)
144
-
145
- st.write("## Summary")
146
- st.write(summary)
147
-
148
- st.write("## Quiz Questions")
149
- st.session_state.questions = questions
150
- st.session_state.user_answers = {}
151
- st.session_state.generated_quiz = True
152
-
153
- for i, question in enumerate(questions):
154
- st.write(f"### Question {i+1}")
155
- st.write(question['question'])
156
- st.session_state.user_answers[f"question_{i+1}"] = st.radio(
157
- label="",
158
- options=question['choices'],
159
- key=f"question_{i+1}"
160
- )
161
-
162
- elif option == "Upload audio/video file":
163
- uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
164
- if uploaded_file:
165
- tmp_file_path = handle_uploaded_file(uploaded_file)
166
- transcript_text = transcribe_audio(tmp_file_path)
167
- os.remove(tmp_file_path)
168
- if "Error" not in transcript_text:
169
- summary = summarize_text(transcript_text)
170
- quiz_text = generate_quiz_questions(transcript_text)
171
- questions = parse_quiz_questions(quiz_text)
172
-
173
- st.write("## Summary")
174
- st.write(summary)
175
-
176
- st.write("## Quiz Questions")
177
- st.session_state.questions = questions
178
- st.session_state.user_answers = {}
179
- st.session_state.generated_quiz = True
180
-
181
- for i, question in enumerate(questions):
182
- st.write(f"### Question {i+1}")
183
- st.write(question['question'])
184
- st.session_state.user_answers[f"question_{i+1}"] = st.radio(
185
- label="",
186
- options=question['choices'],
187
- key=f"question_{i+1}"
188
- )
189
-
190
- if st.session_state.generated_quiz:
191
- if st.button("Submit Answers"):
192
- if "questions" in st.session_state and st.session_state.questions:
 
 
 
 
 
 
193
  with st.spinner('Processing your answers...'):
194
- feedback = check_answers(st.session_state.questions, st.session_state.user_answers)
195
  st.write("## Feedback")
196
- for i, item in enumerate(feedback):
197
  with st.expander(f"Question {i+1} Feedback"):
198
  st.write(f"### {item['question']}")
199
  st.write(f"**Your answer:** {item['user_answer']}")
200
  st.write(f"**Correct answer:** {item['correct_answer']}")
201
  if item['status'] == "Incorrect":
202
  st.write(f"**Explanation:** {item['explanation']}")
203
- else:
204
- st.write("Please generate the quiz first.")
 
 
1
+ import os
2
+ import io
3
  import openai
4
  import streamlit as st
5
  from youtube_transcript_api import YouTubeTranscriptApi
6
  import re
 
 
 
7
  import logging
8
  import warnings
9
+ from pydub import AudioSegment
10
+ import tempfile
11
+ from dotenv import load_dotenv
12
+
13
# Load environment variables before anything reads them.
load_dotenv()

# Give the OpenAI client its key from the OPENAI_API_KEY environment variable
# (os.getenv returns None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")

# Log at INFO level and above.
logging.basicConfig(level=logging.INFO)
21
+
22
def convert_to_supported_format(file):
    """Re-encode an audio source as WAV and return it as an in-memory buffer.

    Args:
        file: Anything ``AudioSegment.from_file`` accepts. The caller in this
            file passes the object returned by ``st.file_uploader``.

    Returns:
        io.BytesIO: The WAV-encoded audio, rewound to position 0.
    """
    wav_buffer = io.BytesIO()
    AudioSegment.from_file(file).export(wav_buffer, format="wav")
    wav_buffer.seek(0)  # rewind so the next reader starts at the first byte
    return wav_buffer
28
+
29
def transcribe_audio(file):
    """Transcribe an uploaded audio/video file with OpenAI's ``whisper-1`` model.

    The input is converted to WAV via ``convert_to_supported_format``, written
    to a temporary ``.wav`` file, and that file is sent to
    ``openai.Audio.transcribe``.

    Every failure -- including a failed conversion or temp-file write, which
    previously escaped as an unhandled exception -- is logged and reported
    through the return value, matching the other API helpers in this file.

    Args:
        file: The audio source; forwarded unchanged to
            ``convert_to_supported_format``.

    Returns:
        str: The transcript text on success, otherwise a message starting with
        ``"Error in transcription: "``.
    """
    temp_file_path = None
    try:
        logging.info("Transcribing audio file")
        wav_buffer = convert_to_supported_format(file)
        logging.info("Converted file to WAV format")

        # NOTE(review): presumably a real named file is used so the API can see
        # a ".wav" filename -- confirm before replacing with the in-memory buffer.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            # Record the path first so `finally` can clean up even if the
            # write below fails.
            temp_file_path = temp_file.name
            temp_file.write(wav_buffer.getvalue())

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            with open(temp_file_path, "rb") as audio_file:
                transcript = openai.Audio.transcribe("whisper-1", audio_file)
        return transcript["text"]
    except Exception as e:
        logging.error(f"Error in transcription: {str(e)}")
        return f"Error in transcription: {str(e)}"
    finally:
        # delete=False above means the temp file is ours to remove.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
49
+
50
def summarize_text(text):
    """Ask the ``gpt-3.5-turbo`` chat model for a summary of ``text``.

    Args:
        text: The text to summarise; it is embedded verbatim in the prompt.

    Returns:
        str: The model's reply with surrounding whitespace stripped (the
        request caps the reply at 150 tokens), or a message starting with
        ``"Error in summarization: "`` if anything raised.
    """
    try:
        chat_messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
        ]
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            max_tokens=150,
        )
        reply = completion['choices'][0]['message']['content']
        return reply.strip()
    except Exception as exc:
        failure = f"Error in summarization: {str(exc)}"
        logging.error(failure)
        return failure
64
 
65
def generate_quiz_questions(text):
    """Ask the ``gpt-3.5-turbo`` chat model for a multiple-choice quiz on ``text``.

    The system prompt requests ten questions with four options each, and asks
    for the correct option to be marked with an asterisk.

    Args:
        text: Source material for the quiz; embedded verbatim in the prompt.

    Returns:
        str: The model's raw reply with surrounding whitespace stripped (the
        request caps the reply at 1500 tokens), or a message starting with
        ``"Error in quiz generation: "`` if anything raised.
    """
    # Adjacent literals concatenate into exactly the single-string prompt.
    system_prompt = (
        "You are a helpful assistant that generates quiz questions. "
        "Your task is to generate ten quiz questions and four multiple choice answers for each question from the given text. "
        "It is CRUCIAL that you mark the correct answer with an asterisk (*) at the beginning of the answer line. "
        "There MUST be exactly one correct answer marked for each question. "
        "If you're unsure which answer is correct, mark the most likely correct answer. "
        "Use the following format for each question:\n\n"
        "1. Question\n a) *Correct Answer\n b) Incorrect Answer\n c) Incorrect Answer\n d) Incorrect Answer\n\n"
        "2. Question\n a) Incorrect Answer\n b) Incorrect Answer\n c) *Correct Answer\n d) Incorrect Answer\n\n"
        "..."
    )
    try:
        user_prompt = f"Generate quiz questions from the following text. Remember to mark EXACTLY ONE correct answer with an asterisk (*) for EACH question:\n\n{text}"
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=1500,
        )
        reply = completion['choices'][0]['message']['content']
        return reply.strip()
    except Exception as exc:
        failure = f"Error in quiz generation: {str(exc)}"
        logging.error(failure)
        return failure
79
+
80
def generate_explanation(question, correct_answer, user_answer):
    """Ask the ``gpt-3.5-turbo`` chat model why ``correct_answer`` is right.

    Args:
        question: The quiz question text.
        correct_answer: The answer the quiz treats as correct.
        user_answer: The answer the user picked instead.

    Returns:
        str: The model's reply with surrounding whitespace stripped (the
        request caps the reply at 150 tokens), or a message starting with
        ``"Error in explanation generation: "`` if anything raised.
    """
    try:
        prompt = f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
        chat_messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            max_tokens=150,
        )
        reply = completion['choices'][0]['message']['content']
        return reply.strip()
    except Exception as exc:
        failure = f"Error in explanation generation: {str(exc)}"
        logging.error(failure)
        return failure
95
 
96
  def get_transcript(url):
97
  try:
 
107
  except Exception as e:
108
  return str(e)
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Optional leading numbering such as "1. ", "2) ", "Q3: " or "Question 4. ".
# Whitespace is required after the punctuation so a decimal like "3.5" or an
# abbreviation like "Dr. " at the start of a question is left alone.
_QUESTION_PREFIX_RE = re.compile(r"^\s*(?:q(?:uestion)?\s*)?\d+\s*[.):]\s+", re.IGNORECASE)

# One or more blank (or whitespace-only) lines separate question blocks.
_BLOCK_SEPARATOR_RE = re.compile(r"\n\s*\n")


def parse_quiz_questions(quiz_text):
    """Parse the model's quiz text into a list of question dicts.

    The text is split into blocks on blank lines. A block counts as a question
    only if it has at least five lines: the question followed by four option
    lines. An option line is any of those four lines containing ``") "``; the
    text after the first ``") "`` is the choice.

    The correct choice is the one flagged with an asterisk. The asterisk may
    sit at the start of the choice (``a) *Paris``), before the option letter
    (``*a) Paris``) or at the end (``a) Paris*``); asterisks at the edges are
    removed from the choice text. If several options are flagged, the last one
    wins.

    Args:
        quiz_text: Raw quiz text. ``None`` or an empty string yields ``[]``.

    Returns:
        list[dict]: One dict per question with keys ``"question"`` (str),
        ``"choices"`` (list of str) and ``"correct_answer"`` (str). When no
        option is flagged, ``"correct_answer"`` falls back to the first
        choice; when the block has no usable choices at all it is the sentinel
        ``"No correct answer provided"``.
    """
    if not quiz_text:
        return []

    questions = []
    # Normalise Windows line endings so "\r\n\r\n" still separates blocks.
    normalized = quiz_text.replace("\r\n", "\n")
    for block in _BLOCK_SEPARATOR_RE.split(normalized):
        lines = block.strip().split("\n")
        if len(lines) < 5:
            continue

        question = _QUESTION_PREFIX_RE.sub("", lines[0], count=1).strip()

        choices = []
        correct_answer = None
        for line in lines[1:5]:
            label, separator, text = line.partition(") ")
            if not separator:
                continue
            choice = text.strip()
            is_marked = (
                "*" in label
                or choice.startswith("*")
                or choice.endswith("*")
            )
            if is_marked:
                choice = choice.strip("*").strip()
            if not choice:
                # Nothing left to show the user for this option.
                continue
            choices.append(choice)
            if is_marked:
                correct_answer = choice

        if not correct_answer:
            correct_answer = choices[0] if choices else "No correct answer provided"

        questions.append({
            "question": question,
            "choices": choices,
            "correct_answer": correct_answer,
        })
    return questions
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  def check_answers(questions, user_answers):
139
  feedback = []
140
  correct_count = 0
 
160
  })
161
  return feedback
162
 
163
def _store_quiz_from_transcript(transcript_text):
    """Generate a summary and quiz for ``transcript_text`` and save both in session state.

    The summary is produced once here and cached in ``st.session_state.summary``
    so that Streamlit reruns (every widget interaction) do not trigger a new
    summarisation request.
    """
    summary = summarize_text(transcript_text)
    quiz_text = generate_quiz_questions(transcript_text)
    questions = parse_quiz_questions(quiz_text)

    # parse_quiz_questions uses this sentinel only for a question with no
    # answer choices, which cannot be rendered or graded.
    valid_questions = [
        q for q in questions
        if q['correct_answer'] != "No correct answer provided"
    ]
    skipped = len(questions) - len(valid_questions)
    if skipped:
        st.warning(f"Warning: {skipped} questions had no usable answer choices and were skipped.")
    if not valid_questions:
        st.error("No quiz questions could be generated from this transcript. Please try again.")

    st.session_state.questions = valid_questions
    st.session_state.user_answers = {}
    # Only show the quiz section when there is something to show.
    st.session_state.generated_quiz = bool(valid_questions)
    st.session_state.feedback = []
    st.session_state.transcript_text = transcript_text
    st.session_state.summary = summary


def main():
    """Render the Streamlit quiz app.

    Flow: the user picks an input type, supplies a YouTube URL or uploads a
    media file, and presses "Generate Quiz". The transcript is summarised and
    turned into multiple-choice questions, which are kept in
    ``st.session_state`` and shown inside a form. Submitting the form grades
    the answers via ``check_answers`` and shows per-question feedback.
    """
    st.title("YouTube Transcript Quiz Generator")

    st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")

    option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))

    # Seed every session key on the first run; later reruns keep their values.
    session_defaults = {
        "generated_quiz": False,
        "questions": [],
        "user_answers": {},
        "feedback": [],
        "url": "",
        "transcript_text": "",
        "summary": "",
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    if option == "YouTube URL":
        url = st.text_input("YouTube URL", value="")
        if url:
            st.session_state.url = url
            if st.button("Generate Quiz"):
                transcript_text = get_transcript(url)
                if "Error" not in transcript_text:
                    _store_quiz_from_transcript(transcript_text)
                else:
                    # Surface the failure instead of silently doing nothing.
                    st.error(transcript_text)

    elif option == "Upload audio/video file":
        uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
        if uploaded_file is not None:
            if st.button("Generate Quiz"):
                with st.spinner('Transcribing audio...'):
                    transcript_text = transcribe_audio(uploaded_file)
                if "Error" not in transcript_text:
                    _store_quiz_from_transcript(transcript_text)
                else:
                    st.error(transcript_text)

    if st.session_state.generated_quiz:
        st.write("## Summary")
        st.write(st.session_state.summary)

        st.write("## Quiz Questions")
        form = st.form("quiz_form")
        for number, question in enumerate(st.session_state.questions, start=1):
            form.write(f"### Question {number}")
            form.write(question['question'])
            user_answer = form.selectbox(
                label="",
                options=question['choices'],
                key=f"question_{number}"
            )
            st.session_state.user_answers[f"question_{number}"] = user_answer

        submit_button = form.form_submit_button("Submit Answers")
        if submit_button:
            with st.spinner('Processing your answers...'):
                st.session_state.feedback = check_answers(st.session_state.questions, st.session_state.user_answers)
            st.write("## Feedback")
            for i, item in enumerate(st.session_state.feedback):
                with st.expander(f"Question {i+1} Feedback"):
                    st.write(f"### {item['question']}")
                    st.write(f"**Your answer:** {item['user_answer']}")
                    st.write(f"**Correct answer:** {item['correct_answer']}")
                    if item['status'] == "Incorrect":
                        st.write(f"**Explanation:** {item['explanation']}")


if __name__ == "__main__":
    main()