Imageye committed
Commit bc87def
1 Parent(s): 707c124

Update app.py

Files changed (1)
  1. app.py +67 -80
app.py CHANGED
@@ -1,32 +1,23 @@
- import openai
  import streamlit as st
+ from openai import OpenAI
  from youtube_transcript_api import YouTubeTranscriptApi
  import re
  import tempfile
  import os
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
- import torch
- import librosa
+ from transformers import pipeline
+ import soundfile as sf
 
- # Load the Whisper model and processor from Hugging Face
- processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
- model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
+ # Initialize the pipeline with the model
+ pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")
 
+ # Function to transcribe audio using Hugging Face Whisper
  def transcribe_audio(file_path):
-     # Load audio file
-     audio, _ = librosa.load(file_path, sr=16000)
-
-     # Tokenize the audio
-     inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
-
-     # Perform the transcription
-     with torch.no_grad():
-         generated_ids = model.generate(inputs["input_features"])
-
-     # Decode the transcription
-     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+     # Load audio file into NumPy array
+     audio_input, _ = sf.read(file_path)
+     transcription = pipe(audio_input)["text"]
      return transcription
 
+ # Function to get YouTube transcript
  def get_transcript(url):
      try:
          video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
@@ -41,32 +32,31 @@ def get_transcript(url):
      except Exception as e:
          return str(e)
 
- def summarize_text(api_key, text):
-     openai.api_key = api_key
-     response = openai.ChatCompletion.create(
+ # Function to summarize text using OpenAI API
+ def summarize_text(client, text):
+     response = client.chat.completions.create(
          model="gpt-3.5-turbo",
          messages=[
              {"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
-         ],
-         max_tokens=150
+         ]
      )
-     summary = response.choices[0]['message']['content'].strip()
+     summary = response.choices[0].message.content.strip()
      return summary
 
- def generate_quiz_questions(api_key, text):
-     openai.api_key = api_key
-     response = openai.ChatCompletion.create(
+ # Function to generate quiz questions using OpenAI API
+ def generate_quiz_questions(client, text):
+     response = client.chat.completions.create(
          model="gpt-3.5-turbo",
          messages=[
              {"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": f"Generate ten quiz questions and four multiple choice answers for each question from the following text. Mark the correct answer with an asterisk (*) at the beginning:\n\n{text}"}
-         ],
-         max_tokens=300
+         ]
      )
-     quiz_questions = response.choices[0]['message']['content'].strip()
+     quiz_questions = response.choices[0].message.content.strip()
      return quiz_questions
 
+ # Function to parse quiz questions
  def parse_quiz_questions(quiz_text):
      questions = []
      question_blocks = quiz_text.split("\n\n")
@@ -83,21 +73,21 @@ def parse_quiz_questions(quiz_text):
          questions.append({"question": question, "choices": choices, "correct_answer": correct_answer})
      return questions
 
- def generate_explanation(api_key, question, correct_answer, user_answer):
-     openai.api_key = api_key
+ # Function to generate explanation using OpenAI API
+ def generate_explanation(client, question, correct_answer, user_answer):
      prompt = f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
-     response = openai.ChatCompletion.create(
+     response = client.chat.completions.create(
          model="gpt-3.5-turbo",
          messages=[
              {"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": prompt}
-         ],
-         max_tokens=150
+         ]
      )
-     explanation = response.choices[0]['message']['content'].strip()
+     explanation = response.choices[0].message.content.strip()
      return explanation
 
- def check_answers(api_key, questions, user_answers):
+ # Function to check answers and provide feedback
+ def check_answers(client, questions, user_answers):
      feedback = []
      correct_count = 0
      for i, question in enumerate(questions):
@@ -112,7 +102,7 @@ def check_answers(api_key, questions, user_answers):
              })
              correct_count += 1
          else:
-             explanation = generate_explanation(api_key, question['question'], correct_answer, user_answer)
+             explanation = generate_explanation(client, question['question'], correct_answer, user_answer)
              feedback.append({
                  "question": question['question'],
                  "user_answer": user_answer,
@@ -122,17 +112,23 @@
              })
      return feedback
 
+ # Function to handle uploaded file
  def handle_uploaded_file(uploaded_file):
      with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
          tmp_file.write(uploaded_file.read())
          tmp_file_path = tmp_file.name
      return tmp_file_path
 
+ # Streamlit UI
  st.title("YouTube Transcript Quiz Generator")
 
  st.markdown("**Instructions:** Enter your OpenAI API key and paste a YouTube link or upload a media file to generate a quiz.")
 
  api_key = st.text_input("Enter your OpenAI API Key", type="password")
+
+ if api_key:
+     client = OpenAI(api_key=api_key)
+
  option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
 
  if "generated_quiz" not in st.session_state:
@@ -144,60 +140,51 @@ if option == "YouTube URL":
      if st.button("Generate Quiz"):
          transcript_text = get_transcript(url)
          if "Error" not in transcript_text:
-             summary = summarize_text(api_key, transcript_text)
-             quiz_text = generate_quiz_questions(api_key, transcript_text)
+             summary = summarize_text(client, transcript_text)
+             quiz_text = generate_quiz_questions(client, transcript_text)
              questions = parse_quiz_questions(quiz_text)
 
-             st.write("## Summary")
-             st.write(summary)
-
-             st.write("## Quiz Questions")
+             st.session_state.summary = summary
              st.session_state.questions = questions
              st.session_state.user_answers = {}
             st.session_state.generated_quiz = True
 
-             for i, question in enumerate(questions):
-                 st.write(f"### Question {i+1}")
-                 st.write(question['question'])
-                 st.session_state.user_answers[f"question_{i+1}"] = st.radio(
-                     label="",
-                     options=question['choices'],
-                     key=f"question_{i+1}"
-                 )
-
- elif option == "Upload audio/video file":
+ if option == "Upload audio/video file":
      uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
      if uploaded_file and api_key:
-         tmp_file_path = handle_uploaded_file(uploaded_file)
-         transcript_text = transcribe_audio(tmp_file_path)
-         os.remove(tmp_file_path)
-         if "Error" not in transcript_text:
-             summary = summarize_text(api_key, transcript_text)
-             quiz_text = generate_quiz_questions(api_key, transcript_text)
-             questions = parse_quiz_questions(quiz_text)
-
-             st.write("## Summary")
-             st.write(summary)
-
-             st.write("## Quiz Questions")
-             st.session_state.questions = questions
-             st.session_state.user_answers = {}
-             st.session_state.generated_quiz = True
-
-             for i, question in enumerate(questions):
-                 st.write(f"### Question {i+1}")
-                 st.write(question['question'])
-                 st.session_state.user_answers[f"question_{i+1}"] = st.radio(
-                     label="",
-                     options=question['choices'],
-                     key=f"question_{i+1}"
-                 )
+         if st.button("Generate Quiz"):
+             tmp_file_path = handle_uploaded_file(uploaded_file)
+             with st.spinner('Transcribing audio...'):
+                 transcript_text = transcribe_audio(tmp_file_path)
+             os.remove(tmp_file_path)
+             if "Error" not in transcript_text:
+                 summary = summarize_text(client, transcript_text)
+                 quiz_text = generate_quiz_questions(client, transcript_text)
+                 questions = parse_quiz_questions(quiz_text)
+
+                 st.session_state.summary = summary
+                 st.session_state.questions = questions
+                 st.session_state.user_answers = {}
+                 st.session_state.generated_quiz = True
 
  if st.session_state.generated_quiz:
+     st.write("## Summary")
+     st.write(st.session_state.summary)
+
+     st.write("## Quiz Questions")
+     for i, question in enumerate(st.session_state.questions):
+         st.write(f"### Question {i+1}")
+         st.write(question['question'])
+         st.session_state.user_answers[f"question_{i+1}"] = st.radio(
+             label="",
+             options=question['choices'],
+             key=f"question_{i+1}"
+         )
+
      if st.button("Submit Answers"):
          if "questions" in st.session_state and st.session_state.questions:
              with st.spinner('Processing your answers...'):
-                 feedback = check_answers(api_key, st.session_state.questions, st.session_state.user_answers)
+                 feedback = check_answers(client, st.session_state.questions, st.session_state.user_answers)
              st.write("## Feedback")
              for i, item in enumerate(feedback):
                  with st.expander(f"Question {i+1} Feedback"):