Spaces:
Sleeping
Sleeping
| import openai | |
| import streamlit as st | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
def get_transcript(video_id):
    """Fetch a video's transcript text together with its language code.

    A manually created transcript is preferred; when the video has none,
    the first auto-generated transcript is used instead.

    Args:
        video_id: YouTube video ID handed to
            ``YouTubeTranscriptApi.list_transcripts``.

    Returns:
        A ``(full_transcript, language_code)`` tuple: the text of every
        transcript part joined with single spaces, and the language code
        of the transcript that was selected.

    Raises:
        Exception: With the message "No suitable transcript found." when
            the listing contains no transcript at all. Errors raised while
            listing or fetching transcripts propagate unchanged.
    """
    # Materialize the listing once so it can be scanned more than once.
    available = list(YouTubeTranscriptApi.list_transcripts(video_id))

    # ``find_manually_created_transcript`` needs a list of language codes, so
    # calling it with no arguments always raised and the old bare ``except``
    # silently skipped every manual transcript. Selecting by ``is_generated``
    # works for any language and needs no exception handling.
    manual = [candidate for candidate in available if not candidate.is_generated]
    generated = [candidate for candidate in available if candidate.is_generated]

    if manual:
        transcript = manual[0]
    elif generated:
        transcript = generated[0]
    else:
        raise Exception("No suitable transcript found.")

    # Remember which language was picked so the summary can be written in it.
    language_code = transcript.language_code
    full_transcript = " ".join(part['text'] for part in transcript.fetch())
    return full_transcript, language_code
def summarize_with_langchain_and_openai(
    transcript,
    language_code,
    openai_api_key,
    model_name='gpt-3.5-turbo',
    *,
    chunk_size=2000,
    max_chunks=4,
):
    """Summarize the beginning of a transcript with an OpenAI chat model.

    The transcript is split into chunks, the first ``max_chunks`` chunks are
    joined back together, and the model is asked for a titled summary with
    an introduction, bullet points and a conclusion in ``language_code``.

    Note: this assigns the module-level ``openai.api_key``.

    Args:
        transcript: Full transcript text.
        language_code: Language code the summary should be written in.
        openai_api_key: API key used for the OpenAI request.
        model_name: Chat model to call.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
        max_chunks: How many leading chunks are sent to the model; ``None``
            sends every chunk.

    Returns:
        The content of the first choice's message in the response.

    Raises:
        ValueError: If ``transcript`` is empty or only whitespace.
    """
    # Fail fast instead of paying for a request that has nothing to summarize.
    if not transcript or not transcript.strip():
        raise ValueError("Transcript is empty; nothing to summarize.")

    openai.api_key = openai_api_key

    # Only the leading chunks are kept so the prompt stays bounded in size.
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    chunks = splitter.split_text(transcript)
    text_to_summarize = " ".join(chunks[:max_chunks])

    system_prompt = 'I want you to act as a Life Coach that can create good summaries!'
    prompt = (
        f'Summarize the following text in {language_code}.\n'
        f'Text: {text_to_summarize}\n'
        f'Add a title to the summary in {language_code}.\n'
        f'Include an INTRODUCTION, BULLET POINTS if possible, and a CONCLUSION in {language_code}.'
    )

    response = openai.ChatCompletion.create(
        model=model_name,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': prompt},
        ],
        temperature=1,
    )
    return response['choices'][0]['message']['content']
def main():
    """Render the summarizer page and run the pipeline when 'Start' is pressed.

    Collects the OpenAI API key and the video ID, then loads the transcript,
    requests a summary and shows it, updating a progress bar along the way.
    Any exception raised during that work is written to the page as text.
    """
    st.title('YouTube Video Summarizer')
    api_key = st.text_input('Enter your OpenAI API Key:', type='password')
    video = st.text_input('Enter the YouTube Video ID:')

    # Nothing to do until the user presses the button.
    if not st.button('Start'):
        return

    # Both inputs are required before any work starts.
    if not (api_key and video):
        st.write('Please enter both your OpenAI API Key and YouTube Video ID.')
        return

    try:
        bar = st.progress(0)
        status = st.empty()

        status.text('Loading the transcript...')
        bar.progress(25)
        # The language code comes back alongside the transcript text.
        transcript, language_code = get_transcript(video)

        status.text('Creating summary...')
        bar.progress(75)
        summary = summarize_with_langchain_and_openai(transcript, language_code, api_key)

        status.text('Summary:')
        st.markdown(summary)
        bar.progress(100)
    except Exception as err:
        # Show the failure on the page instead of crashing the app.
        st.write(str(err))
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()