cowrycode committed on
Commit
4793736
·
verified ·
1 Parent(s): 680755c

Update youtube_tool.py

Browse files
Files changed (1) hide show
  1. youtube_tool.py +42 -0
youtube_tool.py CHANGED
@@ -5,6 +5,48 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
5
  #loader = YoutubeTranscriptReader()
6
  yt_ap = YouTubeTranscriptApi()
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def extract_video_id(url: str) -> str:
9
  """
10
  Handles typical YouTube URLs:
 
5
  #loader = YoutubeTranscriptReader()
6
  yt_ap = YouTubeTranscriptApi()
7
 
8
+
9
def extract_video_id(url: str) -> str:
    """
    Extracts the video ID from a YouTube URL.

    Recognised URL shapes:
        - youtube.com/watch?v=<id>            (``v`` first in the query string)
        - youtu.be/<id>
        - youtube.com/watch?...&v=<id>        (``v`` after other query parameters)
        - youtube.com/embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
          (also on youtube-nocookie.com)

    Args:
        url (str): The full YouTube video URL.
    Returns:
        str: The extracted 11-character video ID.
    Raises:
        ValueError: If none of the recognised URL shapes is found in ``url``.
    """
    video_id = r"([a-zA-Z0-9_-]{11})"
    patterns = [
        # The two original patterns stay first, so every URL that was
        # accepted before still resolves to exactly the same ID.
        r"youtube\.com/watch\?v=" + video_id,
        r"youtu\.be/" + video_id,
        # ``v`` preceded by other query parameters; the lazy repeat skips
        # whole ``name=value&`` pairs so e.g. ``xv=`` is never mistaken for ``v=``.
        r"youtube\.com/watch\?(?:[^\s#&]*&)*?v=" + video_id,
        # Path-style links where the ID is a path segment.
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/" + video_id,
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError("Invalid YouTube URL or unable to extract video ID.")
26
+
27
def get_youtube_transcript(url: str) -> str:
    """
    Fetches the transcript text for a given YouTube video.

    The transcript entries are joined with single spaces, stripped, and
    truncated to the first 2000 characters. Failures are reported as a
    human-readable string rather than raised.

    Args:
        url (str): The YouTube video URL.
    Returns:
        str: Combined transcript text or an error message.
    """
    try:
        video_id = extract_video_id(url)
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy class-level helper: returns a list of dicts with a "text" key.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer youtube-transcript-api releases no longer provide
            # get_transcript; use the module-level instance and convert the
            # fetched transcript back to the same list-of-dicts shape.
            transcript_list = yt_ap.fetch(video_id).to_raw_data()
        full_text = " ".join(entry["text"] for entry in transcript_list)
        return full_text.strip()[:2000]  # Truncate to 2000 chars to prevent token overflow
    except TranscriptsDisabled:
        return "This video has transcripts disabled."
    except NoTranscriptFound:
        return "No transcript was found for this video."
    except Exception as e:
        # Best-effort tool: any other failure (including an invalid URL) is
        # returned as text instead of propagating.
        return f"Transcript error: {str(e)}"
46
+
47
# Wrap the transcript fetcher as a FunctionTool; no explicit name or
# description is passed here.
youtube_tool = FunctionTool.from_defaults(fn=get_youtube_transcript)
48
+
49
+
50
  def extract_video_id(url: str) -> str:
51
  """
52
  Handles typical YouTube URLs: