Final_Assignment_Template

Sleeping

App Files Files Community

cowrycode committed on Nov 4

Commit

aea337a

verified ·

1 Parent(s): 81917a3

Create youtube_tool.py

Browse files

Files changed (1) hide show

youtube_tool.py +68 -0

youtube_tool.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from urllib.parse import parse_qs, urlparse
+from llama_index.core.tools import FunctionTool
+from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
+from llama_index.readers.youtube_transcript.utils import is_youtube_video
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
+# Module-level LlamaIndex transcript reader instance. It is not referenced by
+# any function visible in this file.
+loader = YoutubeTranscriptReader()
+# Shared youtube_transcript_api client instance; the fetch helpers in this
+# file call its fetch() method.
+yt_ap = YouTubeTranscriptApi()
+def extract_video_id(url: str) -> str:
+    """
+    Handles typical YouTube URLs:
+    - https://www.youtube.com/watch?v=VIDEO_ID
+    - https://youtu.be/VIDEO_ID
+    - with extra query params
+    """
+    parsed = urlparse(url)
+    if parsed.hostname in {"www.youtube.com", "youtube.com"}:
+        qs = parse_qs(parsed.query)
+        if "v" in qs:
+            return qs["v"][0]
+    # fallback for youtu.be or raw IDs
+    return parsed.path.lstrip("/")
+def fetch_youtube_transcript(video_url: str) -> str:
+    """
+    Fetch YouTube transcript text for the given URL.
+    In English language.
+    """
+    video_id = extract_video_id(video_url)
+    try:
+        # ✅ call on the class, NOT an instance
+        transcript_data = yt_ap.fetch(
+            video_id=video_id,
+            languages=["en"], #You can add as many languages, use yt_ap.list(video_id) function to get the langauges
+        )
+        #FROM TRANSCRIPT DATA, YOU CAN CREATE A OBJECT OF TRANSCRIPT SNIPET AND TIME
+        arr = [snippet.text for snippet in transcript_data]
+        return " ".join(arr)
+        #return " ".join(entry["text"] for entry in arr)
+    except Exception as e:
+        return f"Error fetching video details: {str(e)}"
+def fetch_youtube_transcript_snippets(video_url: str) -> str:
+    """
+    Fetch YouTube transcript snippets for the given URL.
+    It gets the start-time, end-time and duration of each snippet.
+    """
+    video_id = extract_video_id(video_url)
+    try:
+        # ✅ call on the class, NOT an instance
+        transcript_data = yt_ap.fetch(
+            video_id=video_id,
+            languages=["en"], #You can add as many languages, use yt_ap.list(video_id) function to get the langauges
+        )
+        arr = [
+            {"text": snippet.text, "duration": snippet.duration, "start": snippet.start}
+            for snippet in transcript_data
+        ]
+        return " ".join(f"Text: {entry['text']} Duration: {entry['duration']} StartTime: {entry['start']} <End>" for entry in arr)
+    except Exception as e:
+        return f"Error fetching video details: {str(e)}"
+# Wrap the two transcript helpers as LlamaIndex FunctionTool objects.
+# NOTE(review): from_defaults presumably derives each tool's name and
+# description from the function name and docstring; confirm against the
+# LlamaIndex documentation before relying on the docstring wording.
+youtube_transcript_tool = FunctionTool.from_defaults(fetch_youtube_transcript)
+youtube_transcript_snippet_tool = FunctionTool.from_defaults(fetch_youtube_transcript_snippets)