Spaces:
Sleeping
Sleeping
import feedparser | |
import requests | |
import streamlit as st | |
from tqdm import tqdm | |
from pathlib import Path | |
def get_matadata() -> dict:
    """Map each episode's transcript path to its metadata from the RSS feed.

    Downloads the Making Sense RSS feed and, for every entry, derives the
    transcript file path ``making_sense_transcripts/<episode id>.txt`` (any
    ``/`` in the episode id is replaced by ``_`` so the id forms a single
    file name rather than nested directories).

    Returns:
        A dict keyed by the transcript path (as ``str``). Each value is a
        dict with the keys ``"title"``, ``"episode_id"`` and ``"thumbnail"``.
        ``"thumbnail"`` is the episode's own image URL when the entry has
        one, otherwise the podcast-level image URL, or ``None`` if the feed
        provides neither.

    Raises:
        requests.RequestException: If the feed cannot be downloaded (network
            failure, timeout, or a non-2xx HTTP status).
        KeyError: If a feed entry has no ``"title"`` or ``"id"``.
    """
    making_sense_rss = "https://wakingup.libsyn.com/rss"
    transcript_dir = Path("making_sense_transcripts/")

    # Without a timeout requests may block forever on an unresponsive host.
    response = requests.get(making_sense_rss, timeout=30)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # hitting an unrelated KeyError further down.
    response.raise_for_status()
    rss_feed = feedparser.parse(response.content)

    # Same tolerant lookup as the per-episode image below: a feed without a
    # channel image yields None instead of raising.
    podcast_thumbnail = rss_feed.feed.get("image", {}).get("href")

    metadata_map = {}
    for episode in tqdm(rss_feed.entries):
        episode_id = episode["id"]
        # Fall back to the podcast artwork when the entry has no usable image.
        thumbnail = episode.get("image", {}).get("href") or podcast_thumbnail
        # str.replace is a no-op when the id contains no "/", so no branch
        # is needed to handle the two cases separately.
        file_name = episode_id.replace("/", "_") + ".txt"
        metadata_map[str(transcript_dir / file_name)] = {
            "title": episode["title"],
            "episode_id": episode_id,
            "thumbnail": thumbnail,
        }
    return metadata_map