sense / rss.py
vanishing-grad's picture
Create the initial version
4e6bed1
import feedparser
import requests
import streamlit as st
from tqdm import tqdm
from pathlib import Path
@st.cache
def get_matadata():
    """Build a transcript-path -> episode-metadata map from the podcast RSS feed.

    Downloads the RSS feed, parses it with feedparser and creates one entry
    per feed item. (The misspelled function name is kept as-is so existing
    callers keep working.)

    Returns:
        dict[str, dict]: keyed by the transcript path derived from the
        episode id (``making_sense_transcripts/<episode id>.txt`` with any
        "/" in the id replaced by "_"). Each value holds ``"title"``,
        ``"episode_id"`` and ``"thumbnail"``; the thumbnail falls back to the
        podcast-level image when the episode has none of its own.

    Raises:
        requests.exceptions.RequestException: if the feed cannot be
            downloaded (connection error, timeout, or non-2xx HTTP status).
        KeyError: if the feed has no podcast-level image, or an entry lacks
            a title or id.
    """
    transcript_dir = Path("making_sense_transcripts/")
    feed_url = "https://wakingup.libsyn.com/rss"

    # Bound the wait so a stalled server cannot hang the app forever, and
    # fail loudly on an HTTP error instead of parsing an error page as a feed.
    response = requests.get(feed_url, timeout=30)
    response.raise_for_status()

    rss_feed = feedparser.parse(response.content)
    podcast_thumbnail = rss_feed.feed["image"]["href"]

    metadata = {}
    for episode in tqdm(rss_feed.entries):
        episode_id = episode["id"]
        # Use the episode's own artwork when present, else the podcast's.
        thumbnail = episode.get("image", {}).get("href") or podcast_thumbnail
        # "/" in an id would be treated as a directory separator, so flatten
        # it to keep every transcript directly inside transcript_dir.
        file_name = episode_id.replace("/", "_") + ".txt"
        metadata[str(transcript_dir / file_name)] = {
            "title": episode["title"],
            "episode_id": episode_id,
            "thumbnail": thumbnail,
        }
    return metadata