Spaces:
Runtime error
Runtime error
import re | |
import json | |
import streamlit as st | |
from scraping_needs import get_content | |
# cache clears after 7800s | |
def trends_mwn(user_agent): | |
#page uses javascript | |
str_main_list = get_content("https://www.moroccoworldnews.com/home/post/zheadlines",user_agent,1) # we want text not html | |
main_list = json.loads(str_main_list) | |
cards_content = [] | |
for card in main_list: | |
content_dict = {} | |
content_dict["image_link"] = card["thumb"] | |
content_dict["title"] = card["post_title"] | |
try: | |
tmp = card["tsize"] | |
except KeyError: | |
try: | |
tmp = card["msize"] | |
except KeyError: | |
tmp = card["lsize"] | |
year = re.search('20\d\d(?=[\\\/])',tmp).group() | |
month = re.search('(?<=[\\\/])\d{1,2}(?=[\\\/])',tmp).group() | |
content_dict["article_link"] = "https://www.moroccoworldnews.com/" + str(year) + "/" + str(month) + "/" + str(card["ID"]) + "/" + card["post_name"] | |
cards_content.append(content_dict) | |
return cards_content | |