presum / trends_hespress.py
Python-proje's picture
Upload 12 files
55af729
from bs4 import BeautifulSoup
import requests
import streamlit as st
def get_trends_image(page):
    """Return the ``src`` of the first ``<img>`` element on a web page.

    Args:
        page: URL of the page to download.

    Returns:
        The ``src`` attribute of the first ``<img>`` element in the parsed
        document, or ``None`` when the document contains no ``<img>``
        element (or that element has no ``src`` attribute).

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
    # requests has no default timeout; without one a stalled server would
    # block the caller indefinitely.
    response = requests.get(page, headers=headers, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    # Only the first image in document order is considered.
    image = soup.find("img")
    if image is None:
        # Page without any image: report "no image" instead of raising
        # AttributeError on None.
        return None
    return image.get('src')
@st.cache_data(ttl=7200)  # cached result expires after 7200 seconds
def trends_hespress():
    """Scrape the trending articles shown on the Hespress English home page.

    Downloads ``https://en.hespress.com/`` and collects:

    * every ``<a class="wpp-post-title">`` inside the
      ``left-side heading-box col`` container; the image of each of these
      is looked up by fetching the article page with ``get_trends_image``;
    * one extra article taken from the first ``category-society`` group
      item on the page, whose title and image come from the thumbnail's
      ``alt`` and ``src`` attributes and whose link is the second ``<a>``
      of that group item.

    Sections that cannot be found in the page markup are skipped instead of
    raising, so the function returns whatever could be extracted (possibly
    an empty list). Note that the result, including an empty one, is cached
    by ``st.cache_data`` for the configured TTL.

    Returns:
        A list of dicts with the keys ``'title'``, ``'image_link'`` and
        ``'article_link'``.

    Raises:
        requests.RequestException: if the home page request fails or times
            out (errors from ``get_trends_image`` propagate as well).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
    # requests has no default timeout; without one a stalled server would
    # block the Streamlit script indefinitely.
    response = requests.get(
        'https://en.hespress.com/',
        headers=headers,
        timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    trends_list = []

    # A multi-word class_ string matches the exact value of the class
    # attribute, so the class order here must mirror the page markup.
    trends_box = soup.find("div", class_='left-side heading-box col')
    if trends_box is not None:
        all_trends = trends_box.find_all("a", class_='wpp-post-title')
    else:
        all_trends = []

    for anchor in all_trends:
        trend_link = anchor.get('href')
        if not trend_link:
            # An anchor without a target cannot be fetched for its image.
            continue
        trends_list.append({
            'title': anchor.text,
            # One extra HTTP request per trending article.
            'image_link': get_trends_image(trend_link),
            'article_link': trend_link,
        })

    # Extra item: the first "society" group item on the home page.
    extra_page = soup.find(
        "div",
        class_='group-item col-sm-12 col-md-6 col-xl-4 category-society bloc_col')
    if extra_page is not None:
        extra_trend = extra_page.find("div", class_='ratio-medium')
        image = extra_trend.find("img") if extra_trend is not None else None
        extra_links = extra_page.find_all('a')
        # The article URL is taken from the second anchor of the group item;
        # skip the item when the markup does not have that shape.
        if image is not None and len(extra_links) > 1:
            trends_list.append({
                'title': image.get('alt'),
                'image_link': image.get('src'),
                'article_link': extra_links[1].get('href'),
            })

    return trends_list