In [11]:
import feedparser
import pandas as pd
from datetime import datetime, timedelta
import ssl
from bs4 import BeautifulSoup
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides warnings that point at real
# problems in later cells — consider filtering specific categories instead.
warnings.filterwarnings("ignore")

# Work around SSL certificate verification failures: make the unverified
# context factory the default for HTTPS connections created via `ssl`.
# NOTE(review): this turns off certificate verification process-wide, so
# downloaded feeds are no longer protected against tampering. Prefer fixing
# the local CA bundle if possible.
if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

In [114]:
# Feed URLs that are passed to fetch_feed(); one entry per source.
links = [
    "https://bair.berkeley.edu/blog/feed.xml",
    "https://feeds.feedburner.com/nvidiablog",
    "https://www.oreilly.com/radar/feed/",
    "https://www.microsoft.com/en-us/research/feed/",
    "https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml",
    "https://research.facebook.com/feed/",
    "https://openai.com/news/rss.xml",
    "https://deepmind.google/blog/feed/basic/",
]

In [31]:
# Scratch check: print the HTML body of every entry of a single feed.
# The unverified-HTTPS workaround is repeated here so the cell also works
# when it is run on its own.
if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

feed = feedparser.parse("https://www.technologyreview.com/topic/artificial-intelligence/feed")

for entry in feed.entries:
    # `content` is an optional element: entries without it would raise
    # AttributeError on `entry.content`, so fall back to the summary text.
    content = entry.get("content")
    print(content[0]["value"] if content else entry.get("summary", ""))


<p><em>Artificial intelligence</em> was barely a term in 1956, when top scientists from the field of computing arrived at Dartmouth College for a summer conference. The computer scientist John McCarthy had coined the phrase in the funding proposal for the event, a gathering to work through how to build machines that could use language, solve problems like humans, and improve themselves. But it was a good choice, one that captured the organizers’ founding premise: Any feature of human intelligence could “in principle be so precisely described that a machine can be made to simulate it.”&nbsp;</p>



<p>In their proposal, the group had listed several “aspects of the artificial intelligence problem.” The last item on their list, and in hindsight perhaps the most difficult, was building a machine that could exhibit creativity and originality.</p>



<p>At the time, psychologists were grappling with how to define and measure creativity in humans. The prevailing theory—that creativity was a p

In [115]:
def fetch_feed(links):
    """Download every feed in ``links`` and collect the entries in one DataFrame.

    Args:
        links: Iterable of feed URLs. A dict keyed by URL works as well,
            because iterating a dict yields its keys.

    Returns:
        pandas.DataFrame with the columns ``Title``, ``Link``, ``Published``
        and ``Description``, one row per feed entry. Fields missing from an
        entry are filled with the placeholders "No Title", "No Link",
        "No Date" and "No Description". The frame is empty (but still has
        the four columns) when nothing could be fetched.
    """
    columns = ["Title", "Link", "Published", "Description"]
    rows = []

    # Iterate over the links
    for link in links:
        # Each feed is guarded on its own: a failure in one feed must not
        # discard the feeds that come after it. Rows of a feed are only
        # added once the whole feed was processed, so a failure can never
        # leave a half-filled record behind.
        try:
            feed = feedparser.parse(link)

            # Turn every entry of the feed into one row record
            feed_rows = [
                {
                    "Title": entry.get("title", "No Title"),
                    "Link": entry.get("link", "No Link"),
                    "Published": entry.get("published", "No Date"),
                    "Description": entry.get("description", "No Description"),
                }
                for entry in feed.entries
            ]
        except Exception as e:
            # Report the error and carry on with the next feed
            print(f"An error occurred: {e}")
            continue

        rows.extend(feed_rows)

    # Build the DataFrame once from the collected records
    return pd.DataFrame(rows, columns=columns)

In [116]:
# Fetch all feeds listed in `links` into a single DataFrame of raw entries.
df = fetch_feed(links)

In [117]:
def extract_and_clean_data(df):
    """Return the entries of the last 7 days with parsed dates and plain-text descriptions.

    The input frame is NOT modified, so the function can be called
    repeatedly on the same frame.

    Args:
        df: DataFrame with at least the columns ``Published`` (date text as
            delivered by the feed) and ``Description`` (possibly HTML).

    Returns:
        A new DataFrame without ``Published`` and with an added ``date``
        column (datetime, day precision). Only rows dated within the last
        7 days are kept, sorted newest first, and the original index labels
        are preserved. ``Description`` is stripped of HTML, cut to 500
        characters and freed of newline characters. An empty DataFrame is
        returned if processing fails.
    """
    try:
        # RFC 822 style date as used by RSS, e.g. "Fri, 11 Apr 2025 09:00:00".
        # The day may have one or two digits.
        rfc822_pattern = r'(\d{1,2} \w{3} \d{4})'
        # ISO 8601 style date as used by Atom, e.g. "2025-04-11T09:00:00Z".
        iso_pattern = r'(\d{4}-\d{2}-\d{2})'

        published = df['Published'].astype(str)

        # errors='coerce' turns an unparseable value into NaT for that row
        # only, instead of failing the whole run.
        rfc822_dates = pd.to_datetime(
            published.str.extract(rfc822_pattern, expand=False),
            format='%d %b %Y',
            errors='coerce',
        )
        iso_dates = pd.to_datetime(
            published.str.extract(iso_pattern, expand=False),
            format='%Y-%m-%d',
            errors='coerce',
        )

        # Work on a new frame: replace 'Published' by the parsed 'date'
        dated = df.drop(columns=['Published']).assign(
            date=rfc822_dates.fillna(iso_dates)
        )

        # Get today's date and calculate the date 7 days ago
        today = datetime.now()
        seven_days_ago = today - timedelta(days=7)

        # Keep the rows within the last 7 days (rows without a date drop
        # out here) and sort them newest first
        in_window = dated['date'].between(seven_days_ago, today)
        df_last_seven_days = dated.loc[in_window].sort_values(by='date', ascending=False)

        # Function to clean HTML tags (best effort: falls back to the raw text)
        def clean_html(text):
            if not isinstance(text, str):
                return ""
            try:
                return BeautifulSoup(text, "html.parser").get_text()
            except Exception as e:
                print(f"Error cleaning HTML: {e}")
                return text

        # Strip HTML, shorten to 500 characters, then remove newline characters
        def tidy_description(text):
            return clean_html(text)[:500].replace("\n", "")

        return df_last_seven_days.assign(
            Description=df_last_seven_days['Description'].map(tidy_description)
        )

    except Exception as e:
        print(f"An error occurred while processing the data: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error


In [118]:
# Reduce the fetched entries to the cleaned rows of the last 7 days.
df_last_seven_days = extract_and_clean_data(df)

In [119]:
# Show the filtered frame as this cell's output.
df_last_seven_days


Unnamed: 0,Title,Link,Description,date
0,Defending against Prompt Injection with Struct...,http://bair.berkeley.edu/blog/2025/04/11/promp...,Recent advances in Large Language Models (LLMs...,2025-04-11
10,National Robotics Week — Latest Physical AI Re...,https://blogs.nvidia.com/blog/national-robotic...,Check back here throughout the week to learn t...,2025-04-11
11,Beyond CAD: How nTop Uses AI and Accelerated C...,https://blogs.nvidia.com/blog/ntop-computer-ai...,"As a teenager, Bradley Rothenberg was obsessed...",2025-04-11
12,Myth and Mystery Await: GeForce NOW Brings ‘So...,https://blogs.nvidia.com/blog/geforce-now-thur...,Get ready to explore the Deep South. South of ...,2025-04-10
13,(AI)ways a Cut Above: GeForce RTX 50 Series Ac...,https://blogs.nvidia.com/blog/studio-rtx-ai-ga...,"As AI-powered tools continue to evolve, NVIDIA...",2025-04-10
14,NVIDIA Celebrates Partners of the Year Advanci...,https://blogs.nvidia.com/blog/nvidia-partner-n...,NVIDIA this week recognized the contributions ...,2025-04-10
109,BrowseComp: a benchmark for browsing agents,https://openai.com/index/browsecomp,BrowseComp: a benchmark for browsing agents.,2025-04-10
28,Debug-gym: an environment for AI coding tools ...,https://www.microsoft.com/en-us/research/blog/...,Developers spend a lot of time debugging code....,2025-04-10
39,Hopping gives this tiny robot a leg up,https://www.sciencedaily.com/releases/2025/04/...,"A hopping, insect-sized robot can jump over ga...",2025-04-09
110,OpenAI Pioneers Program,https://openai.com/index/openai-pioneers-program,Advancing model performance and real world eva...,2025-04-09


In [112]:
# Write the filtered articles to 'news.xlsx' (relative path), leaving out the index column.
df_last_seven_days.to_excel('news.xlsx', index=False)

In [121]:
# Feed URL -> human-readable source name.
# NOTE: this rebinds `links` (previously a list of URLs) to a dict; iterating
# it still yields the URLs.
links = {
    "https://bair.berkeley.edu/blog/feed.xml": "The Berkeley Artificial Intelligence Research Blog",
    "https://feeds.feedburner.com/nvidiablog": "NVIDIA Blog",
    "https://www.microsoft.com/en-us/research/feed/": "Microsoft Research",
    "https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml": "Science Daily",
    "https://research.facebook.com/feed/": "META Research",
    "https://openai.com/news/rss.xml": "OpenAI News",
    "https://deepmind.google/blog/feed/basic/": "Google DeepMind Blog",
}

In [129]:
# Print every feed URL next to its source name, one pair per line.
for feed_url, source_name in links.items():
    print(feed_url, source_name)

https://bair.berkeley.edu/blog/feed.xml The Berkeley Artificial Intelligence Research Blog
https://feeds.feedburner.com/nvidiablog NVDIA Blog
https://www.microsoft.com/en-us/research/feed/ Microsoft Research
https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml Science Daily
https://research.facebook.com/feed/ META Research
https://openai.com/news/rss.xml OpenAI News
https://deepmind.google/blog/feed/basic/ Google DeepMind Blog<


In [127]:
# The dict keys are the feed URLs; display them as a list.
list(links.keys())

['https://bair.berkeley.edu/blog/feed.xml',
 'https://feeds.feedburner.com/nvidiablog',
 'https://www.microsoft.com/en-us/research/feed/',
 'https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml',
 'https://research.facebook.com/feed/',
 'https://openai.com/news/rss.xml',
 'https://deepmind.google/blog/feed/basic/']

In [None]:
# Scratch notes: XPath expressions kept for reference only.
# They are not Python, so they are commented out to keep "Run All" from
# stopping on a SyntaxError in this cell.
# NOTE(review): presumably copied from the browser dev tools while locating
# <article> elements on a page to scrape — confirm which page they refer to.
#
# //*[@id="content"]/section[2]/div/div[1]/div[1]/div[1]/article/div[2]
# //*[@id="content"]/section[2]/div/div[1]/div[1]/article[1]/div[2]
# //*[@id="content"]/section[2]/div/div[1]/div[1]/article[2]/div[2]
# //*[@id="content"]/section[2]/div/div[1]/div[1]/article[9]

In [40]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# Page that scrape_the_batch_articles() downloads and parses.
URL = "https://www.deeplearning.ai/the-batch/"

def scrape_the_batch_articles(days=7, timeout=10):
    """Scrape the page at ``URL`` and return the articles of the last ``days`` days.

    Every ``<article>`` element of the page is inspected. Articles whose date
    text is missing or does not match the format "%b %d, %Y" (e.g.
    "Apr 16, 2025") are skipped.

    Args:
        days: Size of the look-back window in days. Defaults to 7.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        list[dict]: One dict per recent article with the keys ``"title"``,
        ``"date"`` (``datetime``), ``"summary"`` and ``"link"``, in page order.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    from urllib.parse import urljoin

    # Without a timeout a stalled server would block the notebook forever.
    res = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into "no articles".
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    results = []

    for article in soup.find_all("article"):
        # Date (based on div with specific class); undated articles are skipped
        date_tag = article.find("div", class_="text-slate-500")
        date_str = date_tag.get_text(strip=True) if date_tag else ""
        try:
            parsed_date = datetime.strptime(date_str, "%b %d, %Y")
        except ValueError:
            continue

        # Link: resolve against the page URL so that both relative and
        # absolute hrefs produce a valid address
        link_tag = article.find("a", href=True)
        link = urljoin(URL, link_tag["href"]) if link_tag else "#"

        # Title
        title_tag = article.find("h2")
        title = title_tag.get_text(strip=True) if title_tag else "No title"

        # Summary
        summary_tag = article.find("div", class_="text-sm")
        summary = summary_tag.get_text(strip=True) if summary_tag else ""

        results.append({
            "title": title,
            "date": parsed_date,
            "summary": summary,
            "link": link
        })

    # Keep only the articles inside the look-back window
    cutoff = datetime.now() - timedelta(days=days)
    return [item for item in results if item["date"] >= cutoff]


In [42]:
# Scrape the page and show the resulting list of article dicts as the cell output.
news = scrape_the_batch_articles()
news

[{'title': 'Google Unveils Gemini 2.5, MCP Gains Momentum, Behind Sam Altman’s Fall and Rise, LLMs That Understand Misspellings',
  'date': datetime.datetime(2025, 4, 16, 0, 0),
  'summary': 'The Batch AI News and Insights: I’ve noticed that many GenAI application projects put in automated evaluations (evals) of the system’s output probably later — and rely on humans to manually examine and judge outputs longer — than they should.',
  'link': 'https://www.deeplearning.ai/the-batch/tag/apr-16-2025/'}]

In [39]:
# Print a plain-text report of the recent articles: a count line, then
# date/title, summary, link and a separator line for each article.
if __name__ == "__main__":
    news = scrape_the_batch_articles()
    print(f"Found {len(news)} recent articles.\n")

    separator = "-" * 80
    for article in news:
        published = article['date'].strftime('%Y-%m-%d')
        print(f"{published} — {article['title']}")
        print(article['summary'])
        print(article['link'])
        print(separator)


Found 1 recent articles.

2025-04-16 — Google Unveils Gemini 2.5, MCP Gains Momentum, Behind Sam Altman’s Fall and Rise, LLMs That Understand Misspellings
The Batch AI News and Insights: I’ve noticed that many GenAI application projects put in automated evaluations (evals) of the system’s output probably later — and rely on humans to manually examine and judge outputs longer — than they should.
https://www.deeplearning.ai/the-batch/tag/apr-16-2025/
--------------------------------------------------------------------------------
