File size: 4,218 Bytes
9228cad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6fd213
9228cad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
from collections import OrderedDict

import requests
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load Models
@st.cache_resource
def load_models():
    """Load the pretrained ``t5-small`` tokenizer and model.

    The function is wrapped in ``st.cache_resource`` so Streamlit can hand
    back the already-loaded objects instead of loading them again.

    Returns:
        A ``(tokenizer, model)`` tuple: a ``T5Tokenizer`` and a
        ``T5ForConditionalGeneration``, both from the ``t5-small`` checkpoint.
    """
    checkpoint = "t5-small"
    return (
        T5Tokenizer.from_pretrained(checkpoint),
        T5ForConditionalGeneration.from_pretrained(checkpoint),
    )


# Module-level handles used by summarize_articles and generate_catchy_content.
t5_tokenizer, t5_model = load_models()

# API key for NewsAPI.
# The NEWS_API_KEY environment variable takes precedence so the credential can
# be supplied (and rotated) without editing source.
# SECURITY: the literal fallback is a credential committed to source control.
# It is kept only so existing deployments keep working; it should be revoked
# and removed once the environment variable is set everywhere.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY") or "66db8e116ae24c49beef53e8b879600a"

# Fetch news articles based on the user query
@st.cache_data(ttl=600)
def fetch_news(user_query):
    """Query the NewsAPI ``/v2/everything`` endpoint for ``user_query``.

    Results are cached by ``st.cache_data``. The cache entry expires after
    10 minutes so that newer articles are picked up, and so that an empty
    result caused by a transient non-200 response is not served forever.

    Args:
        user_query: Search phrase sent as the ``q`` request parameter.

    Returns:
        A list of ``{'title': ..., 'description': ...}`` dicts built from the
        response's ``articles`` entries that have a non-empty description
        (the request asks for a page of 10). Returns an empty list when the
        HTTP status is not 200.

    Raises:
        requests.RequestException: If the request fails or exceeds the
            timeout. The exception is left to the caller to handle.
    """
    NEWS_API_URL = "https://newsapi.org/v2/everything"
    params = {
        'q': user_query,
        'apiKey': NEWS_API_KEY,
        'language': 'en',
        'pageSize': 10,  # Fetch 10 articles
        'sortBy': 'relevance',
    }
    # Without an explicit timeout a stalled connection would block the app
    # indefinitely.
    response = requests.get(NEWS_API_URL, params=params, timeout=10)
    if response.status_code != 200:
        return []

    # `or []` also covers a payload whose "articles" value is null.
    articles = response.json().get('articles') or []
    return [
        {
            # dict.get's default only applies to a missing key; use `or` so a
            # key that is present with a null/empty value also falls back.
            'title': article.get('title') or 'No Title',
            # The filter below guarantees a non-empty description here.
            'description': article['description'],
        }
        for article in articles
        if article.get('description')
    ]

# Summarize articles
def summarize_articles(articles):
    """Summarize each article with the module-level T5 tokenizer and model.

    Args:
        articles: Iterable of dicts providing ``'title'`` and
            ``'description'`` keys.

    Returns:
        A list with one summary string per article, in input order, each
        passed through ``remove_redundancy``.
    """

    def _summarize_one(item):
        # T5 task prefix followed by the article's title and description.
        prompt = f"summarize: Title: {item['title']}. Description: {item['description']}"
        token_ids = t5_tokenizer.encode(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        generated = t5_model.generate(
            token_ids,
            max_length=100,
            min_length=50,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        text = t5_tokenizer.decode(generated[0], skip_special_tokens=True)
        return remove_redundancy(text)

    return [_summarize_one(item) for item in articles]

# Remove redundancy in summaries
def remove_redundancy(summary):
    """Drop repeated sentences from ``summary``, keeping first occurrences.

    The text is split on ``'. '``. Because that split leaves the closing
    period attached to the last sentence, sentences are compared with
    surrounding whitespace and trailing periods stripped; otherwise a
    repeated final sentence ("A. B. A.") would never match its earlier copy.

    Args:
        summary: Text whose sentences are separated by ``'. '``.

    Returns:
        The unique sentences re-joined with ``'. '`` in their original order.
        If the input ended with a period, the result does too.
    """
    seen = set()
    unique = []
    for sentence in summary.split('. '):
        # Comparison key only; the sentence itself is kept verbatim.
        key = sentence.strip().rstrip('.').rstrip()
        if key in seen:
            continue
        seen.add(key)
        unique.append(sentence)

    result = '. '.join(unique)
    # Dropping a duplicated last sentence also drops the closing period it
    # carried; restore it so the text still ends the way the input did.
    if summary.rstrip().endswith('.') and not result.rstrip().endswith('.'):
        result += '.'
    return result

# Generate catchy content based on all 10 summaries
def generate_catchy_content(summarized_content):
    """Generate one piece of text from a collection of summaries.

    The summaries are joined with ``', '`` and embedded in a
    "Write a blog post" prompt, which is tokenized (truncated at 512 tokens)
    and passed to the module-level T5 model.

    Args:
        summarized_content: Iterable of summary strings.

    Returns:
        The decoded model output with special tokens removed.
    """
    insights = ', '.join(summarized_content)
    # The newlines and four-space indents reproduce the layout of the
    # original triple-quoted prompt exactly.
    prompt = f"\n    Write a blog post based on these insights:\n    {insights}\n    "
    token_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = t5_model.generate(
        token_ids,
        max_length=300,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return t5_tokenizer.decode(generated[0], skip_special_tokens=True)

# Main App
st.title("News Summarizer & Blog Generator")
st.subheader("Enter a topic to fetch news, summarize and generate engaging content!")

# Query Input
user_query = st.text_input("Enter a query (e.g., 'AI trends', 'Climate change impact'):")

if st.button("Fetch, Summarize and Generate"):
    # Treat a whitespace-only entry the same as an empty one.
    query = user_query.strip()

    # None means "no fetch result" (nothing entered, or the request failed);
    # an empty list means the fetch succeeded but returned nothing usable.
    articles = None

    if not query:
        st.error("Please enter a query to proceed!")
    else:
        st.info(f"Fetching articles related to: {query}")
        try:
            with st.spinner("Fetching news articles..."):
                articles = fetch_news(query)
        except (requests.RequestException, ValueError):
            # Show a generic message rather than the exception text: the API
            # key is sent as a URL parameter, and request errors may include
            # the full URL in their message.
            st.error("Could not retrieve news articles. Please try again later.")

    if articles:
        st.success(f"Fetched {len(articles)} articles!")

        # Display only the first 4 articles
        st.subheader("Fetched Articles")
        for i, article in enumerate(articles[:4], 1):
            st.write(f"**Article {i}:** {article['title']}")
            st.write(f"*Description:* {article['description']}")

        # Summarize every fetched article, not only the 4 displayed above.
        with st.spinner("Summarizing articles..."):
            summaries = summarize_articles(articles)
        st.subheader("Summarized Articles")
        for i, summary in enumerate(summaries[:4], 1):  # Display the first 4 summaries
            st.write(f"**Summary {i}:** {summary}")

        # Generate the blog post from all summaries.
        with st.spinner("Generating blog post..."):
            generated_content = generate_catchy_content(summaries)
        st.subheader("Generated Blog Post")
        st.write(generated_content)
    elif articles is not None:
        st.warning("No articles found. Try a different query.")