# news_summarizer/app.py
import streamlit as st
from news_scraper import YahooNewsScraper
from tidif_calclator import JapaneseTextVectorizer
from cosine_similarity_calculator import CosineSimilarityCalculator
from summerizer import TextSummarizer
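
# Note: the four helper modules above live alongside app.py in this repo.
# Their interfaces, as used in this file, are:
#   YahooNewsScraper.scrape_article(i) -> (article_text, detail_url)
#   JapaneseTextVectorizer.fit_transform(text) -> dict of keyword -> TF-IDF value (only the keys are used here)
#   CosineSimilarityCalculator.calculate_similarity(word, keyword_list) -> dict of keyword -> similarity (values may be None)
#   TextSummarizer.summarize(text, max_length, min_length) -> summary string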
st.title("ニュース検索アプリ")
# Initialization
best_article_text = None
best_article_url = None
best_max_word = None
max_word = None
best_max_value = -1  # cosine similarity is >= 0, so initialize to -1
num_news = 5
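
# Streamlit reruns this whole script on every widget interaction, so the
# articles scraped below are cached in st.session_state to survive reruns.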
# Initialize session state
if 'news_fetched' not in st.session_state:
    st.session_state['news_fetched'] = False
    st.session_state['article_text_list'] = []
    st.session_state['article_url_list'] = []
if st.button('最新ニュース取得'):
    with st.spinner('ニュースを取得中...'):
        # Scrape Yahoo! News articles
        scraper = YahooNewsScraper()
        article_text_list = []
        article_url_list = []
        for i in range(num_news):
            article_text, detail_url = scraper.scrape_article(i)
            article_text_list.append(article_text)
            article_url_list.append(detail_url)
        st.session_state['news_fetched'] = True  # mark the fetch as complete
        st.session_state['article_text_list'] = article_text_list  # cache in session state
        st.session_state['article_url_list'] = article_url_list
    st.write("取得完了しました")
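
# Once articles have been fetched, show the keyword input and the summarize button.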
if st.session_state['news_fetched']:
    search_word = st.text_input('名詞', placeholder='名詞を入力してください', max_chars=10, help='10文字以内の名詞')
    if st.button('要約作成'):
        article_text_list = st.session_state['article_text_list']
        article_url_list = st.session_state['article_url_list']
        for temp_article_text, temp_article_url in zip(article_text_list, article_url_list):
            # Compute TF-IDF values for the article
            vectorizer = JapaneseTextVectorizer()
            tfidf_dict = vectorizer.fit_transform(temp_article_text)
            # Compute cosine similarity between the search word and the article keywords
            word_similarity = CosineSimilarityCalculator()
            article_keyword_list = list(tfidf_dict.keys())
            result_word_similarity = word_similarity.calculate_similarity(search_word, article_keyword_list)
            # Keep only the entries whose value is not None
            filtered_data = {k: v for k, v in result_word_similarity.items() if v is not None}
            # Get the keyword with the maximum similarity and its value
            if filtered_data:  # make sure filtered_data is not empty
                max_word = max(filtered_data, key=filtered_data.get)
                max_value = filtered_data[max_word]
                # Update the best match if this maximum beats the best so far
                if max_value > best_max_value:
                    best_max_value = max_value
                    best_max_word = max_word
                    best_article_text = temp_article_text
                    best_article_url = temp_article_url
        # Summarize the best-matching article (guard against the case where no keyword matched)
        if best_article_text is not None:
            summarizer = TextSummarizer()
            summary_text = summarizer.summarize(best_article_text, max_length=30, min_length=20)
            st.write(f'最も類似度が高いワードは「{best_max_word}」でした')
            st.write(f'url:{best_article_url}')
            st.text_area("要約:", summary_text, height=20)
        else:
            st.write('類似するワードが見つかりませんでした')
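
# To try the app locally: `streamlit run app.py`
# (the helper modules imported at the top of this file must be on the Python path).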