Spaces:
Sleeping
Sleeping
import streamlit as st | |
from news_scraper import YahooNewsScraper | |
from tidif_calclator import JapaneseTextVectorizer | |
from cosine_similarity_calculator import CosineSimilarityCalculator | |
from summerizer import TextSummarizer | |
st.title("ニュース検索アプリ")

# Per-run working values. Streamlit re-executes this script from the top on
# every interaction, so these are reset on each run.
best_article_text = best_article_url = None
best_max_word = max_word = None
# Sentinel below any expected score: the original author's note says cosine
# similarity is non-negative here, so -1 loses to the first real value.
best_max_value = -1
# Number of articles requested from the scraper per fetch.
num_news = 5

# Seed the session state once; values stored here survive reruns.
if 'news_fetched' not in st.session_state:
    for state_key, initial_value in (
        ('news_fetched', False),
        ('article_text_list', []),
        ('article_url_list', []),
    ):
        st.session_state[state_key] = initial_value
# "Fetch latest news" button: scrape `num_news` articles and keep them in the
# session state so they are still available after the next rerun.
if st.button('最新ニュース取得'):
    with st.spinner('ニュースを取得中...'):
        # Scrape Yahoo News; each call yields an (article text, detail URL) pair.
        scraper = YahooNewsScraper()
        scraped_pairs = [scraper.scrape_article(index) for index in range(num_news)]
        article_text_list = [text for text, _ in scraped_pairs]
        article_url_list = [url for _, url in scraped_pairs]

        # Mark the fetch as done and persist the results.
        st.session_state['news_fetched'] = True
        st.session_state['article_text_list'] = article_text_list
        st.session_state['article_url_list'] = article_url_list
    st.write("取得完了しました")
# Search UI, shown only after articles have been fetched. For the entered
# noun, pick the article whose keyword set contains the most similar word
# (by cosine similarity) and display a summary of that article.
if st.session_state['news_fetched']:
    search_word = st.text_input('名詞', placeholder='名詞を入力してください', max_chars=10, help='10文字以内の名詞')
    if st.button('要約作成'):
        article_text_list = st.session_state['article_text_list']
        article_url_list = st.session_state['article_url_list']

        # Start every search from a clean slate. -inf (rather than -1) keeps
        # the comparison correct even if the similarity calculator ever
        # returns a negative score.
        best_article_text = None
        best_article_url = None
        best_max_word = None
        best_max_value = float('-inf')

        if not search_word or not search_word.strip():
            # Nothing to compare against; do not call the similarity model.
            st.warning('検索する名詞を入力してください')
        else:
            for temp_article_text, temp_article_url in zip(article_text_list, article_url_list):
                # Skip articles whose text is missing or empty.
                if not temp_article_text:
                    continue

                # Compute TF-IDF values; the keys are used as the article's keywords.
                # NOTE(review): the vectorizer and calculator are re-created per
                # article as in the original; hoist them out of the loop only
                # after confirming they hold no per-article state.
                vectorizer = JapaneseTextVectorizer()
                tfidf_dict = vectorizer.fit_transform(temp_article_text)

                # Compute cosine similarity between the search word and each keyword.
                word_similarity = CosineSimilarityCalculator()
                article_keyword_list = list(tfidf_dict)
                result_word_similarity = word_similarity.calculate_similarity(search_word, article_keyword_list)

                # Keep only keywords that actually received a score.
                filtered_data = {k: v for k, v in result_word_similarity.items() if v is not None}
                if not filtered_data:
                    continue

                # Best keyword of this article and its score.
                max_word = max(filtered_data, key=filtered_data.get)
                max_value = filtered_data[max_word]

                # Remember the article if it beats the best seen so far.
                if max_value > best_max_value:
                    best_max_value = max_value
                    best_max_word = max_word
                    best_article_text = temp_article_text
                    best_article_url = temp_article_url

            if best_article_text is None:
                # No article produced a usable score, so there is nothing to
                # summarize; passing None to the summarizer would fail.
                st.warning('入力した単語に類似するキーワードを含む記事が見つかりませんでした')
            else:
                # Summarize the winning article.
                summarizer = TextSummarizer()
                summary_text = summarizer.summarize(best_article_text, max_length=30, min_length=20)
                st.write(f'最も類似度が高いワードは「{best_max_word}」でした')
                st.write(f'url:{best_article_url}')
                # Streamlit rejects text_area heights below 68 px, so use the
                # minimum instead of the original 20.
                st.text_area("要約:", summary_text, height=68)