Spaces:

shach1995
/

news_summarizer

Sleeping

App Files Files Community

Kota Takahashi committed on Jun 23, 2024

Commit

44bec05

1 Parent(s): fbd2a16

説明文を修正

Browse files

Files changed (3) hide show

cosine_similarity_calculator.py +9 -9
news_scraper.py +10 -8
tfidf_calculator.py +1 -1

cosine_similarity_calculator.py CHANGED Viewed

@@ -11,10 +11,10 @@ class CosineSimilarityCalculator:
         事前トレーニング済みのWord2Vecモデルをロード
         Parameters:
-        - なし。
         Returns:
-        - なし。
         """
         self.model = gensim.models.Word2Vec.load(CosineSimilarityCalculator.model_path)
@@ -32,27 +32,27 @@ class CosineSimilarityCalculator:
     def _calculate_cosine_similarity(self, embedding1, embedding2):
         """
-        コサイン類似度を計算
         Parameters:
-        - embedding1 (numpy.ndarray): 1つ目の埋め込みベクトル（2次元配列）
-        - embedding2 (numpy.ndarray): 2つ目の埋め込みベクトル（2次元配列）
         Returns:
-        - similarity (numpy.ndarray): コサイン類似度
         """
         return cosine_similarity(embedding1, embedding2)
     def calculate_similarity(self, search_word, article_keyword_list):
         """
-        指定された検索ワードと記事のキーワードリストの間のコサイン類似度を計算
         モデルにない単語の場合はエラーメッセージを出力しブレイクする
         Parameters:
         - search_word (str): 検索ワード
         - article_keyword_list (list): 記事のキーワードリスト
         Returns:
-        - similarities (dict): 記事キーワードとそれぞれの検索ワードのコサイン類似度を含むdictを作成。
                                モデルにない単語の場合はNoneを返す
         """
         # 検索ワードの埋め込みベクトルを取得
@@ -72,4 +72,4 @@ class CosineSimilarityCalculator:
                 similarities[keyword] = similarity[0][0]
             else:
                 similarities[keyword] = None
-        return similarities

         事前トレーニング済みのWord2Vecモデルをロード
         Parameters:
+        - なし
         Returns:
+        - なし
         """
         self.model = gensim.models.Word2Vec.load(CosineSimilarityCalculator.model_path)
     def _calculate_cosine_similarity(self, embedding1, embedding2):
         """
+        cos類似度を計算
         Parameters:
+        - embedding1 (numpy.ndarray): 1つ目の単語ベクトル（2次元配列）
+        - embedding2 (numpy.ndarray): 2つ目の単語ベクトル（2次元配列）
         Returns:
+        - similarity (numpy.ndarray): cos類似度
         """
         return cosine_similarity(embedding1, embedding2)
     def calculate_similarity(self, search_word, article_keyword_list):
         """
+        指定された検索ワードと記事のキーワードリストの間のcos類似度を計算
         モデルにない単語の場合はエラーメッセージを出力しブレイクする
         Parameters:
         - search_word (str): 検索ワード
         - article_keyword_list (list): 記事のキーワードリスト
         Returns:
+        - similarities (dict): 記事キーワードとそれぞれの検索ワードのcos類似度を含むdictを作成
                                モデルにない単語の場合はNoneを返す
         """
         # 検索ワードの埋め込みベクトルを取得
                 similarities[keyword] = similarity[0][0]
             else:
                 similarities[keyword] = None
+        return similarities

news_scraper.py CHANGED Viewed

@@ -7,19 +7,19 @@ from time import sleep
 class Scraper:
     def __init__(self):
         """
-        Scraperクラスを初期化し、requestsセッションを作成する。
         """
         self.session = requests.Session()
     def _fetch_content(self, url):
         """
-        指定されたURLのコンテンツを取得する。
         Parameters:
-        - url (str): 取得するウェブページのURL。
         Returns:
-        - content (bytes): 取得したコンテンツのバイトデータ。
         """
         response = self.session.get(url)
         response.raise_for_status()  # HTTPエラーが発生した場合は例外を投げる
@@ -27,13 +27,13 @@ class Scraper:
     def _parse_html(self, html):
         """
-        HTMLコンテンツをBeautifulSoupでパースする。
         Parameters:
-        - html (bytes): パースするHTMLコンテンツ。
         Returns:
-        - soup (BeautifulSoup): パースされたBeautifulSoupオブジェクト。
         """
         soup = BeautifulSoup(html, 'html.parser')
         return soup
@@ -44,7 +44,7 @@ class YahooNewsScraper(Scraper):
     def get_news_urls(self):
         """
-        Yahooニュースのトップページから最新ニュース記事のURLを取得する
         Parameters:
         - なし
@@ -129,3 +129,5 @@ class YahooNewsScraper(Scraper):
         sleep(1)  # サーバー負荷を避けるためにさらに1秒待機
         article_text = self.get_full_article_text(detail_url)
         return article_text, detail_url

 class Scraper:
     def __init__(self):
         """
+        Scraperクラスを初期化し、requestsセッションを作成
         """
         self.session = requests.Session()
     def _fetch_content(self, url):
         """
+        指定されたURLのコンテンツを取得する
         Parameters:
+        - url (str): 取得するウェブページのURL
         Returns:
+        - content (bytes): 取得したコンテンツのデータ
         """
         response = self.session.get(url)
         response.raise_for_status()  # HTTPエラーが発生した場合は例外を投げる
     def _parse_html(self, html):
         """
+        HTMLコンテンツをBeautifulSoupでパース
         Parameters:
+        - html (bytes): パースするHTMLコンテンツ
         Returns:
+        - soup (BeautifulSoup): パースされたBeautifulSoupオブジェクト
         """
         soup = BeautifulSoup(html, 'html.parser')
         return soup
     def get_news_urls(self):
         """
+        Yahooニュースのトップページから最新ニュース記事のURLを取得
         Parameters:
         - なし
         sleep(1)  # サーバー負荷を避けるためにさらに1秒待機
         article_text = self.get_full_article_text(detail_url)
         return article_text, detail_url

tfidf_calculator.py CHANGED Viewed

@@ -5,7 +5,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 class JapaneseTextVectorizer:
     def __init__(self):
-        """。
         MeCabのTaggerとTF-IDFベクトライザーを初期化
         """
         self.mecab_tagger = MeCab.Tagger()

 class JapaneseTextVectorizer:
     def __init__(self):
+        """
         MeCabのTaggerとTF-IDFベクトライザーを初期化
         """
         self.mecab_tagger = MeCab.Tagger()