Spaces:

DeL-TaiseiOzaki
/

Repository_Scaner

Sleeping

App Files Files Community

DeL-TaiseiOzaki committed on Dec 22, 2024

Commit

8d28c3c

verified ·

1 Parent(s): 9970b5c

Update core/file_scanner.py

Browse files

Files changed (1) hide show

core/file_scanner.py +18 -8

core/file_scanner.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import chardet
 from pathlib import Path
 from typing import List, Optional, Set
@@ -23,6 +25,9 @@ class FileInfo:
 class FileScanner:
     EXCLUDED_DIRS = {
         '.git', '__pycache__', 'node_modules', 'venv',
         '.env', 'build', 'dist', 'target', 'bin', 'obj'
@@ -30,15 +35,16 @@ class FileScanner:
     def __init__(self, base_dir: Path, target_extensions: Set[str]):
         """
-        base_dir: 解析を開始するディレクトリ
-        target_extensions: 対象とする拡張子の集合（例: {'.py', '.js', '.md'}）
         """
         self.base_dir = base_dir
         self.target_extensions = {ext.lower() for ext in target_extensions}
     def _should_scan_file(self, path: Path) -> bool:
         """対象外フォルダ・拡張子を除外"""
-        # 除外フォルダ
         if any(excluded in path.parts for excluded in self.EXCLUDED_DIRS):
             return False
         # 拡張子チェック
@@ -47,27 +53,31 @@ class FileScanner:
         return False
     def _read_file_content(self, file_path: Path) -> (Optional[str], Optional[str]):
-        """ファイル内容を読み込み、エンコーディングを判定"""
         try:
-            # 先頭数KBを読み込み、エンコーディングを推定
             with file_path.open('rb') as rb:
                 raw_data = rb.read(4096)
                 detect_result = chardet.detect(raw_data)
                 encoding = detect_result['encoding'] if detect_result['confidence'] > 0.7 else 'utf-8'
-            # 推定したエンコーディングで読み込み
             try:
                 with file_path.open('r', encoding=encoding) as f:
                     return f.read(), encoding
             except UnicodeDecodeError:
-                # ダメなら cp932 を試す
                 with file_path.open('r', encoding='cp932') as f:
                     return f.read(), 'cp932'
         except Exception:
             return None, None
     def scan_files(self) -> List[FileInfo]:
-        """再帰的にファイルを探して、指定拡張子ならFileInfoリストにまとめる"""
         if not self.base_dir.exists():
             raise FileNotFoundError(f"指定ディレクトリが見つかりません: {self.base_dir}")

+# core/file_scanner.py
 import chardet
 from pathlib import Path
 from typing import List, Optional, Set
 class FileScanner:
+    """
+    指定された拡張子のファイルだけを再帰的に検索し、ファイル内容を読み込むクラス。
+    """
     EXCLUDED_DIRS = {
         '.git', '__pycache__', 'node_modules', 'venv',
         '.env', 'build', 'dist', 'target', 'bin', 'obj'
     def __init__(self, base_dir: Path, target_extensions: Set[str]):
         """
+        base_dir: 解析を開始するディレクトリ(Path)
+        target_extensions: 対象とする拡張子の集合 (例: {'.py', '.js', '.md'})
         """
         self.base_dir = base_dir
+        # 大文字・小文字のブレを吸収するために小文字化して保持
         self.target_extensions = {ext.lower() for ext in target_extensions}
     def _should_scan_file(self, path: Path) -> bool:
         """対象外フォルダ・拡張子を除外"""
+        # 除外フォルダ判定
         if any(excluded in path.parts for excluded in self.EXCLUDED_DIRS):
             return False
         # 拡張子チェック
         return False
     def _read_file_content(self, file_path: Path) -> (Optional[str], Optional[str]):
+        """
+        ファイル内容を読み込み、エンコーディングを判定して返す。
+        先頭4096バイトをchardetで解析し、失敗時はcp932も試す。
+        """
         try:
             with file_path.open('rb') as rb:
                 raw_data = rb.read(4096)
                 detect_result = chardet.detect(raw_data)
                 encoding = detect_result['encoding'] if detect_result['confidence'] > 0.7 else 'utf-8'
+            # 推定エンコーディングで読み込み
             try:
                 with file_path.open('r', encoding=encoding) as f:
                     return f.read(), encoding
             except UnicodeDecodeError:
+                # cp932 を再試行
                 with file_path.open('r', encoding='cp932') as f:
                     return f.read(), 'cp932'
         except Exception:
             return None, None
     def scan_files(self) -> List[FileInfo]:
+        """
+        再帰的にファイルを探して、指定拡張子だけをFileInfoオブジェクトのリストとして返す。
+        """
         if not self.base_dir.exists():
             raise FileNotFoundError(f"指定ディレクトリが見つかりません: {self.base_dir}")