|
|
import os |
|
|
import re |
|
|
|
|
|
def read_directory_files_recursively(directory_path: str, skip_dirs: list, skip_pattern: str = None) -> dict:
    """Read every UTF-8 text file below ``directory_path`` into a dictionary.

    Directories whose name is listed in ``skip_dirs`` are never entered.
    Files and directories whose path *relative to* ``directory_path``
    matches ``skip_pattern`` (tested with ``re.search``) are skipped too.
    Matching on the relative path keeps the result independent of where the
    tree happens to live on disk and lets anchored patterns such as
    ``^build`` work for directories as well as files.

    Files that cannot be opened or decoded as UTF-8 are reported on stdout
    and left out of the result; they do not abort the walk.

    Args:
        directory_path (str): The path to start reading files from.
        skip_dirs (list): Directory names to skip; ``None`` or an empty
            list skips nothing by name.
        skip_pattern (str, optional): Regex pattern for relative paths of
            files/directories to skip.

    Returns:
        dict: Maps each file's path relative to ``directory_path`` to its
        text content.

    Raises:
        re.error: If ``skip_pattern`` is not a valid regular expression.
    """
    file_contents = {}
    compiled_pattern = re.compile(skip_pattern) if skip_pattern else None
    # Build the lookup set once instead of scanning the list per directory.
    skipped_names = set(skip_dirs or ())

    def matches_skip_pattern(relative_path: str) -> bool:
        """Return True when a skip pattern exists and matches the path."""
        return compiled_pattern is not None and compiled_pattern.search(relative_path) is not None

    for root, dirs, files in os.walk(directory_path):
        # Assigning through dirs[:] prunes in place, which is what stops
        # os.walk from descending into the removed directories.
        dirs[:] = [
            d for d in dirs
            if d not in skipped_names
            and not matches_skip_pattern(os.path.relpath(os.path.join(root, d), directory_path))
        ]

        for file in files:
            full_path = os.path.join(root, file)
            relative_path = os.path.relpath(full_path, directory_path)

            if matches_skip_pattern(relative_path):
                continue

            try:
                with open(full_path, 'r', encoding='utf-8') as f:
                    file_contents[relative_path] = f.read()
            except (UnicodeDecodeError, OSError) as e:
                # Best effort: binary or unreadable files are reported, not fatal.
                print(f'Failed to read {relative_path}: {e}')

    return file_contents
|
|
|
|
|
|
|
|
|
|
|
# Known file extensions (without the dot) and the language each one denotes.
_EXTENSION_TO_LANGUAGE = {
    'c': 'c',
    'h': 'c',
    'cpp': 'c++',
    'cc': 'c++',
    'cxx': 'c++',
    'hpp': 'c++',
    'hh': 'c++',
    'hxx': 'c++',
    'go': 'go',
    'java': 'java',
    'py': 'python',
    'pyc': 'python',
    'pyw': 'python',
    'js': 'javascript',
    'mjs': 'javascript',
    'cjs': 'javascript',
}


def get_language_from_filename(file_name: str) -> str:
    """Return the language name implied by a file's extension.

    Only the final path component is inspected, so dots in directory names
    (e.g. ``pkg.v2/Makefile``) cannot leak into the detected extension.

    Args:
        file_name (str): A file name or path.

    Returns:
        str: The mapped language name for a known extension. Otherwise the
        text after the last dot of the final path component is returned
        unchanged, which is the whole component when it contains no dot.
    """
    base_name = os.path.basename(file_name)
    # rpartition yields the whole name when there is no dot, matching the
    # behaviour of split('.')[-1] for extension-less files.
    file_extension = base_name.rpartition('.')[2]
    return _EXTENSION_TO_LANGUAGE.get(file_extension, file_extension)