import argparse
import os
import shutil
import stat
import subprocess
import sys
import tempfile


def clone_repo(repo_url, clone_dir):
    """Clone the Git repository at ``repo_url`` into ``clone_dir``.

    Args:
        repo_url: URL (or path) of the repository to clone.
        clone_dir: Destination directory handed to ``git clone``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status (``check=True``).
    """
    # "--" ends git's option parsing, so a URL or destination that happens to
    # start with "-" is always treated as a positional argument.
    subprocess.run(["git", "clone", "--", repo_url, clone_dir], check=True)


def extract_repo_name_from_url(repo_url):
    """Return the repository name taken from the last segment of ``repo_url``.

    Trailing slashes are ignored and a single trailing ``.git`` suffix is
    removed. Other dots are kept, so ``.../socket.io.git`` yields
    ``socket.io`` rather than ``socket``.

    Args:
        repo_url: Repository URL, e.g. ``https://github.com/user/repo.git``.

    Returns:
        The repository name; may be empty if the URL has no usable segment.
    """
    repo_name = repo_url.rstrip("/").split("/")[-1]
    # Only the ".git" suffix is noise; dots elsewhere belong to the name.
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    return repo_name


def _in_git_dir(path):
    """Return True when any component of ``path`` is exactly ``.git``."""
    # Compare whole components so names that merely contain ".git"
    # (".gitignore", ".github", "user.github.io") are not mistaken for it.
    return ".git" in os.path.normpath(path).split(os.sep)


def get_directory_structure(root_dir):
    """Return the directory tree under ``root_dir`` as indented text.

    Each directory is listed followed by its files (one indent level deeper)
    and then its sub-directories. ``.git`` directories are not descended
    into. Entries are sorted by name so the result is deterministic.

    Args:
        root_dir: Directory to walk; a trailing path separator is tolerated.

    Returns:
        The tree as a single newline-joined string.
    """
    # Written as escapes (U+251C U+2500 U+2500, a box-drawing branch marker)
    # so the literal survives a mis-decoded source file.
    branch = "\u251c\u2500\u2500"
    root_dir = os.path.normpath(root_dir)
    lines = []
    for root, dirs, files in os.walk(root_dir):
        # Assigning through the slice prunes the walk in place.
        dirs[:] = sorted(name for name in dirs if name != ".git")

        # Depth comes from the path relative to the root, not from string
        # replacement, so it stays right whatever the root path looks like.
        relative_root = os.path.relpath(root, root_dir)
        if relative_root == os.curdir:
            level = 0
        else:
            level = relative_root.count(os.sep) + 1

        indent = " " * 4 * level
        lines.append(f"{indent}{branch} {os.path.basename(root)}/")

        subindent = " " * 4 * (level + 1)
        lines.extend(f"{subindent}{branch} {name}" for name in sorted(files))
    return "\n".join(lines)


def read_file_contents(file_path):
    """Return the text of ``file_path``, or a bracketed placeholder.

    Args:
        file_path: Path of the file to read as UTF-8 text.

    Returns:
        The file's text; ``"[Ignored .git directory]"`` when a component of
        the path is ``.git``; or ``"[Error reading file: ...]"`` when the
        file cannot be opened or is not valid UTF-8.
    """
    if _in_git_dir(file_path):
        return "[Ignored .git directory]"

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except (UnicodeDecodeError, OSError) as e:
        return f"[Error reading file: {e}]"


def extract_all_files_contents(root_dir):
    """Collect the contents of every file under ``root_dir``.

    ``.git`` directories are pruned from the walk. Directories and files are
    visited in sorted order so the mapping's insertion order is stable.

    Args:
        root_dir: Directory to walk.

    Returns:
        A dict mapping each file's path relative to ``root_dir`` to the
        value returned by ``read_file_contents`` for it.
    """
    file_contents = {}
    for root, dirs, files in os.walk(root_dir):
        # Prune instead of filtering on the path string, so ".github" and
        # similar directories are still included.
        dirs[:] = sorted(name for name in dirs if name != ".git")

        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(file_path, root_dir)
            file_contents[relative_path] = read_file_contents(file_path)
    return file_contents


def count_tokens(text):
    """Return the number of whitespace-separated tokens in ``text``.

    A token here is any run of non-whitespace characters as produced by
    ``str.split()``; an empty or all-whitespace string yields 0.
    """
    return len(text.split())


def write_output_file(output_file, directory_structure, file_contents):
    """Write the directory tree and all file contents to ``output_file``.

    The file starts with a ``Lines:`` / ``Characters:`` header computed over
    the tree text plus every file's content, followed by the tree and then
    each file's content, each wrapped in a triple-backtick fence.

    Args:
        output_file: Path of the UTF-8 text file to create or overwrite.
        directory_structure: Tree text to place in the first fenced block.
        file_contents: Mapping of relative file path to that file's text;
            written in the mapping's iteration order.
    """
    texts = [directory_structure, *file_contents.values()]
    total_lines = sum(_count_lines(text) for text in texts)
    total_chars = sum(len(text) for text in texts)

    with open(output_file, "w", encoding="utf-8") as file:
        file.write(f"Lines: {total_lines}\nCharacters: {total_chars}\n\n")
        file.write("Directory Structure:\n```\n")
        file.write(directory_structure)
        file.write("\n```\n")

        for file_path, content in file_contents.items():
            file.write(f"\nContents of {file_path}:\n```\n")
            file.write(content)
            file.write("\n```\n")


def _count_lines(text):
    """Return the number of lines in ``text``, including an unterminated last line."""
    newline_count = text.count("\n")
    # Counting "\n" alone misses a final line that has no trailing newline.
    if text and not text.endswith("\n"):
        return newline_count + 1
    return newline_count


def cleanup(clone_dir):
    """Remove ``clone_dir`` and everything under it, best-effort.

    Does nothing when the path does not exist. Per-path failures are routed
    to ``handle_remove_error``; anything that still escapes ``rmtree`` is
    printed rather than raised, so cleanup never aborts the program.

    Args:
        clone_dir: Directory to delete.
    """
    if not os.path.exists(clone_dir):
        return

    def _on_error(func, path, error):
        # ``onexc`` passes the exception instance, ``onerror`` an exc_info
        # tuple; normalise to the tuple form the handler reads.
        if isinstance(error, tuple):
            exc_info = error
        else:
            exc_info = (type(error), error, error.__traceback__)
        handle_remove_error(func, path, exc_info)

    # ``onerror`` is deprecated since Python 3.12 in favour of ``onexc``.
    if sys.version_info >= (3, 12):
        handler = {"onexc": _on_error}
    else:
        handler = {"onerror": _on_error}

    try:
        shutil.rmtree(clone_dir, **handler)
    except Exception as e:
        print(f"An error occurred while cleaning up: {e}")


def handle_remove_error(func, path, exc_info):
    """Error handler for ``shutil.rmtree`` that retries on permission errors.

    On a ``PermissionError`` the path is made writable and ``func`` is
    called again on it. Any other error, or a retry that fails, is printed
    and swallowed so that ``rmtree`` can carry on with the remaining entries.

    Args:
        func: The function that failed, as reported by ``rmtree``.
        path: The path ``func`` was operating on.
        exc_info: ``(type, value, traceback)`` tuple describing the failure.
    """
    error = exc_info[1]
    if not isinstance(error, PermissionError):
        print(f"Error removing {path}: {error}")
        return

    try:
        os.chmod(path, stat.S_IWRITE)
        func(path)
    except (OSError, TypeError) as retry_error:
        # TypeError: ``func`` is whatever call rmtree reported, and not every
        # such call can be re-invoked with just a path argument.
        print(f"Error removing {path}: {retry_error}")


def main():
    """Command-line entry point.

    Parses ``repo_url`` and ``output_file`` from the command line, clones
    the repository into a fresh temporary directory, writes the tree and all
    file contents to ``output_file``, and removes the temporary directory
    whether or not any step failed.
    """
    parser = argparse.ArgumentParser(
        description="Generate a text file with repository structure and all file contents."
    )
    parser.add_argument("repo_url", help="URL of the GitHub repository to process.")
    parser.add_argument("output_file", help="Path to the output text file.")
    args = parser.parse_args()

    repo_name = extract_repo_name_from_url(args.repo_url)
    if not repo_name:
        parser.error(f"could not determine a repository name from {args.repo_url!r}")

    # Clone inside a private temporary directory so a pre-existing
    # ./<repo_name> in the working directory is never cloned into or deleted.
    # Keeping <repo_name> as the last path component preserves the tree's
    # root label.
    work_dir = tempfile.mkdtemp(prefix="repo_to_text_")
    clone_dir = os.path.join(work_dir, repo_name)
    try:
        clone_repo(args.repo_url, clone_dir)
        directory_structure = get_directory_structure(clone_dir)
        file_contents = extract_all_files_contents(clone_dir)
        write_output_file(args.output_file, directory_structure, file_contents)
    finally:
        # Runs on failure too, so a failed run leaves no clone behind.
        cleanup(work_dir)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()