File size: 4,267 Bytes
6f74dd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import shutil
import subprocess
import argparse


def clone_repo(repo_url, clone_dir):
    """Fetch ``repo_url`` into ``clone_dir`` by invoking ``git clone``.

    The command is passed as an argument list (no shell). Because
    ``check=True`` is used, a non-zero exit status from git raises
    ``subprocess.CalledProcessError``.
    """
    git_command = ["git", "clone", repo_url, clone_dir]
    subprocess.run(git_command, check=True)


def extract_repo_name_from_url(repo_url):
    """Extract the repository name from a Git remote URL.

    The name is the last ``/``-separated segment of the URL (trailing
    slashes are ignored) with a single trailing ``.git`` suffix removed.

    Args:
        repo_url: Repository URL, e.g. ``https://github.com/user/repo.git``.

    Returns:
        The repository name, e.g. ``repo``. Dots inside the name are kept,
        so ``.../socket.io.git`` yields ``socket.io``.
    """
    repo_name = repo_url.rstrip("/").split("/")[-1]
    # Strip only the ".git" suffix: cutting at the first "." would truncate
    # legitimate dotted names such as "socket.io" to "socket".
    git_suffix = ".git"
    if repo_name.endswith(git_suffix):
        repo_name = repo_name[: -len(git_suffix)]
    return repo_name


def get_directory_structure(root_dir):
    """Render the tree under ``root_dir`` as indented text, skipping ``.git``.

    Every directory is emitted as ``├── name/`` indented four spaces per
    depth level, followed by its files one level deeper. Sub-directories and
    files are listed in sorted order so the result is deterministic.

    Args:
        root_dir: Directory to walk. A trailing path separator is accepted.

    Returns:
        The tree as a newline-joined string; empty if nothing could be walked.
    """
    if not root_dir:
        # os.walk("") yields nothing; keep that instead of walking the CWD
        # (normpath("") would turn the empty string into ".").
        return ""

    # Normalise once so that a trailing separator neither produces an empty
    # basename for the root entry nor skews the depth of its children.
    base_dir = os.path.normpath(root_dir)

    lines = []
    for root, dirs, files in os.walk(base_dir):
        # Edit dirs in place: os.walk only descends into what is left, so
        # this both prunes .git and fixes the traversal order.
        dirs[:] = sorted(name for name in dirs if name != ".git")

        # Depth is derived from the path relative to the base rather than by
        # textual replacement, which miscounts when the base text recurs.
        rel_path = os.path.relpath(root, base_dir)
        level = 0 if rel_path == os.curdir else rel_path.count(os.sep) + 1

        indent = " " * 4 * level
        lines.append(f"{indent}├── {os.path.basename(root)}/")

        subindent = " " * 4 * (level + 1)
        lines.extend(f"{subindent}├── {file}" for file in sorted(files))
    return "\n".join(lines)


def read_file_contents(file_path):
    """Read a file as UTF-8 text, refusing anything inside a ``.git`` entry.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text. If a component of the path is exactly ``.git`` the
        placeholder ``"[Ignored .git directory]"`` is returned instead, and if
        the file cannot be opened or decoded an ``"[Error reading file: ...]"``
        message is returned; no exception is raised for those cases.
    """
    # Compare whole path components: a substring test would also reject
    # ordinary files such as ".gitignore" or anything under ".github/".
    path_parts = os.path.normpath(file_path).split(os.sep)
    if ".git" in path_parts:
        return "[Ignored .git directory]"

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except (UnicodeDecodeError, OSError) as e:
        # Binary or unreadable files are reported inline rather than aborting.
        return f"[Error reading file: {e}]"


def extract_all_files_contents(root_dir):
    """Collect the contents of every file under ``root_dir`` except ``.git``.

    Args:
        root_dir: Directory to walk.

    Returns:
        A dict mapping each file's path relative to ``root_dir`` to whatever
        ``read_file_contents`` returns for it. Entries are inserted in sorted
        traversal order so the result is deterministic.
    """
    file_contents = {}
    for root, dirs, files in os.walk(root_dir):
        # Prune only directories named exactly ".git", in place so os.walk
        # never descends into them. A substring test on the path would also
        # drop ".github/" and any directory whose path merely contains ".git".
        dirs[:] = sorted(name for name in dirs if name != ".git")

        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(file_path, root_dir)
            file_contents[relative_path] = read_file_contents(file_path)
    return file_contents


def count_tokens(text):
    """Return how many whitespace-separated pieces ``text`` contains."""
    return sum(1 for _ in text.split())


def write_output_file(output_file, directory_structure, file_contents):
    """Write a summary header, the directory tree and every file body.

    The header reports the number of newline characters and the number of
    characters found in the tree text plus all file bodies. The tree and
    each file body are then written inside triple-backtick fences, files in
    the iteration order of ``file_contents``.
    """
    line_total = directory_structure.count("\n")
    char_total = len(directory_structure)
    for body in file_contents.values():
        line_total += body.count("\n")
        char_total += len(body)

    with open(output_file, "w", encoding="utf-8") as out:
        out.writelines(
            (
                f"Lines: {line_total}\nCharacters: {char_total}\n\n",
                "Directory Structure:\n```\n",
                directory_structure,
                "\n```\n",
            )
        )

        for rel_path, body in file_contents.items():
            out.writelines(
                (
                    f"\nContents of {rel_path}:\n```\n",
                    body,
                    "\n```\n",
                )
            )


def cleanup(clone_dir):
    """Delete the cloned repository directory, reporting failures.

    Does nothing when ``clone_dir`` does not exist. Per-entry removal errors
    are routed to ``handle_remove_error``; any exception that still escapes
    ``shutil.rmtree`` is printed rather than propagated.
    """
    if not os.path.exists(clone_dir):
        return

    try:
        shutil.rmtree(clone_dir, onerror=handle_remove_error)
    except Exception as err:
        print(f"An error occurred while cleaning up: {err}")


def handle_remove_error(func, path, exc_info):
    """``shutil.rmtree`` error callback that retries after a permission error.

    ``exc_info[1]`` is the exception that was raised. For a
    ``PermissionError`` the write bit is set on ``path`` and ``func(path)``
    is called again; any other error is only printed.
    """
    import stat

    error = exc_info[1]
    if not isinstance(error, PermissionError):
        print(f"Error removing {path}: {error}")
        return

    # Make the entry writable, then retry the operation that failed.
    os.chmod(path, stat.S_IWRITE)
    func(path)


def main():
    """Command-line entry point.

    Parses ``repo_url`` and ``output_file`` from the command line, clones the
    repository into a directory named after it in the current working
    directory, writes the directory tree and all file contents to
    ``output_file``, and removes the clone afterwards.
    """
    parser = argparse.ArgumentParser(
        description="Generate a text file with repository structure and all file contents."
    )
    parser.add_argument("repo_url", help="URL of the GitHub repository to process.")
    parser.add_argument("output_file", help="Path to the output text file.")
    args = parser.parse_args()

    repo_url = args.repo_url
    output_file = args.output_file

    repo_name = extract_repo_name_from_url(repo_url)
    clone_dir = repo_name

    # Deliberately outside the try block: if cloning itself fails, cleanup
    # must not run against a directory this invocation did not create.
    clone_repo(repo_url, clone_dir)

    # The clone now exists on disk; remove it even if a later step raises,
    # so a failed run does not leave the checkout behind.
    try:
        directory_structure = get_directory_structure(clone_dir)
        file_contents = extract_all_files_contents(clone_dir)

        write_output_file(output_file, directory_structure, file_contents)
    finally:
        cleanup(clone_dir)


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()