File size: 4,267 Bytes
6f74dd4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import os
import shutil
import subprocess
import argparse
def clone_repo(repo_url, clone_dir):
    """Clone the repository at ``repo_url`` into ``clone_dir`` using ``git clone``.

    The command is passed to ``subprocess.run`` as an argument list with
    ``check=True``, so a non-zero exit status from git raises
    ``subprocess.CalledProcessError``.
    """
    git_command = ["git", "clone", repo_url, clone_dir]
    subprocess.run(git_command, check=True)
def extract_repo_name_from_url(repo_url):
    """Extract the repository name from a repository URL.

    The name is the last ``/``-separated segment of ``repo_url`` (trailing
    slashes are ignored) with one trailing ``.git`` suffix removed. Dots
    elsewhere in the name are preserved, so
    ``https://github.com/socketio/socket.io.git`` yields ``socket.io``.

    Returns:
        The repository name as a string.
    """
    repo_name = repo_url.rstrip("/").split("/")[-1]
    # Strip only the conventional ".git" suffix. Cutting at the first "."
    # would truncate legitimate names such as "socket.io" or "user.github.io".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    return repo_name
def get_directory_structure(root_dir):
    """Return the directory tree under ``root_dir`` as indented text.

    Each directory is rendered as ``├── name/`` and each file as
    ``├── name``, indented four spaces per nesting level. Any directory
    named ``.git`` is skipped entirely. Directories and files are listed in
    sorted order so the result is deterministic.

    Returns:
        The tree as a single newline-joined string (empty if ``root_dir``
        cannot be walked).
    """
    lines = []
    for root, dirs, files in os.walk(root_dir):
        # Assigning to dirs[:] prunes .git so os.walk never descends into it,
        # and the sort fixes the traversal order.
        dirs[:] = sorted(d for d in dirs if d != ".git")
        # Derive the depth from the path relative to root_dir. Stripping
        # root_dir with str.replace miscounts when root_dir ends with a
        # separator or its text occurs again deeper in the path.
        relative_root = os.path.relpath(root, root_dir)
        if relative_root == os.curdir:
            level = 0
        else:
            level = relative_root.count(os.sep) + 1
        indent = " " * 4 * level
        # normpath drops a trailing separator so the basename is not empty.
        dir_name = os.path.basename(os.path.normpath(root))
        lines.append(f"{indent}├── {dir_name}/")
        subindent = " " * 4 * (level + 1)
        for file in sorted(files):
            lines.append(f"{subindent}├── {file}")
    return "\n".join(lines)
def read_file_contents(file_path):
    """Return the UTF-8 text of ``file_path``, or a bracketed placeholder.

    Paths that have a component named exactly ``.git`` are not read and
    yield ``"[Ignored .git directory]"``. Files that cannot be opened or
    decoded as UTF-8 yield ``"[Error reading file: ...]"`` instead of
    raising.

    Returns:
        The file contents, or one of the placeholder strings above.
    """
    # Compare whole path components: a substring test would also reject
    # ".gitignore", ".github/..." and anything under a directory whose name
    # merely contains ".git" (e.g. "user.github.io").
    path_parts = os.path.normpath(file_path).split(os.sep)
    if ".git" in path_parts:
        return "[Ignored .git directory]"
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except (UnicodeDecodeError, OSError) as e:
        return f"[Error reading file: {e}]"
def extract_all_files_contents(root_dir):
    """Collect the contents of every file under ``root_dir`` except ``.git``.

    Walks ``root_dir`` recursively, never descending into a directory named
    ``.git``, and reads each file with ``read_file_contents``. Directories
    and files are visited in sorted order so the resulting dict has a
    deterministic insertion order.

    Returns:
        A dict mapping each file's path relative to ``root_dir`` to the
        string returned by ``read_file_contents`` for that file.
    """
    file_contents = {}
    for root, dirs, files in os.walk(root_dir):
        # Prune by exact directory name. The previous substring test on the
        # walked path also skipped ".github" and every directory when
        # root_dir itself contained ".git" (e.g. "user.github.io"), and it
        # still walked the whole .git tree before discarding it.
        dirs[:] = sorted(d for d in dirs if d != ".git")
        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(file_path, root_dir)
            file_contents[relative_path] = read_file_contents(file_path)
    return file_contents
def count_tokens(text):
    """Return how many whitespace-separated pieces ``text`` contains."""
    pieces = text.split()
    return len(pieces)
def _count_lines(text):
    """Return the number of lines in ``text``, including an unterminated last line."""
    newline_count = text.count("\n")
    if text and not text.endswith("\n"):
        return newline_count + 1
    return newline_count


def write_output_file(output_file, directory_structure, file_contents):
    """Write the directory structure and file contents to ``output_file``.

    The file starts with a metadata header (``Lines`` and ``Characters``
    totalled over the directory structure and all file contents), followed
    by the directory structure and then each file's contents, every section
    wrapped in a triple-backtick fence.

    Args:
        output_file: Path of the text file to create or overwrite (UTF-8).
        directory_structure: Tree text to place in the first fenced section.
        file_contents: Mapping of file path to that file's content string.
    """
    contents = list(file_contents.values())
    # Counting "\n" alone misses a final line with no trailing newline, and
    # the joined directory tree never ends with one, so count real lines.
    total_lines = _count_lines(directory_structure) + sum(
        _count_lines(content) for content in contents
    )
    total_chars = len(directory_structure) + sum(len(content) for content in contents)
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(f"Lines: {total_lines}\nCharacters: {total_chars}\n\n")
        file.write("Directory Structure:\n```\n")
        file.write(directory_structure)
        file.write("\n```\n")
        for file_path, content in file_contents.items():
            file.write(f"\nContents of {file_path}:\n```\n")
            file.write(content)
            file.write("\n```\n")
def cleanup(clone_dir):
    """Delete the cloned repository directory, reporting failures instead of raising.

    Does nothing when ``clone_dir`` does not exist. Removal errors on
    individual entries are routed to ``handle_remove_error``; any exception
    that still escapes ``shutil.rmtree`` is printed rather than propagated.
    """
    if not os.path.exists(clone_dir):
        return
    try:
        shutil.rmtree(clone_dir, onerror=handle_remove_error)
    except Exception as err:
        print(f"An error occurred while cleaning up: {err}")
def handle_remove_error(func, path, exc_info):
    """Handle a removal failure reported through ``shutil.rmtree``'s ``onerror`` hook.

    Args:
        func: The callable that failed; it is retried with ``path``.
        path: The filesystem path that ``func`` could not process.
        exc_info: Exception triple whose second item is the raised exception.

    A ``PermissionError`` is handled by making ``path`` writable and
    retrying ``func`` once; any other error is only printed.
    """
    import stat

    error = exc_info[1]
    if not isinstance(error, PermissionError):
        print(f"Error removing {path}: {error}")
        return
    # Make the entry writable, then retry the operation that was refused.
    os.chmod(path, stat.S_IWRITE)
    func(path)
def main():
    """Command-line entry point: clone a repository and dump it to a text file.

    Parses two positional arguments (``repo_url`` and ``output_file``),
    clones the repository into a directory named after it, writes the
    directory structure and all file contents to ``output_file``, and
    removes the clone afterwards, even if processing fails.
    """
    parser = argparse.ArgumentParser(
        description="Generate a text file with repository structure and all file contents."
    )
    parser.add_argument("repo_url", help="URL of the GitHub repository to process.")
    parser.add_argument("output_file", help="Path to the output text file.")
    args = parser.parse_args()

    repo_url = args.repo_url
    output_file = args.output_file
    clone_dir = extract_repo_name_from_url(repo_url)

    # Clone outside the try block: if cloning fails (for example because
    # clone_dir already exists), the finally clause must not delete a
    # directory this run did not create.
    clone_repo(repo_url, clone_dir)
    try:
        directory_structure = get_directory_structure(clone_dir)
        file_contents = extract_all_files_contents(clone_dir)
        write_output_file(output_file, directory_structure, file_contents)
    finally:
        # Always remove the clone, even when reading or writing raised.
        cleanup(clone_dir)


if __name__ == "__main__":
    main()
|