repo2txt

Sleeping

App Files Files Community

repo2txt / main.py

blaise-tk

v0.0.1

6f74dd4 11 months ago

raw

history blame contribute delete

4.27 kB

	import os
	import shutil
	import subprocess
	import argparse


	def clone_repo(repo_url, clone_dir):
	    """Clone a Git repository into the specified directory.

	    Args:
	        repo_url: URL (or path) of the repository to clone.
	        clone_dir: Destination directory for the working copy.

	    Raises:
	        subprocess.CalledProcessError: If ``git clone`` exits with a
	            non-zero status (``check=True``).
	    """
	    # "--" ends option parsing, so a URL or directory that begins with "-"
	    # is treated as a positional argument instead of a git option.
	    subprocess.run(["git", "clone", "--", repo_url, clone_dir], check=True)


	def extract_repo_name_from_url(repo_url):
	    """Extract the repository name from a Git remote URL.

	    The name is the last "/"-separated segment of the URL with a trailing
	    ``.git`` suffix removed. Dots elsewhere in the name are preserved, so
	    ``https://github.com/socketio/socket.io`` yields ``socket.io``.

	    Args:
	        repo_url: Repository URL; trailing slashes are ignored.

	    Returns:
	        The repository name as a string.
	    """
	    repo_name = repo_url.rstrip("/").split("/")[-1]
	    # Strip only the conventional ".git" suffix; cutting at the first dot
	    # would truncate names such as "socket.io" or "user.github.io".
	    if repo_name.endswith(".git"):
	        repo_name = repo_name[: -len(".git")]
	    return repo_name


	def get_directory_structure(root_dir):
	    """Return the directory tree under ``root_dir`` as text, skipping ``.git``.

	    Every directory is listed as ``├── name/`` followed by its files, each
	    indented four spaces per nesting level relative to ``root_dir``.
	    Directories and files are visited in sorted order so the output is
	    reproducible between runs.

	    Args:
	        root_dir: Directory to walk; a trailing path separator is tolerated.

	    Returns:
	        The tree as a single newline-joined string (empty if ``root_dir``
	        yields nothing from ``os.walk``).
	    """
	    lines = []
	    for root, dirs, files in os.walk(root_dir):
	        if ".git" in dirs:
	            # Prune in place so os.walk never descends into .git.
	            dirs.remove(".git")
	        dirs.sort()

	        # Derive the depth from the relative path. Textually replacing
	        # root_dir inside root miscounts when root_dir ends with a separator
	        # or when its text occurs again deeper in the path.
	        relative_root = os.path.relpath(root, root_dir)
	        if relative_root == os.curdir:
	            level = 0
	        else:
	            level = relative_root.count(os.sep) + 1

	        indent = " " * 4 * level
	        # normpath drops a trailing separator, which would otherwise make
	        # basename return an empty string for the root directory.
	        dir_name = os.path.basename(os.path.normpath(root))
	        lines.append(f"{indent}├── {dir_name}/")

	        subindent = " " * 4 * (level + 1)
	        lines.extend(f"{subindent}├── {file_name}" for file_name in sorted(files))
	    return "\n".join(lines)


	def read_file_contents(file_path):
	    """Return the UTF-8 text of a file, or a placeholder string.

	    Args:
	        file_path: Path of the file to read.

	    Returns:
	        The file's text; ``"[Ignored .git directory]"`` when any component of
	        the path is exactly ``.git``; or ``"[Error reading file: ...]"`` when
	        the file cannot be opened or is not valid UTF-8.
	    """
	    # Compare whole path components: a substring test would also reject
	    # ".gitignore", ".gitattributes" and everything under ".github/".
	    if ".git" in os.path.normpath(file_path).split(os.sep):
	        return "[Ignored .git directory]"

	    try:
	        with open(file_path, "r", encoding="utf-8") as file:
	            return file.read()
	    except (UnicodeDecodeError, OSError) as e:
	        return f"[Error reading file: {e}]"


	def extract_all_files_contents(root_dir):
	    """Collect the contents of every file under ``root_dir`` except ``.git``.

	    Args:
	        root_dir: Directory to walk.

	    Returns:
	        A dict mapping each file's path relative to ``root_dir`` to the value
	        returned by ``read_file_contents`` for that file. Entries are
	        inserted in sorted traversal order.
	    """
	    file_contents = {}
	    for root, dirs, files in os.walk(root_dir):
	        # Prune the directory named exactly ".git" in place. A substring test
	        # on the path would also skip ".github/" and any tree whose path
	        # merely contains ".git" (for example a "user.github.io" checkout).
	        if ".git" in dirs:
	            dirs.remove(".git")
	        dirs.sort()

	        for file_name in sorted(files):
	            file_path = os.path.join(root, file_name)
	            relative_path = os.path.relpath(file_path, root_dir)
	            file_contents[relative_path] = read_file_contents(file_path)
	    return file_contents


	def count_tokens(text):
	    """Return how many whitespace-separated words ``text`` contains."""
	    words = text.split()
	    return len(words)


	def write_output_file(output_file, directory_structure, file_contents):
	    """Write the tree listing and every file's contents to ``output_file``.

	    The report opens with a metadata header: the number of newline
	    characters and the number of characters, each summed over the tree
	    listing and all file contents. The tree and then each file follow, every
	    one wrapped in a triple-backtick fence.

	    Args:
	        output_file: Path of the UTF-8 text file to create or overwrite.
	        directory_structure: Tree listing text.
	        file_contents: Mapping of relative file path to file text.
	    """
	    texts = [directory_structure, *file_contents.values()]
	    newline_count = sum(text.count("\n") for text in texts)
	    char_count = sum(len(text) for text in texts)

	    with open(output_file, "w", encoding="utf-8") as out:
	        out.write(f"Lines: {newline_count}\nCharacters: {char_count}\n\n")
	        out.write("Directory Structure:\n```\n")
	        out.write(directory_structure)
	        out.write("\n```\n")

	        for path, body in file_contents.items():
	            out.writelines((f"\nContents of {path}:\n```\n", body, "\n```\n"))


	def cleanup(clone_dir):
	    """Delete the cloned repository directory if it exists.

	    Removal failures that ``handle_remove_error`` does not resolve are
	    printed rather than raised.
	    """
	    if not os.path.exists(clone_dir):
	        return

	    try:
	        shutil.rmtree(clone_dir, onerror=handle_remove_error)
	    except Exception as error:
	        print(f"An error occurred while cleaning up: {error}")


	def handle_remove_error(func, path, exc_info):
	    """Error callback for ``shutil.rmtree`` that retries permission failures.

	    Args:
	        func: The function that failed; it is called again with ``path``
	            after the permission change.
	        path: The path that ``func`` failed on.
	        exc_info: Exception triple; only the exception instance at index 1
	            is inspected.
	    """
	    import stat

	    error = exc_info[1]
	    if not isinstance(error, PermissionError):
	        print(f"Error removing {path}: {error}")
	        return

	    # Set the owner-write bit, then retry the operation that failed.
	    os.chmod(path, stat.S_IWRITE)
	    func(path)


	def main():
	    """Command-line entry point: clone a repository and dump it to a text file.

	    Parses ``repo_url`` and ``output_file`` from the command line, clones the
	    repository into a directory named after it (relative to the current
	    working directory), writes the directory tree and all file contents to
	    ``output_file``, and removes the clone afterwards.
	    """
	    parser = argparse.ArgumentParser(
	        description="Generate a text file with repository structure and all file contents."
	    )
	    parser.add_argument("repo_url", help="URL of the GitHub repository to process.")
	    parser.add_argument("output_file", help="Path to the output text file.")
	    args = parser.parse_args()

	    repo_url = args.repo_url
	    output_file = args.output_file
	    clone_dir = extract_repo_name_from_url(repo_url)

	    # The clone stays outside the try block: if it fails (for example because
	    # the destination already exists), nothing is deleted here.
	    clone_repo(repo_url, clone_dir)
	    try:
	        directory_structure = get_directory_structure(clone_dir)
	        file_contents = extract_all_files_contents(clone_dir)
	        write_output_file(output_file, directory_structure, file_contents)
	    finally:
	        # Remove the clone even when extraction or writing raises.
	        cleanup(clone_dir)


	if __name__ == "__main__":
	    main()