code-weaver / src /main.py
whitelotus0's picture
code weaver
fff1c68
import argparse
import os
import sys
from dotenv import load_dotenv
from streamlit.web import cli as stcli
from utils.process import process
# Load environment variables from a .env file before any command runs.
# The file is expected to provide OPENAI_API_KEY; ACTIVELOOP_USERNAME is also
# read from the environment by the subcommands below.
load_dotenv()
def extract_repo_name(repo_url):
    """Extract the repository name from the given repository URL.

    The name is the last ``/``-separated segment of the URL with a trailing
    ``.git`` suffix removed.

    Args:
        repo_url: Repository URL, e.g. ``https://github.com/user/repo.git``.

    Returns:
        The repository name, e.g. ``repo``.
    """
    # Drop trailing slashes so "https://host/user/repo/" yields "repo"
    # instead of an empty last segment.
    repo_name = repo_url.rstrip("/").split("/")[-1]
    # Remove ".git" only when it is a suffix; replacing it anywhere would
    # mangle names such as "user.github.io", which contains ".git".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    return repo_name
def process_repo(args):
    """
    Process the git repository by cloning it, filtering files, and
    creating an Activeloop dataset with the contents.

    The dataset path is ``hub://<ACTIVELOOP_USERNAME>/<dataset name>``, where
    the dataset name is ``args.activeloop_dataset_name`` when given and the
    repository name derived from ``args.repo_url`` otherwise. The computed
    path is stored on ``args.activeloop_dataset_path`` and the actual work is
    delegated to ``process``.

    Args:
        args: Parsed CLI namespace with ``repo_url``,
            ``include_file_extensions``, ``activeloop_dataset_name`` and
            ``repo_destination`` attributes.

    Raises:
        RuntimeError: If the ACTIVELOOP_USERNAME environment variable is
            unset or empty.
    """
    activeloop_username = os.environ.get("ACTIVELOOP_USERNAME")
    # Fail fast: without a username the path would silently become
    # "hub://None/..." and only fail later, after work has already started.
    if not activeloop_username:
        raise RuntimeError(
            "The ACTIVELOOP_USERNAME environment variable must be set"
            " (for example in the .env file)."
        )

    # Fall back to the repository name when no dataset name was supplied.
    dataset_name = args.activeloop_dataset_name or extract_repo_name(args.repo_url)
    args.activeloop_dataset_path = f"hub://{activeloop_username}/{dataset_name}"

    process(
        args.repo_url,
        args.include_file_extensions,
        args.activeloop_dataset_path,
        args.repo_destination,
    )
def chat(args):
    """
    Start the Streamlit chat application using the specified Activeloop dataset.

    The dataset path ``hub://<ACTIVELOOP_USERNAME>/<dataset name>`` is stored
    on ``args.activeloop_dataset_path`` and forwarded to the Streamlit script
    as a command-line option. This function does not return: it exits the
    process with the status produced by the Streamlit CLI.

    Args:
        args: Parsed CLI namespace with an ``activeloop_dataset_name``
            attribute.

    Raises:
        RuntimeError: If the ACTIVELOOP_USERNAME environment variable is
            unset or empty.
    """
    activeloop_username = os.environ.get("ACTIVELOOP_USERNAME")
    # Fail fast: without a username the path would silently become
    # "hub://None/..." and the chat app would start against a bogus dataset.
    if not activeloop_username:
        raise RuntimeError(
            "The ACTIVELOOP_USERNAME environment variable must be set"
            " (for example in the .env file)."
        )

    args.activeloop_dataset_path = (
        f"hub://{activeloop_username}/{args.activeloop_dataset_name}"
    )

    # The Streamlit CLI reads its arguments from sys.argv, so rewrite it to
    # the equivalent of `streamlit run src/utils/chat.py -- <script options>`.
    sys.argv = [
        "streamlit",
        "run",
        "src/utils/chat.py",
        "--",
        f"--activeloop_dataset_path={args.activeloop_dataset_path}",
    ]
    sys.exit(stcli.main())
def main():
    """Define and parse CLI arguments, then execute the appropriate subcommand.

    Two subcommands are available:

    * ``process`` -- handled by ``process_repo``.
    * ``chat`` -- handled by ``chat``.

    Invoking the program without a subcommand is reported as a usage error
    by argparse (exit status 2) instead of silently doing nothing.
    """
    parser = argparse.ArgumentParser(description="Chat with a git repository")
    # required=True makes argparse reject a missing subcommand; otherwise
    # args.command would be None and neither branch below would run.
    subparsers = parser.add_subparsers(dest="command", required=True)

    # Process subcommand
    process_parser = subparsers.add_parser("process", help="Process a git repository")
    process_parser.add_argument(
        "--repo-url", required=True, help="The git repository URL"
    )
    process_parser.add_argument(
        "--include-file-extensions",
        nargs="+",
        default=None,
        help=(
            "Exclude all files not matching these extensions. Example:"
            " --include-file-extensions .py .js .ts .html .css .md .txt"
        ),
    )
    process_parser.add_argument(
        "--activeloop-dataset-name",
        help=(
            "The name for the Activeloop dataset. Defaults to the git repository name."
        ),
    )
    process_parser.add_argument(
        "--repo-destination",
        default="repos",
        help="The destination to clone the repository. Defaults to 'repos'.",
    )

    # Chat subcommand
    chat_parser = subparsers.add_parser("chat", help="Start the chat application")
    chat_parser.add_argument(
        "--activeloop-dataset-name",
        required=True,
        help="The name of one of your existing Activeloop datasets.",
    )

    args = parser.parse_args()

    if args.command == "process":
        process_repo(args)
    elif args.command == "chat":
        chat(args)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()