Spaces:
Sleeping
Sleeping
| import argparse | |
| import os | |
| import sys | |
| from dotenv import load_dotenv | |
| from streamlit.web import cli as stcli | |
| from utils.process import process | |
# Load environment variables from a .env file before any command runs.
# The file is expected to hold OPENAI_API_KEY; this module additionally reads
# ACTIVELOOP_USERNAME from the environment when building dataset paths.
load_dotenv()
def extract_repo_name(repo_url):
    """Extract the repository name from the given repository URL.

    The name is the last "/"-separated segment of the URL with a single
    trailing ".git" suffix removed.

    Args:
        repo_url: Repository URL, e.g. "https://github.com/user/repo.git".

    Returns:
        The repository name, e.g. "repo".
    """
    # Ignore trailing slashes so "https://host/user/repo/" still yields
    # "repo" instead of an empty string.
    last_segment = repo_url.rstrip("/").split("/")[-1]
    # Strip ".git" only when it is a suffix; a blanket str.replace would also
    # mangle names that merely contain it (e.g. "my.github.io").
    suffix = ".git"
    if last_segment.endswith(suffix):
        last_segment = last_segment[: -len(suffix)]
    return last_segment
def process_repo(args):
    """
    Process the git repository by cloning it, filtering files, and
    creating an Activeloop dataset with the contents.

    This function only builds the dataset path and delegates to ``process``;
    the cloning/filtering/dataset work described above is performed by that
    imported helper, whose implementation is not part of this file.

    Args:
        args: Parsed CLI namespace providing ``repo_url``,
            ``include_file_extensions``, ``activeloop_dataset_name`` and
            ``repo_destination``. ``activeloop_dataset_path`` is set on it as
            a side effect.

    Raises:
        SystemExit: If the ACTIVELOOP_USERNAME environment variable is unset
            or empty.
    """
    activeloop_username = os.environ.get("ACTIVELOOP_USERNAME")
    if not activeloop_username:
        # Without this check the path would silently become "hub://None/...".
        sys.exit(
            "ACTIVELOOP_USERNAME is not set. Export it or add it to your .env file."
        )

    # Fall back to the repository name when no dataset name was supplied.
    dataset_name = args.activeloop_dataset_name or extract_repo_name(args.repo_url)
    args.activeloop_dataset_path = f"hub://{activeloop_username}/{dataset_name}"

    process(
        args.repo_url,
        args.include_file_extensions,
        args.activeloop_dataset_path,
        args.repo_destination,
    )
def chat(args):
    """
    Start the Streamlit chat application using the specified Activeloop dataset.

    Rewrites ``sys.argv`` to the equivalent of
    ``streamlit run src/utils/chat.py -- --activeloop_dataset_path=...`` and
    hands control to the Streamlit CLI. This function never returns normally:
    it always ends in ``sys.exit``.

    Args:
        args: Parsed CLI namespace providing ``activeloop_dataset_name``.
            ``activeloop_dataset_path`` is set on it as a side effect.

    Raises:
        SystemExit: With an error message if ACTIVELOOP_USERNAME is unset or
            empty; otherwise with the Streamlit CLI's return value.
    """
    activeloop_username = os.environ.get("ACTIVELOOP_USERNAME")
    if not activeloop_username:
        # Without this check the path would silently become "hub://None/...".
        sys.exit(
            "ACTIVELOOP_USERNAME is not set. Export it or add it to your .env file."
        )

    args.activeloop_dataset_path = (
        f"hub://{activeloop_username}/{args.activeloop_dataset_name}"
    )

    # The script path is relative, so it is resolved against the current
    # working directory. Everything after "--" is forwarded to the script.
    sys.argv = [
        "streamlit",
        "run",
        "src/utils/chat.py",
        "--",
        f"--activeloop_dataset_path={args.activeloop_dataset_path}",
    ]
    sys.exit(stcli.main())
def main(argv=None):
    """Define and parse CLI arguments, then execute the appropriate subcommand.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, exactly as before.

    If no subcommand is supplied, the top-level help is printed instead of
    exiting silently.
    """
    parser = argparse.ArgumentParser(description="Chat with a git repository")
    subparsers = parser.add_subparsers(dest="command")

    # Process subcommand
    process_parser = subparsers.add_parser("process", help="Process a git repository")
    process_parser.add_argument(
        "--repo-url", required=True, help="The git repository URL"
    )
    process_parser.add_argument(
        "--include-file-extensions",
        nargs="+",
        default=None,
        help=(
            "Exclude all files not matching these extensions. Example:"
            " --include-file-extensions .py .js .ts .html .css .md .txt"
        ),
    )
    process_parser.add_argument(
        "--activeloop-dataset-name",
        help=(
            "The name for the Activeloop dataset. Defaults to the git repository name."
        ),
    )
    process_parser.add_argument(
        "--repo-destination",
        default="repos",
        help="The destination to clone the repository. Defaults to 'repos'.",
    )

    # Chat subcommand
    chat_parser = subparsers.add_parser("chat", help="Start the chat application")
    chat_parser.add_argument(
        "--activeloop-dataset-name",
        required=True,
        help="The name of one of your existing Activeloop datasets.",
    )

    args = parser.parse_args(argv)

    if args.command == "process":
        process_repo(args)
    elif args.command == "chat":
        chat(args)
    else:
        # No subcommand was given (args.command is None): show usage rather
        # than returning without any output.
        parser.print_help()
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()