# Every target in this file is a command, not a file, so all of them are
# declared phony. Otherwise a stray file/directory with the same name (e.g.
# `clean`, `init`, `help`) would make the target look "up to date".
.PHONY: show-params init destroy \
	clean clean-pyc clean-build clean-test clean-model-files clean-images \
	clean-secrets create-envrc delete-envrc git-flow-install \
	create-environment delete-environment pip-upgrade sort-requirements \
	requirements pre-commit-install pre-commit-uninstall lint \
	docker-prune all-stop all-start \
	docker-local-dev-build docker-local-dev-start docker-local-dev-stop \
	docker-local-dev-login \
	api-build api-start api-stop api-web \
	app-app-build app-app-start app-app-stop app-app-web \
	add-licenses all-web prepare_data run_faiss_and_embeddings help

###############################################################################
# GLOBALS                                                                     #
###############################################################################

# Recipes below use bash-only syntax (`[[ ... ]]`, `source`), so bash is
# selected for every recipe, not only when conda is detected.
SHELL := /bin/bash

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROJECT_NAME := $(shell basename $(subst -,_,$(PROJECT_DIR)))
PROJECT_NAME_LOWER := $(shell echo $(PROJECT_NAME) | tr '[:upper:]' '[:lower:]')
ENVIRONMENT_NAME = $(PROJECT_NAME)
PYTHON_INTERPRETER = python3
PIP_INTERPRETER = pip
PYTHON_VERSION = 3.9
PIP_VERSION = 22.3

# --- REQUIREMENTS-RELATED
REQUIREMENTS_FILE = $(PROJECT_DIR)/requirements.txt
REQUIREMENTS_FILE_TEMP = $(PROJECT_DIR)/requirements.tmp

REQUIREMENTS_DEV_FILE = $(PROJECT_DIR)/requirements-dev.txt
REQUIREMENTS_DEV_FILE_TEMP = $(PROJECT_DIR)/requirements-dev.tmp

REQUIREMENTS_DEPLOYMENT_FILE = $(PROJECT_DIR)/requirements-deploy.txt
REQUIREMENTS_DEPLOYMENT_FILE_TEMP = $(PROJECT_DIR)/requirements-deploy.tmp

# --- PATHS TO PROJECT DIRECTORIES
DATA_DIRECTORY = $(PROJECT_DIR)/data
SRC_DIRECTORY = $(PROJECT_DIR)/src
API_DIRECTORY = $(SRC_DIRECTORY)/api
DATA_PROCESSING_DIRECTORY = $(SRC_DIRECTORY)/data_processing
TRAINING_DIRECTORY = $(SRC_DIRECTORY)/training

# -- Docker-related
# Variable used for turning on/off Docker Buildkit
DOCKER_BUILDKIT_VALUE=1
LOCAL_DEVELOPMENT_DIR_PATH="$(PROJECT_DIR)/docker"

# -- API-related
INPUT_APP_PORT=8501
OUTPUT_APP_PORT=8501
API_WEBSERVER_URL="http://localhost:$(INPUT_APP_PORT)"

# -- App-related
APP_SERVER_PORT=7860
APP_WEBSERVER_URL="http://localhost:$(APP_SERVER_PORT)"

# ----------------------------- Python-specific -------------------------------
# - Checking what type of python one is using
# Anaconda
ifeq (,$(shell which conda))
HAS_CONDA=False
else
HAS_CONDA=True
# We need to specify the following commands in order to properly activate the
# Anaconda environment.
# Note that the extra activate is needed to ensure that the activate floats env to the front of PATH
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
CONDA_DEACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda deactivate
endif

# - Pyenv
ifeq (,$(shell which pyenv))
HAS_PYENV=False
else
HAS_PYENV=True
endif

###############################################################################
# VARIABLES FOR COMMANDS                                                      #
###############################################################################

# NOTE: several variables printed here (LOCAL_DEV_*, API_*, APP_*) are defined
# further down the file; that works because recipes are expanded when run.
## Show the set of input parameters
show-params:
	@ printf "\n-------- GENERAL ---------------\n"
	@ echo "PROJECT_DIR:                        $(PROJECT_DIR)"
	@ echo "PROJECT_NAME:                       $(PROJECT_NAME)"
	@ echo "LOCAL_DEVELOPMENT_DIR_PATH:         $(LOCAL_DEVELOPMENT_DIR_PATH)"
	@ echo "ENVIRONMENT_NAME:                   $(ENVIRONMENT_NAME)"
	@ echo "PYTHON_INTERPRETER:                 $(PYTHON_INTERPRETER)"
	@ echo "PYTHON_VERSION:                     $(PYTHON_VERSION)"
	@ echo "PIP_VERSION:                        $(PIP_VERSION)"
	@ echo "REQUIREMENTS_FILE:                  $(REQUIREMENTS_FILE)"
	@ echo "REQUIREMENTS_FILE_TEMP:             $(REQUIREMENTS_FILE_TEMP)"
	@ echo "REQUIREMENTS_DEV_FILE:              $(REQUIREMENTS_DEV_FILE)"
	@ echo "REQUIREMENTS_DEV_FILE_TEMP:         $(REQUIREMENTS_DEV_FILE_TEMP)"
	@ echo "REQUIREMENTS_DEPLOYMENT_FILE:       $(REQUIREMENTS_DEPLOYMENT_FILE)"
	@ echo "REQUIREMENTS_DEPLOYMENT_FILE_TEMP:  $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP)"
	@ printf "\n-------- DOCKER ---------------\n"
	@ echo "DOCKER_BUILDKIT_VALUE:              $(DOCKER_BUILDKIT_VALUE)"
	@ printf "\n-------- PYTHON ---------------\n"
	@ echo "HAS_CONDA:                          $(HAS_CONDA)"
	@ echo "HAS_PYENV:                          $(HAS_PYENV)"
	@ printf "\n-------- LOCAL DEVELOPMENT ---------------\n"
	@ echo "LOCAL_DEV_DOCKER_PROJECT_NAME:      $(LOCAL_DEV_DOCKER_PROJECT_NAME)"
	@ echo "LOCAL_DEV_SERVICE_NAME:             $(LOCAL_DEV_SERVICE_NAME)"
	@ printf "\n-------- API ---------------\n"
	@ echo "INPUT_APP_PORT:                     $(INPUT_APP_PORT)"
	@ echo "OUTPUT_APP_PORT:                    $(OUTPUT_APP_PORT)"
	@ echo "API_WEBSERVER_URL:                  $(API_WEBSERVER_URL)"
	@ echo "API_SERVICE_NAME:                   $(API_SERVICE_NAME)"
	@ echo "API_DOCKER_PROJECT_NAME:            $(API_DOCKER_PROJECT_NAME)"
	@ printf "\n-------- APP ---------------\n"
	@ echo "APP_SERVER_PORT:                    $(APP_SERVER_PORT)"
	@ echo "APP_WEBSERVER_URL:                  $(APP_WEBSERVER_URL)"
	@ echo "APP_SERVICE_NAME:                   $(APP_SERVICE_NAME)"
	@ echo "APP_DOCKER_PROJECT_NAME:            $(APP_DOCKER_PROJECT_NAME)"
	@ printf "\n-----------------------\n"

# With conda, each step runs in a subshell that first activates the new
# environment, because every recipe line gets its own shell.
## Initialize the repository for code development
init: clean create-envrc delete-environment create-environment
ifeq (True,$(HAS_CONDA))
	@ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) requirements)
	@ printf "\n\n>>> New Conda environment created. Activate with: \n\t: conda activate $(ENVIRONMENT_NAME)"
	@ $(MAKE) show-params
	@ printf "\n\n>>> Project initialized!"
	@ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) pre-commit-install )
	@ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) lint )
else
	@ direnv allow || echo ""
	@ echo ">>> Continuing installation ..."
	@ $(MAKE) requirements
	@ $(MAKE) show-params
	@ printf "\n\n>>> Project initialized!\n"
	@ $(MAKE) pre-commit-install
	@ $(MAKE) lint
endif

## Remove ALL of the artifacts + Python environments
destroy: clean pre-commit-uninstall delete-environment
	@ echo ">>> Deleted all artifacts and environments!"

###############################################################################
# MISCELLANEOUS COMMANDS                                                      #
###############################################################################

# -------------------- Functions for cleaning repository ----------------------

## Removes artifacts from the build stage, and other common Python artifacts.
clean: clean-build clean-pyc clean-test clean-secrets clean-model-files clean-images

## Removes Python file artifacts
clean-pyc:
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +

## Remove build artifacts
clean-build:
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +

## Remove test and coverage artifacts
clean-test:
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache

## Remove files related to pre-trained models
clean-model-files:
	find . -name '*.pt' -exec rm -fr {} +
	find . -name "runs" -type d -exec rm -rf {} + || echo ""

# NOTE: this removes every PNG/JPG found below the current directory.
## Clean left-over images
clean-images:
	find . -name '*.png' -exec rm -fr {} +
	find . -name '*.jpg' -exec rm -fr {} +

## Removes secret artifacts - Serverless
clean-secrets:
	find . -name "node_modules" -type d -exec rm -rf {} + || echo ""
	find . -name ".serverless" -type d -exec rm -rf {} + || echo ""

# ---------------------- Functions for local environment ----------------------

## Set up the envrc file for the project.
create-envrc:
	@ echo "cat $(PROJECT_DIR)/template.envrc > $(PROJECT_DIR)/.envrc"
	@ cat $(PROJECT_DIR)/template.envrc > $(PROJECT_DIR)/.envrc

## Delete the local envrc file of the project
delete-envrc:
	@ rm -rf $(PROJECT_DIR)/.envrc || echo ""

# The nested subshells are written `( (` (with a space) so that bash does not
# try to parse `((` as the start of an arithmetic command.
## Install git-flow
git-flow-install:
	@ ( ( if [[ ! -f "`which git-flow`" ]]; then \
		echo "No Git-flow installed!" ; \
		if [[ -f "`which brew`" ]]; then \
			echo "Homebrew installed"; \
			HOMEBREW_NO_AUTO_UPDATE=1 brew install git-flow; \
		elif [[ -f "`which apt-get`" ]]; then \
			echo "Apt-get installed"; \
			apt-get install git-flow; \
		else \
			echo "Could not locate package manager! (brew or apt-get)"; \
		fi; \
	fi ) && git flow init -f -d ) || echo "Git-Flow setup could not be completed"

# ---------------------- Functions for Python environment ---------------------

# `printf` is used where the message contains \t or \n, since bash's builtin
# `echo` prints those escapes literally.
## Creates the Python environment
create-environment:
ifeq (True,$(HAS_CONDA))
	@ echo ">>> Detected CONDA ... Creating new conda environment!"
	@ printf ">>> \tCreating environment: \t %s\n" "$(ENVIRONMENT_NAME)"
	@ conda create --name $(ENVIRONMENT_NAME) python=$(PYTHON_VERSION) -y || echo ""
	@ printf ">>> New conda environment created. Activate with: \n conda activate %s\n" "$(ENVIRONMENT_NAME)"
else ifeq (True,$(HAS_PYENV))
	@ echo ">>> Detected PYENV ... Creating new Pyenv environment!"
	@ printf ">>> \tCreating environment: \t %s\n" "$(ENVIRONMENT_NAME)"
	@ pyenv virtualenv $(PYTHON_VERSION) $(ENVIRONMENT_NAME) || echo ""
	@ pyenv local $(ENVIRONMENT_NAME)
	@ echo ">>> New Pyenv environment created: '$(ENVIRONMENT_NAME)'"
	@ pyenv virtualenvs
	@ echo
else
	@ echo ">>> WARNING: neither conda nor pyenv was found; no environment was created."
endif

## Deletes the Python environment
delete-environment:
ifeq (True,$(HAS_CONDA))
	@ echo ">>> Detected CONDA ... Deleting Conda environment, if applicable!"
	@ echo ">>> Deleting environment: '$(ENVIRONMENT_NAME)'"
	@ ($(CONDA_DEACTIVATE) ; conda env remove --name $(ENVIRONMENT_NAME) -y) || echo ""
	@ echo ">>> Conda environment deleted: '$(ENVIRONMENT_NAME)'"
else ifeq (True,$(HAS_PYENV))
	@ echo ">>> Detected PYENV ... Deleting Pyenv environment!"
	@ echo ">>> Deleting environment: '$(ENVIRONMENT_NAME)'"
	@ pyenv uninstall -f $(ENVIRONMENT_NAME) || echo ""
	@ rm -f $(PROJECT_DIR)/.python-version || echo ""
	@ echo ">>> Pyenv environment deleted: '$(ENVIRONMENT_NAME)'"
	@ pyenv virtualenvs
	@ echo
endif

## Upgrade the version of the 'pip' package
pip-upgrade:
	@ $(PYTHON_INTERPRETER) -m pip install --no-cache-dir -q --upgrade pip==$(PIP_VERSION)

# Each file is sorted, stripped of blank lines (`grep "\S"`), written to a
# temporary file and then moved over the original.
## Sort the project packages requirements file
sort-requirements:
	@ sort $(REQUIREMENTS_FILE) | grep "\S" > $(REQUIREMENTS_FILE_TEMP) && \
		mv $(REQUIREMENTS_FILE_TEMP) $(REQUIREMENTS_FILE)
	@ sort $(REQUIREMENTS_DEV_FILE) | grep "\S" > $(REQUIREMENTS_DEV_FILE_TEMP) && \
		mv $(REQUIREMENTS_DEV_FILE_TEMP) $(REQUIREMENTS_DEV_FILE)
	@ sort $(REQUIREMENTS_DEPLOYMENT_FILE) | grep "\S" > $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP) && \
		mv $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP) $(REQUIREMENTS_DEPLOYMENT_FILE)

## Install Python dependencies into the Python environment
requirements: pip-upgrade sort-requirements
	@ $(PYTHON_INTERPRETER) -m pip install --no-cache-dir -q -r $(REQUIREMENTS_DEV_FILE)

# -------------------------- Functions for Code Linting -----------------------

## Installing the pre-commit Git hook
pre-commit-install:
	@ pre-commit install

## Uninstall the pre-commit Git hook
pre-commit-uninstall:
	@ pre-commit uninstall

## Run the 'pre-commit' linting step manually
lint:
	@ pre-commit run -a --hook-stage manual

###############################################################################
# Docker Commands - Local development                                         #
###############################################################################

LOCAL_DEV_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_localdev_dind"
LOCAL_DEV_SERVICE_NAME="local-dev"

## Clean Docker images
docker-prune:
	@ docker system prune -f

# The app rules are named `app-app-*` in this file, so those are the
# prerequisites used here.
## Stops both the API service and the local development service
all-stop: api-stop docker-local-dev-stop app-app-stop
	@ echo "All services are down"

## Starts both the API service and the local development service
all-start: api-start docker-local-dev-start app-app-start
	@ echo "All services are up!"

## Build local development Docker image
docker-local-dev-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		build $(LOCAL_DEV_SERVICE_NAME)

## Start service for local development
docker-local-dev-start: docker-local-dev-build docker-local-dev-stop
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		up -d $(LOCAL_DEV_SERVICE_NAME)

## Stop service for local development
docker-local-dev-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		stop $(LOCAL_DEV_SERVICE_NAME)
	@ $(MAKE) docker-prune

## Start a shell session into the docker container
docker-local-dev-login:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		exec \
		$(LOCAL_DEV_SERVICE_NAME) /bin/zsh

###############################################################################
# Docker Commands - API-related                                               #
###############################################################################

API_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_api"
API_SERVICE_NAME="api"

## Build API Docker image
api-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		build $(API_SERVICE_NAME)

## Start API Docker image container
api-start: api-stop api-build
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		up -d $(API_SERVICE_NAME)

## Stop API Docker image container
api-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		stop $(API_SERVICE_NAME)
	@ $(MAKE) docker-prune

## Open API in web browser
api-web:
	@ $(PYTHON_INTERPRETER) -m webbrowser "$(API_WEBSERVER_URL)/docs"

###############################################################################
# Docker Commands - App-related                                               #
###############################################################################

APP_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_app"
APP_SERVICE_NAME="app"

## Build App Docker image
app-app-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		build $(APP_SERVICE_NAME)

## Start App Docker image container
app-app-start: app-app-stop app-app-build
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		up -d $(APP_SERVICE_NAME)

## Stop App Docker image container
app-app-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		stop $(APP_SERVICE_NAME)
	@ $(MAKE) docker-prune

## Open App in web browser
app-app-web:
	@ $(PYTHON_INTERPRETER) -m webbrowser "$(APP_WEBSERVER_URL)"

###############################################################################
# Unit Tests and Code checking                                                #
###############################################################################

# See: https://github.com/google/addlicense for more information
## Add licenses to Python files
add-licenses:
	@ docker run -it \
		-v ${PWD}:/src \
		ghcr.io/google/addlicense \
		-f ./LICENSE.rst \
		./src/**/*.py

## Open up all web endpoints
all-web: api-web app-app-web
	@ echo "All web endpoints opened!"

###############################################################################
# PROJECT AND DATA FUNCTIONS                                                  #
###############################################################################

DATASET_PATH="https://raw.githubusercontent.com/hamzafarooq/maven-mlsystem-design-cohort-1/main/data/df_embed.csv"

DATASET_WITH_SUMMARIES_NAME="cicero_dataset_with_summaries"

DATASET_WITH_FAISS_AND_EMBEDDINGS_NAME="cicero_dataset_with_embeddings_and_faiss_index"

HUGGING_FACE_REPOSITORY_NAME="cicero_synthesizer"

FAISS_OUTPUT_FILENAME="cicero_faiss_index"

## Run the data preparation on the input dataset
prepare_data:
	@ $(PYTHON_INTERPRETER) \
		$(DATA_PROCESSING_DIRECTORY)/prepare_dataset.py \
		--dataset-path $(DATASET_PATH)

## Run the script for creating a FAISS index and text embeddings of the dataset
run_faiss_and_embeddings:
	@ $(PYTHON_INTERPRETER) \
		$(TRAINING_DIRECTORY)/create_faiss_corpus_index.py \
		--dataset-name $(DATASET_WITH_SUMMARIES_NAME) \
		--output-dataset-name $(DATASET_WITH_FAISS_AND_EMBEDDINGS_NAME) \
		--repository-name $(HUGGING_FACE_REPOSITORY_NAME) \
		--faiss-index-name $(FAISS_OUTPUT_FILENAME)

###############################################################################
# Self Documenting Commands                                                   #
###############################################################################

.DEFAULT_GOAL := help

# The `help` rule lists every target whose rule header is immediately preceded
# by one or more doc-comment lines (a line starting with two hashes and a
# space). Keep nothing between such a comment and its target.
# (The "Inspired by" reference link is missing from this copy of the file.)
# sed script explained:
# /^##/:
# 	* save line in hold space
# 	* purge line
# 	* Loop:
# 		* append newline + line to hold space
# 		* go to next line
# 		* if line starts with doc comment, strip comment character off and loop
# 	* remove target prerequisites
# 	* append hold space (+ newline) to line
# 	* replace newline plus comments by `---`
# 	* print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon (the reference link is missing from this copy of the file).
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=25 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')