.PHONY: show-params
###############################################################################
#                                   GLOBALS                                   #
###############################################################################
PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROJECT_NAME := $(shell basename $(subst -,_,$(PROJECT_DIR)))
PROJECT_NAME_LOWER := $(shell echo $(PROJECT_NAME) | tr '[:upper:]' '[:lower:]')
ENVIRONMENT_NAME = $(PROJECT_NAME)
PYTHON_INTERPRETER = python3
PIP_INTERPRETER = pip
PYTHON_VERSION = 3.9
PIP_VERSION = 22.3
# --- REQUIREMENTS-RELATED
REQUIREMENTS_FILE = $(PROJECT_DIR)/requirements.txt
REQUIREMENTS_FILE_TEMP = $(PROJECT_DIR)/requirements.tmp
REQUIREMENTS_DEV_FILE = $(PROJECT_DIR)/requirements-dev.txt
REQUIREMENTS_DEV_FILE_TEMP = $(PROJECT_DIR)/requirements-dev.tmp
REQUIREMENTS_DEPLOYMENT_FILE = $(PROJECT_DIR)/requirements-deploy.txt
REQUIREMENTS_DEPLOYMENT_FILE_TEMP = $(PROJECT_DIR)/requirements-deploy.tmp
# --- PATHS TO PROJECT DIRECTORIES
DATA_DIRECTORY = $(PROJECT_DIR)/data
SRC_DIRECTORY = $(PROJECT_DIR)/src
API_DIRECTORY = $(SRC_DIRECTORY)/api
DATA_PROCESSING_DIRECTORY = $(SRC_DIRECTORY)/data_processing
TRAINING_DIRECTORY = $(SRC_DIRECTORY)/training
# -- Docker-related
# Variable used for turning on/off Docker Buildkit
DOCKER_BUILDKIT_VALUE=1
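# Note: this value is only echoed by 'show-params'; for it to influence builds it
# would typically be exported to the Docker CLI (illustrative, not wired up here):
#   export DOCKER_BUILDKIT=$(DOCKER_BUILDKIT_VALUE)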
LOCAL_DEVELOPMENT_DIR_PATH="$(PROJECT_DIR)/docker"
# -- API-related
INPUT_APP_PORT=8501
OUTPUT_APP_PORT=8501
API_WEBSERVER_URL="http://localhost:$(INPUT_APP_PORT)"
# -- App-related
APP_SERVER_PORT=7860
APP_WEBSERVER_URL="http://localhost:$(APP_SERVER_PORT)"
# ----------------------------- Python-specific -------------------------------
# - Detect which Python environment manager is available
# - Anaconda
ifeq (,$(shell which conda))
HAS_CONDA=False
else
HAS_CONDA=True
# We need to specify the following commands in order to properly activate the
# Anaconda environment.
SHELL=/bin/bash
# Note that the extra 'conda activate' is needed to ensure that the environment
# is floated to the front of PATH.
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
CONDA_DEACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda deactivate
endif
# - Pyenv
ifeq (,$(shell which pyenv))
HAS_PYENV=False
else
HAS_PYENV=True
endif
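# Illustrative use of the helpers above (this is the pattern 'init' relies on):
# run a command inside the Conda environment from a recipe line, e.g.
#   @ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(PYTHON_INTERPRETER) --version)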
###############################################################################
#                           VARIABLES FOR COMMANDS                            #
###############################################################################
## Show the set of input parameters
show-params:
	@ printf "\n-------- GENERAL ---------------\n"
	@ echo "PROJECT_DIR: $(PROJECT_DIR)"
	@ echo "PROJECT_NAME: $(PROJECT_NAME)"
	@ echo "LOCAL_DEVELOPMENT_DIR_PATH: $(LOCAL_DEVELOPMENT_DIR_PATH)"
	@ echo "ENVIRONMENT_NAME: $(ENVIRONMENT_NAME)"
	@ echo "PYTHON_INTERPRETER: $(PYTHON_INTERPRETER)"
	@ echo "PYTHON_VERSION: $(PYTHON_VERSION)"
	@ echo "PIP_VERSION: $(PIP_VERSION)"
	@ echo "REQUIREMENTS_FILE: $(REQUIREMENTS_FILE)"
	@ echo "REQUIREMENTS_FILE_TEMP: $(REQUIREMENTS_FILE_TEMP)"
	@ echo "REQUIREMENTS_DEV_FILE: $(REQUIREMENTS_DEV_FILE)"
	@ echo "REQUIREMENTS_DEV_FILE_TEMP: $(REQUIREMENTS_DEV_FILE_TEMP)"
	@ echo "REQUIREMENTS_DEPLOYMENT_FILE: $(REQUIREMENTS_DEPLOYMENT_FILE)"
	@ echo "REQUIREMENTS_DEPLOYMENT_FILE_TEMP: $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP)"
	@ printf "\n-------- DOCKER ---------------\n"
	@ echo "DOCKER_BUILDKIT_VALUE: $(DOCKER_BUILDKIT_VALUE)"
	@ printf "\n-------- PYTHON ---------------\n"
	@ echo "HAS_CONDA: $(HAS_CONDA)"
	@ echo "HAS_PYENV: $(HAS_PYENV)"
	@ printf "\n-------- LOCAL DEVELOPMENT ---------------\n"
	@ echo "LOCAL_DEV_DOCKER_PROJECT_NAME: $(LOCAL_DEV_DOCKER_PROJECT_NAME)"
	@ echo "LOCAL_DEV_SERVICE_NAME: $(LOCAL_DEV_SERVICE_NAME)"
	@ printf "\n-------- API ---------------\n"
	@ echo "INPUT_APP_PORT: $(INPUT_APP_PORT)"
	@ echo "OUTPUT_APP_PORT: $(OUTPUT_APP_PORT)"
	@ echo "API_WEBSERVER_URL: $(API_WEBSERVER_URL)"
	@ echo "API_SERVICE_NAME: $(API_SERVICE_NAME)"
	@ echo "API_DOCKER_PROJECT_NAME: $(API_DOCKER_PROJECT_NAME)"
	@ printf "\n-------- APP ---------------\n"
	@ echo "APP_SERVER_PORT: $(APP_SERVER_PORT)"
	@ echo "APP_WEBSERVER_URL: $(APP_WEBSERVER_URL)"
	@ printf "\n-----------------------\n"
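# Example: inspect the resolved configuration without changing anything:
#   make show-params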
## Initialize the repository for code development
init: clean create-envrc delete-environment create-environment
ifeq (True,$(HAS_CONDA))
	@ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) requirements)
	@ printf "\n\n>>> New Conda environment created. Activate with: \n\t: conda activate $(ENVIRONMENT_NAME)"
	@ $(MAKE) show-params
	@ printf "\n\n>>> Project initialized!"
	@ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) pre-commit-install )
	@ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) lint )
else
	@ direnv allow || echo ""
	@ echo ">>> Continuing installation ..."
	@ $(MAKE) requirements
	@ $(MAKE) show-params
	@ printf "\n\n>>> Project initialized!\n"
	@ $(MAKE) pre-commit-install
	@ $(MAKE) lint
endif
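# Typical first-time setup (illustrative):
#   make init                                # create env, install deps, set up hooks
#   conda activate $(ENVIRONMENT_NAME)       # only needed on Conda-based setups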
## Remove ALL of the artifacts + Python environments
destroy: clean pre-commit-uninstall delete-environment
	@ echo ">>> Deleted all artifacts and environments!"
###############################################################################
#                           MISCELLANEOUS COMMANDS                            #
###############################################################################
# -------------------- Functions for cleaning repository ----------------------
## Removes artifacts from the build stage, and other common Python artifacts.
clean: clean-build clean-pyc clean-test clean-secrets clean-model-files clean-images
## Removes Python file artifacts
clean-pyc:
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +
## Remove build artifacts
clean-build:
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +
## Remove test and coverage artifacts
clean-test:
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache
## Remove files related to pre-trained models
clean-model-files:
	find . -name '*.pt' -exec rm -fr {} +
	find . -name "runs" -type d -exec rm -rf {} + || echo ""
## Clean left-over images
clean-images:
	find . -name '*.png' -exec rm -fr {} +
	find . -name '*.jpg' -exec rm -fr {} +
## Removes secret artifacts - Serverless
clean-secrets:
	find . -name "node_modules" -type d -exec rm -rf {} + || echo ""
	find . -name ".serverless" -type d -exec rm -rf {} + || echo ""
# ---------------------- Functions for local environment ----------------------
## Set up the envrc file for the project.
create-envrc:
	@ echo "cat $(PROJECT_DIR)/template.envrc > $(PROJECT_DIR)/.envrc"
	@ cat $(PROJECT_DIR)/template.envrc > $(PROJECT_DIR)/.envrc
## Delete the local envrc file of the project
delete-envrc:
	@ rm -rf $(PROJECT_DIR)/.envrc || echo ""
## Install git-flow
git-flow-install:
	@ (( if [[ ! -f "`which git-flow`" ]]; then \
		echo "No git-flow installed!" ; \
		if [[ -f "`which brew`" ]]; then \
			echo "Homebrew installed"; \
			HOMEBREW_NO_AUTO_UPDATE=1 brew install git-flow; \
		elif [[ -f "`which apt-get`" ]]; then \
			echo "apt-get installed"; \
			apt-get install -y git-flow; \
		else \
			echo "Could not locate package manager! (brew or apt-get)"; \
		fi; \
	fi ) && git flow init -f -d) || echo "Git-Flow setup could not be completed"
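# Note: the [[ ... ]] tests above assume a bash-compatible shell; SHELL is only
# forced to /bin/bash when Conda is detected, so on other setups you may need,
# for example: make git-flow-install SHELL=/bin/bash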
# ---------------------- Functions for Python environment ---------------------
## Creates the Python environment
create-environment:
ifeq (True,$(HAS_CONDA))
	@ echo ">>> Detected CONDA ... Creating new conda environment!"
	@ echo ">>> \tCreating environment: \t $(ENVIRONMENT_NAME)"
	@ conda create --name $(ENVIRONMENT_NAME) python=$(PYTHON_VERSION) -y || echo ""
	@ echo ">>> New conda environment created. Activate with: \n conda activate $(ENVIRONMENT_NAME)"
else ifeq (True,$(HAS_PYENV))
	@ echo ">>> Detected PYENV ... Creating new Pyenv environment!"
	@ echo ">>> \tCreating environment: \t $(ENVIRONMENT_NAME)"
	@ pyenv virtualenv $(PYTHON_VERSION) $(ENVIRONMENT_NAME) || echo ""
	@ pyenv local $(ENVIRONMENT_NAME)
	@ echo ">>> New Pyenv environment created: '$(ENVIRONMENT_NAME)'"
	@ pyenv virtualenvs
	@ echo
endif
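# Note: on pyenv setups, 'pyenv local' above writes a .python-version file at the
# project root; 'delete-environment' removes it again. Illustrative stand-alone use:
#   make create-environment && pyenv versions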
## Deletes the Python environment
delete-environment:
ifeq (True,$(HAS_CONDA))
	@ echo ">>> Detected CONDA ... Deleting Conda environment, if applicable!"
	@ echo ">>> Deleting environment: '$(ENVIRONMENT_NAME)'"
	@ ($(CONDA_DEACTIVATE) ; conda env remove --name $(ENVIRONMENT_NAME) -y) || echo ""
	@ echo ">>> Conda environment deleted: '$(ENVIRONMENT_NAME)'"
else ifeq (True,$(HAS_PYENV))
	@ echo ">>> Detected PYENV ... Deleting Pyenv environment!"
	@ echo ">>> Deleting environment: '$(ENVIRONMENT_NAME)'"
	@ pyenv uninstall -f $(ENVIRONMENT_NAME) || echo ""
	@ rm $(PROJECT_DIR)/.python-version || echo ""
	@ echo ">>> Pyenv environment deleted: '$(ENVIRONMENT_NAME)'"
	@ pyenv virtualenvs
	@ echo
endif
## Upgrade the version of the 'pip' package
pip-upgrade:
	@ $(PYTHON_INTERPRETER) -m pip install --no-cache-dir -q --upgrade pip==$(PIP_VERSION)
## Sort the project packages requirements file
sort-requirements:
	@ sort $(REQUIREMENTS_FILE) | grep "\S" > $(REQUIREMENTS_FILE_TEMP) && \
		mv $(REQUIREMENTS_FILE_TEMP) $(REQUIREMENTS_FILE)
	@ sort $(REQUIREMENTS_DEV_FILE) | grep "\S" > $(REQUIREMENTS_DEV_FILE_TEMP) && \
		mv $(REQUIREMENTS_DEV_FILE_TEMP) $(REQUIREMENTS_DEV_FILE)
	@ sort $(REQUIREMENTS_DEPLOYMENT_FILE) | grep "\S" > $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP) && \
		mv $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP) $(REQUIREMENTS_DEPLOYMENT_FILE)
## Install Python dependencies into the Python environment
requirements: pip-upgrade sort-requirements
	@ $(PYTHON_INTERPRETER) -m pip install --no-cache-dir -q -r $(REQUIREMENTS_DEV_FILE)
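# Note: 'requirements' only installs $(REQUIREMENTS_DEV_FILE); this assumes (not
# enforced here) that the dev file pulls in the base dependencies, e.g. a first
# line such as:
#   -r requirements.txt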
# -------------------------- Functions for Code Linting -----------------------
## Install the pre-commit Git hook
pre-commit-install:
	@ pre-commit install
## Uninstall the pre-commit Git hook
pre-commit-uninstall:
	@ pre-commit uninstall
## Run the 'pre-commit' linting step manually
lint:
	@ pre-commit run -a --hook-stage manual
###############################################################################
#                     Docker Commands - Local development                     #
###############################################################################
LOCAL_DEV_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_localdev_dind"
LOCAL_DEV_SERVICE_NAME="local-dev"
## Prune unused Docker resources
docker-prune:
	@ docker system prune -f
## Stop the API, App, and local development services
all-stop: api-stop docker-local-dev-stop app-app-stop
	@ echo "All services are down"
## Start the API, App, and local development services
all-start: api-start docker-local-dev-start app-app-start
	@ echo "All services are up!"
## Build local development Docker image
docker-local-dev-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		build $(LOCAL_DEV_SERVICE_NAME)
## Start service for local development
docker-local-dev-start: docker-local-dev-build docker-local-dev-stop
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		up -d $(LOCAL_DEV_SERVICE_NAME)
## Stop service for local development
docker-local-dev-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		stop $(LOCAL_DEV_SERVICE_NAME)
	@ $(MAKE) docker-prune
## Start a shell session into the docker container
docker-local-dev-login:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		exec \
		$(LOCAL_DEV_SERVICE_NAME) /bin/zsh
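# Illustrative local-development loop:
#   make docker-local-dev-start    # build and start the container
#   make docker-local-dev-login    # open a zsh shell inside it
#   make docker-local-dev-stop     # shut it down and prune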
###############################################################################
#                        Docker Commands - API-related                        #
###############################################################################
API_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_api"
API_SERVICE_NAME="api"
## Build API Docker image
api-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		build $(API_SERVICE_NAME)
## Start API Docker image container
api-start: api-stop api-build
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		up -d $(API_SERVICE_NAME)
## Stop API Docker image container
api-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		stop $(API_SERVICE_NAME)
	@ $(MAKE) docker-prune
## Open API in web browser
api-web:
	@ $(PYTHON_INTERPRETER) -m webbrowser "$(API_WEBSERVER_URL)/docs"
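# Illustrative API workflow (the /docs path assumes a FastAPI/Swagger-style API):
#   make api-start && make api-web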
###############################################################################
#                        Docker Commands - App-related                        #
###############################################################################
APP_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_app"
APP_SERVICE_NAME="app"
## Build App Docker image
app-app-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		build $(APP_SERVICE_NAME)
## Start App Docker image container
app-app-start: app-app-stop app-app-build
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		up -d $(APP_SERVICE_NAME)
## Stop App Docker image container
app-app-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		stop $(APP_SERVICE_NAME)
	@ $(MAKE) docker-prune
## Open App in web browser
app-app-web:
	@ $(PYTHON_INTERPRETER) -m webbrowser "$(APP_WEBSERVER_URL)"
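# Illustrative App workflow (the App is served on APP_SERVER_PORT, i.e. 7860):
#   make app-app-start && make app-app-web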
###############################################################################
#                         Unit Tests and Code checking                        #
###############################################################################
# See: https://github.com/google/addlicense for more information
## Add licenses to Python files
add-licenses:
	@ docker run -it \
		-v ${PWD}:/src \
		ghcr.io/google/addlicense \
		-f ./LICENSE.rst \
		./src/**/*.py
## Open up all web endpoints
all-web: api-web app-app-web
	@ echo "All web endpoints opened!"
###############################################################################
#                         PROJECT AND DATA FUNCTIONS                          #
###############################################################################
DATASET_PATH="https://raw.githubusercontent.com/hamzafarooq/maven-mlsystem-design-cohort-1/main/data/df_embed.csv"
DATASET_WITH_SUMMARIES_NAME="cicero_dataset_with_summaries"
DATASET_WITH_FAISS_AND_EMBEDDINGS_NAME="cicero_dataset_with_embeddings_and_faiss_index"
HUGGING_FACE_REPOSITORY_NAME="cicero_synthesizer"
FAISS_OUTPUT_FILENAME="cicero_faiss_index"
## Run the data preparation on the input dataset
prepare_data:
	@ $(PYTHON_INTERPRETER) \
		$(DATA_PROCESSING_DIRECTORY)/prepare_dataset.py \
		--dataset-path $(DATASET_PATH)
## Run the script for creating a FAISS index and text embeddings of the dataset
run_faiss_and_embeddings:
	@ $(PYTHON_INTERPRETER) \
		$(TRAINING_DIRECTORY)/create_faiss_corpus_index.py \
		--dataset-name $(DATASET_WITH_SUMMARIES_NAME) \
		--output-dataset-name $(DATASET_WITH_FAISS_AND_EMBEDDINGS_NAME) \
		--repository-name $(HUGGING_FACE_REPOSITORY_NAME) \
		--faiss-index-name $(FAISS_OUTPUT_FILENAME)
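# Illustrative data-pipeline order (make variables can be overridden on the command line):
#   make prepare_data
#   make run_faiss_and_embeddings
#   make prepare_data DATASET_PATH="<other-csv-url>"   # hypothetical override of the source CSV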
###############################################################################
#                          Self Documenting Commands                          #
###############################################################################
.DEFAULT_GOAL := help
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
#     * save line in hold space
#     * purge line
#     * Loop:
#         * append newline + line to hold space
#         * go to next line
#         * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=25 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
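# Because of .DEFAULT_GOAL above, running 'make' with no arguments prints this
# help listing, which is built from every comment line starting with '## '.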