# vectorsearch/backend.py

import modal
from typing import List, Dict, Tuple, Union, Callable

# from preprocessing import FileIO

# assets = modal.Mount.from_local_dir(
#     "./data",
#     # condition=lambda pth: not ".venv" in pth,
#     remote_path="./data",
# )

stub = modal.Stub("vector-search-project")

vector_search = modal.Image.debian_slim().pip_install(
    "sentence_transformers==2.2.2", "llama_index==0.9.6.post1", "angle_emb==0.1.5"
)

stub.volume = modal.Volume.new()


@stub.function(image=vector_search,
               gpu="A100",
               timeout=600,
               volumes={"/root/models": stub.volume}
               # secrets are available in the environment with os.environ["SECRET_NAME"]
               # secret=modal.Secret.from_name("my-huggingface-secret")
               )
def encode_content_splits(content_splits,
                          model=None,  # path or name of the embedding model
                          **kwargs):
""" kwargs provided in case encode method has extra arguments """
from sentence_transformers import SentenceTransformer
import os, time
models_list = os.listdir('/root/models')
print("Models:", models_list)
    if isinstance(model, str):
        # normalize "path/to/model/" -> "model" so it can be matched against the volume
        model_name = model.rstrip('/').split('/')[-1]
        if model_name in models_list:
            if "UAE-Large-V1-300" in model_name:
                print("Loading finetuned UAE-Large-V1-300 model from Modal Volume")
                from angle_emb import AnglE
                model = AnglE.from_pretrained('WhereIsAI/UAE-Large-V1',
                                              pretrained_model_path=os.path.join('/root/models', model_name),
                                              pooling_strategy='cls').cuda()
                kwargs['to_numpy'] = True
                # this model doesn't accept a list of lists, so flatten the chunks
                # into a single "episode" to keep the encoding loop below unchanged
                if isinstance(content_splits[0], list):
                    content_splits = [[chunk for episode in content_splits for chunk in episode]]
            else:
                print(f"Loading model {model_name} from Modal volume")
                model = SentenceTransformer(os.path.join('/root/models', model_name))
        else:
            print(f"Model {model} not found in Modal volume, loading from HuggingFace")
            model = SentenceTransformer(model)
    else:
        print("Using model provided as argument")

    if 'save' in kwargs:
        save_name = kwargs.pop('save')  # remove it so it isn't passed to encode()
        if isinstance(save_name, str):
            # keep only the model name, whatever path was given
            save_name = save_name.rstrip('/').split('/')[-1]
            fname = os.path.join('/root/models', save_name)
            print(f"Saving model in {fname}")
            model.save(fname)  # SentenceTransformer-style save into the Modal volume
            print(f"Model saved in {fname}")
print("Starting encoding")
start = time.perf_counter()
emb = [list(zip(episode, model.encode(episode, **kwargs))) for episode in content_splits]
end = time.perf_counter() - start
print(f"GPU processing lasted {end:.2f} seconds")
print("Encoding finished")
return emb
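

# A minimal sketch (hypothetical helper, not part of the original pipeline)
# showing how a caller might flatten the per-episode (chunk, vector) pairs
# returned by encode_content_splits into parallel lists, e.g. before loading
# them into a vector store.
def flatten_episode_embeddings(emb):
    """Flatten [[(chunk, vector), ...], ...] into (chunks, vectors) lists."""
    chunks, vectors = [], []
    for episode in emb:
        for chunk, vector in episode:
            chunks.append(chunk)
            vectors.append(vector)
    return chunks, vectors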


@stub.function(image=vector_search, gpu="A100", timeout=120,
               mounts=[modal.Mount.from_local_dir("./data",
                                                  remote_path="/root/data",
                                                  condition=lambda pth: ".json" in pth)],
               volumes={"/root/models": stub.volume}
               )
def finetune(training_path='./data/training_data_300.json',
             valid_path='./data/validation_data_100.json',
             model_id=None):
    import os
    print("Data:", os.listdir('/root/data'))
    print("Models:", os.listdir('/root/models'))

    if model_id is None:
        print("No model ID provided")
        return None
    elif isinstance(model_id, str) and model_id[-1] == "/":
        model_id = model_id[:-1]

    from llama_index.finetuning import EmbeddingQAFinetuneDataset
    training_set = EmbeddingQAFinetuneDataset.from_json(training_path)
    valid_set = EmbeddingQAFinetuneDataset.from_json(valid_path)
    print("Datasets loaded")
    num_training_examples = len(training_set.queries)
    print(f"Training examples: {num_training_examples}")

    from llama_index.finetuning import SentenceTransformersFinetuneEngine
    print(f"Model Name is {model_id}")
    # keep only the model name, whether model_id is "org/name" or just "name"
    model_ext = model_id.split('/')[-1]
    ft_model_name = f'finetuned-{model_ext}-{num_training_examples}'
    model_outpath = os.path.join("/root/models", ft_model_name)
    print(f'Model ID: {model_id}')
    print(f'Model Outpath: {model_outpath}')

    finetune_engine = SentenceTransformersFinetuneEngine(
        training_set,
        batch_size=32,
        model_id=model_id,
        model_output_path=model_outpath,
        val_dataset=valid_set,
        epochs=10
    )

    import io, zipfile, glob, time
    try:
        start = time.perf_counter()
        finetune_engine.finetune()
        end = time.perf_counter() - start
        print(f"GPU processing lasted {end:.2f} seconds")
        print(os.listdir('/root/models'))

        stub.volume.commit()  # persist the changes, i.e. the finetuned model
        # TODO: share the model on HuggingFace
        # https://huggingface.co/docs/transformers/v4.15.0/model_sharing

        # zip the contents of the output folder into a BytesIO object, so the
        # caller can write it to a file, send it over the network, etc.
        folder_to_zip = model_outpath
        bytes_buffer = io.BytesIO()
        with zipfile.ZipFile(bytes_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for file_path in glob.glob(folder_to_zip + "/**", recursive=True):
                print(f"Processed file {file_path}")
                zip_file.write(file_path, os.path.relpath(file_path, start=folder_to_zip))

        # move the pointer to the start of the buffer before returning it
        bytes_buffer.seek(0)
        return bytes_buffer
    except Exception as e:
        print(f"Finetuning failed: {e}")
        return "Finetuning failed"


@stub.local_entrypoint()
def test_method(content_splits=[["a"]]):
    output = encode_content_splits.remote(content_splits)
    return output
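
# run the test entrypoint above locally with:
# modal run podcast/1/backend.py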
# deploy it with
# modal token set --token-id ak-xxxxxx --token-secret as-xxxxx # given when we create a new token
# modal deploy podcast/1/backend.py
# View Deployment: https://modal.com/apps/jpbianchi/falcon_hackaton-project <<< use this project name
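
# A minimal sketch of how another process could call the deployed encoder,
# assuming the app keeps the name "vector-search-project" used above and this
# Modal version exposes modal.Function.lookup:
#
# encode = modal.Function.lookup("vector-search-project", "encode_content_splits")
# embeddings = encode.remote([["chunk 1 of episode 1", "chunk 2 of episode 1"]])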