|
|
|
|
|
import pandas as pd |
|
from langchain.document_loaders import DataFrameLoader |
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings |
|
|
|
|
|
|
|
# Build a FAISS vector index over movie plots and persist it to disk.
#
# Steps: read the cleaned movies CSV, turn each row into a LangChain document
# whose text is the 'plot_sin_nombres' column, embed the documents with a
# sentence-transformer model, and save the resulting index locally.
#
# NOTE: all paths below are relative, so they are resolved against the
# current working directory of the process running this script.

MOVIES_CSV = '../../data/processed/movies_clean.csv'
PLOT_COLUMN = 'plot_sin_nombres'
EMBEDDING_MODEL = "sentence-t5-xl"
INDEX_DIR = 'plot_embeddings'

movies = pd.read_csv(MOVIES_CSV)

# 'Unnamed: 0' is presumably the index column left behind by an earlier
# `to_csv` call. `errors='ignore'` keeps the script working when the CSV was
# written without it, instead of aborting with a KeyError.
movies = movies.drop(columns='Unnamed: 0', errors='ignore')

# One document per DataFrame row; PLOT_COLUMN supplies the page content.
df_loader = DataFrameLoader(movies, page_content_column=PLOT_COLUMN)
df_document = df_loader.load()

# Instantiating the embedding wrapper loads the model (judging by the message
# below, this may trigger a download the first time it runs).
embedding_function = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)
print('Transformer descargado.')

# Embed every document and build the vector store from the results.
db = FAISS.from_documents(df_document, embedding_function)
print('DB vectorial creada.')

# Persist the index so it can be reloaded later without re-embedding.
db.save_local(INDEX_DIR)

if __name__ == '__main__':
    # Nothing to do here: the whole pipeline above runs at module level,
    # i.e. on import as well as on direct execution.
    pass
|
|