File size: 948 Bytes
ac2467f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Code to generate embeddings based on movie plots.

import pandas as pd
from langchain.document_loaders import DataFrameLoader
from langchain.vectorstores import FAISS
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings


# Pipeline: load the cleaned movies table, turn each row into a LangChain
# document, embed the plot text and persist the resulting FAISS index.
# Everything below runs at module level, i.e. as soon as the file is executed
# or imported.

# Load the cleaned movies dataset. The relative path is resolved against the
# current working directory.
movies = pd.read_csv('../../data/processed/movies_clean.csv')

# 'Unnamed: 0' is typically the leftover index column of a CSV that was saved
# together with its index. errors='ignore' keeps the script working when the
# column is absent (e.g. the CSV was written with index=False) instead of
# failing with a KeyError.
movies = movies.drop(columns='Unnamed: 0', errors='ignore')


# Build the documents: 'plot_sin_nombres' is used as the page content of each
# document; the loader keeps the other columns as metadata.
df_loader = DataFrameLoader(movies, page_content_column='plot_sin_nombres')
df_document = df_loader.load()

# Model used to generate the embeddings.
embedding_function = SentenceTransformerEmbeddings(model_name="sentence-t5-xl")
print('Transformer descargado.')

# Embed every document and index the vectors with FAISS.
db = FAISS.from_documents(df_document, embedding_function)
print('DB vectorial creada.')

# Persist the index to the 'plot_embeddings' folder (relative to the current
# working directory).
db.save_local('plot_embeddings')