import pandas as pd
from sentence_transformers.util import cos_sim

from utils.models import SBert


def get_cos_sim(model, prompt: str, response: str) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        model: Callable invoked as ``model(text)`` to obtain the embedding of
            a single text (here an ``SBert`` instance).
        prompt: First text to embed.
        response: Second text to embed.

    Returns:
        The scalar extracted with ``.item()`` from the result of ``cos_sim``
        applied to the two embeddings.
    """
    prompt_vec = model(prompt)
    response_vec = model(response)
    return cos_sim(prompt_vec, response_vec).item()


def batch_cos_sim(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """Add an ``originality`` column computed as ``1 - cosine similarity``.

    For every row, the ``prompt`` and ``response`` texts are embedded with an
    ``SBert`` model and the score ``1 - get_cos_sim(...)`` is stored in the new
    ``originality`` column.

    Args:
        df: Frame that must contain the columns ``'prompt'`` and
            ``'response'``. It is modified in place.
        model_name: Identifier passed to ``SBert`` to build the model.

    Returns:
        The same ``df`` object, now carrying the ``originality`` column.

    Raises:
        AssertionError: If ``'prompt'`` or ``'response'`` is missing.
    """
    # Raised explicitly instead of using ``assert`` so that the check is not
    # stripped under ``python -O``; the exception type stays AssertionError so
    # existing callers that catch it keep working.
    for column in ('prompt', 'response'):
        if column not in df.columns:
            raise AssertionError(f'missing required column: {column!r}')

    model = SBert(model_name)

    # Iterate the two columns in lockstep rather than using a row-wise
    # ``DataFrame.apply``: on an empty frame ``apply(axis=1)`` does not yield a
    # Series, which makes the single-column assignment below fail.
    scores = [
        1 - get_cos_sim(model, prompt, response)
        for prompt, response in zip(df['prompt'], df['response'])
    ]
    # An explicit float Series aligned to df.index keeps the dtype numeric even
    # when there are no rows.
    df['originality'] = pd.Series(scores, index=df.index, dtype=float)
    return df


if __name__ == '__main__':
    _df = pd.read_csv('data/example_1.csv')
    _df_o = batch_cos_sim(_df, 'paraphrase-multilingual-MiniLM-L12-v2')