How to use this model without RAGatouille?
#2
by freQuensy23
I've tried to use this model without RAGatouille with this code:
import torch
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import AutoModel, AutoTokenizer
from scipy.spatial.distance import cosine
# Load the model and tokenizer
model = AutoModel.from_pretrained('bclavie/JaColBERTv2').to('cuda')
tokenizer = AutoTokenizer.from_pretrained('bclavie/JaColBERTv2')
# Define the docs and queries
docs = [
    "あなたは誰ですか?",  # "Who are you?"
    "あなたの年収は何ですか",  # "What is your yearly income?"
    "あなたの趣味は何ですか",  # "What are your hobbies?"
]
queries = [
    "こんにちは、私の名前はジョンです",  # "Hello, my name is John"
    "先月ダブルプロモーションを受けて、今は得ています",  # "I got a double promotion last month and now I'm getting"
    "私はバスケットボールとチェスをするのが好きです",  # "I like to play basketball and chess"
]
# Function to compute embeddings
def get_embeddings(texts):
    encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt').to(model.device)
    with torch.no_grad():
        model_output = model(**encoded_input)
    return model_output.pooler_output
# Compute embeddings for docs and queries
docs_embeddings = get_embeddings(docs)
queries_embeddings = get_embeddings(queries)
# Calculate cosine distances
cosine_distances = np.zeros((len(docs), len(queries)))
for i in range(len(docs)):
    for j in range(len(queries)):
        cosine_distances[i, j] = cosine(docs_embeddings[i].cpu().numpy(), queries_embeddings[j].cpu().numpy())
# Create heatmap
sns.set()
plt.figure(figsize=(10, 8))
ax = sns.heatmap(cosine_distances, annot=True, cmap='viridis', xticklabels=queries, yticklabels=docs)
plt.title('Cosine Distance Heatmap between Docs and Queries')
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()
But the results were very poor, looking more like random values than meaningful similarities. Can you tell me what's wrong?
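My best guess is that JaColBERTv2 is a ColBERT-style multi-vector model, so it is meant to be scored with token-level late interaction (MaxSim over per-token embeddings) rather than with a single pooled vector, which would make pooler_output meaningless here. Below is a minimal MaxSim sketch of my own over last_hidden_state; it is only an approximation, since it skips the projection layer and the query/document marker tokens that the real ColBERT / RAGatouille pipeline adds.

import torch
import torch.nn.functional as F

def get_token_embeddings(texts):
    # Per-token hidden states instead of the pooled [CLS] vector
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors='pt').to(model.device)
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state  # (batch, seq_len, dim)
    return F.normalize(hidden, dim=-1), encoded['attention_mask']

def maxsim_score(query_emb, query_mask, doc_emb, doc_mask):
    # Late interaction: for each query token, take its best-matching document token,
    # then sum those maxima over the non-padding query tokens
    sim = query_emb @ doc_emb.T  # (query_len, doc_len) cosine similarities
    sim = sim.masked_fill(doc_mask[None, :] == 0, -1e4)  # ignore document padding
    return (sim.max(dim=1).values * query_mask).sum().item()

q_emb, q_mask = get_token_embeddings(queries)
d_emb, d_mask = get_token_embeddings(docs)
scores = [[maxsim_score(q_emb[j], q_mask[j], d_emb[i], d_mask[i])
           for j in range(len(queries))] for i in range(len(docs))]

Here higher scores mean more similar (the opposite direction of the cosine distances above), but even so I'd like to understand how the model is supposed to be used without RAGatouille.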