jaeyong2 committed
Commit 7f12fd5
Parent: 8ebec20

Update README.md

Files changed (1): README.md (+4, -5)
README.md CHANGED
@@ -37,7 +37,7 @@ optimizer = AdamW(model.parameters(), lr=5e-5)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
-for epoch in range(3): # loop over epochs
+for epoch in range(3):
     model.train()
     total_loss = 0
     count = 0
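The next hunk passes tokenizer outputs through batch_to_device, a helper this diff never shows. A minimal sketch of what it plausibly does, assuming tokenizer outputs are plain dicts of tensors; the helper body and the dummy encodings below are illustrations, not the README's actual code:

import torch

def batch_to_device(encodings, device):
    # Move every tensor in a tokenizer output (a dict of tensors) onto `device`.
    return {key: value.to(device) for key, value in encodings.items()}

# Usage with a stand-in for a tokenizer output:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
anchor_encodings = {
    "input_ids": torch.tensor([[101, 7592, 102]]),
    "attention_mask": torch.tensor([[1, 1, 1]]),
}
anchor_encodings = batch_to_device(anchor_encodings, device)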
@@ -53,11 +53,11 @@ for epoch in range(3): # loop over epochs
         positive_encodings = batch_to_device(positive_encodings, device)
         negative_encodings = batch_to_device(negative_encodings, device)
 
-        # model outputs (produce the embedding vectors)
-        anchor_output = model(**anchor_encodings)[0][:, 0, :] # the [CLS] token's vector
+
+        anchor_output = model(**anchor_encodings)[0][:, 0, :]
         positive_output = model(**positive_encodings)[0][:, 0, :]
         negative_output = model(**negative_encodings)[0][:, 0, :]
-        # compute the triplet loss
+
         if loss==None:
             loss = triplet_loss(anchor_output, positive_output, negative_output)
         else:
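Neither triplet_loss nor the body of the else branch is visible in the diff. The pattern that is visible (CLS pooling, then accumulating one loss term at a time before a single backward pass) can be sketched with torch's built-in TripletMarginLoss; the margin, tensor shapes, and chunk count below are assumptions:

import torch
import torch.nn as nn

triplet_loss = nn.TripletMarginLoss(margin=1.0)  # margin value is assumed

loss = None
for _ in range(3):  # stand-in for the chunks the README iterates over
    # Stand-ins for the [CLS] embeddings, i.e. model(**encodings)[0][:, 0, :]
    anchor = torch.randn(4, 768, requires_grad=True)
    positive = torch.randn(4, 768)
    negative = torch.randn(4, 768)
    if loss is None:  # idiomatic form of the README's `loss==None`
        loss = triplet_loss(anchor, positive, negative)
    else:
        loss = loss + triplet_loss(anchor, positive, negative)

loss.backward()  # one backward pass over the accumulated loss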
@@ -91,7 +91,6 @@ def evaluate(validation_dataset):
         negative_embedding = get_embedding(item["Fake Title"], model, tokenizer)
 
 
-        # compute similarity between the query and each document (cosine distance)
         positive_distances = pairwise_distances(query_embedding.detach().cpu().float().numpy(), document_embedding.detach().cpu().float().numpy(), metric="cosine")
         negative_distances = pairwise_distances(query_embedding.detach().cpu().float().numpy(), negative_embedding.detach().cpu().float().numpy(), metric="cosine")
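get_embedding is defined elsewhere in the README; judging from the training loop, it presumably tokenizes one text and CLS-pools the model output. A hedged sketch of that helper and of the ranking check the two distance matrices plausibly feed into (the helper body, the accuracy criterion, and the stand-in tensors are all assumptions):

import torch
from sklearn.metrics import pairwise_distances

def get_embedding(text, model, tokenizer):
    # Assumed to mirror the training code: tokenize one text, CLS-pool.
    encodings = tokenizer(text, return_tensors="pt", truncation=True)
    encodings = {k: v.to(model.device) for k, v in encodings.items()}
    with torch.no_grad():
        return model(**encodings)[0][:, 0, :]

# Stand-ins for get_embedding outputs; the check says the query should sit
# closer (smaller cosine distance) to the real title than to the fake one.
query_embedding = torch.randn(1, 768)
document_embedding = torch.randn(1, 768)
negative_embedding = torch.randn(1, 768)

positive_distances = pairwise_distances(
    query_embedding.detach().cpu().float().numpy(),
    document_embedding.detach().cpu().float().numpy(), metric="cosine")
negative_distances = pairwise_distances(
    query_embedding.detach().cpu().float().numpy(),
    negative_embedding.detach().cpu().float().numpy(), metric="cosine")
is_correct = positive_distances[0, 0] < negative_distances[0, 0]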
 
 