ninomiya committed on
Commit
8ae67d3
1 Parent(s): 80b6f9d

Upload 2 files

Browse files
Files changed (2) hide show
  1. model.pkl +3 -0
  2. script.py +23 -0
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:decf4b82afc22bac58f9128cda5e9cfc63c4a6106430055a21947a57c0c0b1cb
3
+ size 3600466
script.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Score test queries against their candidate FAQs and write ``submission.csv``.

For every row of ``/tmp/data/test.csv`` the ``Query`` text is vectorized with
``model["tokenizer"]``, compared by cosine similarity against the rows of
``model["X_faq"]`` whose id in ``model["faq_ids"]`` equals one of the row's
``FAQ1``..``FAQ3`` values, and the best similarity is thresholded with
``model["thr"]`` to produce a 0/1 prediction.
"""
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

test_df = pd.read_csv("/tmp/data/test.csv")

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only run this script against a model.pkl obtained from a trusted source.
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Loop-invariant lookups, hoisted out of the per-row loop.
tokenizer = model["tokenizer"]  # presumably a fitted text vectorizer -- TODO confirm
faq_ids = model["faq_ids"]
X_faq = model["X_faq"]
faq_columns = [f"FAQ{i + 1}" for i in range(3)]

scores = []
for _, row in test_df.iterrows():
    # Boolean mask over the FAQ rows: True where the stored id equals any of
    # this row's three candidate ids.
    is_cand = np.logical_or.reduce([faq_ids == row[col] for col in faq_columns])

    if not is_cand.any():
        # None of the candidate ids is present in the model, so there is
        # nothing to compare against. Taking the max of an empty similarity
        # vector would raise; -inf instead yields prediction 0 for any finite
        # threshold.
        scores.append(-np.inf)
        continue

    X_query = tokenizer.transform([row["Query"]])
    sim = cosine_similarity(X_query, X_faq[is_cand])[0]
    scores.append(sim.max())

# 1 when the best candidate similarity exceeds the stored threshold, else 0.
predict = (np.array(scores) > model["thr"]).astype(int)

df = pd.DataFrame(
    [(f"testid{i:04}", v) for i, v in enumerate(predict)],
    columns=["id", "pred"],
)
# index=False is the documented way to omit the row index from the CSV.
df.to_csv("submission.csv", index=False)