|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library
import json
import os
import random
import sys
import time
import uuid
import warnings
from datetime import datetime

# Third-party
from tqdm import tqdm
import numpy as np
from gensim.models import KeyedVectors
from sentence_transformers import SentenceTransformer, util
from huggingface_hub import snapshot_download

# Project-local helpers
from hints import curiosity, hint
from tracking import (
    calculate_moving_average,
    calculate_tendency_slope,
)

# Ignore UserWarning messages raised from modules whose name matches "gensim".
warnings.filterwarnings(action="ignore", category=UserWarning, module="gensim")
|
|
|
|
|
class Model_class:
    """Load the word-vector model and the sentence encoder used by the game.

    Attributes:
        model: gensim ``KeyedVectors`` loaded (memory-mapped, read-only) for
            the requested language and model type.
        model_st: ``SentenceTransformer`` instance of
            ``paraphrase-multilingual-mpnet-base-v2``; created for every
            model type.
    """

    base_path = os.path.dirname(os.path.abspath(__file__))

    # model_type -> (Hugging Face repo id, local folder, vector file suffix)
    _MODEL_SOURCES = {
        "SentenceTransformer": (
            "Jsevisal/strans_models",
            "config/strans_models/",
            "strans_model",
        ),
        "word2vec": (
            "Jsevisal/w2v_models",
            "config/w2v_models/",
            "w2v_model",
        ),
    }

    def __init__(self, lang=0, model_type="SentenceTransformer"):
        """Locate (or download) the vectors and load both models.

        Args:
            lang: ``1`` selects the English vectors (``eng_*``); any other
                value selects the Spanish ones (``esp_*``).
            model_type: ``"SentenceTransformer"`` or ``"word2vec"``.

        Raises:
            ValueError: if ``model_type`` is not one of the supported names.
        """
        # Validate first so a typo fails loudly instead of leaving
        # ``self.model`` undefined.
        try:
            repo_id, dest_path, suffix = self._MODEL_SOURCES[model_type]
        except KeyError:
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected one of "
                f"{sorted(self._MODEL_SOURCES)}"
            ) from None

        language_prefix = "eng" if lang == 1 else "esp"
        file_name = f"{language_prefix}_{suffix}"

        # Prefer a local copy under config/; otherwise fetch the repository
        # that matches the requested model type. ``model_path`` is therefore
        # always bound before it is used.
        # NOTE(review): the previous code always downloaded
        # "Jsevisal/strans_models"; the w2v repo id is taken from the git URL
        # it declared but never used -- confirm that repo is published.
        local_dir = os.path.join(self.base_path, dest_path)
        if os.path.exists(os.path.join(local_dir, file_name)):
            model_path = local_dir
        else:
            model_path = snapshot_download(repo_id=repo_id)

        self.model = KeyedVectors.load(
            os.path.join(model_path, file_name),
            mmap="r",
        )

        self.model_st = SentenceTransformer(
            "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        )
|
|
|
|
|
|
|
class Semantrix:
    """State and rules of one Semantrix word-guessing session.

    The player proposes words; each one is scored by its similarity to a
    secret word (``similarity * 10`` rounded to two decimals). Returned
    feedback strings embed the markers ``[win]``, ``[lose]``, ``[hint]`` and
    ``[rank]`` (presumably parsed by the UI layer -- verify against caller).

    ``lang == 1`` selects the ``"ENG"`` configuration; any other value selects
    ``"SPA"``.
    """

    base_path = os.path.dirname(os.path.abspath(__file__))
    config_file_path = os.path.join(base_path, "config/lang.json")
    secret_file_path = os.path.join(base_path, "config/secret.json")
    data_path = os.path.join(base_path, "data/")

    # Line separating the current guess from the sorted ranking.
    _SEPARATOR = "---------------------------"

    class DictWrapper:
        """Expose the keys of a dict as attributes."""

        def __init__(self, data_dict):
            self.__dict__.update(data_dict)

    def __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None):
        """Load the configuration and create an empty ranking file.

        Args:
            lang: ``1`` for English, anything else for Spanish.
            model_type: ``"word2vec"`` or ``"SentenceTransformer"``.
            session_hash: identifier used in the ranking file name.
        """
        with open(self.config_file_path, "r", encoding="utf-8") as file:
            self.Config_full = json.load(file)

        self.lang = lang
        self.session_id = str(uuid.uuid4().hex)
        self.model_type = model_type
        self.session_hash = session_hash
        self.ranking_path = "rankings/ranking_" + str(self.session_hash) + ".txt"
        self.ranking_data = []
        self.ranking_msg = ""

        self._load_language_settings()

        with open(self._ranking_file(), "w+", encoding="utf-8") as file:
            file.write(self._SEPARATOR)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _lang_key(self):
        """Return the top-level configuration key for the session language."""
        return "ENG" if self.lang == 1 else "SPA"

    def _load_language_settings(self):
        """Re-read the secrets file and bind the language-specific settings."""
        with open(self.secret_file_path, "r", encoding="utf-8") as file:
            self.secret = json.load(file)

        key = self._lang_key()
        self.Config = self.DictWrapper(self.Config_full[key]["Game"])
        self.secret_dict = self.secret[key]
        self.secret_list = self.secret_dict["basic"]

    def _hint_config(self):
        """Return the ``Hint`` section of the configuration for this language."""
        return self.DictWrapper(self.Config_full[self._lang_key()]["Hint"])

    def _ranking_file(self):
        """Return the ranking file path, creating its folder if needed."""
        path = self.data_path + self.ranking_path
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        return path

    def _rank_row(self, index):
        """Build one ranking row: ``["#<index>", word, score]``."""
        return ["#" + str(index), self.words[index], self.scores[index]]

    def _ranking_suffix(self):
        """Return the ``[rank]`` block, or a blank gap before any guess exists."""
        if len(self.words) > 1:
            return "[rank]" + self.ranking_msg
        return "\n\n"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def reset_game(self):
        """Start a new session id and reload the secret-word lists.

        ``self.secret`` is reloaded because ``prepare_game`` overwrites it
        with the chosen secret word.
        """
        self.session_id = str(uuid.uuid4().hex)
        self._load_language_settings()

    def generate_gensim_model(
        self,
        model_class,
        batch_size=32,
        prompt="Encuentra el valor semántico de la palabra: ",
        save_path="config/strans_models/esp_strans_model_prompt",
    ):
        """Encode the vocabulary with the sentence encoder and save the result.

        Every key of ``model_class.model`` is encoded with
        ``model_class.model_st`` in batches and stored in a new
        ``KeyedVectors`` (kept in ``self.model_trans``), which is then saved.

        Args:
            model_class: object exposing ``model`` (KeyedVectors) and
                ``model_st`` (SentenceTransformer).
            batch_size: number of words encoded per call.
            prompt: text passed as ``prompt`` to ``encode``.
            save_path: destination passed to ``KeyedVectors.save``; a relative
                path is resolved against the current working directory.
        """
        # 768 is presumably the embedding width of ``model_st`` -- TODO confirm.
        self.model_trans = KeyedVectors(768)
        self.model_trans.init_sims(replace=True)

        words = list(model_class.model.key_to_index.keys())

        for start in tqdm(range(0, len(words), batch_size)):
            batch_words = words[start:start + batch_size]
            encoded_vectors = model_class.model_st.encode(
                batch_words,
                convert_to_tensor=True,
                prompt=prompt,
            ).tolist()
            self.model_trans.add_vectors(batch_words, encoded_vectors)

        self.model_trans.save(save_path)

    def prepare_game(self, secret_word_used, difficulty):
        """Pick the secret word and reset all per-game state.

        Args:
            secret_word_used: index into ``self.secret_list``.
            difficulty: stored as ``self.difficulty``; ``1`` starts the hint
                counter ``n`` at 3, and ``4`` disables hints in ``play_game``.
        """
        self.secret = self.secret_list[secret_word_used].lower()
        self.init_time = time.time()

        # Slot 0 holds a placeholder with the maximum score; it is replaced by
        # the real secret once the player wins.
        self.words = [self.Config.secret_word]
        self.scores = [10]

        self.win = False
        self.recent_hint = 0
        self.f_dev_avg = 0
        self.last_hint = -1
        self.difficulty = difficulty
        self.n = 3 if self.difficulty == 1 else 0

    def gen_rank(self, repeated):
        """Rebuild the ranking and write it to the ranking file.

        The first row is the current guess (``repeated`` when it is not -1,
        otherwise the last word), followed by a separator and every guess
        ordered by descending score. Slot 0 is never listed in that ordering.

        Args:
            repeated: index of an already-known guess, or -1 for a new one.
        """
        current = repeated if repeated != -1 else len(self.words) - 1
        best_first = np.argsort(self.scores)[::-1]

        self.ranking_data.clear()
        self.ranking_data.append(self._rank_row(current))
        self.ranking_data.append(self._SEPARATOR)
        self.ranking_data.extend(self._rank_row(i) for i in best_first if i != 0)

        self.ranking_msg = "".join(f"{row}\n" for row in self.ranking_data)

        with open(self._ranking_file(), "w+", encoding="utf-8") as file:
            file.write(self.ranking_msg)

    def play_game(self, word, model_class):
        """Score one guess and return the feedback text.

        Args:
            word: the player's guess, or ``"give_up"`` to surrender.
            model_class: object exposing ``model`` (vocabulary and
                ``similarity``) and ``model_st`` (used for hints).

        Returns:
            The feedback message. It starts with ``[lose]`` on surrender and
            with ``[win]`` when the score reaches 10; otherwise it carries the
            score, a better/worse note, an optional ``[hint]`` block and the
            ``[rank]`` block.
        """
        word = word.lower().strip()

        if word == "give_up":
            return (
                "[lose]"
                + str(self.Config.Feedback_9)
                + self.secret
                + "\n\n"
                + self.Config.Feedback_10
            )

        # Reject unknown words before touching the guess list, so nothing has
        # to be undone (the old append-then-pop could drop a real guess).
        if word not in model_class.model.key_to_index:
            feedback = (
                "I don't know that word. Try another word."
                if self.lang == 1
                else "No conozco esa palabra. Prueba con otra palabra."
            )
            return feedback + self._ranking_suffix()

        # Slot 0 is the placeholder/secret, not a guess: search from index 1.
        try:
            repeated = self.words.index(word, 1)
        except ValueError:
            repeated = -1
            self.words.append(word)

        similarity = model_class.model.similarity(self.secret, word)
        score = np.round(similarity * 10, 2)

        if repeated == -1:
            self.scores.append(score)

        if score <= 2.5:
            feedback = self.Config.Feedback_0 + str(score)
        elif score <= 6.0:
            feedback = self.Config.Feedback_1 + str(score)
        elif score <= 7.0:
            feedback = self.Config.Feedback_2 + str(score)
        elif score <= 8:
            feedback = self.Config.Feedback_3 + str(score)
        elif score <= 9.0:
            feedback = self.Config.Feedback_4 + str(score)
        elif score < 10.0:
            feedback = self.Config.Feedback_5 + str(score)
        else:
            self.win = True
            feedback = "[win]" + self.Config.Feedback_8
            self.words[0] = self.secret
            # The winning word now lives in slot 0; remove the entry that was
            # appended for it (and only if one was appended).
            if repeated == -1:
                self.words.pop()
                self.scores.pop()

        if not self.win:
            previous_score = self.scores[len(self.scores) - 2]
            if score > previous_score:
                feedback += "\n" + self.Config.Feedback_6
            elif score < previous_score:
                feedback += "\n" + self.Config.Feedback_7

        if self.difficulty != 4 and len(self.scores) > 1:
            mov_avg = calculate_moving_average(self.scores[1:], 5)

            if len(mov_avg) > 1 and not self.win:
                f_dev = calculate_tendency_slope(mov_avg)
                f_dev_avg = calculate_moving_average(f_dev, 3)

                # Offer a hint when the smoothed slope is negative and no hint
                # was given during the cooldown window.
                if f_dev_avg[len(f_dev_avg) - 1] < 0 and self.recent_hint == 0:
                    i = random.randint(0, len(self.Config.hint_intro) - 1)
                    feedback += "\n\n[hint]" + self.Config.hint_intro[i]

                    hint_text, self.n, self.last_hint = hint(
                        self.secret,
                        self.n,
                        model_class.model_st,
                        self.last_hint,
                        self.lang,
                        self._hint_config(),
                    )
                    feedback += "\n" + hint_text
                    self.recent_hint = 3

                # NOTE(review): the nesting level of this cooldown decrement
                # was reconstructed from de-indented source -- confirm.
                if self.recent_hint != 0:
                    self.recent_hint -= 1

        self.gen_rank(repeated)

        return feedback + self._ranking_suffix()

    def curiosity(self):
        """Return the curiosity text for the secret and archive the play.

        Calls the module-level ``curiosity`` helper, then writes a JSON record
        (session id, timestamp, elapsed time, ranking lines after the first
        two, win flag, secret and hint counter) to
        ``<data_path>plays/<session_id>-<secret>.json``.
        """
        feedback = curiosity(self.secret, self._hint_config())

        with open(self._ranking_file(), "r", encoding="utf-8") as original_file:
            # Skip the current-guess row and the separator line.
            file_content = original_file.readlines()[2:]

        play_data = {
            "session_id": self.session_id,
            "datetime": str(datetime.now()),
            "time": time.time() - self.init_time,
            "data": file_content,
            "win": self.win,
            "secret": self.secret,
            "number_of_hints": self.n,
        }

        plays_dir = self.data_path + "plays/"
        os.makedirs(plays_dir, exist_ok=True)
        new_file_name = f"{self.session_id}-{self.secret}.json"
        with open(plays_dir + new_file_name, "w", encoding="utf-8") as new_file:
            json.dump(play_data, new_file, indent=4)

        return feedback

    def get_session_id(self):
        """Return the identifier of the current session."""
        return self.session_id
|
|