Avoid re-loading already loaded models; mark the unload functionality as not implemented.
detect-pretrain-code-contamination/src/run.py
CHANGED
@@ -23,6 +23,8 @@ import sys
 import gc
 import pickle
 
+models = {}
+
 def save_data(filename, data):
     with open(filename, 'wb') as filehandle:
         # store the data as binary data stream
@@ -36,20 +38,18 @@ def load_data(filename):
     return loaded_data
 
 def unload_model(model,tokenizer):
-    model
-    del model
-    del tokenizer
-    time.sleep(0.5)
-    gc.collect()
-    torch.cuda.empty_cache()
+    print("[X] Cannot unload model! Functionality not implemented!")
 
 def load_model(name1):
-    model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
-    model1.eval()
-    tokenizer1 = AutoTokenizer.from_pretrained(name1)
+    if name1 not in models:
+        model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
+        model1.eval()
+        tokenizer1 = AutoTokenizer.from_pretrained(name1)
 
-    tokenizer1.pad_token = tokenizer1.eos_token
-    return model1, tokenizer1
+        tokenizer1.pad_token = tokenizer1.eos_token
+        models[name1] = model1
+        models[name1 + "_tokenizer"] = tokenizer1
+    return models[name1], models[name1 + "_tokenizer"]
 
 def calculatePerplexity(sentence, model, tokenizer, gpu):
     """
@@ -105,6 +105,7 @@ def get_neighbors(text,ref_loss,model2,tokenizer2,ratio_gen):
     return neighbors_dl
 
 def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_name):
+    global model1,model2,tokenizer1,tokenizer2
     print(f"all data size: {len(test_data)}")
     random.seed(0)
     random.shuffle(test_data)
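
In effect, the change memoises model loading: a module-level models dict keyed by checkpoint name caches each model and its tokenizer, so a second call to load_model with the same name returns the objects already in memory instead of instantiating them again, while unload_model is reduced to a stub that only prints a warning. Below is a minimal, self-contained sketch of the same caching pattern, assuming transformers (and accelerate, for device_map='auto') are installed; the checkpoint name "gpt2" and the example calls are illustrative only, not part of the Space.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Module-level cache: name -> model, name + "_tokenizer" -> tokenizer (mirrors the diff).
models = {}

def load_model(name1):
    # Instantiate the model and tokenizer only the first time this checkpoint is requested.
    if name1 not in models:
        model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
        model1.eval()
        tokenizer1 = AutoTokenizer.from_pretrained(name1)
        tokenizer1.pad_token = tokenizer1.eos_token
        models[name1] = model1
        models[name1 + "_tokenizer"] = tokenizer1
    # Later calls are cache hits and return the already-loaded objects.
    return models[name1], models[name1 + "_tokenizer"]

# Illustrative usage: the second call does not reload anything.
model_a, tok_a = load_model("gpt2")
model_b, tok_b = load_model("gpt2")
assert model_a is model_b and tok_a is tok_b

The trade-off is that every checkpoint requested this way stays resident for the lifetime of the process; with unload_model now a no-op, memory is never reclaimed, which is exactly what the commit message flags as not implemented.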
|