"""Score text with Binoculars observer/performer model pairs.

Loads one DeepSeek 7B pair at a time to limit peak GPU memory, computes
chat and coder detection scores, and frees each model after use.
"""
from binoculars import Binoculars
import torch
import gc

CHAT_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-llm-7b-chat"
}

CODER_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-coder-7b-instruct-v1.5"
}
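
# Background note (per Hans et al., 2024, "Spotting LLMs With Binoculars"):
# the score contrasts two closely related models, dividing the performer's
# log-perplexity on the text by the observer/performer cross-perplexity.
# Text that surprises both models in the same way (typically human writing)
# scores higher; LLM output tends to score lower.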

def _initialize_model(model_pair, label):
    """Load a Binoculars observer/performer pair, logging GPU memory use."""
    print(f"Initializing {label} Binoculars model...")

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before {label} model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")

    bino = Binoculars(
        mode="accuracy",
        observer_name_or_path=model_pair["observer"],
        performer_name_or_path=model_pair["performer"],
        max_token_observed=2048
    )

    if torch.cuda.is_available():
        print(f"GPU Memory after {label} model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")

    return bino

def initialize_chat_model():
    return _initialize_model(CHAT_MODEL_PAIR, "chat")

def initialize_coder_model():
    return _initialize_model(CODER_MODEL_PAIR, "coder")

def compute_chat_score(text):
    """Score `text` with the chat pair, loading and freeing the model around the call."""
    print("Computing chat score...")
    bino_chat = initialize_chat_model()

    try:
        score_chat = bino_chat.compute_score(text)
        return {"score_chat": score_chat}
    finally:
        cleanup_model(bino_chat)

def compute_coder_score(text):
    """Score `text` with the coder pair, loading and freeing the model around the call."""
    print("Computing coder score...")
    bino_coder = initialize_coder_model()

    try:
        score_coder = bino_coder.compute_score(text)
        return {"score_coder": score_coder}
    finally:
        cleanup_model(bino_coder)

def compute_scores(text, use_chat=True, use_coder=True):
    """Run the selected detectors sequentially so only one model pair is resident at a time."""
    scores = {}

    if use_chat:
        scores.update(compute_chat_score(text))

    if use_coder:
        scores.update(compute_coder_score(text))

    return scores

def cleanup_model(model):
    """Release a Binoculars model's weights and reclaim Python and CUDA memory."""
    if model:
        try:
            print("Cleaning up model resources...")
            model.free_memory()

            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                print(f"After cleanup: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
        except Exception as e:
            print(f"Error during model cleanup: {e}")

def cleanup_models(bino_chat, bino_coder):
    """Free both detectors, if present."""
    if bino_chat:
        cleanup_model(bino_chat)

    if bino_coder:
        cleanup_model(bino_coder)
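
# Minimal usage sketch. Assumes enough GPU/CPU memory for one 2x7B pair;
# the sample string and printed output are illustrative only, not part of
# the module's API.
if __name__ == "__main__":
    sample_text = (
        "Large language models can draft prose and code, but their output "
        "often follows statistical patterns that paired detectors can spot."
    )
    results = compute_scores(sample_text, use_chat=True, use_coder=False)
    # Scores below the Binoculars threshold suggest machine-generated text.
    print(results)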