from binoculars import Binoculars
import torch
import gc

# Observer/performer model pairs. Binoculars scores a text by contrasting a
# base "observer" model against a closely related tuned "performer" model.
CHAT_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-llm-7b-chat",
}

CODER_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
}
def initialize_chat_model():
    """Load the chat Binoculars detector, logging GPU memory before and after."""
    print("Initializing chat Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_chat = Binoculars(
        mode="accuracy",
        observer_name_or_path=CHAT_MODEL_PAIR["observer"],
        performer_name_or_path=CHAT_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_chat
def initialize_coder_model():
    """Load the coder Binoculars detector, logging GPU memory before and after."""
    print("Initializing coder Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_coder = Binoculars(
        mode="accuracy",
        observer_name_or_path=CODER_MODEL_PAIR["observer"],
        performer_name_or_path=CODER_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_coder
def compute_chat_score(text):
    """Score `text` with the chat detector, freeing the model afterwards."""
    print("Computing chat score...")
    bino_chat = initialize_chat_model()
    try:
        score_chat = bino_chat.compute_score(text)
        return {"score_chat": score_chat}
    finally:
        cleanup_model(bino_chat)
def compute_coder_score(text):
    """Score `text` with the coder detector, freeing the model afterwards."""
    print("Computing coder score...")
    bino_coder = initialize_coder_model()
    try:
        score_coder = bino_coder.compute_score(text)
        return {"score_coder": score_coder}
    finally:
        cleanup_model(bino_coder)
def compute_scores(text, use_chat=True, use_coder=True):
    """Run the requested detectors sequentially, so only one 7B pair is resident at a time."""
    scores = {}
    if use_chat:
        scores.update(compute_chat_score(text))
    if use_coder:
        scores.update(compute_coder_score(text))
    return scores
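
# A minimal interpretation sketch, not part of the original module: in the
# Binoculars paper, scores *below* a threshold indicate likely
# machine-generated text. The 0.901 cutoff below is an assumption based on
# the reference implementation's "accuracy" mode; calibrate it for your data.
def interpret_scores(scores, threshold=0.901):
    """Hypothetical helper mapping raw Binoculars scores to coarse labels."""
    return {
        name: "likely AI-generated" if value < threshold else "likely human-written"
        for name, value in scores.items()
    }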
def cleanup_model(model):
    """Release a detector's GPU memory and clear the allocator cache."""
    if model:
        try:
            print("Cleaning up model resources...")
            model.free_memory()
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                print(f"After cleanup: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
        except Exception as e:
            print(f"Error during model cleanup: {e}")
def cleanup_models(bino_chat, bino_coder):
    """Free both detectors if they were initialized."""
    if bino_chat:
        cleanup_model(bino_chat)
    if bino_coder:
        cleanup_model(bino_coder)
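
# A minimal usage sketch, assuming an environment with enough GPU memory for
# one 7B observer/performer pair at a time (each compute_*_score call loads
# and then frees its models). The sample text is purely illustrative.
if __name__ == "__main__":
    sample = "The mitochondria is the powerhouse of the cell."
    results = compute_scores(sample, use_chat=True, use_coder=False)
    print(results)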