Spaces:
Running
Running
from dataclasses import dataclass | |
from graphgen.models.evaluate.base_evaluator import BaseEvaluator | |
from graphgen.models.llm.tokenizer import Tokenizer | |
from graphgen.models.text.text_pair import TextPair | |
from graphgen.utils import create_event_loop | |
class LengthEvaluator(BaseEvaluator): | |
tokenizer_name: str = "cl100k_base" | |
def __post_init__(self): | |
self.tokenizer = Tokenizer( | |
model_name=self.tokenizer_name | |
) | |
async def evaluate_single(self, pair: TextPair) -> float: | |
loop = create_event_loop() | |
return await loop.run_in_executor(None, self._calculate_length, pair.answer) | |
def _calculate_length(self, text: str) -> float: | |
tokens = self.tokenizer.encode_string(text) | |
return len(tokens) | |