import os

import torch
from flash.text import TextClassifier
# ⚠️ You need this to access the state key
from flash.core.data.data_source import LabelsState


class PreTrainedPipeline:
    """Inference wrapper around a Flash ``TextClassifier`` checkpoint.

    Loads ``pytorch_model.bin`` from ``path`` once at construction time and,
    when called, returns the highest-scoring labels for the given input.
    """

    def __init__(self, path: str = ""):
        """Load the model, its data pipeline and its label list.

        Args:
            path: Directory containing ``pytorch_model.bin``.
        """
        self.device = 'cpu'
        # map_location keeps the weights on the same device the batches are
        # moved to in __call__, even if the checkpoint was saved from a GPU.
        self.model = TextClassifier.load_from_checkpoint(
            os.path.join(path, "pytorch_model.bin"),
            map_location=self.device,
        )
        # predict_step is invoked directly below, so nothing else switches the
        # model to inference mode; without this, dropout would stay active.
        self.model.eval()
        self.data_pipeline = self.model.build_data_pipeline()
        self.labels = self.model._data_pipeline_state._state[LabelsState].labels
        # Maximum number of (score, label) entries returned per call.
        self.top_k = 5

    def __call__(self, inputs):
        """Classify ``inputs`` and return the top-scoring labels.

        Args:
            inputs: Raw input handed to the data pipeline's deserializer
                (presumably a text string -- confirm against the deserializer).

        Returns:
            A list of at most ``self.top_k`` dicts of the form
            ``{'score': float, 'label': ...}``, ordered by descending score.
            Only the first element of the predicted batch is reported.
        """
        sample = self.data_pipeline._deserializer(inputs)
        batch = self.data_pipeline.worker_preprocessor('predict')(sample)
        batch = self.model.transfer_batch_to_device(batch, self.device, 0)
        batch = self.data_pipeline.device_preprocessor('predict')(batch)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            out = self.model.predict_step(batch, 0)

        # Softmax over dim 1, then keep the first row of the batch.
        proba = out['logits'].softmax(1)[0].tolist()
        ranked = sorted(zip(proba, self.labels), reverse=True)
        return [
            {'score': score, 'label': label}
            for score, label in ranked[:self.top_k]
        ]