# Antoine35/legi_v1 · Hugging Face

import keras
import numpy as np
import pickle
import json
from huggingface_hub import hf_hub_download
import os # Import the os module
from tensorflow.keras import layers # Import layers module
from tensorflow.keras.models import load_model # Import load_model

class CustomLegalModel(keras.Model):
    """Text classifier assembled from artifacts stored in a Hugging Face repo.

    On construction the following files are fetched from the repository
    identified by ``model_id``:

    * ``my_model.keras``         -- the Keras model, loaded with ``load_model``
    * ``encoder.pkl``            -- a pickled object exposing
      ``inverse_transform`` (presumably a scikit-learn label encoder --
      TODO confirm)
    * ``vocabulary.npy``         -- vocabulary passed to
      ``TextVectorization.set_vocabulary``
    * ``vectorizer_config.json`` -- config passed to
      ``TextVectorization.from_config``

    Note that :meth:`predict` intentionally shadows ``keras.Model.predict``
    with a single-text signature; batch prediction on the wrapped network is
    still available through ``self.model.predict``.
    """

    def __init__(self, model_id: str):
        """Download every artifact of ``model_id`` and build the pipeline.

        Args:
            model_id: Hugging Face Hub repository id, e.g. ``"user/repo"``.
        """
        super().__init__()
        # The Keras model file is materialised in a local directory (not only
        # in the hub cache) before being loaded.
        local_dir = "downloaded_model"
        model_path = hf_hub_download(
            repo_id=model_id,
            filename='my_model.keras',
            local_dir=local_dir,
            local_dir_use_symlinks=False,
        )
        self.model = load_model(model_path)
        self.encoder = self.load_pickle(model_id, "encoder.pkl")
        self.vocabulary = self.load_numpy(model_id, "vocabulary.npy")
        self.vectorizer_config = self.load_json(model_id, "vectorizer_config.json")

        # Build the text vectorizer once here instead of on every predict()
        # call: its config and vocabulary do not change after loading.
        self._vectorizer = layers.TextVectorization.from_config(self.vectorizer_config)
        self._vectorizer.set_vocabulary(self.vocabulary)

    def load_pickle(self, model_id: str, filename: str):
        """Download ``filename`` from ``model_id`` and unpickle it.

        SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
        file. Only use this with repositories you trust.
        """
        path = hf_hub_download(repo_id=model_id, filename=filename)
        with open(path, 'rb') as f:
            return pickle.load(f)

    def load_numpy(self, model_id: str, filename: str):
        """Download ``filename`` from ``model_id`` and load it with ``np.load``."""
        path = hf_hub_download(repo_id=model_id, filename=filename)
        return np.load(path)

    def load_json(self, model_id: str, filename: str):
        """Download ``filename`` from ``model_id`` and parse it as JSON."""
        path = hf_hub_download(repo_id=model_id, filename=filename)
        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale-dependent default encoding.
        with open(path, 'r', encoding="utf-8") as f:
            return json.load(f)

    def predict(self, text: str):
        """Classify a single text and return its decoded label.

        Args:
            text: The raw input text to classify.

        Returns:
            The label obtained by passing the arg-max class index of the
            model output through ``self.encoder.inverse_transform``.
        """
        # Wrap the text as a (1, 1) array: one sample holding one string.
        text_vectorized = self._vectorizer(np.array([[text]])).numpy()

        # Run the wrapped network on the vectorized sample.
        prediction = self.model.predict(text_vectorized)

        # Index of the highest-scoring class for the single sample.
        predicted_class_index = np.argmax(prediction, axis=1)[0]

        # Map the class index back to the original label.
        predicted_label = self.encoder.inverse_transform([predicted_class_index])[0]
        return predicted_label

    # NOTE(review): this statement runs when the class body is executed, i.e.
    # after the file's `import keras`. Keras chooses its backend at import
    # time, so to actually influence backend selection this variable has to be
    # set before `keras` is first imported. Kept here to preserve the existing
    # side effect on the process environment.
    os.environ["KERAS_BACKEND"] = "tensorflow"

# Build the classifier from the artifacts published in the Hub repository.
model = CustomLegalModel(model_id="Antoine35/legi_v1")

# Classify a sample text and keep the decoded label.
sample_text = "délibéré laudience avril siéger délibéré laudience avril siéger"
result = model.predict(text=sample_text)

# Bare expression: displays the label in a notebook/REPL, no effect in a script.
result