Spaces:
Paused
Paused
#!/usr/bin/env python3 | |
import pytest | |
import torch | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
from mappingservice.utils import predict_language | |
# Module scope: loading the model and tokenizer is expensive, so build the
# pipeline once and share it between every test in this module.
@pytest.fixture(scope="module")
def classifier():
    """Provide a text-classification pipeline for language detection.

    Loads the ``papluca/xlm-roberta-base-language-detection`` model and its
    tokenizer and wraps them in a PyTorch ``text-classification`` pipeline.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    model_path = "papluca/xlm-roberta-base-language-detection"
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    classification = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        # Use the first CUDA device when one is available, otherwise pass -1.
        device=0 if torch.cuda.is_available() else -1,
    )
    return classification
def test_model_predictions(classifier):
    """Check language prediction for a set of room descriptions.

    Each description is first passed to ``predict_language``. When that
    returns a falsy value, ``classifier`` is called with the description and
    the ``"label"`` of its first result is used as the prediction instead.

    All mismatches are collected and reported together, so a single run shows
    every failing description rather than only the first one.

    Args:
        classifier: Callable taking a description string and returning an
            indexable result whose first item has a ``"label"`` key.
    """
    # Each description appears exactly once; repeated entries would only
    # re-run the same prediction without adding coverage.
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'es'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'es'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'es'},
        {'input': 'villa doble', 'expected_response': 'es'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'es'},  # noqa: E501
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'es'},  # noqa: E501
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'es'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'es'},
        # NOTE(review): expected 'en' although the wording looks Spanish;
        # kept as in the original data -- confirm this is intentional.
        {'input': 'suite familiar junior', 'expected_response': 'en'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'es'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'en'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'es'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'en'},
        {'input': 'double single use deluxe', 'expected_response': 'en'},
        {'input': 'double room queen bed superior', 'expected_response': 'en'},
        {'input': 'double guest room', 'expected_response': 'en'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'en'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'en'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'en'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'en'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'en'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'en'},  # noqa: E501
        {'input': 'family room superior', 'expected_response': 'en'},
    ]

    failures = []
    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]

        # First, try to predict based on keywords
        predicted_label = predict_language(description)

        # If no prediction was made, fallback to model prediction
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        if predicted_label != expected_label:
            failures.append(
                f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
            )

    # Fail once, listing every mismatch found above.
    assert not failures, "\n".join(failures)