nikhil_staging / src /signals /text_statistics_test.py
nsthorat's picture
Push
e4f9cbe
raw
history blame
No virus
1.23 kB
"""Test the text statistics signal."""
from typing import cast
from pytest import approx
from ..schema import DataType, Field
from .text_statistics import NUM_CHARS, READABILITY, TYPE_TOKEN_RATIO, TextStatisticsSignal
def test_text_statistics_fields() -> None:
    """Check the output schema declared by the text statistics signal.

    After setup, ``fields()`` is expected to equal a ``Field`` with an INT32
    entry for ``NUM_CHARS`` and FLOAT32 entries for ``READABILITY`` and
    ``TYPE_TOKEN_RATIO``.
    """
    stats_signal = TextStatisticsSignal()
    stats_signal.setup()

    actual_schema = stats_signal.fields()
    expected_schema = Field(
        fields={
            NUM_CHARS: Field(dtype=DataType.INT32),
            READABILITY: Field(dtype=DataType.FLOAT32),
            TYPE_TOKEN_RATIO: Field(dtype=DataType.FLOAT32),
        })

    assert actual_schema == expected_schema
def test_text_statistics_compute() -> None:
    """Check the statistics computed for two short input strings.

    Each input is expected to yield one dict keyed by ``NUM_CHARS``,
    ``READABILITY`` and ``TYPE_TOKEN_RATIO``; readability is compared
    approximately.
    """
    stats_signal = TextStatisticsSignal()
    stats_signal.setup()

    texts = ['hello', 'hello world']
    # Materialize the result so it can be compared against a plain list.
    actual = list(stats_signal.compute(texts))

    expected_hello = {
        NUM_CHARS: 5,
        READABILITY: approx(2.62),
        TYPE_TOKEN_RATIO: 1.0,
    }
    expected_hello_world = {
        NUM_CHARS: 10,
        READABILITY: approx(3.12),
        TYPE_TOKEN_RATIO: 1.0,
    }

    assert actual == [expected_hello, expected_hello_world]
def test_text_statistics_missing_value() -> None:
    """Check that a missing (``None``) input yields ``None`` in the same position.

    The non-missing inputs on either side are still expected to produce
    their usual statistics dicts.
    """
    stats_signal = TextStatisticsSignal()
    stats_signal.setup()

    # The cast only satisfies the type checker; at runtime the value is None.
    missing_text = cast(str, None)
    texts = ['hello', missing_text, 'everybody']
    actual = list(stats_signal.compute(texts))

    expected_hello = {
        NUM_CHARS: 5,
        READABILITY: approx(2.62),
        TYPE_TOKEN_RATIO: 1.0,
    }
    expected_everybody = {
        NUM_CHARS: 9,
        READABILITY: approx(21.46),
        TYPE_TOKEN_RATIO: 1.0,
    }

    assert actual == [expected_hello, None, expected_everybody]