File size: 1,233 Bytes
e4f9cbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""Test the semantic search signal."""

from typing import cast

from pytest import approx

from ..schema import DataType, Field
from .text_statistics import NUM_CHARS, READABILITY, TYPE_TOKEN_RATIO, TextStatisticsSignal


def test_text_statistics_fields() -> None:
  """Check the schema reported by `TextStatisticsSignal.fields()`.

  The signal is expected to expose exactly three sub-fields: an INT32 character
  count plus FLOAT32 readability and type-token-ratio values.
  """
  stats_signal = TextStatisticsSignal()
  stats_signal.setup()

  reported_schema = stats_signal.fields()

  per_stat_fields = {
    NUM_CHARS: Field(dtype=DataType.INT32),
    READABILITY: Field(dtype=DataType.FLOAT32),
    TYPE_TOKEN_RATIO: Field(dtype=DataType.FLOAT32),
  }
  assert reported_schema == Field(fields=per_stat_fields)


def test_text_statistics_compute() -> None:
  """Check the statistics computed for a one-word and a two-word input."""
  stats_signal = TextStatisticsSignal()
  stats_signal.setup()

  results = list(stats_signal.compute(['hello', 'hello world']))

  one_word_stats = {
    NUM_CHARS: 5,
    READABILITY: approx(2.62),
    TYPE_TOKEN_RATIO: 1.0,
  }
  # NOTE(review): 'hello world' is 11 characters long but 10 is expected here,
  # so the count presumably skips whitespace -- confirm against the signal.
  two_word_stats = {
    NUM_CHARS: 10,
    READABILITY: approx(3.12),
    TYPE_TOKEN_RATIO: 1.0,
  }
  assert results == [one_word_stats, two_word_stats]


def test_text_statistics_missing_value() -> None:
  """Check that a `None` input yields `None` at the same output position.

  The strings on either side of the missing value must still be scored.
  """
  stats_signal = TextStatisticsSignal()
  stats_signal.setup()

  # `cast` only silences the type checker; at runtime the element is `None`.
  inputs = ['hello', cast(str, None), 'everybody']
  results = list(stats_signal.compute(inputs))

  stats_before_gap = {
    NUM_CHARS: 5,
    READABILITY: approx(2.62),
    TYPE_TOKEN_RATIO: 1.0,
  }
  stats_after_gap = {
    NUM_CHARS: 9,
    READABILITY: approx(21.46),
    TYPE_TOKEN_RATIO: 1.0,
  }
  assert results == [stats_before_gap, None, stats_after_gap]