Spaces:
Sleeping
Sleeping
add max text length
Browse files - analyze.py +2 -1
analyze.py
CHANGED
@@ -10,6 +10,7 @@ from presidio_analyzer import AnalyzerEngine, BatchAnalyzerEngine, RecognizerRes
|
|
10 |
Row = dict[str, Any]
|
11 |
T = TypeVar("T")
|
12 |
BATCH_SIZE = 1
|
|
|
13 |
batch_analyzer: Optional[BatchAnalyzerEngine] = None
|
14 |
|
15 |
|
@@ -124,7 +125,7 @@ def presidio_scan_entities(
|
|
124 |
if batch_analyzer is None:
|
125 |
batch_analyser = BatchAnalyzerEngine(AnalyzerEngine())
|
126 |
rows_with_scanned_columns_only = (
|
127 |
-
{column_name: get_strings(row[column_name]) for column_name in scanned_columns} for row in rows
|
128 |
)
|
129 |
for indices, batch in batched(rows_with_scanned_columns_only, BATCH_SIZE, with_indices=True):
|
130 |
yield from analyze(
|
|
|
10 |
Row = dict[str, Any]
|
11 |
T = TypeVar("T")
|
12 |
BATCH_SIZE = 1
|
13 |
+
MAX_TEXT_LENGTH = 3000
|
14 |
batch_analyzer: Optional[BatchAnalyzerEngine] = None
|
15 |
|
16 |
|
|
|
125 |
if batch_analyzer is None:
|
126 |
batch_analyser = BatchAnalyzerEngine(AnalyzerEngine())
|
127 |
rows_with_scanned_columns_only = (
|
128 |
+
{column_name: get_strings(row[column_name])[:MAX_TEXT_LENGTH] for column_name in scanned_columns} for row in rows
|
129 |
)
|
130 |
for indices, batch in batched(rows_with_scanned_columns_only, BATCH_SIZE, with_indices=True):
|
131 |
yield from analyze(
|