Spaces:
Running
Running
VladGeekPro committed on
Commit ·
2a93301
1
Parent(s): 41a9860
Replaced Duckling with Natasha
Browse files- Dockerfile +5 -10
- app.py +29 -99
- duckling_client.py +0 -129
- natasha_dates.py +355 -0
- requirements.txt +2 -1
- supervisord.conf +0 -26
Dockerfile
CHANGED
|
@@ -1,26 +1,21 @@
|
|
| 1 |
-
FROM rasa/duckling:latest AS duckling
|
| 2 |
FROM python:3.11-slim
|
| 3 |
|
| 4 |
ENV PYTHONUNBUFFERED=1 PIP_NO_CACHE_DIR=1 HOME=/home/user \
|
| 5 |
PATH=/home/user/.local/bin:$PATH PORT=7860 \
|
| 6 |
WHISPER_MODEL=large-v3 WHISPER_COMPUTE_TYPE=int8 \
|
| 7 |
-
|
| 8 |
|
| 9 |
-
RUN
|
| 10 |
-
&&
|
| 11 |
-
ffmpeg supervisor libgmp10 libpcre3 libssl1.1 \
|
| 12 |
-
&& rm -rf /var/lib/apt/lists/* /etc/apt/sources.list.d/bullseye.list \
|
| 13 |
&& useradd -m -u 1000 user
|
| 14 |
|
| 15 |
-
COPY --from=duckling /usr/local/bin/duckling-example-exe /usr/local/bin/
|
| 16 |
-
|
| 17 |
USER user
|
| 18 |
WORKDIR /home/user/app
|
| 19 |
|
| 20 |
COPY --chown=user requirements.txt .
|
| 21 |
RUN pip install --upgrade pip && pip install -r requirements.txt
|
| 22 |
|
| 23 |
-
COPY --chown=user app.py
|
| 24 |
|
| 25 |
EXPOSE 7860
|
| 26 |
-
CMD ["
|
|
|
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
ENV PYTHONUNBUFFERED=1 PIP_NO_CACHE_DIR=1 HOME=/home/user \
|
| 4 |
PATH=/home/user/.local/bin:$PATH PORT=7860 \
|
| 5 |
WHISPER_MODEL=large-v3 WHISPER_COMPUTE_TYPE=int8 \
|
| 6 |
+
DATE_PARSER_MODE=natasha
|
| 7 |
|
| 8 |
+
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg \
|
| 9 |
+
&& rm -rf /var/lib/apt/lists/* \
|
|
|
|
|
|
|
| 10 |
&& useradd -m -u 1000 user
|
| 11 |
|
|
|
|
|
|
|
| 12 |
USER user
|
| 13 |
WORKDIR /home/user/app
|
| 14 |
|
| 15 |
COPY --chown=user requirements.txt .
|
| 16 |
RUN pip install --upgrade pip && pip install -r requirements.txt
|
| 17 |
|
| 18 |
+
COPY --chown=user app.py natasha_dates.py ./
|
| 19 |
|
| 20 |
EXPOSE 7860
|
| 21 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "120", "app:app"]
|
app.py
CHANGED
|
@@ -24,8 +24,8 @@ from sentence_transformers import SentenceTransformer
|
|
| 24 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 25 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
from
|
| 29 |
|
| 30 |
# HuggingFace Token (если нужен для моделей)
|
| 31 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
@@ -590,97 +590,17 @@ class ExpenseDateExtractor:
|
|
| 590 |
return value if isinstance(value, date) else datetime.strptime(value, "%Y-%m-%d").date()
|
| 591 |
|
| 592 |
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
Извлечение дат через Duckling.
|
| 596 |
-
Поддерживает как точные даты ("15 января 2025"), так и относительные ("завтра", "через 2 дня").
|
| 597 |
-
"""
|
| 598 |
-
|
| 599 |
-
def extract(self, text: str, reference_date: str | date | None = None) -> dict[str, Any]:
|
| 600 |
-
"""
|
| 601 |
-
Извлекает дату из текста с помощью Duckling.
|
| 602 |
-
|
| 603 |
-
Args:
|
| 604 |
-
text: Текст для анализа
|
| 605 |
-
reference_date: Опорная дата для относительных выражений (по умолчанию - сегодня)
|
| 606 |
-
|
| 607 |
-
Returns:
|
| 608 |
-
{
|
| 609 |
-
"date": "19.04.2026", # формат DD.MM.YYYY
|
| 610 |
-
"date_iso": "2026-04-19", # формат ISO
|
| 611 |
-
"matched_date_phrase": "завтра" # найденное выражение
|
| 612 |
-
}
|
| 613 |
-
"""
|
| 614 |
-
ref_datetime = None
|
| 615 |
-
if reference_date:
|
| 616 |
-
ref_date = self.to_date(reference_date)
|
| 617 |
-
ref_datetime = datetime.combine(ref_date, datetime.min.time()).replace(hour=12)
|
| 618 |
-
|
| 619 |
-
# Получаем все даты из текста
|
| 620 |
-
dates = parse_all_dates_with_duckling(text, locale="ru_RU", reference_time=ref_datetime)
|
| 621 |
-
|
| 622 |
-
if not dates:
|
| 623 |
-
return {
|
| 624 |
-
"date": None,
|
| 625 |
-
"date_iso": None,
|
| 626 |
-
"matched_date_phrase": None,
|
| 627 |
-
}
|
| 628 |
-
|
| 629 |
-
# Берём первую найденную дату
|
| 630 |
-
first = dates[0]
|
| 631 |
-
date_iso = first["date"]
|
| 632 |
-
|
| 633 |
-
try:
|
| 634 |
-
parsed_date = datetime.strptime(date_iso, "%Y-%m-%d")
|
| 635 |
-
date_formatted = parsed_date.strftime("%d.%m.%Y")
|
| 636 |
-
except ValueError:
|
| 637 |
-
date_formatted = None
|
| 638 |
-
|
| 639 |
-
return {
|
| 640 |
-
"date": date_formatted,
|
| 641 |
-
"date_iso": date_iso,
|
| 642 |
-
"matched_date_phrase": first["text"],
|
| 643 |
-
}
|
| 644 |
-
|
| 645 |
-
def extract_all(self, text: str, reference_date: str | date | None = None) -> list[dict[str, Any]]:
|
| 646 |
-
"""
|
| 647 |
-
Извлекает все даты из текста.
|
| 648 |
-
"""
|
| 649 |
-
ref_datetime = None
|
| 650 |
-
if reference_date:
|
| 651 |
-
ref_date = self.to_date(reference_date)
|
| 652 |
-
ref_datetime = datetime.combine(ref_date, datetime.min.time()).replace(hour=12)
|
| 653 |
-
|
| 654 |
-
dates = parse_all_dates_with_duckling(text, locale="ru_RU", reference_time=ref_datetime)
|
| 655 |
-
|
| 656 |
-
results = []
|
| 657 |
-
for d in dates:
|
| 658 |
-
try:
|
| 659 |
-
parsed_date = datetime.strptime(d["date"], "%Y-%m-%d")
|
| 660 |
-
date_formatted = parsed_date.strftime("%d.%m.%Y")
|
| 661 |
-
except ValueError:
|
| 662 |
-
date_formatted = None
|
| 663 |
-
|
| 664 |
-
results.append({
|
| 665 |
-
"date": date_formatted,
|
| 666 |
-
"date_iso": d["date"],
|
| 667 |
-
"matched_date_phrase": d["text"],
|
| 668 |
-
})
|
| 669 |
-
|
| 670 |
-
return results
|
| 671 |
-
|
| 672 |
-
@staticmethod
|
| 673 |
-
def to_date(value: str | date) -> date:
|
| 674 |
-
return value if isinstance(value, date) else datetime.strptime(value, "%Y-%m-%d").date()
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
# Выбор парсера дат: "duckling" или "legacy" (старый код)
|
| 678 |
-
DATE_PARSER_MODE = os.getenv("DATE_PARSER_MODE", "duckling")
|
| 679 |
|
| 680 |
def get_date_extractor():
|
| 681 |
-
"""
|
| 682 |
-
|
| 683 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
return ExpenseDateExtractor()
|
| 685 |
|
| 686 |
|
|
@@ -1241,10 +1161,11 @@ def index():
|
|
| 1241 |
return jsonify({
|
| 1242 |
"status": "ok",
|
| 1243 |
"message": "Voice processing API is running",
|
|
|
|
| 1244 |
"endpoints": {
|
| 1245 |
"POST /process-audio": "Process audio file",
|
| 1246 |
"GET /health": "Health check",
|
| 1247 |
-
"GET /
|
| 1248 |
}
|
| 1249 |
})
|
| 1250 |
|
|
@@ -1254,9 +1175,9 @@ def health():
|
|
| 1254 |
return jsonify({"status": "ok"})
|
| 1255 |
|
| 1256 |
|
| 1257 |
-
@app.get("/
|
| 1258 |
-
def
|
| 1259 |
-
"""Тестирование
|
| 1260 |
test_phrases = [
|
| 1261 |
"завтра",
|
| 1262 |
"через 2 дня",
|
|
@@ -1264,17 +1185,26 @@ def duckling_test():
|
|
| 1264 |
"15 января 2025",
|
| 1265 |
"позавчера",
|
| 1266 |
"в прошлый понедельник",
|
| 1267 |
-
"оплата за март"
|
|
|
|
|
|
|
|
|
|
| 1268 |
]
|
| 1269 |
|
|
|
|
| 1270 |
results = []
|
| 1271 |
for phrase in test_phrases:
|
| 1272 |
-
|
| 1273 |
-
results.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1274 |
|
| 1275 |
return jsonify({
|
| 1276 |
"status": "ok",
|
| 1277 |
-
"
|
|
|
|
| 1278 |
"results": results
|
| 1279 |
})
|
| 1280 |
|
|
|
|
| 24 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 25 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 26 |
|
| 27 |
+
# Natasha - лучший парсер дат для русского языка (работает локально)
|
| 28 |
+
from natasha_dates import NatashaDateExtractor, parse_date_natasha
|
| 29 |
|
| 30 |
# HuggingFace Token (если нужен для моделей)
|
| 31 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
| 590 |
return value if isinstance(value, date) else datetime.strptime(value, "%Y-%m-%d").date()
|
| 591 |
|
| 592 |
|
| 593 |
+
# Парсер дат: "natasha" (рекомендуется) или "legacy"
|
| 594 |
+
DATE_PARSER_MODE = os.getenv("DATE_PARSER_MODE", "natasha")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
def get_date_extractor():
    """
    Return the date extractor selected by DATE_PARSER_MODE.

    - natasha: best fit for Russian text (the default)
    - legacy: the old ExpenseDateExtractor implementation
    """
    # Guard-clause form: anything other than "natasha" falls back to legacy.
    if DATE_PARSER_MODE != "natasha":
        return ExpenseDateExtractor()
    return NatashaDateExtractor()
|
| 605 |
|
| 606 |
|
|
|
|
| 1161 |
return jsonify({
|
| 1162 |
"status": "ok",
|
| 1163 |
"message": "Voice processing API is running",
|
| 1164 |
+
"date_parser": DATE_PARSER_MODE,
|
| 1165 |
"endpoints": {
|
| 1166 |
"POST /process-audio": "Process audio file",
|
| 1167 |
"GET /health": "Health check",
|
| 1168 |
+
"GET /date-test": "Test date parsing"
|
| 1169 |
}
|
| 1170 |
})
|
| 1171 |
|
|
|
|
| 1175 |
return jsonify({"status": "ok"})
|
| 1176 |
|
| 1177 |
|
| 1178 |
+
@app.get("/date-test")
|
| 1179 |
+
def date_test():
|
| 1180 |
+
"""Тестирование парсера дат (использует текущий DATE_PARSER_MODE)."""
|
| 1181 |
test_phrases = [
|
| 1182 |
"завтра",
|
| 1183 |
"через 2 дня",
|
|
|
|
| 1185 |
"15 января 2025",
|
| 1186 |
"позавчера",
|
| 1187 |
"в прошлый понедельник",
|
| 1188 |
+
"оплата за март",
|
| 1189 |
+
"5 марта",
|
| 1190 |
+
"купил вчера",
|
| 1191 |
+
"в конце месяца"
|
| 1192 |
]
|
| 1193 |
|
| 1194 |
+
extractor = get_date_extractor()
|
| 1195 |
results = []
|
| 1196 |
for phrase in test_phrases:
|
| 1197 |
+
result = extractor.extract(phrase)
|
| 1198 |
+
results.append({
|
| 1199 |
+
"phrase": phrase,
|
| 1200 |
+
"date": result.get("date_iso"),
|
| 1201 |
+
"matched": result.get("matched_date_phrase")
|
| 1202 |
+
})
|
| 1203 |
|
| 1204 |
return jsonify({
|
| 1205 |
"status": "ok",
|
| 1206 |
+
"parser": DATE_PARSER_MODE,
|
| 1207 |
+
"reference_date": date.today().isoformat(),
|
| 1208 |
"results": results
|
| 1209 |
})
|
| 1210 |
|
duckling_client.py
DELETED
|
@@ -1,129 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import requests
|
| 3 |
-
from datetime import datetime
|
| 4 |
-
from typing import Optional
|
| 5 |
-
|
| 6 |
-
DUCKLING_URL = os.getenv("DUCKLING_URL", "http://localhost:8000/parse")
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def parse_date_with_duckling(
|
| 10 |
-
text: str,
|
| 11 |
-
locale: str = "ru_RU",
|
| 12 |
-
reference_time: Optional[datetime] = None,
|
| 13 |
-
timezone: str = "Europe/Moscow"
|
| 14 |
-
) -> Optional[str]:
|
| 15 |
-
"""
|
| 16 |
-
Извлекает дату из текста с помощью Duckling.
|
| 17 |
-
|
| 18 |
-
Args:
|
| 19 |
-
text: Текст для парсинга (например: "завтра", "через 2 дня", "15 января 2025")
|
| 20 |
-
locale: Локаль для парсинга (ru_RU для русского)
|
| 21 |
-
reference_time: Опорное время для относительных дат (по умолчанию - сейчас)
|
| 22 |
-
timezone: Часовой пояс
|
| 23 |
-
|
| 24 |
-
Returns:
|
| 25 |
-
ISO дата (YYYY-MM-DD) или None если дата не найдена
|
| 26 |
-
"""
|
| 27 |
-
if reference_time is None:
|
| 28 |
-
reference_time = datetime.now()
|
| 29 |
-
|
| 30 |
-
# Duckling требует Unix timestamp в миллисекундах
|
| 31 |
-
ref_time_ms = int(reference_time.timestamp() * 1000)
|
| 32 |
-
|
| 33 |
-
try:
|
| 34 |
-
response = requests.post(
|
| 35 |
-
DUCKLING_URL,
|
| 36 |
-
data={
|
| 37 |
-
"locale": locale,
|
| 38 |
-
"text": text,
|
| 39 |
-
"dims": '["time"]',
|
| 40 |
-
"reftime": ref_time_ms,
|
| 41 |
-
"tz": timezone
|
| 42 |
-
},
|
| 43 |
-
timeout=5
|
| 44 |
-
)
|
| 45 |
-
response.raise_for_status()
|
| 46 |
-
results = response.json()
|
| 47 |
-
|
| 48 |
-
if results and len(results) > 0:
|
| 49 |
-
# Берём первый найденный результат
|
| 50 |
-
value = results[0].get("value", {})
|
| 51 |
-
|
| 52 |
-
# Duckling возвращает разные форматы
|
| 53 |
-
if "value" in value:
|
| 54 |
-
# Формат: {"value": "2026-04-20T00:00:00.000+00:00", "grain": "day"}
|
| 55 |
-
date_str = value["value"]
|
| 56 |
-
# Извлекаем только дату
|
| 57 |
-
return date_str[:10] # "2026-04-20"
|
| 58 |
-
elif "from" in value:
|
| 59 |
-
# Интервал: {"from": {...}, "to": {...}}
|
| 60 |
-
from_value = value["from"].get("value", "")
|
| 61 |
-
return from_value[:10] if from_value else None
|
| 62 |
-
|
| 63 |
-
return None
|
| 64 |
-
|
| 65 |
-
except requests.RequestException as e:
|
| 66 |
-
print(f"Duckling error: {e}")
|
| 67 |
-
return None
|
| 68 |
-
except (KeyError, IndexError, ValueError) as e:
|
| 69 |
-
print(f"Duckling parse error: {e}")
|
| 70 |
-
return None
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
def parse_all_dates_with_duckling(
|
| 74 |
-
text: str,
|
| 75 |
-
locale: str = "ru_RU",
|
| 76 |
-
reference_time: Optional[datetime] = None,
|
| 77 |
-
timezone: str = "Europe/Moscow"
|
| 78 |
-
) -> list[dict]:
|
| 79 |
-
"""
|
| 80 |
-
Извлекает все даты из текста.
|
| 81 |
-
|
| 82 |
-
Returns:
|
| 83 |
-
Список словарей: [{"text": "завтра", "date": "2026-04-20", "start": 0, "end": 6}, ...]
|
| 84 |
-
"""
|
| 85 |
-
if reference_time is None:
|
| 86 |
-
reference_time = datetime.now()
|
| 87 |
-
|
| 88 |
-
# Duckling требует Unix timestamp в миллисекундах
|
| 89 |
-
ref_time_ms = int(reference_time.timestamp() * 1000)
|
| 90 |
-
|
| 91 |
-
try:
|
| 92 |
-
response = requests.post(
|
| 93 |
-
DUCKLING_URL,
|
| 94 |
-
data={
|
| 95 |
-
"locale": locale,
|
| 96 |
-
"text": text,
|
| 97 |
-
"dims": '["time"]',
|
| 98 |
-
"reftime": ref_time_ms,
|
| 99 |
-
"tz": timezone
|
| 100 |
-
},
|
| 101 |
-
timeout=5
|
| 102 |
-
)
|
| 103 |
-
response.raise_for_status()
|
| 104 |
-
results = response.json()
|
| 105 |
-
|
| 106 |
-
dates = []
|
| 107 |
-
for result in results:
|
| 108 |
-
value = result.get("value", {})
|
| 109 |
-
date_str = None
|
| 110 |
-
|
| 111 |
-
if "value" in value:
|
| 112 |
-
date_str = value["value"][:10]
|
| 113 |
-
elif "from" in value:
|
| 114 |
-
from_val = value["from"].get("value", "")
|
| 115 |
-
date_str = from_val[:10] if from_val else None
|
| 116 |
-
|
| 117 |
-
if date_str:
|
| 118 |
-
dates.append({
|
| 119 |
-
"text": result.get("body", ""),
|
| 120 |
-
"date": date_str,
|
| 121 |
-
"start": result.get("start", 0),
|
| 122 |
-
"end": result.get("end", 0)
|
| 123 |
-
})
|
| 124 |
-
|
| 125 |
-
return dates
|
| 126 |
-
|
| 127 |
-
except Exception as e:
|
| 128 |
-
print(f"Duckling error: {e}")
|
| 129 |
-
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
natasha_dates.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Профессиональный парсер дат для русского языка.
|
| 3 |
+
Использует Natasha (DatesExtractor) - лучшее решение для русского.
|
| 4 |
+
Поддерживает точные и относительные даты с морфологическим анализом.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from datetime import date, datetime, timedelta
|
| 9 |
+
from typing import Any, Optional
|
| 10 |
+
from dateutil.relativedelta import relativedelta
|
| 11 |
+
|
| 12 |
+
from natasha import DatesExtractor, MorphVocab
|
| 13 |
+
from natasha.extractors import Match
|
| 14 |
+
|
| 15 |
+
# Natasha singletons: both objects are expensive to construct, so they are
# built once on first use and cached at module level.
_MORPH_VOCAB: Optional[MorphVocab] = None
_DATES_EXTRACTOR: Optional[DatesExtractor] = None


def _get_extractor() -> DatesExtractor:
    """Return the shared DatesExtractor, creating it lazily on first call."""
    global _MORPH_VOCAB, _DATES_EXTRACTOR
    if _DATES_EXTRACTOR is not None:
        return _DATES_EXTRACTOR
    _MORPH_VOCAB = MorphVocab()
    _DATES_EXTRACTOR = DatesExtractor(_MORPH_VOCAB)
    return _DATES_EXTRACTOR
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# Regex patterns for relative date expressions (Natasha does not reliably
# recognize these, so they are handled by hand). Each value is either a
# callable taking the reference date (plus an optional captured number), or
# a string tag ('past_weekday' / 'next_weekday') handled separately.
RELATIVE_PATTERNS = {
    # Simple relative words.
    r'\bсегодня\b': lambda base: base,
    r'\bзавтра\b': lambda base: base + timedelta(days=1),
    r'\bпослезавтра\b': lambda base: base + timedelta(days=2),
    r'\bвчера\b': lambda base: base - timedelta(days=1),
    r'\bпозавчера\b': lambda base: base - timedelta(days=2),

    # "in X days/weeks/months" (future offset).
    r'\bчерез\s+(\d+)\s+(?:день|дня|дней)\b': lambda base, count: base + timedelta(days=int(count)),
    r'\bчерез\s+(\d+)\s+(?:неделю|недели|недель)\b': lambda base, count: base + timedelta(weeks=int(count)),
    r'\bчерез\s+(\d+)\s+(?:месяц|месяца|месяцев)\b': lambda base, count: base + relativedelta(months=int(count)),

    # "X days/weeks/months ago" (past offset).
    r'\b(\d+)\s+(?:день|дня|дней)\s+назад\b': lambda base, count: base - timedelta(days=int(count)),
    r'\b(\d+)\s+(?:неделю|недели|недель)\s+назад\b': lambda base, count: base - timedelta(weeks=int(count)),
    r'\b(\d+)\s+(?:месяц|месяца|месяцев)\s+назад\b': lambda base, count: base - relativedelta(months=int(count)),

    # Weekday names, resolved via _parse_weekday (string tags, not callables).
    r'\b(?:в\s+)?(?:прошлый|прошлую)\s+(понедельник|вторник|среду|четверг|пятницу|субботу|воскресенье)\b': 'past_weekday',
    r'\b(?:в\s+)?(?:следующий|следующую|этот|эту)\s+(понедельник|вторник|среду|четверг|пятницу|субботу|воскресенье)\b': 'next_weekday',

    # Whole-week references.
    r'\b(?:на\s+)?(?:прошлой|прошлую)\s+неделю?\b': lambda base: base - timedelta(weeks=1),
    r'\b(?:на\s+)?(?:следующей|следующую)\s+неделю?\b': lambda base: base + timedelta(weeks=1),
    r'\b(?:на\s+)?этой\s+неделе\b': lambda base: base,

    # Start/end of month or week.
    r'\b(?:в\s+)?начал[еоа]\s+месяца\b': lambda base: base.replace(day=1),
    r'\b(?:в\s+)?конц[еа]\s+месяца\b': lambda base: (base.replace(day=1) + relativedelta(months=1) - timedelta(days=1)),
    r'\b(?:в\s+)?начал[еоа]\s+недели\b': lambda base: base - timedelta(days=base.weekday()),
    r'\b(?:в\s+)?конц[еа]\s+недели\b': lambda base: base + timedelta(days=6-base.weekday()),
}
|
| 63 |
+
|
| 64 |
+
WEEKDAY_MAP = {
|
| 65 |
+
'понедельник': 0, 'вторник': 1, 'среду': 2, 'среда': 2,
|
| 66 |
+
'четверг': 3, 'пятницу': 4, 'пятница': 4,
|
| 67 |
+
'субботу': 5, 'суббота': 5, 'воскресенье': 6,
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
# Индикаторы прошедшего времени для контекстной коррекции
|
| 71 |
+
PAST_CONTEXT_WORDS = re.compile(
|
| 72 |
+
r'\b(оплата|оплатил[аи]?|заплатил[аи]?|купил[аи]?|заказал[аи]?|'
|
| 73 |
+
r'потратил[аи]?|был[аио]?|получил[аи]?|сделал[аи]?)\b',
|
| 74 |
+
re.IGNORECASE
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _parse_weekday(text: str, reference: date, direction: str) -> Optional[date]:
|
| 79 |
+
"""Парсит день недели относительно reference."""
|
| 80 |
+
text_lower = text.lower()
|
| 81 |
+
for name, weekday_num in WEEKDAY_MAP.items():
|
| 82 |
+
if name in text_lower:
|
| 83 |
+
days_diff = weekday_num - reference.weekday()
|
| 84 |
+
if direction == 'past_weekday':
|
| 85 |
+
if days_diff >= 0:
|
| 86 |
+
days_diff -= 7
|
| 87 |
+
else: # next_weekday
|
| 88 |
+
if days_diff <= 0:
|
| 89 |
+
days_diff += 7
|
| 90 |
+
return reference + timedelta(days=days_diff)
|
| 91 |
+
return None
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _parse_relative_date(text: str, reference: date) -> Optional[tuple[date, str]]:
    """Try every entry of RELATIVE_PATTERNS against *text*.

    Returns (resolved_date, matched_substring) for the first pattern that
    both matches and resolves to a date, otherwise None.
    """
    lowered = text.lower()

    for pattern, handler in RELATIVE_PATTERNS.items():
        hit = re.search(pattern, lowered, re.IGNORECASE)
        if not hit:
            continue
        fragment = hit.group(0)

        if handler in ('past_weekday', 'next_weekday'):
            # String tags dispatch to the weekday resolver.
            resolved = _parse_weekday(fragment, reference, handler)
        elif callable(handler):
            captured = hit.groups()
            # Numeric patterns capture one group (the count); plain patterns
            # capture nothing and take only the reference date.
            resolved = handler(reference, captured[0]) if captured else handler(reference)
        else:
            continue

        if resolved:
            return resolved, fragment

    return None
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _natasha_match_to_date(match: Match, reference: date) -> Optional[date]:
    """Convert a Natasha date match into a datetime.date, or None.

    A missing year falls back to the reference year; a missing month makes
    the match unusable; a missing day defaults to the 1st of the month.
    """
    fact = match.fact

    year = getattr(fact, 'year', None)
    month = getattr(fact, 'month', None)
    day = getattr(fact, 'day', None)

    # A month is the minimum needed to build a date at all.
    if month is None:
        return None
    if year is None:
        year = reference.year
    if day is None:
        day = 1

    try:
        return date(year, month, day)
    except ValueError:
        # Impossible calendar date, e.g. "31 февраля".
        return None
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _adjust_date_by_context(parsed_date: date, text: str, reference: date) -> date:
    """Shift a future date one year back when the text talks about the past.

    E.g. "оплата за март" said in January refers to last March; the
    past-tense markers in PAST_CONTEXT_WORDS trigger the correction.
    """
    mentions_past = PAST_CONTEXT_WORDS.search(text) is not None
    if mentions_past and parsed_date > reference:
        return parsed_date - relativedelta(years=1)
    return parsed_date
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def parse_date_natasha(
    text: str,
    reference_date: Optional[date] = None
) -> dict[str, Any]:
    """
    Extract the first date mentioned in Russian *text*.

    Three strategies are tried in order:
      1. hand-written relative patterns ("завтра", "через 2 дня", ...) —
         more dependable than the statistical extractor for these;
      2. Natasha's DatesExtractor for explicit dates ("15 января 2025");
      3. a bare month-name fallback ("за март" -> 1st of that month).

    Args:
        text: Text to analyse.
        reference_date: Anchor for relative expressions (default: today).

    Returns:
        {
            "date": "19.04.2026",          # DD.MM.YYYY or None
            "date_iso": "2026-04-19",      # ISO format or None
            "matched_date_phrase": "завтра",
            "parser": "relative"           # or "natasha" / "fallback_month" / None
        }
    """
    if reference_date is None:
        reference_date = date.today()

    result = {
        "date": None,
        "date_iso": None,
        "matched_date_phrase": None,
        "parser": None
    }

    # 1. Relative patterns first ("завтра", "через 2 дня").
    relative_result = _parse_relative_date(text, reference_date)
    if relative_result:
        parsed_date, matched = relative_result
        parsed_date = _adjust_date_by_context(parsed_date, text, reference_date)
        result["date"] = parsed_date.strftime("%d.%m.%Y")
        result["date_iso"] = parsed_date.isoformat()
        result["matched_date_phrase"] = matched
        result["parser"] = "relative"
        return result

    # 2. Natasha for explicit dates ("15 января 2025", "5 марта").
    try:
        extractor = _get_extractor()
        matches = list(extractor(text))

        if matches:
            # First mention wins.
            match = matches[0]
            parsed_date = _natasha_match_to_date(match, reference_date)

            if parsed_date:
                parsed_date = _adjust_date_by_context(parsed_date, text, reference_date)
                result["date"] = parsed_date.strftime("%d.%m.%Y")
                result["date_iso"] = parsed_date.isoformat()
                result["matched_date_phrase"] = text[match.start:match.stop]
                result["parser"] = "natasha"
                return result
    except Exception as e:
        # Best-effort: fall through to the month-name fallback on any
        # extractor failure rather than propagating.
        print(f"Natasha error: {e}")

    # 3. Fallback: a month name without a day ("за март", "за апрель").
    month_pattern = re.compile(
        r'\b(?:за|в|на)\s+(январ[ья]|феврал[ья]|март[а]?|апрел[ья]|ма[йя]|июн[ья]|'
        r'июл[ья]|август[а]?|сентябр[ья]|октябр[ья]|ноябр[ья]|декабр[ья])\b',
        re.IGNORECASE
    )
    month_match = month_pattern.search(text)
    if month_match:
        # Stem -> month number; longer stems ('март') are listed before
        # shorter ones that would also match ('ма'), so order matters.
        month_names = {
            'январ': 1, 'феврал': 2, 'март': 3, 'апрел': 4, 'ма': 5, 'июн': 6,
            'июл': 7, 'август': 8, 'сентябр': 9, 'октябр': 10, 'ноябр': 11, 'декабр': 12
        }
        month_text = month_match.group(1).lower()
        for prefix, month_num in month_names.items():
            if month_text.startswith(prefix):
                # Year heuristic: a future month mentioned in a past-tense
                # sentence ("оплатил за декабрь" said in January) means last
                # year; otherwise keep the reference year.
                # (Removed a dead `elif ...: pass` branch that had no effect.)
                year = reference_date.year
                if month_num > reference_date.month and PAST_CONTEXT_WORDS.search(text):
                    year -= 1

                try:
                    parsed_date = date(year, month_num, 1)
                    result["date"] = parsed_date.strftime("%d.%m.%Y")
                    result["date_iso"] = parsed_date.isoformat()
                    result["matched_date_phrase"] = month_match.group(0)
                    result["parser"] = "fallback_month"
                    return result
                except ValueError:
                    pass
                break

    return result
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def parse_all_dates_natasha(
    text: str,
    reference_date: Optional[date] = None
) -> list[dict[str, Any]]:
    """Extract every date mentioned in *text* (Natasha hits + one relative hit)."""
    if reference_date is None:
        reference_date = date.today()

    results = []

    # Explicit dates found by Natasha.
    try:
        extractor = _get_extractor()
        for match in extractor(text):
            parsed_date = _natasha_match_to_date(match, reference_date)
            if not parsed_date:
                continue
            parsed_date = _adjust_date_by_context(parsed_date, text, reference_date)
            results.append({
                "date": parsed_date.strftime("%d.%m.%Y"),
                "date_iso": parsed_date.isoformat(),
                "matched_date_phrase": text[match.start:match.stop],
                "start": match.start,
                "end": match.stop,
                "parser": "natasha"
            })
    except Exception as e:
        print(f"Natasha error: {e}")

    # At most one relative-pattern hit, skipped if Natasha already found
    # the same calendar date.
    relative_result = _parse_relative_date(text, reference_date)
    if relative_result:
        parsed_date, matched = relative_result
        parsed_date = _adjust_date_by_context(parsed_date, text, reference_date)
        iso = parsed_date.isoformat()
        if all(entry["date_iso"] != iso for entry in results):
            position = text.lower().find(matched.lower())
            results.append({
                "date": parsed_date.strftime("%d.%m.%Y"),
                "date_iso": iso,
                "matched_date_phrase": matched,
                "start": position,
                "end": position + len(matched),
                "parser": "relative"
            })

    return results
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
# Backwards-compatibility wrapper
class NatashaDateExtractor:
    """
    Date extractor backed by Natasha.

    Drop-in replacement for the legacy extractor: same extract()/extract_all()
    interface and the same three-key result dicts (no internal "parser" key).
    """

    @staticmethod
    def _coerce_reference(reference_date):
        """Accept None, a date, or a 'YYYY-MM-DD' string; return a date."""
        ref = reference_date or date.today()
        if isinstance(ref, str):
            ref = datetime.strptime(ref, "%Y-%m-%d").date()
        return ref

    @staticmethod
    def _legacy_shape(item: dict[str, Any]) -> dict[str, Any]:
        """Strip the internal "parser" key for legacy-compatible output."""
        return {
            "date": item["date"],
            "date_iso": item["date_iso"],
            "matched_date_phrase": item["matched_date_phrase"],
        }

    def extract(self, text: str, reference_date: Optional[date] = None) -> dict[str, Any]:
        """Extract the first date found in the text."""
        ref = self._coerce_reference(reference_date)
        return self._legacy_shape(parse_date_natasha(text, ref))

    def extract_all(self, text: str, reference_date: Optional[date] = None) -> list[dict[str, Any]]:
        """Extract all dates found in the text."""
        ref = self._coerce_reference(reference_date)
        return [self._legacy_shape(r) for r in parse_all_dates_natasha(text, ref)]
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
if __name__ == "__main__":
    # Manual smoke test: exercise the parser over a spread of phrase styles
    # (relative words, offsets, explicit dates, bare month names).
    test_phrases = [
        "завтра",
        "через 2 дня",
        "на следующей неделе",
        "15 января 2025",
        "позавчера",
        "в прошлый понедельник",
        "оплата за март",
        "5 марта",
        "купил вчера",
        "в конце месяца",
    ]

    print("Тестирование Natasha DateExtractor:\n")
    for phrase in test_phrases:
        outcome = parse_date_natasha(phrase)
        print(f" '{phrase}' -> {outcome['date_iso']} ({outcome['parser']})")
|
requirements.txt
CHANGED
|
@@ -4,9 +4,10 @@ faster-whisper
|
|
| 4 |
pymorphy3
|
| 5 |
rapidfuzz
|
| 6 |
dateparser
|
|
|
|
| 7 |
iuliia
|
| 8 |
torch
|
| 9 |
sentence-transformers
|
| 10 |
scikit-learn
|
| 11 |
gliner
|
| 12 |
-
|
|
|
|
| 4 |
pymorphy3
|
| 5 |
rapidfuzz
|
| 6 |
dateparser
|
| 7 |
+
python-dateutil
|
| 8 |
iuliia
|
| 9 |
torch
|
| 10 |
sentence-transformers
|
| 11 |
scikit-learn
|
| 12 |
gliner
|
| 13 |
+
natasha
|
supervisord.conf
DELETED
|
@@ -1,26 +0,0 @@
|
|
| 1 |
-
[supervisord]
|
| 2 |
-
nodaemon=true
|
| 3 |
-
logfile=/tmp/supervisord.log
|
| 4 |
-
pidfile=/tmp/supervisord.pid
|
| 5 |
-
childlogdir=/tmp
|
| 6 |
-
|
| 7 |
-
[program:duckling]
|
| 8 |
-
command=/usr/local/bin/duckling-example-exe -p 8000
|
| 9 |
-
autostart=true
|
| 10 |
-
autorestart=true
|
| 11 |
-
stdout_logfile=/dev/stdout
|
| 12 |
-
stdout_logfile_maxbytes=0
|
| 13 |
-
stderr_logfile=/dev/stderr
|
| 14 |
-
stderr_logfile_maxbytes=0
|
| 15 |
-
startsecs=3
|
| 16 |
-
|
| 17 |
-
[program:gunicorn]
|
| 18 |
-
command=gunicorn --bind 0.0.0.0:7860 --workers 1 --threads 8 --timeout 120 app:app
|
| 19 |
-
directory=/home/user/app
|
| 20 |
-
autostart=true
|
| 21 |
-
autorestart=true
|
| 22 |
-
stdout_logfile=/dev/stdout
|
| 23 |
-
stdout_logfile_maxbytes=0
|
| 24 |
-
stderr_logfile=/dev/stderr
|
| 25 |
-
stderr_logfile_maxbytes=0
|
| 26 |
-
startsecs=5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|