|
|
""" |
|
|
Utilities for loading and saving medical terms |
|
|
""" |
|
|
|
|
|
import logging |
|
|
from pathlib import Path |
|
|
from typing import List, Set |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
def load_terms_from_file(filepath: Path) -> Set[str]:
    """
    Read a comma-separated list of medical terms from a UTF-8 text file.

    The file content is split on commas; each piece is stripped of
    surrounding whitespace and empty pieces are discarded.

    Args:
        filepath: Location of the file holding the terms.

    Returns:
        The set of distinct, non-empty terms found in the file. An empty
        set is returned (and the problem is logged) when the file does not
        exist or any other error occurs while loading.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            raw_text = handle.read().strip()

        # Collect trimmed, non-empty pieces; the set removes duplicates.
        loaded: Set[str] = set()
        for piece in raw_text.split(','):
            cleaned = piece.strip()
            if cleaned:
                loaded.add(cleaned)

        logger.info(f"Loaded {len(loaded)} terms from {filepath}")
        return loaded
    except FileNotFoundError:
        logger.error(f"File not found: {filepath}")
    except Exception as exc:
        logger.error(f"Error loading terms: {exc}")

    # Both failure paths fall through to the same empty result.
    return set()
|
|
|
|
|
|
|
|
def save_terms_to_file(terms: List[str], filepath: Path) -> bool:
    """
    Write terms to a UTF-8 text file as a single sorted, comma-separated line.

    Args:
        terms: Terms to store; they are written in sorted order, joined
            with ", ".
        filepath: Destination file. It is opened in write mode, so any
            existing content is replaced.

    Returns:
        True when the file was written, False when any error occurred
        (the error is logged).
    """
    try:
        # Build the full text before opening the file, so a failure while
        # sorting or joining happens before the destination is touched.
        ordered_terms = sorted(terms)
        serialized = ', '.join(ordered_terms)

        with open(filepath, 'w', encoding='utf-8') as sink:
            sink.write(serialized)

        logger.info(f"Saved {len(terms)} terms to {filepath}")
    except Exception as exc:
        logger.error(f"Error saving terms: {exc}")
        return False

    return True
|
|
|
|
|
|
|
|
def merge_term_files(file1: Path, file2: Path, output: Path) -> int:
    """
    Combine the terms of two files and write the union to a third file.

    Args:
        file1: First file with terms.
        file2: Second file with terms.
        output: File that receives the merged terms.

    Returns:
        The number of unique terms in the merged set. The result of the
        save step is not checked, so the count is returned even if
        writing the output fails.
    """
    first_terms = load_terms_from_file(file1)
    second_terms = load_terms_from_file(file2)

    # Set union drops terms that appear in both inputs.
    combined = first_terms | second_terms
    unique_count = len(combined)

    save_terms_to_file(list(combined), output)

    logger.info(f"Merged {len(first_terms)} + {len(second_terms)} = {unique_count} unique terms")
    return unique_count
|
|
|