Spaces:
Runtime error
Runtime error
File size: 815 Bytes
e2e35eb f214d73 e2e35eb 08879de e2e35eb f214d73 e2e35eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import math
from pathlib import Path
from typing import Dict, List
import spacy
from spacy import Language
# Module-wide spaCy pipeline. spacy.load() runs when this module is imported,
# so the "hu_core_news_trf" pipeline package must be available at import time.
# NOTE(review): presumably the Hungarian transformer pipeline - confirm.
NLP: Language = spacy.load("hu_core_news_trf")
def _compute_idf(freq_file: Path) -> Dict[str, float]:
    """Build an IDF-style weight table from a whitespace-separated frequency list.

    Every non-empty line is expected to carry a word in its first field and an
    integer frequency in its last field.  Each word ``w`` is mapped to
    ``log2(reference / (freq(w) + 1)) + 1``.

    ``reference`` is the frequency recorded for the word ``"a"`` when the list
    contains it; otherwise the largest frequency found in the list is used.
    When a word occurs on several lines, the last occurrence wins.

    Args:
        freq_file: Path of the frequency list.  The file is read as UTF-8.

    Returns:
        Mapping from word to weight.  Empty when the file holds no usable
        entries.

    Raises:
        ValueError: If the last field of a kept line is not an integer, or if
            the reference frequency is not positive (``log2`` domain error).
    """
    freqs: Dict[str, int] = {}
    # Explicit encoding: the word list must not depend on the platform locale.
    with freq_file.open(encoding="utf-8") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            fields: List[str] = line.split()
            if not fields:
                continue
            # A purely alphabetic line is a lone word without a frequency
            # column.  This check has to come before int(): placed after it,
            # such a line raised ValueError and the check could never skip it.
            if line.isalpha():
                continue
            freqs[fields[0]] = int(fields[-1])

    if not freqs:
        return {}

    # Keep "a" as the reference frequency when present; fall back to the
    # largest count instead of raising KeyError when the list has no "a".
    reference: int = freqs["a"] if "a" in freqs else max(freqs.values())
    # The +1 in the denominator keeps a zero frequency from dividing by zero.
    return {
        word: math.log2(reference / (float(count) + 1)) + 1
        for word, count in freqs.items()
    }
# Word -> weight table, computed once at import time from the frequency list
# stored at resources/freq.list, two directory levels above this file.
IDF: Dict[str, float] = _compute_idf(
    Path(__file__).parent.parent.joinpath("resources", "freq.list")
)
|