# File: edia_datos_es/modules/module_customSubsetsLabel.py
# Hosting-page header residue (not code): "nanom's picture" /
# "Code correction and typing added" / 2d0d0c7
import html
from typing import List, Dict
class CustomSubsetsLabel:
    """Render per-subset frequency data as an HTML page of progress bars.

    The only public entry point is :meth:`compute`, which takes a mapping
    whose keys look like ``"<subset> (<freq>)"`` and returns a complete HTML
    document (head + one progress bar per subset + footer).
    """

    def __init__(
        self
    ) -> None:
        # Static prologue shared by every rendered page: it carries the CSS
        # for the <progress> element and for the "#myturn" bar container.
        self.html_head = """
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
progress {
-webkit-appearance: none;
}
progress::-webkit-progress-bar {
background-color: #666;
border-radius: 7px;
}
progress {
width:100%;
height:4px;
border-radius: 1px;
}
#myturn {
display: block;
position: relative;
margin: auto;
width: 90%;
padding: 2px;
}
</style>
</head>
<body>
"""
        self.html_footer = "</body></html>"

        # Subset name -> URL that the subset label links to.
        self.subset_links = {
            'allwikis': "https://github.com/josecannete/wikiextractorforBERT",
            'DGT': "http://opus.nlpl.eu/DGT.php",
            'DOGC': "http://opus.nlpl.eu/DOGC.php",
            'ECB': "http://opus.nlpl.eu/ECB.php",
            'EMEA': "http://opus.nlpl.eu/EMEA.php",
            'EUBookShop': "http://opus.nlpl.eu/EUbookshop.php",
            'Europarl': "http://opus.nlpl.eu/Europarl.php",
            'GlobalVoices': "http://opus.nlpl.eu/GlobalVoices.php",
            'JRC': "http://opus.nlpl.eu/JRC-Acquis.php",
            'multiUN': "http://opus.nlpl.eu/MultiUN.php",
            'NewsCommentary11': "http://opus.nlpl.eu/News-Commentary-v11.php",
            'OpenSubtitles2018': "http://opus.nlpl.eu/OpenSubtitles-v2018.php",
            'ParaCrawl': "http://opus.nlpl.eu/ParaCrawl.php",
            'TED': "http://opus.nlpl.eu/TED2013.php",
            'UN': "http://opus.nlpl.eu/UN.php",
        }

    def __progressbar(
        self,
        percentage: float,
        subset: str,
        freq: int,
        size: int = 15
    ) -> str:
        """Return the HTML for one labelled progress bar.

        Args:
            percentage: Value shown next to the bar; its integer part is
                used as the ``<progress>`` value (out of 100).
            subset: Subset name shown as the label.
            freq: Frequency shown in parentheses after the name.
            size: Font size in pixels of the label; the frequency text is
                rendered two pixels smaller.

        Returns:
            An HTML fragment (a ``<div id="myturn">`` element).
        """
        # Escape the name so that markup characters in it cannot break out
        # of the surrounding HTML.
        safe_name = html.escape(str(subset))
        label = (
            f'<strong>{safe_name}</strong> '
            f'<span style="font-size:{size-2}px">(Frecuencia: {freq})</span>'
        )

        # A subset without a registered link is rendered as plain text
        # instead of aborting the whole page with a KeyError.
        link = self.subset_links.get(subset)
        if link is not None:
            label = (
                f'<a href="{html.escape(link, quote=True)}" target="_blank">\n'
                f'{label}\n'
                f'</a>'
            )

        # NOTE(review): every bar reuses id="myturn"; the stylesheet in
        # html_head targets that id, so it is kept unchanged here.
        return f"""
<div id="myturn">
<progress value="{int(percentage)}" max="100"></progress>
<p style="text-align:left; font-size:{size}px; padding:0px;">
{label}
<span style="float:right;">
<strong>{percentage}%</strong>
</span>
</p>
</div>
"""

    def __render(
        self,
        subsets: List[str],
        freqs: List[int],
        percentages: List[float]
    ) -> str:
        """Return the full HTML document with one bar per subset.

        The three lists are consumed in lockstep; as with ``zip``, surplus
        items of a longer list are ignored.
        """
        bars = "".join(
            self.__progressbar(percentage=perc, subset=subset, freq=freq)
            for subset, freq, perc in zip(subsets, freqs, percentages)
        )
        return self.html_head + bars + self.html_footer

    def compute(
        self,
        subsets_dic: Dict[str, float]
    ) -> str:
        """Build the HTML page for a ``{"<subset> (<freq>)": ratio}`` mapping.

        Each key is split on whitespace: the first token is the subset name
        and the second token, with its first and last character removed
        (the surrounding parentheses), is parsed as the integer frequency.
        Each value is multiplied by 100 and rounded to two decimals to get
        the displayed percentage.

        Args:
            subsets_dic: Mapping from ``"<subset> (<freq>)"`` to a ratio.

        Returns:
            A complete HTML document as a string. An empty mapping yields
            just the head followed by the footer.

        Raises:
            IndexError: If a key has fewer than two whitespace-separated
                tokens.
            ValueError: If the frequency token is not an integer once its
                first and last characters are stripped.
        """
        # Keyed by subset name: a repeated name keeps its first position
        # and takes the most recent frequency/percentage.
        info = {}
        for key, ratio in subsets_dic.items():
            tokens = key.split()
            info[tokens[0]] = {
                'freq': int(tokens[1][1:-1]),
                'perc': round(ratio * 100, 2),
            }

        subsets = list(info)
        freqs = [entry['freq'] for entry in info.values()]
        percentages = [entry['perc'] for entry in info.values()]
        return self.__render(subsets, freqs, percentages)