from typing import List, Dict


class CustomSubsetsLabel:
    """Render a textual/HTML label listing corpus subsets with their shares.

    ``compute`` is the public entry point: it takes a mapping whose keys look
    like ``"<subset> (<frequency>)"`` and whose values are fractions, and
    returns one rendered snippet per subset wrapped between ``html_head`` and
    ``html_footer``.
    """

    def __init__(self) -> None:
        # Wrapper placed before / after the rendered entries.
        self.html_head = " "
        self.html_footer = ""
        # Subset name -> reference URL. Not read by any method in this class.
        self.subset_links = {
            'allwikis': "https://github.com/josecannete/wikiextractorforBERT",
            'DGT': "http://opus.nlpl.eu/DGT.php",
            'DOGC': "http://opus.nlpl.eu/DOGC.php",
            'ECB': "http://opus.nlpl.eu/ECB.php",
            'EMEA': "http://opus.nlpl.eu/EMEA.php",
            'EUBookShop': "http://opus.nlpl.eu/EUbookshop.php",
            'Europarl': "http://opus.nlpl.eu/Europarl.php",
            'GlobalVoices': "http://opus.nlpl.eu/GlobalVoices.php",
            'JRC': "http://opus.nlpl.eu/JRC-Acquis.php",
            'multiUN': "http://opus.nlpl.eu/MultiUN.php",
            'NewsCommentary11': "http://opus.nlpl.eu/News-Commentary-v11.php",
            'OpenSubtitles2018': "http://opus.nlpl.eu/OpenSubtitles-v2018.php",
            'ParaCrawl': "http://opus.nlpl.eu/ParaCrawl.php",
            'TED': "http://opus.nlpl.eu/TED2013.php",
            'UN': "http://opus.nlpl.eu/UN.php",
        }

    def __progressbar(
        self,
        percentage: float,
        subset: str,
        freq: int,
        size: int = 15
    ) -> str:
        """Return the rendered entry for a single subset.

        Args:
            percentage: Share of the subset, already expressed in percent.
            subset: Subset name shown in the entry.
            freq: Frequency shown next to the subset name.
            size: Currently unused; kept so existing calls remain valid.
        """
        # Explicit "\n" escapes keep the emitted text independent of how this
        # method happens to be indented in the source file.
        return f"\n\n{subset} (Frecuencia: {freq}) {percentage}%\n\n"

    def __render(
        self,
        subsets: List[str],
        freqs: List[int],
        percentages: List[float]
    ) -> str:
        """Concatenate one entry per subset between the head and the footer.

        The three lists are walked in lockstep; ``zip`` stops at the shortest.
        """
        # str.join builds the result in a single pass instead of repeated "+=".
        entries = "".join(
            self.__progressbar(percentage=perc, subset=subset, freq=freq)
            for subset, freq, perc in zip(subsets, freqs, percentages)
        )
        return self.html_head + entries + self.html_footer

    def compute(
        self,
        subsets_dic: Dict[str, float]
    ) -> str:
        """Parse ``subsets_dic`` and return the rendered label.

        Args:
            subsets_dic: Maps ``"<subset> (<frequency>)"`` to a fraction. The
                first whitespace-separated token is the subset name; the
                second token, minus its first and last character, is parsed
                as the integer frequency. The value is multiplied by 100 and
                rounded to two decimals.

        Returns:
            ``html_head``, followed by one entry per distinct subset name (in
            first-seen order), followed by ``html_footer``.

        Raises:
            IndexError: If a key has fewer than two whitespace-separated tokens.
            ValueError: If the frequency token does not contain an integer.
        """
        subsets_dic_info: Dict[str, Dict[str, float]] = {}
        for key, fraction in subsets_dic.items():
            tokens = key.split()  # split once, reuse for both fields
            name = tokens[0]
            # Strip the surrounding characters, e.g. "(120)" -> "120".
            freq = int(tokens[1][1:-1])
            # Keyed by name: a repeated name keeps its first position but
            # takes the values of its last occurrence.
            subsets_dic_info[name] = {
                'freq': freq,
                'perc': round(fraction * 100, 2),
            }

        subsets = list(subsets_dic_info)
        freqs = [entry['freq'] for entry in subsets_dic_info.values()]
        percentages = [entry['perc'] for entry in subsets_dic_info.values()]
        return self.__render(subsets, freqs, percentages)