Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| from typing import List, Iterable, Union | |
| from pprint import pprint | |
class KeyphraseCount:
    """A keyphrase string paired with an integer count."""

    def __init__(self, keyphrase: str, count: int) -> None:
        """Store ``keyphrase`` and ``count`` as public attributes."""
        super().__init__()
        self.keyphrase = keyphrase
        self.count = count

    # NOTE(review): the first parameter is named ``cls`` but no @classmethod
    # decorator is present, and the argument is never used. The signature is
    # kept unchanged so existing call sites keep working -- confirm how
    # callers invoke this before adding the decorator.
    def reduce(cls, kcs: list) -> "KeyphraseCount":
        """Merge several KeyphraseCount objects into a single one.

        Args:
            kcs: The KeyphraseCount objects to merge (any iterable is
                accepted; it is consumed once).

        Returns:
            A new KeyphraseCount whose ``keyphrase`` is the input keyphrases
            joined with ``'/'`` (in input order) and whose ``count`` is the
            sum of the input counts. An empty input yields
            ``KeyphraseCount('', 0)`` instead of raising ``IndexError``.
        """
        # Materialize once so generators can be traversed for both the
        # joined keyphrase and the summed count.
        items = list(kcs)
        merged_keyphrase = '/'.join(kc.keyphrase for kc in items)
        total = sum(kc.count for kc in items)
        return KeyphraseCount(merged_keyphrase, total)
class SingleCluster:
    """One cluster: a list of member ids plus a keyphrase -> count tally."""

    def __init__(self):
        """Start with no members and no keyphrases."""
        self.__container__ = []
        self.__keyphrases__ = {}

    def add(self, id: int):
        """Append ``id`` to the cluster's member list."""
        self.__container__.append(id)

    def __str__(self) -> str:
        """Return the string form of the member list."""
        members = self.__container__
        return str(members)

    def elements(self) -> List:
        """Return the internal member list itself (not a copy)."""
        return self.__container__

    def get_keyphrases(self):
        """Return one KeyphraseCount per recorded keyphrase, in dict order."""
        return [
            KeyphraseCount(phrase, tally)
            for phrase, tally in self.__keyphrases__.items()
        ]

    def add_keyphrase(self, keyphrase: Union[str, Iterable]):
        """Record a keyphrase, or every keyphrase in a (nested) iterable.

        A string increments its own tally by one. Any other iterable is
        walked and each item is recorded recursively. Values that are
        neither strings nor iterables are ignored.
        """
        tallies = self.__keyphrases__
        if isinstance(keyphrase, str):
            # Strings are checked first: they are iterable too, and must be
            # counted as a whole rather than character by character.
            tallies[keyphrase] = tallies.get(keyphrase, 0) + 1
            return
        if not isinstance(keyphrase, Iterable):
            return
        for item in keyphrase:
            self.add_keyphrase(item)

    def __len__(self):
        """Return the number of members in the cluster."""
        return len(self.__container__)

    def print_keyphrases(self):
        """Pretty-print the keyphrase -> count mapping to stdout."""
        pprint(self.__keyphrases__)
class ClusterList:
    """An ordered collection of ``k`` SingleCluster objects.

    Supports indexing, slicing, iteration, ``len()`` and in-place sorting
    by cluster size.
    """

    def __init__(self, k: int):
        """Create ``k`` empty clusters."""
        self.__clusters__ = [SingleCluster() for _ in range(k)]

    # subscriptable and slice-able
    def __getitem__(self, idx):
        """Return the cluster at ``idx``, or a list of clusters for a slice.

        Args:
            idx: An ``int`` index or a ``slice``.

        Returns:
            A single cluster for an int, a list of clusters for a slice,
            and ``None`` for any other index type.
        """
        if isinstance(idx, (int, slice)):
            # Hand the slice object to the list unchanged. Rebuilding it with
            # 0 substituted for a missing step raised
            # "ValueError: slice step cannot be zero" on every plain slice,
            # and substituting 0 for a missing start broke negative steps.
            return self.__clusters__[idx]
        # Unsupported index types yield None, as before.
        return None

    def instantiate(self, labels: Iterable):
        """Fill the clusters from a sequence of cluster labels.

        The position of each label in ``labels`` is added to the cluster
        whose index equals that label.
        """
        for element_id, label in enumerate(labels):
            self.__clusters__[label].add(element_id)

    def __str__(self):
        """Return a summary: a header line, then one line per cluster."""
        header = f'There are {len(self.__clusters__)} clusters:\n'
        body = ''.join(
            f'cluster {position} contains: {cluster}.\n'
            for position, cluster in enumerate(self.__clusters__)
        )
        return header + body

    # return an iterator that can be used in for loop etc.
    def __iter__(self):
        """Iterate over the clusters in their current order."""
        return iter(self.__clusters__)

    def __len__(self):
        """Return the number of clusters."""
        return len(self.__clusters__)

    def sort(self):
        """Sort the clusters in place, largest (by ``len``) first."""
        self.__clusters__.sort(key=len, reverse=True)