def format_predictions(words, predictions):
    """Pair each word with its predicted label.

    Args:
        words: Iterable of tokens.
        predictions: Iterable of labels, aligned position-by-position
            with ``words``.

    Returns:
        A list of ``(word, label)`` tuples. Pairing stops at the end of
        the shorter input, so surplus items in the longer one are dropped.
    """
    return list(zip(words, predictions))
						|  | def process_predictions(predictions): | 
					
						
						|  | ''' | 
					
						
						|  | Tách các từ có dấu gạch dưới thành các từ riêng biệt với cùng nhãn | 
					
						
						|  | ''' | 
					
						
						|  | formatted = [] | 
					
						
						|  | for word, label in predictions: | 
					
						
						|  | if '_' in word: | 
					
						
						|  | formatted.append((word.replace('_', ' '), label)) | 
					
						
						|  | else: | 
					
						
						|  | formatted.append((word, label)) | 
					
						
						|  | return formatted | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def combine_entities(predictions): | 
					
						
						|  | combined = [] | 
					
						
						|  | temp_entity = [] | 
					
						
						|  | temp_label = None | 
					
						
						|  |  | 
					
						
						|  | for word, label in predictions: | 
					
						
						|  | if label.startswith('B-'): | 
					
						
						|  | if temp_entity: | 
					
						
						|  | combined.append((' '.join(temp_entity), temp_label)) | 
					
						
						|  | temp_entity = [] | 
					
						
						|  | temp_entity.append(word) | 
					
						
						|  | temp_label = label | 
					
						
						|  | elif label.startswith('I-') and temp_label and label[2:] == temp_label[2:]: | 
					
						
						|  | temp_entity.append(word) | 
					
						
						|  | else: | 
					
						
						|  | if temp_entity: | 
					
						
						|  | combined.append((' '.join(temp_entity), temp_label)) | 
					
						
						|  | temp_entity = [] | 
					
						
						|  | temp_label = None | 
					
						
						|  | combined.append((word, label)) | 
					
						
						|  |  | 
					
						
						|  | if temp_entity: | 
					
						
						|  | combined.append((' '.join(temp_entity), temp_label)) | 
					
						
						|  |  | 
					
						
						|  | return combined | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def remove_B_prefix(entities): | 
					
						
						|  | modified_entities = [] | 
					
						
						|  | for word, label in entities: | 
					
						
						|  | if label.startswith('B-'): | 
					
						
						|  | label = label[2:] | 
					
						
						|  | modified_entities.append((word, label)) | 
					
						
						|  | return modified_entities | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def combine_i_tags(tokens_labels): | 
					
						
						|  | combined = [] | 
					
						
						|  | current_combination = [] | 
					
						
						|  | current_label = None | 
					
						
						|  |  | 
					
						
						|  | for token, label in tokens_labels: | 
					
						
						|  | if label.startswith('I-'): | 
					
						
						|  | label = label[2:] | 
					
						
						|  | if current_label is None: | 
					
						
						|  | current_label = label | 
					
						
						|  | current_combination.append(token) | 
					
						
						|  | elif current_label == label: | 
					
						
						|  | current_combination.append(token) | 
					
						
						|  | else: | 
					
						
						|  | combined.append((' '.join(current_combination), current_label)) | 
					
						
						|  | current_combination = [token] | 
					
						
						|  | current_label = label | 
					
						
						|  | else: | 
					
						
						|  | if current_combination: | 
					
						
						|  | combined.append((' '.join(current_combination), current_label)) | 
					
						
						|  | current_combination = [] | 
					
						
						|  | current_label = None | 
					
						
						|  | combined.append((token, label)) | 
					
						
						|  |  | 
					
						
						|  | if current_combination: | 
					
						
						|  | combined.append((' '.join(current_combination), current_label)) | 
					
						
						|  |  | 
					
						
						|  | return combined | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  |