Spaces:

Linhz
/

ViMNer

Runtime error

App Files Files Community

ViMNer / Model /MultimodelNER /Ner_processing.py

Linhz

Update Model/MultimodelNER/Ner_processing.py

281978e verified 5 months ago

raw

history blame contribute delete

4.38 kB

	def format_predictions(words, predictions):
	    '''
	    Pair each word with its predicted label.

	    Returns a list of (word, label) tuples built by walking both inputs
	    in lockstep. If the inputs differ in length, pairing stops at the
	    end of the shorter one.
	    '''
	    return list(zip(words, predictions))

	def process_predictions(predictions):
	    '''
	    Replace underscores in each word with spaces, keeping its label.

	    Takes an iterable of (word, label) pairs and returns a new list of
	    (word, label) tuples. The number and order of pairs are unchanged;
	    words without an underscore pass through as-is.
	    '''
	    # str.replace leaves the word untouched when it has no underscore,
	    # so no separate membership check is needed.
	    return [(text.replace('_', ' '), tag) for text, tag in predictions]


	def combine_entities(predictions):
	    '''
	    Merge each 'B-' tagged word with the matching 'I-' words after it.

	    A label starting with 'B-' opens a new span. Following labels that
	    start with 'I-' and carry the same suffix as the open span extend it.
	    Any other label closes the open span, and that pair is emitted
	    unchanged. A closed span is emitted as a single
	    (space-joined words, 'B-...' label) tuple.

	    Returns a new list of (text, label) tuples in input order.
	    '''
	    merged = []
	    span_words = []
	    span_label = None

	    for token, tag in predictions:
	        extends_span = (
	            span_label is not None
	            and tag.startswith('I-')
	            and tag[2:] == span_label[2:]
	        )
	        if extends_span:
	            span_words.append(token)
	            continue

	        # Every other tag ends whatever span is currently open.
	        if span_words:
	            merged.append((' '.join(span_words), span_label))

	        if tag.startswith('B-'):
	            span_words = [token]
	            span_label = tag
	        else:
	            span_words = []
	            span_label = None
	            merged.append((token, tag))

	    # Emit a span that is still open at the end of the input.
	    if span_words:
	        merged.append((' '.join(span_words), span_label))

	    return merged




	def remove_B_prefix(entities):
	    '''
	    Strip a leading 'B-' from every label.

	    Takes an iterable of (word, label) pairs and returns a new list of
	    (word, label) tuples. Labels that do not start with 'B-' are kept
	    as they are.
	    '''
	    return [
	        (text, tag[2:] if tag.startswith('B-') else tag)
	        for text, tag in entities
	    ]


	def combine_i_tags(tokens_labels):
	    '''
	    Merge runs of consecutive 'I-' tokens that share the same label.

	    The 'I-' prefix is stripped from each such label. Adjacent tokens
	    whose stripped labels are equal are joined with spaces into one
	    (text, label) tuple. A token whose label does not start with 'I-'
	    ends the current run and is emitted unchanged.

	    Returns a new list of (text, label) tuples in input order.
	    '''
	    merged = []
	    run_tokens = []
	    run_label = None

	    for text, tag in tokens_labels:
	        if not tag.startswith('I-'):
	            # A non-'I-' token ends the run and passes through as-is.
	            if run_tokens:
	                merged.append((' '.join(run_tokens), run_label))
	                run_tokens = []
	            run_label = None
	            merged.append((text, tag))
	            continue

	        kind = tag[2:]  # label without the 'I-' prefix
	        if run_label == kind:
	            run_tokens.append(text)
	            continue

	        # The label changed: emit the previous run, if any, and start anew.
	        if run_label is not None:
	            merged.append((' '.join(run_tokens), run_label))
	        run_tokens = [text]
	        run_label = kind

	    if run_tokens:
	        merged.append((' '.join(run_tokens), run_label))

	    return merged

	# tokens_labels = [('Dân', 'O'), ('trí', 'O'), ('Chức', 'O'), ('vô', 'O'), ('địch', 'O'), ('Euro 2008', 'EVENT-SPORT'), ('đầy', 'O'), ('thuyết', 'O'), ('phục', 'O'), ('của', 'O'), ('Tây Ban Nha', 'LOCATION'), ('trên', 'O'), ('đất', 'O'), ('Áo', 'LOCATION'), ('và', 'O'), ('Thụy Sĩ', 'PERSON'), ('đã', 'O'), ('mở', 'O'), ('ra', 'O'), ('kỷ', 'O'), ('nguyên', 'O'), ('vinh', 'O'), ('quanh', 'O'), ('của', 'O'), ('La', 'ORGANIZATION'), ('Furia', 'I-ORGANIZATION-SPORTS'), ('Roja', 'I-ORGANIZATION-SPORTS'), (',', 'O'), ('với', 'O'), ('lối', 'O'), ('chơi', 'O'), ('tiqui', 'O'), ('taka', 'O'), ('đầy', 'O'), ('biến', 'O'), ('ảo', 'O'), ('.', 'O'), ('Trong', 'O'), ('quá', 'O'), ('khứ', 'O'), (',', 'O'), ('Tây Ban Nha', 'LOCATION'), ('nổi', 'O'), ('tiếng', 'O'), ('với', 'O'), ('biệt', 'O'), ('danh', 'O'), ('Vua', 'O'), ('vòng', 'O'), ('loại', 'O'), ('.', 'O'), ('Họ', 'O'), ('thường', 'O'), ('thi', 'O'), ('đấu', 'O'), ('rất', 'O'), ('tốt', 'O'), ('ở', 'O'), ('vòng', 'O'), ('loại', 'O'), ('nhưng', 'O'), ('lại', 'O'), ('chưa', 'O'), ('bao', 'O'), ('giờ', 'O'), ('chứng', 'O'), ('minh', 'O'), ('được', 'O'), ('sức', 'O'), ('mạnh', 'O'), ('ở', 'O'), ('vòng', 'O'), ('chung', 'O'), ('kết', 'O'), ('giải', 'O'), ('đấu', 'O'), ('lớn', 'O'), ('.', 'O'), ('Lần', 'O'), ('duy', 'O'), ('nhất', 'O'), ('họ', 'O'), ('lên', 'O'), ('ngôi', 'O'), ('là', 'O'), ('ở', 'O'), ('kỳ', 'O'), ('Euro', 'EVENT-SPORT'), ('1964', 'O'), ('.', 'O')]

	# combined_tokens_labels = combine_i_tags(tokens_labels)
	# print(combined_tokens_labels)