STAR

Runtime error

STAR / fairseq /data /encoders /nltk_tokenizer.py

Yixuan Li

add fairseq folder

85ba398 2 months ago

755 Bytes

	# Copyright (c) Facebook, Inc. and its affiliates.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.

	from fairseq.data.encoders import register_tokenizer
	from fairseq.dataclass import FairseqDataclass


	@register_tokenizer("nltk", dataclass=FairseqDataclass)
	class NLTKTokenizer(object):
	    """Tokenizer registered under the name "nltk".

	    ``encode`` splits text with ``nltk.tokenize.word_tokenize`` and joins the
	    resulting tokens with single spaces; ``decode`` returns its input
	    unchanged.
	    """

	    def __init__(self, *unused):
	        """Bind NLTK's ``word_tokenize`` function to the instance.

	        Args:
	            *unused: Positional arguments; accepted and ignored.

	        Raises:
	            ImportError: If ``nltk.tokenize`` cannot be imported.
	        """
	        # The import happens here rather than at module level, so importing
	        # this module does not itself require nltk.
	        try:
	            from nltk.tokenize import word_tokenize
	        except ImportError as err:
	            # Chain the original failure so the underlying cause stays
	            # visible in the traceback as the direct cause.
	            raise ImportError(
	                "Please install nltk with: pip install nltk"
	            ) from err

	        self.word_tokenize = word_tokenize

	    def encode(self, x: str) -> str:
	        """Return ``x`` tokenized by ``word_tokenize``, tokens joined by spaces."""
	        return " ".join(self.word_tokenize(x))

	    def decode(self, x: str) -> str:
	        """Return ``x`` unchanged; no detokenization is performed."""
	        return x