Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import argparse | |
| from fairseq.data import Dictionary, data_utils, indexed_dataset | |
def get_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser that accepts ``--dataset-impl``,
        ``--dict`` (optional, defaults to ``None``) and the required
        ``--input`` option.
    """
    cli = argparse.ArgumentParser(
        description="writes text from binarized file to stdout"
    )

    # Restrict --dataset-impl to whatever indexed_dataset reports as available.
    impl_choices = indexed_dataset.get_available_dataset_impl()
    cli.add_argument(
        "--dataset-impl",
        help="dataset implementation",
        choices=impl_choices,
    )
    cli.add_argument(
        "--dict",
        metavar="FP",
        help="dictionary containing known words",
        default=None,
    )
    cli.add_argument(
        "--input",
        metavar="FP",
        required=True,
        help="binarized file to read",
    )
    return cli
def main():
    """Print every entry of a binarized dataset to stdout, one per line.

    Parses the command line, loads the dictionary when ``--dict`` is given,
    and loads the dataset named by ``--input``. Each entry is rendered with
    ``dictionary.string`` when a dictionary is available, otherwise as
    space-separated integers.

    Exits through ``parser.error`` (usage message on stderr, status 2) when
    no dataset could be loaded from ``--input``.
    """
    parser = get_parser()
    args = parser.parse_args()

    dictionary = Dictionary.load(args.dict) if args.dict is not None else None
    dataset = data_utils.load_indexed_dataset(
        args.input,
        dictionary,
        dataset_impl=args.dataset_impl,
        default="lazy",
    )

    # NOTE(review): load_indexed_dataset is expected to return None when it
    # finds nothing loadable at the path (confirm against the installed
    # fairseq version). Iterating None would fail with an opaque TypeError,
    # so report the bad --input explicitly instead.
    if dataset is None:
        parser.error(
            "could not load a binarized dataset from {!r}".format(args.input)
        )

    for tensor_line in dataset:
        if dictionary is None:
            # No dictionary: fall back to printing the raw integer values.
            line = " ".join([str(int(x)) for x in tensor_line])
        else:
            line = dictionary.string(tensor_line)

        print(line)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()