def parse(f, comment="#"):
    """
    Parse a file in ``.fasta`` format.

    Works with both text and binary streams. Whether the stream is text or
    binary is detected from the type of the first line read, so any iterable
    of lines is accepted (regular files, ``io.StringIO``, ``io.BytesIO``,
    gzip streams, ...). For binary streams the names and sequences are
    returned as ``bytes``.

    Lines starting with ``comment`` are skipped (the check is done before
    surrounding whitespace is stripped). A line starting with ``>`` opens a
    new record; its name is the rest of that line. All following lines up to
    the next header are upper-cased and concatenated to form the sequence.
    Lines appearing before the first header are discarded.

    :param f: Input file object (or any iterable of lines)
    :type f: _io.TextIOWrapper
    :param comment: Character used for comments; converted to ``bytes`` /
        ``str`` as needed to match the stream type
    :type comment: str
    :return: names, sequence
    :rtype: list[str], list[str]
    """
    names = []
    sequences = []
    name = None
    chunks = []

    # Resolved lazily from the first line, because not every stream exposes
    # a usable ``mode`` attribute (e.g. StringIO has none, GzipFile's is int).
    starter = None
    empty = None

    for line in f:
        if starter is None:
            if isinstance(line, bytes):
                starter, empty = b">", b""
                # Honour the caller's comment character in binary mode too.
                if isinstance(comment, str):
                    comment = comment.encode("utf-8")
            else:
                starter, empty = ">", ""
                if isinstance(comment, bytes):
                    comment = comment.decode("utf-8")

        if line.startswith(comment):
            continue

        line = line.strip()
        if line.startswith(starter):
            # A new header closes the record collected so far, if any.
            if name is not None:
                names.append(name)
                sequences.append(empty.join(chunks))
            name = line[1:]
            chunks = []
        else:
            chunks.append(line.upper())

    # Flush the last record.
    if name is not None:
        names.append(name)
        sequences.append(empty.join(chunks))

    return names, sequences


def parse_directory(directory, extension=".seq"):
    """
    Parse all files in a directory ending with ``extension``.

    Each matching file is read in binary mode and decoded as UTF-8. Only the
    first record of each file is kept. Files are visited in the order
    returned by ``os.listdir``.

    :param directory: Input directory
    :type directory: str
    :param extension: Extension of all files to read in
    :type extension: str
    :return: names, sequence
    :rtype: list[str], list[str]
    :raises IndexError: If a matching file contains no record
    """
    # Local import keeps this function self-contained.
    import os

    names = []
    sequences = []
    for entry in os.listdir(directory):
        if not entry.endswith(extension):
            continue
        # Context manager guarantees the handle is closed, even on error.
        with open(os.path.join(directory, entry), "rb") as handle:
            n, s = parse(handle)
        names.append(n[0].decode("utf-8").strip())
        sequences.append(s[0].decode("utf-8").strip())
    return names, sequences


def write(nam, seq, f):
    """
    Write a file in ``.fasta`` format.

    Each record is written as a ``>name`` header line followed by the whole
    sequence on a single line. Names and sequences are paired with ``zip``,
    so extra items in the longer list are ignored.

    :param nam: List of names
    :type nam: list[str]
    :param seq: List of sequences
    :type seq: list[str]
    :param f: Output file object
    :type f: _io.TextIOWrapper
    """
    for n, s in zip(nam, seq):
        f.write(f">{n}\n{s}\n")