Spaces:
Sleeping
Sleeping
File size: 3,366 Bytes
290c238 2cfb891 290c238 2cfb891 290c238 2cfb891 290c238 2cfb891 290c238 2cfb891 290c238 2cfb891 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import itertools
import json
class TaxonomicNode:
    """
    One node of a taxonomic prefix tree.

    A node stores its own ``name``, the integer ``index`` it was given when it
    was created, a reference to the owning ``root`` object (whose ``size``
    attribute is used as the counter for new indices), and its children keyed
    by child name.
    """

    __slots__ = ("name", "index", "root", "_children")

    def __init__(self, name, index, root):
        self.name = name
        self.index = index
        self.root = root
        # Maps child name -> TaxonomicNode; insertion order is preserved.
        self._children = {}

    def add(self, name: list[str]) -> int:
        """
        Insert the path ``name`` below this node, creating missing nodes.

        Each newly created node takes the current ``root.size`` as its index,
        after which ``root.size`` is incremented.

        Returns:
            The number of nodes created by this call: 0 when ``name`` is
            empty or when the whole path already existed.
        """
        if not name:
            return 0

        first, rest = name[0], name[1:]
        created = 0
        child = self._children.get(first)
        if child is None:
            # Assign the index before bumping the counter so indices stay
            # dense and start at whatever root.size currently is.
            child = TaxonomicNode(first, self.root.size, self.root)
            self._children[first] = child
            self.root.size += 1
            created = 1

        return created + child.add(rest)

    def children(self, name):
        """
        Return the direct children of the node reached by following ``name``.

        Returns:
            A set of ``(child_name, child_index)`` tuples. An empty ``name``
            refers to this node itself; a path that does not exist yields an
            empty set.
        """
        if not name:
            return {(child.name, child.index) for child in self._children.values()}

        first, rest = name[0], name[1:]
        if first not in self._children:
            return set()

        return self._children[first].children(rest)

    def __iter__(self):
        """
        Yield ``(full_name, index)`` for this node and all its descendants.

        Descendant names are prefixed with this node's name, separated by a
        single space. This node comes first, then each child's subtree in
        insertion order.
        """
        yield self.name, self.index

        for child in self._children.values():
            for name, index in child:
                yield f"{self.name} {name}", index

    @classmethod
    def from_dict(cls, dct, root):
        """
        Rebuild a node (and, recursively, its subtree) from a plain dict.

        ``dct`` must provide the keys ``"name"``, ``"index"`` and
        ``"children"`` (an iterable of dicts of the same shape). ``root`` is
        attached to every rebuilt node; its ``size`` is not modified here.
        """
        node = cls(dct["name"], dct["index"], root)
        node._children = {
            child["name"]: cls.from_dict(child, root) for child in dct["children"]
        }
        return node
class TaxonomicTree:
    """
    Prefix tree over taxonomic names, with kingdoms as the top level.

    Supports looking up the direct descendants of a name and iterating over
    every stored name together with its integer index. ``size`` is the
    counter from which new node indices are taken.
    """

    def __init__(self):
        self.size = 0
        # Maps kingdom name -> TaxonomicNode.
        self.kingdoms = {}

    def add(self, name: list[str]):
        """Insert the path ``name`` (kingdom first), creating missing nodes."""
        if not name:
            return

        kingdom_name = name[0]
        lower_ranks = name[1:]
        if kingdom_name not in self.kingdoms:
            # A new kingdom takes the current counter value as its index.
            self.kingdoms[kingdom_name] = TaxonomicNode(kingdom_name, self.size, self)
            self.size += 1

        self.kingdoms[kingdom_name].add(lower_ranks)

    def children(self, name=None):
        """
        Return ``(name, index)`` pairs for the direct children of ``name``.

        With no (or an empty) ``name`` the kingdoms themselves are returned.
        A path whose kingdom is unknown gives an empty set.
        """
        if name:
            kingdom_name = name[0]
            lower_ranks = name[1:]
            kingdom = self.kingdoms.get(kingdom_name)
            if kingdom is None:
                return set()
            return kingdom.children(lower_ranks)

        return {(kingdom.name, kingdom.index) for kingdom in self.kingdoms.values()}

    def __iter__(self):
        """Yield every entry produced by iterating each kingdom in turn."""
        for kingdom in self.kingdoms.values():
            for entry in kingdom:
                yield entry

    def __len__(self):
        """Return the current value of the ``size`` counter."""
        return self.size

    @classmethod
    def from_dict(cls, dct):
        """
        Rebuild a tree from a dict with the keys ``"kingdoms"`` and ``"size"``.

        Each item of ``dct["kingdoms"]`` is handed to
        ``TaxonomicNode.from_dict`` and stored under its ``"name"``.
        """
        tree = cls()
        restored = {}
        for entry in dct["kingdoms"]:
            restored[entry["name"]] = TaxonomicNode.from_dict(entry, tree)
        tree.kingdoms = restored
        tree.size = dct["size"]
        return tree
class TaxonomicJsonEncoder(json.JSONEncoder):
    """
    JSON encoder that knows how to serialize the taxonomic tree classes.

    A ``TaxonomicNode`` becomes ``{"name", "index", "children"}`` and a
    ``TaxonomicTree`` becomes ``{"kingdoms", "size"}``. The ``children`` and
    ``kingdoms`` lists hold node objects, which the encoder converts in turn
    by calling ``default`` again.
    """

    def default(self, obj):
        """
        Return a JSON-serializable stand-in for ``obj``.

        Raises:
            TypeError: if ``obj`` is neither a ``TaxonomicNode`` nor a
                ``TaxonomicTree`` (raised by the base class implementation).
        """
        if isinstance(obj, TaxonomicNode):
            return {
                "name": obj.name,
                "index": obj.index,
                "children": list(obj._children.values()),
            }
        if isinstance(obj, TaxonomicTree):
            return {
                "kingdoms": list(obj.kingdoms.values()),
                "size": obj.size,
            }
        # super() is already bound to self, so only obj is passed; the result
        # is returned so the base class's TypeError propagates normally.
        return super().default(obj)
def batched(iterable, n):
# batched('ABCDEFG', 3) --> ABC DEF G
if n < 1:
raise ValueError("n must be at least one")
it = iter(iterable)
while batch := tuple(itertools.islice(it, n)):
yield zip(*batch)
|