Spaces:
Sleeping
Sleeping
""" | |
An OrderedSet is a custom MutableSet that remembers its order, so that every | |
entry has an index that can be looked up. | |
Based on a recipe originally posted to ActiveState Recipes by Raymond Hettiger, | |
and released under the MIT license. | |
""" | |
import itertools as it | |
from collections import deque | |
try: | |
# Python 3 | |
from collections.abc import MutableSet, Sequence | |
except ImportError: | |
# Python 2.7 | |
from collections import MutableSet, Sequence | |
SLICE_ALL = slice(None) | |
__version__ = "3.1" | |
def is_iterable(obj): | |
""" | |
Are we being asked to look up a list of things, instead of a single thing? | |
We check for the `__iter__` attribute so that this can cover types that | |
don't have to be known by this module, such as NumPy arrays. | |
Strings, however, should be considered as atomic values to look up, not | |
iterables. The same goes for tuples, since they are immutable and therefore | |
valid entries. | |
We don't need to check for the Python 2 `unicode` type, because it doesn't | |
have an `__iter__` attribute anyway. | |
""" | |
return ( | |
hasattr(obj, "__iter__") | |
and not isinstance(obj, str) | |
and not isinstance(obj, tuple) | |
) | |
class OrderedSet(MutableSet, Sequence): | |
""" | |
An OrderedSet is a custom MutableSet that remembers its order, so that | |
every entry has an index that can be looked up. | |
Example: | |
>>> OrderedSet([1, 1, 2, 3, 2]) | |
OrderedSet([1, 2, 3]) | |
""" | |
def __init__(self, iterable=None): | |
self.items = [] | |
self.map = {} | |
if iterable is not None: | |
self |= iterable | |
def __len__(self): | |
""" | |
Returns the number of unique elements in the ordered set | |
Example: | |
>>> len(OrderedSet([])) | |
0 | |
>>> len(OrderedSet([1, 2])) | |
2 | |
""" | |
return len(self.items) | |
def __getitem__(self, index): | |
""" | |
Get the item at a given index. | |
If `index` is a slice, you will get back that slice of items, as a | |
new OrderedSet. | |
If `index` is a list or a similar iterable, you'll get a list of | |
items corresponding to those indices. This is similar to NumPy's | |
"fancy indexing". The result is not an OrderedSet because you may ask | |
for duplicate indices, and the number of elements returned should be | |
the number of elements asked for. | |
Example: | |
>>> oset = OrderedSet([1, 2, 3]) | |
>>> oset[1] | |
2 | |
""" | |
if isinstance(index, slice) and index == SLICE_ALL: | |
return self.copy() | |
elif is_iterable(index): | |
return [self.items[i] for i in index] | |
elif hasattr(index, "__index__") or isinstance(index, slice): | |
result = self.items[index] | |
if isinstance(result, list): | |
return self.__class__(result) | |
else: | |
return result | |
else: | |
raise TypeError("Don't know how to index an OrderedSet by %r" % index) | |
def copy(self): | |
""" | |
Return a shallow copy of this object. | |
Example: | |
>>> this = OrderedSet([1, 2, 3]) | |
>>> other = this.copy() | |
>>> this == other | |
True | |
>>> this is other | |
False | |
""" | |
return self.__class__(self) | |
def __getstate__(self): | |
if len(self) == 0: | |
# The state can't be an empty list. | |
# We need to return a truthy value, or else __setstate__ won't be run. | |
# | |
# This could have been done more gracefully by always putting the state | |
# in a tuple, but this way is backwards- and forwards- compatible with | |
# previous versions of OrderedSet. | |
return (None,) | |
else: | |
return list(self) | |
def __setstate__(self, state): | |
if state == (None,): | |
self.__init__([]) | |
else: | |
self.__init__(state) | |
def __contains__(self, key): | |
""" | |
Test if the item is in this ordered set | |
Example: | |
>>> 1 in OrderedSet([1, 3, 2]) | |
True | |
>>> 5 in OrderedSet([1, 3, 2]) | |
False | |
""" | |
return key in self.map | |
def add(self, key): | |
""" | |
Add `key` as an item to this OrderedSet, then return its index. | |
If `key` is already in the OrderedSet, return the index it already | |
had. | |
Example: | |
>>> oset = OrderedSet() | |
>>> oset.append(3) | |
0 | |
>>> print(oset) | |
OrderedSet([3]) | |
""" | |
if key not in self.map: | |
self.map[key] = len(self.items) | |
self.items.append(key) | |
return self.map[key] | |
append = add | |
def update(self, sequence): | |
""" | |
Update the set with the given iterable sequence, then return the index | |
of the last element inserted. | |
Example: | |
>>> oset = OrderedSet([1, 2, 3]) | |
>>> oset.update([3, 1, 5, 1, 4]) | |
4 | |
>>> print(oset) | |
OrderedSet([1, 2, 3, 5, 4]) | |
""" | |
item_index = None | |
try: | |
for item in sequence: | |
item_index = self.add(item) | |
except TypeError: | |
raise ValueError( | |
"Argument needs to be an iterable, got %s" % type(sequence) | |
) | |
return item_index | |
def index(self, key): | |
""" | |
Get the index of a given entry, raising an IndexError if it's not | |
present. | |
`key` can be an iterable of entries that is not a string, in which case | |
this returns a list of indices. | |
Example: | |
>>> oset = OrderedSet([1, 2, 3]) | |
>>> oset.index(2) | |
1 | |
""" | |
if is_iterable(key): | |
return [self.index(subkey) for subkey in key] | |
return self.map[key] | |
# Provide some compatibility with pd.Index | |
get_loc = index | |
get_indexer = index | |
def pop(self): | |
""" | |
Remove and return the last element from the set. | |
Raises KeyError if the set is empty. | |
Example: | |
>>> oset = OrderedSet([1, 2, 3]) | |
>>> oset.pop() | |
3 | |
""" | |
if not self.items: | |
raise KeyError("Set is empty") | |
elem = self.items[-1] | |
del self.items[-1] | |
del self.map[elem] | |
return elem | |
def discard(self, key): | |
""" | |
Remove an element. Do not raise an exception if absent. | |
The MutableSet mixin uses this to implement the .remove() method, which | |
*does* raise an error when asked to remove a non-existent item. | |
Example: | |
>>> oset = OrderedSet([1, 2, 3]) | |
>>> oset.discard(2) | |
>>> print(oset) | |
OrderedSet([1, 3]) | |
>>> oset.discard(2) | |
>>> print(oset) | |
OrderedSet([1, 3]) | |
""" | |
if key in self: | |
i = self.map[key] | |
del self.items[i] | |
del self.map[key] | |
for k, v in self.map.items(): | |
if v >= i: | |
self.map[k] = v - 1 | |
def clear(self): | |
""" | |
Remove all items from this OrderedSet. | |
""" | |
del self.items[:] | |
self.map.clear() | |
def __iter__(self): | |
""" | |
Example: | |
>>> list(iter(OrderedSet([1, 2, 3]))) | |
[1, 2, 3] | |
""" | |
return iter(self.items) | |
def __reversed__(self): | |
""" | |
Example: | |
>>> list(reversed(OrderedSet([1, 2, 3]))) | |
[3, 2, 1] | |
""" | |
return reversed(self.items) | |
def __repr__(self): | |
if not self: | |
return "%s()" % (self.__class__.__name__,) | |
return "%s(%r)" % (self.__class__.__name__, list(self)) | |
def __eq__(self, other): | |
""" | |
Returns true if the containers have the same items. If `other` is a | |
Sequence, then order is checked, otherwise it is ignored. | |
Example: | |
>>> oset = OrderedSet([1, 3, 2]) | |
>>> oset == [1, 3, 2] | |
True | |
>>> oset == [1, 2, 3] | |
False | |
>>> oset == [2, 3] | |
False | |
>>> oset == OrderedSet([3, 2, 1]) | |
False | |
""" | |
# In Python 2 deque is not a Sequence, so treat it as one for | |
# consistent behavior with Python 3. | |
if isinstance(other, (Sequence, deque)): | |
# Check that this OrderedSet contains the same elements, in the | |
# same order, as the other object. | |
return list(self) == list(other) | |
try: | |
other_as_set = set(other) | |
except TypeError: | |
# If `other` can't be converted into a set, it's not equal. | |
return False | |
else: | |
return set(self) == other_as_set | |
def union(self, *sets): | |
""" | |
Combines all unique items. | |
Each items order is defined by its first appearance. | |
Example: | |
>>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0]) | |
>>> print(oset) | |
OrderedSet([3, 1, 4, 5, 2, 0]) | |
>>> oset.union([8, 9]) | |
OrderedSet([3, 1, 4, 5, 2, 0, 8, 9]) | |
>>> oset | {10} | |
OrderedSet([3, 1, 4, 5, 2, 0, 10]) | |
""" | |
cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet | |
containers = map(list, it.chain([self], sets)) | |
items = it.chain.from_iterable(containers) | |
return cls(items) | |
def __and__(self, other): | |
# the parent implementation of this is backwards | |
return self.intersection(other) | |
def intersection(self, *sets): | |
""" | |
Returns elements in common between all sets. Order is defined only | |
by the first set. | |
Example: | |
>>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3]) | |
>>> print(oset) | |
OrderedSet([1, 2, 3]) | |
>>> oset.intersection([2, 4, 5], [1, 2, 3, 4]) | |
OrderedSet([2]) | |
>>> oset.intersection() | |
OrderedSet([1, 2, 3]) | |
""" | |
cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet | |
if sets: | |
common = set.intersection(*map(set, sets)) | |
items = (item for item in self if item in common) | |
else: | |
items = self | |
return cls(items) | |
def difference(self, *sets): | |
""" | |
Returns all elements that are in this set but not the others. | |
Example: | |
>>> OrderedSet([1, 2, 3]).difference(OrderedSet([2])) | |
OrderedSet([1, 3]) | |
>>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3])) | |
OrderedSet([1]) | |
>>> OrderedSet([1, 2, 3]) - OrderedSet([2]) | |
OrderedSet([1, 3]) | |
>>> OrderedSet([1, 2, 3]).difference() | |
OrderedSet([1, 2, 3]) | |
""" | |
cls = self.__class__ | |
if sets: | |
other = set.union(*map(set, sets)) | |
items = (item for item in self if item not in other) | |
else: | |
items = self | |
return cls(items) | |
def issubset(self, other): | |
""" | |
Report whether another set contains this set. | |
Example: | |
>>> OrderedSet([1, 2, 3]).issubset({1, 2}) | |
False | |
>>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4}) | |
True | |
>>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5}) | |
False | |
""" | |
if len(self) > len(other): # Fast check for obvious cases | |
return False | |
return all(item in other for item in self) | |
def issuperset(self, other): | |
""" | |
Report whether this set contains another set. | |
Example: | |
>>> OrderedSet([1, 2]).issuperset([1, 2, 3]) | |
False | |
>>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3}) | |
True | |
>>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3}) | |
False | |
""" | |
if len(self) < len(other): # Fast check for obvious cases | |
return False | |
return all(item in self for item in other) | |
def symmetric_difference(self, other): | |
""" | |
Return the symmetric difference of two OrderedSets as a new set. | |
That is, the new set will contain all elements that are in exactly | |
one of the sets. | |
Their order will be preserved, with elements from `self` preceding | |
elements from `other`. | |
Example: | |
>>> this = OrderedSet([1, 4, 3, 5, 7]) | |
>>> other = OrderedSet([9, 7, 1, 3, 2]) | |
>>> this.symmetric_difference(other) | |
OrderedSet([4, 5, 9, 2]) | |
""" | |
cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet | |
diff1 = cls(self).difference(other) | |
diff2 = cls(other).difference(self) | |
return diff1.union(diff2) | |
def _update_items(self, items): | |
""" | |
Replace the 'items' list of this OrderedSet with a new one, updating | |
self.map accordingly. | |
""" | |
self.items = items | |
self.map = {item: idx for (idx, item) in enumerate(items)} | |
def difference_update(self, *sets): | |
""" | |
Update this OrderedSet to remove items from one or more other sets. | |
Example: | |
>>> this = OrderedSet([1, 2, 3]) | |
>>> this.difference_update(OrderedSet([2, 4])) | |
>>> print(this) | |
OrderedSet([1, 3]) | |
>>> this = OrderedSet([1, 2, 3, 4, 5]) | |
>>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6])) | |
>>> print(this) | |
OrderedSet([3, 5]) | |
""" | |
items_to_remove = set() | |
for other in sets: | |
items_to_remove |= set(other) | |
self._update_items([item for item in self.items if item not in items_to_remove]) | |
def intersection_update(self, other): | |
""" | |
Update this OrderedSet to keep only items in another set, preserving | |
their order in this set. | |
Example: | |
>>> this = OrderedSet([1, 4, 3, 5, 7]) | |
>>> other = OrderedSet([9, 7, 1, 3, 2]) | |
>>> this.intersection_update(other) | |
>>> print(this) | |
OrderedSet([1, 3, 7]) | |
""" | |
other = set(other) | |
self._update_items([item for item in self.items if item in other]) | |
def symmetric_difference_update(self, other): | |
""" | |
Update this OrderedSet to remove items from another set, then | |
add items from the other set that were not present in this set. | |
Example: | |
>>> this = OrderedSet([1, 4, 3, 5, 7]) | |
>>> other = OrderedSet([9, 7, 1, 3, 2]) | |
>>> this.symmetric_difference_update(other) | |
>>> print(this) | |
OrderedSet([4, 5, 9, 2]) | |
""" | |
items_to_add = [item for item in other if item not in self] | |
items_to_remove = set(other) | |
self._update_items( | |
[item for item in self.items if item not in items_to_remove] + items_to_add | |
) | |