import operator
import itertools
from pprint import pprint
from boltons.typeutils import make_sentinel
from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE
from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG
# Module-private sentinel object built with boltons' make_sentinel.
_MISSING = make_sentinel('_MISSING')

# ``basestring`` is not defined on every interpreter; when looking it up
# raises NameError, alias it to ``str`` so the
# ``isinstance(op, basestring)`` check in Merge.__init__ still works.
try:
    basestring
except NameError:
    basestring = str
class FoldError(GlomError):
    """Raised when a :class:`Fold` (or one of its subtypes) is applied
    to a target that cannot be iterated. Other uses may be added in
    the future.
    """
class Fold(object):
    """Reduce an iterable target down to a single accumulated value.

    ``Fold`` is glom's general-purpose reduction specifier. It
    implements the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, in the spirit of Python's built-in
    :func:`reduce`.

    Args:
       subspec: Spec locating the value to fold. The result must be
          iterable, or otherwise registered to 'iterate' (with
          :func:`~glom.register`).
       init (callable): A function or type called with no arguments
          to produce the starting accumulator value.
       op (callable): Called with the accumulator and each value in
          turn; its return value becomes the new accumulator.
          Defaults to :func:`operator.iadd`.

    For example:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    ``subspec`` and ``init`` are required. ``op`` is optional in
    general, but has to be given above because :class:`set` and
    :class:`frozenset` do not support addition.

    :class:`~glom.Flatten` and :class:`~glom.Sum` are subtypes of
    :class:`~glom.Fold` with more convenient defaults for day-to-day
    use.
    """

    def __init__(self, subspec, init, op=operator.iadd):
        self.subspec = subspec
        self.init = init
        self.op = op
        # Validate *op* first, then *init*, so that when both are bad
        # the error names *op*.
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (self.__class__.__name__, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (self.__class__.__name__, init))

    def glomit(self, target, scope):
        """Evaluate this spec against *target* within *scope*.

        When the scope is in GROUP mode and no current aggregator has
        been claimed yet, this spec claims that role and feeds the
        (sub-spec'd) target into its slot of the accumulator tree.
        Otherwise the target is iterated and folded in one go.

        Raises:
           FoldError: if the target is not registered for iteration.
        """
        aggregating = scope[MODE] is GROUP and scope.get(CUR_AGG) is None
        if aggregating:
            scope[CUR_AGG] = self

        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if aggregating:
            return self._agg(target, scope[ACC_TREE])

        try:
            return self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))

    def _fold(self, iterator):
        """Run the reduction over *iterator*, rebinding the accumulator
        to the result of ``op`` at every step, and return the final
        accumulator."""
        acc = self.init()
        combine = self.op
        for item in iterator:
            acc = combine(acc, item)
        return acc

    def _agg(self, target, tree):
        """Fold a single *target* into this spec's entry in *tree*,
        creating the entry with ``init()`` on first use, and return
        the updated entry."""
        if self not in tree:
            tree[self] = self.init()
        tree[self] = self.op(tree[self], target)
        return tree[self]

    def __repr__(self):
        # *init* is always shown; *op* only when it is not the default.
        shown = {'init': self.init}
        if self.op is not operator.iadd:
            shown['op'] = self.op
        return format_invocation(self.__class__.__name__, (self.subspec,),
                                 shown, repr=bbrepr)
class Sum(Fold):
    """Aggregate integers and other numeric values by addition, much
    like the :func:`sum()` builtin.

       >>> glom(range(5), Sum())
       10

    As with the other fold specifiers, *init* is a callable rather
    than a value, so a different start value has to be wrapped in
    one::

       >>> glom(range(5), Sum(init=lambda: 5.0))
       15.0

    To "sum" lists and other iterables, see the :class:`Flatten`
    spec. For other objects, see the :class:`Fold` specifier type.
    """

    def __init__(self, subspec=T, init=int):
        # The operation is fixed to in-place addition; only the
        # location and the starting value are configurable.
        super(Sum, self).__init__(subspec=subspec, init=init, op=operator.iadd)

    def __repr__(self):
        # Arguments still at their defaults are left out of the repr.
        positional = (self.subspec,) if self.subspec is not T else ()
        keywords = {}
        if self.init is not int:
            keywords['init'] = self.init
        return format_invocation(self.__class__.__name__, positional,
                                 keywords, repr=bbrepr)
class Count(Fold):
    """Count how many values occurred, ignoring the values themselves.

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # Start from int() and add one per value seen; the value
        # itself is never inspected.
        super(Count, self).__init__(
            subspec=T, init=int, op=lambda total, _seen: total + 1)

    def __repr__(self):
        # Count takes no arguments, so the repr is just the call.
        return '%s()' % self.__class__.__name__
class Flatten(Fold):
    """Combine an iterable of iterables. By default the result is a
    single list containing the items of every inner iterable.

       >>> target = [[1], [2, 3]]
       >>> glom(target, Flatten())
       [1, 2, 3]

    Passing ``"lazy"`` as *init* returns a generator instead of a
    list, which avoids building extra lists and other collections
    during intermediate processing steps.
    """

    def __init__(self, subspec=T, init=list):
        lazy_requested = init == 'lazy'
        if lazy_requested:
            # Fold requires a callable init; list is a stand-in that
            # the lazy path in _fold never calls.
            init = list
        self.lazy = bool(lazy_requested)
        super(Flatten, self).__init__(subspec=subspec, init=init, op=operator.iadd)

    def _fold(self, iterator):
        """Chain the inner iterables lazily when ``lazy`` is set,
        otherwise defer to the regular accumulating fold."""
        if not self.lazy:
            return super(Flatten, self)._fold(iterator)
        return itertools.chain.from_iterable(iterator)

    def __repr__(self):
        # Only show arguments that differ from the defaults; lazy
        # mode is reported the way it was requested.
        positional = (self.subspec,) if self.subspec is not T else ()
        keywords = {}
        if self.lazy:
            keywords['init'] = 'lazy'
        elif self.init is not list:
            keywords['init'] = self.init
        return format_invocation(self.__class__.__name__, positional,
                                 keywords, repr=bbrepr)
def flatten(target, **kwargs):
    """Turn an iterable of iterables into a single list, with a few
    keyword arguments for finer control.

    Args:
       init (callable): A function or type which gives the initial
          value of the return. The value must support addition. Common
          values might be :class:`list` (the default), :class:`tuple`,
          or even :class:`int`. You can also pass ``init="lazy"`` to
          get a generator.
       levels (int): The number of nested levels to flatten.
          Defaults to 1. With ``0`` the target is returned as-is;
          negative values raise :exc:`ValueError`.
       spec: The glomspec to fetch before flattening. This defaults
          to the root level of the object.

    Basic usage:

       >>> target = [[1, 2], [3], [4]]
       >>> flatten(target)
       [1, 2, 3, 4]

    Integers support addition too, so a second level of flattening
    can collapse everything to a single integer sum:

       >>> flatten(target, init=int, levels=2)
       10

    Flattening a non-iterable, such as an integer, raises an
    exception:

       >>> target = 10
       >>> flatten(target)
       Traceback (most recent call last):
          ...
       FoldError: can only Flatten on iterable targets, not int type (...)

    By default, ``flatten()`` adds a mix of iterables together,
    making it a more robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick:

       >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
       >>> sum(list_of_iterables, [])
       Traceback (most recent call last):
          ...
       TypeError: can only concatenate list (not "tuple") to list

    Whereas flatten() handles this just fine:

       >>> flatten(list_of_iterables)
       [0, 1, 2, 3, 4, 5]

    ``flatten()`` is a convenience wrapper around the
    :class:`Flatten` specifier type. For embedding in larger specs,
    and more involved flattening, see :class:`Flatten` and its base,
    :class:`Fold`.
    """
    base_spec = kwargs.pop('spec', T)
    final_init = kwargs.pop('init', list)
    depth = kwargs.pop('levels', 1)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))

    if depth == 0:
        return target
    if depth < 0:
        raise ValueError('expected levels >= 0, not %r' % depth)

    # Every level but the last is chained lazily so no intermediate
    # collections are built; only the final level uses the requested
    # *init*.
    steps = [base_spec]
    steps.extend([Flatten(init="lazy")] * (depth - 1))
    steps.append(Flatten(init=final_init))
    return glom(target, tuple(steps))
class Merge(Fold):
    """By default, Merge turns an iterable of mappings into a single,
    merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. The start state can be customized
    with *init*, as well as the update operation, with *op*.

    Args:
       subspec: The location of the iterable of mappings. Defaults to ``T``.
       init (callable): A type or callable which returns a base
          instance into which all other values will be merged.
       op (callable): A callable, which takes two arguments, and
          performs a merge of the second into the first. Can also be
          the string name of a method, which is looked up on the type
          of the object returned by *init*. Defaults to ``"update"``.

    Raises:
       ValueError: if *op* is neither callable nor the name of a
          callable attribute on the type of the *init* result.

    .. note::

       Besides the differing defaults, the primary difference between
       :class:`Merge` and other :class:`Fold` subtypes is that its
       *op* argument is assumed to be a two-argument function which
       has no return value and modifies the left parameter
       in-place. Because the initial state is a new object created with
       the *init* parameter, none of the target values are modified.
    """

    def __init__(self, subspec=T, init=dict, op=None):
        if op is None:
            op = 'update'
        # Remember what the caller asked for so the error below can
        # report it even after a failed lookup replaces op with None.
        requested_op = op
        if isinstance(op, basestring):
            # Look the method up on the type of a throwaway init()
            # result, so that op is called as op(accumulator, value).
            test_init = init()
            op = getattr(type(test_init), op, None)
        if not callable(op):
            raise ValueError('expected callable "op" arg or an "init" with an .update()'
                             ' method not %r and %r' % (requested_op, init))
        super(Merge, self).__init__(subspec=subspec, init=init, op=op)

    def _fold(self, iterator):
        """Merge every value from *iterator* into a fresh ``init()``
        object and return that object."""
        # the difference here is that ret is mutated in-place, the
        # variable not being reassigned, as in base Fold.
        ret, op = self.init(), self.op
        for v in iterator:
            op(ret, v)
        return ret

    def _agg(self, target, tree):
        """Merge a single *target* into this spec's entry in *tree*,
        creating the entry with ``init()`` on first use, and return
        the entry."""
        if self not in tree:
            acc = tree[self] = self.init()
        else:
            acc = tree[self]
        # op mutates acc in place; its return value is ignored.
        self.op(acc, target)
        return acc
def merge(target, **kwargs):
    """Combine an iterable of mappings into one new mapping.

    By default the result is a :class:`dict` built with the behavior
    of the :meth:`~dict.update` method. A new mapping is created and
    none of the passed mappings are modified.

       >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}]
       >>> res = merge(target)
       >>> pprint(res)
       {'a': 'A', 'b': 'B'}

    Args:
       target: The list of dicts, or some other iterable of mappings.

    The start state can be customized with the *init* keyword
    argument, as well as the update operation, with the *op* keyword
    argument. A *spec* keyword argument selects where in *target* the
    iterable of mappings is found. For more on those customizations,
    see the :class:`Merge` spec.

    Raises:
       TypeError: if any other keyword argument is passed.
    """
    location = kwargs.pop('spec', T)
    start = kwargs.pop('init', dict)
    update_op = kwargs.pop('op', None)
    # Anything still left in kwargs was not a recognized option.
    leftover = sorted(kwargs.keys())
    if leftover:
        raise TypeError('unexpected keyword args: %r' % leftover)
    return glom(target, Merge(location, start, update_op))