Code Snippets

This is a new section dedicated to smallish snippets of code I've written that I am particularly fond of for one reason or another.

Recursion & Generators

This is the description of the problem.

# Given a data structure like this:

x = {
    'lion':['a','b'],
    'tiger':['m','o'],
    'cheetah':['y','z'],
    }

# I want to end up with this:

y = [
    { 'lion':'a', 'tiger':'m', 'cheetah':'y', },
    { 'lion':'a', 'tiger':'m', 'cheetah':'z', },
    { 'lion':'a', 'tiger':'o', 'cheetah':'y', },
    { 'lion':'a', 'tiger':'o', 'cheetah':'z', },

    { 'lion':'b', 'tiger':'m', 'cheetah':'y', },
    { 'lion':'b', 'tiger':'m', 'cheetah':'z', },
    { 'lion':'b', 'tiger':'o', 'cheetah':'y', },
    { 'lion':'b', 'tiger':'o', 'cheetah':'z', },
    ]


    

And here is what I came up with. I really like how generators and recursion play well together. Makes for an elegant solution IMO.

def combinator(items):
    expanded = [[(key, v) for v in values] for (key, values) in items]
    return (dict(l) for l in _citer(expanded))

def _citer(lst):
    if lst:
        (head, tail) = (lst[0], lst[1:])
        if not tail:
            for t in head:
                yield [t]
        else:
            for t in head:
                for h in _citer(tail):
                    yield [t] + h

if __name__=="__main__":
    from pprint import pprint
    val = combinator(x.items())
    pprint(val)

    

Trie

This is an version of a Trie I whipped up for a bot agent-string matching code that used the xml database from user-agents.org to match any bot. Below is just the Trie part with some simple test input at the end so you can run it and 'see' it in action.

#!/usr/bin/python

class Trie(object):
    """ Tree/graph structure that supports prefix matchin lookups.
    """

    def __init__(self):
        self.root = [None, {}]
        self.processed = []
        self.last = None

    def add(self, key, value):
        """ Add the given value for the given key.
        """
        node = self.root
        key = key.lower()
        ch = key[0]
        if ch not in self.processed:
            processed = self.processed
            if self.last: self.pack(self.last)
            processed.append(ch)
            self.last = ch
        elif ch != self.last:
            raise RuntimeError("Unsorted agent string: %s" % key)
        for ch in key:
            node = node[1].setdefault(ch, [None, {}])
        node[0] = value

    def find(self, key):
        """ Return the value for the given key or None.
        """

        node = self.root
        for ch in key.lower():
            if ch in node[1]:
                node = node[1][ch]
            elif 'x' in node[1]:
                # source uses 'x' as wildcard
                node = node[1]['x']
            elif node[0]:
                return node[0]
            else:
                return None
        return node[0]

    def pack(self, start=None):
        """ memory efficiency packing.
        """
        if start:
            prune(self.root[1][start])
        else:
            prune(self.root)

    def find_prefix(self, key):
        """ Find longest prefix-key available.

        Find as much of the key as one can, by using the longest
        prefix that has a value. Return (value, remainder) where
        remainder is the rest of the given string.
        """

        node = self.root
        remainder = key
        for ch in key:
            try:
                node = node[1][ch]
            except KeyError:
                return (node[0], remainder)
            remainder = remainder[1:]
        return (node[0], remainder)

def _prune(node, point, prune_points):
    """ Recusive prunning call.

        node is recursive current node
        point is last >2 branch point
        prune_points is result list of all found points
    """
    prune_children = False
    if len(node[1]) > 1:
        prune_children = True
    if node[0] and point:
        prune_points.append((point, node))
    for n in node[1].values():
        if prune_children:
            _prune(n, n, prune_points)
        else:
            _prune(n, point, prune_points)

def prune(node):
    """ Given a node, prunes unncessary suffix's from all subnodes.

        Works by recursing depth first through the tree tracking the
        last point where there was more than one branch. It prunes
        the tree one place below that.
    """
    pp = []
    _prune(node, None, pp)
    for (p, end) in pp:
        (p[0], p[1]) = end


if __name__ == "__main__":
    t = Trie()
    t.add("fobar", "1")
    t.add("fozar", "2")
    t.add("azar",  "3")
    from copy import deepcopy
    rc = deepcopy(t.root)
    print t.root
    prune(t.root[1]['f'])
    print
    print t.root
    print
    print rc == t.root