-->

Lazy data-flow (spreadsheet like) properties with

2019-02-03 08:52发布

问题:

My problem is the following: I have some python classes that have properties that are derived from other properties; and those should be cached once they are calculated, and the cached results should be invalidated each time the base properties are changed.

I could do it manually, but it seems quite difficult to maintain if the number of properties grows. So I would like to have something like Makefile rules inside my objects to automatically keep track of what needs to be recalculated.

The desired syntax and behaviour should be something like that:

# this does dirty magic, like generating the reverse dependency graph,
# and preparing the setters that invalidate the cached values
@dataflow_class
class Test(object):

    def calc_a(self):
        return self.b + self.c

    def calc_c(self):
        return self.d * 2

    a = managed_property(calculate=calc_a, depends_on=('b', 'c'))
    b = managed_property(default=0)
    c = managed_property(calculate=calc_c, depends_on=('d',))
    d = managed_property(default=0)


t = Test()

print t.a
# a has not been initialized, so it calls calc_a
# gets b value
# c has not been initialized, so it calls calc_c
# c value is calculated and stored in t.__c
# a value is calculated and stored in t.__a

t.b = 1
# invalidates the calculated value stored in self.__a

print t.a
# a has been invalidated, so it calls calc_a
# gets b value
# gets c value, from t.__c
# a value is calculated and stored in t.__a

print t.a
# gets value from t.__a

t.d = 2
# invalidates the calculated values stored in t.__a and t.__c

So, is there something like this already available or should I start implementing my own? In the second case, suggestions are welcome :-)

回答1:

Here, this should do the trick. The descriptor mechanism (through which the language implements "property") is more than enough for what you want.

If the code bellow does not work in some corner cases, just write me.

class DependentProperty(object):
    def __init__(self, calculate=None, default=None, depends_on=()):
        # "name" and "dependence_tree" properties are attributes
        # set up by the metaclass of the owner class
        if calculate:
            self.calculate = calculate
        else:
            self.default = default
        self.depends_on = set(depends_on)

    def __get__(self, instance, owner):
        if hasattr(self, "default"):
            return self.default
        if not hasattr(instance, "_" + self.name):
            setattr(instance, "_" + self.name,
                self.calculate(instance, getattr(instance, "_" + self.name + "_last_value")))
        return getattr(instance, "_" + self.name)

    def __set__(self, instance, value):
        setattr(instance, "_" + self.name + "_last_value", value)
        setattr(instance, "_" + self.name, self.calculate(instance, value))
        for attr in self.dependence_tree[self.name]:
            delattr(instance, attr)

    def __delete__(self, instance):
        try:
            delattr(instance, "_" + self.name)
        except AttributeError:
            pass


def assemble_tree(name,  dict_, all_deps = None):
    if all_deps is None:
        all_deps = set()
    for dependance in dict_[name].depends_on:
        all_deps.add(dependance)
        assemble_tree(dependance, dict_, all_deps)
    return all_deps

def invert_tree(tree):
    new_tree = {}
    for key, val in tree.items():
        for dependence in val:
            if dependence not in new_tree:
                new_tree[dependence] = set()
            new_tree[dependence].add(key)
    return new_tree

class DependenceMeta(type):
    def __new__(cls, name, bases, dict_):
        dependence_tree = {}
        properties = []
        for key, val in dict_.items():
            if not isinstance(val, DependentProperty):
                continue
            val.name = key
            val.dependence_tree = dependence_tree
            dependence_tree[key] = set()
            properties.append(val)
        inverted_tree = {}
        for property in properties:
            inverted_tree[property.name] = assemble_tree(property.name, dict_)
        dependence_tree.update(invert_tree(inverted_tree))
        return type.__new__(cls, name, bases, dict_)


if __name__ == "__main__":
    # Example and visual test:

    class Bla:
        __metaclass__ = DependenceMeta

        def calc_b(self, x):
            print "Calculating b"
            return x + self.a

        def calc_c(self, x):
            print "Calculating c"
            return x + self.b

        a = DependentProperty(default=10)    
        b = DependentProperty(depends_on=("a",), calculate=calc_b)
        c = DependentProperty(depends_on=("b",), calculate=calc_c)




    bla = Bla()
    bla.b = 5
    bla.c = 10

    print bla.a, bla.b, bla.c
    bla.b = 10
    print bla.b
    print bla.c


回答2:

I would like to have something like Makefile rules

then use one! You may consider this model:

  • one rule = one python file
  • one result = one *.data file
  • the pipe is implemented as a makefile or with another dependency analysis tool (cmake, scons)

The hardware test team in our company use such a framework for intensive exploratory tests:

  • you can integrate other languages and tools easily
  • you get a stable and proven solution
  • computations may be distributed one multiple cpu/computers
  • you track dependencies on values and rules
  • debug of intermediate values is easy

the (big) downside to this method is that you have to give up python import keyword because it creates an implicit (and untracked) dependency (there are workarounds for this).



回答3:

import collections

sentinel=object()

class ManagedProperty(object):
    '''
    If deptree = {'a':set('b','c')}, then ManagedProperties `b` and
    `c` will be reset whenever `a` is modified.
    '''
    def __init__(self,property_name,calculate=None,depends_on=tuple(),
                 default=sentinel):
        self.property_name=property_name
        self.private_name='_'+property_name 
        self.calculate=calculate
        self.depends_on=depends_on
        self.default=default
    def __get__(self,obj,objtype):
        if obj is None:
            # Allows getattr(cls,mprop) to return the ManagedProperty instance
            return self
        try:
            return getattr(obj,self.private_name)
        except AttributeError:
            result=(getattr(obj,self.calculate)()
                    if self.default is sentinel else self.default)
            setattr(obj,self.private_name,result)
            return result
    def __set__(self,obj,value):
        # obj._dependencies is defined by @register
        map(obj.__delattr__,getattr(obj,'_dependencies').get(self.property_name,tuple()))
        setattr(obj,self.private_name,value)        
    def __delete__(self,obj):
        if hasattr(obj,self.private_name):
            delattr(obj,self.private_name)

def register(*mproperties):
    def flatten_dependencies(name, deptree, all_deps=None):
        '''
        A deptree such as {'c': set(['a']), 'd': set(['c'])} means
        'a' depends on 'c' and 'c' depends on 'd'.

        Given such a deptree, flatten_dependencies('d', deptree) returns the set
        of all property_names that depend on 'd' (i.e. set(['a','c']) in the
        above case).
        '''
        if all_deps is None:
            all_deps = set()
        for dep in deptree.get(name,tuple()):
            all_deps.add(dep)
            flatten_dependencies(dep, deptree, all_deps)
        return all_deps

    def classdecorator(cls):
        deptree=collections.defaultdict(set)
        for mprop in mproperties:
            setattr(cls,mprop.property_name,mprop)
        # Find all ManagedProperties in dir(cls). Note that some of these may be
        # inherited from bases of cls; they may not be listed in mproperties.
        # Doing it this way allows ManagedProperties to be overridden by subclasses.
        for propname in dir(cls):
            mprop=getattr(cls,propname)
            if not isinstance(mprop,ManagedProperty):
                continue
            for underlying_prop in mprop.depends_on:
                deptree[underlying_prop].add(mprop.property_name)

        # Flatten the dependency tree so no recursion is necessary. If one were
        # to use recursion instead, then a naive algorithm would make duplicate
        # calls to __delete__. By flattening the tree, there are no duplicate
        # calls to __delete__.
        dependencies={key:flatten_dependencies(key,deptree)
                      for key in deptree.keys()}
        setattr(cls,'_dependencies',dependencies)
        return cls
    return classdecorator

These are the unit tests I used to verify its behavior.

if __name__ == "__main__":
    import unittest
    import sys
    def count(meth):
        def wrapper(self,*args):
            countname=meth.func_name+'_count'
            setattr(self,countname,getattr(self,countname,0)+1)
            return meth(self,*args)
        return wrapper

    class Test(unittest.TestCase):
        def setUp(self):
            @register(
                ManagedProperty('d',default=0),
                ManagedProperty('b',default=0),
                ManagedProperty('c',calculate='calc_c',depends_on=('d',)),
                ManagedProperty('a',calculate='calc_a',depends_on=('b','c')))
            class Foo(object):
                @count
                def calc_a(self):
                    return self.b + self.c
                @count
                def calc_c(self):
                    return self.d * 2
            @register(ManagedProperty('c',calculate='calc_c',depends_on=('b',)),
                      ManagedProperty('a',calculate='calc_a',depends_on=('b','c')))
            class Bar(Foo):
                @count
                def calc_c(self):
                    return self.b * 3
            self.Foo=Foo
            self.Bar=Bar
            self.foo=Foo()
            self.foo2=Foo()            
            self.bar=Bar()

        def test_two_instances(self):
            self.foo.b = 1
            self.assertEqual(self.foo.a,1)
            self.assertEqual(self.foo.b,1)
            self.assertEqual(self.foo.c,0)
            self.assertEqual(self.foo.d,0)

            self.assertEqual(self.foo2.a,0)
            self.assertEqual(self.foo2.b,0)
            self.assertEqual(self.foo2.c,0)
            self.assertEqual(self.foo2.d,0)


        def test_initialization(self):
            self.assertEqual(self.foo.a,0)
            self.assertEqual(self.foo.calc_a_count,1)
            self.assertEqual(self.foo.a,0)
            self.assertEqual(self.foo.calc_a_count,1)            
            self.assertEqual(self.foo.b,0)
            self.assertEqual(self.foo.c,0)
            self.assertEqual(self.foo.d,0)
            self.assertEqual(self.bar.a,0)
            self.assertEqual(self.bar.b,0)
            self.assertEqual(self.bar.c,0)
            self.assertEqual(self.bar.d,0)

        def test_dependence(self):
            self.assertEqual(self.Foo._dependencies,
                             {'c': set(['a']), 'b': set(['a']), 'd': set(['a', 'c'])})

            self.assertEqual(self.Bar._dependencies,
                             {'c': set(['a']), 'b': set(['a', 'c'])})

        def test_setting_property_updates_dependent(self):
            self.assertEqual(self.foo.a,0)
            self.assertEqual(self.foo.calc_a_count,1)

            self.foo.b = 1
            # invalidates the calculated value stored in foo.a
            self.assertEqual(self.foo.a,1)
            self.assertEqual(self.foo.calc_a_count,2)
            self.assertEqual(self.foo.b,1)
            self.assertEqual(self.foo.c,0)
            self.assertEqual(self.foo.d,0)

            self.foo.d = 2
            # invalidates the calculated values stored in foo.a and foo.c
            self.assertEqual(self.foo.a,5)
            self.assertEqual(self.foo.calc_a_count,3)
            self.assertEqual(self.foo.b,1)
            self.assertEqual(self.foo.c,4)
            self.assertEqual(self.foo.d,2)

            self.assertEqual(self.bar.a,0)
            self.assertEqual(self.bar.calc_a_count,1)
            self.assertEqual(self.bar.b,0)
            self.assertEqual(self.bar.c,0)
            self.assertEqual(self.bar.calc_c_count,1)
            self.assertEqual(self.bar.d,0)

            self.bar.b = 2
            self.assertEqual(self.bar.a,8)
            self.assertEqual(self.bar.calc_a_count,2)
            self.assertEqual(self.bar.b,2)
            self.assertEqual(self.bar.c,6)
            self.assertEqual(self.bar.calc_c_count,2)
            self.assertEqual(self.bar.d,0)

            self.bar.d = 2
            self.assertEqual(self.bar.a,8)
            self.assertEqual(self.bar.calc_a_count,2)            
            self.assertEqual(self.bar.b,2)
            self.assertEqual(self.bar.c,6)
            self.assertEqual(self.bar.calc_c_count,2)
            self.assertEqual(self.bar.d,2)

    sys.argv.insert(1,'--verbose')
    unittest.main(argv=sys.argv)