Up

sparktk.lazyloader module

# vim: set encoding=utf-8

#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import os
import re
import sys
import inspect
import importlib
import logging
from decorator import decorator
logger = logging.getLogger(__name__)

from sparktk.arguments import implicit


class LazyLoader(object):
    """
    Base type for objects that expose library subpackages, modules, and module contents on demand.

    Each file path gets its own dynamically created class type (a subclass of LazyLoader) carrying one property
    per child subpackage or module found at that path.  Reading one of those properties at runtime produces
    another lazy loader for the descendent, and so on down the tree.  The import of a module is deferred until
    something inside that module is referenced directly.

    This allows interactive script code, tabbing out the dots, such as:

    ``tc.models.clustering.kmeans.train(...)``

    with nothing actually loaded until the content of the kmeans module is needed.

    Function kwargs can also be filled in implicitly.  A lazy loader may be given a dict of kwargs at creation
    time, which it uses to complete calls to functions referenced through it.  Given, for example:


        def great_read(uri, config, tc=implicit):
           return tc.thing.read(uri, config)

    when great_read is lazily loaded somewhere under tc, the tc argument is supplied implicitly instead of
    being awkwardly repeated by the caller:

        config = {'x': 1, 'y': 2}
        tc.readers.great_read('my_uri', config)  # no need to pass 'tc'

    """


def get_lazy_loader(instance, name, parent_path=None, package_name='sparktk', implicit_kwargs=None):
    """
    Returns the lazy loader kept on the instance for the given descendent name, creating it on first request

    :param instance: object which will own this lazy loader (the parent)
    :param name: the name the lazy loader will assume under this instance, which is the name of the module to be loaded
    :param parent_path: os path to the parent folder of the module called name; when None, the folder containing THIS module is used
    :param package_name: name of the root package within the parent_path
    :param implicit_kwargs:  dict of kwargs to use to implicitly fill arguments in functions loaded by this loader (and its descendents)
    :return: a lazy loader object
    """
    validate_public_python_name(name)
    attr_name = name_to_private(name)
    logger.debug("get_lazy_loader(name=%s) --> private name=%s", name, attr_name)

    # already built for this instance: hand back the cached loader
    if attr_name in instance.__dict__:
        return getattr(instance, attr_name)

    # first request: build the loader and cache it on the instance under the private name
    if parent_path is None:
        base_dir = os.path.dirname(os.path.abspath(__file__))  # local sparktk path, relative to here
    else:
        base_dir = parent_path
    setattr(instance, attr_name, create_lazy_loader(os.path.join(base_dir, name), package_name, implicit_kwargs))
    return getattr(instance, attr_name)


def create_lazy_loader(path, package_name, implicit_kwargs):
    """
    Builds and returns a lazy loader access object for the given path, usually an absolute path

    A dedicated class is created for the path; its name is made of the capitalized pieces of the dotted module
    name followed by the name of the LazyLoader class.  The returned object is an instance of that class.
    """
    module_pieces = get_module_name(path, package_name).split('.')
    class_name = ''.join(piece.capitalize() for piece in module_pieces) + LazyLoader.__name__
    logger.debug("create_lazy_loader(path=%s, package_name=%s, implicit_kwargs=%s) --> class_name=%s", path, package_name, implicit_kwargs, class_name)
    loader_type = create_class_type(class_name, baseclass=LazyLoader)
    init_lazy_loader_class(loader_type, path, package_name, implicit_kwargs)
    return loader_type()


def init_lazy_loader_class(cls, path, package_name, implicit_kwargs):
    """
    Initializes class (not instance!) by adding properties to access descendent subpackages, modules, and content

    :param cls: the lazy loader class to populate
    :param path: os path of a package directory or of a .py module file
    :param package_name: name of the root package, handed on to the property/element helpers
    :param implicit_kwargs: dict of kwargs for implicit argument filling, handed on to the helpers
    :raises ValueError: if path is neither a directory nor an existing .py file
    """

    # If path is a directory, then this lazy loader class needs properties for the descendents
    if os.path.isdir(path):
        for child_name in os.listdir(path):
            child_path = os.path.join(path, child_name)
            if child_name.startswith('_'):
                # Private entries (files AND directories such as __pycache__) are never exposed.  A loader
                # property for such a name could not work anyway: name_to_private() leaves the name
                # unchanged, so the getter would try to store the loader on the read-only property itself.
                logger.debug("LazyLoader skipping %s", child_path)
            elif os.path.isdir(child_path):
                add_loader_property(cls, child_name, child_path, package_name, implicit_kwargs)
            elif os.path.isfile(child_path) and child_path.endswith('.py'):
                # property is named after the module, i.e. the file name without its '.py'
                add_loader_property(cls, child_name[:-3], child_path, package_name, implicit_kwargs)
            else:
                logger.debug("LazyLoader skipping %s", child_path)

    # If path is a .py file, then this lazy loader class needs properties for the elements of the module
    elif os.path.isfile(path) and path.endswith('.py'):
        add_module_element_properties(cls, path, package_name, implicit_kwargs)
    else:
        raise ValueError("Bad path for LazyLoader init.  Expected valid package dir or .py file, but got %s" % path)


def create_class_type(new_class_name, baseclass):
    """
    Dynamically creates a class type with the given name and base class, and registers it in this module

    :param new_class_name: name for the new class
    :param baseclass: the single base class the new class inherits from
    :return: the newly created class type
    """
    logger.debug("Creating new class type '%s' with baseclass=%s", new_class_name, baseclass)
    # __module__ must hold the module's *name* (a string), not the module object itself
    new_class = type(str(new_class_name),
                     (baseclass,),
                     {'__module__': __name__})
    # assign to its module, and to globals
    # http://stackoverflow.com/questions/13624603/python-how-to-register-dynamic-class-in-module
    setattr(sys.modules[__name__], new_class.__name__, new_class)
    globals()[new_class.__name__] = new_class
    return new_class


def get_module_name(path, package_name):
    """
    Determines the correct python module name for the given os path, relative to a particular package

    :param path: os path to a package directory or to a .py file
    :param package_name: package name to look for in the path; its last occurrence marks where the module name starts
    :return: dotted module name, with any trailing '.py' removed
    :raises ValueError: if package_name does not occur in path
    """
    package_index = path.rfind(package_name)
    if package_index < 0:
        raise ValueError("package_name %s not found in path %s" % (package_name, path))
    relative = path[package_index:]
    if relative.endswith('.py'):
        relative = relative[:-3]
    # '/' is always converted; os.sep and os.altsep cover platforms whose native separator differs
    for separator in ('/', os.sep, os.altsep):
        if separator:
            relative = relative.replace(separator, '.')
    return relative


def name_to_private(name):
    """returns the name with a leading underscore added, unless it already starts with one"""
    if name.startswith('_'):
        return name
    return '_' + name


def is_public_python_name(s):
    """
    Tells whether the string is a public python name: a letter followed by any number of word characters

    :param s: string to test
    :return: True if s matches, False otherwise (a name starting with an underscore does not match)
    :raises ValueError: if s is None
    """
    if s is None:
        raise ValueError("Expected string value, got None")
    # raw string keeps \w from being an invalid string escape; \Z, unlike $, rejects a trailing newline
    return re.match(r'^[A-Za-z][\w_]*\Z', s) is not None


def validate_public_python_name(s):
    """Raises a ValueError unless is_public_python_name accepts the value"""
    if is_public_python_name(s):
        return
    raise ValueError("Value %s is not a valid python variable name" % s)


def create_loader_property(name, path, package_name, implicit_kwargs):
    """Builds a getter-only property which creates the lazy loader object for the path on first access and reuses it afterwards"""
    cache_attr = name_to_private(name)

    def load_once(self):
        # look in the instance's own __dict__ instead of using hasattr, which would also match an inherited prop
        if cache_attr in self.__dict__:
            return getattr(self, cache_attr)
        setattr(self, cache_attr, create_lazy_loader(path, package_name, implicit_kwargs))
        return getattr(self, cache_attr)

    return property(fget=load_once)


def add_loader_property(cls, name, path, package_name, implicit_kwargs):
    """Attaches to cls, under the given name, a property which accesses a lazy loader for a descendent"""
    loader_prop = create_loader_property(name, path, package_name, implicit_kwargs)
    logger.debug("Adding lazy loader property named %s to class %s for path %s", name, cls, path)
    setattr(cls, name, loader_prop)


def add_module_element_properties(cls, path, package_name, implicit_kwargs):
    """
    Imports the module found at path and aliases its elements as attributes of the given class

    When the module defines __all__, exactly the names it lists are aliased; otherwise every element whose
    name does not start with an underscore is aliased

    Callable elements which are not classes are attached as static methods, after being wrapped for implicit
    kwargs when implicit_kwargs is non-empty
    """
    module_name = get_module_name(path, package_name)
    logger.info("Dynamically loading module %s", module_name)
    module = importlib.import_module(module_name)
    namespace = module.__dict__
    if hasattr(module, '__all__'):
        exported = {element_name: namespace[element_name] for element_name in module.__all__}
    else:
        exported = {element_name: element
                    for element_name, element in namespace.items()
                    if not element_name.startswith('_')}

    for element_name, element in exported.items():
        function_like = hasattr(element, '__call__') and not inspect.isclass(element)
        if function_like:
            wrapped = wrap_for_implicit_kwargs(element, implicit_kwargs) if implicit_kwargs else element
            element = staticmethod(wrapped)
        logger.debug("Adding property %s for element %s to class %s for path %s", element_name, element, cls, path)
        setattr(cls, element_name, element)


def wrap_for_implicit_kwargs(function, implicit_kwargs):
    """
    Possibly wraps the function in a decorator which will implicitly fill in kwargs when called

    :param function: the function to inspect and possibly wrap
    :param implicit_kwargs: dict mapping argument names to the values to fill in for them
    :return: the decorated function if any of its named arguments appear in implicit_kwargs, otherwise the
             function itself, unchanged
    :raises TypeError: (from validate_is_implicit) if a matching argument has a default which is not implicit
    """
    logger.debug("wrap_for_implicit_kwargs(function=%s, implicit_kwargs=%s", function.__name__, implicit_kwargs)
    # inspect.getargspec was removed in Python 3.11; getfullargspec starts with the same four fields, so use
    # it when it exists and fall back to getargspec on interpreters which lack it
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    args, varargs, varkwargs, defaults = get_spec(function)[:4]
    logger.debug("argspec = (args=%s,varargs=%s,varkwargs=%s,defaults=%s)", args, varargs, varkwargs, defaults)
    # (positional index, value) for every named argument which implicit_kwargs can supply;
    # validate_is_implicit raises instead of returning False, so it acts as a check inside the filter
    kwarg_index_value_pairs = [(i, implicit_kwargs[key]) for i, key in enumerate(args)
                               if key in implicit_kwargs and validate_is_implicit(function.__name__, i, args, defaults)]
    if kwarg_index_value_pairs:
        def call_with_implicit_kwargs(func, *a, **kw):
            """call_with_implicit_kwargs wrapper used for a decorator"""
            # NOTE(review): indexing by position assumes the decorator package hands every named argument,
            # defaults included, to this caller positionally -- confirm for the installed version
            args_list = list(a)
            for index, value in kwarg_index_value_pairs:
                if args_list[index] is implicit:
                    args_list[index] = value
            a = tuple(args_list)
            return func(*a, **kw)

        logger.debug("wrap_for_implicit_kwargs decorating %s with %s", function.__name__, kwarg_index_value_pairs)
        return decorator(call_with_implicit_kwargs, function)
    return function


def validate_is_implicit(function_name, arg_index, args, defaults):
    """
    Raises a TypeError if the argument has a default value which is not the implicit marker

    An argument which has no default value at all is accepted.

    :param function_name: name of the function being checked, used in the error message
    :param arg_index: index into args of the argument to check
    :param args: list of the function's argument names
    :param defaults: sequence of default values for the trailing arguments, or None if there are none
    :return: True whenever no error is raised, so the call can be used as a condition in a list comprehension
    :raises TypeError: if the argument's default value is not implicit
    """
    default_count = len(defaults) if defaults else 0
    # defaults line up with the tail of args, so shift the index by the number of args without a default
    default_index = arg_index - (len(args) - default_count)
    # explicit check rather than an assert: asserts are stripped under python -O, which would silently
    # switch this validation off
    if default_index >= 0 and defaults[default_index] is not implicit:
        raise TypeError("Lazyloader asked to implicitly fill arg '%s' but it is not marked implicit in function %s" %
                        (args[arg_index], function_name))
    return True  # return true for list comp construction

Module variables

var logger

Functions

def add_loader_property(

cls, name, path, package_name, implicit_kwargs)

Adds a property to the cls which accesses a lazy loader for a descendent

def add_loader_property(cls, name, path, package_name, implicit_kwargs):
    """Attaches to cls, under the given name, a property which accesses a lazy loader for a descendent"""
    loader_prop = create_loader_property(name, path, package_name, implicit_kwargs)
    logger.debug("Adding lazy loader property named %s to class %s for path %s", name, cls, path)
    setattr(cls, name, loader_prop)

def add_module_element_properties(

cls, path, package_name, implicit_kwargs)

Dynamically imports the module and adds properties for each element found in the module to the given class

`__all__` is used if it is defined for the module; otherwise all non-private elements are aliased

Module-level methods are aliased as static methods in the lazy loader class

def add_module_element_properties(cls, path, package_name, implicit_kwargs):
    """
    Imports the module found at path and aliases its elements as attributes of the given class

    When the module defines __all__, exactly the names it lists are aliased; otherwise every element whose
    name does not start with an underscore is aliased

    Callable elements which are not classes are attached as static methods, after being wrapped for implicit
    kwargs when implicit_kwargs is non-empty
    """
    module_name = get_module_name(path, package_name)
    logger.info("Dynamically loading module %s", module_name)
    module = importlib.import_module(module_name)
    namespace = module.__dict__
    if hasattr(module, '__all__'):
        exported = {element_name: namespace[element_name] for element_name in module.__all__}
    else:
        exported = {element_name: element
                    for element_name, element in namespace.items()
                    if not element_name.startswith('_')}

    for element_name, element in exported.items():
        function_like = hasattr(element, '__call__') and not inspect.isclass(element)
        if function_like:
            wrapped = wrap_for_implicit_kwargs(element, implicit_kwargs) if implicit_kwargs else element
            element = staticmethod(wrapped)
        logger.debug("Adding property %s for element %s to class %s for path %s", element_name, element, cls, path)
        setattr(cls, element_name, element)

def create_class_type(

new_class_name, baseclass)

Dynamically creates a class type with the given name and base class

def create_class_type(new_class_name, baseclass):
    """
    Dynamically creates a class type with the given name and base class, and registers it in this module

    :param new_class_name: name for the new class
    :param baseclass: the single base class the new class inherits from
    :return: the newly created class type
    """
    logger.debug("Creating new class type '%s' with baseclass=%s", new_class_name, baseclass)
    # __module__ must hold the module's *name* (a string), not the module object itself
    new_class = type(str(new_class_name),
                     (baseclass,),
                     {'__module__': __name__})
    # assign to its module, and to globals
    # http://stackoverflow.com/questions/13624603/python-how-to-register-dynamic-class-in-module
    setattr(sys.modules[__name__], new_class.__name__, new_class)
    globals()[new_class.__name__] = new_class
    return new_class

def create_lazy_loader(

path, package_name, implicit_kwargs)

Creates a lazy loader access object for the given path, usually an absolute path

def create_lazy_loader(path, package_name, implicit_kwargs):
    """
    Builds and returns a lazy loader access object for the given path, usually an absolute path

    A dedicated class is created for the path; its name is made of the capitalized pieces of the dotted module
    name followed by the name of the LazyLoader class.  The returned object is an instance of that class.
    """
    module_pieces = get_module_name(path, package_name).split('.')
    class_name = ''.join(piece.capitalize() for piece in module_pieces) + LazyLoader.__name__
    logger.debug("create_lazy_loader(path=%s, package_name=%s, implicit_kwargs=%s) --> class_name=%s", path, package_name, implicit_kwargs, class_name)
    loader_type = create_class_type(class_name, baseclass=LazyLoader)
    init_lazy_loader_class(loader_type, path, package_name, implicit_kwargs)
    return loader_type()

def create_loader_property(

name, path, package_name, implicit_kwargs)

Creates a property whose getter will create, if it does not yet exist, a lazy loader object for the path

def create_loader_property(name, path, package_name, implicit_kwargs):
    """Builds a getter-only property which creates the lazy loader object for the path on first access and reuses it afterwards"""
    cache_attr = name_to_private(name)

    def load_once(self):
        # look in the instance's own __dict__ instead of using hasattr, which would also match an inherited prop
        if cache_attr in self.__dict__:
            return getattr(self, cache_attr)
        setattr(self, cache_attr, create_lazy_loader(path, package_name, implicit_kwargs))
        return getattr(self, cache_attr)

    return property(fget=load_once)

def get_lazy_loader(

instance, name, parent_path=None, package_name='sparktk', implicit_kwargs=None)

Gets the lazy loader for the given instance and relative descendent name

instance: object which will own this lazy loader (the parent)
name: the name the lazy loader will assume under this instance, which is the name of the module to be loaded
parent_path: os path to the parent folder of the module called name; if not specified, it will be the path to THIS module's parent
package_name: name of the root package within the parent_path
implicit_kwargs: dict of kwargs to use to implicitly fill arguments in functions loaded by this loader (and its descendents)

Returns: a lazy loader object

def get_lazy_loader(instance, name, parent_path=None, package_name='sparktk', implicit_kwargs=None):
    """
    Returns the lazy loader kept on the instance for the given descendent name, creating it on first request

    :param instance: object which will own this lazy loader (the parent)
    :param name: the name the lazy loader will assume under this instance, which is the name of the module to be loaded
    :param parent_path: os path to the parent folder of the module called name; when None, the folder containing THIS module is used
    :param package_name: name of the root package within the parent_path
    :param implicit_kwargs:  dict of kwargs to use to implicitly fill arguments in functions loaded by this loader (and its descendents)
    :return: a lazy loader object
    """
    validate_public_python_name(name)
    attr_name = name_to_private(name)
    logger.debug("get_lazy_loader(name=%s) --> private name=%s", name, attr_name)

    # already built for this instance: hand back the cached loader
    if attr_name in instance.__dict__:
        return getattr(instance, attr_name)

    # first request: build the loader and cache it on the instance under the private name
    if parent_path is None:
        base_dir = os.path.dirname(os.path.abspath(__file__))  # local sparktk path, relative to here
    else:
        base_dir = parent_path
    setattr(instance, attr_name, create_lazy_loader(os.path.join(base_dir, name), package_name, implicit_kwargs))
    return getattr(instance, attr_name)

def get_module_name(

path, package_name)

Determines the correct python module name for the given os path, relative to a particular package

def get_module_name(path, package_name):
    """
    Determines the correct python module name for the given os path, relative to a particular package

    :param path: os path to a package directory or to a .py file
    :param package_name: package name to look for in the path; its last occurrence marks where the module name starts
    :return: dotted module name, with any trailing '.py' removed
    :raises ValueError: if package_name does not occur in path
    """
    package_index = path.rfind(package_name)
    if package_index < 0:
        raise ValueError("package_name %s not found in path %s" % (package_name, path))
    relative = path[package_index:]
    if relative.endswith('.py'):
        relative = relative[:-3]
    # '/' is always converted; os.sep and os.altsep cover platforms whose native separator differs
    for separator in ('/', os.sep, os.altsep):
        if separator:
            relative = relative.replace(separator, '.')
    return relative

def init_lazy_loader_class(

cls, path, package_name, implicit_kwargs)

Initializes class (not instance!) by adding properties to access descendent subpackages, modules, and content

def init_lazy_loader_class(cls, path, package_name, implicit_kwargs):
    """
    Initializes class (not instance!) by adding properties to access descendent subpackages, modules, and content

    :param cls: the lazy loader class to populate
    :param path: os path of a package directory or of a .py module file
    :param package_name: name of the root package, handed on to the property/element helpers
    :param implicit_kwargs: dict of kwargs for implicit argument filling, handed on to the helpers
    :raises ValueError: if path is neither a directory nor an existing .py file
    """

    # If path is a directory, then this lazy loader class needs properties for the descendents
    if os.path.isdir(path):
        for child_name in os.listdir(path):
            child_path = os.path.join(path, child_name)
            if child_name.startswith('_'):
                # Private entries (files AND directories such as __pycache__) are never exposed.  A loader
                # property for such a name could not work anyway: name_to_private() leaves the name
                # unchanged, so the getter would try to store the loader on the read-only property itself.
                logger.debug("LazyLoader skipping %s", child_path)
            elif os.path.isdir(child_path):
                add_loader_property(cls, child_name, child_path, package_name, implicit_kwargs)
            elif os.path.isfile(child_path) and child_path.endswith('.py'):
                # property is named after the module, i.e. the file name without its '.py'
                add_loader_property(cls, child_name[:-3], child_path, package_name, implicit_kwargs)
            else:
                logger.debug("LazyLoader skipping %s", child_path)

    # If path is a .py file, then this lazy loader class needs properties for the elements of the module
    elif os.path.isfile(path) and path.endswith('.py'):
        add_module_element_properties(cls, path, package_name, implicit_kwargs)
    else:
        raise ValueError("Bad path for LazyLoader init.  Expected valid package dir or .py file, but got %s" % path)

def is_public_python_name(

s)

def is_public_python_name(s):
    """
    Tells whether the string is a public python name: a letter followed by any number of word characters

    :param s: string to test
    :return: True if s matches, False otherwise (a name starting with an underscore does not match)
    :raises ValueError: if s is None
    """
    if s is None:
        raise ValueError("Expected string value, got None")
    # raw string keeps \w from being an invalid string escape; \Z, unlike $, rejects a trailing newline
    return re.match(r'^[A-Za-z][\w_]*\Z', s) is not None

def name_to_private(

name)

makes private version of the name

def name_to_private(name):
    """returns the name with a leading underscore added, unless it already starts with one"""
    if name.startswith('_'):
        return name
    return '_' + name

def validate_is_implicit(

function_name, arg_index, args, defaults)

Raises a TypeError if the kwarg does not have an implicit default value

def validate_is_implicit(function_name, arg_index, args, defaults):
    """
    Raises a TypeError if the argument has a default value which is not the implicit marker

    An argument which has no default value at all is accepted.

    :param function_name: name of the function being checked, used in the error message
    :param arg_index: index into args of the argument to check
    :param args: list of the function's argument names
    :param defaults: sequence of default values for the trailing arguments, or None if there are none
    :return: True whenever no error is raised, so the call can be used as a condition in a list comprehension
    :raises TypeError: if the argument's default value is not implicit
    """
    default_count = len(defaults) if defaults else 0
    # defaults line up with the tail of args, so shift the index by the number of args without a default
    default_index = arg_index - (len(args) - default_count)
    # explicit check rather than an assert: asserts are stripped under python -O, which would silently
    # switch this validation off
    if default_index >= 0 and defaults[default_index] is not implicit:
        raise TypeError("Lazyloader asked to implicitly fill arg '%s' but it is not marked implicit in function %s" %
                        (args[arg_index], function_name))
    return True  # return true for list comp construction

def validate_public_python_name(

s)

def validate_public_python_name(s):
    """Raises a ValueError unless is_public_python_name accepts the value"""
    if is_public_python_name(s):
        return
    raise ValueError("Value %s is not a valid python variable name" % s)

def wrap_for_implicit_kwargs(

function, implicit_kwargs)

possibly wraps the function in a decorator which will implicitly fill in kwargs when called

def wrap_for_implicit_kwargs(function, implicit_kwargs):
    """
    Possibly wraps the function in a decorator which will implicitly fill in kwargs when called

    :param function: the function to inspect and possibly wrap
    :param implicit_kwargs: dict mapping argument names to the values to fill in for them
    :return: the decorated function if any of its named arguments appear in implicit_kwargs, otherwise the
             function itself, unchanged
    :raises TypeError: (from validate_is_implicit) if a matching argument has a default which is not implicit
    """
    logger.debug("wrap_for_implicit_kwargs(function=%s, implicit_kwargs=%s", function.__name__, implicit_kwargs)
    # inspect.getargspec was removed in Python 3.11; getfullargspec starts with the same four fields, so use
    # it when it exists and fall back to getargspec on interpreters which lack it
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    args, varargs, varkwargs, defaults = get_spec(function)[:4]
    logger.debug("argspec = (args=%s,varargs=%s,varkwargs=%s,defaults=%s)", args, varargs, varkwargs, defaults)
    # (positional index, value) for every named argument which implicit_kwargs can supply;
    # validate_is_implicit raises instead of returning False, so it acts as a check inside the filter
    kwarg_index_value_pairs = [(i, implicit_kwargs[key]) for i, key in enumerate(args)
                               if key in implicit_kwargs and validate_is_implicit(function.__name__, i, args, defaults)]
    if kwarg_index_value_pairs:
        def call_with_implicit_kwargs(func, *a, **kw):
            """call_with_implicit_kwargs wrapper used for a decorator"""
            # NOTE(review): indexing by position assumes the decorator package hands every named argument,
            # defaults included, to this caller positionally -- confirm for the installed version
            args_list = list(a)
            for index, value in kwarg_index_value_pairs:
                if args_list[index] is implicit:
                    args_list[index] = value
            a = tuple(args_list)
            return func(*a, **kw)

        logger.debug("wrap_for_implicit_kwargs decorating %s with %s", function.__name__, kwarg_index_value_pairs)
        return decorator(call_with_implicit_kwargs, function)
    return function

Classes

class LazyLoader

Building-block object used to build out on-demand access to library subpackages, modules, and module contents.

For a given file path, we build a custom class type (inheriting LazyLoader) which has properties to access each child subpackage or module found in the path. These properties in turn, when accessed during runtime, create more lazy loader blocks for their descendents. Only when module content is directly accessed does the module hierarchy actually get imported.

Enables interactive script code, tabbing out the dots, like this:

tc.models.clustering.kmeans.train(...)

Where nothing is actually loaded until the content of the kmeans module is needed.

Also supports implicitly filling in function kwargs. The lazy loader at creation time can be given a dict of kwargs which it will use to fill in function calls when referenced off the lazy loader. For example, we can define the function:

def great_read(uri, config, tc=implicit):
   return tc.thing.read(uri, config)

If we lazily load great_read under tc somewhere, we'd like the tc to be passed implicitly, rather than awkwardly specifying it again:

config = {'x': 1, 'y': 2}
tc.readers.great_read('my_uri', config)  # no need to pass 'tc'
class LazyLoader(object):
    """
    Base type for objects that expose library subpackages, modules, and module contents on demand.

    Each file path gets its own dynamically created class type (a subclass of LazyLoader) carrying one property
    per child subpackage or module found at that path.  Reading one of those properties at runtime produces
    another lazy loader for the descendent, and so on down the tree.  The import of a module is deferred until
    something inside that module is referenced directly.

    This allows interactive script code, tabbing out the dots, such as:

    ``tc.models.clustering.kmeans.train(...)``

    with nothing actually loaded until the content of the kmeans module is needed.

    Function kwargs can also be filled in implicitly.  A lazy loader may be given a dict of kwargs at creation
    time, which it uses to complete calls to functions referenced through it.  Given, for example:


        def great_read(uri, config, tc=implicit):
           return tc.thing.read(uri, config)

    when great_read is lazily loaded somewhere under tc, the tc argument is supplied implicitly instead of
    being awkwardly repeated by the caller:

        config = {'x': 1, 'y': 2}
        tc.readers.great_read('my_uri', config)  # no need to pass 'tc'

    """

Ancestors (in MRO)