Source code for xdress.autoall

"""This module is used to scrape all of the APIs from a given source file
and return their name and kind.  These include classes, structs, functions,
and certain variable types.  It is not used to actually describe these elements.
That is the job of the autodescriber.

This module is available as an xdress plugin by the name ``xdress.autoall``.

Including this plugin enables the ``classes``, ``functions``, and ``variables``  
run control parameters to have an asterisk ('*') in the name position (index 0).
For example, rather than writing::

    classes = [
        ('People', 'people'),
        ('JoanOfArc', 'people'),
        ('JEdgaHoover', 'people'),
        ('Leslie', 'people'),
        ('HuaMulan', 'people'),
        ]

we can instead simply write::

    classes = [('*', 'people')]

Isn't this grand?!

:author: Anthony Scopatz <scopatz@gmail.com>

Automatic Finder API
====================
"""
from __future__ import print_function
import os
import io
import re
import sys
from hashlib import md5
from pprint import pprint, pformat
from warnings import warn
try:
    import cPickle as pickle
except ImportError:
    import pickle

try:
    import pycparser
except ImportError:
    pycparser = None

try:
    from . import clang
    from .clang.cindex import CursorKind
except ImportError:
    clang = None

from . import utils
from . import astparsers

from .utils import find_source, FORBIDDEN_NAMES, NotSpecified, RunControl, apiname, \
    ensure_apiname

# On Windows both path flavors are imported so that native paths can be
# converted to posix-style paths (see gccxml_findall).
if os.name == 'nt':
    import ntpath
    import posixpath

# Python 3 has no ``basestring``; alias it to ``str`` so that the
# isinstance() checks in this module work on both major versions.
if sys.version_info[0] >= 3:
    basestring = str

class GccxmlFinder(object):
    """Class used for discovering APIs using an etree representation of
    the GCC-XML AST."""

    def __init__(self, root=None, onlyin=None, verbose=False):
        """Parameters
        -------------
        root : element tree node, optional
            The root element node of the AST.
        onlyin : str or iterable of str, optional
            Filename(s) to search, prevents finding APIs coming from other
            libraries.
        verbose : bool, optional
            Flag to display extra information while visiting the file.

        """
        self.verbose = verbose
        self._root = root
        origonlyin = onlyin
        onlyin = [onlyin] if isinstance(onlyin, basestring) else onlyin
        onlyin = set() if onlyin is None else set(onlyin)
        if root is None:
            # Without an AST there are no File elements to look up; fall
            # through to the warning below instead of failing on None.find().
            filenodes = []
        else:
            filenodes = [root.find("File[@name='{0}']".format(oi)) for oi in onlyin]
        # Store the GCC-XML file ids (not the names) of the files that exist
        # in the AST; declarations reference their file by id.
        self.onlyin = set([fn.attrib['id'] for fn in filenodes if fn is not None])
        if 0 == len(self.onlyin):
            msg = ("None of these files are present: {0!r}; "
                   "autodescribing will probably fail.")
            msg = msg.format(origonlyin)
            warn(msg, RuntimeWarning)
        self.variables = []
        self.functions = []
        self.classes = []

    def __str__(self):
        return ("vars = " + pformat(self.variables) + "\n" +
                "funcs = " + pformat(self.functions) + "\n" +
                "classes = " + pformat(self.classes) + "\n")

    def _pprint(self, node):
        """Prints the tag, id, and name of a found node when verbose."""
        if self.verbose:
            print("Auto-Found: {0} {1} {2}".format(node.tag,
                                        node.attrib.get('id', ''),
                                        node.attrib.get('name', None)))

    def visit(self, node=None):
        """Visits the node and all sub-nodes, filling the API names
        as it goes.

        Parameters
        ----------
        node : element tree node, optional
            The element tree node to start from.  If this is None, then the
            top-level node is found and visited.

        """
        # Explicit None test: an element without children is falsy, so
        # ``node or self._root`` would silently discard such a node.
        if node is None:
            node = self._root
        self.variables += self.visit_kinds(node, "Enumeration")
        self.functions += self.visit_kinds(node, "Function")
        self.classes += self.visit_kinds(node, ["Class", "Struct"])

    def visit_kinds(self, node, kinds):
        """Visits the node and all sub-nodes, finding instances of the kinds
        and recording the names as it goes.

        Parameters
        ----------
        node : element tree node
            The element tree node to start from.
        kinds : str or sequence of str
            The API elements to find.

        Returns
        -------
        names : list of str
            Names of the API elements in this file that match the kinds provided.

        """
        if not isinstance(kinds, basestring):
            # Sequence of kinds: concatenate the per-kind results in order.
            names = []
            for k in kinds:
                names += self.visit_kinds(node, k)
            names = [n for n in names if n not in FORBIDDEN_NAMES]
            return names
        names = set()
        for child in node.iterfind(".//" + kinds):
            # Only keep declarations whose file id is one of the target files.
            if child.attrib.get('file', None) not in self.onlyin:
                continue
            # Unnamed elements default to '_' so they are skipped along with
            # every other underscore-prefixed (private) name.
            name = child.attrib.get('name', '_')
            if name.startswith('_'):
                continue
            if name in FORBIDDEN_NAMES:
                continue
            names.add(utils.parse_template(name))
            self._pprint(child)
        return sorted(names)
            

def gccxml_findall(filename, includes=(), defines=('XDRESS',), undefines=(),
                   extra_parser_args=(), verbose=False, debug=False,
                   builddir='build', language='c++', clang_includes=()):
    """Automatically finds all API elements in a file via GCC-XML.

    Parameters
    ----------
    filename : str
        The path to the file
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while describing the class.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    language : str
        Valid language flag.  Not used by this finder.
    clang_includes : ignored

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    if os.name == 'nt':
        # GCC-XML and/or Cygwin wants posix paths on Windows.
        # NOTE(review): ntpath.split() only separates the last component, so
        # backslashes inside the head survive -- confirm this is intended.
        head_and_tail = ntpath.split(filename)
        filename = posixpath.join(*head_and_tail)
    parse_kwargs = dict(includes=includes, defines=defines, undefines=undefines,
                        extra_parser_args=extra_parser_args, verbose=verbose,
                        debug=debug, builddir=builddir)
    root = astparsers.gccxml_parse(filename, **parse_kwargs)
    # Search the file itself plus same-stem headers whose extension starts
    # with 'h'.
    stem = filename.rsplit('.', 1)[0]
    onlyin = set([filename])
    onlyin.update(stem + '.' + ext for ext in utils._hdr_exts
                  if ext.startswith('h'))
    finder = GccxmlFinder(root, onlyin=onlyin, verbose=verbose)
    finder.visit()
    return finder.variables, finder.functions, finder.classes


def clang_findall(filename, includes=(), defines=('XDRESS',), undefines=(),
                  extra_parser_args=(), verbose=False, debug=False, builddir='build',
                  language='c++', clang_includes=()):
    """Automatically finds all API elements in a file via clang.

    Parameters
    ----------
    filename : str
        The path to the file
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Forwarded to the clang parser; not otherwise used here.
    debug : bool, optional
        Forwarded to the clang parser; not otherwise used here.
    builddir : Ignored
    language : str
        Valid language flag.
    clang_includes : list of str, optional
        clang-specific include paths.

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    tu = astparsers.clang_parse(filename, includes=includes, defines=defines,
                                undefines=undefines,
                                extra_parser_args=extra_parser_args, verbose=verbose,
                                debug=debug, language=language,
                                clang_includes=clang_includes)
    # Only cursors located in the file itself, or in a same-stem header whose
    # extension starts with 'h', are considered.
    stem = filename.rsplit('.', 1)[0]
    header_names = [stem + '.' + ext for ext in utils._hdr_exts
                    if ext.startswith('h')]
    onlyin = frozenset([filename] + header_names)
    variables = []
    functions = []
    classes = []

    def collect(cursor):
        # Namespaces are descended into; enums, functions, and classes or
        # structs are recorded by spelling; everything else is skipped.
        kind = cursor.kind
        if kind == CursorKind.NAMESPACE:
            for child in cursor.get_children():
                collect(child)
            return
        if kind == CursorKind.ENUM_DECL:
            variables.append(cursor.spelling)
            return
        if kind == CursorKind.FUNCTION_DECL:
            functions.append(cursor.spelling)
            return
        if kind in (CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL):
            classes.append(cursor.spelling)

    for cursor in tu.cursor.get_children():
        srcfile = cursor.extent.start.file
        if not srcfile or srcfile.name not in onlyin:
            continue
        collect(cursor)
    return variables, functions, classes


class PycparserFinder(astparsers.PycparserNodeVisitor):
    """Class used for discovering APIs using the pycparser AST."""

    # Marker left by visit_Struct for visit_Typedef when a struct is anonymous.
    _NAME_NOT_FOUND = "<name-not-found>"

    def __init__(self, root=None, onlyin=None, verbose=False):
        """Parameters
        -------------
        root : pycparser AST node, optional
            The root node of the AST.
        onlyin : str or collection of str, optional
            Filename(s) to search, prevents finding APIs coming from other
            libraries.  If None, no file matches and nothing is found.
        verbose : bool, optional
            Flag to display extra information while visiting the file.

        """
        super(PycparserFinder, self).__init__()
        self.verbose = verbose
        self._root = root
        # Normalize so that membership tests are exact filename matches:
        # a bare string would otherwise do substring matching and None
        # would raise a TypeError on ``in``.
        if onlyin is None:
            onlyin = set()
        elif isinstance(onlyin, basestring):
            onlyin = set([onlyin])
        self.onlyin = onlyin
        self.variables = []
        self.functions = []
        self.classes = []
        self._status = None

    def __str__(self):
        return ("vars = " + pformat(self.variables) + "\n" +
                "funcs = " + pformat(self.functions) + "\n" +
                "classes = " + pformat(self.classes) + "\n")

    def _pprint(self, node):
        """Shows the node when verbose."""
        if self.verbose:
            node.show()

    @staticmethod
    def _exportable(name):
        """True if name is present, not underscore-prefixed, and not forbidden."""
        if name is None or name.startswith('_'):
            return False
        return name not in FORBIDDEN_NAMES

    def visit(self, node=None):
        """Visits the node and all sub-nodes, filling the API names
        as it goes.

        Parameters
        ----------
        node : pycparser AST node, optional
            The node to start from.  If this is None, then the
            top-level node is found and visited.

        """
        if node is None:
            node = self._root
        super(PycparserFinder, self).visit(node)

    def visit_Enum(self, node):
        """Records the name of a named, public enum as a variable."""
        if node.coord.file not in self.onlyin:
            return
        name = node.name
        # Anonymous enums have no name (None); skip them rather than crash.
        if not self._exportable(name):
            return
        self._pprint(node)
        self.variables.append(name)

    def visit_FuncDecl(self, node):
        """Records the declared name of a public function."""
        if node.coord.file not in self.onlyin:
            return
        # Peel off every pointer level of the return type to reach the
        # declaration that carries the function name.
        decl = node.type
        while isinstance(decl, pycparser.c_ast.PtrDecl):
            decl = decl.type
        name = getattr(decl, 'declname', None)
        if not self._exportable(name):
            return
        self._pprint(node)
        self.functions.append(name)

    def visit_Struct(self, node):
        """Records the name of a public struct as a class.  Anonymous structs
        set a status marker so that an enclosing typedef can supply the name.
        """
        if node.coord.file not in self.onlyin:
            return
        name = node.name
        if name is None:
            self._status = self._NAME_NOT_FOUND
            return
        if not self._exportable(name):
            return
        self._pprint(node)
        self.classes.append(name)

    def visit_Typedef(self, node):
        """Visits the typedef'd type; if that was an anonymous struct, the
        typedef name is recorded as the class name instead.
        """
        if node.coord.file not in self.onlyin:
            return
        self._pprint(node)
        self._status = None
        self.visit(node.type)
        stat = self._status
        self._status = None
        # Only the anonymous-struct marker means the typedef provides the
        # name; any other status leaves nothing to record.
        if stat != self._NAME_NOT_FOUND:
            return
        name = node.name
        if not self._exportable(name):
            return
        self.classes.append(name)


def pycparser_findall(filename, includes=(), defines=('XDRESS',), undefines=(),
                      extra_parser_args=(), verbose=False, debug=False,
                      builddir='build', language='c', clang_includes=()):
    """Automatically finds all API elements in a file via pycparser.

    Parameters
    ----------
    filename : str
        The path to the file
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while describing the class.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    language : str
        Valid language flag.  Not used by this finder.
    clang_includes : ignored

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    parse_kwargs = dict(includes=includes, defines=defines, undefines=undefines,
                        extra_parser_args=extra_parser_args, verbose=verbose,
                        debug=debug, builddir=builddir)
    root = astparsers.pycparser_parse(filename, **parse_kwargs)
    # Restrict the search to the file itself and its same-stem '.h' header.
    stem = filename.rsplit('.', 1)[0]
    header = stem + '.h'
    finder = PycparserFinder(root, onlyin=set([filename, header]), verbose=verbose)
    finder.visit()
    return finder.variables, finder.functions, finder.classes


#
# Top-level function
#

# Dispatch table mapping a parser name to the finder function that
# implements it; findall() looks up its implementation here.
_finders = {
    'clang': clang_findall,
    'gccxml': gccxml_findall,
    'pycparser': pycparser_findall,
    }

def findall(filename, includes=(), defines=('XDRESS',), undefines=(),
            extra_parser_args=(), parsers='gccxml', verbose=False, debug=False,
            builddir='build', language='c++', clang_includes=()):
    """Automatically finds all API elements in a file.  This is the main entry point.

    Parameters
    ----------
    filename : str
        The path to the file.
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    parsers : str, list, or dict, optional
        The parser / AST to use for the file.  Currently 'clang', 'gccxml',
        and 'pycparser' are supported, though others may be implemented in the
        future.  If this is a string, then this parser is used.  If this is a list,
        this specifies the parser order to use based on availability.  If this is
        a dictionary, it specifies the order to use parser based on language, i.e.
        ``{'c': ['pycparser', 'gccxml'], 'c++': ['gccxml', 'pycparser']}``.
    verbose : bool, optional
        Flag to display extra information while describing the class.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    language : str
        Valid language flag.
    clang_includes : list of str, optional
        clang-specific include paths.

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    # Resolve which parser to use, then hand every option to its finder.
    chosen = astparsers.pick_parser(language, parsers)
    find = _finders[chosen]
    options = dict(includes=includes, defines=defines, undefines=undefines,
                   extra_parser_args=extra_parser_args, verbose=verbose,
                   debug=debug, builddir=builddir, language=language,
                   clang_includes=clang_includes)
    return find(filename, **options)


#
# Persisted Cache for great speed up
#

class AutoNameCache(object):
    """A quick persistent cache for name lists automatically found in files.
    The keys are source filenames.  The values are
    (hash-of-the-file, finder-results) tuples."""

    def __init__(self, cachefile=os.path.join('build', 'autoname.cache')):
        """Parameters
        -------------
        cachefile : str, optional
            Path to description cachefile.

        """
        self.cachefile = cachefile
        if os.path.isfile(cachefile):
            # NOTE(review): unpickling executes whatever the file encodes, so
            # the cache file must come from a trusted build directory.
            with io.open(cachefile, 'rb') as f:
                self.cache = pickle.load(f)
        else:
            self.cache = {}

    @staticmethod
    def _filehash(filename):
        """Returns the md5 hex digest of the current contents of filename."""
        with io.open(filename, 'rb') as f:
            return md5(f.read()).hexdigest()

    def isvalid(self, filename):
        """Boolean on whether the cache value for a filename matches the state
        of the file on the system.  A file that is not cached, or that can no
        longer be read, is reported as not valid."""
        if filename not in self.cache:
            return False
        try:
            currhash = self._filehash(filename)
        except (IOError, OSError):
            # The cached entry refers to a file that vanished or is unreadable.
            return False
        return self.cache[filename][0] == currhash

    def __getitem__(self, key):
        return self.cache[key][1]  # return the results of the finder only

    def __setitem__(self, key, value):
        # The key is the filename; store its current hash next to the results.
        self.cache[key] = (self._filehash(key), value)

    def __delitem__(self, key):
        del self.cache[key]

    def dump(self):
        """Writes the cache out to the filesystem, creating the parent
        directory of the cache file first if it is missing."""
        pardir = os.path.dirname(self.cachefile)
        # A bare filename has an empty parent; os.makedirs('') would raise.
        if pardir and not os.path.exists(pardir):
            os.makedirs(pardir)
        with io.open(self.cachefile, 'wb') as f:
            pickle.dump(self.cache, f, pickle.HIGHEST_PROTOCOL)

    def __str__(self):
        return pformat(self.cache)

#
# Plugin
#

class XDressPlugin(astparsers.ParserPlugin):
    """This plugin resolves the '*' syntax in wrapper types by parsing the
    source files prior to describing them.
    """
    allsrc = varhasstar = fnchasstar = clshasstar = None

    def defaultrc(self):
        """Returns a new RunControl updated with the parent's defaults."""
        defaults = RunControl()
        defaults._update(super(XDressPlugin, self).defaultrc)
        return defaults

    def report_debug(self, rc):
        """Appends the state gathered by this plugin to the parent's report."""
        msg = super(XDressPlugin, self).report_debug(rc)
        msg += "Autoall:\n\n"
        sections = (
            ("allsrc = {0}\n\n", self.allsrc),
            ("varhasstar = {0}\n\n", self.varhasstar),
            ("fnchasstar = {0}\n\n", self.fnchasstar),
            ("clshasstar = {0}\n\n", self.clshasstar),
            )
        msg += "".join(fmt.format(pformat(val)) for fmt, val in sections)
        return msg

    def setup(self, rc):
        """Expands variables, functions, and classes in the rc based on
        copying src filenames to tar filename and the special '*' all syntax."""
        super(XDressPlugin, self).setup(rc)
        self.setup_basic(rc)
        self.setup_heavy(rc)

    def execute(self, rc):
        # dummy
        pass

    # Helper methods

    @staticmethod
    def _gather(entries, allsrc):
        """Converts every entry to an apiname in place and records the source
        files (mapped to their language) of each '*' entry in allsrc.
        Returns whether any '*' entry was seen."""
        sawstar = False
        for idx, entry in enumerate(entries):
            entries[idx] = entry = ensure_apiname(entry)
            if entry.srcname != '*':
                continue
            lang = entry.language
            allsrc.update((src, lang) for src in entry.srcfiles)
            sawstar = True
        return sawstar

    @staticmethod
    def _expand(entries, allfiles, slot):
        """Returns a new list in which each '*' entry is replaced by one entry
        per name found (at position slot of the finder results) in each of
        its source files; other entries are kept as they are."""
        expanded = []
        for entry in entries:
            if entry.srcname != '*':
                expanded.append(entry)
                continue
            for srcfile in entry.srcfiles:
                expanded.extend(entry._replace(srcname=x, tarname=x)
                                for x in allfiles[srcfile][slot])
        return expanded

    def setup_basic(self, rc):
        """Does the easy part of setting up an autodescribe environment"""
        # first pass -- gather and expand target
        allsrc = {}
        varhasstar = self._gather(rc.variables, allsrc)
        fnchasstar = self._gather(rc.functions, allsrc)
        clshasstar = self._gather(rc.classes, allsrc)
        self.allsrc = allsrc
        self.varhasstar = varhasstar
        self.fnchasstar = fnchasstar
        self.clshasstar = clshasstar

    def setup_heavy(self, rc):
        """Does the hard work of actually searching the source files."""
        print("autoall: discovering API names")
        if not (self.varhasstar or self.fnchasstar or self.clshasstar):
            print("autoall: no API names to discover!")
            return
        kinds = ['variables', 'functions', 'classes']

        # second pass -- find all
        allfiles = {}
        namecache = AutoNameCache(
            cachefile=os.path.join(rc.builddir, 'autoname.cache'))
        for count, (srcfile, lang) in enumerate(self.allsrc.items()):
            print("autoall: searching {0}".format(srcfile))
            if not namecache.isvalid(srcfile):
                # Cache miss or stale entry: parse the file and persist the
                # result right away.
                found = findall(srcfile, includes=rc.includes, defines=rc.defines,
                                undefines=rc.undefines,
                                extra_parser_args=rc.extra_parser_args,
                                parsers=rc.parsers, verbose=rc.verbose,
                                debug=rc.debug, builddir=rc.builddir, language=lang,
                                clang_includes=rc.clang_includes)
                namecache[srcfile] = found
                namecache.dump()
            else:
                found = namecache[srcfile]
            allfiles[srcfile] = found
            for slot, kind in enumerate(kinds):
                names = found[slot]
                if len(names) > 0:
                    fstr = ", ".join(map(str, names))
                    print("autoall: found {0}: {1}".format(kind, fstr))
            if count % rc.clear_parser_cache_period == 0:
                astparsers.clearmemo()

        # third pass -- replace *s
        if self.varhasstar:
            rc.variables = self._expand(rc.variables, allfiles, 0)
        if self.fnchasstar:
            rc.functions = self._expand(rc.functions, allfiles, 1)
        if self.clshasstar:
            rc.classes = self._expand(rc.classes, allfiles, 2)
Navigation

Source code for xdress.autoall

Quick search

Navigation