Source code for xdress.astparsers

"""This module creates abstract syntax trees using external tools
(GCC-XML, pycparser) of C/C++ code.

:author: Anthony Scopatz <scopatz@gmail.com>

AST Parsers API
==========================
"""
from __future__ import print_function
import os
import io
import sys
from copy import deepcopy
import linecache
import subprocess
import itertools
import tempfile
import functools
import collections
from pprint import pprint, pformat
from warnings import warn
import gzip
try:
    import cPickle as pickle
except ImportError:
    import pickle

if os.name == 'nt':
    import ntpath
    import posixpath

# GCC-XML conditional imports
HAVE_LXML = False
try:
    from lxml import etree
    HAVE_LXML = True
except ImportError:
    try:
        # Python 2.5
        import xml.etree.cElementTree as etree
    except ImportError:
        try:
            # Python 2.5
            import xml.etree.ElementTree as etree
        except ImportError:
            try:
                # normal cElementTree install
                import cElementTree as etree
            except ImportError:
                try:
                    # normal ElementTree install
                    import elementtree.ElementTree as etree
                except ImportError:
                    pass

# pycparser conditional imports
try:
    import pycparser
    PycparserNodeVisitor = pycparser.c_ast.NodeVisitor
except ImportError:
    pycparser = None
    PycparserNodeVisitor = object  # fake this for class definitions

# clang conditional imports
try:
    from . import clang
    from .clang import cindex
    # Make sure we use our own version of libclang.so
    from .clang import libclang
    cindex.Config.set_library_file(libclang.__file__)
except ImportError:
    clang = None

from . import utils
from .utils import guess_language, RunControl, NotSpecified, ensuredirs
from .plugins import Plugin

# Maps each supported parser name to whether it can be used in this
# environment.  clang and pycparser are judged by whether their imports
# succeeded; gccxml is probed by trying to run the executable.
PARSERS_AVAILABLE = {
    'clang': clang is not None,
    'pycparser': pycparser is not None,
    }
with tempfile.NamedTemporaryFile() as f:
    # The temporary file only serves as a sink for whatever gccxml prints.
    try:
        retcode = subprocess.call(['gccxml'], stdout=f, stderr=f)
    except OSError:
        # Raised when the gccxml executable cannot be launched at all.
        PARSERS_AVAILABLE['gccxml'] = False
    else:
        # A zero return code is taken to mean a working installation.
        PARSERS_AVAILABLE['gccxml'] = (retcode == 0)
del f

if sys.version_info[0] >= 3:
    basestring = str

def _makekey(obj):
    if isinstance(obj, basestring):
        return obj
    elif isinstance(obj, collections.Sequence):
        return tuple([_makekey(o) for o in obj])
    elif isinstance(obj, collections.Set):
        return frozenset([_makekey(o) for o in obj])
    elif isinstance(obj, collections.Mapping):
        return tuple([(_makekey(k), _makekey(v)) for k, v in sorted(obj.items())])
    else:
        return obj

def _memoize_parser(f):
    """Decorator that memoizes ``f`` on its positional and keyword arguments.

    The cache is a plain dict stored as ``f.cache``.  ``functools.wraps``
    copies ``f.__dict__`` onto the wrapper, so the same dict is also reachable
    as ``memoizer.cache``.

    Parameters
    ----------
    f : callable
        The function to memoize.

    Returns
    -------
    memoizer : callable
        Wrapper that returns the cached value when the same arguments are
        seen again.  Calls whose arguments cannot be turned into a hashable
        key are passed straight through to ``f`` without caching.

    """
    # based off code from http://wiki.python.org/moin/PythonDecoratorLibrary
    cache = f.cache = {}

    @functools.wraps(f)
    def memoizer(*args, **kwargs):
        # Positional and keyword parts are kept in separate slots so that
        # f(('x', 1)) and f(x=1) can never produce the same key.
        key = (_makekey(args), _makekey(kwargs))
        try:
            return cache[key]
        except KeyError:
            # Hashable key that has not been seen yet; compute it below.
            pass
        except TypeError:
            # Unhashable key: hashing fails on lookup, so skip the cache.
            return f(*args, **kwargs)
        value = f(*args, **kwargs)
        cache[key] = value
        return value
    return memoizer

def clearmemo():
    """Clears all function memoizations for autodescribers."""
    # Every module-level callable that carries a ``cache`` attribute is
    # treated as memoized and has that cache emptied in place.
    memoized = [obj for obj in globals().values()
                if callable(obj) and hasattr(obj, 'cache')]
    for func in memoized:
        func.cache.clear()
def not_implemented(obj):
    """Decorator that replaces ``obj`` with a stub raising NotImplementedError.

    Unless ``obj`` is a class, a reST warning is appended to its docstring
    (the docstring of ``obj`` itself is modified).  The returned stub carries
    the metadata of ``obj`` via ``functools.wraps`` and raises
    NotImplementedError whenever it is called.
    """
    if not isinstance(obj, type):
        warning = ("\n\n.. warning:: This has not yet been implemented "
                   "fully or at all.\n\n")
        obj.__doc__ = (obj.__doc__ or '') + warning

    @functools.wraps(obj)
    def func(*args, **kwargs):
        template = "The functionality in {0} has not been implemented fully or at all"
        raise NotImplementedError(template.format(obj))

    return func


#
# GCC-XML Describers
#
@_memoize_parser
def gccxml_parse(filename, includes=(), defines=('XDRESS',), undefines=(),
                 extra_parser_args=(), verbose=False, debug=False, builddir='build',
                 clang_includes=()):
    """Use GCC-XML to parse a file. This function is automatically memoized.

    The XML produced by gccxml is stored in ``builddir`` under a name derived
    from ``filename``.  If that file already exists it is parsed directly and
    gccxml is not run again.

    Parameters
    ----------
    filename : str
        The path to the file.
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while describing the class.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    clang_includes : ignored

    Returns
    -------
    root : XML etree
        An in memory tree representing the parsed file.

    """
    drive, xmlname = os.path.splitdrive(filename)
    if len(drive) > 0:
        # Windows drive handling, 'C:' -> 'C_'
        xmlname = drive.replace(':', '_') + xmlname
    xmlname = xmlname.replace(os.path.sep, '_').rsplit('.', 1)[0] + '.xml'
    xmlname = os.path.join(builddir, xmlname)
    cmd = ['gccxml', filename, '-fxml=' + xmlname]
    cmd += ['-I' + i for i in includes]
    cmd += ['-D' + d for d in defines]
    cmd += ['-U' + u for u in undefines]
    cmd += extra_parser_args
    if verbose:
        print(" ".join(cmd))
    # NOTE(review): the cached file name depends only on ``filename``, so a
    # cached XML file is reused even when includes/defines differ -- confirm
    # that this is intended.
    fresh = not os.path.isfile(xmlname)
    if fresh:
        ensuredirs(xmlname)
    f = io.open(xmlname, 'w+b' if fresh else 'r+b')
    parsed = False
    try:
        if fresh:
            subprocess.call(cmd)
        f.seek(0)
        root = etree.parse(f)
        parsed = True
    finally:
        # Always release the handle, even when parsing raises.
        f.close()
        if fresh and not parsed:
            # Opening with 'w+b' created the file before gccxml ran.  If the
            # run or the parse failed, remove it so that the next call
            # regenerates it rather than re-reading a broken cache entry.
            try:
                os.remove(xmlname)
            except OSError:
                pass
    return root


#
# clang parser
#
@_memoize_parser
def clang_parse(filename, includes=(), defines=('XDRESS',), undefines=(),
                extra_parser_args=(), verbose=False, debug=False, builddir='build',
                language='c++', clang_includes=()):
    """Use clang to parse a file.  This function is automatically memoized.

    Parameters
    ----------
    filename : str
        The path to the file.
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while describing the class.
        Ignored.
    debug : bool, optional
        Flag to enable/disable debug mode.  Currently ignored.
    builddir : str, optional
        Ignored.  Exists only for compatibility with gccxml_parse.
    language : str
        Valid language flag.
    clang_includes : list of str, optional
        clang-specific includes paths.  These are placed before ``includes``
        on the command line.

    Returns
    -------
    tu : libclang TranslationUnit object

    Raises
    ------
    RuntimeError
        If any diagnostic of severity Error or higher is reported.  Each such
        diagnostic is printed before the exception is raised.

    """
    include_dirs = tuple(clang_includes) + tuple(includes)
    clang_args = ['-x', language]
    clang_args = clang_args + ['-I' + path for path in include_dirs]
    clang_args = clang_args + ['-D' + name for name in defines]
    clang_args = clang_args + ['-U' + name for name in undefines]
    clang_args = clang_args + list(extra_parser_args)

    index = cindex.Index.create()
    tu = index.parse(filename,
                     options=cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
                     args=clang_args)

    # Check for fatal errors
    errors = [diag for diag in tu.diagnostics
              if diag.severity >= cindex.Diagnostic.Error]
    for diag in errors:
        print(diag.format())
    if errors:
        raise RuntimeError('failed to parse {0}'.format(filename))
    return tu


#
# pycparser Describers
#
@_memoize_parser
def pycparser_parse(filename, includes=(), defines=('XDRESS',), undefines=(),
                    extra_parser_args=(), verbose=False, debug=False,
                    builddir='build', clang_includes=()):
    """Use pycparser to parse a file.  This function is automatically memoized.

    The resulting AST is pickled and gzipped into ``builddir`` under a name
    derived from ``filename``.  If that file already exists it is loaded and
    returned without running pycparser again.

    Parameters
    ----------
    filename : str
        The path to the file.
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while describing the class.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    clang_includes : ignored

    Returns
    -------
    root : AST
        A pycparser abstract syntax tree.

    """
    drive, pklgzname = os.path.splitdrive(filename)
    if len(drive) > 0:
        # Windows drive handling, 'C:' -> 'C_' (same scheme as gccxml_parse).
        # Without it os.path.join() below would discard builddir, because a
        # component that starts with a drive letter resets the joined path.
        pklgzname = drive.replace(':', '_') + pklgzname
    pklgzname = pklgzname.replace(os.path.sep, '_').rsplit('.', 1)[0] + '.pkl.gz'
    pklgzname = os.path.join(builddir, pklgzname)
    if os.path.isfile(pklgzname):
        # NOTE(review): unpickling runs arbitrary code from the file, so this
        # is only safe as long as builddir contains trusted files.
        with gzip.open(pklgzname, 'rb') as f:
            root = pickle.loads(f.read())
        return root
    cpp_args = [r'-D__attribute__(x)=',  # Workaround for GNU libc
                r'-D__asm__(x)=',
                r'-D__const=',
                r'-D__builtin_va_list=int',  # just fake this
                r'-D__restrict=',
                r'-D__extension__=',
                r'-D__inline__=',
                r'-D__inline=',
                ]
    cpp_args += ['-I' + i for i in includes]
    cpp_args += ['-D' + d for d in defines]
    cpp_args += ['-U' + u for u in undefines]
    cpp_args += extra_parser_args
    root = pycparser.parse_file(filename, use_cpp=True, cpp_args=cpp_args)
    # Make sure the cache directory exists before writing, as gccxml_parse does.
    ensuredirs(pklgzname)
    with gzip.open(pklgzname, 'wb') as f:
        f.write(pickle.dumps(root, pickle.HIGHEST_PROTOCOL))
    return root


#
# General utilities
#
def pick_parser(file_or_lang, parsers):
    """Determines the parser to use for a file.

    Parameters
    ----------
    file_or_lang : str
        The path to the file OR a valid language flag.
    parsers : str, list, or dict
        The parser / AST to use for the file.  Currently 'clang', 'gccxml',
        and 'pycparser' are supported, though others may be implemented in
        the future.  If this is a string, then this parser is used.  If this
        is a list, this specifies the parser order to use based on
        availability.  If this is a dictionary, it specifies the order to use
        parser based on language, i.e.
        ``{'c': ['pycparser', 'gccxml'], 'c++': ['gccxml', 'pycparser']}``.

    Returns
    -------
    parser : str
        The name of the parser to use.  Names picked from a list or dict are
        lower-cased; a plain string is returned unchanged.

    Raises
    ------
    RuntimeError
        If none of the candidate parsers is available.
    ValueError
        If ``parsers`` is not a string, sequence, or mapping.

    """
    # The ABC aliases on ``collections`` were removed in Python 3.10.
    try:
        from collections.abc import Mapping, Sequence
    except ImportError:  # Python 2
        from collections import Mapping, Sequence

    def first_available(candidates):
        # Unknown names count as unavailable; they do not raise KeyError.
        for name in candidates:
            if PARSERS_AVAILABLE.get(name.lower(), False):
                return name.lower()
        return None

    if isinstance(parsers, basestring):
        return parsers
    if isinstance(parsers, Sequence):
        parser = first_available(parsers)
        if parser is None:
            msg = "Parsers not available: {0}".format(", ".join(parsers))
            raise RuntimeError(msg)
        return parser
    if isinstance(parsers, Mapping):
        lang = file_or_lang if file_or_lang in parsers \
                            else guess_language(file_or_lang)
        candidates = parsers[lang]
        if isinstance(candidates, basestring):
            # A bare name is treated as a one-element preference list.
            candidates = [candidates]
        parser = first_available(candidates)
        if parser is None:
            # Report the parsers tried for this language, not the dict's
            # language keys.
            msg = "{0} parsers not available: {1}"
            msg = msg.format(lang.capitalize(), ", ".join(candidates))
            raise RuntimeError(msg)
        return parser
    raise ValueError("type of parsers not intelligible")
def _pformat_etree_inplace(elem, level=0): i = "\n" + level*" " if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + " " if not elem.tail or not elem.tail.strip(): elem.tail = i for elem in elem: _pformat_etree_inplace(elem, level+1) if not elem.tail or not elem.tail.strip(): elem.tail = i else: if level and (not elem.tail or not elem.tail.strip()): elem.tail = i
def dumpast(filename, parsers, sourcedir, includes=(), defines=('XDRESS',),
            undefines=(), verbose=False, debug=False, builddir='build',
            clang_includes=()):
    """Prints an abstract syntax tree to stdout.

    Parameters
    ----------
    filename : str
        The path to the file.  If it is not a regular file as given, it is
        looked up relative to ``sourcedir``.
    parsers : str, list, or dict
        Parser specification, resolved with pick_parser().
    sourcedir : str
        Directory to fall back to when locating ``filename``.
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    verbose : bool, optional
        Passed on to the parser.
    debug : bool, optional
        Passed on to the parser.
    builddir : str, optional
        Location of -- often temporary -- build files.
    clang_includes : ignored
        Accepted so that callers which supply clang include paths (such as
        ParserPlugin.setup) do not fail; only the pycparser and gccxml
        parsers can be dumped here and neither uses it.

    Notes
    -----
    The process exits via ``sys.exit`` when the file cannot be found or when
    the selected parser is neither 'pycparser' nor 'gccxml'.

    """
    if not os.path.isfile(filename):
        filename = os.path.join(sourcedir, filename)
        if not os.path.isfile(filename):
            sys.exit(filename + " is not a regular file")
    parser = pick_parser(filename, parsers)
    if parser == 'pycparser':
        root = pycparser_parse(filename, includes=includes, defines=defines,
                               undefines=undefines, verbose=verbose, debug=debug,
                               builddir=builddir)
        root.show()
    elif parser == 'gccxml':
        root = gccxml_parse(filename, includes=includes, defines=defines,
                            undefines=undefines, verbose=verbose, debug=debug,
                            builddir=builddir)
        if HAVE_LXML:
            text = etree.tostring(root, pretty_print=True)
        else:
            # gccxml_parse() returns the result of etree.parse(), i.e. an
            # ElementTree.  The indenter and the non-lxml tostring() both
            # need the root Element instead.
            elem = root.getroot() if hasattr(root, 'getroot') else root
            _pformat_etree_inplace(elem)
            text = etree.tostring(elem)
        if not isinstance(text, str):
            # Python 3: tostring() gave bytes; decode so that the XML itself
            # is printed rather than its b'...' repr.
            text = text.decode('utf-8')
        print(text)
    else:
        sys.exit(parser + " is not a valid parser")


#
# Plugin
#
class ParserPlugin(Plugin):
    """This is a base plugin for tools that wish to wrap parsing.
    It should not be used directly."""

    requires = ('xdress.base',)
    """This plugin requires 'xdress.base'."""

    # Default run control values contributed by this plugin.
    defaultrc = utils.RunControl(
        includes=['.'],
        clang_includes=(),
        defines=["XDRESS"],
        undefines=[],
        variables=(),
        functions=(),
        classes=(),
        parsers={'c': ['pycparser', 'clang', 'gccxml'],
                 'c++':['clang', 'gccxml', 'pycparser']},
        clear_parser_cache_period=50,
        dumpast=NotSpecified,
        extra_parser_args=(),
        )

    # How to combine an existing rc value with a newly supplied one: new
    # include directories are placed first, new extra parser arguments last.
    rcupdaters = {'includes': (lambda old, new: list(new) + list(old)),
                  'extra_parser_args': (lambda old, new: list(old) + list(new)),
                  }

    # Help text for each run control parameter; reused for the command line.
    rcdocs = {
        'includes': "Additional include directories",
        'defines': "Set additional macro definitions",
        'undefines': "Unset additional macro definitions",
        'variables': ("A list of variable names in sequence, mapping, "
                      "or apiname format"),
        'functions': ("A list of function names in sequence, mapping, "
                      "or apiname format"),
        'classes': ("A list of class names in sequence, mapping, "
                    "or apiname format"),
        'parsers': "Parser(s) name, list, or dict",
        'clear_parser_cache_period': ("Number of parser calls to perform before "
                                      "clearing the internal cache. This prevents "
                                      "nasty memory overflow issues."),
        'dumpast': "Prints the abstract syntax tree of a file.",
        'clang_includes': "clang-specific include paths",
        'extra_parser_args': "Further command line arguments to pass to the parser"
        }

    def update_argparser(self, parser):
        """Adds the parser-related command line options to ``parser``."""
        rcdocs = self.rcdocs() if callable(self.rcdocs) else self.rcdocs
        parser.add_argument('-I', '--includes', action='store', dest='includes',
                            nargs="+", help=rcdocs["includes"])
        # NOTE(review): action='append' together with nargs='+' yields one
        # list per occurrence of the flag (a list of lists) -- confirm that
        # the code consuming rc.defines / rc.undefines expects that shape.
        parser.add_argument('-D', '--defines', action='append', dest='defines',
                            nargs="+", help=rcdocs["defines"])
        parser.add_argument('-U', '--undefines', action='append', dest='undefines',
                            nargs="+", type=str, help=rcdocs["undefines"])
        parser.add_argument('-p', action='store', dest='parsers',
                            help=rcdocs["parsers"])
        parser.add_argument('--clear-parser-cache-period', action='store',
                            dest='clear_parser_cache_period', type=int,
                            help=rcdocs["clear_parser_cache_period"])
        parser.add_argument('--dumpast', action='store', dest='dumpast',
                            metavar="FILE", help=rcdocs["dumpast"])
        parser.add_argument('--clang-includes', action='store', dest='clang_includes',
                            nargs="+", help=rcdocs["clang_includes"])
        parser.add_argument('--extra-parser-args', action='store',
                            dest='extra_parser_args', nargs="+",
                            help=rcdocs["extra_parser_args"])

    def setup(self, rc):
        """Remember to call super() on subclasses!"""
        if isinstance(rc.parsers, basestring):
            if '[' in rc.parsers or '{' in rc.parsers:
                # NOTE(review): eval() executes whatever expression the user
                # supplied for the parsers option.  Acceptable only while
                # that value comes from a trusted command line / rc file;
                # ast.literal_eval would be enough for list/dict literals.
                rc.parsers = eval(rc.parsers)
        # This should go last
        if rc.dumpast is not NotSpecified:
            # clang_includes is deliberately not forwarded: dumpast() only
            # handles the pycparser and gccxml parsers, neither of which uses
            # it, and passing an unknown keyword raises TypeError.
            dumpast(rc.dumpast, rc.parsers, rc.sourcedir, includes=rc.includes,
                    defines=rc.defines, undefines=rc.undefines,
                    verbose=rc.verbose, debug=rc.debug, builddir=rc.builddir)
            sys.exit()

    def execute(self, rc):
        """Always raises: this base class is not a complete plugin."""
        raise TypeError("ParserPlugin is not a complete plugin. Do not use directly")

    def report_debug(self, rc):
        """Remember to call super() on subclasses!"""
        msg = 'Autodescriber parsers available:\n\n{0}\n\n'
        msg = msg.format(pformat(PARSERS_AVAILABLE))
        return msg