cef/tools/cef_api_hash.py
# Copyright (c) 2013 The Chromium Embedded Framework Authors. All rights
# reserved. Use of this source code is governed by a BSD-style license that
# can be found in the LICENSE file.
from __future__ import absolute_import
from __future__ import print_function
from clang_util import clang_eval
from file_util import *
import hashlib
import itertools
import os
import re
import string
import sys
import time
from version_util import EXP_VERSION
# Determines string type for python 2 and python 3.
if sys.version_info[0] == 3:
string_type = str
else:
string_type = basestring
def _run_clang_eval(filename, content, api_version, added_defines, verbose):
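  """ Preprocesses |content| from |filename| with clang using version-specific
  defines.

  Returns only the header-specific portion of the preprocessed output, or None
  if clang evaluation fails.
  """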
# Add a tag so we know where the header-specific output begins.
tag = 'int begin_includes_tag;\n'
find = '#ifdef __cplusplus\nextern "C" {'
pos = content.find(find)
assert pos > 0, filename
content = content[0:pos] + tag + content[pos:]
defines = [
# Makes sure CEF_EXPORT is defined.
'USING_CEF_SHARED',
# Avoid include of generated headers.
'GENERATING_CEF_API_HASH',
]
if filename.find('test/') >= 0:
# Avoids errors parsing test includes.
defines.append('UNIT_TEST')
  api_version = int(api_version)
  if api_version != EXP_VERSION:
    # Not the experimental version; specify the exact API version.
    defines.append('CEF_API_VERSION=%d' % api_version)
  if added_defines is not None:
    defines.extend(added_defines)
includes = [
# Includes relative to the 'src/cef' directory.
'.',
# Includes relative to the 'src' directory.
'..',
]
result = clang_eval(
filename,
content,
defines=defines,
includes=includes,
as_cpp=False,
verbose=verbose)
if result is None:
return None
pos = result.find(tag)
assert pos > 0, filename
result = result[pos + len(tag):]
replacements = [
# Undo substitutions from cef_export.h
['__declspec(dllimport)', 'CEF_EXPORT'],
['__attribute__((visibility("default")))', 'CEF_EXPORT'],
['__stdcall', ''],
]
for find, replace in replacements:
result = result.replace(find, replace)
return result
class cef_api_hash:
""" CEF API hash calculator """
def __init__(self, headerdir, verbose=False):
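    """ Loads and caches C API header contents from |headerdir|.

    Headers that contain version-dependent '#if CEF_API' blocks are stored as
    raw text for later clang evaluation in calculate(); all other headers are
    parsed into objects immediately.
    """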
if headerdir is None or len(headerdir) == 0:
raise AssertionError("headerdir is not specified")
self.__headerdir = headerdir
self.__verbose = verbose
self.platforms = ["windows", "mac", "linux"]
cef_dir = os.path.abspath(os.path.join(self.__headerdir, os.pardir))
# Read the variables list from the autogenerated cef_paths.gypi file.
cef_paths = eval_file(os.path.join(cef_dir, 'cef_paths.gypi'))
cef_paths = cef_paths['variables']
# Read the variables list from the manually edited cef_paths2.gypi file.
cef_paths2 = eval_file(os.path.join(cef_dir, 'cef_paths2.gypi'))
cef_paths2 = cef_paths2['variables']
# Excluded files (paths relative to the include/ directory).
excluded_files = []
# List of platform-specific C API include/ files.
self.platform_files = {
"windows":
self.__get_filenames(cef_dir, cef_paths2['includes_win_capi'],
excluded_files),
"mac":
self.__get_filenames(cef_dir, cef_paths2['includes_mac_capi'],
excluded_files),
"linux":
self.__get_filenames(cef_dir, cef_paths2['includes_linux_capi'],
excluded_files)
}
# List of all C API include/ files.
paths = cef_paths2['includes_capi'] + cef_paths2['includes_common_capi'] + \
cef_paths2['includes_linux_capi'] + cef_paths2['includes_mac_capi'] + \
cef_paths2['includes_win_capi'] + cef_paths['autogen_capi_includes']
self.filenames = self.__get_filenames(cef_dir, paths, excluded_files)
self.filecontents = {}
self.filecontentobjs = {}
# Cache values that will not change between calls to calculate().
for filename in self.filenames:
if self.__verbose:
print("Processing " + filename + "...")
      assert filename not in self.filecontents, filename
      assert filename not in self.filecontentobjs, filename
content = read_file(os.path.join(self.__headerdir, filename), True)
content_objects = None
      # cef_string.h is a special case: grab only the defined
      # CEF_STRING_TYPE_xxx declarations.
if filename == "internal/cef_string.h":
content_objects = self.__parse_string_type(content)
elif content.find('#if CEF_API') >= 0:
# Needs to be passed to clang with version-specific defines.
self.filecontents[filename] = content
else:
content_objects = self.__parse_objects(content)
      if content_objects is not None:
self.__prepare_objects(filename, content_objects)
self.filecontentobjs[filename] = content_objects
def calculate(self, api_version, debug_dir=None, added_defines=None):
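    """ Calculates API hashes for |api_version|.

    Returns a dict mapping 'universal' and each platform name to a SHA1 hex
    digest, or False if clang evaluation fails for any header. Intermediate
    files are written to |debug_dir| when it is specified.
    """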
    debug_enabled = debug_dir is not None and len(debug_dir) > 0
objects = []
for filename in self.filenames:
if self.__verbose:
print("Processing " + filename + "...")
content = self.filecontents.get(filename, None)
      if content is not None:
assert content.find('#if CEF_API') >= 0, filename
content = _run_clang_eval(filename, content, api_version, added_defines,
self.__verbose)
if content is None:
sys.stderr.write(
'ERROR: Failed to compute API hash for %s\n' % filename)
return False
if debug_enabled:
self.__write_debug_file(
debug_dir, 'clang-' + filename.replace('/', '-'), content)
        # Content must always start with a newline, as required by
        # __parse_objects().
content_objects = self.__parse_objects('\n' + content)
self.__prepare_objects(filename, content_objects)
else:
content_objects = self.filecontentobjs.get(filename, None)
        assert content_objects is not None, filename
objects.extend(content_objects)
    # Sort objects by name and filename so that universal hashes are stable.
objects = sorted(objects, key=lambda o: o["name"] + "@" + o["filename"])
if debug_enabled:
namelen = max([len(o["name"]) for o in objects])
filenamelen = max([len(o["filename"]) for o in objects])
dumpsig = []
for o in objects:
dumpsig.append(
format(o["name"], str(namelen) + "s") + "|" + format(
o["filename"], "" + str(filenamelen) + "s") + "|" + o["text"])
self.__write_debug_file(debug_dir, "objects.txt", dumpsig)
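    # Compute a SHA1 digest over the signature for 'universal' and for each
    # platform.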
revisions = {}
for platform in itertools.chain(["universal"], self.platforms):
sig = self.__get_final_sig(objects, platform)
if debug_enabled:
self.__write_debug_file(debug_dir, platform + ".sig", sig)
revstr = hashlib.sha1(sig.encode('utf-8')).hexdigest()
revisions[platform] = revstr
return revisions
def __parse_objects(self, content):
""" Returns array of objects in content file. """
objects = []
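    # Strip C++-style single-line comments so they do not affect matching.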
content = re.sub(r"//.*\n", "", content)
# function declarations
for m in re.finditer(
r"\nCEF_EXPORT\s+?.*?\s+?(\w+)\s*?\(.*?\)\s*?;",
content,
flags=re.DOTALL):
object = {"name": m.group(1), "text": m.group(0).strip()}
objects.append(object)
# structs
for m in re.finditer(
r"\ntypedef\s+?struct\s+?(\w+)\s+?\{.*?\}\s+?(\w+)\s*?;",
content,
flags=re.DOTALL):
text = m.group(0).strip()
# remove 'CEF_CALLBACK' to normalize cross-platform clang output
text = text.replace('CEF_CALLBACK', '')
object = {"name": m.group(2), "text": text}
objects.append(object)
# enums
for m in re.finditer(
r"\ntypedef\s+?enum\s+?\{.*?\}\s+?(\w+)\s*?;", content,
flags=re.DOTALL):
object = {"name": m.group(1), "text": m.group(0).strip()}
objects.append(object)
# typedefs
for m in re.finditer(r"\ntypedef\s+?.*?\s+(\w+);", content, flags=0):
object = {"name": m.group(1), "text": m.group(0).strip()}
objects.append(object)
return objects
def __prepare_objects(self, filename, objects):
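    """ Normalizes object text and annotates each object with its filename and
    the platforms it applies to. """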
platforms = list(
[p for p in self.platforms if self.__is_platform_filename(filename, p)])
for o in objects:
o["text"] = self.__prepare_text(o["text"])
o["platforms"] = platforms
o["filename"] = filename
def __parse_string_type(self, content):
""" Grab defined CEF_STRING_TYPE_xxx """
objects = []
for m in re.finditer(
r"\n\s*?#\s*?define\s+?(CEF_STRING_TYPE_\w+)\s+?.*?\n",
content,
flags=0):
object = {
"name": m.group(1),
"text": m.group(0),
}
objects.append(object)
return objects
def __prepare_text(self, text):
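    """ Collapses whitespace so that formatting differences do not affect the
    hash. """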
text = text.strip()
text = re.sub(r"\s+", " ", text)
text = re.sub(r"\(\s+", "(", text)
return text
def __get_final_sig(self, objects, platform):
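    """ Returns the signature string for |platform|: the text of every object
    that applies to that platform, one per line. """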
sig = []
for o in objects:
if platform == "universal" or platform in o["platforms"]:
sig.append(o["text"])
return "\n".join(sig)
def __get_filenames(self, cef_dir, paths, excluded_files):
""" Returns file names to be processed, relative to headerdir """
filenames = [
os.path.relpath(os.path.join(cef_dir, filename),
self.__headerdir).replace('\\', '/').lower()
for filename in paths
]
if len(excluded_files) == 0:
return filenames
return [
        filename for filename in filenames if filename not in excluded_files
]
def __is_platform_filename(self, filename, platform):
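    """ Returns True if |filename| applies to |platform|. A file listed only
    for other platforms is excluded; a file not listed for any platform is
    treated as universal. """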
if platform == "universal":
return True
    if platform not in self.platform_files:
return False
listed = False
for p in self.platforms:
if filename in self.platform_files[p]:
if p == platform:
return True
else:
listed = True
return not listed
def __write_debug_file(self, debug_dir, filename, content):
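    """ Writes |content| (a string or a list of lines) to |filename| under
    |debug_dir|. """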
make_dir(debug_dir)
outfile = os.path.join(debug_dir, filename)
    out_dir = os.path.dirname(outfile)
    make_dir(out_dir)
if not isinstance(content, string_type):
content = "\n".join(content)
write_file(outfile, content)
if __name__ == "__main__":
  from optparse import OptionParser
disc = """
This utility calculates the CEF API hash.
"""
parser = OptionParser(description=disc)
parser.add_option(
'--cpp-header-dir',
dest='cppheaderdir',
metavar='DIR',
help='input directory for C++ header files [required]')
parser.add_option(
'--debug-dir',
dest='debugdir',
metavar='DIR',
help='intermediate directory for easy debugging')
parser.add_option(
'-v',
'--verbose',
action='store_true',
dest='verbose',
default=False,
help='output detailed status information')
(options, args) = parser.parse_args()
  # The --cpp-header-dir option is required.
if options.cppheaderdir is None:
parser.print_help(sys.stdout)
sys.exit()
# calculate
c_start_time = time.time()
  calc = cef_api_hash(options.cppheaderdir, options.verbose)
  revisions = calc.calculate(
      api_version=EXP_VERSION, debug_dir=options.debugdir)
c_completed_in = time.time() - c_start_time
if bool(revisions):
print("{")
for k in sorted(revisions.keys()):
print(format("\"" + k + "\"", ">12s") + ": \"" + revisions[k] + "\"")
print("}")
# print
# print 'Completed in: ' + str(c_completed_in)
# print
# print "Press any key to continue...";
# sys.stdin.readline();