# Copyright (c) 2013 The Chromium Embedded Framework Authors. All rights
# reserved. Use of this source code is governed by a BSD-style license that
# can be found in the LICENSE file.

from __future__ import absolute_import
from __future__ import print_function

from typing import Dict, List

from clang_util import clang_eval
from file_util import *
import hashlib
import os
import re
import sys
from version_util import EXP_VERSION

API_TOKEN = '#if CEF_API'
OS_TOKEN = '#if defined(OS_'

INCLUDE_START_MARKER = 'int begin_includes_tag;\n'

INCLUDE_DIRS = [
    '.',  # Includes relative to the 'src/cef' directory.
    '..',  # Includes relative to the 'src' directory.
]

PLATFORM_DEFINES = {
    'windows': ['OS_WIN', 'ARCH_CPU_X86_64'],
    'mac': ['OS_MAC', 'OS_POSIX', 'ARCH_CPU_ARM64'],
    'linux': ['OS_LINUX', 'OS_POSIX', 'CEF_X11', 'ARCH_CPU_X86_64'],
}

SYSTEM_INCLUDES_PATTERN = re.compile(r'(#include <[^>]+>)')
FUNCTION_DECLARATION_PATTERN = re.compile(
    r'\nCEF_EXPORT\s+?.*?\s+?(\w+)\s*?\(.*?\)\s*?;', re.DOTALL)
STRUCT_PATTERN = re.compile(
    r'\ntypedef\s+?struct\s+?(\w+)\s+?\{.*?\}\s+?(\w+)\s*?;', re.DOTALL)
ENUM_PATTERN = re.compile(r'\ntypedef\s+?enum\s+?\{.*?\}\s+?(\w+)\s*?;',
                          re.DOTALL)
TYPEDEF_PATTERN = re.compile(r'\ntypedef\s+?.*?\s+(\w+);')
WHITESPACE_PATTERN = re.compile(r'\s+')
PARENTHESIS_PATTERN = re.compile(r'\(\s+')
SINGLE_LINE_COMMENT_PATTERN = re.compile(r'//.*\n')
CEF_STRING_TYPE_PATTERN = re.compile(
    r'\n\s*?#\s*?define\s+?(CEF_STRING_TYPE_\w+)\s+?.*?\n')
DEFINE_PATTERN = re.compile(r'#define\s+?.*?')
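
# Illustrative examples (not part of the original file) of the kinds of
# declarations the patterns above are intended to match in the C API headers:
#
#   FUNCTION_DECLARATION_PATTERN:  CEF_EXPORT void cef_shutdown(void);
#   STRUCT_PATTERN:   typedef struct _cef_settings_t { ... } cef_settings_t;
#   ENUM_PATTERN:     typedef enum { LOGSEVERITY_DEFAULT, ... } cef_log_severity_t;
#   TYPEDEF_PATTERN:  typedef uint32_t cef_color_t;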


def _prepare_content(content: str) -> str:
  """
  Add a marker to the content to indicate where the header-specific output
  begins, and replace #define directives and system includes with placeholders.
  """
  find = '#ifdef __cplusplus\nextern "C" {'
  pos = content.find(find)
  assert pos > 0, f'Cannot find "{find}" in {content}'
  content = content[0:pos] + INCLUDE_START_MARKER + content[pos:]
  content = DEFINE_PATTERN.sub('//DEFINE_PLACEHOLDER//', content)
  return SYSTEM_INCLUDES_PATTERN.sub('//INCLUDE_PLACEHOLDER//', content)
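
# Illustrative sketch (not part of the original file): for a header fragment
# such as
#
#   #include <stddef.h>
#   #ifdef __cplusplus
#   extern "C" {
#
# _prepare_content() produces
#
#   //INCLUDE_PLACEHOLDER//
#   int begin_includes_tag;
#   #ifdef __cplusplus
#   extern "C" {
#
# so that everything clang emits before the marker can be discarded later by
# _process_result().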


def _process_result(result: str) -> str:
  """
  Remove the non-header-specific output and undo substitutions from
  cef_export.h.
  """
  pos = result.find(INCLUDE_START_MARKER)
  if pos == -1:
    return result

  result = result[pos + len(INCLUDE_START_MARKER):]

  replacements = [
      ['__declspec(dllimport)', 'CEF_EXPORT'],
      ['__attribute__((visibility("default")))', 'CEF_EXPORT'],
      ['__stdcall', ''],
  ]
  for find, replace in replacements:
    result = result.replace(find, replace)

  # Must always start with a newline, as required by _parse_objects().
  return '\n' + result


def _get_defines(filename: str, added_defines: list) -> List[str]:
  defines = [
      # Makes sure CEF_EXPORT is defined.
      'USING_CEF_SHARED',
      # Avoid include of generated headers.
      'GENERATING_CEF_API_HASH',
  ]

  # Avoids errors parsing test includes.
  if filename.find('test/') >= 0:
    defines.append('UNIT_TEST')

  defines.extend(added_defines)
  return defines


def parse_versioned_content(filename: str,
                            content: str,
                            api_version: str,
                            added_defines: list,
                            debug_dir,
                            verbose) -> list:
  """
  Parse the header file content using clang with the specified API version.
  Used for files that are version-specific but not platform-specific.
  """
  content = _prepare_content(content)
  defines = _get_defines(filename, added_defines)
  if api_version != EXP_VERSION:
    # Specify the exact version.
    defines.append(f'CEF_API_VERSION={api_version}')

  result = clang_eval(filename, content, defines, INCLUDE_DIRS, verbose)
  assert result, f'clang failed to eval {filename} with {api_version=}'

  result = _process_result(result)
  if debug_dir:
    _write_debug_file(debug_dir, 'clang-' + filename.replace('/', '-'), result)

  return _parse_objects(filename, result, 'universal')


def parse_platformed_content(filename: str,
                             content: str,
                             debug_dir,
                             verbose,
                             api_version,
                             added_defines: list) -> list:
  """
  Parse the header file content using clang for every supported platform
  with the specified API version. Used for files that are both version-specific
  and platform-specific.
  """
  content = _prepare_content(content)
  defines = _get_defines(filename, added_defines)
  if api_version and api_version != EXP_VERSION:
    # Specify the exact version.
    defines.append(f'CEF_API_VERSION={api_version}')

  content_objects = []
  for platform, platform_defines in PLATFORM_DEFINES.items():
    result = clang_eval(filename, content, defines + platform_defines,
                        INCLUDE_DIRS, verbose)
    assert result, f'clang failed to eval {filename} on {platform=}'

    result = _process_result(result)
    if debug_dir:
      _write_debug_file(
          debug_dir, f'clang-{platform}-' + filename.replace('/', '-'), result)

    objects = _parse_objects(filename, result, platform)
    content_objects.extend(objects)

  return content_objects


def _write_debug_file(debug_dir, filename, content) -> None:
  make_dir(debug_dir)
  outfile = os.path.join(debug_dir, filename)
  dir = os.path.dirname(outfile)
  make_dir(dir)
  if not isinstance(content, str):
    content = '\n'.join(content)
  write_file(outfile, content)


def _parse_objects(filename: str, content: str, platform: str) -> list:
  objects = []
  content = SINGLE_LINE_COMMENT_PATTERN.sub('', content)

  for m in FUNCTION_DECLARATION_PATTERN.finditer(content):
    objects.append({
        'filename': filename,
        'name': m.group(1),
        'platform': platform,
        'text': _prepare_text(m.group(0))
    })

  for m in STRUCT_PATTERN.finditer(content):
    # Remove 'CEF_CALLBACK' to normalize cross-platform clang output.
    objects.append({
        'filename': filename,
        'name': m.group(2),
        'platform': platform,
        'text': _prepare_text(m.group(0).replace('CEF_CALLBACK', ''))
    })

  for m in ENUM_PATTERN.finditer(content):
    objects.append({
        'filename': filename,
        'name': m.group(1),
        'platform': platform,
        'text': _prepare_text(m.group(0))
    })

  for m in TYPEDEF_PATTERN.finditer(content):
    if m.group(1) == 'char16_t':
      # Skip char16_t typedefs to avoid platform-specific differences.
      continue
    objects.append({
        'filename': filename,
        'name': m.group(1),
        'platform': platform,
        'text': _prepare_text(m.group(0))
    })

  return objects


def _prepare_text(text: str) -> str:
  """ Normalize text for hashing. """
  text = WHITESPACE_PATTERN.sub(' ', text.strip())
  return PARENTHESIS_PATTERN.sub('(', text)
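
# Illustrative example (not part of the original file) of the normalization
# performed by _prepare_text():
#
#   _prepare_text('CEF_EXPORT int\n    cef_version_info(\n    int entry);')
#     == 'CEF_EXPORT int cef_version_info(int entry);'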


def _parse_string_type(filename: str, content: str) -> list:
  """ Grab defined CEF_STRING_TYPE_xxx """
  objects = []
  for m in CEF_STRING_TYPE_PATTERN.finditer(content):
    objects.append({
        'filename': filename,
        'name': m.group(1),
        'text': _prepare_text(m.group(0)),
        'platform': 'universal'
    })
  return objects


def _get_final_sig(objects, platform) -> str:
  return '\n'.join(o['text'] for o in objects
                   if o['platform'] == 'universal' or o['platform'] == platform)


def _get_filenames(header_dir) -> List[str]:
  """ Return file names to be processed, relative to header_dir """
  cef_dir = os.path.abspath(os.path.join(header_dir, os.pardir))

  # Read the variables list from the autogenerated cef_paths.gypi file.
  cef_paths = eval_file(os.path.join(cef_dir, 'cef_paths.gypi'))
  cef_paths = cef_paths['variables']

  # Read the variables list from the manually edited cef_paths2.gypi file.
  cef_paths2 = eval_file(os.path.join(cef_dir, 'cef_paths2.gypi'))
  cef_paths2 = cef_paths2['variables']

  # List of all C API include/ files.
  paths = cef_paths2['includes_capi'] + cef_paths2['includes_common_capi'] + \
      cef_paths2['includes_linux_capi'] + cef_paths2['includes_mac_capi'] + \
      cef_paths2['includes_win_capi'] + cef_paths['autogen_capi_includes']

  return [
      os.path.relpath(os.path.join(cef_dir, filename), header_dir).replace(
          '\\', '/').lower() for filename in paths
  ]


def _save_objects_to_file(debug_dir: str, objects: list) -> None:
  name_len = max([len(o['name']) for o in objects])
  filename_len = max([len(o['filename']) for o in objects])
  platform_len = max([len(o['platform']) for o in objects])
  dump_sig = [
      f"{o['name']:<{name_len}}|{o['filename']:<{filename_len}}|{o['platform']:<{platform_len}}|{o['text']}"
      for o in objects
  ]
  _write_debug_file(debug_dir, 'objects.txt', dump_sig)


class CefApiHasher:
  """ CEF API hash calculator """

  def __init__(self, header_dir, debug_dir, verbose=False):
    if header_dir is None or len(header_dir) == 0:
      raise AssertionError('header_dir is not specified')

    self.filenames = _get_filenames(header_dir)
    self.debug_dir = debug_dir
    self.verbose = verbose
    self.versioned_contents = {}
    self.platformed_contents = {}
    self.file_content_objs = {}

    # Cache values that will not change between calls to calculate().
    for filename in self.filenames:
      self.print(f'Processing {filename} ...')
      if filename in self.versioned_contents \
          or filename in self.platformed_contents \
          or filename in self.file_content_objs:
        self.print(f'{filename} already processed, skipping...')
        continue

      content = read_file(os.path.join(header_dir, filename), normalize=True)
      content_objects = None
      is_platform_specific = OS_TOKEN in content
      is_version_specific = API_TOKEN in content

      # Special case for parsing cef_string.h:
      # Only extract declarations of the form #define CEF_STRING_TYPE_xxx
      if filename == 'internal/cef_string.h':
        content_objects = _parse_string_type(filename, content)
      elif is_platform_specific and not is_version_specific:
        # Parse once and cache for all platforms.
        content_objects = parse_platformed_content(
            filename,
            content,
            debug_dir,
            verbose,
            api_version=None,
            added_defines=[])
      elif is_platform_specific and is_version_specific:
        # Needs to be passed to clang with version and platform defines.
        self.platformed_contents[filename] = content
      elif is_version_specific:
        # Needs to be passed to clang with version defines.
        self.versioned_contents[filename] = content
      else:
        content_objects = _parse_objects(filename, content, 'universal')

      if content_objects is not None:
        self.file_content_objs[filename] = content_objects

  def calculate(self, api_version: str, added_defines: list) -> Dict[str, str]:
    """ Calculate the API hash per platform for the specified API version """
    debug_dir = os.path.join(self.debug_dir,
                             api_version) if self.debug_dir else None

    objects = []
    for filename in self.filenames:
      self.print(f'Processing {filename}...')
      if filename in self.versioned_contents:
        content = self.versioned_contents.get(filename, None)
        content_objects = parse_versioned_content(filename, content,
                                                  api_version, added_defines,
                                                  debug_dir, self.verbose)
      elif filename in self.platformed_contents:
        content = self.platformed_contents.get(filename, None)
        content_objects = parse_platformed_content(filename, content, debug_dir,
                                                   self.verbose, api_version,
                                                   added_defines)
      else:
        content_objects = self.file_content_objs.get(filename, None)

      # May be None if the full contents of the file are excluded at certain
      # API versions.
      if content_objects is not None:
        objects.extend(content_objects)

    # Sort on both name and filename so that the resulting hashes are stable.
    objects = sorted(objects, key=lambda o: f"{o['name']}@{o['filename']}")

    if debug_dir:
      _save_objects_to_file(debug_dir, objects)

    hashes = {}
    for platform in PLATFORM_DEFINES.keys():
      sig = _get_final_sig(objects, platform)
      if debug_dir:
        _write_debug_file(debug_dir, f'{platform}.sig', sig)
      hashes[platform] = hashlib.sha1(sig.encode('utf-8')).hexdigest()
    return hashes

  def print(self, msg):
    if self.verbose:
      print(msg)
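

# Illustrative sketch (not part of the original file) of programmatic use; the
# 'include' header directory path is an assumption for this example:
#
#   hasher = CefApiHasher('include', debug_dir=None, verbose=False)
#   hashes = hasher.calculate(EXP_VERSION, added_defines=[])
#   # hashes maps each platform to a SHA-1 digest, e.g.
#   # {'windows': '...', 'mac': '...', 'linux': '...'}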


if __name__ == '__main__':
  from optparse import OptionParser

  parser = OptionParser(description='This utility calculates the CEF API hash')
  parser.add_option(
      '--cpp-header-dir',
      dest='cpp_header_dir',
      metavar='DIR',
      help='input directory for C++ header files [required]')
  parser.add_option(
      '--debug-dir',
      dest='debug_dir',
      metavar='DIR',
      help='intermediate directory for easy debugging')
  parser.add_option(
      '-v',
      '--verbose',
      action='store_true',
      dest='verbose',
      default=False,
      help='output detailed status information')
  (options, args) = parser.parse_args()

  # The cpp_header_dir option is required.
  if options.cpp_header_dir is None:
    parser.print_help(sys.stdout)
    sys.exit()

  calc = CefApiHasher(options.cpp_header_dir, options.debug_dir,
                      options.verbose)
  revisions = calc.calculate(EXP_VERSION, [])

  print('{')
  for k in sorted(revisions.keys()):
    print(format('"' + k + '"', '>12s') + ': "' + revisions[k] + '"')
  print('}')
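
# Illustrative invocation (not part of the original file); the paths are
# assumptions for this example:
#
#   python tools/cef_api_hash.py --cpp-header-dir include --debug-dir /tmp/hashdbg
#
# The script prints one SHA-1 digest per platform, formatted roughly as:
#
#   {
#        "linux": "<sha1>"
#          "mac": "<sha1>"
#      "windows": "<sha1>"
#   }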