diff --git a/.style.cfg b/.style.cfg new file mode 100644 index 000000000..f39eb0375 --- /dev/null +++ b/.style.cfg @@ -0,0 +1,9 @@ +# Copyright (c) 2017 The Chromium Embedded Framework Authors. All rights +# reserved. Use of this source code is governed by a BSD-style license that +# can be found in the LICENSE file + +# Configuration settings for tools/fix_style.py +{ + # Directories containing these path components will be ignored. + 'ignore_directories': ['yapf'], +} diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 000000000..de0c6a70f --- /dev/null +++ b/.style.yapf @@ -0,0 +1,2 @@ +[style] +based_on_style = chromium diff --git a/tools/fix_style.py b/tools/fix_style.py index 1456cafc7..b610c0dc2 100644 --- a/tools/fix_style.py +++ b/tools/fix_style.py @@ -5,13 +5,22 @@ import os, re, sys from clang_util import clang_format -from file_util import * +from file_util import eval_file, get_files, read_file, write_file from git_util import get_changed_files +from yapf_util import yapf_format -# Valid extensions for files we want to clang-format. -DEFAULT_LINT_WHITELIST_REGEX = r"(.*\.cpp|.*\.cc|.*\.h|.*\.mm)$" +# File extensions that can be formatted. +DEFAULT_LINT_WHITELIST_REGEX = r"(.*\.cpp|.*\.cc|.*\.h|.*\.java|.*\.mm|.*\.py)$" DEFAULT_LINT_BLACKLIST_REGEX = r"$^" +# Directories containing these path components will be ignored. +IGNORE_DIRECTORIES = [] + +# Script directory. +script_dir = os.path.dirname(__file__) +root_dir = os.path.join(script_dir, os.pardir) + + def msg(filename, status): if sys.platform == 'win32': # Use Unix path separator. 
@@ -27,14 +36,32 @@ def msg(filename, status): print "%-60s %s" % (filename, status) + updatect = 0 + + +def read_config(): + style_cfg = os.path.join(root_dir, ".style.cfg") + if os.path.exists(style_cfg): + config = eval_file(style_cfg) + if 'ignore_directories' in config: + global IGNORE_DIRECTORIES + IGNORE_DIRECTORIES = config['ignore_directories'] + + def update_file(filename): oldcontents = read_file(filename) if len(oldcontents) == 0: msg(filename, "empty") - return; + return + + if os.path.splitext(filename)[1] == ".py": + # Format Python files using YAPF. + newcontents = yapf_format(filename, oldcontents) + else: + # Format C/C++/ObjC/Java files using clang-format. + newcontents = clang_format(filename, oldcontents) - newcontents = clang_format(filename, oldcontents) if newcontents is None: raise Exception("Failed to process %s" % filename) @@ -47,7 +74,8 @@ def update_file(filename): msg(filename, "ok") return -def fix_style(filenames, white_list = None, black_list = None): + +def fix_style(filenames, white_list=None, black_list=None): """ Execute clang-format with the specified arguments. """ if not white_list: white_list = DEFAULT_LINT_WHITELIST_REGEX @@ -57,6 +85,16 @@ def fix_style(filenames, white_list = None, black_list = None): black_regex = re.compile(black_list) for filename in filenames: + # Ignore files from specific directories. + ignore = False + for dir_part in filename.split(os.sep): + if dir_part in IGNORE_DIRECTORIES: + msg(filename, "ignored") + ignore = True + break + if ignore: + continue + if filename.find('*') > 0: # Expand wildcards. filenames.extend(get_files(filename)) @@ -83,6 +121,7 @@ def fix_style(filenames, white_list = None, black_list = None): else: msg(filename, "skipped") + if __name__ == "__main__": if len(sys.argv) == 1: print "Usage: %s [file-path|git-hash|unstaged|staged] ..." % sys.argv[0] @@ -96,6 +135,9 @@ if __name__ == "__main__": print " staged\t\tProcess all staged files in the Git repo." 
sys.exit(1) + # Read the configuration file. + read_config() + # Process anything passed on the command-line. fix_style(sys.argv[1:]) print 'Done - Wrote %d files.' % updatect diff --git a/tools/yapf/LICENSE b/tools/yapf/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/tools/yapf/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tools/yapf/README.cef b/tools/yapf/README.cef new file mode 100644 index 000000000..44e7cedb4 --- /dev/null +++ b/tools/yapf/README.cef @@ -0,0 +1,14 @@ +Name: yapf +Short Name: yapf +URL: https://github.com/google/yapf +Date: 28 May 2017 +Version: 0.16.2 +Revision: 9f168a12 +License: Apache 2.0 +License File: LICENSE + +Description: +A formatter for Python files. + +Local Modifications: +None diff --git a/tools/yapf/__main__.py b/tools/yapf/__main__.py new file mode 100644 index 000000000..88f1ec69e --- /dev/null +++ b/tools/yapf/__main__.py @@ -0,0 +1,16 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import yapf + +yapf.run_main() diff --git a/tools/yapf/yapf/__init__.py b/tools/yapf/yapf/__init__.py new file mode 100644 index 000000000..92580523c --- /dev/null +++ b/tools/yapf/yapf/__init__.py @@ -0,0 +1,303 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""YAPF. + +YAPF uses the algorithm in clang-format to figure out the "best" formatting for +Python code. It looks at the program as a series of "unwrappable lines" --- +i.e., lines which, if there were no column limit, we would place all tokens on +that line. It then uses a priority queue to figure out what the best formatting +is --- i.e., the formatting with the least penalty. + +It differs from tools like autopep8 and pep8ify in that it doesn't just look for +violations of the style guide, but looks at the module as a whole, making +formatting decisions based on what's the best format for each line. + +If no filenames are specified, YAPF reads the code from stdin. +""" +from __future__ import print_function + +import argparse +import logging +import os +import sys + +from yapf.yapflib import errors +from yapf.yapflib import file_resources +from yapf.yapflib import py3compat +from yapf.yapflib import style +from yapf.yapflib import yapf_api + +__version__ = '0.16.2' + + +def main(argv): + """Main program. + + Arguments: + argv: command-line arguments, such as sys.argv (including the program name + in argv[0]). 
+ + Returns: + 0 if there were no changes, non-zero otherwise. + + Raises: + YapfError: if none of the supplied files were Python files. + """ + parser = argparse.ArgumentParser(description='Formatter for Python code.') + parser.add_argument( + '-v', + '--version', + action='store_true', + help='show version number and exit') + + diff_inplace_group = parser.add_mutually_exclusive_group() + diff_inplace_group.add_argument( + '-d', + '--diff', + action='store_true', + help='print the diff for the fixed source') + diff_inplace_group.add_argument( + '-i', + '--in-place', + action='store_true', + help='make changes to files in place') + + lines_recursive_group = parser.add_mutually_exclusive_group() + lines_recursive_group.add_argument( + '-r', + '--recursive', + action='store_true', + help='run recursively over directories') + lines_recursive_group.add_argument( + '-l', + '--lines', + metavar='START-END', + action='append', + default=None, + help='range of lines to reformat, one-based') + + parser.add_argument( + '-e', + '--exclude', + metavar='PATTERN', + action='append', + default=None, + help='patterns for files to exclude from formatting') + parser.add_argument( + '--style', + action='store', + help=('specify formatting style: either a style name (for example "pep8" ' + 'or "google"), or the name of a file with style settings. 
The ' + 'default is pep8 unless a %s or %s file located in one of the ' + 'parent directories of the source file (or current directory for ' + 'stdin)' % (style.LOCAL_STYLE, style.SETUP_CONFIG))) + parser.add_argument( + '--style-help', + action='store_true', + help=('show style settings and exit; this output can be ' + 'saved to .style.yapf to make your settings ' + 'permanent')) + parser.add_argument( + '--no-local-style', + action='store_true', + help="don't search for local style definition") + parser.add_argument('--verify', action='store_true', help=argparse.SUPPRESS) + parser.add_argument( + '-p', + '--parallel', + action='store_true', + help=('Run yapf in parallel when formatting multiple files. Requires ' + 'concurrent.futures in Python 2.X')) + + parser.add_argument('files', nargs='*') + args = parser.parse_args(argv[1:]) + + if args.version: + print('yapf {}'.format(__version__)) + return 0 + + if args.style_help: + style.SetGlobalStyle(style.CreateStyleFromConfig(args.style)) + print('[style]') + for option, docstring in sorted(style.Help().items()): + for line in docstring.splitlines(): + print('#', line and ' ' or '', line, sep='') + print(option.lower(), '=', style.Get(option), sep='') + print() + return 0 + + if args.lines and len(args.files) > 1: + parser.error('cannot use -l/--lines with more than one file') + + lines = _GetLines(args.lines) if args.lines is not None else None + if not args.files: + # No arguments specified. Read code from stdin. + if args.in_place or args.diff: + parser.error('cannot use --in-place or --diff flags when reading ' + 'from stdin') + + original_source = [] + while True: + try: + # Use 'raw_input' instead of 'sys.stdin.read', because otherwise the + # user will need to hit 'Ctrl-D' more than once if they're inputting + # the program by hand. 'raw_input' throws an EOFError exception if + # 'Ctrl-D' is pressed, which makes it easy to bail out of this loop. 
+ original_source.append(py3compat.raw_input()) + except EOFError: + break + + style_config = args.style + if style_config is None and not args.no_local_style: + style_config = file_resources.GetDefaultStyleForDir(os.getcwd()) + + source = [line.rstrip() for line in original_source] + reformatted_source, _ = yapf_api.FormatCode( + py3compat.unicode('\n'.join(source) + '\n'), + filename='', + style_config=style_config, + lines=lines, + verify=args.verify) + file_resources.WriteReformattedCode('', reformatted_source) + return 0 + + files = file_resources.GetCommandLineFiles(args.files, args.recursive, + args.exclude) + if not files: + raise errors.YapfError('Input filenames did not match any python files') + + FormatFiles( + files, + lines, + style_config=args.style, + no_local_style=args.no_local_style, + in_place=args.in_place, + print_diff=args.diff, + verify=args.verify, + parallel=args.parallel) + return 0 + + +def FormatFiles(filenames, + lines, + style_config=None, + no_local_style=False, + in_place=False, + print_diff=False, + verify=True, + parallel=False): + """Format a list of files. + + Arguments: + filenames: (list of unicode) A list of files to reformat. + lines: (list of tuples of integers) A list of tuples of lines, [start, end], + that we want to format. The lines are 1-based indexed. This argument + overrides the 'args.lines'. It can be used by third-party code (e.g., + IDEs) when reformatting a snippet of code. + style_config: (string) Style name or file path. + no_local_style: (string) If style_config is None don't search for + directory-local style configuration. + in_place: (bool) Modify the files in place. + print_diff: (bool) Instead of returning the reformatted source, return a + diff that turns the formatted source into reformatter source. + verify: (bool) True if reformatted code should be verified for syntax. + parallel: (bool) True if should format multiple files in parallel. 
+ + Returns: + True if the source code changed in any of the files being formatted. + """ + changed = False + if parallel: + import multiprocessing # pylint: disable=g-import-not-at-top + import concurrent.futures # pylint: disable=g-import-not-at-top + workers = min(multiprocessing.cpu_count(), len(filenames)) + with concurrent.futures.ProcessPoolExecutor(workers) as executor: + future_formats = [ + executor.submit(_FormatFile, filename, lines, style_config, + no_local_style, in_place, print_diff, verify) + for filename in filenames + ] + for future in concurrent.futures.as_completed(future_formats): + changed |= future.result() + else: + for filename in filenames: + changed |= _FormatFile(filename, lines, style_config, no_local_style, + in_place, print_diff, verify) + return changed + + +def _FormatFile(filename, + lines, + style_config=None, + no_local_style=False, + in_place=False, + print_diff=False, + verify=True): + logging.info('Reformatting %s', filename) + if style_config is None and not no_local_style: + style_config = ( + file_resources.GetDefaultStyleForDir(os.path.dirname(filename))) + try: + reformatted_code, encoding, has_change = yapf_api.FormatFile( + filename, + in_place=in_place, + style_config=style_config, + lines=lines, + print_diff=print_diff, + verify=verify, + logger=logging.warning) + if not in_place and reformatted_code: + file_resources.WriteReformattedCode(filename, reformatted_code, in_place, + encoding) + return has_change + except SyntaxError as e: + e.filename = filename + raise + + +def _GetLines(line_strings): + """Parses the start and end lines from a line string like 'start-end'. + + Arguments: + line_strings: (array of string) A list of strings representing a line + range like 'start-end'. + + Returns: + A list of tuples of the start and end line numbers. + + Raises: + ValueError: If the line string failed to parse or was an invalid line range. 
+ """ + lines = [] + for line_string in line_strings: + # The 'list' here is needed by Python 3. + line = list(map(int, line_string.split('-', 1))) + if line[0] < 1: + raise errors.YapfError('invalid start of line range: %r' % line) + if line[0] > line[1]: + raise errors.YapfError('end comes before start in line range: %r', line) + lines.append(tuple(line)) + return lines + + +def run_main(): # pylint: disable=invalid-name + try: + sys.exit(main(sys.argv)) + except errors.YapfError as e: + sys.stderr.write('yapf: ' + str(e) + '\n') + sys.exit(1) + + +if __name__ == '__main__': + run_main() diff --git a/tools/yapf/yapf/yapflib/__init__.py b/tools/yapf/yapf/yapflib/__init__.py new file mode 100644 index 000000000..80217ac4a --- /dev/null +++ b/tools/yapf/yapf/yapflib/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tools/yapf/yapf/yapflib/blank_line_calculator.py b/tools/yapf/yapf/yapflib/blank_line_calculator.py new file mode 100644 index 000000000..bcd7a867a --- /dev/null +++ b/tools/yapf/yapf/yapflib/blank_line_calculator.py @@ -0,0 +1,183 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Calculate the number of blank lines between top-level entities. + +Calculates how many blank lines we need between classes, functions, and other +entities at the same level. + + CalculateBlankLines(): the main function exported by this module. + +Annotations: + newlines: The number of newlines required before the node. +""" + +from lib2to3 import pytree + +from yapf.yapflib import py3compat +from yapf.yapflib import pytree_utils +from yapf.yapflib import pytree_visitor + +_NO_BLANK_LINES = 1 +_ONE_BLANK_LINE = 2 +_TWO_BLANK_LINES = 3 + +_PYTHON_STATEMENTS = frozenset({ + 'small_stmt', 'expr_stmt', 'print_stmt', 'del_stmt', 'pass_stmt', + 'break_stmt', 'continue_stmt', 'return_stmt', 'raise_stmt', 'yield_stmt', + 'import_stmt', 'global_stmt', 'exec_stmt', 'assert_stmt', 'if_stmt', + 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt', 'nonlocal_stmt', + 'async_stmt', 'simple_stmt' +}) + + +def CalculateBlankLines(tree): + """Run the blank line calculator visitor over the tree. + + This modifies the tree in place. + + Arguments: + tree: the top-level pytree node to annotate with subtypes. 
+ """ + blank_line_calculator = _BlankLineCalculator() + blank_line_calculator.Visit(tree) + + +class _BlankLineCalculator(pytree_visitor.PyTreeVisitor): + """_BlankLineCalculator - see file-level docstring for a description.""" + + def __init__(self): + self.class_level = 0 + self.function_level = 0 + self.last_comment_lineno = 0 + self.last_was_decorator = False + self.last_was_class_or_function = False + + def Visit_simple_stmt(self, node): # pylint: disable=invalid-name + self.DefaultNodeVisit(node) + if pytree_utils.NodeName(node.children[0]) == 'COMMENT': + self.last_comment_lineno = node.children[0].lineno + + def Visit_decorator(self, node): # pylint: disable=invalid-name + if (self.last_comment_lineno and + self.last_comment_lineno == node.children[0].lineno - 1): + self._SetNumNewlines(node.children[0], _NO_BLANK_LINES) + else: + self._SetNumNewlines(node.children[0], self._GetNumNewlines(node)) + for child in node.children: + self.Visit(child) + self.last_was_decorator = True + + def Visit_classdef(self, node): # pylint: disable=invalid-name + self.last_was_class_or_function = False + index = self._SetBlankLinesBetweenCommentAndClassFunc(node) + self.last_was_decorator = False + self.class_level += 1 + for child in node.children[index:]: + self.Visit(child) + self.class_level -= 1 + self.last_was_class_or_function = True + + def Visit_funcdef(self, node): # pylint: disable=invalid-name + self.last_was_class_or_function = False + index = self._SetBlankLinesBetweenCommentAndClassFunc(node) + if _AsyncFunction(node): + index = self._SetBlankLinesBetweenCommentAndClassFunc( + node.prev_sibling.parent) + self._SetNumNewlines(node.children[0], None) + else: + index = self._SetBlankLinesBetweenCommentAndClassFunc(node) + self.last_was_decorator = False + self.function_level += 1 + for child in node.children[index:]: + self.Visit(child) + self.function_level -= 1 + self.last_was_class_or_function = True + + def DefaultNodeVisit(self, node): + """Override the 
default visitor for Node. + + This will set the blank lines required if the last entity was a class or + function. + + Arguments: + node: (pytree.Node) The node to visit. + """ + if self.last_was_class_or_function: + if pytree_utils.NodeName(node) in _PYTHON_STATEMENTS: + leaf = _GetFirstChildLeaf(node) + self._SetNumNewlines(leaf, self._GetNumNewlines(leaf)) + self.last_was_class_or_function = False + super(_BlankLineCalculator, self).DefaultNodeVisit(node) + + def _SetBlankLinesBetweenCommentAndClassFunc(self, node): + """Set the number of blanks between a comment and class or func definition. + + Class and function definitions have leading comments as children of the + classdef and functdef nodes. + + Arguments: + node: (pytree.Node) The classdef or funcdef node. + + Returns: + The index of the first child past the comment nodes. + """ + index = 0 + while pytree_utils.IsCommentStatement(node.children[index]): + # Standalone comments are wrapped in a simple_stmt node with the comment + # node as its only child. 
+ self.Visit(node.children[index].children[0]) + if not self.last_was_decorator: + self._SetNumNewlines(node.children[index].children[0], _ONE_BLANK_LINE) + index += 1 + if (index and node.children[index].lineno - + 1 == node.children[index - 1].children[0].lineno): + self._SetNumNewlines(node.children[index], _NO_BLANK_LINES) + else: + if self.last_comment_lineno + 1 == node.children[index].lineno: + num_newlines = _NO_BLANK_LINES + else: + num_newlines = self._GetNumNewlines(node) + self._SetNumNewlines(node.children[index], num_newlines) + return index + + def _GetNumNewlines(self, node): + if self.last_was_decorator: + return _NO_BLANK_LINES + elif self._IsTopLevel(node): + return _TWO_BLANK_LINES + return _ONE_BLANK_LINE + + def _SetNumNewlines(self, node, num_newlines): + pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.NEWLINES, + num_newlines) + + def _IsTopLevel(self, node): + return (not (self.class_level or self.function_level) and + _StartsInZerothColumn(node)) + + +def _StartsInZerothColumn(node): + return (_GetFirstChildLeaf(node).column == 0 or + (_AsyncFunction(node) and node.prev_sibling.column == 0)) + + +def _AsyncFunction(node): + return (py3compat.PY3 and node.prev_sibling and + pytree_utils.NodeName(node.prev_sibling) == 'ASYNC') + + +def _GetFirstChildLeaf(node): + if isinstance(node, pytree.Leaf): + return node + return _GetFirstChildLeaf(node.children[0]) diff --git a/tools/yapf/yapf/yapflib/comment_splicer.py b/tools/yapf/yapf/yapflib/comment_splicer.py new file mode 100644 index 000000000..7c79e805d --- /dev/null +++ b/tools/yapf/yapf/yapflib/comment_splicer.py @@ -0,0 +1,374 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Comment splicer for lib2to3 trees. + +The lib2to3 syntax tree produced by the parser holds comments and whitespace in +prefix attributes of nodes, rather than nodes themselves. This module provides +functionality to splice comments out of prefixes and into nodes of their own, +making them easier to process. + + SpliceComments(): the main function exported by this module. +""" + +from lib2to3 import pygram +from lib2to3 import pytree +from lib2to3.pgen2 import token + +from yapf.yapflib import pytree_utils + + +def SpliceComments(tree): + """Given a pytree, splice comments into nodes of their own right. + + Extract comments from the prefixes where they are housed after parsing. + The prefixes that previously housed the comments become empty. + + Args: + tree: a pytree.Node - the tree to work on. The tree is modified by this + function. + """ + # The previous leaf node encountered in the traversal. + # This is a list because Python 2.x doesn't have 'nonlocal' :) + prev_leaf = [None] + _AnnotateIndents(tree) + + def _VisitNodeRec(node): + # This loop may insert into node.children, so we'll iterate over a copy. + for child in node.children[:]: + if isinstance(child, pytree.Node): + # Nodes don't have prefixes. + _VisitNodeRec(child) + else: + if child.prefix.lstrip().startswith('#'): + # We have a comment prefix in this child, so splicing is needed. + comment_prefix = child.prefix + comment_lineno = child.lineno - comment_prefix.count('\n') + comment_column = child.column + + # Remember the leading indentation of this prefix and clear it. 
+ # Mopping up the prefix is important because we may go over this same + # child in the next iteration... + child_prefix = child.prefix.lstrip('\n') + prefix_indent = child_prefix[:child_prefix.find('#')] + if '\n' in prefix_indent: + prefix_indent = prefix_indent[prefix_indent.rfind('\n') + 1:] + child.prefix = '' + + if child.type == token.NEWLINE: + # If the prefix was on a NEWLINE leaf, it's part of the line so it + # will be inserted after the previously encountered leaf. + # We can't just insert it before the NEWLINE node, because as a + # result of the way pytrees are organized, this node can be under + # an inappropriate parent. + comment_column -= len(comment_prefix) + comment_column += len(comment_prefix) - len(comment_prefix.lstrip()) + pytree_utils.InsertNodesAfter( + _CreateCommentsFromPrefix( + comment_prefix, + comment_lineno, + comment_column, + standalone=False), prev_leaf[0]) + elif child.type == token.DEDENT: + # Comment prefixes on DEDENT nodes also deserve special treatment, + # because their final placement depends on their prefix. + # We'll look for an ancestor of this child with a matching + # indentation, and insert the comment after it. + ancestor_at_indent = _FindAncestorAtIndent(child, prefix_indent) + if ancestor_at_indent.type == token.DEDENT: + comments = comment_prefix.split('\n') + + # lib2to3 places comments that should be separated into the same + # DEDENT node. For example, "comment 1" and "comment 2" will be + # combined. + # + # def _(): + # for x in y: + # pass + # # comment 1 + # + # # comment 2 + # pass + # + # In this case, we need to split them up ourselves. 
+ before = [] + after = [] + after_lineno = comment_lineno + + index = 0 + while index < len(comments): + cmt = comments[index] + if not cmt.strip() or cmt.startswith(prefix_indent + '#'): + before.append(cmt) + else: + after_lineno += index + after.extend(comments[index:]) + break + index += 1 + + # Special case where the comment is inserted in the same + # indentation level as the DEDENT it was originally attached to. + pytree_utils.InsertNodesBefore( + _CreateCommentsFromPrefix( + '\n'.join(before) + '\n', + comment_lineno, + comment_column, + standalone=True), ancestor_at_indent) + if after: + after_column = len(after[0]) - len(after[0].lstrip()) + comment_column -= comment_column - after_column + pytree_utils.InsertNodesAfter( + _CreateCommentsFromPrefix( + '\n'.join(after) + '\n', + after_lineno, + comment_column, + standalone=True), _FindNextAncestor(ancestor_at_indent)) + else: + pytree_utils.InsertNodesAfter( + _CreateCommentsFromPrefix( + comment_prefix, + comment_lineno, + comment_column, + standalone=True), ancestor_at_indent) + else: + # Otherwise there are two cases. + # + # 1. The comment is on its own line + # 2. The comment is part of an expression. + # + # Unfortunately, it's fairly difficult to distinguish between the + # two in lib2to3 trees. The algorithm here is to determine whether + # child is the first leaf in the statement it belongs to. If it is, + # then the comment (which is a prefix) belongs on a separate line. + # If it is not, it means the comment is buried deep in the statement + # and is part of some expression. + stmt_parent = _FindStmtParent(child) + + for leaf_in_parent in stmt_parent.leaves(): + if leaf_in_parent.type == token.NEWLINE: + continue + elif id(leaf_in_parent) == id(child): + # This comment stands on its own line, and it has to be inserted + # into the appropriate parent. We'll have to find a suitable + # parent to insert into. See comments above + # _STANDALONE_LINE_NODES for more details. 
+ node_with_line_parent = _FindNodeWithStandaloneLineParent(child) + pytree_utils.InsertNodesBefore( + _CreateCommentsFromPrefix( + comment_prefix, comment_lineno, 0, standalone=True), + node_with_line_parent) + break + else: + if comment_lineno == prev_leaf[0].lineno: + comment_lines = comment_prefix.splitlines() + value = comment_lines[0].lstrip() + if value.rstrip('\n'): + comment_column = prev_leaf[0].column + comment_column += len(prev_leaf[0].value) + comment_column += ( + len(comment_lines[0]) - len(comment_lines[0].lstrip())) + comment_leaf = pytree.Leaf( + type=token.COMMENT, + value=value.rstrip('\n'), + context=('', (comment_lineno, comment_column))) + pytree_utils.InsertNodesAfter([comment_leaf], prev_leaf[0]) + comment_prefix = '\n'.join(comment_lines[1:]) + comment_lineno += 1 + + rindex = (0 if '\n' not in comment_prefix.rstrip() else + comment_prefix.rstrip().rindex('\n') + 1) + comment_column = (len(comment_prefix[rindex:]) - + len(comment_prefix[rindex:].lstrip())) + comments = _CreateCommentsFromPrefix( + comment_prefix, + comment_lineno, + comment_column, + standalone=False) + pytree_utils.InsertNodesBefore(comments, child) + break + + prev_leaf[0] = child + + _VisitNodeRec(tree) + + +def _CreateCommentsFromPrefix(comment_prefix, + comment_lineno, + comment_column, + standalone=False): + """Create pytree nodes to represent the given comment prefix. + + Args: + comment_prefix: (unicode) the text of the comment from the node's prefix. + comment_lineno: (int) the line number for the start of the comment. + comment_column: (int) the column for the start of the comment. + standalone: (bool) determines if the comment is standalone or not. + + Returns: + The simple_stmt nodes if this is a standalone comment, otherwise a list of + new COMMENT leafs. The prefix may consist of multiple comment blocks, + separated by blank lines. Each block gets its own leaf. + """ + # The comment is stored in the prefix attribute, with no lineno of its + # own. 
So we only know at which line it ends. To find out at which line it
+  # starts, look at how many newlines the comment itself contains.
+  comments = []
+
+  lines = comment_prefix.split('\n')
+  index = 0
+  while index < len(lines):
+    comment_block = []
+    while index < len(lines) and lines[index].lstrip().startswith('#'):
+      comment_block.append(lines[index].strip())
+      index += 1
+
+    if comment_block:
+      new_lineno = comment_lineno + index - 1
+      comment_block[0] = comment_block[0].strip()
+      comment_block[-1] = comment_block[-1].strip()
+      comment_leaf = pytree.Leaf(
+          type=token.COMMENT,
+          value='\n'.join(comment_block),
+          context=('', (new_lineno, comment_column)))
+      comment_node = comment_leaf if not standalone else pytree.Node(
+          pygram.python_symbols.simple_stmt, [comment_leaf])
+      comments.append(comment_node)
+
+    while index < len(lines) and not lines[index].lstrip():
+      index += 1
+
+  return comments
+
+
+# "Standalone line nodes" are tree nodes that have to start a new line in Python
+# code (and cannot follow a ';' or ':'). Other nodes, like 'expr_stmt', serve as
+# parents of other nodes but can come later in a line. This is a list of
+# standalone line nodes in the grammar. It is meant to be exhaustive
+# *eventually*, and we'll modify it with time as we discover more corner cases
+# in the parse tree.
+#
+# When splicing a standalone comment (i.e. a comment that appears on its own
+# line, not on the same line with other code), it's important to insert it into
+# an appropriate parent of the node it's attached to. An appropriate parent
+# is the first "standalone line node" in the parent chain of a node.
+_STANDALONE_LINE_NODES = frozenset([
+    'suite', 'if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt',
+    'funcdef', 'classdef', 'decorated', 'file_input'
+])
+
+
+def _FindNodeWithStandaloneLineParent(node):
+  """Find a node whose parent is a 'standalone line' node.
+
+  See the comment above _STANDALONE_LINE_NODES for more details. 
+
+  Arguments:
+    node: node to start from
+
+  Returns:
+    Suitable node that's either the node itself or one of its ancestors.
+  """
+  if pytree_utils.NodeName(node.parent) in _STANDALONE_LINE_NODES:
+    return node
+  else:
+    # This is guaranteed to terminate because 'file_input' is the root node of
+    # any pytree.
+    return _FindNodeWithStandaloneLineParent(node.parent)
+
+
+# "Statement nodes" are standalone statements. They don't have to start a new
+# line.
+_STATEMENT_NODES = frozenset(['simple_stmt']) | _STANDALONE_LINE_NODES
+
+
+def _FindStmtParent(node):
+  """Find the nearest parent of node that is a statement node.
+
+  Arguments:
+    node: node to start from
+
+  Returns:
+    Nearest parent (or node itself, if suitable).
+  """
+  if pytree_utils.NodeName(node) in _STATEMENT_NODES:
+    return node
+  else:
+    return _FindStmtParent(node.parent)
+
+
+def _FindAncestorAtIndent(node, indent):
+  """Find an ancestor of node with the given indentation.
+
+  Arguments:
+    node: node to start from. This must not be the tree root.
+    indent: indentation string for the ancestor we're looking for.
+        See _AnnotateIndents for more details.
+
+  Returns:
+    An ancestor node with suitable indentation. If no suitable ancestor is
+    found, the closest ancestor to the tree root is returned.
+  """
+  if node.parent.parent is None:
+    # Our parent is the tree root, so there's nowhere else to go.
+    return node
+
+  # If the parent has an indent annotation, and it's shorter than node's
+  # indent, this is a suitable ancestor.
+  # The reason for "shorter" rather than "equal" is that comments may be
+  # improperly indented (i.e. by three spaces, where surrounding statements
+  # have either zero or two or four), and we don't want to propagate them all
+  # the way to the root.
+  parent_indent = pytree_utils.GetNodeAnnotation(
+      node.parent, pytree_utils.Annotation.CHILD_INDENT)
+  if parent_indent is not None and indent.startswith(parent_indent):
+    return node
+  else:
+    # Keep looking up the tree. 
+ return _FindAncestorAtIndent(node.parent, indent) + + +def _FindNextAncestor(node): + if node.parent is None: + return node + + if node.parent.next_sibling is not None: + return node.parent.next_sibling + + return _FindNextAncestor(node.parent) + + +def _AnnotateIndents(tree): + """Annotate the tree with child_indent annotations. + + A child_indent annotation on a node specifies the indentation (as a string, + like " ") of its children. It is inferred from the INDENT child of a node. + + Arguments: + tree: root of a pytree. The pytree is modified to add annotations to nodes. + + Raises: + RuntimeError: if the tree is malformed. + """ + # Annotate the root of the tree with zero indent. + if tree.parent is None: + pytree_utils.SetNodeAnnotation(tree, pytree_utils.Annotation.CHILD_INDENT, + '') + for child in tree.children: + if child.type == token.INDENT: + child_indent = pytree_utils.GetNodeAnnotation( + tree, pytree_utils.Annotation.CHILD_INDENT) + if child_indent is not None and child_indent != child.value: + raise RuntimeError('inconsistent indentation for child', (tree, child)) + pytree_utils.SetNodeAnnotation(tree, pytree_utils.Annotation.CHILD_INDENT, + child.value) + _AnnotateIndents(child) diff --git a/tools/yapf/yapf/yapflib/continuation_splicer.py b/tools/yapf/yapf/yapflib/continuation_splicer.py new file mode 100644 index 000000000..74ea1a0cb --- /dev/null +++ b/tools/yapf/yapf/yapflib/continuation_splicer.py @@ -0,0 +1,52 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Insert "continuation" nodes into lib2to3 tree.
+
+The "backslash-newline" continuation marker is shoved into the node's prefix.
+Pull them out and make it into nodes of their own.
+
+  SpliceContinuations(): the main function exported by this module.
+"""
+
+from lib2to3 import pytree
+
+from yapf.yapflib import format_token
+
+
+def SpliceContinuations(tree):
+  """Given a pytree, splice the continuation marker into nodes.
+
+  Arguments:
+    tree: (pytree.Node) The tree to work on. The tree is modified by this
+      function.
+  """
+
+  def RecSplicer(node):
+    """Inserts a continuation marker into the node."""
+    if isinstance(node, pytree.Leaf):
+      if node.prefix.lstrip().startswith('\\\n'):
+        new_lineno = node.lineno - node.prefix.count('\n')
+        return pytree.Leaf(
+            type=format_token.CONTINUATION,
+            value=node.prefix,
+            context=('', (new_lineno, 0)))
+      return None
+    num_inserted = 0
+    for index, child in enumerate(node.children[:]):
+      continuation_node = RecSplicer(child)
+      if continuation_node:
+        node.children.insert(index + num_inserted, continuation_node)
+        num_inserted += 1
+
+  RecSplicer(tree)
diff --git a/tools/yapf/yapf/yapflib/errors.py b/tools/yapf/yapf/yapflib/errors.py
new file mode 100644
index 000000000..aa8f3eada
--- /dev/null
+++ b/tools/yapf/yapf/yapflib/errors.py
@@ -0,0 +1,23 @@
+# Copyright 2015-2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""YAPF error object.""" + + +class YapfError(Exception): + """Parent class for user errors or input errors. + + Exceptions of this type are handled by the command line tool + and result in clear error messages, as opposed to backtraces. + """ + pass diff --git a/tools/yapf/yapf/yapflib/file_resources.py b/tools/yapf/yapf/yapflib/file_resources.py new file mode 100644 index 000000000..e7f9acdc7 --- /dev/null +++ b/tools/yapf/yapf/yapflib/file_resources.py @@ -0,0 +1,169 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Interface to file resources. + +This module provides functions for interfacing with files: opening, writing, and +querying. +""" + +import fnmatch +import os +import re + +from lib2to3.pgen2 import tokenize + +from yapf.yapflib import errors +from yapf.yapflib import py3compat +from yapf.yapflib import style + +CR = '\r' +LF = '\n' +CRLF = '\r\n' + + +def GetDefaultStyleForDir(dirname): + """Return default style name for a given directory. + + Looks for .style.yapf or setup.cfg in the parent directories. + + Arguments: + dirname: (unicode) The name of the directory. + + Returns: + The filename if found, otherwise return the global default (pep8). + """ + dirname = os.path.abspath(dirname) + while True: + # See if we have a .style.yapf file. 
+ style_file = os.path.join(dirname, style.LOCAL_STYLE) + if os.path.exists(style_file): + return style_file + + # See if we have a setup.cfg file with a '[yapf]' section. + config_file = os.path.join(dirname, style.SETUP_CONFIG) + if os.path.exists(config_file): + with open(config_file) as fd: + config = py3compat.ConfigParser() + config.read_file(fd) + if config.has_section('yapf'): + return config_file + + dirname = os.path.dirname(dirname) + if (not dirname or not os.path.basename(dirname) or + dirname == os.path.abspath(os.path.sep)): + break + + global_file = os.path.expanduser(style.GLOBAL_STYLE) + if os.path.exists(global_file): + return global_file + + return style.DEFAULT_STYLE + + +def GetCommandLineFiles(command_line_file_list, recursive, exclude): + """Return the list of files specified on the command line.""" + return _FindPythonFiles(command_line_file_list, recursive, exclude) + + +def WriteReformattedCode(filename, + reformatted_code, + in_place=False, + encoding=''): + """Emit the reformatted code. + + Write the reformatted code into the file, if in_place is True. Otherwise, + write to stdout. + + Arguments: + filename: (unicode) The name of the unformatted file. + reformatted_code: (unicode) The reformatted code. + in_place: (bool) If True, then write the reformatted code to the file. + encoding: (unicode) The encoding of the file. 
+ """ + if in_place: + with py3compat.open_with_encoding( + filename, mode='w', encoding=encoding, newline='') as fd: + fd.write(reformatted_code) + else: + py3compat.EncodeAndWriteToStdout(reformatted_code) + + +def LineEnding(lines): + """Retrieve the line ending of the original source.""" + endings = {CRLF: 0, CR: 0, LF: 0} + for line in lines: + if line.endswith(CRLF): + endings[CRLF] += 1 + elif line.endswith(CR): + endings[CR] += 1 + elif line.endswith(LF): + endings[LF] += 1 + return (sorted(endings, key=endings.get, reverse=True) or [LF])[0] + + +def _FindPythonFiles(filenames, recursive, exclude): + """Find all Python files.""" + python_files = [] + for filename in filenames: + if os.path.isdir(filename): + if recursive: + # TODO(morbo): Look into a version of os.walk that can handle recursion. + python_files.extend( + os.path.join(dirpath, f) + for dirpath, _, filelist in os.walk(filename) for f in filelist + if IsPythonFile(os.path.join(dirpath, f))) + else: + raise errors.YapfError( + "directory specified without '--recursive' flag: %s" % filename) + elif os.path.isfile(filename): + python_files.append(filename) + + if exclude: + return [ + f for f in python_files + if not any(fnmatch.fnmatch(f, p) for p in exclude) + ] + + return python_files + + +def IsPythonFile(filename): + """Return True if filename is a Python file.""" + if os.path.splitext(filename)[1] == '.py': + return True + + try: + with open(filename, 'rb') as fd: + encoding = tokenize.detect_encoding(fd.readline)[0] + + # Check for correctness of encoding. + with py3compat.open_with_encoding( + filename, mode='r', encoding=encoding) as fd: + fd.read() + except UnicodeDecodeError: + encoding = 'latin-1' + except (IOError, SyntaxError): + # If we fail to detect encoding (or the encoding cookie is incorrect - which + # will make detect_encoding raise SyntaxError), assume it's not a Python + # file. 
+ return False + + try: + with py3compat.open_with_encoding( + filename, mode='r', encoding=encoding) as fd: + first_line = fd.readlines()[0] + except (IOError, IndexError): + return False + + return re.match(r'^#!.*\bpython[23]?\b', first_line) diff --git a/tools/yapf/yapf/yapflib/format_decision_state.py b/tools/yapf/yapf/yapflib/format_decision_state.py new file mode 100644 index 000000000..3c17dc4a2 --- /dev/null +++ b/tools/yapf/yapf/yapflib/format_decision_state.py @@ -0,0 +1,799 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implements a format decision state object that manages whitespace decisions. + +Each token is processed one at a time, at which point its whitespace formatting +decisions are made. A graph of potential whitespace formattings is created, +where each node in the graph is a format decision state object. The heuristic +tries formatting the token with and without a newline before it to determine +which one has the least penalty. Therefore, the format decision state object for +each decision needs to be its own unique copy. + +Once the heuristic determines the best formatting, it makes a non-dry run pass +through the code to commit the whitespace formatting. + + FormatDecisionState: main class exported by this module. 
+""" + +from yapf.yapflib import format_token +from yapf.yapflib import split_penalty +from yapf.yapflib import style +from yapf.yapflib import unwrapped_line + +_COMPOUND_STMTS = frozenset( + {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'}) + + +class FormatDecisionState(object): + """The current state when indenting an unwrapped line. + + The FormatDecisionState object is meant to be copied instead of referenced. + + Attributes: + first_indent: The indent of the first token. + column: The number of used columns in the current line. + next_token: The next token to be formatted. + paren_level: The level of nesting inside (), [], and {}. + start_of_line_level: The paren_level at the start of this line. + lowest_level_on_line: The lowest paren_level on the current line. + newline: Indicates if a newline is added along the edge to this format + decision state node. + previous: The previous format decision state in the decision tree. + stack: A stack (of _ParenState) keeping track of properties applying to + parenthesis levels. + ignore_stack_for_comparison: Ignore the stack of _ParenState for state + comparison. + """ + + def __init__(self, line, first_indent): + """Initializer. + + Initializes to the state after placing the first token from 'line' at + 'first_indent'. + + Arguments: + line: (UnwrappedLine) The unwrapped line we're currently processing. + first_indent: (int) The indent of the first token. 
+ """ + self.next_token = line.first + self.column = first_indent + self.line = line + self.paren_level = 0 + self.start_of_line_level = 0 + self.lowest_level_on_line = 0 + self.ignore_stack_for_comparison = False + self.stack = [_ParenState(first_indent, first_indent)] + self.first_indent = first_indent + self.newline = False + self.previous = None + self.column_limit = style.Get('COLUMN_LIMIT') + + def Clone(self): + """Clones a FormatDecisionState object.""" + new = FormatDecisionState(self.line, self.first_indent) + new.next_token = self.next_token + new.column = self.column + new.line = self.line + new.paren_level = self.paren_level + new.start_of_line_level = self.start_of_line_level + new.lowest_level_on_line = self.lowest_level_on_line + new.ignore_stack_for_comparison = self.ignore_stack_for_comparison + new.first_indent = self.first_indent + new.newline = self.newline + new.previous = self.previous + new.stack = [state.Clone() for state in self.stack] + return new + + def __eq__(self, other): + # Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous', + # because it shouldn't have a bearing on this comparison. (I.e., it will + # report equal if 'next_token' does.) 
+ return (self.next_token == other.next_token and + self.column == other.column and + self.paren_level == other.paren_level and + self.start_of_line_level == other.start_of_line_level and + self.lowest_level_on_line == other.lowest_level_on_line and + (self.ignore_stack_for_comparison or + other.ignore_stack_for_comparison or self.stack == other.stack)) + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self.next_token, self.column, self.paren_level, + self.start_of_line_level, self.lowest_level_on_line)) + + def __repr__(self): + return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' % + (self.column, repr(self.next_token), self.paren_level, + '\n\t'.join(repr(s) for s in self.stack) + ']')) + + def CanSplit(self, must_split): + """Determine if we can split before the next token. + + Arguments: + must_split: (bool) A newline was required before this token. + + Returns: + True if the line can be split before the next token. + """ + current = self.next_token + + if current.is_pseudo_paren: + return False + + if (not must_split and + format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes and + format_token.Subtype.DICTIONARY_KEY not in current.subtypes and + not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')): + # In some situations, a dictionary may be multiline, but pylint doesn't + # like it. So don't allow it unless forced to. + return False + + return current.can_break_before + + def MustSplit(self): + """Returns True if the line must split before the next token.""" + current = self.next_token + previous = current.previous_token + + if current.is_pseudo_paren: + return False + + if current.must_break_before: + return True + + if not previous: + return False + + if self.stack[-1].split_before_closing_bracket and current.value in '}]': + # Split before the closing bracket if we can. 
+ return current.node_split_penalty != split_penalty.UNBREAKABLE + + # Prevent splitting before the first argument in compound statements + # with the exception of function declarations. + if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and + self.line.first.value != 'def' and + self.line.first.value in _COMPOUND_STMTS): + return False + + ########################################################################### + # List Splitting + if (style.Get('DEDENT_CLOSING_BRACKETS') or + style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): + bracket = current if current.ClosesScope() else previous + if format_token.Subtype.SUBSCRIPT_BRACKET not in bracket.subtypes: + if bracket.OpensScope(): + if style.Get('COALESCE_BRACKETS'): + if current.OpensScope(): + # Prefer to keep all opening brackets together. + return False + + if (not _IsLastScopeInLine(bracket) or + unwrapped_line.IsSurroundedByBrackets(bracket)): + last_token = bracket.matching_bracket + else: + last_token = _LastTokenInLine(bracket.matching_bracket) + + if not self._FitsOnLine(bracket, last_token): + # Split before the first element if the whole list can't fit on a + # single line. + self.stack[-1].split_before_closing_bracket = True + return True + + elif style.Get('DEDENT_CLOSING_BRACKETS') and current.ClosesScope(): + # Split before and dedent the closing bracket. + return self.stack[-1].split_before_closing_bracket + + if (current.is_name or current.is_string) and previous.value == ',': + # If the list has function calls in it and the full list itself cannot + # fit on the line, then we want to split. Otherwise, we'll get something + # like this: + # + # X = [ + # Bar(xxx='some string', + # yyy='another long string', + # zzz='a third long string'), Bar( + # xxx='some string', + # yyy='another long string', + # zzz='a third long string') + # ] + # + # or when a string formatting syntax. 
+ func_call_or_string_format = False + if current.is_name: + tok = current.next_token + while tok and (tok.is_name or tok.value == '.'): + tok = tok.next_token + func_call_or_string_format = tok and tok.value == '(' + elif current.is_string: + tok = current.next_token + while tok and tok.is_string: + tok = tok.next_token + func_call_or_string_format = tok and tok.value == '%' + if func_call_or_string_format: + open_bracket = unwrapped_line.IsSurroundedByBrackets(current) + if open_bracket and open_bracket.value in '[{': + if not self._FitsOnLine(open_bracket, open_bracket.matching_bracket): + return True + + ########################################################################### + # Dict/Set Splitting + if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and + format_token.Subtype.DICTIONARY_KEY in current.subtypes and + not current.is_comment): + # Place each dictionary entry onto its own line. + if previous.value == '{' and previous.previous_token: + opening = _GetOpeningBracket(previous.previous_token) + if (opening and opening.value == '(' and opening.previous_token and + opening.previous_token.is_name): + # This is a dictionary that's an argument to a function. + if self._FitsOnLine(previous, previous.matching_bracket): + return False + return True + + if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and + format_token.Subtype.DICT_SET_GENERATOR in current.subtypes): + # Split before a dict/set generator. + return True + + if (format_token.Subtype.DICTIONARY_VALUE in current.subtypes or + (previous.is_pseudo_paren and previous.value == '(' and + not current.is_comment)): + # Split before the dictionary value if we can't fit every dictionary + # entry on its own line. + if not current.OpensScope(): + opening = _GetOpeningBracket(current) + if not self._EachDictEntryFitsOnOneLine(opening): + return True + + if previous.value == '{': + # Split if the dict/set cannot fit on one line and ends in a comma. 
+ closing = previous.matching_bracket + if (not self._FitsOnLine(previous, closing) and + closing.previous_token.value == ','): + self.stack[-1].split_before_closing_bracket = True + return True + + ########################################################################### + # Argument List Splitting + if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and + format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in + current.subtypes): + if (previous.value not in {'=', ':', '*', '**'} and + current.value not in ':=,)' and not _IsFunctionDefinition(previous)): + # If we're going to split the lines because of named arguments, then we + # want to split after the opening bracket as well. But not when this is + # part of a function definition. + if previous.value == '(': + # Make sure we don't split after the opening bracket if the + # continuation indent is greater than the opening bracket: + # + # a( + # b=1, + # c=2) + if (self._FitsOnLine(previous, previous.matching_bracket) and + unwrapped_line.IsSurroundedByBrackets(previous)): + # An argument to a function is a function call with named + # assigns. + return False + + column = self.column - self.stack[-1].last_space + return column > style.Get('CONTINUATION_INDENT_WIDTH') + + opening = _GetOpeningBracket(current) + if opening: + arglist_length = (opening.matching_bracket.total_length - + opening.total_length + self.stack[-1].indent) + return arglist_length > self.column_limit + + if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'): + # Split before arguments in a function call or definition if the + # arguments are terminated by a comma. 
+ opening = _GetOpeningBracket(current) + if opening and opening.previous_token and opening.previous_token.is_name: + if previous.value in '(,': + if opening.matching_bracket.previous_token.value == ',': + return True + + if ((current.is_name or current.value in {'*', '**'}) and + previous.value == ','): + # If we have a function call within an argument list and it won't fit on + # the remaining line, but it will fit on a line by itself, then go ahead + # and split before the call. + opening = _GetOpeningBracket(current) + if (opening and opening.value == '(' and opening.previous_token and + (opening.previous_token.is_name or + opening.previous_token.value in {'*', '**'})): + is_func_call = False + token = current + while token: + if token.value == '(': + is_func_call = True + break + if (not (token.is_name or token.value in {'*', '**'}) and + token.value != '.'): + break + token = token.next_token + + if is_func_call: + if not self._FitsOnLine(current, opening.matching_bracket): + return True + + pprevious = previous.previous_token + if (current.is_name and pprevious and pprevious.is_name and + previous.value == '('): + if (not self._FitsOnLine(previous, previous.matching_bracket) and + _IsFunctionCallWithArguments(current)): + # There is a function call, with more than 1 argument, where the first + # argument is itself a function call with arguments. In this specific + # case, if we split after the first argument's opening '(', then the + # formatting will look bad for the rest of the arguments. E.g.: + # + # outer_function_call(inner_function_call( + # inner_arg1, inner_arg2), + # outer_arg1, outer_arg2) + # + # Instead, enforce a split before that argument to keep things looking + # good. 
+ return True + + if (previous.OpensScope() and not current.OpensScope() and + format_token.Subtype.SUBSCRIPT_BRACKET not in previous.subtypes): + if not current.is_comment: + if pprevious and not pprevious.is_keyword and not pprevious.is_name: + # We want to split if there's a comment in the container. + token = current + while token != previous.matching_bracket: + if token.is_comment: + return True + token = token.next_token + + if previous.value == '(': + pptoken = previous.previous_token + if not pptoken or not pptoken.is_name: + # Split after the opening of a tuple if it doesn't fit on the current + # line and it's not a function call. + if self._FitsOnLine(previous, previous.matching_bracket): + return False + elif not self._FitsOnLine(previous, previous.matching_bracket): + if (self.column_limit - self.column) / float(self.column_limit) < 0.3: + # Try not to squish all of the arguments off to the right. + return current.next_token != previous.matching_bracket + else: + # Split after the opening of a container if it doesn't fit on the + # current line or if it has a comment. + if not self._FitsOnLine(previous, previous.matching_bracket): + return True + + ########################################################################### + # List Comprehension Splitting + if (format_token.Subtype.COMP_FOR in current.subtypes and + format_token.Subtype.COMP_FOR not in previous.subtypes): + # Split at the beginning of a list comprehension. + length = _GetLengthOfSubtype(current, format_token.Subtype.COMP_FOR, + format_token.Subtype.COMP_IF) + if length + self.column > self.column_limit: + return True + + if (format_token.Subtype.COMP_IF in current.subtypes and + format_token.Subtype.COMP_IF not in previous.subtypes): + # Split at the beginning of an if expression. 
+ length = _GetLengthOfSubtype(current, format_token.Subtype.COMP_IF) + if length + self.column > self.column_limit: + return True + + ########################################################################### + # Original Formatting Splitting + # These checks rely upon the original formatting. This is in order to + # attempt to keep hand-written code in the same condition as it was before. + # However, this may cause the formatter to fail to be idempotent. + if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and + previous.lineno < current.lineno): + # Retain the split before a bitwise operator. + return True + + if (current.is_comment and + previous.lineno < current.lineno - current.value.count('\n')): + # If a comment comes in the middle of an unwrapped line (like an if + # conditional with comments interspersed), then we want to split if the + # original comments were on a separate line. + return True + + return False + + def AddTokenToState(self, newline, dry_run, must_split=False): + """Add a token to the format decision state. + + Allow the heuristic to try out adding the token with and without a newline. + Later on, the algorithm will determine which one has the lowest penalty. + + Arguments: + newline: (bool) Add the token on a new line if True. + dry_run: (bool) Don't commit whitespace changes to the FormatToken if + True. + must_split: (bool) A newline was required before this token. + + Returns: + The penalty of splitting after the current token. + """ + penalty = 0 + if newline: + penalty = self._AddTokenOnNewline(dry_run, must_split) + else: + self._AddTokenOnCurrentLine(dry_run) + + return self.MoveStateToNextToken() + penalty + + def _AddTokenOnCurrentLine(self, dry_run): + """Puts the token on the current line. + + Appends the next token to the state and updates information necessary for + indentation. + + Arguments: + dry_run: (bool) Commit whitespace changes to the FormatToken if True. 
+ """ + current = self.next_token + previous = current.previous_token + + spaces = current.spaces_required_before + if not dry_run: + current.AddWhitespacePrefix(newlines_before=0, spaces=spaces) + + if previous.OpensScope(): + if not current.is_comment: + # Align closing scopes that are on a newline with the opening scope: + # + # foo = [a, + # b, + # ] + self.stack[-1].closing_scope_indent = self.column - 1 + if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'): + self.stack[-1].closing_scope_indent += 1 + self.stack[-1].indent = self.column + spaces + else: + self.stack[-1].closing_scope_indent = ( + self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH')) + + self.column += spaces + + def _AddTokenOnNewline(self, dry_run, must_split): + """Adds a line break and necessary indentation. + + Appends the next token to the state and updates information necessary for + indentation. + + Arguments: + dry_run: (bool) Don't commit whitespace changes to the FormatToken if + True. + must_split: (bool) A newline was required before this token. + + Returns: + The split penalty for splitting after the current state. + """ + current = self.next_token + previous = current.previous_token + + self.column = self._GetNewlineColumn() + + if not dry_run: + current.AddWhitespacePrefix(newlines_before=1, spaces=self.column) + + if not current.is_comment: + self.stack[-1].last_space = self.column + self.start_of_line_level = self.paren_level + self.lowest_level_on_line = self.paren_level + + if (previous.OpensScope() or + (previous.is_comment and previous.previous_token is not None and + previous.previous_token.OpensScope())): + self.stack[-1].closing_scope_indent = max( + 0, self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH')) + + split_before_closing_bracket = True + if style.Get('COALESCE_BRACKETS'): + split_before_closing_bracket = False + + self.stack[-1].split_before_closing_bracket = split_before_closing_bracket + + # Calculate the split penalty. 
+ penalty = current.split_penalty + + if must_split: + # Don't penalize for a must split. + return penalty + + if previous.is_pseudo_paren and previous.value == '(': + # Small penalty for splitting after a pseudo paren. + penalty += 50 + + # Add a penalty for each increasing newline we add, but don't penalize for + # splitting before an if-expression or list comprehension. + if current.value not in {'if', 'for'}: + last = self.stack[-1] + last.num_line_splits += 1 + penalty += (style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') * + last.num_line_splits) + + if current.OpensScope() and previous.OpensScope(): + # Prefer to keep opening brackets coalesced (unless it's at the beginning + # of a function call). + pprev = previous.previous_token + if not pprev or not pprev.is_name: + penalty += 10 + + return penalty + 10 + + def _GetNewlineColumn(self): + """Return the new column on the newline.""" + current = self.next_token + previous = current.previous_token + top_of_stack = self.stack[-1] + + if current.spaces_required_before > 2 or self.line.disable: + return current.spaces_required_before + + if current.OpensScope(): + return top_of_stack.indent if self.paren_level else self.first_indent + + if current.ClosesScope(): + if (previous.OpensScope() or + (previous.is_comment and previous.previous_token is not None and + previous.previous_token.OpensScope())): + return max(0, + top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH')) + return top_of_stack.closing_scope_indent + + if (previous and previous.is_string and current.is_string and + format_token.Subtype.DICTIONARY_VALUE in current.subtypes): + return previous.column + + if style.Get('INDENT_DICTIONARY_VALUE'): + if previous and (previous.value == ':' or previous.is_pseudo_paren): + if format_token.Subtype.DICTIONARY_VALUE in current.subtypes: + return top_of_stack.indent + + if (self.line.first.value in _COMPOUND_STMTS and + (not style.Get('DEDENT_CLOSING_BRACKETS') or + 
style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))): + token_indent = (len(self.line.first.whitespace_prefix.split('\n')[-1]) + + style.Get('INDENT_WIDTH')) + if token_indent == top_of_stack.indent: + return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH') + + return top_of_stack.indent + + def MoveStateToNextToken(self): + """Calculate format decision state information and move onto the next token. + + Before moving onto the next token, we first calculate the format decision + state given the current token and its formatting decisions. Then the format + decision state is set up so that the next token can be added. + + Returns: + The penalty for the number of characters over the column limit. + """ + current = self.next_token + if not current.OpensScope() and not current.ClosesScope(): + self.lowest_level_on_line = min(self.lowest_level_on_line, + self.paren_level) + + # If we encounter an opening bracket, we add a level to our stack to prepare + # for the subsequent tokens. + if current.OpensScope(): + last = self.stack[-1] + new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space + + self.stack.append(_ParenState(new_indent, self.stack[-1].last_space)) + self.paren_level += 1 + + # If we encounter a closing bracket, we can remove a level from our + # parenthesis stack. + if len(self.stack) > 1 and current.ClosesScope(): + self.stack[-2].last_space = self.stack[-1].last_space + self.stack.pop() + self.paren_level -= 1 + + is_multiline_string = current.is_string and '\n' in current.value + if is_multiline_string: + # This is a multiline string. Only look at the first line. + self.column += len(current.value.split('\n')[0]) + elif not current.is_pseudo_paren: + self.column += len(current.value) + + self.next_token = self.next_token.next_token + + # Calculate the penalty for overflowing the column limit. 
+    penalty = 0
+    if not current.is_pylint_comment and self.column > self.column_limit:
+      excess_characters = self.column - self.column_limit
+      penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters
+
+    if is_multiline_string:
+      # If this is a multiline string, the column is actually the
+      # end of the last line in the string.
+      self.column = len(current.value.split('\n')[-1])
+
+    return penalty
+
+  def _FitsOnLine(self, start, end):
+    """Determines if line between start and end can fit on the current line."""
+    length = end.total_length - start.total_length
+    if not start.is_pseudo_paren:
+      length += len(start.value)
+    return length + self.column <= self.column_limit
+
+  def _EachDictEntryFitsOnOneLine(self, opening):
+    """Determine if each dict elems can fit on one line."""
+
+    def PreviousNonCommentToken(tok):
+      tok = tok.previous_token
+      while tok.is_comment:
+        tok = tok.previous_token
+      return tok
+
+    def ImplicitStringConcatenation(tok):
+      num_strings = 0
+      if tok.is_pseudo_paren:
+        tok = tok.next_token
+      while tok.is_string:
+        num_strings += 1
+        tok = tok.next_token
+      return num_strings > 1
+
+    closing = opening.matching_bracket
+    entry_start = opening.next_token
+    current = opening.next_token.next_token
+
+    while current and current != closing:
+      if format_token.Subtype.DICTIONARY_KEY in current.subtypes:
+        prev = PreviousNonCommentToken(current)
+        length = prev.total_length - entry_start.total_length
+        length += len(entry_start.value)
+        if length + self.stack[-2].indent >= self.column_limit:
+          return False
+        entry_start = current
+      if current.OpensScope():
+        if ((current.value == '{' or
+             (current.is_pseudo_paren and current.next_token.value == '{') and
+             format_token.Subtype.DICTIONARY_VALUE in current.subtypes) or
+            ImplicitStringConcatenation(current)):
+          # A dictionary entry that cannot fit on a single line shouldn't matter
+          # to this calculation. 
If it can't fit on a single line, then the + # opening should be on the same line as the key and the rest on + # newlines after it. But the other entries should be on single lines + # if possible. + if current.matching_bracket: + current = current.matching_bracket + while current: + if current == closing: + return True + if format_token.Subtype.DICTIONARY_KEY in current.subtypes: + entry_start = current + break + current = current.next_token + else: + current = current.matching_bracket + else: + current = current.next_token + + # At this point, current is the closing bracket. Go back one to get the the + # end of the dictionary entry. + current = PreviousNonCommentToken(current) + length = current.total_length - entry_start.total_length + length += len(entry_start.value) + return length + self.stack[-2].indent <= self.column_limit + + +def _IsFunctionCallWithArguments(token): + while token: + if token.value == '(': + token = token.next_token + return token and token.value != ')' + elif token.name not in {'NAME', 'DOT'}: + break + token = token.next_token + return False + + +def _GetLengthOfSubtype(token, subtype, exclude=None): + current = token + while (current.next_token and subtype in current.subtypes and + (exclude is None or exclude not in current.subtypes)): + current = current.next_token + return current.total_length - token.total_length + 1 + + +def _GetOpeningBracket(current): + """Get the opening bracket containing the current token.""" + if current.matching_bracket and not current.is_pseudo_paren: + return current.matching_bracket + while current: + if current.ClosesScope(): + current = current.matching_bracket + elif current.is_pseudo_paren: + current = current.previous_token + elif current.OpensScope(): + return current + current = current.previous_token + return None + + +def _LastTokenInLine(current): + while not current.is_comment and current.next_token: + current = current.next_token + return current + + +def _IsFunctionDefinition(current): + prev 
= current.previous_token + return (current.value == '(' and prev and + format_token.Subtype.FUNC_DEF in prev.subtypes) + + +def _IsLastScopeInLine(current): + while current: + current = current.next_token + if current and current.OpensScope(): + return False + return True + + +class _ParenState(object): + """Maintains the state of the bracket enclosures. + + A stack of _ParenState objects are kept so that we know how to indent relative + to the brackets. + + Attributes: + indent: The column position to which a specified parenthesis level needs to + be indented. + last_space: The column position of the last space on each level. + split_before_closing_bracket: Whether a newline needs to be inserted before + the closing bracket. We only want to insert a newline before the closing + bracket if there also was a newline after the beginning left bracket. + num_line_splits: Number of line splits this _ParenState contains already. + Each subsequent line split gets an increasing penalty. + """ + + # TODO(morbo): This doesn't track "bin packing." 
+ + def __init__(self, indent, last_space): + self.indent = indent + self.last_space = last_space + self.closing_scope_indent = 0 + self.split_before_closing_bracket = False + self.num_line_splits = 0 + + def Clone(self): + state = _ParenState(self.indent, self.last_space) + state.closing_scope_indent = self.closing_scope_indent + state.split_before_closing_bracket = self.split_before_closing_bracket + state.num_line_splits = self.num_line_splits + return state + + def __repr__(self): + return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % ( + self.indent, self.last_space, self.closing_scope_indent) + + def __eq__(self, other): + return hash(self) == hash(other) + + def __ne__(self, other): + return not self == other + + def __hash__(self, *args, **kwargs): + return hash((self.indent, self.last_space, self.closing_scope_indent, + self.split_before_closing_bracket, self.num_line_splits)) diff --git a/tools/yapf/yapf/yapflib/format_token.py b/tools/yapf/yapf/yapflib/format_token.py new file mode 100644 index 000000000..de270cf58 --- /dev/null +++ b/tools/yapf/yapf/yapflib/format_token.py @@ -0,0 +1,283 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Pytree nodes with extra formatting information. + +This is a thin wrapper around a pytree.Leaf node. 
+""" + +import keyword +import re + +from lib2to3.pgen2 import token + +from yapf.yapflib import py3compat +from yapf.yapflib import pytree_utils +from yapf.yapflib import style + +CONTINUATION = token.N_TOKENS +token.N_TOKENS += 1 + + +class Subtype(object): + """Subtype information about tokens. + + Gleaned from parsing the code. Helps determine the best formatting. + """ + NONE = 0 + UNARY_OPERATOR = 1 + BINARY_OPERATOR = 2 + SUBSCRIPT_COLON = 3 + SUBSCRIPT_BRACKET = 4 + DEFAULT_OR_NAMED_ASSIGN = 5 + DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6 + VARARGS_LIST = 7 + VARARGS_STAR = 8 + KWARGS_STAR_STAR = 9 + ASSIGN_OPERATOR = 10 + DICTIONARY_KEY = 11 + DICTIONARY_KEY_PART = 12 + DICTIONARY_VALUE = 13 + DICT_SET_GENERATOR = 14 + COMP_FOR = 15 + COMP_IF = 16 + FUNC_DEF = 17 + DECORATOR = 18 + + +class FormatToken(object): + """A wrapper around pytree Leaf nodes. + + This represents the token plus additional information useful for reformatting + the code. + + Attributes: + next_token: The token in the unwrapped line after this token or None if this + is the last token in the unwrapped line. + previous_token: The token in the unwrapped line before this token or None if + this is the first token in the unwrapped line. + matching_bracket: If a bracket token ('[', '{', or '(') the matching + bracket. + whitespace_prefix: The prefix for the whitespace. + spaces_required_before: The number of spaces required before a token. This + is a lower-bound for the formatter and not a hard requirement. For + instance, a comment may have n required spaces before it. But the + formatter won't place n spaces before all comments. Only those that are + moved to the end of a line of code. The formatter may use different + spacing when appropriate. + can_break_before: True if we're allowed to break before this token. + must_break_before: True if we're required to break before this token. + total_length: The total length of the unwrapped line up to and including + whitespace and this token. 
However, this doesn't include the initial + indentation amount. + split_penalty: The penalty for splitting the line before this token. + """ + + def __init__(self, node): + """Constructor. + + Arguments: + node: (pytree.Leaf) The node that's being wrapped. + """ + self.node = node + self.next_token = None + self.previous_token = None + self.matching_bracket = None + self.whitespace_prefix = '' + self.can_break_before = False + self.must_break_before = False + self.total_length = 0 # TODO(morbo): Think up a better name. + self.split_penalty = 0 + + if self.is_comment: + self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT') + else: + self.spaces_required_before = 0 + + if self.is_continuation: + self.value = self.node.value.rstrip() + else: + self.value = self.node.value + + def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0): + """Register a token's whitespace prefix. + + This is the whitespace that will be output before a token's string. + + Arguments: + newlines_before: (int) The number of newlines to place before the token. + spaces: (int) The number of spaces to place before the token. + indent_level: (int) The indentation level. 
+ """ + indent_char = '\t' if style.Get('USE_TABS') else ' ' + token_indent_char = indent_char if newlines_before > 0 else ' ' + indent_before = (indent_char * indent_level * style.Get('INDENT_WIDTH') + + token_indent_char * spaces) + + if self.is_comment: + comment_lines = [s.lstrip() for s in self.value.splitlines()] + self.node.value = ('\n' + indent_before).join(comment_lines) + + # Update our own value since we are changing node value + self.value = self.node.value + + if not self.whitespace_prefix: + self.whitespace_prefix = ( + '\n' * (self.newlines or newlines_before) + indent_before) + else: + self.whitespace_prefix += indent_before + + def AdjustNewlinesBefore(self, newlines_before): + """Change the number of newlines before this token.""" + self.whitespace_prefix = ( + '\n' * newlines_before + self.whitespace_prefix.lstrip('\n')) + + def RetainHorizontalSpacing(self, first_column, depth): + """Retains a token's horizontal spacing.""" + previous = self.previous_token + if previous is None: + return + + cur_lineno = self.lineno + prev_lineno = previous.lineno + if previous.is_multiline_string: + prev_lineno += previous.value.count('\n') + + if (cur_lineno != prev_lineno or + (previous.is_pseudo_paren and previous.value != ')' and + cur_lineno != previous.previous_token.lineno)): + self.spaces_required_before = ( + self.column - first_column + depth * style.Get('INDENT_WIDTH')) + return + + cur_column = self.node.column + prev_column = previous.node.column + prev_len = len(previous.value) + + if previous.is_pseudo_paren and previous.value == ')': + prev_column -= 1 + prev_len = 0 + + if previous.is_multiline_string: + prev_len = len(previous.value.split('\n')[-1]) + if '\n' in previous.value: + prev_column = 0 # Last line starts in column 0. 
+ self.spaces_required_before = cur_column - (prev_column + prev_len) + + def OpensScope(self): + return self.value in pytree_utils.OPENING_BRACKETS + + def ClosesScope(self): + return self.value in pytree_utils.CLOSING_BRACKETS + + def __repr__(self): + msg = 'FormatToken(name={0}, value={1}'.format(self.name, self.value) + msg += ', pseudo)' if self.is_pseudo_paren else ')' + return msg + + @property + @py3compat.lru_cache() + def node_split_penalty(self): + """Split penalty attached to the pytree node of this token.""" + return pytree_utils.GetNodeAnnotation( + self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0) + + @property + def newlines(self): + """The number of newlines needed before this token.""" + return pytree_utils.GetNodeAnnotation(self.node, + pytree_utils.Annotation.NEWLINES) + + @property + def must_split(self): + """Return true if the token requires a split before it.""" + return pytree_utils.GetNodeAnnotation(self.node, + pytree_utils.Annotation.MUST_SPLIT) + + @property + def column(self): + """The original column number of the node in the source.""" + return self.node.column + + @property + def lineno(self): + """The original line number of the node in the source.""" + return self.node.lineno + + @property + @py3compat.lru_cache() + def subtypes(self): + """Extra type information for directing formatting.""" + value = pytree_utils.GetNodeAnnotation(self.node, + pytree_utils.Annotation.SUBTYPE) + return [Subtype.NONE] if value is None else value + + @property + @py3compat.lru_cache() + def is_binary_op(self): + """Token is a binary operator.""" + return Subtype.BINARY_OPERATOR in self.subtypes + + @property + @py3compat.lru_cache() + def name(self): + """A string representation of the node's name.""" + return pytree_utils.NodeName(self.node) + + @property + def is_comment(self): + return self.node.type == token.COMMENT + + @property + def is_continuation(self): + return self.node.type == CONTINUATION + + @property + 
@py3compat.lru_cache()
+  def is_keyword(self):
+    return keyword.iskeyword(self.value)
+
+  @property
+  @py3compat.lru_cache()
+  def is_name(self):
+    return self.node.type == token.NAME and not self.is_keyword
+
+  @property
+  def is_number(self):
+    return self.node.type == token.NUMBER
+
+  @property
+  def is_string(self):
+    return self.node.type == token.STRING
+
+  @property
+  @py3compat.lru_cache()
+  def is_multiline_string(self):
+    return (self.is_string and
+            re.match(r'^[uUbB]?[rR]?(?P<delim>"""|\'\'\').*(?P=delim)$',
+                     self.value, re.DOTALL) is not None)
+
+  @property
+  @py3compat.lru_cache()
+  def is_docstring(self):
+    return self.is_multiline_string and not self.node.prev_sibling
+
+  @property
+  @py3compat.lru_cache()
+  def is_pseudo_paren(self):
+    return hasattr(self.node, 'is_pseudo') and self.node.is_pseudo
+
+  @property
+  def is_pylint_comment(self):
+    return self.is_comment and re.match(r'#.*\bpylint:\s*(disable|enable)=',
+                                        self.value)
diff --git a/tools/yapf/yapf/yapflib/line_joiner.py b/tools/yapf/yapf/yapflib/line_joiner.py
new file mode 100644
index 000000000..860fce788
--- /dev/null
+++ b/tools/yapf/yapf/yapflib/line_joiner.py
@@ -0,0 +1,109 @@
+# Copyright 2015-2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Join unwrapped lines together.
+
+Determine how many lines can be joined into one line. 
For instance, we could +join these statements into one line: + + if a == 42: + continue + +like this: + + if a == 42: continue + +There are a few restrictions: + + 1. The lines should have been joined in the original source. + 2. The joined lines must not go over the column boundary if placed on the same + line. + 3. They need to be very simple statements. + +Note: Because we don't allow the use of a semicolon to separate statements, it +follows that there can only be at most two lines to join. +""" + +from yapf.yapflib import style + +_CLASS_OR_FUNC = frozenset({'def', 'class'}) + + +def CanMergeMultipleLines(lines, last_was_merged=False): + """Determine if multiple lines can be joined into one. + + Arguments: + lines: (list of UnwrappedLine) This is a splice of UnwrappedLines from the + full code base. + last_was_merged: (bool) The last line was merged. + + Returns: + True if two consecutive lines can be joined together. In reality, this will + only happen if two consecutive lines can be joined, due to the style guide. + """ + # The indentation amount for the starting line (number of spaces). + indent_amt = lines[0].depth * style.Get('INDENT_WIDTH') + if len(lines) == 1 or indent_amt > style.Get('COLUMN_LIMIT'): + return False + + if (len(lines) >= 3 and lines[2].depth >= lines[1].depth and + lines[0].depth != lines[2].depth): + # If lines[2]'s depth is greater than or equal to line[1]'s depth, we're not + # looking at a single statement (e.g., if-then, while, etc.). A following + # line with the same depth as the first line isn't part of the lines we + # would want to combine. + return False # Don't merge more than two lines together. + + if lines[0].first.value in _CLASS_OR_FUNC: + # Don't join lines onto the starting line of a class or function. 
+ return False + + limit = style.Get('COLUMN_LIMIT') - indent_amt + if lines[0].last.total_length < limit: + limit -= lines[0].last.total_length + + if lines[0].first.value == 'if': + return _CanMergeLineIntoIfStatement(lines, limit) + if last_was_merged and lines[0].first.value in {'elif', 'else'}: + return _CanMergeLineIntoIfStatement(lines, limit) + + # TODO(morbo): Other control statements? + + return False + + +def _CanMergeLineIntoIfStatement(lines, limit): + """Determine if we can merge a short if-then statement into one line. + + Two lines of an if-then statement can be merged if they were that way in the + original source, fit on the line without going over the column limit, and are + considered "simple" statements --- typically statements like 'pass', + 'continue', and 'break'. + + Arguments: + lines: (list of UnwrappedLine) The lines we are wanting to merge. + limit: (int) The amount of space remaining on the line. + + Returns: + True if the lines can be merged, False otherwise. + """ + if len(lines[1].tokens) == 1 and lines[1].last.is_multiline_string: + # This might be part of a multiline shebang. + return True + if lines[0].lineno != lines[1].lineno: + # Don't merge lines if the original lines weren't merged. + return False + if lines[1].last.total_length >= limit: + # Don't merge lines if the result goes over the column limit. + return False + return style.Get('JOIN_MULTIPLE_LINES') diff --git a/tools/yapf/yapf/yapflib/py3compat.py b/tools/yapf/yapf/yapflib/py3compat.py new file mode 100644 index 000000000..2886c384d --- /dev/null +++ b/tools/yapf/yapf/yapflib/py3compat.py @@ -0,0 +1,113 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities for Python2 / Python3 compatibility.""" + +import io +import os +import sys + +PY3 = sys.version_info[0] >= 3 +PY36 = sys.version_info[0] >= 3 and sys.version_info[1] >= 6 + +if PY3: + StringIO = io.StringIO + BytesIO = io.BytesIO + + import codecs + + def open_with_encoding(filename, mode, encoding, newline=''): # pylint: disable=unused-argument + return codecs.open(filename, mode=mode, encoding=encoding) + + import functools + lru_cache = functools.lru_cache + + range = range + ifilter = filter + raw_input = input + + import configparser + + # Mappings from strings to booleans (such as '1' to True, 'false' to False, + # etc.) + CONFIGPARSER_BOOLEAN_STATES = configparser.ConfigParser.BOOLEAN_STATES +else: + import __builtin__ + import cStringIO + StringIO = BytesIO = cStringIO.StringIO + + open_with_encoding = io.open + + # Python 2.7 doesn't have a native LRU cache, so do nothing. + def lru_cache(maxsize=128, typed=False): + + def fake_wrapper(user_function): + return user_function + + return fake_wrapper + + range = xrange + + from itertools import ifilter + raw_input = raw_input + + import ConfigParser as configparser + CONFIGPARSER_BOOLEAN_STATES = configparser.ConfigParser._boolean_states # pylint: disable=protected-access + + +def EncodeAndWriteToStdout(s, encoding='utf-8'): + """Encode the given string and emit to stdout. + + The string may contain non-ascii characters. This is a problem when stdout is + redirected, because then Python doesn't know the encoding and we may get a + UnicodeEncodeError. 
+
+ Arguments:
+ s: (string) The string to encode.
+ encoding: (string) The encoding of the string.
+ """
+ if PY3:
+ sys.stdout.buffer.write(s.encode(encoding))
+ elif sys.platform == 'win32':
+ # On python 2 and Windows universal newline transformation will be in
+ # effect on stdout. Python 2 will not let us avoid this easily because
+ # it happens based on whether the file handle is opened in O_BINARY or
+ # O_TEXT state. However we can tell Windows itself to change the current
+ # mode, and python 2 will follow suit. However we must take care to change
+ # the mode on the actual external stdout not just the current sys.stdout
+ # which may have been monkey-patched inside the python environment.
+ import msvcrt # pylint: disable=g-import-not-at-top
+ if sys.__stdout__ is sys.stdout:
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ sys.stdout.write(s.encode(encoding))
+ else:
+ sys.stdout.write(s.encode(encoding))
+
+
+if PY3:
+ unicode = str # pylint: disable=redefined-builtin,invalid-name
+else:
+
+ def unicode(s): # pylint: disable=invalid-name
+ """Force conversion of s to unicode."""
+ return __builtin__.unicode(s, 'utf-8')
+
+
+# In Python 3.2+, readfp is deprecated in favor of read_file, which doesn't
+# exist in Python 2 yet. To avoid deprecation warnings, subclass ConfigParser to
+# fix this - now read_file works across all Python versions we care about.
+class ConfigParser(configparser.ConfigParser):
+ if not PY3:
+
+ def read_file(self, fp, source=None):
+ self.readfp(fp, filename=source)
diff --git a/tools/yapf/yapf/yapflib/pytree_unwrapper.py b/tools/yapf/yapf/yapflib/pytree_unwrapper.py
new file mode 100644
index 000000000..c67c1c6ea
--- /dev/null
+++ b/tools/yapf/yapf/yapflib/pytree_unwrapper.py
@@ -0,0 +1,376 @@
+# Copyright 2015-2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
+
+[for a description of what an unwrapped line is, see unwrapped_line.py]
+
+This is a pytree visitor that goes over a parse tree and produces a list of
+UnwrappedLine containers from it, each with its own depth and containing all
+the tokens that could fit on the line if there were no maximal line-length
+limitations.
+
+Note: a precondition to running this visitor and obtaining correct results is
+for the tree to have its comments spliced in as nodes. Prefixes are ignored.
+
+For most uses, the convenience function UnwrapPyTree should be sufficient.
+"""
+
+# The word "token" is overloaded within this module, so for clarity rename
+# the imported pgen2.token module.
+from lib2to3 import pytree
+from lib2to3.pgen2 import token as grammar_token
+
+from yapf.yapflib import pytree_utils
+from yapf.yapflib import pytree_visitor
+from yapf.yapflib import split_penalty
+from yapf.yapflib import unwrapped_line
+
+
+def UnwrapPyTree(tree):
+ """Create and return a list of unwrapped lines from the given pytree.
+
+ Arguments:
+ tree: the top-level pytree node to unwrap.
+
+ Returns:
+ A list of UnwrappedLine objects.
+ """
+ unwrapper = PyTreeUnwrapper()
+ unwrapper.Visit(tree)
+ uwlines = unwrapper.GetUnwrappedLines()
+ uwlines.sort(key=lambda x: x.lineno)  # Order lines by original source position.
+ return uwlines
+
+
+# Grammar tokens considered as whitespace for the purpose of unwrapping.
+_WHITESPACE_TOKENS = frozenset([
+ grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
+ grammar_token.ENDMARKER
+])
+
+
+class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
+ """PyTreeUnwrapper - see file-level docstring for detailed description.
+
+ Note: since this implements PyTreeVisitor and node names in lib2to3 are
+ underscore_separated, the visiting methods of this class are named as
+ Visit_node_name. invalid-name pragmas are added to each such method to silence
+ a style warning. This is forced on us by the usage of lib2to3, and re-munging
+ method names to make them different from actual node names sounded like a
+ confusing and brittle affair that wasn't worth it for this small & controlled
+ deviation from the style guide.
+
+ To understand the connection between visitor methods in this class, some
+ familiarity with the Python grammar is required.
+ """
+
+ def __init__(self):
+ # A list of all unwrapped lines finished visiting so far.
+ self._unwrapped_lines = []
+
+ # Builds up a "current" unwrapped line while visiting pytree nodes. Some
+ # nodes will finish a line and start a new one.
+ self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)
+
+ # Current indentation depth.
+ self._cur_depth = 0
+
+ def GetUnwrappedLines(self):
+ """Fetch the result of the tree walk.
+
+ Note: only call this after visiting the whole tree.
+
+ Returns:
+ A list of UnwrappedLine objects.
+ """
+ # Make sure the last line that was being populated is flushed.
+ self._StartNewLine()
+ return self._unwrapped_lines  # NOTE(review): returns the internal list; callers share state.
+
+ def _StartNewLine(self):
+ """Finish current line and start a new one.
+
+ Place the currently accumulated line into the _unwrapped_lines list and
+ start a new one.
+ """ + if self._cur_unwrapped_line.tokens: + self._unwrapped_lines.append(self._cur_unwrapped_line) + _MatchBrackets(self._cur_unwrapped_line) + _AdjustSplitPenalty(self._cur_unwrapped_line) + self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth) + + _STMT_TYPES = frozenset({ + 'if_stmt', + 'while_stmt', + 'for_stmt', + 'try_stmt', + 'expect_clause', + 'with_stmt', + 'funcdef', + 'classdef', + }) + + # pylint: disable=invalid-name,missing-docstring + def Visit_simple_stmt(self, node): + # A 'simple_stmt' conveniently represents a non-compound Python statement, + # i.e. a statement that does not contain other statements. + + # When compound nodes have a single statement as their suite, the parser + # can leave it in the tree directly without creating a suite. But we have + # to increase depth in these cases as well. However, don't increase the + # depth of we have a simple_stmt that's a comment node. This represents a + # standalone comment and in the case of it coming directly after the + # funcdef, it is a "top" comment for the whole function. + # TODO(eliben): add more relevant compound statements here. + single_stmt_suite = (node.parent and + pytree_utils.NodeName(node.parent) in self._STMT_TYPES) + is_comment_stmt = pytree_utils.IsCommentStatement(node) + if single_stmt_suite and not is_comment_stmt: + self._cur_depth += 1 + self._StartNewLine() + self.DefaultNodeVisit(node) + if single_stmt_suite and not is_comment_stmt: + self._cur_depth -= 1 + + def _VisitCompoundStatement(self, node, substatement_names): + """Helper for visiting compound statements. + + Python compound statements serve as containers for other statements. Thus, + when we encounter a new compound statement we start a new unwrapped line. + + Arguments: + node: the node to visit. + substatement_names: set of node names. A compound statement will be + recognized as a NAME node with a name in this set. 
+ """ + for child in node.children: + # A pytree is structured in such a way that a single 'if_stmt' node will + # contain all the 'if', 'elif' and 'else' nodes as children (similar + # structure applies to 'while' statements, 'try' blocks, etc). Therefore, + # we visit all children here and create a new line before the requested + # set of nodes. + if (child.type == grammar_token.NAME and + child.value in substatement_names): + self._StartNewLine() + self.Visit(child) + + _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) + + def Visit_if_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) + + _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) + + def Visit_while_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) + + _FOR_STMT_ELEMS = frozenset({'for', 'else'}) + + def Visit_for_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) + + _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) + + def Visit_try_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) + + _EXCEPT_STMT_ELEMS = frozenset({'except'}) + + def Visit_except_clause(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) + + _FUNC_DEF_ELEMS = frozenset({'def'}) + + def Visit_funcdef(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) + + def Visit_async_funcdef(self, node): # pylint: disable=invalid-name + self._StartNewLine() + index = 0 + for child in node.children: + index += 1 + self.Visit(child) + if pytree_utils.NodeName(child) == 'ASYNC': + break + for child in node.children[index].children: + self.Visit(child) + + _CLASS_DEF_ELEMS = frozenset({'class'}) + + def Visit_classdef(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, 
 self._CLASS_DEF_ELEMS)
+
+ def Visit_async_stmt(self, node): # pylint: disable=invalid-name
+ self._StartNewLine()
+ index = 0
+ for child in node.children:
+ index += 1
+ self.Visit(child)
+ if pytree_utils.NodeName(child) == 'ASYNC':
+ break
+ for child in node.children[index].children:
+ self.Visit(child)
+
+ def Visit_decorators(self, node): # pylint: disable=invalid-name
+ for child in node.children:
+ self._StartNewLine()
+ self.Visit(child)
+
+ def Visit_decorated(self, node): # pylint: disable=invalid-name
+ for child in node.children:
+ self._StartNewLine()
+ self.Visit(child)
+
+ _WITH_STMT_ELEMS = frozenset({'with'})
+
+ def Visit_with_stmt(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
+
+ def Visit_suite(self, node): # pylint: disable=invalid-name
+ # A 'suite' starts a new indentation level in Python.
+ self._cur_depth += 1
+ self._StartNewLine()
+ self.DefaultNodeVisit(node)
+ self._cur_depth -= 1
+
+ def Visit_listmaker(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_import_as_names(self, node): # pylint: disable=invalid-name
+ if node.prev_sibling.value == '(':  # Parenthesized import list.
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name
+ if _ContainsComments(node):
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_arglist(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_typedargslist(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def DefaultLeafVisit(self, leaf):
+ """Default visitor for tree leaves.
+
+ A tree leaf always just gets appended to the current unwrapped line.
+
+ Arguments:
+ leaf: the leaf to visit.
+ """
+ if leaf.type in _WHITESPACE_TOKENS:
+ self._StartNewLine()
+ elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
+ if leaf.value == ';':
+ # Split up multiple statements on one line.
+ self._StartNewLine()
+ else:
+ # Add non-whitespace tokens and comments that aren't empty.
+ self._cur_unwrapped_line.AppendNode(leaf)
+
+
+_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}  # NOTE(review): not referenced in the code shown here -- verify use.
+
+
+def _MatchBrackets(uwline):
+ """Visit the node and match the brackets.
+
+ For every open bracket ('[', '{', or '('), find the associated closing bracket
+ and "match" them up. I.e., save in the token a pointer to its associated open
+ or close bracket.
+
+ Arguments:
+ uwline: (UnwrappedLine) An unwrapped line.
+ """
+ bracket_stack = []
+ for token in uwline.tokens:
+ if token.value in pytree_utils.OPENING_BRACKETS:
+ bracket_stack.append(token)
+ elif token.value in pytree_utils.CLOSING_BRACKETS:
+ bracket_stack[-1].matching_bracket = token
+ token.matching_bracket = bracket_stack[-1]
+ bracket_stack.pop()
+
+
+def _AdjustSplitPenalty(uwline):
+ """Visit the node and adjust the split penalties if needed.
+
+ A token shouldn't be split if it's not within a bracket pair. Mark any token
+ that's not within a bracket pair as "unbreakable".
+
+ Arguments:
+ uwline: (UnwrappedLine) An unwrapped line.
+ """ + bracket_level = 0 + for index, token in enumerate(uwline.tokens): + if index and not bracket_level: + pytree_utils.SetNodeAnnotation(token.node, + pytree_utils.Annotation.SPLIT_PENALTY, + split_penalty.UNBREAKABLE) + if token.value in pytree_utils.OPENING_BRACKETS: + bracket_level += 1 + elif token.value in pytree_utils.CLOSING_BRACKETS: + bracket_level -= 1 + + +def _DetermineMustSplitAnnotation(node): + """Enforce a split in the list if the list ends with a comma.""" + if not _ContainsComments(node): + if (not isinstance(node.children[-1], pytree.Leaf) or + node.children[-1].value != ','): + return + num_children = len(node.children) + index = 0 + _SetMustSplitOnFirstLeaf(node.children[0]) + while index < num_children - 1: + child = node.children[index] + if isinstance(child, pytree.Leaf) and child.value == ',': + next_child = node.children[index + 1] + if next_child.type == grammar_token.COMMENT: + index += 1 + if index >= num_children - 1: + break + _SetMustSplitOnFirstLeaf(node.children[index + 1]) + index += 1 + + +def _ContainsComments(node): + """Return True if the list has a comment in it.""" + if isinstance(node, pytree.Leaf): + return node.type == grammar_token.COMMENT + for child in node.children: + if _ContainsComments(child): + return True + return False + + +def _SetMustSplitOnFirstLeaf(node): + """Set the "must split" annotation on the first leaf node.""" + + def FindFirstLeaf(node): + if isinstance(node, pytree.Leaf): + return node + return FindFirstLeaf(node.children[0]) + + pytree_utils.SetNodeAnnotation( + FindFirstLeaf(node), pytree_utils.Annotation.MUST_SPLIT, True) diff --git a/tools/yapf/yapf/yapflib/pytree_utils.py b/tools/yapf/yapf/yapflib/pytree_utils.py new file mode 100644 index 000000000..60fd955ff --- /dev/null +++ b/tools/yapf/yapf/yapflib/pytree_utils.py @@ -0,0 +1,297 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""pytree-related utilities.
+
+This module collects various utilities related to the parse trees produced by
+the lib2to3 library.
+
+ NodeName(): produces a string name for pytree nodes.
+ ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
+ a given string with code to a pytree.
+ InsertNodesBefore(): insert nodes before another in a pytree.
+ InsertNodesAfter(): insert nodes after another in a pytree.
+ {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
+"""
+
+import ast
+from lib2to3 import pygram
+from lib2to3 import pytree
+from lib2to3.pgen2 import driver
+from lib2to3.pgen2 import parse
+from lib2to3.pgen2 import token
+
+# TODO(eliben): We may want to get rid of this filtering at some point once we
+# have a better understanding of what information we need from the tree. Then,
+# these tokens may be filtered out from the tree before the tree gets to the
+# unwrapper.
+NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
+
+OPENING_BRACKETS = frozenset({'(', '[', '{'})
+CLOSING_BRACKETS = frozenset({')', ']', '}'})
+
+
+class Annotation(object):
+ """Annotation names associated with pytrees."""
+ CHILD_INDENT = 'child_indent'
+ NEWLINES = 'newlines'
+ MUST_SPLIT = 'must_split'
+ SPLIT_PENALTY = 'split_penalty'
+ SUBTYPE = 'subtype'
+
+
+def NodeName(node):
+ """Produce a string name for a given node.
+
+ For a Leaf this is the token name, and for a Node this is the type.
+
+ Arguments:
+ node: a tree node
+
+ Returns:
+ Name as a string.
+ """
+ # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
+ if node.type < 256:
+ return token.tok_name[node.type]
+ else:
+ return pygram.python_grammar.number2symbol[node.type]
+
+
+# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
+# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
+# context where a keyword is disallowed).
+# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
+# monkey-patching.
+_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
+del _GRAMMAR_FOR_PY3.keywords['exec']
+
+_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
+del _GRAMMAR_FOR_PY2.keywords['nonlocal']
+
+
+def ParseCodeToTree(code):
+ """Parse the given code to a lib2to3 pytree.
+
+ Arguments:
+ code: a string with the code to parse.
+
+ Raises:
+ SyntaxError if the code is invalid syntax.
+ parse.ParseError if some other parsing failure.
+
+ Returns:
+ The root node of the parsed tree.
+ """
+ # This function is tiny, but the incantation for invoking the parser correctly
+ # is sufficiently magical to be worth abstracting away.
+ try:
+ # Try to parse using a Python 3 grammar, which is more permissive (print and
+ # exec are not keywords).
+ parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
+ tree = parser_driver.parse_string(code, debug=False)  # Common case: parses under the permissive Py3 grammar.
+ except parse.ParseError:
+ # Now try to parse using a Python 2 grammar; If this fails, then
+ # there's something else wrong with the code.
+ try:
+ parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
+ tree = parser_driver.parse_string(code, debug=False)
+ except parse.ParseError:
+ # Raise a syntax error if the code is invalid python syntax.
+ try:
+ ast.parse(code)
+ except SyntaxError as e:
+ raise e
+ else:
+ raise
+ return _WrapEndMarker(tree)
+
+
+def _WrapEndMarker(tree):
+ """Wrap a single ENDMARKER token in a "file_input" node.
+
+ Arguments:
+ tree: (pytree.Node) The root node of the parsed tree.
+
+ Returns:
+ The root node of the parsed tree. If the tree is a single ENDMARKER node,
+ then that node is wrapped in a "file_input" node. That will ensure we don't
+ skip comments attached to that node.
+ """
+ if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
+ return pytree.Node(pygram.python_symbols.file_input, [tree])
+ return tree
+
+
+def InsertNodesBefore(new_nodes, target):
+ """Insert new_nodes before the given target location in the tree.
+
+ Arguments:
+ new_nodes: a sequence of new nodes to insert (the nodes should not be in the
+ tree).
+ target: the target node before which the new nodes will be inserted.
+
+ Raises:
+ RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
+ """
+ for node in new_nodes:
+ _InsertNodeAt(node, target, after=False)
+
+
+def InsertNodesAfter(new_nodes, target):
+ """Insert new_nodes after the given target location in the tree.
+
+ Arguments:
+ new_nodes: a sequence of new nodes to insert (the nodes should not be in the
+ tree).
+ target: the target node after which the new nodes will be inserted.
+
+ Raises:
+ RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
+ """
+ for node in reversed(new_nodes):
+ _InsertNodeAt(node, target, after=True)
+
+
+def _InsertNodeAt(new_node, target, after=False):
+ """Underlying implementation for node insertion.
+
+ Arguments:
+ new_node: a new node to insert (this node should not be in the tree).
+ target: the target node.
+ after: if True, new_node is inserted after target. Otherwise, it's inserted
+ before target.
+
+ Returns:
+ nothing
+
+ Raises:
+ RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
+ """ + + # Protect against attempts to insert nodes which already belong to some tree. + if new_node.parent is not None: + raise RuntimeError('inserting node which already has a parent', + (new_node, new_node.parent)) + + # The code here is based on pytree.Base.next_sibling + parent_of_target = target.parent + if parent_of_target is None: + raise RuntimeError('expected target node to have a parent', (target,)) + + for i, child in enumerate(parent_of_target.children): + if child is target: + insertion_index = i + 1 if after else i + parent_of_target.insert_child(insertion_index, new_node) + return + + raise RuntimeError('unable to find insertion point for target node', + (target,)) + + +# The following constant and functions implement a simple custom annotation +# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute +# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be +# managed through GetNodeAnnotation and SetNodeAnnotation. +_NODE_ANNOTATION_PREFIX = '_yapf_annotation_' + + +def GetNodeAnnotation(node, annotation, default=None): + """Get annotation value from a node. + + Arguments: + node: the node. + annotation: annotation name - a string. + default: the default value to return if there's no annotation. + + Returns: + Value of the annotation in the given node. If the node doesn't have this + particular annotation name yet, returns default. + """ + return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) + + +def SetNodeAnnotation(node, annotation, value): + """Set annotation value on a node. + + Arguments: + node: the node. + annotation: annotation name - a string. + value: annotation value to set. + """ + setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) + + +def AppendNodeAnnotation(node, annotation, value): + """Appends an annotation value to a list of annotations on the node. + + Arguments: + node: the node. + annotation: annotation name - a string. + value: annotation value to set. 
+ """ + attr = GetNodeAnnotation(node, annotation, set()) + attr.add(value) + SetNodeAnnotation(node, annotation, attr) + + +def RemoveSubtypeAnnotation(node, value): + """Removes an annotation value from the subtype annotations on the node. + + Arguments: + node: the node. + value: annotation value to remove. + """ + attr = GetNodeAnnotation(node, Annotation.SUBTYPE) + if attr and value in attr: + attr.remove(value) + SetNodeAnnotation(node, Annotation.SUBTYPE, attr) + + +def DumpNodeToString(node): + """Dump a string representation of the given node. For debugging. + + Arguments: + node: the node. + + Returns: + The string representation. + """ + if isinstance(node, pytree.Leaf): + fmt = '{name}({value}) [lineno={lineno}, column={column}, prefix={prefix}]' + return fmt.format( + name=NodeName(node), + value=_PytreeNodeRepr(node), + lineno=node.lineno, + column=node.column, + prefix=repr(node.prefix)) + else: + fmt = '{node} [{len} children] [child_indent="{indent}"]' + return fmt.format( + node=NodeName(node), + len=len(node.children), + indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) + + +def _PytreeNodeRepr(node): + """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" + if isinstance(node, pytree.Node): + return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), + [_PytreeNodeRepr(c) for c in node.children]) + if isinstance(node, pytree.Leaf): + return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) + + +def IsCommentStatement(node): + return (NodeName(node) == 'simple_stmt' and + node.children[0].type == token.COMMENT) diff --git a/tools/yapf/yapf/yapflib/pytree_visitor.py b/tools/yapf/yapf/yapflib/pytree_visitor.py new file mode 100644 index 000000000..3f1ab0b71 --- /dev/null +++ b/tools/yapf/yapf/yapflib/pytree_visitor.py @@ -0,0 +1,135 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generic visitor pattern for pytrees.
+
+The lib2to3 parser produces a "pytree" - syntax tree consisting of Node
+and Leaf types. This module implements a visitor pattern for such trees.
+
+It also exports a basic "dumping" visitor that dumps a textual representation of
+a pytree into a stream.
+
+ PyTreeVisitor: a generic visitor pattern for pytrees.
+ PyTreeDumper: a configurable "dumper" for displaying pytrees.
+ DumpPyTree(): a convenience function to dump a pytree.
+"""
+
+import sys
+
+from lib2to3 import pytree
+
+from yapf.yapflib import pytree_utils
+
+
+class PyTreeVisitor(object):
+ """Visitor pattern for pytree trees.
+
+ Methods named Visit_XXX will be invoked when a node with type XXX is
+ encountered in the tree. The type is either a token type (for Leaf nodes) or
+ grammar symbols (for Node nodes). The return value of Visit_XXX methods is
+ ignored by the visitor.
+
+ Visitors can modify node contents but must not change the tree structure
+ (e.g. add/remove children and move nodes around).
+
+ This is a very common visitor pattern in Python code; it's also used in the
+ Python standard library ast module for providing AST visitors.
+
+ Note: this makes names that aren't style conformant, so such visitor methods
+ need to be marked with # pylint: disable=invalid-name. We don't have a choice
+ here, because lib2to3 nodes have under_separated names.
+
+ For more complex behavior, the visit, DefaultNodeVisit and DefaultLeafVisit
+ methods can be overridden. Don't forget to invoke DefaultNodeVisit for nodes
+ that may have children - otherwise the children will not be visited.
+ """
+
+ def Visit(self, node):
+ """Visit a node."""
+ method = 'Visit_{0}'.format(pytree_utils.NodeName(node))  # e.g. 'Visit_if_stmt' or 'Visit_NAME'.
+ if hasattr(self, method):
+ # Found a specific visitor for this node
+ getattr(self, method)(node)
+ else:
+ if isinstance(node, pytree.Leaf):
+ self.DefaultLeafVisit(node)
+ else:
+ self.DefaultNodeVisit(node)
+
+ def DefaultNodeVisit(self, node):
+ """Default visitor for Node: visits the node's children depth-first.
+
+ This method is invoked when no specific visitor for the node is defined.
+
+ Arguments:
+ node: the node to visit
+ """
+ for child in node.children:
+ self.Visit(child)
+
+ def DefaultLeafVisit(self, leaf):
+ """Default visitor for Leaf: no-op.
+
+ This method is invoked when no specific visitor for the leaf is defined.
+
+ Arguments:
+ leaf: the leaf to visit
+ """
+ pass
+
+
+def DumpPyTree(tree, target_stream=sys.stdout):
+ """Convenience function for dumping a given pytree.
+
+ This function presents a very minimal interface. For more configurability (for
+ example, controlling how specific node types are displayed), use PyTreeDumper
+ directly.
+
+ Arguments:
+ tree: the tree to dump.
+ target_stream: the stream to dump the tree to. A file-like object. By
+ default will dump into stdout.
+ """
+ dumper = PyTreeDumper(target_stream)
+ dumper.Visit(tree)
+
+
+class PyTreeDumper(PyTreeVisitor):
+ """Visitor that dumps the tree to a stream.
+
+ Implements the PyTreeVisitor interface.
+ """
+
+ def __init__(self, target_stream=sys.stdout):
+ """Create a tree dumper.
+
+ Arguments:
+ target_stream: the stream to dump the tree to. A file-like object. By
+ default will dump into stdout.
+ """ + self._target_stream = target_stream + self._current_indent = 0 + + def _DumpString(self, s): + self._target_stream.write('{0}{1}\n'.format(' ' * self._current_indent, s)) + + def DefaultNodeVisit(self, node): + # Dump information about the current node, and then use the generic + # DefaultNodeVisit visitor to dump each of its children. + self._DumpString(pytree_utils.DumpNodeToString(node)) + self._current_indent += 2 + super(PyTreeDumper, self).DefaultNodeVisit(node) + self._current_indent -= 2 + + def DefaultLeafVisit(self, leaf): + self._DumpString(pytree_utils.DumpNodeToString(leaf)) diff --git a/tools/yapf/yapf/yapflib/reformatter.py b/tools/yapf/yapf/yapflib/reformatter.py new file mode 100644 index 000000000..fd8f3178a --- /dev/null +++ b/tools/yapf/yapf/yapflib/reformatter.py @@ -0,0 +1,588 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Decide what the format for the code should be. + +The `unwrapped_line.UnwrappedLine`s are now ready to be formatted. +UnwrappedLines that can be merged together are. The best formatting is returned +as a string. + + Reformat(): the main function exported by this module. 
+""" + +from __future__ import unicode_literals +import collections +import heapq +import re + +from lib2to3 import pytree +from lib2to3.pgen2 import token + +from yapf.yapflib import format_decision_state +from yapf.yapflib import format_token +from yapf.yapflib import line_joiner +from yapf.yapflib import pytree_utils +from yapf.yapflib import style +from yapf.yapflib import verifier + + +def Reformat(uwlines, verify=False): + """Reformat the unwrapped lines. + + Arguments: + uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. + verify: (bool) True if reformatted code should be verified for syntax. + + Returns: + A string representing the reformatted code. + """ + final_lines = [] + prev_uwline = None # The previous line. + indent_width = style.Get('INDENT_WIDTH') + + for uwline in _SingleOrMergedLines(uwlines): + first_token = uwline.first + _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines) + + indent_amt = indent_width * uwline.depth + state = format_decision_state.FormatDecisionState(uwline, indent_amt) + state.MoveStateToNextToken() + + if not uwline.disable: + if uwline.first.is_comment: + uwline.first.node.value = uwline.first.node.value.rstrip() + elif uwline.last.is_comment: + uwline.last.node.value = uwline.last.node.value.rstrip() + if prev_uwline and prev_uwline.disable: + # Keep the vertical spacing between a disabled and enabled formatting + # region. + _RetainVerticalSpacingBetweenTokens(uwline.first, prev_uwline.last) + if any(tok.is_comment for tok in uwline.tokens): + _RetainVerticalSpacingBeforeComments(uwline) + + if (_LineContainsI18n(uwline) or uwline.disable or + _LineHasContinuationMarkers(uwline)): + _RetainHorizontalSpacing(uwline) + _RetainVerticalSpacing(uwline, prev_uwline) + _EmitLineUnformatted(state) + elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split + for tok in uwline.tokens): + # The unwrapped line fits on one line. 
+ while state.next_token:
+ state.AddTokenToState(newline=False, dry_run=False)
+ else:
+ if not _AnalyzeSolutionSpace(state):
+ # Failsafe mode. If there isn't a solution to the line, then just emit
+ # it as is.
+ state = format_decision_state.FormatDecisionState(uwline, indent_amt)
+ state.MoveStateToNextToken()
+ _RetainHorizontalSpacing(uwline)
+ _RetainVerticalSpacing(uwline, prev_uwline)
+ _EmitLineUnformatted(state)
+
+ final_lines.append(uwline)
+ prev_uwline = uwline
+ return _FormatFinalLines(final_lines, verify)
+
+
+def _RetainHorizontalSpacing(uwline):
+ """Retain all horizontal spacing between tokens."""
+ for tok in uwline.tokens:
+ tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth)
+
+
+def _RetainVerticalSpacing(cur_uwline, prev_uwline):
+ prev_tok = None
+ if prev_uwline is not None:
+ prev_tok = prev_uwline.last
+ for cur_tok in cur_uwline.tokens:
+ _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok)
+ prev_tok = cur_tok
+
+
+def _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok):
+ """Retain vertical spacing between two tokens."""
+ if prev_tok is None:
+ return
+
+ if prev_tok.is_string:
+ prev_lineno = prev_tok.lineno + prev_tok.value.count('\n')
+ elif prev_tok.is_pseudo_paren:
+ if not prev_tok.previous_token.is_multiline_string:
+ prev_lineno = prev_tok.previous_token.lineno
+ else:
+ prev_lineno = prev_tok.lineno
+ else:
+ prev_lineno = prev_tok.lineno
+
+ if cur_tok.is_comment:
+ cur_lineno = cur_tok.lineno - cur_tok.value.count('\n')
+ else:
+ cur_lineno = cur_tok.lineno
+
+ cur_tok.AdjustNewlinesBefore(cur_lineno - prev_lineno)
+
+
+def _RetainVerticalSpacingBeforeComments(uwline):
+ """Retain vertical spacing before comments."""
+ prev_token = None
+ for tok in uwline.tokens:
+ if tok.is_comment and prev_token:
+ if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1:
+ tok.AdjustNewlinesBefore(ONE_BLANK_LINE)  # NOTE(review): ONE_BLANK_LINE is a module constant defined outside this excerpt -- verify.
+
+ prev_token = tok
+
+
+def _EmitLineUnformatted(state):
+ """Emit the line without formatting.
+ + The line contains code that if reformatted would break a non-syntactic + convention. E.g., i18n comments and function calls are tightly bound by + convention. Instead, we calculate when / if a newline should occur and honor + that. But otherwise the code emitted will be the same as the original code. + + Arguments: + state: (format_decision_state.FormatDecisionState) The format decision + state. + """ + prev_lineno = None + while state.next_token: + previous_token = state.next_token.previous_token + previous_lineno = previous_token.lineno + + if previous_token.is_multiline_string: + previous_lineno += previous_token.value.count('\n') + + if previous_token.is_continuation: + newline = False + else: + newline = (prev_lineno is not None and + state.next_token.lineno > previous_lineno) + + prev_lineno = state.next_token.lineno + state.AddTokenToState(newline=newline, dry_run=False) + + +def _LineContainsI18n(uwline): + """Return true if there are i18n comments or function calls in the line. + + I18n comments and pseudo-function calls are closely related. They cannot + be moved apart without breaking i18n. + + Arguments: + uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. + + Returns: + True if the line contains i18n comments or function calls. False otherwise. + """ + if style.Get('I18N_COMMENT'): + for tok in uwline.tokens: + if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value): + # Contains an i18n comment. 
+ return True + + if style.Get('I18N_FUNCTION_CALL'): + length = len(uwline.tokens) + index = 0 + while index < length - 1: + if (uwline.tokens[index + 1].value == '(' and + uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')): + return True + index += 1 + + return False + + +def _LineHasContinuationMarkers(uwline): + """Return true if the line has continuation markers in it.""" + return any(tok.is_continuation for tok in uwline.tokens) + + +def _CanPlaceOnSingleLine(uwline): + """Determine if the unwrapped line can go on a single line. + + Arguments: + uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. + + Returns: + True if the line can or should be added to a single line. False otherwise. + """ + indent_amt = style.Get('INDENT_WIDTH') * uwline.depth + last = uwline.last + last_index = -1 + if last.is_pylint_comment: + last = last.previous_token + last_index = -2 + if last is None: + return True + return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and + not any(tok.is_comment for tok in uwline.tokens[:last_index])) + + +def _FormatFinalLines(final_lines, verify): + """Compose the final output from the finalized lines.""" + formatted_code = [] + for line in final_lines: + formatted_line = [] + for tok in line.tokens: + if not tok.is_pseudo_paren: + formatted_line.append(tok.whitespace_prefix) + formatted_line.append(tok.value) + else: + if (not tok.next_token.whitespace_prefix.startswith('\n') and + not tok.next_token.whitespace_prefix.startswith(' ')): + if (tok.previous_token.value == ':' or + tok.next_token.value not in ',}])'): + formatted_line.append(' ') + + formatted_code.append(''.join(formatted_line)) + if verify: + verifier.VerifyCode(formatted_code[-1]) + + return ''.join(formatted_code) + '\n' + + +class _StateNode(object): + """An edge in the solution space from 'previous.state' to 'state'. + + Attributes: + state: (format_decision_state.FormatDecisionState) The format decision state + for this node. 
+ newline: If True, then on the edge from 'previous.state' to 'state' a + newline is inserted. + previous: (_StateNode) The previous state node in the graph. + """ + + # TODO(morbo): Add a '__cmp__' method. + + def __init__(self, state, newline, previous): + self.state = state.Clone() + self.newline = newline + self.previous = previous + + def __repr__(self): # pragma: no cover + return 'StateNode(state=[\n{0}\n], newline={1})'.format( + self.state, self.newline) + + +# A tuple of (penalty, count) that is used to prioritize the BFS. In case of +# equal penalties, we prefer states that were inserted first. During state +# generation, we make sure that we insert states first that break the line as +# late as possible. +_OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count']) + +# An item in the prioritized BFS search queue. The 'StateNode's 'state' has +# the given '_OrderedPenalty'. +_QueueItem = collections.namedtuple('QueueItem', + ['ordered_penalty', 'state_node']) + + +def _AnalyzeSolutionSpace(initial_state): + """Analyze the entire solution space starting from initial_state. + + This implements a variant of Dijkstra's algorithm on the graph that spans + the solution space (LineStates are the nodes). The algorithm tries to find + the shortest path (the one with the lowest penalty) from 'initial_state' to + the state where all tokens are placed. + + Arguments: + initial_state: (format_decision_state.FormatDecisionState) The initial state + to start the search from. + + Returns: + True if a formatting solution was found. False otherwise. + """ + count = 0 + seen = set() + p_queue = [] + + # Insert start element. 
+ node = _StateNode(initial_state, False, None) + heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), node)) + + count += 1 + while p_queue: + item = p_queue[0] + penalty = item.ordered_penalty.penalty + node = item.state_node + if not node.state.next_token: + break + heapq.heappop(p_queue) + + if count > 10000: + node.state.ignore_stack_for_comparison = True + + if node.state in seen: + continue + + seen.add(node.state) + + # FIXME(morbo): Add a 'decision' element? + + count = _AddNextStateToQueue(penalty, node, False, count, p_queue) + count = _AddNextStateToQueue(penalty, node, True, count, p_queue) + + if not p_queue: + # We weren't able to find a solution. Do nothing. + return False + + _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node) + return True + + +def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue): + """Add the following state to the analysis queue. + + Assume the current state is 'previous_node' and has been reached with a + penalty of 'penalty'. Insert a line break if 'newline' is True. + + Arguments: + penalty: (int) The penalty associated with the path up to this point. + previous_node: (_StateNode) The last _StateNode inserted into the priority + queue. + newline: (bool) Add a newline if True. + count: (int) The number of elements in the queue. + p_queue: (heapq) The priority queue representing the solution space. + + Returns: + The updated number of elements in the queue. + """ + must_split = previous_node.state.MustSplit() + if newline and not previous_node.state.CanSplit(must_split): + # Don't add a newline if the token cannot be split. + return count + if not newline and must_split: + # Don't add a token we must split but where we aren't splitting. 
+ return count + + node = _StateNode(previous_node.state, newline, previous_node) + penalty += node.state.AddTokenToState( + newline=newline, dry_run=True, must_split=must_split) + heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(penalty, count), node)) + return count + 1 + + +def _ReconstructPath(initial_state, current): + """Reconstruct the path through the queue with lowest penalty. + + Arguments: + initial_state: (format_decision_state.FormatDecisionState) The initial state + to start the search from. + current: (_StateNode) The node in the decision graph that is the end point + of the path with the least penalty. + """ + path = collections.deque() + + while current.previous: + path.appendleft(current) + current = current.previous + + for node in path: + initial_state.AddTokenToState(newline=node.newline, dry_run=False) + + +def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines): + """Format the first token in the unwrapped line. + + Add a newline and the required indent before the first token of the unwrapped + line. + + Arguments: + first_token: (format_token.FormatToken) The first token in the unwrapped + line. + indent_depth: (int) The line's indentation depth. + prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line + previous to this line. + final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines + that have already been processed. + """ + first_token.AddWhitespacePrefix( + _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, + final_lines), + indent_level=indent_depth) + + +NO_BLANK_LINES = 1 +ONE_BLANK_LINE = 2 +TWO_BLANK_LINES = 3 + + +def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, + final_lines): + """Calculate the number of newlines we need to add. + + Arguments: + first_token: (format_token.FormatToken) The first token in the unwrapped + line. + indent_depth: (int) The line's indentation depth. 
+ prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line + previous to this line. + final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines + that have already been processed. + + Returns: + The number of newlines needed before the first token. + """ + # TODO(morbo): Special handling for imports. + # TODO(morbo): Create a knob that can tune these. + if prev_uwline is None: + # The first line in the file. Don't add blank lines. + # FIXME(morbo): Is this correct? + if first_token.newlines is not None: + pytree_utils.SetNodeAnnotation(first_token.node, + pytree_utils.Annotation.NEWLINES, None) + return 0 + + if first_token.is_docstring: + if (prev_uwline.first.value == 'class' and + style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')): + # Enforce a blank line before a class's docstring. + return ONE_BLANK_LINE + # The docstring shouldn't have a newline before it. + return NO_BLANK_LINES + + prev_last_token = prev_uwline.last + if prev_last_token.is_docstring: + if (not indent_depth and first_token.value in {'class', 'def', 'async'}): + # Separate a class or function from the module-level docstring with two + # blank lines. + return TWO_BLANK_LINES + if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, + prev_last_token): + return NO_BLANK_LINES + else: + return ONE_BLANK_LINE + + if first_token.value in {'class', 'def', 'async', '@'}: + # TODO(morbo): This can go once the blank line calculator is more + # sophisticated. + if not indent_depth: + # This is a top-level class or function. + is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0 + if (not prev_uwline.disable and prev_last_token.is_comment and + not is_inline_comment): + # This token follows a non-inline comment. + if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, + prev_last_token): + # Assume that the comment is "attached" to the current line. + # Therefore, we want two blank lines before the comment. 
+ index = len(final_lines) - 1 + while index > 0: + if not final_lines[index - 1].is_comment: + break + index -= 1 + if final_lines[index - 1].first.value == '@': + final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES) + else: + prev_last_token.AdjustNewlinesBefore(TWO_BLANK_LINES) + if first_token.newlines is not None: + pytree_utils.SetNodeAnnotation( + first_token.node, pytree_utils.Annotation.NEWLINES, None) + return NO_BLANK_LINES + elif prev_uwline.first.value in {'class', 'def', 'async'}: + if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'): + pytree_utils.SetNodeAnnotation(first_token.node, + pytree_utils.Annotation.NEWLINES, None) + return NO_BLANK_LINES + + # Calculate how many newlines were between the original lines. We want to + # retain that formatting if it doesn't violate one of the style guide rules. + if first_token.is_comment: + first_token_lineno = first_token.lineno - first_token.value.count('\n') + else: + first_token_lineno = first_token.lineno + + prev_last_token_lineno = prev_last_token.lineno + if prev_last_token.is_multiline_string: + prev_last_token_lineno += prev_last_token.value.count('\n') + + if first_token_lineno - prev_last_token_lineno > 1: + return ONE_BLANK_LINE + + return NO_BLANK_LINES + + +def _SingleOrMergedLines(uwlines): + """Generate the lines we want to format. + + Arguments: + uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. + + Yields: + Either a single line, if the current line cannot be merged with the + succeeding line, or the next two lines merged into one line. 
+ """ + index = 0 + last_was_merged = False + while index < len(uwlines): + if uwlines[index].disable: + uwline = uwlines[index] + index += 1 + while index < len(uwlines): + column = uwline.last.column + 2 + if uwlines[index].lineno != uwline.lineno: + break + if uwline.last.value != ':': + leaf = pytree.Leaf( + type=token.SEMI, value=';', context=('', (uwline.lineno, column))) + uwline.AppendToken(format_token.FormatToken(leaf)) + for tok in uwlines[index].tokens: + uwline.AppendToken(tok) + index += 1 + yield uwline + elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged): + # TODO(morbo): This splice is potentially very slow. Come up with a more + # performance-friendly way of determining if two lines can be merged. + next_uwline = uwlines[index + 1] + for tok in next_uwline.tokens: + uwlines[index].AppendToken(tok) + if (len(next_uwline.tokens) == 1 and + next_uwline.first.is_multiline_string): + # This may be a multiline shebang. In that case, we want to retain the + # formatting. Otherwise, it could mess up the shell script's syntax. + uwlines[index].disable = True + yield uwlines[index] + index += 2 + last_was_merged = True + else: + yield uwlines[index] + index += 1 + last_was_merged = False + + +def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token): + """Determine if there are no blank lines before the current token. + + The previous token is a docstring or comment. The prev_token_lineno is the + start of the text of that token. Counting the number of newlines in its text + gives us the extent and thus where the line number of the end of the + docstring or comment. After that, we just compare it to the current token's + line number to see if there are blank lines between them. + + Arguments: + text: (unicode) The text of the docstring or comment before the current + token. + cur_token: (format_token.FormatToken) The current token in the unwrapped + line. 
+ prev_token: (format_token.FormatToken) The previous token in the unwrapped + line. + + Returns: + True if there is no blank line before the current token. + """ + cur_token_lineno = cur_token.lineno + if cur_token.is_comment: + cur_token_lineno -= cur_token.value.count('\n') + num_newlines = text.count('\n') if not prev_token.is_comment else 0 + return prev_token.lineno + num_newlines == cur_token_lineno - 1 diff --git a/tools/yapf/yapf/yapflib/split_penalty.py b/tools/yapf/yapf/yapflib/split_penalty.py new file mode 100644 index 000000000..3ef4d8c20 --- /dev/null +++ b/tools/yapf/yapf/yapflib/split_penalty.py @@ -0,0 +1,559 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Computation of split penalties before/between tokens.""" + +from lib2to3 import pytree + +from yapf.yapflib import format_token +from yapf.yapflib import py3compat +from yapf.yapflib import pytree_utils +from yapf.yapflib import pytree_visitor +from yapf.yapflib import style + +# TODO(morbo): Document the annotations in a centralized place. E.g., the +# README file. 
+UNBREAKABLE = 1000 * 1000 +NAMED_ASSIGN = 8500 +DOTTED_NAME = 4000 +VERY_STRONGLY_CONNECTED = 3500 +STRONGLY_CONNECTED = 3000 + +OR_TEST = 1000 +AND_TEST = 1100 +NOT_TEST = 1200 +COMPARISON = 1300 +STAR_EXPR = 1300 +EXPR = 1400 +XOR_EXPR = 1500 +AND_EXPR = 1700 +SHIFT_EXPR = 1800 +ARITH_EXPR = 1900 +TERM = 2000 +FACTOR = 2100 +POWER = 2200 +ATOM = 2300 +ONE_ELEMENT_ARGUMENT = 2500 + + +def ComputeSplitPenalties(tree): + """Compute split penalties on tokens in the given parse tree. + + Arguments: + tree: the top-level pytree node to annotate with penalties. + """ + _SplitPenaltyAssigner().Visit(tree) + + +class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): + """Assigns split penalties to tokens, based on parse tree structure. + + Split penalties are attached as annotations to tokens. + """ + + def Visit_import_as_names(self, node): # pyline: disable=invalid-name + # import_as_names ::= import_as_name (',' import_as_name)* [','] + self.DefaultNodeVisit(node) + prev_child = None + for child in node.children: + if (prev_child and isinstance(prev_child, pytree.Leaf) and + prev_child.value == ','): + _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_IMPORT_NAMES')) + prev_child = child + + def Visit_classdef(self, node): # pylint: disable=invalid-name + # classdef ::= 'class' NAME ['(' [arglist] ')'] ':' suite + # + # NAME + _SetUnbreakable(node.children[1]) + if len(node.children) > 4: + # opening '(' + _SetUnbreakable(node.children[2]) + # ':' + _SetUnbreakable(node.children[-2]) + self.DefaultNodeVisit(node) + + def Visit_funcdef(self, node): # pylint: disable=invalid-name + # funcdef ::= 'def' NAME parameters ['->' test] ':' suite + # + # Can't break before the function name and before the colon. The parameters + # are handled by child iteration. 
+ colon_idx = 1 + while pytree_utils.NodeName(node.children[colon_idx]) == 'simple_stmt': + colon_idx += 1 + _SetUnbreakable(node.children[colon_idx]) + arrow_idx = -1 + while colon_idx < len(node.children): + if isinstance(node.children[colon_idx], pytree.Leaf): + if node.children[colon_idx].value == ':': + break + if node.children[colon_idx].value == '->': + arrow_idx = colon_idx + colon_idx += 1 + _SetUnbreakable(node.children[colon_idx]) + self.DefaultNodeVisit(node) + if arrow_idx > 0: + _SetSplitPenalty(_LastChildNode(node.children[arrow_idx - 1]), 0) + _SetUnbreakable(node.children[arrow_idx]) + _SetStronglyConnected(node.children[arrow_idx + 1]) + + def Visit_lambdef(self, node): # pylint: disable=invalid-name + # lambdef ::= 'lambda' [varargslist] ':' test + # Loop over the lambda up to and including the colon. + if style.Get('ALLOW_MULTILINE_LAMBDAS'): + _SetStronglyConnected(node) + else: + self._SetUnbreakableOnChildren(node) + + def Visit_parameters(self, node): # pylint: disable=invalid-name + # parameters ::= '(' [typedargslist] ')' + self.DefaultNodeVisit(node) + + # Can't break before the opening paren of a parameter list. 
+ _SetUnbreakable(node.children[0]) + if not style.Get('DEDENT_CLOSING_BRACKETS'): + _SetStronglyConnected(node.children[-1]) + + def Visit_arglist(self, node): # pylint: disable=invalid-name + # arglist ::= argument (',' argument)* [','] + self.DefaultNodeVisit(node) + index = 1 + while index < len(node.children): + child = node.children[index] + if isinstance(child, pytree.Leaf) and child.value == ',': + _SetUnbreakable(child) + index += 1 + + def Visit_argument(self, node): # pylint: disable=invalid-name + # argument ::= test [comp_for] | test '=' test # Really [keyword '='] test + self.DefaultNodeVisit(node) + + index = 1 + while index < len(node.children) - 1: + child = node.children[index] + if isinstance(child, pytree.Leaf) and child.value == '=': + _SetSplitPenalty(_FirstChildNode(node.children[index]), NAMED_ASSIGN) + _SetSplitPenalty( + _FirstChildNode(node.children[index + 1]), NAMED_ASSIGN) + index += 1 + + def Visit_dotted_name(self, node): # pylint: disable=invalid-name + # dotted_name ::= NAME ('.' NAME)* + self._SetUnbreakableOnChildren(node) + + def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name + # dictsetmaker ::= ( (test ':' test + # (comp_for | (',' test ':' test)* [','])) | + # (test (comp_for | (',' test)* [','])) ) + for child in node.children: + self.Visit(child) + if pytree_utils.NodeName(child) == 'COLON': + # This is a key to a dictionary. We don't want to split the key if at + # all possible. + _SetStronglyConnected(child) + + def Visit_trailer(self, node): # pylint: disable=invalid-name + # trailer ::= '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + self.DefaultNodeVisit(node) + if node.children[0].value == '.': + self._SetUnbreakableOnChildren(node) + _SetSplitPenalty(node.children[1], DOTTED_NAME) + elif len(node.children) == 2: + # Don't split an empty argument list if at all possible. 
+ _SetSplitPenalty(node.children[1], VERY_STRONGLY_CONNECTED) + elif len(node.children) == 3: + name = pytree_utils.NodeName(node.children[1]) + if name == 'power': + if pytree_utils.NodeName(node.children[1].children[0]) != 'atom': + # Don't split an argument list with one element if at all possible. + _SetStronglyConnected(node.children[1], node.children[2]) + _SetSplitPenalty( + _FirstChildNode(node.children[1]), ONE_ELEMENT_ARGUMENT) + elif (pytree_utils.NodeName(node.children[0]) == 'LSQB' and + len(node.children[1].children) > 2 and + (name.endswith('_test') or name.endswith('_expr'))): + _SetStronglyConnected(node.children[1].children[0]) + _SetStronglyConnected(node.children[1].children[2]) + + # Still allow splitting around the operator. + split_before = ((name.endswith('_test') and + style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR')) or + (name.endswith('_expr') and + style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'))) + if split_before: + _SetSplitPenalty(_LastChildNode(node.children[1].children[1]), 0) + else: + _SetSplitPenalty(_FirstChildNode(node.children[1].children[2]), 0) + + # Don't split the ending bracket of a subscript list. + _SetVeryStronglyConnected(node.children[-1]) + elif name not in { + 'arglist', 'argument', 'term', 'or_test', 'and_test', 'comparison', + 'atom' + }: + # Don't split an argument list with one element if at all possible. + _SetStronglyConnected(node.children[1], node.children[2]) + + def Visit_power(self, node): # pylint: disable=invalid-name,missing-docstring + # power ::= atom trailer* ['**' factor] + self.DefaultNodeVisit(node) + + # When atom is followed by a trailer, we can not break between them. + # E.g. arr[idx] - no break allowed between 'arr' and '['. + if (len(node.children) > 1 and + pytree_utils.NodeName(node.children[1]) == 'trailer'): + # children[1] itself is a whole trailer: we don't want to + # mark all of it as unbreakable, only its first token: (, [ or . 
+ _SetUnbreakable(node.children[1].children[0]) + + # A special case when there are more trailers in the sequence. Given: + # atom tr1 tr2 + # The last token of tr1 and the first token of tr2 comprise an unbreakable + # region. For example: foo.bar.baz(1) + # We can't put breaks between either of the '.', '(', or '[' and the names + # *preceding* them. + prev_trailer_idx = 1 + while prev_trailer_idx < len(node.children) - 1: + cur_trailer_idx = prev_trailer_idx + 1 + cur_trailer = node.children[cur_trailer_idx] + if pytree_utils.NodeName(cur_trailer) == 'trailer': + # Now we know we have two trailers one after the other + prev_trailer = node.children[prev_trailer_idx] + if prev_trailer.children[-1].value != ')': + # Set the previous node unbreakable if it's not a function call: + # atom tr1() tr2 + # It may be necessary (though undesirable) to split up a previous + # function call's parentheses to the next line. + _SetStronglyConnected(prev_trailer.children[-1]) + _SetStronglyConnected(cur_trailer.children[0]) + prev_trailer_idx = cur_trailer_idx + else: + break + + # We don't want to split before the last ')' of a function call. This also + # takes care of the special case of: + # atom tr1 tr2 ... trn + # where the 'tr#' are trailers that may end in a ')'. 
+ for trailer in node.children[1:]: + if pytree_utils.NodeName(trailer) != 'trailer': + break + if trailer.children[0].value in '([': + if len(trailer.children) > 2: + subtypes = pytree_utils.GetNodeAnnotation( + trailer.children[0], pytree_utils.Annotation.SUBTYPE) + if subtypes and format_token.Subtype.SUBSCRIPT_BRACKET in subtypes: + _SetStronglyConnected(_FirstChildNode(trailer.children[1])) + + last_child_node = _LastChildNode(trailer) + if last_child_node.value.strip().startswith('#'): + last_child_node = last_child_node.prev_sibling + if not style.Get('DEDENT_CLOSING_BRACKETS'): + if _LastChildNode(last_child_node.prev_sibling).value != ',': + if last_child_node.value == ']': + _SetUnbreakable(last_child_node) + else: + _SetSplitPenalty(last_child_node, VERY_STRONGLY_CONNECTED) + else: + # If the trailer's children are '()', then make it a strongly + # connected region. It's sometimes necessary, though undesirable, to + # split the two. + _SetStronglyConnected(trailer.children[-1]) + + # If the original source has a "builder" style calls, then we should allow + # the reformatter to retain that. 
+ _AllowBuilderStyleCalls(node) + + def Visit_subscript(self, node): # pylint: disable=invalid-name + # subscript ::= test | [test] ':' [test] [sliceop] + _SetStronglyConnected(*node.children) + self.DefaultNodeVisit(node) + + def Visit_comp_for(self, node): # pylint: disable=invalid-name + # comp_for ::= 'for' exprlist 'in' testlist_safe [comp_iter] + _SetSplitPenalty(_FirstChildNode(node), 0) + _SetStronglyConnected(*node.children[1:]) + self.DefaultNodeVisit(node) + + def Visit_comp_if(self, node): # pylint: disable=invalid-name + # comp_if ::= 'if' old_test [comp_iter] + _SetSplitPenalty(node.children[0], + style.Get('SPLIT_PENALTY_BEFORE_IF_EXPR')) + _SetStronglyConnected(*node.children[1:]) + self.DefaultNodeVisit(node) + + def Visit_or_test(self, node): # pylint: disable=invalid-name + # or_test ::= and_test ('or' and_test)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, OR_TEST) + index = 1 + while index + 1 < len(node.children): + if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'): + _DecrementSplitPenalty(_FirstChildNode(node.children[index]), OR_TEST) + else: + _DecrementSplitPenalty( + _FirstChildNode(node.children[index + 1]), OR_TEST) + index += 2 + + def Visit_and_test(self, node): # pylint: disable=invalid-name + # and_test ::= not_test ('and' not_test)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, AND_TEST) + index = 1 + while index + 1 < len(node.children): + if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'): + _DecrementSplitPenalty(_FirstChildNode(node.children[index]), AND_TEST) + else: + _DecrementSplitPenalty( + _FirstChildNode(node.children[index + 1]), AND_TEST) + index += 2 + + def Visit_not_test(self, node): # pylint: disable=invalid-name + # not_test ::= 'not' not_test | comparison + self.DefaultNodeVisit(node) + _IncreasePenalty(node, NOT_TEST) + + def Visit_comparison(self, node): # pylint: disable=invalid-name + # comparison ::= expr (comp_op expr)* + self.DefaultNodeVisit(node) + if len(node.children) == 3 and 
_StronglyConnectedCompOp(node): + _SetSplitPenalty(_FirstChildNode(node.children[1]), STRONGLY_CONNECTED) + _SetSplitPenalty(_FirstChildNode(node.children[2]), STRONGLY_CONNECTED) + else: + _IncreasePenalty(node, COMPARISON) + + def Visit_star_expr(self, node): # pylint: disable=invalid-name + # star_expr ::= '*' expr + self.DefaultNodeVisit(node) + _IncreasePenalty(node, STAR_EXPR) + + def Visit_expr(self, node): # pylint: disable=invalid-name + # expr ::= xor_expr ('|' xor_expr)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, EXPR) + index = 1 + while index < len(node.children) - 1: + child = node.children[index] + if isinstance(child, pytree.Leaf) and child.value == '|': + if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'): + _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')) + else: + _SetSplitPenalty( + _FirstChildNode(node.children[index + 1]), + style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')) + index += 1 + + def Visit_xor_expr(self, node): # pylint: disable=invalid-name + # xor_expr ::= and_expr ('^' and_expr)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, XOR_EXPR) + + def Visit_and_expr(self, node): # pylint: disable=invalid-name + # and_expr ::= shift_expr ('&' shift_expr)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, AND_EXPR) + + def Visit_shift_expr(self, node): # pylint: disable=invalid-name + # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, SHIFT_EXPR) + + def Visit_arith_expr(self, node): # pylint: disable=invalid-name + # arith_expr ::= term (('+'|'-') term)* + self.DefaultNodeVisit(node) + _IncreasePenalty(node, ARITH_EXPR) + + def Visit_term(self, node): # pylint: disable=invalid-name + # term ::= factor (('*'|'@'|'/'|'%'|'//') factor)* + _IncreasePenalty(node, TERM) + self.DefaultNodeVisit(node) + + def Visit_factor(self, node): # pyline: disable=invalid-name + # factor ::= ('+'|'-'|'~') factor | power + self.DefaultNodeVisit(node) + 
_IncreasePenalty(node, FACTOR) + + def Visit_atom(self, node): # pylint: disable=invalid-name + # atom ::= ('(' [yield_expr|testlist_gexp] ')' + # '[' [listmaker] ']' | + # '{' [dictsetmaker] '}') + self.DefaultNodeVisit(node) + if node.children[0].value == '(': + if node.children[-1].value == ')': + if pytree_utils.NodeName(node.parent) == 'if_stmt': + _SetSplitPenalty(node.children[-1], UNBREAKABLE) + else: + if len(node.children) > 2: + _SetSplitPenalty(_FirstChildNode(node.children[1]), EXPR) + _SetSplitPenalty(node.children[-1], ATOM) + elif node.children[0].value in '[{' and len(node.children) == 2: + # Keep empty containers together if we can. + _SetUnbreakable(node.children[-1]) + + def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name + self.DefaultNodeVisit(node) + prev_was_comma = False + for child in node.children: + if isinstance(child, pytree.Leaf) and child.value == ',': + _SetUnbreakable(child) + prev_was_comma = True + else: + if prev_was_comma: + _SetSplitPenalty(_FirstChildNode(child), 0) + prev_was_comma = False + + ############################################################################ + # Helper methods that set the annotations. 
+ + def _SetUnbreakableOnChildren(self, node): + """Set an UNBREAKABLE penalty annotation on children of node.""" + for child in node.children: + self.Visit(child) + start = 2 if hasattr(node.children[0], 'is_pseudo') else 1 + for i in py3compat.range(start, len(node.children)): + _SetUnbreakable(node.children[i]) + + +def _SetUnbreakable(node): + """Set an UNBREAKABLE penalty annotation for the given node.""" + _RecAnnotate(node, pytree_utils.Annotation.SPLIT_PENALTY, UNBREAKABLE) + + +def _SetStronglyConnected(*nodes): + """Set a STRONGLY_CONNECTED penalty annotation for the given nodes.""" + for node in nodes: + _RecAnnotate(node, pytree_utils.Annotation.SPLIT_PENALTY, + STRONGLY_CONNECTED) + + +def _SetVeryStronglyConnected(*nodes): + """Set a VERY_STRONGLY_CONNECTED penalty annotation for the given nodes.""" + for node in nodes: + _RecAnnotate(node, pytree_utils.Annotation.SPLIT_PENALTY, + VERY_STRONGLY_CONNECTED) + + +def _SetExpressionPenalty(node, penalty): + """Set a penalty annotation on children nodes.""" + + def RecExpression(node, first_child_leaf): + if node is first_child_leaf: + return + + if isinstance(node, pytree.Leaf): + if node.value in {'(', 'for', 'if'}: + return + penalty_annotation = pytree_utils.GetNodeAnnotation( + node, pytree_utils.Annotation.SPLIT_PENALTY, default=0) + if penalty_annotation < penalty: + _SetSplitPenalty(node, penalty) + else: + for child in node.children: + RecExpression(child, first_child_leaf) + + RecExpression(node, _FirstChildNode(node)) + + +def _IncreasePenalty(node, amt): + """Increase a penalty annotation on children nodes.""" + + def RecExpression(node, first_child_leaf): + if node is first_child_leaf: + return + + if isinstance(node, pytree.Leaf): + if node.value in {'(', 'for', 'if'}: + return + penalty = pytree_utils.GetNodeAnnotation( + node, pytree_utils.Annotation.SPLIT_PENALTY, default=0) + _SetSplitPenalty(node, penalty + amt) + else: + for child in node.children: + RecExpression(child, 
first_child_leaf) + + RecExpression(node, _FirstChildNode(node)) + + +def _RecAnnotate(tree, annotate_name, annotate_value): + """Recursively set the given annotation on all leafs of the subtree. + + Takes care to only increase the penalty. If the node already has a higher + or equal penalty associated with it, this is a no-op. + + Args: + tree: subtree to annotate + annotate_name: name of the annotation to set + annotate_value: value of the annotation to set + """ + for child in tree.children: + _RecAnnotate(child, annotate_name, annotate_value) + if isinstance(tree, pytree.Leaf): + cur_annotate = pytree_utils.GetNodeAnnotation( + tree, annotate_name, default=0) + if cur_annotate < annotate_value: + pytree_utils.SetNodeAnnotation(tree, annotate_name, annotate_value) + + +def _StronglyConnectedCompOp(op): + if (len(op.children[1].children) == 2 and + pytree_utils.NodeName(op.children[1]) == 'comp_op' and + _FirstChildNode(op.children[1]).value == 'not' and + _LastChildNode(op.children[1]).value == 'in'): + return True + if (isinstance(op.children[1], pytree.Leaf) and + op.children[1].value in {'==', 'in'}): + return True + return False + + +def _DecrementSplitPenalty(node, amt): + penalty = pytree_utils.GetNodeAnnotation( + node, pytree_utils.Annotation.SPLIT_PENALTY, default=amt) + penalty = penalty - amt if amt < penalty else 0 + _SetSplitPenalty(node, penalty) + + +def _SetSplitPenalty(node, penalty): + pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.SPLIT_PENALTY, + penalty) + + +def _AllowBuilderStyleCalls(node): + """Allow splitting before '.' 
if it's a builder style function call.""" + + def RecGetLeaves(node): + if isinstance(node, pytree.Leaf): + return [node] + children = [] + for child in node.children: + children += RecGetLeaves(child) + return children + + list_of_children = RecGetLeaves(node) + prev_child = None + for child in list_of_children: + if child.value == '.': + if prev_child.lineno != child.lineno: + _SetSplitPenalty(child, 0) + prev_child = child + + +def _FirstChildNode(node): + if isinstance(node, pytree.Leaf): + return node + return _FirstChildNode(node.children[0]) + + +def _LastChildNode(node): + if isinstance(node, pytree.Leaf): + return node + return _LastChildNode(node.children[-1]) diff --git a/tools/yapf/yapf/yapflib/style.py b/tools/yapf/yapf/yapflib/style.py new file mode 100644 index 000000000..14be59e71 --- /dev/null +++ b/tools/yapf/yapf/yapflib/style.py @@ -0,0 +1,489 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Python formatting style settings.""" + +import os +import re +import textwrap + +from yapf.yapflib import errors +from yapf.yapflib import py3compat + + +class StyleConfigError(errors.YapfError): + """Raised when there's a problem reading the style configuration.""" + pass + + +def Get(setting_name): + """Get a style setting.""" + return _style[setting_name] + + +def Help(): + """Return dict mapping style names to help strings.""" + return _STYLE_HELP + + +def SetGlobalStyle(style): + """Set a style dict.""" + global _style + global _GLOBAL_STYLE_FACTORY + factory = _GetStyleFactory(style) + if factory: + _GLOBAL_STYLE_FACTORY = factory + _style = style + + +_STYLE_HELP = dict( + ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=textwrap.dedent("""\ + Align closing bracket with visual indentation."""), + ALLOW_MULTILINE_LAMBDAS=textwrap.dedent("""\ + Allow lambdas to be formatted on more than one line."""), + ALLOW_MULTILINE_DICTIONARY_KEYS=textwrap.dedent("""\ + Allow dictionary keys to exist on multiple lines. For example: + + x = { + ('this is the first element of a tuple', + 'this is the second element of a tuple'): + value, + }"""), + BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=textwrap.dedent("""\ + Insert a blank line before a 'def' or 'class' immediately nested + within another 'def' or 'class'. For example: + + class Foo: + # <------ this blank line + def method(): + ..."""), + BLANK_LINE_BEFORE_CLASS_DOCSTRING=textwrap.dedent("""\ + Insert a blank line before a class-level docstring."""), + COALESCE_BRACKETS=textwrap.dedent("""\ + Do not split consecutive brackets. Only relevant when + dedent_closing_brackets is set. 
For example: + + call_func_that_takes_a_dict( + { + 'key1': 'value1', + 'key2': 'value2', + } + ) + + would reformat to: + + call_func_that_takes_a_dict({ + 'key1': 'value1', + 'key2': 'value2', + })"""), + COLUMN_LIMIT=textwrap.dedent("""\ + The column limit."""), + CONTINUATION_INDENT_WIDTH=textwrap.dedent("""\ + Indent width used for line continuations."""), + DEDENT_CLOSING_BRACKETS=textwrap.dedent("""\ + Put closing brackets on a separate line, dedented, if the bracketed + expression can't fit in a single line. Applies to all kinds of brackets, + including function definitions and calls. For example: + + config = { + 'key1': 'value1', + 'key2': 'value2', + } # <--- this bracket is dedented and on a separate line + + time_series = self.remote_client.query_entity_counters( + entity='dev3246.region1', + key='dns.query_latency_tcp', + transform=Transformation.AVERAGE(window=timedelta(seconds=60)), + start_ts=now()-timedelta(days=3), + end_ts=now(), + ) # <--- this bracket is dedented and on a separate line"""), + EACH_DICT_ENTRY_ON_SEPARATE_LINE=textwrap.dedent("""\ + Place each dictionary entry onto its own line."""), + I18N_COMMENT=textwrap.dedent("""\ + The regex for an i18n comment. The presence of this comment stops + reformatting of that line, because the comments are required to be + next to the string they translate."""), + I18N_FUNCTION_CALL=textwrap.dedent("""\ + The i18n function call names. The presence of this function stops + reformattting on that line, because the string it has cannot be moved + away from the i18n comment."""), + INDENT_DICTIONARY_VALUE=textwrap.dedent("""\ + Indent the dictionary value if it cannot fit on the same line as the + dictionary key. For example: + + config = { + 'key1': + 'value1', + 'key2': value1 + + value2, + }"""), + INDENT_WIDTH=textwrap.dedent("""\ + The number of columns to use for indentation."""), + JOIN_MULTIPLE_LINES=textwrap.dedent("""\ + Join short lines into one line. 
E.g., single line 'if' statements."""), + SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=textwrap.dedent("""\ + Insert a space between the ending comma and closing bracket of a list, + etc."""), + SPACES_AROUND_POWER_OPERATOR=textwrap.dedent("""\ + Use spaces around the power operator."""), + SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=textwrap.dedent("""\ + Use spaces around default or named assigns."""), + SPACES_BEFORE_COMMENT=textwrap.dedent("""\ + The number of spaces required before a trailing comment."""), + SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=textwrap.dedent("""\ + Split before arguments if the argument list is terminated by a + comma."""), + SPLIT_BEFORE_BITWISE_OPERATOR=textwrap.dedent("""\ + Set to True to prefer splitting before '&', '|' or '^' rather than + after."""), + SPLIT_BEFORE_DICT_SET_GENERATOR=textwrap.dedent("""\ + Split before a dictionary or set generator (comp_for). For example, note + the split before the 'for': + + foo = { + variable: 'Hello world, have a nice day!' 
+ for variable in bar if variable != 42 + }"""), + SPLIT_BEFORE_FIRST_ARGUMENT=textwrap.dedent("""\ + If an argument / parameter list is going to be split, then split before + the first argument."""), + SPLIT_BEFORE_LOGICAL_OPERATOR=textwrap.dedent("""\ + Set to True to prefer splitting before 'and' or 'or' rather than + after."""), + SPLIT_BEFORE_NAMED_ASSIGNS=textwrap.dedent("""\ + Split named assignments onto individual lines."""), + SPLIT_PENALTY_AFTER_OPENING_BRACKET=textwrap.dedent("""\ + The penalty for splitting right after the opening bracket."""), + SPLIT_PENALTY_AFTER_UNARY_OPERATOR=textwrap.dedent("""\ + The penalty for splitting the line after a unary operator."""), + SPLIT_PENALTY_BEFORE_IF_EXPR=textwrap.dedent("""\ + The penalty for splitting right before an if expression."""), + SPLIT_PENALTY_BITWISE_OPERATOR=textwrap.dedent("""\ + The penalty of splitting the line around the '&', '|', and '^' + operators."""), + SPLIT_PENALTY_EXCESS_CHARACTER=textwrap.dedent("""\ + The penalty for characters over the column limit."""), + SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=textwrap.dedent("""\ + The penalty incurred by adding a line split to the unwrapped line. The + more line splits added the higher the penalty."""), + SPLIT_PENALTY_IMPORT_NAMES=textwrap.dedent("""\ + The penalty of splitting a list of "import as" names. 
For example: + + from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, + long_argument_2, + long_argument_3) + + would reformat to something like: + + from a_very_long_or_indented_module_name_yada_yad import ( + long_argument_1, long_argument_2, long_argument_3) + """), + SPLIT_PENALTY_LOGICAL_OPERATOR=textwrap.dedent("""\ + The penalty of splitting the line around the 'and' and 'or' + operators."""), + USE_TABS=textwrap.dedent("""\ + Use the Tab character for indentation."""), + # BASED_ON_STYLE='Which predefined style this style is based on', +) + + +def CreatePEP8Style(): + return dict( + ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=True, + ALLOW_MULTILINE_LAMBDAS=False, + ALLOW_MULTILINE_DICTIONARY_KEYS=False, + BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=False, + BLANK_LINE_BEFORE_CLASS_DOCSTRING=False, + COALESCE_BRACKETS=False, + COLUMN_LIMIT=79, + CONTINUATION_INDENT_WIDTH=4, + DEDENT_CLOSING_BRACKETS=False, + EACH_DICT_ENTRY_ON_SEPARATE_LINE=True, + I18N_COMMENT='', + I18N_FUNCTION_CALL='', + INDENT_DICTIONARY_VALUE=False, + INDENT_WIDTH=4, + JOIN_MULTIPLE_LINES=True, + SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=True, + SPACES_AROUND_POWER_OPERATOR=False, + SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=False, + SPACES_BEFORE_COMMENT=2, + SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=False, + SPLIT_BEFORE_BITWISE_OPERATOR=False, + SPLIT_BEFORE_DICT_SET_GENERATOR=True, + SPLIT_BEFORE_FIRST_ARGUMENT=False, + SPLIT_BEFORE_LOGICAL_OPERATOR=False, + SPLIT_BEFORE_NAMED_ASSIGNS=True, + SPLIT_PENALTY_AFTER_OPENING_BRACKET=30, + SPLIT_PENALTY_AFTER_UNARY_OPERATOR=10000, + SPLIT_PENALTY_BEFORE_IF_EXPR=0, + SPLIT_PENALTY_BITWISE_OPERATOR=300, + SPLIT_PENALTY_EXCESS_CHARACTER=4500, + SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=30, + SPLIT_PENALTY_IMPORT_NAMES=0, + SPLIT_PENALTY_LOGICAL_OPERATOR=300, + USE_TABS=False,) + + +def CreateGoogleStyle(): + style = CreatePEP8Style() + style['ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'] = False + 
style['BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'] = True + style['COLUMN_LIMIT'] = 80 + style['INDENT_WIDTH'] = 4 + style['I18N_COMMENT'] = r'#\..*' + style['I18N_FUNCTION_CALL'] = ['N_', '_'] + style['SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET'] = False + return style + + +def CreateChromiumStyle(): + style = CreateGoogleStyle() + style['ALLOW_MULTILINE_DICTIONARY_KEYS'] = True + style['INDENT_DICTIONARY_VALUE'] = True + style['INDENT_WIDTH'] = 2 + style['JOIN_MULTIPLE_LINES'] = False + style['SPLIT_BEFORE_BITWISE_OPERATOR'] = True + return style + + +def CreateFacebookStyle(): + style = CreatePEP8Style() + style['ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'] = False + style['COLUMN_LIMIT'] = 80 + style['DEDENT_CLOSING_BRACKETS'] = True + style['JOIN_MULTIPLE_LINES'] = False + style['SPACES_BEFORE_COMMENT'] = 2 + style['SPLIT_PENALTY_AFTER_OPENING_BRACKET'] = 0 + style['SPLIT_PENALTY_BEFORE_IF_EXPR'] = 30 + style['SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT'] = 30 + return style + + +_STYLE_NAME_TO_FACTORY = dict( + pep8=CreatePEP8Style, + chromium=CreateChromiumStyle, + google=CreateGoogleStyle, + facebook=CreateFacebookStyle,) + +_DEFAULT_STYLE_TO_FACTORY = [ + (CreateChromiumStyle(), CreateChromiumStyle), + (CreateFacebookStyle(), CreateFacebookStyle), + (CreateGoogleStyle(), CreateGoogleStyle), + (CreatePEP8Style(), CreatePEP8Style), +] + + +def _GetStyleFactory(style): + for def_style, factory in _DEFAULT_STYLE_TO_FACTORY: + if style == def_style: + return factory + return None + + +def _StringListConverter(s): + """Option value converter for a comma-separated list of strings.""" + return [part.strip() for part in s.split(',')] + + +def _BoolConverter(s): + """Option value converter for a boolean.""" + return py3compat.CONFIGPARSER_BOOLEAN_STATES[s.lower()] + + +# Different style options need to have their values interpreted differently when +# read from the config file. 
This dict maps an option name to a "converter"
+# function that accepts the string read for the option's value from the file and
+# returns it wrapped in the actual Python type that's going to be meaningful to
+# yapf.
+#
+# Note: this dict has to map all the supported style options.
+_STYLE_OPTION_VALUE_CONVERTER = dict(
+    ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=_BoolConverter,
+    ALLOW_MULTILINE_LAMBDAS=_BoolConverter,
+    ALLOW_MULTILINE_DICTIONARY_KEYS=_BoolConverter,
+    BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=_BoolConverter,
+    BLANK_LINE_BEFORE_CLASS_DOCSTRING=_BoolConverter,
+    COALESCE_BRACKETS=_BoolConverter,
+    COLUMN_LIMIT=int,
+    CONTINUATION_INDENT_WIDTH=int,
+    DEDENT_CLOSING_BRACKETS=_BoolConverter,
+    EACH_DICT_ENTRY_ON_SEPARATE_LINE=_BoolConverter,
+    I18N_COMMENT=str,
+    I18N_FUNCTION_CALL=_StringListConverter,
+    INDENT_DICTIONARY_VALUE=_BoolConverter,
+    INDENT_WIDTH=int,
+    JOIN_MULTIPLE_LINES=_BoolConverter,
+    SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=_BoolConverter,
+    SPACES_AROUND_POWER_OPERATOR=_BoolConverter,
+    SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=_BoolConverter,
+    SPACES_BEFORE_COMMENT=int,
+    SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=_BoolConverter,
+    SPLIT_BEFORE_BITWISE_OPERATOR=_BoolConverter,
+    SPLIT_BEFORE_DICT_SET_GENERATOR=_BoolConverter,
+    SPLIT_BEFORE_FIRST_ARGUMENT=_BoolConverter,
+    SPLIT_BEFORE_LOGICAL_OPERATOR=_BoolConverter,
+    SPLIT_BEFORE_NAMED_ASSIGNS=_BoolConverter,
+    SPLIT_PENALTY_AFTER_OPENING_BRACKET=int,
+    SPLIT_PENALTY_AFTER_UNARY_OPERATOR=int,
+    SPLIT_PENALTY_BEFORE_IF_EXPR=int,
+    SPLIT_PENALTY_BITWISE_OPERATOR=int,
+    SPLIT_PENALTY_EXCESS_CHARACTER=int,
+    SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=int,
+    SPLIT_PENALTY_IMPORT_NAMES=int,
+    SPLIT_PENALTY_LOGICAL_OPERATOR=int,
+    USE_TABS=_BoolConverter,)
+
+
+def CreateStyleFromConfig(style_config):
+  """Create a style dict from the given config.
+
+  Arguments:
+    style_config: either a style name or a file name. The file is expected to
+      contain settings. 
It can have a special BASED_ON_STYLE setting naming the + style which it derives from. If no such setting is found, it derives from + the default style. When style_config is None, the _GLOBAL_STYLE_FACTORY + config is created. + + Returns: + A style dict. + + Raises: + StyleConfigError: if an unknown style option was encountered. + """ + + def GlobalStyles(): + for style, _ in _DEFAULT_STYLE_TO_FACTORY: + yield style + + def_style = False + if style_config is None: + for style in GlobalStyles(): + if _style == style: + def_style = True + break + if not def_style: + return _style + return _GLOBAL_STYLE_FACTORY() + style_factory = _STYLE_NAME_TO_FACTORY.get(style_config.lower()) + if style_factory is not None: + return style_factory() + if style_config.startswith('{'): + # Most likely a style specification from the command line. + config = _CreateConfigParserFromConfigString(style_config) + else: + # Unknown config name: assume it's a file name then. + config = _CreateConfigParserFromConfigFile(style_config) + return _CreateStyleFromConfigParser(config) + + +def _CreateConfigParserFromConfigString(config_string): + """Given a config string from the command line, return a config parser.""" + if config_string[0] != '{' or config_string[-1] != '}': + raise StyleConfigError( + "Invalid style dict syntax: '{}'.".format(config_string)) + config = py3compat.ConfigParser() + config.add_section('style') + for key, value in re.findall(r'([a-zA-Z0-9_]+)\s*[:=]\s*([a-zA-Z0-9_]+)', + config_string): + config.set('style', key, value) + return config + + +def _CreateConfigParserFromConfigFile(config_filename): + """Read the file and return a ConfigParser object.""" + if not os.path.exists(config_filename): + # Provide a more meaningful error here. 
+ raise StyleConfigError( + '"{0}" is not a valid style or file path'.format(config_filename)) + with open(config_filename) as style_file: + config = py3compat.ConfigParser() + config.read_file(style_file) + if config_filename.endswith(SETUP_CONFIG): + if not config.has_section('yapf'): + raise StyleConfigError( + 'Unable to find section [yapf] in {0}'.format(config_filename)) + elif config_filename.endswith(LOCAL_STYLE): + if not config.has_section('style'): + raise StyleConfigError( + 'Unable to find section [style] in {0}'.format(config_filename)) + else: + if not config.has_section('style'): + raise StyleConfigError( + 'Unable to find section [style] in {0}'.format(config_filename)) + return config + + +def _CreateStyleFromConfigParser(config): + """Create a style dict from a configuration file. + + Arguments: + config: a ConfigParser object. + + Returns: + A style dict. + + Raises: + StyleConfigError: if an unknown style option was encountered. + """ + # Initialize the base style. + section = 'yapf' if config.has_section('yapf') else 'style' + if config.has_option('style', 'based_on_style'): + based_on = config.get('style', 'based_on_style').lower() + base_style = _STYLE_NAME_TO_FACTORY[based_on]() + elif config.has_option('yapf', 'based_on_style'): + based_on = config.get('yapf', 'based_on_style').lower() + base_style = _STYLE_NAME_TO_FACTORY[based_on]() + else: + base_style = _GLOBAL_STYLE_FACTORY() + + # Read all options specified in the file and update the style. + for option, value in config.items(section): + if option.lower() == 'based_on_style': + # Now skip this one - we've already handled it and it's not one of the + # recognized style options. 
+      continue
+    option = option.upper()
+    if option not in _STYLE_OPTION_VALUE_CONVERTER:
+      raise StyleConfigError('Unknown style option "{0}"'.format(option))
+    try:
+      base_style[option] = _STYLE_OPTION_VALUE_CONVERTER[option](value)
+    except ValueError:
+      raise StyleConfigError(
+          "'{}' is not a valid setting for {}.".format(value, option))
+  return base_style
+
+
+# The default style - used if yapf is invoked without specifically
+# requesting a formatting style.
+DEFAULT_STYLE = 'pep8'
+DEFAULT_STYLE_FACTORY = CreatePEP8Style
+_GLOBAL_STYLE_FACTORY = CreatePEP8Style
+
+# The name of the file to use for global style definition.
+GLOBAL_STYLE = (os.path.join(
+    os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'), 'yapf',
+    'style'))
+
+# The name of the file to use for directory-local style definition.
+LOCAL_STYLE = '.style.yapf'
+
+# Alternative place for directory-local style definition. Style should be
+# specified in the '[yapf]' section.
+SETUP_CONFIG = 'setup.cfg'
+
+# TODO(eliben): For now we're preserving the global presence of a style dict.
+# Refactor this so that the style is passed around through yapf rather than
+# being global.
+_style = None
+SetGlobalStyle(_GLOBAL_STYLE_FACTORY()) diff --git a/tools/yapf/yapf/yapflib/subtype_assigner.py b/tools/yapf/yapf/yapflib/subtype_assigner.py new file mode 100644 index 000000000..646cdc8a4 --- /dev/null +++ b/tools/yapf/yapf/yapflib/subtype_assigner.py @@ -0,0 +1,416 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Subtype assigner for lib2to3 trees. + +This module assigns extra type information to the lib2to3 trees. This +information is more specific than whether something is an operator or an +identifier. For instance, it can specify if a node in the tree is part of a +subscript. + + AssignSubtypes(): the main function exported by this module. + +Annotations: + subtype: The subtype of a pytree token. See 'format_token' module for a list + of subtypes. +""" + +from lib2to3 import pytree +from lib2to3.pgen2 import token +from lib2to3.pygram import python_symbols as syms + +from yapf.yapflib import format_token +from yapf.yapflib import pytree_utils +from yapf.yapflib import pytree_visitor +from yapf.yapflib import style + + +def AssignSubtypes(tree): + """Run the subtype assigner visitor over the tree, modifying it in place. + + Arguments: + tree: the top-level pytree node to annotate with subtypes. + """ + subtype_assigner = _SubtypeAssigner() + subtype_assigner.Visit(tree) + + +# Map tokens in argument lists to their respective subtype. +_ARGLIST_TOKEN_TO_SUBTYPE = { + '=': format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN, + ':': format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN, + '*': format_token.Subtype.VARARGS_STAR, + '**': format_token.Subtype.KWARGS_STAR_STAR, +} + + +class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): + """_SubtypeAssigner - see file-level docstring for detailed description. + + The subtype is added as an annotation to the pytree token. 
+ """ + + def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name + # dictsetmaker ::= (test ':' test (comp_for | + # (',' test ':' test)* [','])) | + # (test (comp_for | (',' test)* [','])) + for child in node.children: + self.Visit(child) + + comp_for = False + dict_maker = False + + for child in node.children: + if pytree_utils.NodeName(child) == 'comp_for': + comp_for = True + _AppendFirstLeafTokenSubtype(child, + format_token.Subtype.DICT_SET_GENERATOR) + elif pytree_utils.NodeName(child) in ('COLON', 'DOUBLESTAR'): + dict_maker = True + + if not comp_for and dict_maker: + last_was_colon = False + for child in node.children: + if dict_maker: + if pytree_utils.NodeName(child) == 'DOUBLESTAR': + _AppendFirstLeafTokenSubtype(child, + format_token.Subtype.KWARGS_STAR_STAR) + if last_was_colon: + if style.Get('INDENT_DICTIONARY_VALUE'): + _InsertPseudoParentheses(child) + else: + _AppendFirstLeafTokenSubtype( + child, format_token.Subtype.DICTIONARY_VALUE) + elif ( + child is not None and + (isinstance(child, pytree.Node) or + (not child.value.startswith('#') and child.value not in '{:,'))): + # Mark the first leaf of a key entry as a DICTIONARY_KEY. We + # normally want to split before them if the dictionary cannot exist + # on a single line. 
+ _AppendFirstLeafTokenSubtype(child, + format_token.Subtype.DICTIONARY_KEY) + _AppendSubtypeRec(child, format_token.Subtype.DICTIONARY_KEY_PART) + last_was_colon = pytree_utils.NodeName(child) == 'COLON' + + def Visit_expr_stmt(self, node): # pylint: disable=invalid-name + # expr_stmt ::= testlist_star_expr (augassign (yield_expr|testlist) + # | ('=' (yield_expr|testlist_star_expr))*) + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '=': + _AppendTokenSubtype(child, format_token.Subtype.ASSIGN_OPERATOR) + + def Visit_or_test(self, node): # pylint: disable=invalid-name + # or_test ::= and_test ('or' and_test)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == 'or': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_and_test(self, node): # pylint: disable=invalid-name + # and_test ::= not_test ('and' not_test)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == 'and': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_not_test(self, node): # pylint: disable=invalid-name + # not_test ::= 'not' not_test | comparison + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == 'not': + _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR) + + def Visit_comparison(self, node): # pylint: disable=invalid-name + # comparison ::= expr (comp_op expr)* + # comp_op ::= '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not in'|'is'|'is not' + for child in node.children: + self.Visit(child) + if (isinstance(child, pytree.Leaf) and + child.value in {'<', '>', '==', '>=', '<=', '<>', '!=', 'in', 'is'}): + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + elif pytree_utils.NodeName(child) == 'comp_op': + for grandchild in child.children: + _AppendTokenSubtype(grandchild, 
format_token.Subtype.BINARY_OPERATOR) + + def Visit_star_expr(self, node): # pylint: disable=invalid-name + # star_expr ::= '*' expr + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '*': + _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR) + + def Visit_expr(self, node): # pylint: disable=invalid-name + # expr ::= xor_expr ('|' xor_expr)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '|': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_xor_expr(self, node): # pylint: disable=invalid-name + # xor_expr ::= and_expr ('^' and_expr)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '^': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_and_expr(self, node): # pylint: disable=invalid-name + # and_expr ::= shift_expr ('&' shift_expr)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '&': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_shift_expr(self, node): # pylint: disable=invalid-name + # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value in {'<<', '>>'}: + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_arith_expr(self, node): # pylint: disable=invalid-name + # arith_expr ::= term (('+'|'-') term)* + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value in '+-': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_term(self, node): # pylint: disable=invalid-name + # term ::= factor (('*'|'/'|'%'|'//') factor)* + for child in node.children: + self.Visit(child) + if (isinstance(child, pytree.Leaf) and + child.value in {'*', '/', '%', 
'//'}): + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_factor(self, node): # pylint: disable=invalid-name + # factor ::= ('+'|'-'|'~') factor | power + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value in '+-~': + _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR) + + def Visit_power(self, node): # pylint: disable=invalid-name + # power ::= atom trailer* ['**' factor] + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '**': + _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + + def Visit_trailer(self, node): # pylint: disable=invalid-name + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value in '[]': + _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_BRACKET) + + def Visit_subscript(self, node): # pylint: disable=invalid-name + # subscript ::= test | [test] ':' [test] [sliceop] + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == ':': + _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_COLON) + + def Visit_sliceop(self, node): # pylint: disable=invalid-name + # sliceop ::= ':' [test] + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == ':': + _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_COLON) + + def Visit_argument(self, node): # pylint: disable=invalid-name + # argument ::= + # test [comp_for] | test '=' test + self._ProcessArgLists(node) + + def Visit_arglist(self, node): # pylint: disable=invalid-name + # arglist ::= + # (argument ',')* (argument [','] + # | '*' test (',' argument)* [',' '**' test] + # | '**' test) + self._ProcessArgLists(node) + _SetDefaultOrNamedAssignArgListSubtype(node) + + def Visit_tname(self, node): # pylint: disable=invalid-name + self._ProcessArgLists(node) + 
_SetDefaultOrNamedAssignArgListSubtype(node) + + def Visit_decorator(self, node): # pylint: disable=invalid-name + # decorator ::= + # '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + for child in node.children: + if isinstance(child, pytree.Leaf) and child.value == '@': + _AppendTokenSubtype(child, subtype=format_token.Subtype.DECORATOR) + self.Visit(child) + + def Visit_funcdef(self, node): # pylint: disable=invalid-name + # funcdef ::= + # 'def' NAME parameters ['->' test] ':' suite + for child in node.children: + if pytree_utils.NodeName(child) == 'NAME' and child.value != 'def': + _AppendTokenSubtype(child, format_token.Subtype.FUNC_DEF) + break + for child in node.children: + self.Visit(child) + + def Visit_typedargslist(self, node): # pylint: disable=invalid-name + # typedargslist ::= + # ((tfpdef ['=' test] ',')* + # ('*' [tname] (',' tname ['=' test])* [',' '**' tname] + # | '**' tname) + # | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + self._ProcessArgLists(node) + _SetDefaultOrNamedAssignArgListSubtype(node) + + def Visit_varargslist(self, node): # pylint: disable=invalid-name + # varargslist ::= + # ((vfpdef ['=' test] ',')* + # ('*' [vname] (',' vname ['=' test])* [',' '**' vname] + # | '**' vname) + # | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + self._ProcessArgLists(node) + for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf) and child.value == '=': + _AppendTokenSubtype(child, format_token.Subtype.VARARGS_LIST) + + def Visit_comp_for(self, node): # pylint: disable=invalid-name + # comp_for ::= 'for' exprlist 'in' testlist_safe [comp_iter] + _AppendSubtypeRec(node, format_token.Subtype.COMP_FOR) + self.DefaultNodeVisit(node) + + def Visit_comp_if(self, node): # pylint: disable=invalid-name + # comp_if ::= 'if' old_test [comp_iter] + _AppendSubtypeRec(node, format_token.Subtype.COMP_IF) + self.DefaultNodeVisit(node) + + def _ProcessArgLists(self, node): + """Common method for processing argument lists.""" 
+ for child in node.children: + self.Visit(child) + if isinstance(child, pytree.Leaf): + _AppendTokenSubtype( + child, + subtype=_ARGLIST_TOKEN_TO_SUBTYPE.get(child.value, + format_token.Subtype.NONE)) + + +def _SetDefaultOrNamedAssignArgListSubtype(node): + """Set named assign subtype on elements in a arg list.""" + + def HasDefaultOrNamedAssignSubtype(node): + """Return True if the arg list has a named assign subtype.""" + if isinstance(node, pytree.Leaf): + if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in + pytree_utils.GetNodeAnnotation(node, pytree_utils.Annotation.SUBTYPE, + set())): + return True + return False + has_subtype = False + for child in node.children: + if pytree_utils.NodeName(child) != 'arglist': + has_subtype |= HasDefaultOrNamedAssignSubtype(child) + return has_subtype + + if HasDefaultOrNamedAssignSubtype(node): + for child in node.children: + if pytree_utils.NodeName(child) != 'COMMA': + _AppendFirstLeafTokenSubtype( + child, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) + + +def _AppendTokenSubtype(node, subtype): + """Append the token's subtype only if it's not already set.""" + pytree_utils.AppendNodeAnnotation(node, pytree_utils.Annotation.SUBTYPE, + subtype) + + +def _AppendFirstLeafTokenSubtype(node, subtype): + """Append the first leaf token's subtypes.""" + if isinstance(node, pytree.Leaf): + _AppendTokenSubtype(node, subtype) + return + _AppendFirstLeafTokenSubtype(node.children[0], subtype) + + +def _AppendSubtypeRec(node, subtype, force=True): + """Append the leafs in the node to the given subtype.""" + if isinstance(node, pytree.Leaf): + _AppendTokenSubtype(node, subtype) + return + for child in node.children: + _AppendSubtypeRec(child, subtype, force=force) + + +def _InsertPseudoParentheses(node): + """Insert pseudo parentheses so that dicts can be formatted correctly.""" + comment_node = None + if isinstance(node, pytree.Node): + if node.children[-1].type == token.COMMENT: + comment_node = node.children[-1].clone() 
+ node.children[-1].remove() + + first = _GetFirstLeafNode(node) + last = _GetLastLeafNode(node) + + if first == last and first.type == token.COMMENT: + # A comment was inserted before the value, which is a pytree.Leaf. + # Encompass the dictionary's value into an ATOM node. + last = first.next_sibling + new_node = pytree.Node(syms.atom, [first.clone(), last.clone()]) + node.replace(new_node) + node = new_node + last.remove() + + first = _GetFirstLeafNode(node) + last = _GetLastLeafNode(node) + + lparen = pytree.Leaf( + token.LPAR, u'(', context=('', (first.get_lineno(), first.column - 1))) + last_lineno = last.get_lineno() + if last.type == token.STRING and '\n' in last.value: + last_lineno += last.value.count('\n') + + if last.type == token.STRING and '\n' in last.value: + last_column = len(last.value.split('\n')[-1]) + 1 + else: + last_column = last.column + len(last.value) + 1 + rparen = pytree.Leaf( + token.RPAR, u')', context=('', (last_lineno, last_column))) + + lparen.is_pseudo = True + rparen.is_pseudo = True + + if isinstance(node, pytree.Node): + node.insert_child(0, lparen) + node.append_child(rparen) + if comment_node: + node.append_child(comment_node) + _AppendFirstLeafTokenSubtype(node, format_token.Subtype.DICTIONARY_VALUE) + else: + clone = node.clone() + new_node = pytree.Node(syms.atom, [lparen, clone, rparen]) + node.replace(new_node) + _AppendFirstLeafTokenSubtype(clone, format_token.Subtype.DICTIONARY_VALUE) + + +def _GetFirstLeafNode(node): + if isinstance(node, pytree.Leaf): + return node + return _GetFirstLeafNode(node.children[0]) + + +def _GetLastLeafNode(node): + if isinstance(node, pytree.Leaf): + return node + return _GetLastLeafNode(node.children[-1]) diff --git a/tools/yapf/yapf/yapflib/unwrapped_line.py b/tools/yapf/yapf/yapflib/unwrapped_line.py new file mode 100644 index 000000000..dc782a91b --- /dev/null +++ b/tools/yapf/yapf/yapflib/unwrapped_line.py @@ -0,0 +1,497 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""UnwrappedLine primitive for formatting. + +An unwrapped line is the containing data structure produced by the parser. It +collects all nodes (stored in FormatToken objects) that could appear on a +single line if there were no line length restrictions. It's then used by the +parser to perform the wrapping required to comply with the style guide. +""" + +from yapf.yapflib import format_token +from yapf.yapflib import py3compat +from yapf.yapflib import pytree_utils +from yapf.yapflib import split_penalty +from yapf.yapflib import style + + +class UnwrappedLine(object): + """Represents a single unwrapped line in the output. + + Attributes: + depth: indentation depth of this line. This is just a numeric value used to + distinguish lines that are more deeply nested than others. It is not the + actual amount of spaces, which is style-dependent. + """ + + def __init__(self, depth, tokens=None): + """Constructor. + + Creates a new unwrapped line with the given depth an initial list of tokens. + Constructs the doubly-linked lists for format tokens using their built-in + next_token and previous_token attributes. + + Arguments: + depth: indentation depth of this line + tokens: initial list of tokens + """ + self.depth = depth + self._tokens = tokens or [] + self.disable = False + + if self._tokens: + # Set up a doubly linked list. 
+ for index, tok in enumerate(self._tokens[1:]): + # Note, 'index' is the index to the previous token. + tok.previous_token = self._tokens[index] + self._tokens[index].next_token = tok + + def CalculateFormattingInformation(self): + """Calculate the split penalty and total length for the tokens.""" + # Say that the first token in the line should have a space before it. This + # means only that if this unwrapped line is joined with a predecessor line, + # then there will be a space between them. + self.first.spaces_required_before = 1 + self.first.total_length = len(self.first.value) + + prev_token = self.first + prev_length = self.first.total_length + for token in self._tokens[1:]: + if (token.spaces_required_before == 0 and + _SpaceRequiredBetween(prev_token, token)): + token.spaces_required_before = 1 + + tok_len = len(token.value) if not token.is_pseudo_paren else 0 + token.total_length = prev_length + tok_len + token.spaces_required_before + + # The split penalty has to be computed before {must|can}_break_before, + # because these may use it for their decision. + token.split_penalty += _SplitPenalty(prev_token, token) + token.must_break_before = _MustBreakBefore(prev_token, token) + token.can_break_before = (token.must_break_before or + _CanBreakBefore(prev_token, token)) + + prev_length = token.total_length + prev_token = token + + ############################################################################ + # Token Access and Manipulation Methods # + ############################################################################ + + def AppendToken(self, token): + """Append a new FormatToken to the tokens contained in this line.""" + if self._tokens: + token.previous_token = self.last + self.last.next_token = token + self._tokens.append(token) + + def AppendNode(self, node): + """Convenience method to append a pytree node directly. + + Wraps the node with a FormatToken. 
+ + Arguments: + node: the node to append + """ + self.AppendToken(format_token.FormatToken(node)) + + @property + def first(self): + """Returns the first non-whitespace token.""" + return self._tokens[0] + + @property + def last(self): + """Returns the last non-whitespace token.""" + return self._tokens[-1] + + ############################################################################ + # Token -> String Methods # + ############################################################################ + + def AsCode(self, indent_per_depth=2): + """Return a "code" representation of this line. + + The code representation shows how the line would be printed out as code. + + TODO(eliben): for now this is rudimentary for debugging - once we add + formatting capabilities, this method will have other uses (not all tokens + have spaces around them, for example). + + Arguments: + indent_per_depth: how much spaces to indend per depth level. + + Returns: + A string representing the line as code. + """ + indent = ' ' * indent_per_depth * self.depth + tokens_str = ' '.join(tok.value for tok in self._tokens) + return indent + tokens_str + + def __str__(self): # pragma: no cover + return self.AsCode() + + def __repr__(self): # pragma: no cover + tokens_repr = ','.join( + ['{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens]) + return 'UnwrappedLine(depth={0}, tokens=[{1}])'.format( + self.depth, tokens_repr) + + ############################################################################ + # Properties # + ############################################################################ + + @property + def tokens(self): + """Access the tokens contained within this line. + + The caller must not modify the tokens list returned by this method. + + Returns: + List of tokens in this line. + """ + return self._tokens + + @property + def lineno(self): + """Return the line number of this unwrapped line. + + Returns: + The line number of the first token in this unwrapped line. 
+ """ + return self.first.lineno + + @property + def is_comment(self): + return self.first.is_comment + + +def _IsIdNumberStringToken(tok): + return tok.is_keyword or tok.is_name or tok.is_number or tok.is_string + + +def _IsUnaryOperator(tok): + return format_token.Subtype.UNARY_OPERATOR in tok.subtypes + + +def _SpaceRequiredBetween(left, right): + """Return True if a space is required between the left and right token.""" + lval = left.value + rval = right.value + if (left.is_pseudo_paren and _IsIdNumberStringToken(right) and + left.previous_token and _IsIdNumberStringToken(left.previous_token)): + # Space between keyword... tokens and pseudo parens. + return True + if left.is_pseudo_paren or right.is_pseudo_paren: + # There should be a space after the ':' in a dictionary. + if left.OpensScope(): + return True + # The closing pseudo-paren shouldn't affect spacing. + return False + if left.is_continuation or right.is_continuation: + # The continuation node's value has all of the spaces it needs. + return False + if right.name in pytree_utils.NONSEMANTIC_TOKENS: + # No space before a non-semantic token. + return False + if _IsIdNumberStringToken(left) and _IsIdNumberStringToken(right): + # Spaces between keyword, string, number, and identifier tokens. + return True + if lval == ',' and rval == ':': + # We do want a space between a comma and colon. + return True + if rval in ':,': + # Otherwise, we never want a space before a colon or comma. + return False + if lval == ',' and rval in ']})': + # Add a space between ending ',' and closing bracket if requested. + return style.Get('SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET') + if lval == ',': + # We want a space after a comma. + return True + if lval == 'from' and rval == '.': + # Space before the '.' in an import statement. + return True + if lval == '.' and rval == 'import': + # Space after the '.' in an import statement. + return True + if lval == '=' and rval == '.': + # Space between equal and '.' 
as in "X = ...". + return True + if ((right.is_keyword or right.is_name) and + (left.is_keyword or left.is_name)): + # Don't merge two keywords/identifiers. + return True + if left.is_string: + if (rval == '=' and format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in + right.subtypes): + # If there is a type hint, then we don't want to add a space between the + # equal sign and the hint. + return False + if rval not in '[)]}.': + # A string followed by something other than a subscript, closing bracket, + # or dot should have a space after it. + return True + if left.is_binary_op and lval != '**' and _IsUnaryOperator(right): + # Space between the binary opertor and the unary operator. + return True + if _IsUnaryOperator(left) and _IsUnaryOperator(right): + # No space between two unary operators. + return False + if left.is_binary_op or right.is_binary_op: + if lval == '**' or rval == '**': + # Space around the "power" operator. + return style.Get('SPACES_AROUND_POWER_OPERATOR') + # Enforce spaces around binary operators. + return True + if (_IsUnaryOperator(left) and lval != 'not' and + (right.is_name or right.is_number or rval == '(')): + # The previous token was a unary op. No space is desired between it and + # the current token. + return False + if (format_token.Subtype.SUBSCRIPT_COLON in left.subtypes or + format_token.Subtype.SUBSCRIPT_COLON in right.subtypes): + # A subscript shouldn't have spaces separating its colons. + return False + if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in left.subtypes or + format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in right.subtypes): + # A named argument or default parameter shouldn't have spaces around it. + # However, a typed argument should have a space after the colon. 
+ return lval == ':' or style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN') + if (format_token.Subtype.VARARGS_LIST in left.subtypes or + format_token.Subtype.VARARGS_LIST in right.subtypes): + return False + if (format_token.Subtype.VARARGS_STAR in left.subtypes or + format_token.Subtype.KWARGS_STAR_STAR in left.subtypes): + # Don't add a space after a vararg's star or a keyword's star-star. + return False + if lval == '@' and format_token.Subtype.DECORATOR in left.subtypes: + # Decorators shouldn't be separated from the 'at' sign. + return False + if lval == '.' or rval == '.': + # Don't place spaces between dots. + return False + if ((lval == '(' and rval == ')') or (lval == '[' and rval == ']') or + (lval == '{' and rval == '}')): + # Empty objects shouldn't be separted by spaces. + return False + if (lval in pytree_utils.OPENING_BRACKETS and + rval in pytree_utils.OPENING_BRACKETS): + # Nested objects' opening brackets shouldn't be separated. + return False + if (lval in pytree_utils.CLOSING_BRACKETS and + rval in pytree_utils.CLOSING_BRACKETS): + # Nested objects' closing brackets shouldn't be separated. + return False + if lval in pytree_utils.CLOSING_BRACKETS and rval in '([': + # A call, set, dictionary, or subscript that has a call or subscript after + # it shouldn't have a space between them. + return False + if lval in pytree_utils.OPENING_BRACKETS and _IsIdNumberStringToken(right): + # Don't separate the opening bracket from the first item. + return False + if left.is_name and rval in '([': + # Don't separate a call or array access from the name. + return False + if rval in pytree_utils.CLOSING_BRACKETS: + # Don't separate the closing bracket from the last item. + # FIXME(morbo): This might be too permissive. + return False + if lval == 'print' and rval == '(': + # Special support for the 'print' function. + return False + if lval in pytree_utils.OPENING_BRACKETS and _IsUnaryOperator(right): + # Don't separate a unary operator from the opening bracket. 
+ return False + if (lval in pytree_utils.OPENING_BRACKETS and + (format_token.Subtype.VARARGS_STAR in right.subtypes or + format_token.Subtype.KWARGS_STAR_STAR in right.subtypes)): + # Don't separate a '*' or '**' from the opening bracket. + return False + if rval == ';': + # Avoid spaces before a semicolon. (Why is there a semicolon?!) + return False + if lval == '(' and rval == 'await': + # Special support for the 'await' keyword. Don't separate the 'await' + # keyword from an opening paren. + return False + return True + + +def _MustBreakBefore(prev_token, cur_token): + """Return True if a line break is required before the current token.""" + if prev_token.is_comment: + # Must break if the previous token was a comment. + return True + if (cur_token.is_string and prev_token.is_string and + IsSurroundedByBrackets(cur_token)): + # We want consecutive strings to be on separate lines. This is a + # reasonable assumption, because otherwise they should have written them + # all on the same line, or with a '+'. + return True + return pytree_utils.GetNodeAnnotation( + cur_token.node, pytree_utils.Annotation.MUST_SPLIT, default=False) + + +def _CanBreakBefore(prev_token, cur_token): + """Return True if a line break may occur before the current token.""" + pval = prev_token.value + cval = cur_token.value + if py3compat.PY3: + if pval == 'yield' and cval == 'from': + # Don't break before a yield argument. + return False + if pval in {'async', 'await'} and cval in {'def', 'with', 'for'}: + # Don't break after sync keywords. + return False + if cur_token.split_penalty >= split_penalty.UNBREAKABLE: + return False + if pval == '@': + # Don't break right after the beginning of a decorator. + return False + if cval == ':': + # Don't break before the start of a block of code. + return False + if cval == ',': + # Don't break before a comma. + return False + if prev_token.is_name and cval == '(': + # Don't break in the middle of a function definition or call. 
+ return False + if prev_token.is_name and cval == '[': + # Don't break in the middle of an array dereference. + return False + if prev_token.is_name and cval == '.': + # Don't break before the '.' in a dotted name. + return False + if cur_token.is_comment and prev_token.lineno == cur_token.lineno: + # Don't break a comment at the end of the line. + return False + if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes: + # Don't break after a unary token. + return False + return True + + +def IsSurroundedByBrackets(tok): + """Return True if the token is surrounded by brackets.""" + paren_count = 0 + brace_count = 0 + sq_bracket_count = 0 + previous_token = tok.previous_token + while previous_token: + if previous_token.value == ')': + paren_count -= 1 + elif previous_token.value == '}': + brace_count -= 1 + elif previous_token.value == ']': + sq_bracket_count -= 1 + + if previous_token.value == '(': + if paren_count == 0: + return previous_token + paren_count += 1 + elif previous_token.value == '{': + if brace_count == 0: + return previous_token + brace_count += 1 + elif previous_token.value == '[': + if sq_bracket_count == 0: + return previous_token + sq_bracket_count += 1 + + previous_token = previous_token.previous_token + return None + + +_LOGICAL_OPERATORS = frozenset({'and', 'or'}) +_BITWISE_OPERATORS = frozenset({'&', '|', '^'}) +_TERM_OPERATORS = frozenset({'*', '/', '%', '//'}) + + +def _SplitPenalty(prev_token, cur_token): + """Return the penalty for breaking the line before the current token.""" + pval = prev_token.value + cval = cur_token.value + if pval == 'not': + return split_penalty.UNBREAKABLE + + if cur_token.node_split_penalty > 0: + return cur_token.node_split_penalty + + if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'): + # Prefer to split before 'and' and 'or'. + if pval in _LOGICAL_OPERATORS: + return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR') + if cval in _LOGICAL_OPERATORS: + return 0 + else: + # Prefer to split after 'and' and 'or'. 
+ if pval in _LOGICAL_OPERATORS: + return 0 + if cval in _LOGICAL_OPERATORS: + return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR') + + if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'): + # Prefer to split before '&', '|', and '^'. + if pval in _BITWISE_OPERATORS: + return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR') + if cval in _BITWISE_OPERATORS: + return 0 + else: + # Prefer to split after '&', '|', and '^'. + if pval in _BITWISE_OPERATORS: + return 0 + if cval in _BITWISE_OPERATORS: + return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR') + + if (format_token.Subtype.COMP_FOR in cur_token.subtypes or + format_token.Subtype.COMP_IF in cur_token.subtypes): + # We don't mind breaking before the 'for' or 'if' of a list comprehension. + return 0 + if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes: + # Try not to break after a unary operator. + return style.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR') + if pval == ',': + # Breaking after a comma is fine, if need be. + return 0 + if prev_token.is_binary_op: + # We would rather not split after an equality operator. + return 20 + if (format_token.Subtype.VARARGS_STAR in prev_token.subtypes or + format_token.Subtype.KWARGS_STAR_STAR in prev_token.subtypes): + # Don't split after a varargs * or kwargs **. + return split_penalty.UNBREAKABLE + if prev_token.OpensScope() and cval != '(': + # Slightly prefer + return style.Get('SPLIT_PENALTY_AFTER_OPENING_BRACKET') + if cval == ':': + # Don't split before a colon. + return split_penalty.UNBREAKABLE + if cval == '=': + # Don't split before an assignment. + return split_penalty.UNBREAKABLE + if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or + format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes): + # Don't break before or after an default or named assignment. + return split_penalty.UNBREAKABLE + if cval == '==': + # We would rather not split before an equality operator. 
+ return split_penalty.STRONGLY_CONNECTED + if cur_token.ClosesScope(): + # Give a slight penalty for splitting before the closing scope. + return 100 + if pval in _TERM_OPERATORS or cval in _TERM_OPERATORS: + return 50 + return 0 diff --git a/tools/yapf/yapf/yapflib/verifier.py b/tools/yapf/yapf/yapflib/verifier.py new file mode 100644 index 000000000..b16aefb88 --- /dev/null +++ b/tools/yapf/yapf/yapflib/verifier.py @@ -0,0 +1,93 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Verify that the generated code is valid code. + +This takes a line of code and "normalizes" it. I.e., it transforms the snippet +into something that has the potential to compile. + + VerifyCode(): the main function exported by this module. +""" + +import ast +import re +import sys +import textwrap + + +class InternalError(Exception): + """Internal error in verifying formatted code.""" + pass + + +def VerifyCode(code): + """Verify that the reformatted code is syntactically correct. + + Arguments: + code: (unicode) The reformatted code snippet. + + Raises: + SyntaxError if the code was reformatted incorrectly. 
+ """ + try: + compile(textwrap.dedent(code).encode('UTF-8'), '', 'exec') + except SyntaxError: + try: + ast.parse(textwrap.dedent(code.lstrip('\n')).lstrip(), '', 'exec') + except SyntaxError: + try: + normalized_code = _NormalizeCode(code) + compile(normalized_code.encode('UTF-8'), '', 'exec') + except SyntaxError: + raise InternalError(sys.exc_info()[1]) + + +def _NormalizeCode(code): + """Make sure that the code snippet is compilable.""" + code = textwrap.dedent(code.lstrip('\n')).lstrip() + + # Split the code to lines and get rid of all leading full-comment lines as + # they can mess up the normalization attempt. + lines = code.split('\n') + i = 0 + for i, line in enumerate(lines): + line = line.strip() + if line and not line.startswith('#'): + break + code = '\n'.join(lines[i:]) + '\n' + + if re.match(r'(if|while|for|with|def|class|async|await)\b', code): + code += '\n pass' + elif re.match(r'(elif|else)\b', code): + try: + try_code = 'if True:\n pass\n' + code + '\n pass' + ast.parse( + textwrap.dedent(try_code.lstrip('\n')).lstrip(), '', 'exec') + code = try_code + except SyntaxError: + # The assumption here is that the code is on a single line. + code = 'if True: pass\n' + code + elif code.startswith('@'): + code += '\ndef _():\n pass' + elif re.match(r'try\b', code): + code += '\n pass\nexcept:\n pass' + elif re.match(r'(except|finally)\b', code): + code = 'try:\n pass\n' + code + '\n pass' + elif re.match(r'(return|yield)\b', code): + code = 'def _():\n ' + code + elif re.match(r'(continue|break)\b', code): + code = 'while True:\n ' + code + elif re.match(r'print\b', code): + code = 'from __future__ import print_function\n' + code + + return code + '\n' diff --git a/tools/yapf/yapf/yapflib/yapf_api.py b/tools/yapf/yapf/yapflib/yapf_api.py new file mode 100644 index 000000000..dd91849ea --- /dev/null +++ b/tools/yapf/yapf/yapflib/yapf_api.py @@ -0,0 +1,295 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Entry points for YAPF. + +The main APIs that YAPF exposes to drive the reformatting. + + FormatFile(): reformat a file. + FormatCode(): reformat a string of code. + +These APIs have some common arguments: + + style_config: (string) Either a style name or a path to a file that contains + formatting style settings. If None is specified, use the default style + as set in style.DEFAULT_STYLE_FACTORY + lines: (list of tuples of integers) A list of tuples of lines, [start, end], + that we want to format. The lines are 1-based indexed. It can be used by + third-party code (e.g., IDEs) when reformatting a snippet of code rather + than a whole file. + print_diff: (bool) Instead of returning the reformatted source, return a + diff that turns the formatted source into reformatter source. + verify: (bool) True if reformatted code should be verified for syntax. 
+""" + +import difflib +import re +import sys + +from lib2to3.pgen2 import tokenize + +from yapf.yapflib import blank_line_calculator +from yapf.yapflib import comment_splicer +from yapf.yapflib import continuation_splicer +from yapf.yapflib import file_resources +from yapf.yapflib import py3compat +from yapf.yapflib import pytree_unwrapper +from yapf.yapflib import pytree_utils +from yapf.yapflib import reformatter +from yapf.yapflib import split_penalty +from yapf.yapflib import style +from yapf.yapflib import subtype_assigner + + +def FormatFile(filename, + style_config=None, + lines=None, + print_diff=False, + verify=False, + in_place=False, + logger=None): + """Format a single Python file and return the formatted code. + + Arguments: + filename: (unicode) The file to reformat. + in_place: (bool) If True, write the reformatted code back to the file. + logger: (io streamer) A stream to output logging. + remaining arguments: see comment at the top of this module. + + Returns: + Tuple of (reformatted_code, encoding, changed). reformatted_code is None if + the file is sucessfully written to (having used in_place). reformatted_code + is a diff if print_diff is True. + + Raises: + IOError: raised if there was an error reading the file. + ValueError: raised if in_place and print_diff are both specified. 
+ """ + _CheckPythonVersion() + + if in_place and print_diff: + raise ValueError('Cannot pass both in_place and print_diff.') + + original_source, newline, encoding = ReadFile(filename, logger) + reformatted_source, changed = FormatCode( + original_source, + style_config=style_config, + filename=filename, + lines=lines, + print_diff=print_diff, + verify=verify) + if reformatted_source.rstrip('\n'): + lines = reformatted_source.rstrip('\n').split('\n') + reformatted_source = newline.join(line for line in lines) + newline + if in_place: + if original_source and original_source != reformatted_source: + file_resources.WriteReformattedCode(filename, reformatted_source, + in_place, encoding) + return None, encoding, changed + + return reformatted_source, encoding, changed + + +def FormatCode(unformatted_source, + filename='', + style_config=None, + lines=None, + print_diff=False, + verify=False): + """Format a string of Python code. + + This provides an alternative entry point to YAPF. + + Arguments: + unformatted_source: (unicode) The code to format. + filename: (unicode) The name of the file being reformatted. + remaining arguments: see comment at the top of this module. + + Returns: + Tuple of (reformatted_source, changed). reformatted_source conforms to the + desired formatting style. changed is True if the source changed. + """ + _CheckPythonVersion() + style.SetGlobalStyle(style.CreateStyleFromConfig(style_config)) + if not unformatted_source.endswith('\n'): + unformatted_source += '\n' + tree = pytree_utils.ParseCodeToTree(unformatted_source) + + # Run passes on the tree, modifying it in place. 
+ comment_splicer.SpliceComments(tree) + continuation_splicer.SpliceContinuations(tree) + subtype_assigner.AssignSubtypes(tree) + split_penalty.ComputeSplitPenalties(tree) + blank_line_calculator.CalculateBlankLines(tree) + + uwlines = pytree_unwrapper.UnwrapPyTree(tree) + for uwl in uwlines: + uwl.CalculateFormattingInformation() + + _MarkLinesToFormat(uwlines, lines) + reformatted_source = reformatter.Reformat(uwlines, verify) + + if unformatted_source == reformatted_source: + return '' if print_diff else reformatted_source, False + + code_diff = _GetUnifiedDiff( + unformatted_source, reformatted_source, filename=filename) + + if print_diff: + return code_diff, code_diff != '' + + return reformatted_source, True + + +def _CheckPythonVersion(): # pragma: no cover + errmsg = 'yapf is only supported for Python 2.7 or 3.4+' + if sys.version_info[0] == 2: + if sys.version_info[1] < 7: + raise RuntimeError(errmsg) + elif sys.version_info[0] == 3: + if sys.version_info[1] < 4: + raise RuntimeError(errmsg) + + +def ReadFile(filename, logger=None): + """Read the contents of the file. + + An optional logger can be specified to emit messages to your favorite logging + stream. If specified, then no exception is raised. This is external so that it + can be used by third-party applications. + + Arguments: + filename: (unicode) The name of the file. + logger: (function) A function or lambda that takes a string and emits it. + + Returns: + The contents of filename. + + Raises: + IOError: raised if there was an error reading the file. + """ + try: + with open(filename, 'rb') as fd: + encoding = tokenize.detect_encoding(fd.readline)[0] + except IOError as err: + if logger: + logger(err) + raise + + try: + # Preserves line endings. 
+ with py3compat.open_with_encoding( + filename, mode='r', encoding=encoding, newline='') as fd: + lines = fd.readlines() + + line_ending = file_resources.LineEnding(lines) + source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n' + return source, line_ending, encoding + except IOError as err: # pragma: no cover + if logger: + logger(err) + raise + + +DISABLE_PATTERN = r'^#.*\byapf:\s*disable\b' +ENABLE_PATTERN = r'^#.*\byapf:\s*enable\b' + + +def _MarkLinesToFormat(uwlines, lines): + """Skip sections of code that we shouldn't reformat.""" + if lines: + for uwline in uwlines: + uwline.disable = True + + # Sort and combine overlapping ranges. + lines = sorted(lines) + line_ranges = [lines[0]] if len(lines[0]) else [] + index = 1 + while index < len(lines): + current = line_ranges[-1] + if lines[index][0] <= current[1]: + # The ranges overlap, so combine them. + line_ranges[-1] = (current[0], max(lines[index][1], current[1])) + else: + line_ranges.append(lines[index]) + index += 1 + + # Mark lines to format as not being disabled. + index = 0 + for start, end in sorted(line_ranges): + while index < len(uwlines) and uwlines[index].last.lineno < start: + index += 1 + if index >= len(uwlines): + break + + while index < len(uwlines): + if uwlines[index].lineno > end: + break + if (uwlines[index].lineno >= start or + uwlines[index].last.lineno >= start): + uwlines[index].disable = False + index += 1 + + # Now go through the lines and disable any lines explicitly marked as + # disabled. 
+ index = 0 + while index < len(uwlines): + uwline = uwlines[index] + if uwline.is_comment: + if _DisableYAPF(uwline.first.value.strip()): + index += 1 + while index < len(uwlines): + uwline = uwlines[index] + if uwline.is_comment and _EnableYAPF(uwline.first.value.strip()): + break + uwline.disable = True + index += 1 + elif re.search(DISABLE_PATTERN, uwline.last.value.strip(), re.IGNORECASE): + uwline.disable = True + index += 1 + + +def _DisableYAPF(line): + return ( + re.search(DISABLE_PATTERN, line.split('\n')[0].strip(), re.IGNORECASE) or + re.search(DISABLE_PATTERN, line.split('\n')[-1].strip(), re.IGNORECASE)) + + +def _EnableYAPF(line): + return ( + re.search(ENABLE_PATTERN, line.split('\n')[0].strip(), re.IGNORECASE) or + re.search(ENABLE_PATTERN, line.split('\n')[-1].strip(), re.IGNORECASE)) + + +def _GetUnifiedDiff(before, after, filename='code'): + """Get a unified diff of the changes. + + Arguments: + before: (unicode) The original source code. + after: (unicode) The reformatted source code. + filename: (unicode) The code's filename. + + Returns: + The unified diff text. + """ + before = before.splitlines() + after = after.splitlines() + return '\n'.join( + difflib.unified_diff( + before, + after, + filename, + filename, + '(original)', + '(reformatted)', + lineterm='')) + '\n' diff --git a/tools/yapf_util.py b/tools/yapf_util.py new file mode 100644 index 000000000..9618a3fc6 --- /dev/null +++ b/tools/yapf_util.py @@ -0,0 +1,26 @@ +# Copyright (c) 2017 The Chromium Embedded Framework Authors. All rights +# reserved. Use of this source code is governed by a BSD-style license that +# can be found in the LICENSE file + +from exec_util import exec_cmd +import os +import sys + +# Script directory. +script_dir = os.path.dirname(__file__) +root_dir = os.path.join(script_dir, os.pardir) + + +def yapf_format(file_name, file_contents): + # Reads .style.yapf in the root_dir when specifying contents via stdin. 
+ result = exec_cmd("%s %s/yapf" % (sys.executable, script_dir), root_dir, + file_contents) + if result['err'] != '': + print "yapf error: %s" % result['err'] + if result['out'] != '': + output = result['out'] + if sys.platform == 'win32': + # Convert to Unix line endings. + output = output.replace("\r", "") + return output + return None