Update tooling to use yapf for Python file formatting (issue #2171)

This commit is contained in:
Marshall Greenblatt 2017-05-28 15:03:42 +02:00
parent d4f06e3806
commit 59606b88d2
28 changed files with 6483 additions and 6 deletions

9
.style.cfg Normal file
View File

@ -0,0 +1,9 @@
# Copyright (c) 2017 The Chromium Embedded Framework Authors. All rights
# reserved. Use of this source code is governed by a BSD-style license that
# can be found in the LICENSE file.
# Configuration settings for tools/fix_style.py
{
# Directories containing these path components will be ignored.
'ignore_directories': ['yapf'],
}

2
.style.yapf Normal file
View File

@ -0,0 +1,2 @@
[style]
based_on_style = chromium

View File

@ -5,13 +5,22 @@
import os, re, sys
from clang_util import clang_format
from file_util import *
from file_util import eval_file, get_files, read_file, write_file
from git_util import get_changed_files
from yapf_util import yapf_format
# Valid extensions for files we want to clang-format.
DEFAULT_LINT_WHITELIST_REGEX = r"(.*\.cpp|.*\.cc|.*\.h|.*\.mm)$"
# File extensions that can be formatted.
DEFAULT_LINT_WHITELIST_REGEX = r"(.*\.cpp|.*\.cc|.*\.h|.*\.java|.*\.mm|.*\.py)$"
DEFAULT_LINT_BLACKLIST_REGEX = r"$^"
# Directories containing these path components will be ignored.
IGNORE_DIRECTORIES = []
# Script directory.
script_dir = os.path.dirname(__file__)
root_dir = os.path.join(script_dir, os.pardir)
def msg(filename, status):
if sys.platform == 'win32':
# Use Unix path separator.
@ -27,14 +36,32 @@ def msg(filename, status):
print "%-60s %s" % (filename, status)
updatect = 0
def read_config():
  """Load optional settings from the .style.cfg file at the repo root.

  If the file exists and declares 'ignore_directories', the module-level
  IGNORE_DIRECTORIES list is replaced with that value; otherwise nothing
  changes.
  """
  global IGNORE_DIRECTORIES
  cfg_path = os.path.join(root_dir, ".style.cfg")
  if not os.path.exists(cfg_path):
    return
  settings = eval_file(cfg_path)
  if 'ignore_directories' in settings:
    IGNORE_DIRECTORIES = settings['ignore_directories']
def update_file(filename):
oldcontents = read_file(filename)
if len(oldcontents) == 0:
msg(filename, "empty")
return;
return
if os.path.splitext(filename)[1] == ".py":
# Format Python files using YAPF.
newcontents = yapf_format(filename, oldcontents)
else:
# Format C/C++/ObjC/Java files using clang-format.
newcontents = clang_format(filename, oldcontents)
newcontents = clang_format(filename, oldcontents)
if newcontents is None:
raise Exception("Failed to process %s" % filename)
@ -47,7 +74,8 @@ def update_file(filename):
msg(filename, "ok")
return
def fix_style(filenames, white_list = None, black_list = None):
def fix_style(filenames, white_list=None, black_list=None):
""" Execute clang-format with the specified arguments. """
if not white_list:
white_list = DEFAULT_LINT_WHITELIST_REGEX
@ -57,6 +85,16 @@ def fix_style(filenames, white_list = None, black_list = None):
black_regex = re.compile(black_list)
for filename in filenames:
# Ignore files from specific directories.
ignore = False
for dir_part in filename.split(os.sep):
if dir_part in IGNORE_DIRECTORIES:
msg(filename, "ignored")
ignore = True
break
if ignore:
continue
if filename.find('*') > 0:
# Expand wildcards.
filenames.extend(get_files(filename))
@ -83,6 +121,7 @@ def fix_style(filenames, white_list = None, black_list = None):
else:
msg(filename, "skipped")
if __name__ == "__main__":
if len(sys.argv) == 1:
print "Usage: %s [file-path|git-hash|unstaged|staged] ..." % sys.argv[0]
@ -96,6 +135,9 @@ if __name__ == "__main__":
print " staged\t\tProcess all staged files in the Git repo."
sys.exit(1)
# Read the configuration file.
read_config()
# Process anything passed on the command-line.
fix_style(sys.argv[1:])
print 'Done - Wrote %d files.' % updatect

202
tools/yapf/LICENSE Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

14
tools/yapf/README.cef Normal file
View File

@ -0,0 +1,14 @@
Name: yapf
Short Name: yapf
URL: https://github.com/google/yapf
Date: 28 May 2017
Version: 0.16.2
Revision: 9f168a12
License: Apache 2.0
License File: LICENSE
Description:
A formatter for Python files.
Local Modifications:
None

16
tools/yapf/__main__.py Normal file
View File

@ -0,0 +1,16 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Entry point used when the yapf directory is executed as a module
# (e.g. "python tools/yapf"). Delegates to yapf.run_main(), which parses
# sys.argv and exits the process with the formatter's status code.
import yapf
yapf.run_main()

303
tools/yapf/yapf/__init__.py Normal file
View File

@ -0,0 +1,303 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""YAPF.
YAPF uses the algorithm in clang-format to figure out the "best" formatting for
Python code. It looks at the program as a series of "unwrappable lines" ---
i.e., lines which, if there were no column limit, we would place all tokens on
that line. It then uses a priority queue to figure out what the best formatting
is --- i.e., the formatting with the least penalty.
It differs from tools like autopep8 and pep8ify in that it doesn't just look for
violations of the style guide, but looks at the module as a whole, making
formatting decisions based on what's the best format for each line.
If no filenames are specified, YAPF reads the code from stdin.
"""
from __future__ import print_function
import argparse
import logging
import os
import sys
from yapf.yapflib import errors
from yapf.yapflib import file_resources
from yapf.yapflib import py3compat
from yapf.yapflib import style
from yapf.yapflib import yapf_api
__version__ = '0.16.2'
def main(argv):
  """Main program.

  Arguments:
    argv: command-line arguments, such as sys.argv (including the program name
      in argv[0]).

  Returns:
    0 if there were no changes, non-zero otherwise.

  Raises:
    YapfError: if none of the supplied files were Python files.
  """
  # NOTE(review): every exit path below returns 0; the "non-zero otherwise"
  # claim in the docstring does not match the visible code — confirm upstream.
  parser = argparse.ArgumentParser(description='Formatter for Python code.')
  parser.add_argument(
      '-v',
      '--version',
      action='store_true',
      help='show version number and exit')
  # --diff and --in-place both consume the reformatted output, so they are
  # mutually exclusive.
  diff_inplace_group = parser.add_mutually_exclusive_group()
  diff_inplace_group.add_argument(
      '-d',
      '--diff',
      action='store_true',
      help='print the diff for the fixed source')
  diff_inplace_group.add_argument(
      '-i',
      '--in-place',
      action='store_true',
      help='make changes to files in place')
  # --lines applies to a single file while --recursive implies many files,
  # hence a second mutually exclusive group.
  lines_recursive_group = parser.add_mutually_exclusive_group()
  lines_recursive_group.add_argument(
      '-r',
      '--recursive',
      action='store_true',
      help='run recursively over directories')
  lines_recursive_group.add_argument(
      '-l',
      '--lines',
      metavar='START-END',
      action='append',
      default=None,
      help='range of lines to reformat, one-based')
  parser.add_argument(
      '-e',
      '--exclude',
      metavar='PATTERN',
      action='append',
      default=None,
      help='patterns for files to exclude from formatting')
  parser.add_argument(
      '--style',
      action='store',
      help=('specify formatting style: either a style name (for example "pep8" '
            'or "google"), or the name of a file with style settings. The '
            'default is pep8 unless a %s or %s file located in one of the '
            'parent directories of the source file (or current directory for '
            'stdin)' % (style.LOCAL_STYLE, style.SETUP_CONFIG)))
  parser.add_argument(
      '--style-help',
      action='store_true',
      help=('show style settings and exit; this output can be '
            'saved to .style.yapf to make your settings '
            'permanent'))
  parser.add_argument(
      '--no-local-style',
      action='store_true',
      help="don't search for local style definition")
  # Internal/debugging flag; hidden from --help output via SUPPRESS.
  parser.add_argument('--verify', action='store_true', help=argparse.SUPPRESS)
  parser.add_argument(
      '-p',
      '--parallel',
      action='store_true',
      help=('Run yapf in parallel when formatting multiple files. Requires '
            'concurrent.futures in Python 2.X'))
  parser.add_argument('files', nargs='*')
  args = parser.parse_args(argv[1:])
  if args.version:
    print('yapf {}'.format(__version__))
    return 0
  if args.style_help:
    # Dump every style option, with its docstring as '#' comments, in a
    # format that can be saved directly as a .style.yapf file.
    style.SetGlobalStyle(style.CreateStyleFromConfig(args.style))
    print('[style]')
    for option, docstring in sorted(style.Help().items()):
      for line in docstring.splitlines():
        print('#', line and ' ' or '', line, sep='')
      print(option.lower(), '=', style.Get(option), sep='')
      print()
    return 0
  if args.lines and len(args.files) > 1:
    parser.error('cannot use -l/--lines with more than one file')
  lines = _GetLines(args.lines) if args.lines is not None else None
  if not args.files:
    # No arguments specified. Read code from stdin.
    if args.in_place or args.diff:
      parser.error('cannot use --in-place or --diff flags when reading '
                   'from stdin')
    original_source = []
    while True:
      try:
        # Use 'raw_input' instead of 'sys.stdin.read', because otherwise the
        # user will need to hit 'Ctrl-D' more than once if they're inputting
        # the program by hand. 'raw_input' throws an EOFError exception if
        # 'Ctrl-D' is pressed, which makes it easy to bail out of this loop.
        original_source.append(py3compat.raw_input())
      except EOFError:
        break
    style_config = args.style
    if style_config is None and not args.no_local_style:
      # Fall back to a directory-local style file found from the cwd.
      style_config = file_resources.GetDefaultStyleForDir(os.getcwd())
    source = [line.rstrip() for line in original_source]
    reformatted_source, _ = yapf_api.FormatCode(
        py3compat.unicode('\n'.join(source) + '\n'),
        filename='<stdin>',
        style_config=style_config,
        lines=lines,
        verify=args.verify)
    file_resources.WriteReformattedCode('<stdout>', reformatted_source)
    return 0
  files = file_resources.GetCommandLineFiles(args.files, args.recursive,
                                             args.exclude)
  if not files:
    raise errors.YapfError('Input filenames did not match any python files')
  FormatFiles(
      files,
      lines,
      style_config=args.style,
      no_local_style=args.no_local_style,
      in_place=args.in_place,
      print_diff=args.diff,
      verify=args.verify,
      parallel=args.parallel)
  return 0
def FormatFiles(filenames,
                lines,
                style_config=None,
                no_local_style=False,
                in_place=False,
                print_diff=False,
                verify=True,
                parallel=False):
  """Reformat each of the given files via _FormatFile.

  Arguments:
    filenames: (list of unicode) Files to reformat.
    lines: (list of (start, end) tuples) 1-based line ranges to format,
      overriding 'args.lines'; usable by third-party callers such as IDEs.
    style_config: (string) Style name or file path.
    no_local_style: (string) When style_config is None, skip the search for a
      directory-local style configuration.
    in_place: (bool) Rewrite the files on disk.
    print_diff: (bool) Emit a diff instead of the reformatted source.
    verify: (bool) Verify the reformatted code for valid syntax.
    parallel: (bool) Format multiple files concurrently in worker processes.

  Returns:
    True if any of the files changed.
  """
  any_changed = False
  if parallel:
    # Imported lazily so the serial path has no dependency on these modules.
    import multiprocessing  # pylint: disable=g-import-not-at-top
    import concurrent.futures  # pylint: disable=g-import-not-at-top
    worker_count = min(multiprocessing.cpu_count(), len(filenames))
    with concurrent.futures.ProcessPoolExecutor(worker_count) as pool:
      pending = [
          pool.submit(_FormatFile, name, lines, style_config, no_local_style,
                      in_place, print_diff, verify) for name in filenames
      ]
      for finished in concurrent.futures.as_completed(pending):
        any_changed |= finished.result()
  else:
    for name in filenames:
      any_changed |= _FormatFile(name, lines, style_config, no_local_style,
                                 in_place, print_diff, verify)
  return any_changed
def _FormatFile(filename,
                lines,
                style_config=None,
                no_local_style=False,
                in_place=False,
                print_diff=False,
                verify=True):
  """Reformat one file; return True if its contents changed."""
  logging.info('Reformatting %s', filename)
  # Without an explicit style, look for a directory-local style file next to
  # the target (unless the caller opted out of the search).
  if style_config is None and not no_local_style:
    style_config = file_resources.GetDefaultStyleForDir(
        os.path.dirname(filename))
  try:
    reformatted_code, encoding, has_change = yapf_api.FormatFile(
        filename,
        in_place=in_place,
        style_config=style_config,
        lines=lines,
        print_diff=print_diff,
        verify=verify,
        logger=logging.warning)
    # yapf_api.FormatFile already wrote the file when in_place is set; only
    # emit to stdout here for the non-in-place case.
    if reformatted_code and not in_place:
      file_resources.WriteReformattedCode(filename, reformatted_code,
                                          in_place, encoding)
    return has_change
  except SyntaxError as e:
    # Attach the offending filename so the error report is actionable.
    e.filename = filename
    raise
def _GetLines(line_strings):
"""Parses the start and end lines from a line string like 'start-end'.
Arguments:
line_strings: (array of string) A list of strings representing a line
range like 'start-end'.
Returns:
A list of tuples of the start and end line numbers.
Raises:
ValueError: If the line string failed to parse or was an invalid line range.
"""
lines = []
for line_string in line_strings:
# The 'list' here is needed by Python 3.
line = list(map(int, line_string.split('-', 1)))
if line[0] < 1:
raise errors.YapfError('invalid start of line range: %r' % line)
if line[0] > line[1]:
raise errors.YapfError('end comes before start in line range: %r', line)
lines.append(tuple(line))
return lines
def run_main():  # pylint: disable=invalid-name
  """Command-line entry point: run main() and map YapfError to exit code 1."""
  try:
    sys.exit(main(sys.argv))
  except errors.YapfError as e:
    sys.stderr.write('yapf: %s\n' % e)
    sys.exit(1)


if __name__ == '__main__':
  run_main()

View File

@ -0,0 +1,13 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,183 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate the number of blank lines between top-level entities.
Calculates how many blank lines we need between classes, functions, and other
entities at the same level.
CalculateBlankLines(): the main function exported by this module.
Annotations:
newlines: The number of newlines required before the node.
"""
from lib2to3 import pytree
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import pytree_visitor
# Values stored in the NEWLINES annotation. Judging by the names, the value
# counts required newlines before the node, so N corresponds to N-1 blank
# lines — TODO confirm against the consumers of the annotation.
_NO_BLANK_LINES = 1
_ONE_BLANK_LINE = 2
_TWO_BLANK_LINES = 3

# lib2to3 grammar node names treated as statements when deciding whether a
# node following a class/function definition needs blank lines before it.
_PYTHON_STATEMENTS = frozenset({
    'small_stmt', 'expr_stmt', 'print_stmt', 'del_stmt', 'pass_stmt',
    'break_stmt', 'continue_stmt', 'return_stmt', 'raise_stmt', 'yield_stmt',
    'import_stmt', 'global_stmt', 'exec_stmt', 'assert_stmt', 'if_stmt',
    'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt', 'nonlocal_stmt',
    'async_stmt', 'simple_stmt'
})
def CalculateBlankLines(tree):
  """Annotate the tree with required blank-line counts.

  Walks the whole pytree with a _BlankLineCalculator visitor; the tree is
  modified in place.

  Arguments:
    tree: the top-level pytree node to annotate with subtypes.
  """
  _BlankLineCalculator().Visit(tree)
class _BlankLineCalculator(pytree_visitor.PyTreeVisitor):
  """_BlankLineCalculator - see file-level docstring for a description."""

  def __init__(self):
    # Nesting depth counters: non-zero while visiting inside a class /
    # function body; used by _IsTopLevel.
    self.class_level = 0
    self.function_level = 0
    # Line number of the most recently seen standalone comment (0 = none).
    self.last_comment_lineno = 0
    self.last_was_decorator = False
    self.last_was_class_or_function = False

  def Visit_simple_stmt(self, node):  # pylint: disable=invalid-name
    self.DefaultNodeVisit(node)
    # Track comment statements so later visitors can tell whether a comment
    # immediately precedes a decorator or definition.
    if pytree_utils.NodeName(node.children[0]) == 'COMMENT':
      self.last_comment_lineno = node.children[0].lineno

  def Visit_decorator(self, node):  # pylint: disable=invalid-name
    # A comment on the line directly above the decorator sticks to it
    # (no blank line in between).
    if (self.last_comment_lineno and
        self.last_comment_lineno == node.children[0].lineno - 1):
      self._SetNumNewlines(node.children[0], _NO_BLANK_LINES)
    else:
      self._SetNumNewlines(node.children[0], self._GetNumNewlines(node))
    for child in node.children:
      self.Visit(child)
    self.last_was_decorator = True

  def Visit_classdef(self, node):  # pylint: disable=invalid-name
    self.last_was_class_or_function = False
    # 'index' skips past any leading comment children of the classdef node.
    index = self._SetBlankLinesBetweenCommentAndClassFunc(node)
    self.last_was_decorator = False
    self.class_level += 1
    for child in node.children[index:]:
      self.Visit(child)
    self.class_level -= 1
    self.last_was_class_or_function = True

  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
    self.last_was_class_or_function = False
    # NOTE(review): this first call appears redundant — both branches of the
    # if/else below recompute 'index'. Confirm it is intentional, since the
    # helper visits comment children as a side effect and would do so twice.
    index = self._SetBlankLinesBetweenCommentAndClassFunc(node)
    if _AsyncFunction(node):
      index = self._SetBlankLinesBetweenCommentAndClassFunc(
          node.prev_sibling.parent)
      self._SetNumNewlines(node.children[0], None)
    else:
      index = self._SetBlankLinesBetweenCommentAndClassFunc(node)
    self.last_was_decorator = False
    self.function_level += 1
    for child in node.children[index:]:
      self.Visit(child)
    self.function_level -= 1
    self.last_was_class_or_function = True

  def DefaultNodeVisit(self, node):
    """Override the default visitor for Node.

    This will set the blank lines required if the last entity was a class or
    function.

    Arguments:
      node: (pytree.Node) The node to visit.
    """
    if self.last_was_class_or_function:
      if pytree_utils.NodeName(node) in _PYTHON_STATEMENTS:
        leaf = _GetFirstChildLeaf(node)
        self._SetNumNewlines(leaf, self._GetNumNewlines(leaf))
    self.last_was_class_or_function = False
    super(_BlankLineCalculator, self).DefaultNodeVisit(node)

  def _SetBlankLinesBetweenCommentAndClassFunc(self, node):
    """Set the number of blanks between a comment and class or func definition.

    Class and function definitions have leading comments as children of the
    classdef and functdef nodes.

    Arguments:
      node: (pytree.Node) The classdef or funcdef node.

    Returns:
      The index of the first child past the comment nodes.
    """
    index = 0
    while pytree_utils.IsCommentStatement(node.children[index]):
      # Standalone comments are wrapped in a simple_stmt node with the comment
      # node as its only child.
      self.Visit(node.children[index].children[0])
      if not self.last_was_decorator:
        self._SetNumNewlines(node.children[index].children[0], _ONE_BLANK_LINE)
      index += 1
    # If the definition starts on the line right after its last leading
    # comment, keep them glued together.
    if (index and node.children[index].lineno -
        1 == node.children[index - 1].children[0].lineno):
      self._SetNumNewlines(node.children[index], _NO_BLANK_LINES)
    else:
      if self.last_comment_lineno + 1 == node.children[index].lineno:
        num_newlines = _NO_BLANK_LINES
      else:
        num_newlines = self._GetNumNewlines(node)
      self._SetNumNewlines(node.children[index], num_newlines)
    return index

  def _GetNumNewlines(self, node):
    # Decorated definitions stay attached to their decorator; top-level
    # entities get two blank lines, nested ones get one.
    if self.last_was_decorator:
      return _NO_BLANK_LINES
    elif self._IsTopLevel(node):
      return _TWO_BLANK_LINES
    return _ONE_BLANK_LINE

  def _SetNumNewlines(self, node, num_newlines):
    pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.NEWLINES,
                                   num_newlines)

  def _IsTopLevel(self, node):
    return (not (self.class_level or self.function_level) and
            _StartsInZerothColumn(node))
def _StartsInZerothColumn(node):
  """Return truthy if the node (or its 'async' keyword) begins at column 0."""
  if _GetFirstChildLeaf(node).column == 0:
    return True
  return _AsyncFunction(node) and node.prev_sibling.column == 0
def _AsyncFunction(node):
  """Return truthy if *node* is preceded by an ASYNC keyword (Python 3 only)."""
  if not py3compat.PY3:
    return False
  prev = node.prev_sibling
  return prev and pytree_utils.NodeName(prev) == 'ASYNC'
def _GetFirstChildLeaf(node):
if isinstance(node, pytree.Leaf):
return node
return _GetFirstChildLeaf(node.children[0])

View File

@ -0,0 +1,374 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Comment splicer for lib2to3 trees.
The lib2to3 syntax tree produced by the parser holds comments and whitespace in
prefix attributes of nodes, rather than nodes themselves. This module provides
functionality to splice comments out of prefixes and into nodes of their own,
making them easier to process.
SpliceComments(): the main function exported by this module.
"""
from lib2to3 import pygram
from lib2to3 import pytree
from lib2to3.pgen2 import token
from yapf.yapflib import pytree_utils
def SpliceComments(tree):
  """Given a pytree, splice comments into nodes of their own right.
  Extract comments from the prefixes where they are housed after parsing.
  The prefixes that previously housed the comments become empty.
  Args:
    tree: a pytree.Node - the tree to work on. The tree is modified by this
      function.
  """
  # The previous leaf node encountered in the traversal.
  # This is a list because Python 2.x doesn't have 'nonlocal' :)
  prev_leaf = [None]
  # Record each node's child indentation (CHILD_INDENT annotations) so the
  # DEDENT handling below can locate an ancestor at a matching indent.
  _AnnotateIndents(tree)
  def _VisitNodeRec(node):
    # This loop may insert into node.children, so we'll iterate over a copy.
    for child in node.children[:]:
      if isinstance(child, pytree.Node):
        # Nodes don't have prefixes.
        _VisitNodeRec(child)
      else:
        if child.prefix.lstrip().startswith('#'):
          # We have a comment prefix in this child, so splicing is needed.
          comment_prefix = child.prefix
          comment_lineno = child.lineno - comment_prefix.count('\n')
          comment_column = child.column
          # Remember the leading indentation of this prefix and clear it.
          # Mopping up the prefix is important because we may go over this same
          # child in the next iteration...
          child_prefix = child.prefix.lstrip('\n')
          prefix_indent = child_prefix[:child_prefix.find('#')]
          if '\n' in prefix_indent:
            prefix_indent = prefix_indent[prefix_indent.rfind('\n') + 1:]
          child.prefix = ''
          if child.type == token.NEWLINE:
            # If the prefix was on a NEWLINE leaf, it's part of the line so it
            # will be inserted after the previously encountered leaf.
            # We can't just insert it before the NEWLINE node, because as a
            # result of the way pytrees are organized, this node can be under
            # an inappropriate parent.
            comment_column -= len(comment_prefix)
            comment_column += len(comment_prefix) - len(comment_prefix.lstrip())
            pytree_utils.InsertNodesAfter(
                _CreateCommentsFromPrefix(
                    comment_prefix,
                    comment_lineno,
                    comment_column,
                    standalone=False), prev_leaf[0])
          elif child.type == token.DEDENT:
            # Comment prefixes on DEDENT nodes also deserve special treatment,
            # because their final placement depends on their prefix.
            # We'll look for an ancestor of this child with a matching
            # indentation, and insert the comment after it.
            ancestor_at_indent = _FindAncestorAtIndent(child, prefix_indent)
            if ancestor_at_indent.type == token.DEDENT:
              comments = comment_prefix.split('\n')
              # lib2to3 places comments that should be separated into the same
              # DEDENT node. For example, "comment 1" and "comment 2" will be
              # combined.
              #
              #   def _():
              #     for x in y:
              #       pass
              #       # comment 1
              #
              #       # comment 2
              #     pass
              #
              # In this case, we need to split them up ourselves.
              # Comments that match the DEDENT's prefix indent stay 'before'
              # (at the inner level); the rest move 'after' the dedent.
              before = []
              after = []
              after_lineno = comment_lineno
              index = 0
              while index < len(comments):
                cmt = comments[index]
                if not cmt.strip() or cmt.startswith(prefix_indent + '#'):
                  before.append(cmt)
                else:
                  after_lineno += index
                  after.extend(comments[index:])
                  break
                index += 1
              # Special case where the comment is inserted in the same
              # indentation level as the DEDENT it was originally attached to.
              pytree_utils.InsertNodesBefore(
                  _CreateCommentsFromPrefix(
                      '\n'.join(before) + '\n',
                      comment_lineno,
                      comment_column,
                      standalone=True), ancestor_at_indent)
              if after:
                after_column = len(after[0]) - len(after[0].lstrip())
                comment_column -= comment_column - after_column
                pytree_utils.InsertNodesAfter(
                    _CreateCommentsFromPrefix(
                        '\n'.join(after) + '\n',
                        after_lineno,
                        comment_column,
                        standalone=True), _FindNextAncestor(ancestor_at_indent))
            else:
              pytree_utils.InsertNodesAfter(
                  _CreateCommentsFromPrefix(
                      comment_prefix,
                      comment_lineno,
                      comment_column,
                      standalone=True), ancestor_at_indent)
          else:
            # Otherwise there are two cases.
            #
            # 1. The comment is on its own line
            # 2. The comment is part of an expression.
            #
            # Unfortunately, it's fairly difficult to distinguish between the
            # two in lib2to3 trees. The algorithm here is to determine whether
            # child is the first leaf in the statement it belongs to. If it is,
            # then the comment (which is a prefix) belongs on a separate line.
            # If it is not, it means the comment is buried deep in the statement
            # and is part of some expression.
            stmt_parent = _FindStmtParent(child)
            for leaf_in_parent in stmt_parent.leaves():
              if leaf_in_parent.type == token.NEWLINE:
                continue
              elif id(leaf_in_parent) == id(child):
                # This comment stands on its own line, and it has to be inserted
                # into the appropriate parent. We'll have to find a suitable
                # parent to insert into. See comments above
                # _STANDALONE_LINE_NODES for more details.
                node_with_line_parent = _FindNodeWithStandaloneLineParent(child)
                pytree_utils.InsertNodesBefore(
                    _CreateCommentsFromPrefix(
                        comment_prefix, comment_lineno, 0, standalone=True),
                    node_with_line_parent)
                break
              else:
                if comment_lineno == prev_leaf[0].lineno:
                  # A trailing comment on the same line as the previous leaf:
                  # attach its first line directly after that leaf.
                  comment_lines = comment_prefix.splitlines()
                  value = comment_lines[0].lstrip()
                  if value.rstrip('\n'):
                    comment_column = prev_leaf[0].column
                    comment_column += len(prev_leaf[0].value)
                    comment_column += (
                        len(comment_lines[0]) - len(comment_lines[0].lstrip()))
                    comment_leaf = pytree.Leaf(
                        type=token.COMMENT,
                        value=value.rstrip('\n'),
                        context=('', (comment_lineno, comment_column)))
                    pytree_utils.InsertNodesAfter([comment_leaf], prev_leaf[0])
                    comment_prefix = '\n'.join(comment_lines[1:])
                    comment_lineno += 1
                rindex = (0 if '\n' not in comment_prefix.rstrip() else
                          comment_prefix.rstrip().rindex('\n') + 1)
                comment_column = (len(comment_prefix[rindex:]) -
                                  len(comment_prefix[rindex:].lstrip()))
                comments = _CreateCommentsFromPrefix(
                    comment_prefix,
                    comment_lineno,
                    comment_column,
                    standalone=False)
                pytree_utils.InsertNodesBefore(comments, child)
                break
        # Track the most recent leaf; NEWLINE-attached comments and trailing
        # comments are inserted after it.
        prev_leaf[0] = child
  # Walk the entire tree, splicing comments as we go.
  _VisitNodeRec(tree)
def _CreateCommentsFromPrefix(comment_prefix,
                              comment_lineno,
                              comment_column,
                              standalone=False):
  """Turn a comment-bearing prefix string into pytree comment nodes.
  The prefix may hold several comment blocks separated by blank lines; each
  block becomes one COMMENT leaf. When standalone is True, every leaf is
  additionally wrapped in a simple_stmt node so it can occupy a line of its
  own.
  Args:
    comment_prefix: (unicode) the text of the comment from the node's prefix.
    comment_lineno: (int) the line number for the start of the comment.
    comment_column: (int) the column for the start of the comment.
    standalone: (bool) determines if the comment is standalone or not.
  Returns:
    A list of the newly created nodes, one per comment block.
  """
  created = []
  prefix_lines = comment_prefix.split('\n')
  total = len(prefix_lines)
  pos = 0
  while pos < total:
    # Gather one contiguous run of '#' lines.
    block = []
    while pos < total and prefix_lines[pos].lstrip().startswith('#'):
      block.append(prefix_lines[pos].strip())
      pos += 1
    if block:
      # The comment is stored in the prefix with no lineno of its own, so we
      # only know where it ends; recover the start from how far into the
      # prefix this block reaches.
      leaf = pytree.Leaf(
          type=token.COMMENT,
          value='\n'.join(block),
          context=('', (comment_lineno + pos - 1, comment_column)))
      if standalone:
        created.append(pytree.Node(pygram.python_symbols.simple_stmt, [leaf]))
      else:
        created.append(leaf)
    # Skip the blank lines separating comment blocks.
    while pos < total and not prefix_lines[pos].lstrip():
      pos += 1
  return created
# "Standalone line nodes" are tree nodes that have to start a new line in Python
# code (and cannot follow a ';' or ':'). Other nodes, like 'expr_stmt', serve as
# parents of other nodes but can come later in a line. This is a list of
# standalone line nodes in the grammar. It is meant to be exhaustive
# *eventually*, and we'll modify it with time as we discover more corner cases
# in the parse tree.
#
# When splicing a standalone comment (i.e. a comment that appears on its own
# line, not on the same line with other code), it's important to insert it into
# an appropriate parent of the node it's attached to. An appropriate parent
# is the first "standalone line node" in the parent chain of a node.
_STANDALONE_LINE_NODES = frozenset([
    'suite', 'if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt',
    'funcdef', 'classdef', 'decorated', 'file_input'
])
def _FindNodeWithStandaloneLineParent(node):
  """Find a node whose parent is a 'standalone line' node.
  See the comment above _STANDALONE_LINE_NODES for more details.
  Arguments:
    node: node to start from
  Returns:
    Suitable node that's either the node itself or one of its ancestors.
  """
  # Guaranteed to terminate: 'file_input', the root of any pytree, is itself a
  # standalone line node.
  while pytree_utils.NodeName(node.parent) not in _STANDALONE_LINE_NODES:
    node = node.parent
  return node
# "Statement nodes" are standalone statements. They don't have to start a new
# line.
_STATEMENT_NODES = frozenset(['simple_stmt']) | _STANDALONE_LINE_NODES
def _FindStmtParent(node):
  """Find the nearest parent of node that is a statement node.
  Arguments:
    node: node to start from
  Returns:
    Nearest parent (or node itself, if suitable).
  """
  while pytree_utils.NodeName(node) not in _STATEMENT_NODES:
    node = node.parent
  return node
def _FindAncestorAtIndent(node, indent):
  """Find an ancestor of node with the given indentation.
  Arguments:
    node: node to start from. This must not be the tree root.
    indent: indentation string for the ancestor we're looking for.
        See _AnnotateIndents for more details.
  Returns:
    An ancestor node with suitable indentation. If no suitable ancestor is
    found, the closest ancestor to the tree root is returned.
  """
  while node.parent.parent is not None:
    # A node is suitable when its parent's annotated child indent is a prefix
    # of 'indent'. The reason for "prefix" rather than "equal" is that
    # comments may be improperly indented (i.e. by three spaces, where
    # surrounding statements have either zero or two or four), and we don't
    # want to propagate them all the way to the root.
    parent_indent = pytree_utils.GetNodeAnnotation(
        node.parent, pytree_utils.Annotation.CHILD_INDENT)
    if parent_indent is not None and indent.startswith(parent_indent):
      return node
    # Keep looking up the tree.
    node = node.parent
  # The parent is the tree root, so there's nowhere else to go.
  return node
def _FindNextAncestor(node):
if node.parent is None:
return node
if node.parent.next_sibling is not None:
return node.parent.next_sibling
return _FindNextAncestor(node.parent)
def _AnnotateIndents(tree):
  """Annotate the tree with child_indent annotations.
  A child_indent annotation on a node specifies the indentation (as a string,
  like "  ") of its children. It is inferred from the INDENT child of a node.
  Arguments:
    tree: root of a pytree. The pytree is modified to add annotations to nodes.
  Raises:
    RuntimeError: if the tree is malformed.
  """
  # Annotate the root of the tree with zero indent.
  if tree.parent is None:
    pytree_utils.SetNodeAnnotation(tree, pytree_utils.Annotation.CHILD_INDENT,
                                   '')
  for child in tree.children:
    if child.type == token.INDENT:
      # An INDENT leaf's value is the literal indentation string; two INDENT
      # children of one node must agree or the tree is malformed.
      child_indent = pytree_utils.GetNodeAnnotation(
          tree, pytree_utils.Annotation.CHILD_INDENT)
      if child_indent is not None and child_indent != child.value:
        raise RuntimeError('inconsistent indentation for child', (tree, child))
      pytree_utils.SetNodeAnnotation(tree, pytree_utils.Annotation.CHILD_INDENT,
                                     child.value)
    # Recurse into every child, not only INDENT ones.
    _AnnotateIndents(child)

View File

@ -0,0 +1,52 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Insert "continuation" nodes into lib2to3 tree.
The "backslash-newline" continuation marker is shoved into the node's prefix.
Pull them out and make it into nodes of their own.
SpliceContinuations(): the main function exported by this module.
"""
from lib2to3 import pytree
from yapf.yapflib import format_token
def SpliceContinuations(tree):
  """Given a pytree, splice the continuation marker into nodes.
  Arguments:
    tree: (pytree.Node) The tree to work on. The tree is modified by this
      function.
  """
  def RecSplicer(node):
    """Inserts a continuation marker into the node."""
    if isinstance(node, pytree.Leaf):
      # A backslash-newline continuation lives in the leaf's prefix; turn it
      # into a CONTINUATION leaf and hand it to the caller for insertion.
      if node.prefix.lstrip().startswith('\\\n'):
        new_lineno = node.lineno - node.prefix.count('\n')
        return pytree.Leaf(
            type=format_token.CONTINUATION,
            value=node.prefix,
            context=('', (new_lineno, 0)))
      return None
    num_inserted = 0
    # Iterate over a copy; insertions shift positions in node.children, which
    # 'num_inserted' compensates for.
    for index, child in enumerate(node.children[:]):
      continuation_node = RecSplicer(child)
      if continuation_node:
        node.children.insert(index + num_inserted, continuation_node)
        num_inserted += 1
  RecSplicer(tree)

View File

@ -0,0 +1,23 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""YAPF error object."""
class YapfError(Exception):
  """Base class for user-facing errors.
  The command line tool catches exceptions of this type and turns them into
  clear error messages rather than backtraces.
  """

View File

@ -0,0 +1,169 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Interface to file resources.
This module provides functions for interfacing with files: opening, writing, and
querying.
"""
import fnmatch
import os
import re
from lib2to3.pgen2 import tokenize
from yapf.yapflib import errors
from yapf.yapflib import py3compat
from yapf.yapflib import style
CR = '\r'
LF = '\n'
CRLF = '\r\n'
def GetDefaultStyleForDir(dirname):
  """Return default style name for a given directory.
  Looks for .style.yapf or setup.cfg in the parent directories.
  Arguments:
    dirname: (unicode) The name of the directory.
  Returns:
    The filename if found, otherwise return the global default (pep8).
  """
  dirname = os.path.abspath(dirname)
  while True:
    # See if we have a .style.yapf file.
    style_file = os.path.join(dirname, style.LOCAL_STYLE)
    if os.path.exists(style_file):
      return style_file
    # See if we have a setup.cfg file with a '[yapf]' section.
    config_file = os.path.join(dirname, style.SETUP_CONFIG)
    if os.path.exists(config_file):
      with open(config_file) as fd:
        config = py3compat.ConfigParser()
        config.read_file(fd)
        if config.has_section('yapf'):
          return config_file
    # Walk one level up; stop once we hit the filesystem root.
    dirname = os.path.dirname(dirname)
    if (not dirname or not os.path.basename(dirname) or
        dirname == os.path.abspath(os.path.sep)):
      break
  # No per-project configuration found; fall back to the user-global style
  # file, then to the built-in default.
  global_file = os.path.expanduser(style.GLOBAL_STYLE)
  if os.path.exists(global_file):
    return global_file
  return style.DEFAULT_STYLE
def GetCommandLineFiles(command_line_file_list, recursive, exclude):
  """Return the list of files specified on the command line."""
  found_files = _FindPythonFiles(command_line_file_list, recursive, exclude)
  return found_files
def WriteReformattedCode(filename,
                         reformatted_code,
                         in_place=False,
                         encoding=''):
  """Emit the reformatted code.
  Writes the reformatted code into the named file when in_place is True;
  otherwise the result goes to stdout.
  Arguments:
    filename: (unicode) The name of the unformatted file.
    reformatted_code: (unicode) The reformatted code.
    in_place: (bool) If True, then write the reformatted code to the file.
    encoding: (unicode) The encoding of the file.
  """
  if not in_place:
    py3compat.EncodeAndWriteToStdout(reformatted_code)
    return
  # newline='' preserves whatever line endings the reformatter produced.
  with py3compat.open_with_encoding(
      filename, mode='w', encoding=encoding, newline='') as fd:
    fd.write(reformatted_code)
def LineEnding(lines):
  """Retrieve the dominant line ending of the original source."""
  counts = {'\r\n': 0, '\r': 0, '\n': 0}
  for text in lines:
    if text.endswith('\r\n'):
      counts['\r\n'] += 1
    elif text.endswith('\r'):
      counts['\r'] += 1
    elif text.endswith('\n'):
      counts['\n'] += 1
  # Most frequent ending wins; ties resolve in CRLF, CR, LF order (first
  # maximum in dict insertion order), matching the original sort behavior.
  return max(counts, key=counts.get)
def _FindPythonFiles(filenames, recursive, exclude):
"""Find all Python files."""
python_files = []
for filename in filenames:
if os.path.isdir(filename):
if recursive:
# TODO(morbo): Look into a version of os.walk that can handle recursion.
python_files.extend(
os.path.join(dirpath, f)
for dirpath, _, filelist in os.walk(filename) for f in filelist
if IsPythonFile(os.path.join(dirpath, f)))
else:
raise errors.YapfError(
"directory specified without '--recursive' flag: %s" % filename)
elif os.path.isfile(filename):
python_files.append(filename)
if exclude:
return [
f for f in python_files
if not any(fnmatch.fnmatch(f, p) for p in exclude)
]
return python_files
def IsPythonFile(filename):
  """Return True if filename is a Python file."""
  # A '.py' extension is accepted without inspecting the contents.
  if os.path.splitext(filename)[1] == '.py':
    return True
  try:
    with open(filename, 'rb') as fd:
      encoding = tokenize.detect_encoding(fd.readline)[0]
    # Check for correctness of encoding.
    with py3compat.open_with_encoding(
        filename, mode='r', encoding=encoding) as fd:
      fd.read()
  except UnicodeDecodeError:
    # The declared encoding didn't decode the file; retry with latin-1, which
    # can decode any byte sequence.
    encoding = 'latin-1'
  except (IOError, SyntaxError):
    # If we fail to detect encoding (or the encoding cookie is incorrect - which
    # will make detect_encoding raise SyntaxError), assume it's not a Python
    # file.
    return False
  try:
    with py3compat.open_with_encoding(
        filename, mode='r', encoding=encoding) as fd:
      first_line = fd.readlines()[0]
  except (IOError, IndexError):
    return False
  # Extensionless files count as Python only with a python shebang line.
  # Note: returns the match object (truthy) rather than a strict bool.
  return re.match(r'^#!.*\bpython[23]?\b', first_line)

View File

@ -0,0 +1,799 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements a format decision state object that manages whitespace decisions.
Each token is processed one at a time, at which point its whitespace formatting
decisions are made. A graph of potential whitespace formattings is created,
where each node in the graph is a format decision state object. The heuristic
tries formatting the token with and without a newline before it to determine
which one has the least penalty. Therefore, the format decision state object for
each decision needs to be its own unique copy.
Once the heuristic determines the best formatting, it makes a non-dry run pass
through the code to commit the whitespace formatting.
FormatDecisionState: main class exported by this module.
"""
from yapf.yapflib import format_token
from yapf.yapflib import split_penalty
from yapf.yapflib import style
from yapf.yapflib import unwrapped_line
# Keywords that introduce compound statements; MustSplit uses this to avoid
# splitting before the first argument of such statements.
_COMPOUND_STMTS = frozenset(
    {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'})
class FormatDecisionState(object):
"""The current state when indenting an unwrapped line.
The FormatDecisionState object is meant to be copied instead of referenced.
Attributes:
first_indent: The indent of the first token.
column: The number of used columns in the current line.
next_token: The next token to be formatted.
paren_level: The level of nesting inside (), [], and {}.
start_of_line_level: The paren_level at the start of this line.
lowest_level_on_line: The lowest paren_level on the current line.
newline: Indicates if a newline is added along the edge to this format
decision state node.
previous: The previous format decision state in the decision tree.
stack: A stack (of _ParenState) keeping track of properties applying to
parenthesis levels.
ignore_stack_for_comparison: Ignore the stack of _ParenState for state
comparison.
"""
  def __init__(self, line, first_indent):
    """Initializer.
    Initializes to the state after placing the first token from 'line' at
    'first_indent'.
    Arguments:
      line: (UnwrappedLine) The unwrapped line we're currently processing.
      first_indent: (int) The indent of the first token.
    """
    self.next_token = line.first
    self.column = first_indent
    self.line = line
    self.paren_level = 0
    self.start_of_line_level = 0
    self.lowest_level_on_line = 0
    self.ignore_stack_for_comparison = False
    # One _ParenState per open parenthesis level; the base entry covers the
    # line itself.
    self.stack = [_ParenState(first_indent, first_indent)]
    self.first_indent = first_indent
    self.newline = False
    self.previous = None
    # Cache the column limit; it's consulted on every split decision.
    self.column_limit = style.Get('COLUMN_LIMIT')
def Clone(self):
"""Clones a FormatDecisionState object."""
new = FormatDecisionState(self.line, self.first_indent)
new.next_token = self.next_token
new.column = self.column
new.line = self.line
new.paren_level = self.paren_level
new.start_of_line_level = self.start_of_line_level
new.lowest_level_on_line = self.lowest_level_on_line
new.ignore_stack_for_comparison = self.ignore_stack_for_comparison
new.first_indent = self.first_indent
new.newline = self.newline
new.previous = self.previous
new.stack = [state.Clone() for state in self.stack]
return new
def __eq__(self, other):
# Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous',
# because it shouldn't have a bearing on this comparison. (I.e., it will
# report equal if 'next_token' does.)
return (self.next_token == other.next_token and
self.column == other.column and
self.paren_level == other.paren_level and
self.start_of_line_level == other.start_of_line_level and
self.lowest_level_on_line == other.lowest_level_on_line and
(self.ignore_stack_for_comparison or
other.ignore_stack_for_comparison or self.stack == other.stack))
def __ne__(self, other):
return not self == other
def __hash__(self):
return hash((self.next_token, self.column, self.paren_level,
self.start_of_line_level, self.lowest_level_on_line))
def __repr__(self):
return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' %
(self.column, repr(self.next_token), self.paren_level,
'\n\t'.join(repr(s) for s in self.stack) + ']'))
def CanSplit(self, must_split):
"""Determine if we can split before the next token.
Arguments:
must_split: (bool) A newline was required before this token.
Returns:
True if the line can be split before the next token.
"""
current = self.next_token
if current.is_pseudo_paren:
return False
if (not must_split and
format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes and
format_token.Subtype.DICTIONARY_KEY not in current.subtypes and
not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')):
# In some situations, a dictionary may be multiline, but pylint doesn't
# like it. So don't allow it unless forced to.
return False
return current.can_break_before
  def MustSplit(self):
    """Returns True if the line must split before the next token.
    Consults the style configuration and the token's context — closing
    brackets, compound statements, list/dict/set literals, argument lists,
    comprehensions, and the original hand-written formatting — in that order;
    the first rule that fires decides.
    Returns:
      True if a newline is mandatory before 'self.next_token'.
    """
    current = self.next_token
    previous = current.previous_token
    if current.is_pseudo_paren:
      return False
    if current.must_break_before:
      return True
    if not previous:
      return False
    if self.stack[-1].split_before_closing_bracket and current.value in '}]':
      # Split before the closing bracket if we can.
      return current.node_split_penalty != split_penalty.UNBREAKABLE
    # Prevent splitting before the first argument in compound statements
    # with the exception of function declarations.
    if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and
        self.line.first.value != 'def' and
        self.line.first.value in _COMPOUND_STMTS):
      return False
    ###########################################################################
    # List Splitting
    if (style.Get('DEDENT_CLOSING_BRACKETS') or
        style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
      bracket = current if current.ClosesScope() else previous
      if format_token.Subtype.SUBSCRIPT_BRACKET not in bracket.subtypes:
        if bracket.OpensScope():
          if style.Get('COALESCE_BRACKETS'):
            if current.OpensScope():
              # Prefer to keep all opening brackets together.
              return False
          if (not _IsLastScopeInLine(bracket) or
              unwrapped_line.IsSurroundedByBrackets(bracket)):
            last_token = bracket.matching_bracket
          else:
            last_token = _LastTokenInLine(bracket.matching_bracket)
          if not self._FitsOnLine(bracket, last_token):
            # Split before the first element if the whole list can't fit on a
            # single line.
            self.stack[-1].split_before_closing_bracket = True
            return True
        elif style.Get('DEDENT_CLOSING_BRACKETS') and current.ClosesScope():
          # Split before and dedent the closing bracket.
          return self.stack[-1].split_before_closing_bracket
    if (current.is_name or current.is_string) and previous.value == ',':
      # If the list has function calls in it and the full list itself cannot
      # fit on the line, then we want to split. Otherwise, we'll get something
      # like this:
      #
      #     X = [
      #         Bar(xxx='some string',
      #             yyy='another long string',
      #             zzz='a third long string'), Bar(
      #                 xxx='some string',
      #                 yyy='another long string',
      #                 zzz='a third long string')
      #     ]
      #
      # or when a string formatting syntax.
      func_call_or_string_format = False
      if current.is_name:
        # Scan the dotted-name chain; a '(' right after it means a call.
        tok = current.next_token
        while tok and (tok.is_name or tok.value == '.'):
          tok = tok.next_token
        func_call_or_string_format = tok and tok.value == '('
      elif current.is_string:
        # Adjacent string literals followed by '%' mean string formatting.
        tok = current.next_token
        while tok and tok.is_string:
          tok = tok.next_token
        func_call_or_string_format = tok and tok.value == '%'
      if func_call_or_string_format:
        open_bracket = unwrapped_line.IsSurroundedByBrackets(current)
        if open_bracket and open_bracket.value in '[{':
          if not self._FitsOnLine(open_bracket, open_bracket.matching_bracket):
            return True
    ###########################################################################
    # Dict/Set Splitting
    if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and
        format_token.Subtype.DICTIONARY_KEY in current.subtypes and
        not current.is_comment):
      # Place each dictionary entry onto its own line.
      if previous.value == '{' and previous.previous_token:
        opening = _GetOpeningBracket(previous.previous_token)
        if (opening and opening.value == '(' and opening.previous_token and
            opening.previous_token.is_name):
          # This is a dictionary that's an argument to a function.
          if self._FitsOnLine(previous, previous.matching_bracket):
            return False
      return True
    if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and
        format_token.Subtype.DICT_SET_GENERATOR in current.subtypes):
      # Split before a dict/set generator.
      return True
    if (format_token.Subtype.DICTIONARY_VALUE in current.subtypes or
        (previous.is_pseudo_paren and previous.value == '(' and
         not current.is_comment)):
      # Split before the dictionary value if we can't fit every dictionary
      # entry on its own line.
      if not current.OpensScope():
        opening = _GetOpeningBracket(current)
        if not self._EachDictEntryFitsOnOneLine(opening):
          return True
    if previous.value == '{':
      # Split if the dict/set cannot fit on one line and ends in a comma.
      closing = previous.matching_bracket
      if (not self._FitsOnLine(previous, closing) and
          closing.previous_token.value == ','):
        self.stack[-1].split_before_closing_bracket = True
        return True
    ###########################################################################
    # Argument List Splitting
    if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and
        format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in
        current.subtypes):
      if (previous.value not in {'=', ':', '*', '**'} and
          current.value not in ':=,)' and not _IsFunctionDefinition(previous)):
        # If we're going to split the lines because of named arguments, then we
        # want to split after the opening bracket as well. But not when this is
        # part of a function definition.
        if previous.value == '(':
          # Make sure we don't split after the opening bracket if the
          # continuation indent is greater than the opening bracket:
          #
          #  a(
          #      b=1,
          #      c=2)
          if (self._FitsOnLine(previous, previous.matching_bracket) and
              unwrapped_line.IsSurroundedByBrackets(previous)):
            # An argument to a function is a function call with named
            # assigns.
            return False
          column = self.column - self.stack[-1].last_space
          return column > style.Get('CONTINUATION_INDENT_WIDTH')
        opening = _GetOpeningBracket(current)
        if opening:
          # Split when the argument list would overrun the column limit.
          arglist_length = (opening.matching_bracket.total_length -
                            opening.total_length + self.stack[-1].indent)
          return arglist_length > self.column_limit
    if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'):
      # Split before arguments in a function call or definition if the
      # arguments are terminated by a comma.
      opening = _GetOpeningBracket(current)
      if opening and opening.previous_token and opening.previous_token.is_name:
        if previous.value in '(,':
          if opening.matching_bracket.previous_token.value == ',':
            return True
    if ((current.is_name or current.value in {'*', '**'}) and
        previous.value == ','):
      # If we have a function call within an argument list and it won't fit on
      # the remaining line, but it will fit on a line by itself, then go ahead
      # and split before the call.
      opening = _GetOpeningBracket(current)
      if (opening and opening.value == '(' and opening.previous_token and
          (opening.previous_token.is_name or
           opening.previous_token.value in {'*', '**'})):
        is_func_call = False
        token = current
        while token:
          if token.value == '(':
            is_func_call = True
            break
          if (not (token.is_name or token.value in {'*', '**'}) and
              token.value != '.'):
            break
          token = token.next_token
        if is_func_call:
          if not self._FitsOnLine(current, opening.matching_bracket):
            return True
    pprevious = previous.previous_token
    if (current.is_name and pprevious and pprevious.is_name and
        previous.value == '('):
      if (not self._FitsOnLine(previous, previous.matching_bracket) and
          _IsFunctionCallWithArguments(current)):
        # There is a function call, with more than 1 argument, where the first
        # argument is itself a function call with arguments. In this specific
        # case, if we split after the first argument's opening '(', then the
        # formatting will look bad for the rest of the arguments. E.g.:
        #
        #     outer_function_call(inner_function_call(
        #         inner_arg1, inner_arg2),
        #                         outer_arg1, outer_arg2)
        #
        # Instead, enforce a split before that argument to keep things looking
        # good.
        return True
    if (previous.OpensScope() and not current.OpensScope() and
        format_token.Subtype.SUBSCRIPT_BRACKET not in previous.subtypes):
      if not current.is_comment:
        if pprevious and not pprevious.is_keyword and not pprevious.is_name:
          # We want to split if there's a comment in the container.
          token = current
          while token != previous.matching_bracket:
            if token.is_comment:
              return True
            token = token.next_token
      if previous.value == '(':
        pptoken = previous.previous_token
        if not pptoken or not pptoken.is_name:
          # Split after the opening of a tuple if it doesn't fit on the current
          # line and it's not a function call.
          if self._FitsOnLine(previous, previous.matching_bracket):
            return False
        elif not self._FitsOnLine(previous, previous.matching_bracket):
          # Less than 30% of the line remains for the arguments.
          if (self.column_limit - self.column) / float(self.column_limit) < 0.3:
            # Try not to squish all of the arguments off to the right.
            return current.next_token != previous.matching_bracket
      else:
        # Split after the opening of a container if it doesn't fit on the
        # current line or if it has a comment.
        if not self._FitsOnLine(previous, previous.matching_bracket):
          return True
    ###########################################################################
    # List Comprehension Splitting
    if (format_token.Subtype.COMP_FOR in current.subtypes and
        format_token.Subtype.COMP_FOR not in previous.subtypes):
      # Split at the beginning of a list comprehension.
      length = _GetLengthOfSubtype(current, format_token.Subtype.COMP_FOR,
                                   format_token.Subtype.COMP_IF)
      if length + self.column > self.column_limit:
        return True
    if (format_token.Subtype.COMP_IF in current.subtypes and
        format_token.Subtype.COMP_IF not in previous.subtypes):
      # Split at the beginning of an if expression.
      length = _GetLengthOfSubtype(current, format_token.Subtype.COMP_IF)
      if length + self.column > self.column_limit:
        return True
    ###########################################################################
    # Original Formatting Splitting
    # These checks rely upon the original formatting. This is in order to
    # attempt to keep hand-written code in the same condition as it was before.
    # However, this may cause the formatter to fail to be idempotent.
    if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and
        previous.lineno < current.lineno):
      # Retain the split before a bitwise operator.
      return True
    if (current.is_comment and
        previous.lineno < current.lineno - current.value.count('\n')):
      # If a comment comes in the middle of an unwrapped line (like an if
      # conditional with comments interspersed), then we want to split if the
      # original comments were on a separate line.
      return True
    return False
def AddTokenToState(self, newline, dry_run, must_split=False):
"""Add a token to the format decision state.
Allow the heuristic to try out adding the token with and without a newline.
Later on, the algorithm will determine which one has the lowest penalty.
Arguments:
newline: (bool) Add the token on a new line if True.
dry_run: (bool) Don't commit whitespace changes to the FormatToken if
True.
must_split: (bool) A newline was required before this token.
Returns:
The penalty of splitting after the current token.
"""
penalty = 0
if newline:
penalty = self._AddTokenOnNewline(dry_run, must_split)
else:
self._AddTokenOnCurrentLine(dry_run)
return self.MoveStateToNextToken() + penalty
  def _AddTokenOnCurrentLine(self, dry_run):
    """Puts the token on the current line.

    Appends the next token to the state and updates information necessary for
    indentation.

    Arguments:
      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
        True.
    """
    current = self.next_token
    previous = current.previous_token

    spaces = current.spaces_required_before
    if not dry_run:
      current.AddWhitespacePrefix(newlines_before=0, spaces=spaces)

    if previous.OpensScope():
      if not current.is_comment:
        # Align closing scopes that are on a newline with the opening scope:
        #
        #     foo = [a,
        #            b,
        #           ]
        self.stack[-1].closing_scope_indent = self.column - 1
        if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'):
          self.stack[-1].closing_scope_indent += 1
        # Subsequent items in this scope align after the bracket.
        self.stack[-1].indent = self.column + spaces
      else:
        self.stack[-1].closing_scope_indent = (
            self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))

    self.column += spaces
  def _AddTokenOnNewline(self, dry_run, must_split):
    """Adds a line break and necessary indentation.

    Appends the next token to the state and updates information necessary for
    indentation.

    Arguments:
      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
        True.
      must_split: (bool) A newline was required before this token.

    Returns:
      The split penalty for splitting after the current state.
    """
    current = self.next_token
    previous = current.previous_token

    self.column = self._GetNewlineColumn()

    if not dry_run:
      current.AddWhitespacePrefix(newlines_before=1, spaces=self.column)

    if not current.is_comment:
      self.stack[-1].last_space = self.column
    self.start_of_line_level = self.paren_level
    self.lowest_level_on_line = self.paren_level

    # A newline directly after an opening bracket (or after a comment that
    # follows one) forces the closing bracket onto its own line too.
    if (previous.OpensScope() or
        (previous.is_comment and previous.previous_token is not None and
         previous.previous_token.OpensScope())):
      self.stack[-1].closing_scope_indent = max(
          0, self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))

      split_before_closing_bracket = True
      if style.Get('COALESCE_BRACKETS'):
        split_before_closing_bracket = False
      self.stack[-1].split_before_closing_bracket = split_before_closing_bracket

    # Calculate the split penalty.
    penalty = current.split_penalty

    if must_split:
      # Don't penalize for a must split.
      return penalty

    if previous.is_pseudo_paren and previous.value == '(':
      # Small penalty for splitting after a pseudo paren.
      penalty += 50

    # Add a penalty for each increasing newline we add, but don't penalize for
    # splitting before an if-expression or list comprehension.
    if current.value not in {'if', 'for'}:
      last = self.stack[-1]
      last.num_line_splits += 1
      penalty += (style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') *
                  last.num_line_splits)

    if current.OpensScope() and previous.OpensScope():
      # Prefer to keep opening brackets coalesced (unless it's at the beginning
      # of a function call).
      pprev = previous.previous_token
      if not pprev or not pprev.is_name:
        penalty += 10

    # Flat penalty of 10 for any newline at all.
    return penalty + 10
  def _GetNewlineColumn(self):
    """Return the new column on the newline."""
    current = self.next_token
    previous = current.previous_token
    top_of_stack = self.stack[-1]

    if current.spaces_required_before > 2 or self.line.disable:
      # An explicit spacing requirement, or a line whose formatting is
      # disabled, keeps its requested column.
      return current.spaces_required_before

    if current.OpensScope():
      return top_of_stack.indent if self.paren_level else self.first_indent

    if current.ClosesScope():
      if (previous.OpensScope() or
          (previous.is_comment and previous.previous_token is not None and
           previous.previous_token.OpensScope())):
        return max(0,
                   top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH'))
      return top_of_stack.closing_scope_indent

    if (previous and previous.is_string and current.is_string and
        format_token.Subtype.DICTIONARY_VALUE in current.subtypes):
      # Align continuation strings of a dict value under the first string.
      return previous.column

    if style.Get('INDENT_DICTIONARY_VALUE'):
      if previous and (previous.value == ':' or previous.is_pseudo_paren):
        if format_token.Subtype.DICTIONARY_VALUE in current.subtypes:
          return top_of_stack.indent

    if (self.line.first.value in _COMPOUND_STMTS and
        (not style.Get('DEDENT_CLOSING_BRACKETS') or
         style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))):
      # Keep a compound statement's continuation visually distinct from the
      # suite that follows it by indenting an extra continuation width.
      token_indent = (len(self.line.first.whitespace_prefix.split('\n')[-1]) +
                      style.Get('INDENT_WIDTH'))
      if token_indent == top_of_stack.indent:
        return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH')

    return top_of_stack.indent
  def MoveStateToNextToken(self):
    """Calculate format decision state information and move onto the next token.

    Before moving onto the next token, we first calculate the format decision
    state given the current token and its formatting decisions. Then the format
    decision state is set up so that the next token can be added.

    Returns:
      The penalty for the number of characters over the column limit.
    """
    current = self.next_token
    if not current.OpensScope() and not current.ClosesScope():
      self.lowest_level_on_line = min(self.lowest_level_on_line,
                                      self.paren_level)

    # If we encounter an opening bracket, we add a level to our stack to prepare
    # for the subsequent tokens.
    if current.OpensScope():
      last = self.stack[-1]
      new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space

      self.stack.append(_ParenState(new_indent, self.stack[-1].last_space))
      self.paren_level += 1

    # If we encounter a closing bracket, we can remove a level from our
    # parenthesis stack.
    if len(self.stack) > 1 and current.ClosesScope():
      self.stack[-2].last_space = self.stack[-1].last_space
      self.stack.pop()
      self.paren_level -= 1

    is_multiline_string = current.is_string and '\n' in current.value
    if is_multiline_string:
      # This is a multiline string. Only look at the first line.
      self.column += len(current.value.split('\n')[0])
    elif not current.is_pseudo_paren:
      # Pseudo parens have no visible width.
      self.column += len(current.value)

    self.next_token = self.next_token.next_token

    # Calculate the penalty for overflowing the column limit.
    penalty = 0
    if not current.is_pylint_comment and self.column > self.column_limit:
      excess_characters = self.column - self.column_limit
      penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters

    if is_multiline_string:
      # If this is a multiline string, the column is actually the
      # end of the last line in the string.
      self.column = len(current.value.split('\n')[-1])

    return penalty
def _FitsOnLine(self, start, end):
"""Determines if line between start and end can fit on the current line."""
length = end.total_length - start.total_length
if not start.is_pseudo_paren:
length += len(start.value)
return length + self.column <= self.column_limit
  def _EachDictEntryFitsOnOneLine(self, opening):
    """Determine if each dict elems can fit on one line.

    Arguments:
      opening: the opening bracket token of the dictionary literal.
    """

    def PreviousNonCommentToken(tok):
      # Step backwards over any comment tokens.
      tok = tok.previous_token
      while tok.is_comment:
        tok = tok.previous_token
      return tok

    def ImplicitStringConcatenation(tok):
      # True if tok begins a run of two or more adjacent string literals.
      num_strings = 0
      if tok.is_pseudo_paren:
        tok = tok.next_token
      while tok.is_string:
        num_strings += 1
        tok = tok.next_token
      return num_strings > 1

    closing = opening.matching_bracket
    entry_start = opening.next_token
    current = opening.next_token.next_token

    while current and current != closing:
      if format_token.Subtype.DICTIONARY_KEY in current.subtypes:
        # A new key marks the end of the previous entry; measure that entry.
        prev = PreviousNonCommentToken(current)
        length = prev.total_length - entry_start.total_length
        length += len(entry_start.value)
        if length + self.stack[-2].indent >= self.column_limit:
          return False
        entry_start = current
      if current.OpensScope():
        # NOTE(review): 'and' binds tighter than 'or' in the condition below,
        # so the DICTIONARY_VALUE check only guards the pseudo-paren case, not
        # the plain '{' case — confirm this grouping is intended.
        if ((current.value == '{' or
             (current.is_pseudo_paren and current.next_token.value == '{') and
             format_token.Subtype.DICTIONARY_VALUE in current.subtypes) or
            ImplicitStringConcatenation(current)):
          # A dictionary entry that cannot fit on a single line shouldn't matter
          # to this calculation. If it can't fit on a single line, then the
          # opening should be on the same line as the key and the rest on
          # newlines after it. But the other entries should be on single lines
          # if possible.
          if current.matching_bracket:
            current = current.matching_bracket
          while current:
            if current == closing:
              return True
            if format_token.Subtype.DICTIONARY_KEY in current.subtypes:
              entry_start = current
              break
            current = current.next_token
        else:
          current = current.matching_bracket
      else:
        current = current.next_token

    # At this point, current is the closing bracket. Go back one to get the
    # end of the dictionary entry.
    current = PreviousNonCommentToken(current)
    length = current.total_length - entry_start.total_length
    length += len(entry_start.value)
    return length + self.stack[-2].indent <= self.column_limit
def _IsFunctionCallWithArguments(token):
while token:
if token.value == '(':
token = token.next_token
return token and token.value != ')'
elif token.name not in {'NAME', 'DOT'}:
break
token = token.next_token
return False
def _GetLengthOfSubtype(token, subtype, exclude=None):
current = token
while (current.next_token and subtype in current.subtypes and
(exclude is None or exclude not in current.subtypes)):
current = current.next_token
return current.total_length - token.total_length + 1
def _GetOpeningBracket(current):
"""Get the opening bracket containing the current token."""
if current.matching_bracket and not current.is_pseudo_paren:
return current.matching_bracket
while current:
if current.ClosesScope():
current = current.matching_bracket
elif current.is_pseudo_paren:
current = current.previous_token
elif current.OpensScope():
return current
current = current.previous_token
return None
def _LastTokenInLine(current):
while not current.is_comment and current.next_token:
current = current.next_token
return current
def _IsFunctionDefinition(current):
  """Return whether current is the opening paren of a 'def' signature."""
  if current.value != '(':
    return False
  prev = current.previous_token
  # Chain mirrors the original truthiness: None when there is no previous.
  return prev and format_token.Subtype.FUNC_DEF in prev.subtypes
def _IsLastScopeInLine(current):
while current:
current = current.next_token
if current and current.OpensScope():
return False
return True
class _ParenState(object):
"""Maintains the state of the bracket enclosures.
A stack of _ParenState objects are kept so that we know how to indent relative
to the brackets.
Attributes:
indent: The column position to which a specified parenthesis level needs to
be indented.
last_space: The column position of the last space on each level.
split_before_closing_bracket: Whether a newline needs to be inserted before
the closing bracket. We only want to insert a newline before the closing
bracket if there also was a newline after the beginning left bracket.
num_line_splits: Number of line splits this _ParenState contains already.
Each subsequent line split gets an increasing penalty.
"""
# TODO(morbo): This doesn't track "bin packing."
def __init__(self, indent, last_space):
self.indent = indent
self.last_space = last_space
self.closing_scope_indent = 0
self.split_before_closing_bracket = False
self.num_line_splits = 0
def Clone(self):
state = _ParenState(self.indent, self.last_space)
state.closing_scope_indent = self.closing_scope_indent
state.split_before_closing_bracket = self.split_before_closing_bracket
state.num_line_splits = self.num_line_splits
return state
def __repr__(self):
return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % (
self.indent, self.last_space, self.closing_scope_indent)
def __eq__(self, other):
return hash(self) == hash(other)
def __ne__(self, other):
return not self == other
def __hash__(self, *args, **kwargs):
return hash((self.indent, self.last_space, self.closing_scope_indent,
self.split_before_closing_bracket, self.num_line_splits))

View File

@ -0,0 +1,283 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pytree nodes with extra formatting information.
This is a thin wrapper around a pytree.Leaf node.
"""
import keyword
import re
from lib2to3.pgen2 import token
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import style
# lib2to3 has no token type for line continuations, so allocate a new one by
# extending its token table in place.
CONTINUATION = token.N_TOKENS
token.N_TOKENS += 1
class Subtype(object):
  """Subtype information about tokens.

  Gleaned from parsing the code. Helps determine the best formatting.
  """
  # Plain integer constants; a token may carry several of these in its
  # 'subtypes' list, so values must stay distinct.
  NONE = 0
  UNARY_OPERATOR = 1
  BINARY_OPERATOR = 2
  SUBSCRIPT_COLON = 3
  SUBSCRIPT_BRACKET = 4
  DEFAULT_OR_NAMED_ASSIGN = 5
  DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6
  VARARGS_LIST = 7
  VARARGS_STAR = 8
  KWARGS_STAR_STAR = 9
  ASSIGN_OPERATOR = 10
  DICTIONARY_KEY = 11
  DICTIONARY_KEY_PART = 12
  DICTIONARY_VALUE = 13
  DICT_SET_GENERATOR = 14
  COMP_FOR = 15
  COMP_IF = 16
  FUNC_DEF = 17
  DECORATOR = 18
class FormatToken(object):
  """A wrapper around pytree Leaf nodes.

  This represents the token plus additional information useful for reformatting
  the code.

  Attributes:
    next_token: The token in the unwrapped line after this token or None if this
      is the last token in the unwrapped line.
    previous_token: The token in the unwrapped line before this token or None if
      this is the first token in the unwrapped line.
    matching_bracket: If a bracket token ('[', '{', or '(') the matching
      bracket.
    whitespace_prefix: The prefix for the whitespace.
    spaces_required_before: The number of spaces required before a token. This
      is a lower-bound for the formatter and not a hard requirement. For
      instance, a comment may have n required spaces before it. But the
      formatter won't place n spaces before all comments. Only those that are
      moved to the end of a line of code. The formatter may use different
      spacing when appropriate.
    can_break_before: True if we're allowed to break before this token.
    must_break_before: True if we're required to break before this token.
    total_length: The total length of the unwrapped line up to and including
      whitespace and this token. However, this doesn't include the initial
      indentation amount.
    split_penalty: The penalty for splitting the line before this token.
  """

  def __init__(self, node):
    """Constructor.

    Arguments:
      node: (pytree.Leaf) The node that's being wrapped.
    """
    self.node = node
    self.next_token = None
    self.previous_token = None
    self.matching_bracket = None
    self.whitespace_prefix = ''
    self.can_break_before = False
    self.must_break_before = False
    self.total_length = 0  # TODO(morbo): Think up a better name.
    self.split_penalty = 0

    if self.is_comment:
      self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT')
    else:
      self.spaces_required_before = 0

    if self.is_continuation:
      # Strip trailing whitespace so length calculations see only the
      # backslash itself.
      self.value = self.node.value.rstrip()
    else:
      self.value = self.node.value

  def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0):
    """Register a token's whitespace prefix.

    This is the whitespace that will be output before a token's string.

    Arguments:
      newlines_before: (int) The number of newlines to place before the token.
      spaces: (int) The number of spaces to place before the token.
      indent_level: (int) The indentation level.
    """
    indent_char = '\t' if style.Get('USE_TABS') else ' '
    # Spaces after a newline use the configured indent character; mid-line
    # spacing is always plain spaces.
    token_indent_char = indent_char if newlines_before > 0 else ' '
    indent_before = (indent_char * indent_level * style.Get('INDENT_WIDTH') +
                     token_indent_char * spaces)

    if self.is_comment:
      comment_lines = [s.lstrip() for s in self.value.splitlines()]
      self.node.value = ('\n' + indent_before).join(comment_lines)

      # Update our own value since we are changing node value
      self.value = self.node.value

    if not self.whitespace_prefix:
      self.whitespace_prefix = (
          '\n' * (self.newlines or newlines_before) + indent_before)
    else:
      self.whitespace_prefix += indent_before

  def AdjustNewlinesBefore(self, newlines_before):
    """Change the number of newlines before this token."""
    self.whitespace_prefix = (
        '\n' * newlines_before + self.whitespace_prefix.lstrip('\n'))

  def RetainHorizontalSpacing(self, first_column, depth):
    """Retains a token's horizontal spacing."""
    previous = self.previous_token
    if previous is None:
      return

    cur_lineno = self.lineno
    prev_lineno = previous.lineno
    if previous.is_multiline_string:
      prev_lineno += previous.value.count('\n')

    if (cur_lineno != prev_lineno or
        (previous.is_pseudo_paren and previous.value != ')' and
         cur_lineno != previous.previous_token.lineno)):
      # The token starts a new line: preserve its column relative to the
      # line's first column, adjusted for the indentation depth.
      self.spaces_required_before = (
          self.column - first_column + depth * style.Get('INDENT_WIDTH'))
      return

    cur_column = self.node.column
    prev_column = previous.node.column
    prev_len = len(previous.value)

    if previous.is_pseudo_paren and previous.value == ')':
      # A closing pseudo paren occupies no space in the source.
      prev_column -= 1
      prev_len = 0

    if previous.is_multiline_string:
      prev_len = len(previous.value.split('\n')[-1])
      if '\n' in previous.value:
        prev_column = 0  # Last line starts in column 0.

    self.spaces_required_before = cur_column - (prev_column + prev_len)

  def OpensScope(self):
    """True if this token is an opening bracket."""
    return self.value in pytree_utils.OPENING_BRACKETS

  def ClosesScope(self):
    """True if this token is a closing bracket."""
    return self.value in pytree_utils.CLOSING_BRACKETS

  def __repr__(self):
    msg = 'FormatToken(name={0}, value={1}'.format(self.name, self.value)
    msg += ', pseudo)' if self.is_pseudo_paren else ')'
    return msg

  @property
  @py3compat.lru_cache()
  def node_split_penalty(self):
    """Split penalty attached to the pytree node of this token."""
    return pytree_utils.GetNodeAnnotation(
        self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0)

  @property
  def newlines(self):
    """The number of newlines needed before this token."""
    return pytree_utils.GetNodeAnnotation(self.node,
                                          pytree_utils.Annotation.NEWLINES)

  @property
  def must_split(self):
    """Return true if the token requires a split before it."""
    return pytree_utils.GetNodeAnnotation(self.node,
                                          pytree_utils.Annotation.MUST_SPLIT)

  @property
  def column(self):
    """The original column number of the node in the source."""
    return self.node.column

  @property
  def lineno(self):
    """The original line number of the node in the source."""
    return self.node.lineno

  @property
  @py3compat.lru_cache()
  def subtypes(self):
    """Extra type information for directing formatting."""
    value = pytree_utils.GetNodeAnnotation(self.node,
                                           pytree_utils.Annotation.SUBTYPE)
    return [Subtype.NONE] if value is None else value

  @property
  @py3compat.lru_cache()
  def is_binary_op(self):
    """Token is a binary operator."""
    return Subtype.BINARY_OPERATOR in self.subtypes

  @property
  @py3compat.lru_cache()
  def name(self):
    """A string representation of the node's name."""
    return pytree_utils.NodeName(self.node)

  @property
  def is_comment(self):
    """Token is a comment."""
    return self.node.type == token.COMMENT

  @property
  def is_continuation(self):
    """Token is a line-continuation (see CONTINUATION above)."""
    return self.node.type == CONTINUATION

  @property
  @py3compat.lru_cache()
  def is_keyword(self):
    """Token is a Python keyword."""
    return keyword.iskeyword(self.value)

  @property
  @py3compat.lru_cache()
  def is_name(self):
    """Token is an identifier (a NAME that isn't a keyword)."""
    return self.node.type == token.NAME and not self.is_keyword

  @property
  def is_number(self):
    """Token is a numeric literal."""
    return self.node.type == token.NUMBER

  @property
  def is_string(self):
    """Token is a string literal."""
    return self.node.type == token.STRING

  @property
  @py3compat.lru_cache()
  def is_multiline_string(self):
    """Token is a triple-quoted string literal."""
    return (self.is_string and
            re.match(r'^[uUbB]?[rR]?(?P<delim>"""|\'\'\').*(?P=delim)$',
                     self.value, re.DOTALL) is not None)

  @property
  @py3compat.lru_cache()
  def is_docstring(self):
    """Token is a docstring (first multiline string in its scope)."""
    return self.is_multiline_string and not self.node.prev_sibling

  @property
  @py3compat.lru_cache()
  def is_pseudo_paren(self):
    """Token is an invisible paren inserted by the formatter."""
    return hasattr(self.node, 'is_pseudo') and self.node.is_pseudo

  @property
  def is_pylint_comment(self):
    """Token is a 'pylint:' control comment (exempt from length penalties)."""
    return self.is_comment and re.match(r'#.*\bpylint:\s*(disable|enable)=',
                                        self.value)

View File

@ -0,0 +1,109 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Join unwrapped lines together.
Determine how many lines can be joined into one line. For instance, we could
join these statements into one line:
if a == 42:
continue
like this:
if a == 42: continue
There are a few restrictions:
1. The lines should have been joined in the original source.
2. The joined lines must not go over the column boundary if placed on the same
line.
3. They need to be very simple statements.
Note: Because we don't allow the use of a semicolon to separate statements, it
follows that there can only be at most two lines to join.
"""
from yapf.yapflib import style
_CLASS_OR_FUNC = frozenset({'def', 'class'})
def CanMergeMultipleLines(lines, last_was_merged=False):
  """Determine if multiple lines can be joined into one.

  Arguments:
    lines: (list of UnwrappedLine) This is a splice of UnwrappedLines from the
      full code base.
    last_was_merged: (bool) The last line was merged.

  Returns:
    True if two consecutive lines can be joined together. In reality, this will
    only happen if two consecutive lines can be joined, due to the style guide.
  """
  # Number of spaces of indentation for the starting line.
  starting_indent = lines[0].depth * style.Get('INDENT_WIDTH')
  if len(lines) == 1 or starting_indent > style.Get('COLUMN_LIMIT'):
    return False

  if (len(lines) >= 3 and lines[2].depth >= lines[1].depth and
      lines[0].depth != lines[2].depth):
    # If lines[2]'s depth is greater than or equal to line[1]'s depth, we're
    # not looking at a single statement (e.g., if-then, while, etc.). A
    # following line with the same depth as the first line isn't part of the
    # lines we would want to combine. Don't merge more than two lines.
    return False

  if lines[0].first.value in _CLASS_OR_FUNC:
    # Never join a body line onto a class or function header.
    return False

  remaining = style.Get('COLUMN_LIMIT') - starting_indent
  if lines[0].last.total_length < remaining:
    remaining -= lines[0].last.total_length

    keyword = lines[0].first.value
    if keyword == 'if':
      return _CanMergeLineIntoIfStatement(lines, remaining)
    if last_was_merged and keyword in {'elif', 'else'}:
      return _CanMergeLineIntoIfStatement(lines, remaining)

  # TODO(morbo): Other control statements?

  return False
def _CanMergeLineIntoIfStatement(lines, limit):
  """Determine if we can merge a short if-then statement into one line.

  Two lines of an if-then statement can be merged if they were that way in the
  original source, fit on the line without going over the column limit, and are
  considered "simple" statements --- typically statements like 'pass',
  'continue', and 'break'.

  Arguments:
    lines: (list of UnwrappedLine) The lines we are wanting to merge.
    limit: (int) The amount of space remaining on the line.

  Returns:
    True if the lines can be merged, False otherwise.
  """
  then_line = lines[1]
  if len(then_line.tokens) == 1 and then_line.last.is_multiline_string:
    # This might be part of a multiline shebang.
    return True
  if lines[0].lineno != then_line.lineno:
    # Only merge lines that were already joined in the original source.
    return False
  if then_line.last.total_length >= limit:
    # The merged result would overflow the column limit.
    return False
  return style.Get('JOIN_MULTIPLE_LINES')

View File

@ -0,0 +1,113 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for Python2 / Python3 compatibility."""
import io
import os
import sys
# True when running under Python 3.x / Python 3.6+ respectively.
PY3 = sys.version_info[0] >= 3
PY36 = sys.version_info[0] >= 3 and sys.version_info[1] >= 6

if PY3:
  StringIO = io.StringIO
  BytesIO = io.BytesIO

  import codecs

  def open_with_encoding(filename, mode, encoding, newline=''):  # pylint: disable=unused-argument
    # codecs.open ignores 'newline'; the parameter exists only so the
    # signature matches io.open used on the Python 2 branch.
    return codecs.open(filename, mode=mode, encoding=encoding)

  import functools
  lru_cache = functools.lru_cache

  range = range
  ifilter = filter

  raw_input = input

  import configparser

  # Mappings from strings to booleans (such as '1' to True, 'false' to False,
  # etc.)
  CONFIGPARSER_BOOLEAN_STATES = configparser.ConfigParser.BOOLEAN_STATES
else:
  import __builtin__
  import cStringIO
  StringIO = BytesIO = cStringIO.StringIO

  open_with_encoding = io.open

  # Python 2.7 doesn't have a native LRU cache, so do nothing.
  def lru_cache(maxsize=128, typed=False):

    def fake_wrapper(user_function):
      return user_function

    return fake_wrapper

  range = xrange

  from itertools import ifilter

  raw_input = raw_input

  import ConfigParser as configparser
  CONFIGPARSER_BOOLEAN_STATES = configparser.ConfigParser._boolean_states  # pylint: disable=protected-access
def EncodeAndWriteToStdout(s, encoding='utf-8'):
  """Encode the given string and emit to stdout.

  The string may contain non-ascii characters. This is a problem when stdout is
  redirected, because then Python doesn't know the encoding and we may get a
  UnicodeEncodeError.

  Arguments:
    s: (string) The string to encode.
    encoding: (string) The encoding of the string.
  """
  if PY3:
    # Write the raw bytes to the underlying binary buffer, bypassing the
    # text layer's (possibly wrong) encoding.
    sys.stdout.buffer.write(s.encode(encoding))
  elif sys.platform == 'win32':
    # On python 2 and Windows universal newline transformation will be in
    # effect on stdout. Python 2 will not let us avoid the easily because
    # it happens based on whether the file handle is opened in O_BINARY or
    # O_TEXT state. However we can tell Windows itself to change the current
    # mode, and python 2 will follow suit. However we must take care to change
    # the mode on the actual external stdout not just the current sys.stdout
    # which may have been monkey-patched inside the python environment.
    import msvcrt  # pylint: disable=g-import-not-at-top
    if sys.__stdout__ is sys.stdout:
      msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    sys.stdout.write(s.encode(encoding))
  else:
    sys.stdout.write(s.encode(encoding))
# Provide a 'unicode' callable on both major versions: on Python 3 it is
# simply str; on Python 2 it decodes UTF-8 bytes to a unicode object.
if PY3:
  unicode = str  # pylint: disable=redefined-builtin,invalid-name
else:

  def unicode(s):  # pylint: disable=invalid-name
    """Force conversion of s to unicode."""
    return __builtin__.unicode(s, 'utf-8')
# In Python 3.2+, readfp is deprecated in favor of read_file, which doesn't
# exist in Python 2 yet. To avoid deprecation warnings, subclass ConfigParser to
# fix this - now read_file works across all Python versions we care about.
class ConfigParser(configparser.ConfigParser):
  """ConfigParser with a Python 3 style read_file() on all versions."""

  if not PY3:

    def read_file(self, fp, source=None):
      # readfp() is the (deprecated) Python 2 spelling of read_file().
      self.readfp(fp, filename=source)

View File

@ -0,0 +1,376 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
[for a description of what an unwrapped line is, see unwrapped_line.py]
This is a pytree visitor that goes over a parse tree and produces a list of
UnwrappedLine containers from it, each with its own depth and containing all
the tokens that could fit on the line if there were no maximal line-length
limitations.
Note: a precondition to running this visitor and obtaining correct results is
for the tree to have its comments spliced in as nodes. Prefixes are ignored.
For most uses, the convenience function UnwrapPyTree should be sufficient.
"""
# The word "token" is overloaded within this module, so for clarity rename
# the imported pgen2.token module.
from lib2to3 import pytree
from lib2to3.pgen2 import token as grammar_token
from yapf.yapflib import pytree_utils
from yapf.yapflib import pytree_visitor
from yapf.yapflib import split_penalty
from yapf.yapflib import unwrapped_line
def UnwrapPyTree(tree):
  """Create and return a list of unwrapped lines from the given pytree.

  Arguments:
    tree: the top-level pytree node to unwrap.

  Returns:
    A list of UnwrappedLine objects.
  """
  visitor = PyTreeUnwrapper()
  visitor.Visit(tree)
  # Visiting order isn't guaranteed to match source order, so sort the
  # resulting lines by their original line number.
  return sorted(visitor.GetUnwrappedLines(), key=lambda line: line.lineno)
# Grammar tokens considered as whitespace for the purpose of unwrapping:
# they delimit logical lines but never appear within one.
_WHITESPACE_TOKENS = frozenset([
    grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
    grammar_token.ENDMARKER
])
class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
"""PyTreeUnwrapper - see file-level docstring for detailed description.
Note: since this implements PyTreeVisitor and node names in lib2to3 are
underscore_separated, the visiting methods of this class are named as
Visit_node_name. invalid-name pragmas are added to each such method to silence
a style warning. This is forced on us by the usage of lib2to3, and re-munging
method names to make them different from actual node names sounded like a
confusing and brittle affair that wasn't worth it for this small & controlled
deviation from the style guide.
To understand the connection between visitor methods in this class, some
familiarity with the Python grammar is required.
"""
  def __init__(self):
    """Start with an empty result list and a fresh line at depth 0."""
    # A list of all unwrapped lines finished visiting so far.
    self._unwrapped_lines = []

    # Builds up a "current" unwrapped line while visiting pytree nodes. Some
    # nodes will finish a line and start a new one.
    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)

    # Current indentation depth.
    self._cur_depth = 0
  def GetUnwrappedLines(self):
    """Fetch the result of the tree walk.

    Note: only call this after visiting the whole tree.

    Returns:
      A list of UnwrappedLine objects.
    """
    # Make sure the last line that was being populated is flushed.
    self._StartNewLine()
    return self._unwrapped_lines
def _StartNewLine(self):
"""Finish current line and start a new one.
Place the currently accumulated line into the _unwrapped_lines list and
start a new one.
"""
if self._cur_unwrapped_line.tokens:
self._unwrapped_lines.append(self._cur_unwrapped_line)
_MatchBrackets(self._cur_unwrapped_line)
_AdjustSplitPenalty(self._cur_unwrapped_line)
self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth)
_STMT_TYPES = frozenset({
'if_stmt',
'while_stmt',
'for_stmt',
'try_stmt',
'expect_clause',
'with_stmt',
'funcdef',
'classdef',
})
# pylint: disable=invalid-name,missing-docstring
def Visit_simple_stmt(self, node):
# A 'simple_stmt' conveniently represents a non-compound Python statement,
# i.e. a statement that does not contain other statements.
# When compound nodes have a single statement as their suite, the parser
# can leave it in the tree directly without creating a suite. But we have
# to increase depth in these cases as well. However, don't increase the
# depth of we have a simple_stmt that's a comment node. This represents a
# standalone comment and in the case of it coming directly after the
# funcdef, it is a "top" comment for the whole function.
# TODO(eliben): add more relevant compound statements here.
single_stmt_suite = (node.parent and
pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
is_comment_stmt = pytree_utils.IsCommentStatement(node)
if single_stmt_suite and not is_comment_stmt:
self._cur_depth += 1
self._StartNewLine()
self.DefaultNodeVisit(node)
if single_stmt_suite and not is_comment_stmt:
self._cur_depth -= 1
def _VisitCompoundStatement(self, node, substatement_names):
  """Helper for visiting compound statements.

  Python compound statements serve as containers for other statements. Thus,
  when we encounter a new compound statement we start a new unwrapped line.

  Arguments:
    node: the node to visit.
    substatement_names: set of node names. A compound statement will be
      recognized as a NAME node with a name in this set.
  """
  for child in node.children:
    # A pytree is structured in such a way that a single 'if_stmt' node will
    # contain all the 'if', 'elif' and 'else' nodes as children (similar
    # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
    # we visit all children here and create a new line before the requested
    # set of nodes.
    if (child.type == grammar_token.NAME and
        child.value in substatement_names):
      self._StartNewLine()
    self.Visit(child)
# Keywords that begin each clause of the corresponding compound statement;
# a new unwrapped line starts before each of them.
_IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})

def Visit_if_stmt(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)

_WHILE_STMT_ELEMS = frozenset({'while', 'else'})

def Visit_while_stmt(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)

_FOR_STMT_ELEMS = frozenset({'for', 'else'})

def Visit_for_stmt(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)

_TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})

def Visit_try_stmt(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)

_EXCEPT_STMT_ELEMS = frozenset({'except'})

def Visit_except_clause(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)

_FUNC_DEF_ELEMS = frozenset({'def'})

def Visit_funcdef(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)

def Visit_async_funcdef(self, node):  # pylint: disable=invalid-name
  """Visit an async function definition (ASYNC keyword + funcdef)."""
  self._StartNewLine()
  index = 0
  # Emit everything up to and including the ASYNC keyword on the new line.
  for child in node.children:
    index += 1
    self.Visit(child)
    if pytree_utils.NodeName(child) == 'ASYNC':
      break
  # The node following ASYNC wraps the actual definition; visit its children
  # directly so they are handled by the usual clause visitors.
  for child in node.children[index].children:
    self.Visit(child)

_CLASS_DEF_ELEMS = frozenset({'class'})

def Visit_classdef(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)

def Visit_async_stmt(self, node):  # pylint: disable=invalid-name
  """Visit an async statement (ASYNC keyword + compound statement)."""
  self._StartNewLine()
  index = 0
  for child in node.children:
    index += 1
    self.Visit(child)
    if pytree_utils.NodeName(child) == 'ASYNC':
      break
  # Visit the children of the statement that follows the ASYNC keyword.
  for child in node.children[index].children:
    self.Visit(child)

def Visit_decorators(self, node):  # pylint: disable=invalid-name
  # Each decorator goes on its own unwrapped line.
  for child in node.children:
    self._StartNewLine()
    self.Visit(child)

def Visit_decorated(self, node):  # pylint: disable=invalid-name
  # A decorated definition: new line before each part (decorators, def/class).
  for child in node.children:
    self._StartNewLine()
    self.Visit(child)

_WITH_STMT_ELEMS = frozenset({'with'})

def Visit_with_stmt(self, node):  # pylint: disable=invalid-name
  self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)

def Visit_suite(self, node):  # pylint: disable=invalid-name
  # A 'suite' starts a new indentation level in Python.
  self._cur_depth += 1
  self._StartNewLine()
  self.DefaultNodeVisit(node)
  self._cur_depth -= 1
def Visit_listmaker(self, node):  # pylint: disable=invalid-name
  # A list literal; a trailing comma forces one element per line.
  _DetermineMustSplitAnnotation(node)
  self.DefaultNodeVisit(node)

def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
  # A dict or set literal; same trailing-comma splitting rule.
  _DetermineMustSplitAnnotation(node)
  self.DefaultNodeVisit(node)

def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
  # Only a parenthesized import list may be split across lines.
  if node.prev_sibling.value == '(':
    _DetermineMustSplitAnnotation(node)
  self.DefaultNodeVisit(node)

def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
  # Expression lists only force splits when they contain comments.
  if _ContainsComments(node):
    _DetermineMustSplitAnnotation(node)
  self.DefaultNodeVisit(node)

def Visit_arglist(self, node):  # pylint: disable=invalid-name
  # A call's argument list.
  _DetermineMustSplitAnnotation(node)
  self.DefaultNodeVisit(node)

def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
  # A function signature's (possibly annotated) parameter list.
  _DetermineMustSplitAnnotation(node)
  self.DefaultNodeVisit(node)
def DefaultLeafVisit(self, leaf):
  """Default visitor for tree leaves.

  A tree leaf is just appended to the current unwrapped line, except for
  whitespace tokens and statement separators, which end the line.

  Arguments:
    leaf: the leaf to visit.
  """
  # Whitespace-ish tokens (see _WHITESPACE_TOKENS) terminate the current line.
  if leaf.type in _WHITESPACE_TOKENS:
    self._StartNewLine()
  elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
    if leaf.value == ';':
      # Split up multiple statements on one line.
      self._StartNewLine()
    else:
      # Add non-whitespace tokens and comments that aren't empty.
      self._cur_unwrapped_line.AppendNode(leaf)
# Maps each closing bracket to its opening counterpart.
_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}


def _MatchBrackets(uwline):
  """Pair up each bracket token with its counterpart on the line.

  For every open bracket ('[', '{', or '('), find the associated closing
  bracket and "match" them up: each token gets a 'matching_bracket' pointer
  to its partner.

  Arguments:
    uwline: (UnwrappedLine) An unwrapped line.
  """
  open_brackets = []
  for tok in uwline.tokens:
    if tok.value in pytree_utils.OPENING_BRACKETS:
      open_brackets.append(tok)
    elif tok.value in pytree_utils.CLOSING_BRACKETS:
      opening = open_brackets.pop()
      opening.matching_bracket = tok
      tok.matching_bracket = opening
def _AdjustSplitPenalty(uwline):
  """Visit the node and adjust the split penalties if needed.

  A token shouldn't be split if it's not within a bracket pair. Mark any token
  that's not within a bracket pair as "unbreakable".

  Arguments:
    uwline: (UnwrappedLine) An unwrapped line.
  """
  bracket_level = 0
  for index, token in enumerate(uwline.tokens):
    # The first token (index 0) is never marked: the line always starts there.
    if index and not bracket_level:
      pytree_utils.SetNodeAnnotation(token.node,
                                     pytree_utils.Annotation.SPLIT_PENALTY,
                                     split_penalty.UNBREAKABLE)
    # Track bracket nesting so tokens inside brackets stay breakable.
    if token.value in pytree_utils.OPENING_BRACKETS:
      bracket_level += 1
    elif token.value in pytree_utils.CLOSING_BRACKETS:
      bracket_level -= 1
def _DetermineMustSplitAnnotation(node):
  """Enforce a split in the list if the list ends with a comma."""
  # For a comment-free list, only a trailing comma forces the splits; lists
  # that contain comments are annotated unconditionally (the callers only
  # invoke this for such lists when splitting is wanted).
  if not _ContainsComments(node):
    if (not isinstance(node.children[-1], pytree.Leaf) or
        node.children[-1].value != ','):
      return
  num_children = len(node.children)
  index = 0
  # The first element always starts a new line.
  _SetMustSplitOnFirstLeaf(node.children[0])
  while index < num_children - 1:
    child = node.children[index]
    if isinstance(child, pytree.Leaf) and child.value == ',':
      next_child = node.children[index + 1]
      if next_child.type == grammar_token.COMMENT:
        # Skip the comment following the comma; the split annotation goes on
        # the element that comes after it.
        index += 1
        if index >= num_children - 1:
          break
      _SetMustSplitOnFirstLeaf(node.children[index + 1])
    index += 1
def _ContainsComments(node):
"""Return True if the list has a comment in it."""
if isinstance(node, pytree.Leaf):
return node.type == grammar_token.COMMENT
for child in node.children:
if _ContainsComments(child):
return True
return False
def _SetMustSplitOnFirstLeaf(node):
  """Set the "must split" annotation on the first leaf node."""
  # Descend through first children until a leaf is reached.
  first_leaf = node
  while not isinstance(first_leaf, pytree.Leaf):
    first_leaf = first_leaf.children[0]
  pytree_utils.SetNodeAnnotation(first_leaf,
                                 pytree_utils.Annotation.MUST_SPLIT, True)

View File

@ -0,0 +1,297 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""pytree-related utilities.
This module collects various utilities related to the parse trees produced by
the lib2to3 library.
NodeName(): produces a string name for pytree nodes.
ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
a given string with code to a pytree.
InsertNodeBefore(): insert a node before another in a pytree.
InsertNodeAfter(): insert a node after another in a pytree.
{Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
"""
import ast
from lib2to3 import pygram
from lib2to3 import pytree
from lib2to3.pgen2 import driver
from lib2to3.pgen2 import parse
from lib2to3.pgen2 import token
# TODO(eliben): We may want to get rid of this filtering at some point once we
# have a better understanding of what information we need from the tree. Then,
# these tokens may be filtered out from the tree before the tree gets to the
# unwrapper.
NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
OPENING_BRACKETS = frozenset({'(', '[', '{'})
CLOSING_BRACKETS = frozenset({')', ']', '}'})
class Annotation(object):
  """Annotation names associated with pytrees."""

  # Indentation string of a node's children (see DumpNodeToString).
  CHILD_INDENT = 'child_indent'
  # Number of newlines required before a token.
  NEWLINES = 'newlines'
  # Marks a token before which a line break is mandatory.
  MUST_SPLIT = 'must_split'
  # Numeric penalty for splitting the line before a token.
  SPLIT_PENALTY = 'split_penalty'
  # Set of syntactic subtypes attached to a node (see RemoveSubtypeAnnotation).
  SUBTYPE = 'subtype'
def NodeName(node):
  """Produce a string name for a given node.

  For a Leaf this is the token name, and for a Node this is the type.

  Arguments:
    node: a tree node

  Returns:
    Name as a string.
  """
  # lib2to3 assigns values below 256 to token types; grammar symbols get
  # values of 256 and above.
  if node.type >= 256:
    return pygram.python_grammar.number2symbol[node.type]
  return token.tok_name[node.type]
# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
# context where a keyword is disallowed).
# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
# monkey-patching.
_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
del _GRAMMAR_FOR_PY3.keywords['exec']

_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
# 'nonlocal' is Python 3 only; dropping the keyword lets Python 2 code use
# the name freely.
del _GRAMMAR_FOR_PY2.keywords['nonlocal']


def ParseCodeToTree(code):
  """Parse the given code to a lib2to3 pytree.

  Arguments:
    code: a string with the code to parse.

  Raises:
    SyntaxError if the code is invalid syntax.
    parse.ParseError if some other parsing failure.

  Returns:
    The root node of the parsed tree.
  """
  # This function is tiny, but the incantation for invoking the parser correctly
  # is sufficiently magical to be worth abstracting away.
  try:
    # Try to parse using a Python 3 grammar, which is more permissive (print and
    # exec are not keywords).
    parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
    tree = parser_driver.parse_string(code, debug=False)
  except parse.ParseError:
    # Now try to parse using a Python 2 grammar; If this fails, then
    # there's something else wrong with the code.
    try:
      parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
      tree = parser_driver.parse_string(code, debug=False)
    except parse.ParseError:
      # Raise a syntax error if the code is invalid python syntax.
      try:
        ast.parse(code)
      except SyntaxError as e:
        raise e
      else:
        # ast accepts the code, so re-raise the original ParseError.
        raise
  return _WrapEndMarker(tree)
def _WrapEndMarker(tree):
"""Wrap a single ENDMARKER token in a "file_input" node.
Arguments:
tree: (pytree.Node) The root node of the parsed tree.
Returns:
The root node of the parsed tree. If the tree is a single ENDMARKER node,
then that node is wrapped in a "file_input" node. That will ensure we don't
skip comments attached to that node.
"""
if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
return pytree.Node(pygram.python_symbols.file_input, [tree])
return tree
def InsertNodesBefore(new_nodes, target):
  """Insert new_nodes before the given target location in the tree.

  Arguments:
    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
      tree).
    target: the target node before which the new nodes will be inserted.

  Raises:
    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
  """
  for node in new_nodes:
    _InsertNodeAt(node, target, after=False)


def InsertNodesAfter(new_nodes, target):
  """Insert new_nodes after the given target location in the tree.

  Arguments:
    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
      tree).
    target: the target node after which the new nodes will be inserted.

  Raises:
    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
  """
  # Insert in reverse so the nodes end up in their original relative order.
  for node in reversed(new_nodes):
    _InsertNodeAt(node, target, after=True)
def _InsertNodeAt(new_node, target, after=False):
  """Underlying implementation for node insertion.

  Arguments:
    new_node: a new node to insert (this node should not be in the tree).
    target: the target node.
    after: if True, new_node is inserted after target. Otherwise, it's inserted
      before target.

  Returns:
    nothing

  Raises:
    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
  """
  # Protect against attempts to insert nodes which already belong to some tree.
  if new_node.parent is not None:
    raise RuntimeError('inserting node which already has a parent',
                       (new_node, new_node.parent))

  # The code here is based on pytree.Base.next_sibling
  parent_of_target = target.parent
  if parent_of_target is None:
    raise RuntimeError('expected target node to have a parent', (target,))

  # Locate target among its siblings and insert at (or just after) its slot.
  for i, child in enumerate(parent_of_target.children):
    if child is target:
      insertion_index = i + 1 if after else i
      parent_of_target.insert_child(insertion_index, new_node)
      return

  raise RuntimeError('unable to find insertion point for target node',
                     (target,))
# The following constant and functions implement a simple custom annotation
# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
# managed through GetNodeAnnotation and SetNodeAnnotation.
_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'


def GetNodeAnnotation(node, annotation, default=None):
  """Get annotation value from a node.

  Arguments:
    node: the node.
    annotation: annotation name - a string.
    default: the default value to return if there's no annotation.

  Returns:
    Value of the annotation in the given node. If the node doesn't have this
    particular annotation name yet, returns default.
  """
  attr_name = _NODE_ANNOTATION_PREFIX + annotation
  return getattr(node, attr_name, default)
def SetNodeAnnotation(node, annotation, value):
  """Set annotation value on a node.

  Arguments:
    node: the node.
    annotation: annotation name - a string.
    value: annotation value to set.
  """
  setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)


def AppendNodeAnnotation(node, annotation, value):
  """Appends an annotation value to a list of annotations on the node.

  The annotation is stored as a set, so appending an existing value is a
  no-op.

  Arguments:
    node: the node.
    annotation: annotation name - a string.
    value: annotation value to set.
  """
  # A fresh set is created on the first append; later appends mutate it.
  attr = GetNodeAnnotation(node, annotation, set())
  attr.add(value)
  SetNodeAnnotation(node, annotation, attr)


def RemoveSubtypeAnnotation(node, value):
  """Removes an annotation value from the subtype annotations on the node.

  Arguments:
    node: the node.
    value: annotation value to remove.
  """
  attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
  # Only rewrite the annotation when the value is actually present.
  if attr and value in attr:
    attr.remove(value)
    SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
def DumpNodeToString(node):
  """Dump a string representation of the given node. For debugging.

  Arguments:
    node: the node.

  Returns:
    The string representation.
  """
  if isinstance(node, pytree.Leaf):
    # Leaves carry position information (lineno/column) and a prefix.
    fmt = '{name}({value}) [lineno={lineno}, column={column}, prefix={prefix}]'
    return fmt.format(
        name=NodeName(node),
        value=_PytreeNodeRepr(node),
        lineno=node.lineno,
        column=node.column,
        prefix=repr(node.prefix))
  else:
    # Interior nodes report their child count and child-indent annotation.
    fmt = '{node} [{len} children] [child_indent="{indent}"]'
    return fmt.format(
        node=NodeName(node),
        len=len(node.children),
        indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
def _PytreeNodeRepr(node):
  """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
  if isinstance(node, pytree.Node):
    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
                           [_PytreeNodeRepr(c) for c in node.children])
  if isinstance(node, pytree.Leaf):
    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
  # NOTE: implicitly returns None for objects that are neither Node nor Leaf.


def IsCommentStatement(node):
  """Return True if node is a simple_stmt whose first child is a COMMENT."""
  return (NodeName(node) == 'simple_stmt' and
          node.children[0].type == token.COMMENT)

View File

@ -0,0 +1,135 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic visitor pattern for pytrees.
The lib2to3 parser produces a "pytree" - syntax tree consisting of Node
and Leaf types. This module implements a visitor pattern for such trees.
It also exports a basic "dumping" visitor that dumps a textual representation of
a pytree into a stream.
PyTreeVisitor: a generic visitor pattern fo pytrees.
PyTreeDumper: a configurable "dumper" for displaying pytrees.
DumpPyTree(): a convenience function to dump a pytree.
"""
import sys
from lib2to3 import pytree
from yapf.yapflib import pytree_utils
class PyTreeVisitor(object):
  """Visitor pattern for pytree trees.

  Methods named Visit_XXX will be invoked when a node with type XXX is
  encountered in the tree. The type is either a token type (for Leaf nodes) or
  grammar symbols (for Node nodes). The return value of Visit_XXX methods is
  ignored by the visitor.

  Visitors can modify node contents but must not change the tree structure
  (e.g. add/remove children and move nodes around).

  This is a very common visitor pattern in Python code; it's also used in the
  Python standard library ast module for providing AST visitors.

  Note: this makes names that aren't style conformant, so such visitor methods
  need to be marked with # pylint: disable=invalid-name We don't have a choice
  here, because lib2to3 nodes have under_separated names.

  For more complex behavior, the visit, DefaultNodeVisit and DefaultLeafVisit
  methods can be overridden. Don't forget to invoke DefaultNodeVisit for nodes
  that may have children - otherwise the children will not be visited.
  """

  def Visit(self, node):
    """Visit a node."""
    # Dispatch dynamically on the node's name, e.g. 'Visit_funcdef'.
    method = 'Visit_{0}'.format(pytree_utils.NodeName(node))
    if hasattr(self, method):
      # Found a specific visitor for this node
      getattr(self, method)(node)
    else:
      if isinstance(node, pytree.Leaf):
        self.DefaultLeafVisit(node)
      else:
        self.DefaultNodeVisit(node)

  def DefaultNodeVisit(self, node):
    """Default visitor for Node: visits the node's children depth-first.

    This method is invoked when no specific visitor for the node is defined.

    Arguments:
      node: the node to visit
    """
    for child in node.children:
      self.Visit(child)

  def DefaultLeafVisit(self, leaf):
    """Default visitor for Leaf: no-op.

    This method is invoked when no specific visitor for the leaf is defined.

    Arguments:
      leaf: the leaf to visit
    """
    pass
def DumpPyTree(tree, target_stream=sys.stdout):
  """Convenience function for dumping a given pytree.

  This function presents a very minimal interface. For more configurability (for
  example, controlling how specific node types are displayed), use PyTreeDumper
  directly.

  Arguments:
    tree: the tree to dump.
    target_stream: the stream to dump the tree to. A file-like object. By
      default will dump into stdout.
  """
  dumper = PyTreeDumper(target_stream)
  dumper.Visit(tree)


class PyTreeDumper(PyTreeVisitor):
  """Visitor that dumps the tree to a stream.

  Implements the PyTreeVisitor interface.
  """

  def __init__(self, target_stream=sys.stdout):
    """Create a tree dumper.

    Arguments:
      target_stream: the stream to dump the tree to. A file-like object. By
        default will dump into stdout.
    """
    self._target_stream = target_stream
    # Current indentation in spaces, reflecting the depth in the tree.
    self._current_indent = 0

  def _DumpString(self, s):
    # Write one line, indented to the current tree depth.
    self._target_stream.write('{0}{1}\n'.format(' ' * self._current_indent, s))

  def DefaultNodeVisit(self, node):
    # Dump information about the current node, and then use the generic
    # DefaultNodeVisit visitor to dump each of its children.
    self._DumpString(pytree_utils.DumpNodeToString(node))
    self._current_indent += 2
    super(PyTreeDumper, self).DefaultNodeVisit(node)
    self._current_indent -= 2

  def DefaultLeafVisit(self, leaf):
    # Leaves have no children, so no indentation change is needed.
    self._DumpString(pytree_utils.DumpNodeToString(leaf))

View File

@ -0,0 +1,588 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decide what the format for the code should be.
The `unwrapped_line.UnwrappedLine`s are now ready to be formatted.
UnwrappedLines that can be merged together are. The best formatting is returned
as a string.
Reformat(): the main function exported by this module.
"""
from __future__ import unicode_literals
import collections
import heapq
import re
from lib2to3 import pytree
from lib2to3.pgen2 import token
from yapf.yapflib import format_decision_state
from yapf.yapflib import format_token
from yapf.yapflib import line_joiner
from yapf.yapflib import pytree_utils
from yapf.yapflib import style
from yapf.yapflib import verifier
def Reformat(uwlines, verify=False):
  """Reformat the unwrapped lines.

  Arguments:
    uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format.
    verify: (bool) True if reformatted code should be verified for syntax.

  Returns:
    A string representing the reformatted code.
  """
  final_lines = []
  prev_uwline = None  # The previous line.
  indent_width = style.Get('INDENT_WIDTH')

  for uwline in _SingleOrMergedLines(uwlines):
    first_token = uwline.first
    _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines)

    indent_amt = indent_width * uwline.depth
    state = format_decision_state.FormatDecisionState(uwline, indent_amt)
    state.MoveStateToNextToken()

    if not uwline.disable:
      # Strip trailing whitespace from a comment at either end of the line.
      if uwline.first.is_comment:
        uwline.first.node.value = uwline.first.node.value.rstrip()
      elif uwline.last.is_comment:
        uwline.last.node.value = uwline.last.node.value.rstrip()
      if prev_uwline and prev_uwline.disable:
        # Keep the vertical spacing between a disabled and enabled formatting
        # region.
        _RetainVerticalSpacingBetweenTokens(uwline.first, prev_uwline.last)
      if any(tok.is_comment for tok in uwline.tokens):
        _RetainVerticalSpacingBeforeComments(uwline)

    if (_LineContainsI18n(uwline) or uwline.disable or
        _LineHasContinuationMarkers(uwline)):
      # Emit these lines as close to the original layout as possible.
      _RetainHorizontalSpacing(uwline)
      _RetainVerticalSpacing(uwline, prev_uwline)
      _EmitLineUnformatted(state)
    elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split
                                                   for tok in uwline.tokens):
      # The unwrapped line fits on one line.
      while state.next_token:
        state.AddTokenToState(newline=False, dry_run=False)
    else:
      if not _AnalyzeSolutionSpace(state):
        # Failsafe mode. If there isn't a solution to the line, then just emit
        # it as is.
        state = format_decision_state.FormatDecisionState(uwline, indent_amt)
        state.MoveStateToNextToken()
        _RetainHorizontalSpacing(uwline)
        _RetainVerticalSpacing(uwline, prev_uwline)
        _EmitLineUnformatted(state)

    final_lines.append(uwline)
    prev_uwline = uwline
  return _FormatFinalLines(final_lines, verify)
def _RetainHorizontalSpacing(uwline):
  """Retain all horizontal spacing between tokens."""
  for tok in uwline.tokens:
    tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth)


def _RetainVerticalSpacing(cur_uwline, prev_uwline):
  """Retain the vertical spacing of every token on the line.

  Arguments:
    cur_uwline: (unwrapped_line.UnwrappedLine) The line being formatted.
    prev_uwline: (unwrapped_line.UnwrappedLine or None) The preceding line.
  """
  prev_tok = None
  if prev_uwline is not None:
    # The first token's spacing is relative to the end of the previous line.
    prev_tok = prev_uwline.last
  for cur_tok in cur_uwline.tokens:
    _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok)
    prev_tok = cur_tok
def _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok):
"""Retain vertical spacing between two tokens."""
if prev_tok is None:
return
if prev_tok.is_string:
prev_lineno = prev_tok.lineno + prev_tok.value.count('\n')
elif prev_tok.is_pseudo_paren:
if not prev_tok.previous_token.is_multiline_string:
prev_lineno = prev_tok.previous_token.lineno
else:
prev_lineno = prev_tok.lineno
else:
prev_lineno = prev_tok.lineno
if cur_tok.is_comment:
cur_lineno = cur_tok.lineno - cur_tok.value.count('\n')
else:
cur_lineno = cur_tok.lineno
cur_tok.AdjustNewlinesBefore(cur_lineno - prev_lineno)
def _RetainVerticalSpacingBeforeComments(uwline):
  """Retain vertical spacing before comments."""
  prev_token = None
  for tok in uwline.tokens:
    if tok.is_comment and prev_token:
      # Collapse anything larger than one blank line down to exactly one.
      if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1:
        tok.AdjustNewlinesBefore(ONE_BLANK_LINE)
    prev_token = tok


def _EmitLineUnformatted(state):
  """Emit the line without formatting.

  The line contains code that if reformatted would break a non-syntactic
  convention. E.g., i18n comments and function calls are tightly bound by
  convention. Instead, we calculate when / if a newline should occur and honor
  that. But otherwise the code emitted will be the same as the original code.

  Arguments:
    state: (format_decision_state.FormatDecisionState) The format decision
      state.
  """
  prev_lineno = None
  while state.next_token:
    previous_token = state.next_token.previous_token
    previous_lineno = previous_token.lineno

    # A multiline string ends several lines below its reported lineno.
    if previous_token.is_multiline_string:
      previous_lineno += previous_token.value.count('\n')

    if previous_token.is_continuation:
      newline = False
    else:
      # Emit a newline only where the original code had one.
      newline = (prev_lineno is not None and
                 state.next_token.lineno > previous_lineno)

    prev_lineno = state.next_token.lineno
    state.AddTokenToState(newline=newline, dry_run=False)
def _LineContainsI18n(uwline):
  """Return true if there are i18n comments or function calls in the line.

  I18n comments and pseudo-function calls are closely related. They cannot
  be moved apart without breaking i18n.

  Arguments:
    uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted.

  Returns:
    True if the line contains i18n comments or function calls. False otherwise.
  """
  if style.Get('I18N_COMMENT'):
    for tok in uwline.tokens:
      if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value):
        # Contains an i18n comment.
        return True

  if style.Get('I18N_FUNCTION_CALL'):
    length = len(uwline.tokens)
    index = 0
    while index < length - 1:
      # An i18n function call is a configured name immediately followed
      # by an opening parenthesis.
      if (uwline.tokens[index + 1].value == '(' and
          uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')):
        return True
      index += 1
  return False


def _LineHasContinuationMarkers(uwline):
  """Return true if the line has continuation markers in it."""
  return any(tok.is_continuation for tok in uwline.tokens)


def _CanPlaceOnSingleLine(uwline):
  """Determine if the unwrapped line can go on a single line.

  Arguments:
    uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted.

  Returns:
    True if the line can or should be added to a single line. False otherwise.
  """
  indent_amt = style.Get('INDENT_WIDTH') * uwline.depth
  last = uwline.last
  last_index = -1
  # A trailing pylint comment does not count against the column limit.
  if last.is_pylint_comment:
    last = last.previous_token
    last_index = -2
  if last is None:
    return True
  # The line fits when its last token ends within the column limit and no
  # earlier token is a comment (a comment forces a multi-line layout).
  return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and
          not any(tok.is_comment for tok in uwline.tokens[:last_index]))
def _FormatFinalLines(final_lines, verify):
  """Compose the final output from the finalized lines.

  Arguments:
    final_lines: (list of unwrapped_line.UnwrappedLine) The lines to render.
    verify: (bool) True if each rendered line should be verified for syntax.

  Returns:
    The rendered code as a single string, ending with a newline.
  """
  formatted_code = []
  for line in final_lines:
    formatted_line = []
    for tok in line.tokens:
      if not tok.is_pseudo_paren:
        formatted_line.append(tok.whitespace_prefix)
        formatted_line.append(tok.value)
      else:
        # Pseudo parentheses are not emitted themselves; at most a single
        # space is inserted where one is needed between neighboring tokens.
        if (not tok.next_token.whitespace_prefix.startswith('\n') and
            not tok.next_token.whitespace_prefix.startswith(' ')):
          if (tok.previous_token.value == ':' or
              tok.next_token.value not in ',}])'):
            formatted_line.append(' ')

    formatted_code.append(''.join(formatted_line))
    if verify:
      verifier.VerifyCode(formatted_code[-1])

  return ''.join(formatted_code) + '\n'
class _StateNode(object):
  """An edge in the solution space from 'previous.state' to 'state'.

  Attributes:
    state: (format_decision_state.FormatDecisionState) The format decision state
      for this node.
    newline: If True, then on the edge from 'previous.state' to 'state' a
      newline is inserted.
    previous: (_StateNode) The previous state node in the graph.
  """

  # TODO(morbo): Add a '__cmp__' method.

  def __init__(self, state, newline, previous):
    # Clone the state so later mutations of this node don't leak into other
    # nodes that were derived from the same state object.
    self.state = state.Clone()
    self.newline = newline
    self.previous = previous

  def __repr__(self):  # pragma: no cover
    return 'StateNode(state=[\n{0}\n], newline={1})'.format(
        self.state, self.newline)


# A tuple of (penalty, count) that is used to prioritize the BFS. In case of
# equal penalties, we prefer states that were inserted first. During state
# generation, we make sure that we insert states first that break the line as
# late as possible.
_OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count'])

# An item in the prioritized BFS search queue. The 'StateNode's 'state' has
# the given '_OrderedPenalty'.
_QueueItem = collections.namedtuple('QueueItem',
                                    ['ordered_penalty', 'state_node'])
def _AnalyzeSolutionSpace(initial_state):
  """Analyze the entire solution space starting from initial_state.

  This implements a variant of Dijkstra's algorithm on the graph that spans
  the solution space (LineStates are the nodes). The algorithm tries to find
  the shortest path (the one with the lowest penalty) from 'initial_state' to
  the state where all tokens are placed.

  Arguments:
    initial_state: (format_decision_state.FormatDecisionState) The initial state
      to start the search from.

  Returns:
    True if a formatting solution was found. False otherwise.
  """
  count = 0
  seen = set()
  p_queue = []

  # Insert start element.
  node = _StateNode(initial_state, False, None)
  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), node))
  count += 1

  while p_queue:
    item = p_queue[0]
    penalty = item.ordered_penalty.penalty
    node = item.state_node
    # A state with no next token has placed every token: solution found.
    if not node.state.next_token:
      break
    heapq.heappop(p_queue)

    if count > 10000:
      # Bound the search: past this many explored states, stop including the
      # stack when comparing states (ignore_stack_for_comparison).
      node.state.ignore_stack_for_comparison = True

    if node.state in seen:
      continue

    seen.add(node.state)

    # FIXME(morbo): Add a 'decision' element?

    # Expand both successors; the non-newline one first so states that break
    # the line later are preferred on penalty ties.
    count = _AddNextStateToQueue(penalty, node, False, count, p_queue)
    count = _AddNextStateToQueue(penalty, node, True, count, p_queue)

  if not p_queue:
    # We weren't able to find a solution. Do nothing.
    return False

  _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node)
  return True
def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue):
  """Add the following state to the analysis queue.

  Assume the current state is 'previous_node' and has been reached with a
  penalty of 'penalty'. Insert a line break if 'newline' is True.

  Arguments:
    penalty: (int) The penalty associated with the path up to this point.
    previous_node: (_StateNode) The last _StateNode inserted into the priority
      queue.
    newline: (bool) Add a newline if True.
    count: (int) The number of elements in the queue.
    p_queue: (heapq) The priority queue representing the solution space.

  Returns:
    The updated number of elements in the queue.
  """
  must_split = previous_node.state.MustSplit()
  if newline and not previous_node.state.CanSplit(must_split):
    # Don't add a newline if the token cannot be split.
    return count
  if not newline and must_split:
    # Don't add a token we must split but where we aren't splitting.
    return count

  node = _StateNode(previous_node.state, newline, previous_node)
  # The path's penalty grows by the cost of placing the next token,
  # computed as a dry run so the queue state isn't mutated for real.
  penalty += node.state.AddTokenToState(
      newline=newline, dry_run=True, must_split=must_split)

  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(penalty, count), node))
  return count + 1
def _ReconstructPath(initial_state, current):
"""Reconstruct the path through the queue with lowest penalty.
Arguments:
initial_state: (format_decision_state.FormatDecisionState) The initial state
to start the search from.
current: (_StateNode) The node in the decision graph that is the end point
of the path with the least penalty.
"""
path = collections.deque()
while current.previous:
path.appendleft(current)
current = current.previous
for node in path:
initial_state.AddTokenToState(newline=node.newline, dry_run=False)
def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines):
  """Format the first token in the unwrapped line.

  Add a newline and the required indent before the first token of the
  unwrapped line.

  Arguments:
    first_token: (format_token.FormatToken) The first token in the unwrapped
      line.
    indent_depth: (int) The line's indentation depth.
    prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line
      previous to this line.
    final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines
      that have already been processed.
  """
  newlines_needed = _CalculateNumberOfNewlines(first_token, indent_depth,
                                               prev_uwline, final_lines)
  first_token.AddWhitespacePrefix(newlines_needed, indent_level=indent_depth)
# Newline counts used by _CalculateNumberOfNewlines. Each value appears to be
# the number of newline characters to emit (the line's own newline plus the
# requested blank lines) — NO_BLANK_LINES == 1 newline, i.e. no blank line.
NO_BLANK_LINES = 1
ONE_BLANK_LINE = 2
TWO_BLANK_LINES = 3
def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline,
                               final_lines):
  """Calculate the number of newlines we need to add.
  Arguments:
    first_token: (format_token.FormatToken) The first token in the unwrapped
      line.
    indent_depth: (int) The line's indentation depth.
    prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line
      previous to this line.
    final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines
      that have already been processed.
  Returns:
    The number of newlines needed before the first token (one of
    NO_BLANK_LINES, ONE_BLANK_LINE, TWO_BLANK_LINES, or 0 for the very first
    line in the file).
  """
  # TODO(morbo): Special handling for imports.
  # TODO(morbo): Create a knob that can tune these.
  if prev_uwline is None:
    # The first line in the file. Don't add blank lines.
    # FIXME(morbo): Is this correct?
    if first_token.newlines is not None:
      # Clear any recorded newline count so the original spacing is not
      # re-applied later.
      pytree_utils.SetNodeAnnotation(first_token.node,
                                     pytree_utils.Annotation.NEWLINES, None)
    return 0
  if first_token.is_docstring:
    if (prev_uwline.first.value == 'class' and
        style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')):
      # Enforce a blank line before a class's docstring.
      return ONE_BLANK_LINE
    # The docstring shouldn't have a newline before it.
    return NO_BLANK_LINES
  prev_last_token = prev_uwline.last
  if prev_last_token.is_docstring:
    if (not indent_depth and first_token.value in {'class', 'def', 'async'}):
      # Separate a class or function from the module-level docstring with two
      # blank lines.
      return TWO_BLANK_LINES
    if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
                                       prev_last_token):
      return NO_BLANK_LINES
    else:
      return ONE_BLANK_LINE
  if first_token.value in {'class', 'def', 'async', '@'}:
    # TODO(morbo): This can go once the blank line calculator is more
    # sophisticated.
    if not indent_depth:
      # This is a top-level class or function.
      is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0
      if (not prev_uwline.disable and prev_last_token.is_comment and
          not is_inline_comment):
        # This token follows a non-inline comment.
        if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
                                           prev_last_token):
          # Assume that the comment is "attached" to the current line.
          # Therefore, we want two blank lines before the comment.
          # Walk back over any run of comment-only lines so the blank lines go
          # before the first comment of the run.
          index = len(final_lines) - 1
          while index > 0:
            if not final_lines[index - 1].is_comment:
              break
            index -= 1
          if final_lines[index - 1].first.value == '@':
            final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES)
          else:
            prev_last_token.AdjustNewlinesBefore(TWO_BLANK_LINES)
          if first_token.newlines is not None:
            pytree_utils.SetNodeAnnotation(
                first_token.node, pytree_utils.Annotation.NEWLINES, None)
          return NO_BLANK_LINES
  elif prev_uwline.first.value in {'class', 'def', 'async'}:
    if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'):
      pytree_utils.SetNodeAnnotation(first_token.node,
                                     pytree_utils.Annotation.NEWLINES, None)
      return NO_BLANK_LINES
  # Calculate how many newlines were between the original lines. We want to
  # retain that formatting if it doesn't violate one of the style guide rules.
  if first_token.is_comment:
    # A comment token's lineno presumably refers to its last line; back up by
    # the number of embedded newlines to get its first line. TODO: confirm.
    first_token_lineno = first_token.lineno - first_token.value.count('\n')
  else:
    first_token_lineno = first_token.lineno
  prev_last_token_lineno = prev_last_token.lineno
  if prev_last_token.is_multiline_string:
    prev_last_token_lineno += prev_last_token.value.count('\n')
  if first_token_lineno - prev_last_token_lineno > 1:
    return ONE_BLANK_LINE
  return NO_BLANK_LINES
def _SingleOrMergedLines(uwlines):
  """Generate the lines we want to format.
  Arguments:
    uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format.
  Yields:
    Either a single line, if the current line cannot be merged with the
    succeeding line, or the next two lines merged into one line.
  """
  index = 0
  last_was_merged = False
  while index < len(uwlines):
    if uwlines[index].disable:
      # Formatting is disabled for this line. Re-join any following lines that
      # came from the same physical source line, inserting ';' leaves between
      # statements, so the disabled region is emitted verbatim.
      uwline = uwlines[index]
      index += 1
      while index < len(uwlines):
        column = uwline.last.column + 2
        if uwlines[index].lineno != uwline.lineno:
          break
        if uwline.last.value != ':':
          # Synthesize the semicolon that separated the two statements.
          leaf = pytree.Leaf(
              type=token.SEMI, value=';', context=('', (uwline.lineno, column)))
          uwline.AppendToken(format_token.FormatToken(leaf))
        for tok in uwlines[index].tokens:
          uwline.AppendToken(tok)
        index += 1
      yield uwline
    elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged):
      # TODO(morbo): This splice is potentially very slow. Come up with a more
      # performance-friendly way of determining if two lines can be merged.
      next_uwline = uwlines[index + 1]
      for tok in next_uwline.tokens:
        uwlines[index].AppendToken(tok)
      if (len(next_uwline.tokens) == 1 and
          next_uwline.first.is_multiline_string):
        # This may be a multiline shebang. In that case, we want to retain the
        # formatting. Otherwise, it could mess up the shell script's syntax.
        uwlines[index].disable = True
      yield uwlines[index]
      index += 2
      last_was_merged = True
    else:
      yield uwlines[index]
      index += 1
      last_was_merged = False
def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token):
"""Determine if there are no blank lines before the current token.
The previous token is a docstring or comment. The prev_token_lineno is the
start of the text of that token. Counting the number of newlines in its text
gives us the extent and thus where the line number of the end of the
docstring or comment. After that, we just compare it to the current token's
line number to see if there are blank lines between them.
Arguments:
text: (unicode) The text of the docstring or comment before the current
token.
cur_token: (format_token.FormatToken) The current token in the unwrapped
line.
prev_token: (format_token.FormatToken) The previous token in the unwrapped
line.
Returns:
True if there is no blank line before the current token.
"""
cur_token_lineno = cur_token.lineno
if cur_token.is_comment:
cur_token_lineno -= cur_token.value.count('\n')
num_newlines = text.count('\n') if not prev_token.is_comment else 0
return prev_token.lineno + num_newlines == cur_token_lineno - 1

View File

@ -0,0 +1,559 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Computation of split penalties before/between tokens."""
from lib2to3 import pytree
from yapf.yapflib import format_token
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import pytree_visitor
from yapf.yapflib import style
# TODO(morbo): Document the annotations in a centralized place. E.g., the
# README file.
# Split-penalty weights attached to tokens by this module. Higher values make
# the solver more reluctant to split at the annotated token; UNBREAKABLE is
# large enough to forbid a split in practice.
UNBREAKABLE = 1000 * 1000
NAMED_ASSIGN = 8500
DOTTED_NAME = 4000
VERY_STRONGLY_CONNECTED = 3500
STRONGLY_CONNECTED = 3000
# Penalties for the expression grammar, roughly increasing with how tightly
# the production binds (or_test loosest, atom tightest).
OR_TEST = 1000
AND_TEST = 1100
NOT_TEST = 1200
COMPARISON = 1300
STAR_EXPR = 1300
EXPR = 1400
XOR_EXPR = 1500
AND_EXPR = 1700
SHIFT_EXPR = 1800
ARITH_EXPR = 1900
TERM = 2000
FACTOR = 2100
POWER = 2200
ATOM = 2300
ONE_ELEMENT_ARGUMENT = 2500
def ComputeSplitPenalties(tree):
  """Compute split penalties on tokens in the given parse tree.

  Arguments:
    tree: the top-level pytree node to annotate with penalties.
  """
  assigner = _SplitPenaltyAssigner()
  assigner.Visit(tree)
class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor):
  """Assigns split penalties to tokens, based on parse tree structure.
  Split penalties are attached as annotations to tokens.
  """
  def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
    # import_as_names ::= import_as_name (',' import_as_name)* [',']
    self.DefaultNodeVisit(node)
    prev_child = None
    for child in node.children:
      # Penalize splitting right after each ',' in the import list.
      if (prev_child and isinstance(prev_child, pytree.Leaf) and
          prev_child.value == ','):
        _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_IMPORT_NAMES'))
      prev_child = child
  def Visit_classdef(self, node):  # pylint: disable=invalid-name
    # classdef ::= 'class' NAME ['(' [arglist] ')'] ':' suite
    #
    # NAME
    _SetUnbreakable(node.children[1])
    if len(node.children) > 4:
      # opening '('
      _SetUnbreakable(node.children[2])
    # ':'
    _SetUnbreakable(node.children[-2])
    self.DefaultNodeVisit(node)
  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
    # funcdef ::= 'def' NAME parameters ['->' test] ':' suite
    #
    # Can't break before the function name and before the colon. The parameters
    # are handled by child iteration.
    colon_idx = 1
    while pytree_utils.NodeName(node.children[colon_idx]) == 'simple_stmt':
      colon_idx += 1
    _SetUnbreakable(node.children[colon_idx])
    arrow_idx = -1
    # Scan forward to the ':' terminating the header, remembering a '->'
    # return annotation if present.
    while colon_idx < len(node.children):
      if isinstance(node.children[colon_idx], pytree.Leaf):
        if node.children[colon_idx].value == ':':
          break
        if node.children[colon_idx].value == '->':
          arrow_idx = colon_idx
      colon_idx += 1
    _SetUnbreakable(node.children[colon_idx])
    self.DefaultNodeVisit(node)
    if arrow_idx > 0:
      # Allow a split right before the return annotation, but keep the '->'
      # and its annotation tightly bound.
      _SetSplitPenalty(_LastChildNode(node.children[arrow_idx - 1]), 0)
      _SetUnbreakable(node.children[arrow_idx])
      _SetStronglyConnected(node.children[arrow_idx + 1])
  def Visit_lambdef(self, node):  # pylint: disable=invalid-name
    # lambdef ::= 'lambda' [varargslist] ':' test
    # Loop over the lambda up to and including the colon.
    if style.Get('ALLOW_MULTILINE_LAMBDAS'):
      _SetStronglyConnected(node)
    else:
      self._SetUnbreakableOnChildren(node)
  def Visit_parameters(self, node):  # pylint: disable=invalid-name
    # parameters ::= '(' [typedargslist] ')'
    self.DefaultNodeVisit(node)
    # Can't break before the opening paren of a parameter list.
    _SetUnbreakable(node.children[0])
    if not style.Get('DEDENT_CLOSING_BRACKETS'):
      _SetStronglyConnected(node.children[-1])
  def Visit_arglist(self, node):  # pylint: disable=invalid-name
    # arglist ::= argument (',' argument)* [',']
    self.DefaultNodeVisit(node)
    index = 1
    while index < len(node.children):
      child = node.children[index]
      if isinstance(child, pytree.Leaf) and child.value == ',':
        _SetUnbreakable(child)
      index += 1
  def Visit_argument(self, node):  # pylint: disable=invalid-name
    # argument ::= test [comp_for] | test '=' test  # Really [keyword '='] test
    self.DefaultNodeVisit(node)
    index = 1
    while index < len(node.children) - 1:
      child = node.children[index]
      if isinstance(child, pytree.Leaf) and child.value == '=':
        # Discourage splitting around the '=' of a keyword argument.
        _SetSplitPenalty(_FirstChildNode(node.children[index]), NAMED_ASSIGN)
        _SetSplitPenalty(
            _FirstChildNode(node.children[index + 1]), NAMED_ASSIGN)
      index += 1
  def Visit_dotted_name(self, node):  # pylint: disable=invalid-name
    # dotted_name ::= NAME ('.' NAME)*
    self._SetUnbreakableOnChildren(node)
  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
    # dictsetmaker ::= ( (test ':' test
    #                     (comp_for | (',' test ':' test)* [','])) |
    #                    (test (comp_for | (',' test)* [','])) )
    for child in node.children:
      self.Visit(child)
      if pytree_utils.NodeName(child) == 'COLON':
        # This is a key to a dictionary. We don't want to split the key if at
        # all possible.
        _SetStronglyConnected(child)
  def Visit_trailer(self, node):  # pylint: disable=invalid-name
    # trailer ::= '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
    self.DefaultNodeVisit(node)
    if node.children[0].value == '.':
      self._SetUnbreakableOnChildren(node)
      _SetSplitPenalty(node.children[1], DOTTED_NAME)
    elif len(node.children) == 2:
      # Don't split an empty argument list if at all possible.
      _SetSplitPenalty(node.children[1], VERY_STRONGLY_CONNECTED)
    elif len(node.children) == 3:
      name = pytree_utils.NodeName(node.children[1])
      if name == 'power':
        if pytree_utils.NodeName(node.children[1].children[0]) != 'atom':
          # Don't split an argument list with one element if at all possible.
          _SetStronglyConnected(node.children[1], node.children[2])
          _SetSplitPenalty(
              _FirstChildNode(node.children[1]), ONE_ELEMENT_ARGUMENT)
      elif (pytree_utils.NodeName(node.children[0]) == 'LSQB' and
            len(node.children[1].children) > 2 and
            (name.endswith('_test') or name.endswith('_expr'))):
        # Subscript containing a boolean/bitwise expression.
        _SetStronglyConnected(node.children[1].children[0])
        _SetStronglyConnected(node.children[1].children[2])
        # Still allow splitting around the operator.
        split_before = ((name.endswith('_test') and
                         style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR')) or
                        (name.endswith('_expr') and
                         style.Get('SPLIT_BEFORE_BITWISE_OPERATOR')))
        if split_before:
          _SetSplitPenalty(_LastChildNode(node.children[1].children[1]), 0)
        else:
          _SetSplitPenalty(_FirstChildNode(node.children[1].children[2]), 0)
        # Don't split the ending bracket of a subscript list.
        _SetVeryStronglyConnected(node.children[-1])
      elif name not in {
          'arglist', 'argument', 'term', 'or_test', 'and_test', 'comparison',
          'atom'
      }:
        # Don't split an argument list with one element if at all possible.
        _SetStronglyConnected(node.children[1], node.children[2])
  def Visit_power(self, node):  # pylint: disable=invalid-name,missing-docstring
    # power ::= atom trailer* ['**' factor]
    self.DefaultNodeVisit(node)
    # When atom is followed by a trailer, we can not break between them.
    # E.g. arr[idx] - no break allowed between 'arr' and '['.
    if (len(node.children) > 1 and
        pytree_utils.NodeName(node.children[1]) == 'trailer'):
      # children[1] itself is a whole trailer: we don't want to
      # mark all of it as unbreakable, only its first token: (, [ or .
      _SetUnbreakable(node.children[1].children[0])
      # A special case when there are more trailers in the sequence. Given:
      #   atom tr1 tr2
      # The last token of tr1 and the first token of tr2 comprise an unbreakable
      # region. For example: foo.bar.baz(1)
      # We can't put breaks between either of the '.', '(', or '[' and the names
      # *preceding* them.
      prev_trailer_idx = 1
      while prev_trailer_idx < len(node.children) - 1:
        cur_trailer_idx = prev_trailer_idx + 1
        cur_trailer = node.children[cur_trailer_idx]
        if pytree_utils.NodeName(cur_trailer) == 'trailer':
          # Now we know we have two trailers one after the other
          prev_trailer = node.children[prev_trailer_idx]
          if prev_trailer.children[-1].value != ')':
            # Set the previous node unbreakable if it's not a function call:
            #   atom tr1() tr2
            # It may be necessary (though undesirable) to split up a previous
            # function call's parentheses to the next line.
            _SetStronglyConnected(prev_trailer.children[-1])
          _SetStronglyConnected(cur_trailer.children[0])
          prev_trailer_idx = cur_trailer_idx
        else:
          break
      # We don't want to split before the last ')' of a function call. This also
      # takes care of the special case of:
      #   atom tr1 tr2 ... trn
      # where the 'tr#' are trailers that may end in a ')'.
      for trailer in node.children[1:]:
        if pytree_utils.NodeName(trailer) != 'trailer':
          break
        if trailer.children[0].value in '([':
          if len(trailer.children) > 2:
            subtypes = pytree_utils.GetNodeAnnotation(
                trailer.children[0], pytree_utils.Annotation.SUBTYPE)
            if subtypes and format_token.Subtype.SUBSCRIPT_BRACKET in subtypes:
              _SetStronglyConnected(_FirstChildNode(trailer.children[1]))
            last_child_node = _LastChildNode(trailer)
            if last_child_node.value.strip().startswith('#'):
              # Skip a trailing comment so we annotate the real closer.
              last_child_node = last_child_node.prev_sibling
            if not style.Get('DEDENT_CLOSING_BRACKETS'):
              if _LastChildNode(last_child_node.prev_sibling).value != ',':
                if last_child_node.value == ']':
                  _SetUnbreakable(last_child_node)
                else:
                  _SetSplitPenalty(last_child_node, VERY_STRONGLY_CONNECTED)
          else:
            # If the trailer's children are '()', then make it a strongly
            # connected region. It's sometimes necessary, though undesirable, to
            # split the two.
            _SetStronglyConnected(trailer.children[-1])
    # If the original source has a "builder" style calls, then we should allow
    # the reformatter to retain that.
    _AllowBuilderStyleCalls(node)
  def Visit_subscript(self, node):  # pylint: disable=invalid-name
    # subscript ::= test | [test] ':' [test] [sliceop]
    _SetStronglyConnected(*node.children)
    self.DefaultNodeVisit(node)
  def Visit_comp_for(self, node):  # pylint: disable=invalid-name
    # comp_for ::= 'for' exprlist 'in' testlist_safe [comp_iter]
    _SetSplitPenalty(_FirstChildNode(node), 0)
    _SetStronglyConnected(*node.children[1:])
    self.DefaultNodeVisit(node)
  def Visit_comp_if(self, node):  # pylint: disable=invalid-name
    # comp_if ::= 'if' old_test [comp_iter]
    _SetSplitPenalty(node.children[0],
                     style.Get('SPLIT_PENALTY_BEFORE_IF_EXPR'))
    _SetStronglyConnected(*node.children[1:])
    self.DefaultNodeVisit(node)
  def Visit_or_test(self, node):  # pylint: disable=invalid-name
    # or_test ::= and_test ('or' and_test)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, OR_TEST)
    index = 1
    while index + 1 < len(node.children):
      # Make the split cheaper on the preferred side of each 'or'.
      if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'):
        _DecrementSplitPenalty(_FirstChildNode(node.children[index]), OR_TEST)
      else:
        _DecrementSplitPenalty(
            _FirstChildNode(node.children[index + 1]), OR_TEST)
      index += 2
  def Visit_and_test(self, node):  # pylint: disable=invalid-name
    # and_test ::= not_test ('and' not_test)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, AND_TEST)
    index = 1
    while index + 1 < len(node.children):
      # Make the split cheaper on the preferred side of each 'and'.
      if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'):
        _DecrementSplitPenalty(_FirstChildNode(node.children[index]), AND_TEST)
      else:
        _DecrementSplitPenalty(
            _FirstChildNode(node.children[index + 1]), AND_TEST)
      index += 2
  def Visit_not_test(self, node):  # pylint: disable=invalid-name
    # not_test ::= 'not' not_test | comparison
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, NOT_TEST)
  def Visit_comparison(self, node):  # pylint: disable=invalid-name
    # comparison ::= expr (comp_op expr)*
    self.DefaultNodeVisit(node)
    if len(node.children) == 3 and _StronglyConnectedCompOp(node):
      _SetSplitPenalty(_FirstChildNode(node.children[1]), STRONGLY_CONNECTED)
      _SetSplitPenalty(_FirstChildNode(node.children[2]), STRONGLY_CONNECTED)
    else:
      _IncreasePenalty(node, COMPARISON)
  def Visit_star_expr(self, node):  # pylint: disable=invalid-name
    # star_expr ::= '*' expr
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, STAR_EXPR)
  def Visit_expr(self, node):  # pylint: disable=invalid-name
    # expr ::= xor_expr ('|' xor_expr)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, EXPR)
    index = 1
    while index < len(node.children) - 1:
      child = node.children[index]
      if isinstance(child, pytree.Leaf) and child.value == '|':
        # Put the bitwise-operator penalty on the preferred split side.
        if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'):
          _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_BITWISE_OPERATOR'))
        else:
          _SetSplitPenalty(
              _FirstChildNode(node.children[index + 1]),
              style.Get('SPLIT_PENALTY_BITWISE_OPERATOR'))
      index += 1
  def Visit_xor_expr(self, node):  # pylint: disable=invalid-name
    # xor_expr ::= and_expr ('^' and_expr)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, XOR_EXPR)
  def Visit_and_expr(self, node):  # pylint: disable=invalid-name
    # and_expr ::= shift_expr ('&' shift_expr)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, AND_EXPR)
  def Visit_shift_expr(self, node):  # pylint: disable=invalid-name
    # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, SHIFT_EXPR)
  def Visit_arith_expr(self, node):  # pylint: disable=invalid-name
    # arith_expr ::= term (('+'|'-') term)*
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, ARITH_EXPR)
  def Visit_term(self, node):  # pylint: disable=invalid-name
    # term ::= factor (('*'|'@'|'/'|'%'|'//') factor)*
    _IncreasePenalty(node, TERM)
    self.DefaultNodeVisit(node)
  def Visit_factor(self, node):  # pylint: disable=invalid-name
    # factor ::= ('+'|'-'|'~') factor | power
    self.DefaultNodeVisit(node)
    _IncreasePenalty(node, FACTOR)
  def Visit_atom(self, node):  # pylint: disable=invalid-name
    # atom ::= ('(' [yield_expr|testlist_gexp] ')'
    #           '[' [listmaker] ']' |
    #           '{' [dictsetmaker] '}')
    self.DefaultNodeVisit(node)
    if node.children[0].value == '(':
      if node.children[-1].value == ')':
        if pytree_utils.NodeName(node.parent) == 'if_stmt':
          _SetSplitPenalty(node.children[-1], UNBREAKABLE)
        else:
          if len(node.children) > 2:
            _SetSplitPenalty(_FirstChildNode(node.children[1]), EXPR)
          _SetSplitPenalty(node.children[-1], ATOM)
    elif node.children[0].value in '[{' and len(node.children) == 2:
      # Keep empty containers together if we can.
      _SetUnbreakable(node.children[-1])
  def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
    self.DefaultNodeVisit(node)
    prev_was_comma = False
    for child in node.children:
      if isinstance(child, pytree.Leaf) and child.value == ',':
        _SetUnbreakable(child)
        prev_was_comma = True
      else:
        if prev_was_comma:
          # Splitting right after a ',' is free.
          _SetSplitPenalty(_FirstChildNode(child), 0)
        prev_was_comma = False
  ############################################################################
  # Helper methods that set the annotations.
  def _SetUnbreakableOnChildren(self, node):
    """Set an UNBREAKABLE penalty annotation on children of node."""
    for child in node.children:
      self.Visit(child)
    # Never annotate the first child: a split *before* the node is allowed.
    start = 2 if hasattr(node.children[0], 'is_pseudo') else 1
    for i in py3compat.range(start, len(node.children)):
      _SetUnbreakable(node.children[i])
def _SetUnbreakable(node):
  """Mark every leaf under 'node' with the UNBREAKABLE split penalty."""
  _RecAnnotate(node, pytree_utils.Annotation.SPLIT_PENALTY, UNBREAKABLE)
def _SetStronglyConnected(*nodes):
  """Mark every leaf under each given node with STRONGLY_CONNECTED."""
  for target in nodes:
    _RecAnnotate(target, pytree_utils.Annotation.SPLIT_PENALTY,
                 STRONGLY_CONNECTED)
def _SetVeryStronglyConnected(*nodes):
  """Mark every leaf under each given node with VERY_STRONGLY_CONNECTED."""
  for target in nodes:
    _RecAnnotate(target, pytree_utils.Annotation.SPLIT_PENALTY,
                 VERY_STRONGLY_CONNECTED)
def _SetExpressionPenalty(node, penalty):
  """Raise each leaf's split penalty under 'node' to at least 'penalty'.

  The subtree's first leaf and the leaves '(', 'for', and 'if' are left
  untouched.
  """
  stop_leaf = _FirstChildNode(node)
  def _Walk(subnode):
    if subnode is stop_leaf:
      return
    if isinstance(subnode, pytree.Leaf):
      if subnode.value in {'(', 'for', 'if'}:
        return
      existing = pytree_utils.GetNodeAnnotation(
          subnode, pytree_utils.Annotation.SPLIT_PENALTY, default=0)
      if existing < penalty:
        _SetSplitPenalty(subnode, penalty)
      return
    for child in subnode.children:
      _Walk(child)
  _Walk(node)
def _IncreasePenalty(node, amt):
  """Add 'amt' to the split penalty of each leaf under 'node'.

  The subtree's first leaf and the leaves '(', 'for', and 'if' are left
  untouched.
  """
  first_leaf = _FirstChildNode(node)
  def _Walk(subnode):
    if subnode is first_leaf:
      return
    if isinstance(subnode, pytree.Leaf):
      if subnode.value in {'(', 'for', 'if'}:
        return
      current = pytree_utils.GetNodeAnnotation(
          subnode, pytree_utils.Annotation.SPLIT_PENALTY, default=0)
      _SetSplitPenalty(subnode, current + amt)
      return
    for child in subnode.children:
      _Walk(child)
  _Walk(node)
def _RecAnnotate(tree, annotate_name, annotate_value):
  """Recursively set the given annotation on all leafs of the subtree.

  Takes care to only increase the penalty. If the node already has a higher
  or equal penalty associated with it, this is a no-op.

  Args:
    tree: subtree to annotate
    annotate_name: name of the annotation to set
    annotate_value: value of the annotation to set
  """
  for subtree in tree.children:
    _RecAnnotate(subtree, annotate_name, annotate_value)
  if not isinstance(tree, pytree.Leaf):
    return
  existing = pytree_utils.GetNodeAnnotation(tree, annotate_name, default=0)
  if existing < annotate_value:
    pytree_utils.SetNodeAnnotation(tree, annotate_name, annotate_value)
def _StronglyConnectedCompOp(op):
  """Return True for comparison operators that bind their operands tightly.

  That is '==', 'in', and the two-token 'not in'.
  """
  middle = op.children[1]
  if isinstance(middle, pytree.Leaf) and middle.value in {'==', 'in'}:
    return True
  return (len(middle.children) == 2 and
          pytree_utils.NodeName(middle) == 'comp_op' and
          _FirstChildNode(middle).value == 'not' and
          _LastChildNode(middle).value == 'in')
def _DecrementSplitPenalty(node, amt):
  """Lower the node's split penalty by 'amt', clamping at zero."""
  current = pytree_utils.GetNodeAnnotation(
      node, pytree_utils.Annotation.SPLIT_PENALTY, default=amt)
  _SetSplitPenalty(node, current - amt if amt < current else 0)
def _SetSplitPenalty(node, penalty):
  """Attach 'penalty' to 'node' as its SPLIT_PENALTY annotation."""
  pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.SPLIT_PENALTY,
                                 penalty)
def _AllowBuilderStyleCalls(node):
  """Allow splitting before '.' if it's a builder style function call.

  Walks the leaves of 'node' in source order; any '.' leaf that starts a new
  source line (relative to the preceding leaf) gets a zero split penalty so
  the reformatter can keep the original builder-style line breaks.
  """

  def RecGetLeaves(node):
    # Flatten the subtree into its leaves, left to right.
    if isinstance(node, pytree.Leaf):
      return [node]
    children = []
    for child in node.children:
      children += RecGetLeaves(child)
    return children

  list_of_children = RecGetLeaves(node)
  prev_child = None
  for child in list_of_children:
    if child.value == '.':
      # Guard against a leading '.' leaf: there is no previous leaf on the
      # first iteration, and dereferencing None would raise AttributeError.
      if prev_child is not None and prev_child.lineno != child.lineno:
        _SetSplitPenalty(child, 0)
    prev_child = child
def _FirstChildNode(node):
if isinstance(node, pytree.Leaf):
return node
return _FirstChildNode(node.children[0])
def _LastChildNode(node):
if isinstance(node, pytree.Leaf):
return node
return _LastChildNode(node.children[-1])

View File

@ -0,0 +1,489 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Python formatting style settings."""
import os
import re
import textwrap
from yapf.yapflib import errors
from yapf.yapflib import py3compat
class StyleConfigError(errors.YapfError):
  """Raised when there's a problem reading the style configuration."""
def Get(setting_name):
  """Return the value of the style setting named 'setting_name'."""
  return _style[setting_name]
def Help():
  """Return a dict that maps each style setting name to its help string."""
  return _STYLE_HELP
def SetGlobalStyle(style):
  """Install 'style' as the active global style dict."""
  global _style
  global _GLOBAL_STYLE_FACTORY
  style_factory = _GetStyleFactory(style)
  if style_factory:
    # Remember which predefined factory produced this style.
    _GLOBAL_STYLE_FACTORY = style_factory
  _style = style
_STYLE_HELP = dict(
ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=textwrap.dedent("""\
Align closing bracket with visual indentation."""),
ALLOW_MULTILINE_LAMBDAS=textwrap.dedent("""\
Allow lambdas to be formatted on more than one line."""),
ALLOW_MULTILINE_DICTIONARY_KEYS=textwrap.dedent("""\
Allow dictionary keys to exist on multiple lines. For example:
x = {
('this is the first element of a tuple',
'this is the second element of a tuple'):
value,
}"""),
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=textwrap.dedent("""\
Insert a blank line before a 'def' or 'class' immediately nested
within another 'def' or 'class'. For example:
class Foo:
# <------ this blank line
def method():
..."""),
BLANK_LINE_BEFORE_CLASS_DOCSTRING=textwrap.dedent("""\
Insert a blank line before a class-level docstring."""),
COALESCE_BRACKETS=textwrap.dedent("""\
Do not split consecutive brackets. Only relevant when
dedent_closing_brackets is set. For example:
call_func_that_takes_a_dict(
{
'key1': 'value1',
'key2': 'value2',
}
)
would reformat to:
call_func_that_takes_a_dict({
'key1': 'value1',
'key2': 'value2',
})"""),
COLUMN_LIMIT=textwrap.dedent("""\
The column limit."""),
CONTINUATION_INDENT_WIDTH=textwrap.dedent("""\
Indent width used for line continuations."""),
DEDENT_CLOSING_BRACKETS=textwrap.dedent("""\
Put closing brackets on a separate line, dedented, if the bracketed
expression can't fit in a single line. Applies to all kinds of brackets,
including function definitions and calls. For example:
config = {
'key1': 'value1',
'key2': 'value2',
} # <--- this bracket is dedented and on a separate line
time_series = self.remote_client.query_entity_counters(
entity='dev3246.region1',
key='dns.query_latency_tcp',
transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
start_ts=now()-timedelta(days=3),
end_ts=now(),
) # <--- this bracket is dedented and on a separate line"""),
EACH_DICT_ENTRY_ON_SEPARATE_LINE=textwrap.dedent("""\
Place each dictionary entry onto its own line."""),
I18N_COMMENT=textwrap.dedent("""\
The regex for an i18n comment. The presence of this comment stops
reformatting of that line, because the comments are required to be
next to the string they translate."""),
I18N_FUNCTION_CALL=textwrap.dedent("""\
The i18n function call names. The presence of this function stops
reformattting on that line, because the string it has cannot be moved
away from the i18n comment."""),
INDENT_DICTIONARY_VALUE=textwrap.dedent("""\
Indent the dictionary value if it cannot fit on the same line as the
dictionary key. For example:
config = {
'key1':
'value1',
'key2': value1 +
value2,
}"""),
INDENT_WIDTH=textwrap.dedent("""\
The number of columns to use for indentation."""),
JOIN_MULTIPLE_LINES=textwrap.dedent("""\
Join short lines into one line. E.g., single line 'if' statements."""),
SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=textwrap.dedent("""\
Insert a space between the ending comma and closing bracket of a list,
etc."""),
SPACES_AROUND_POWER_OPERATOR=textwrap.dedent("""\
Use spaces around the power operator."""),
SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=textwrap.dedent("""\
Use spaces around default or named assigns."""),
SPACES_BEFORE_COMMENT=textwrap.dedent("""\
The number of spaces required before a trailing comment."""),
SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=textwrap.dedent("""\
Split before arguments if the argument list is terminated by a
comma."""),
SPLIT_BEFORE_BITWISE_OPERATOR=textwrap.dedent("""\
Set to True to prefer splitting before '&', '|' or '^' rather than
after."""),
SPLIT_BEFORE_DICT_SET_GENERATOR=textwrap.dedent("""\
Split before a dictionary or set generator (comp_for). For example, note
the split before the 'for':
foo = {
variable: 'Hello world, have a nice day!'
for variable in bar if variable != 42
}"""),
SPLIT_BEFORE_FIRST_ARGUMENT=textwrap.dedent("""\
If an argument / parameter list is going to be split, then split before
the first argument."""),
SPLIT_BEFORE_LOGICAL_OPERATOR=textwrap.dedent("""\
Set to True to prefer splitting before 'and' or 'or' rather than
after."""),
SPLIT_BEFORE_NAMED_ASSIGNS=textwrap.dedent("""\
Split named assignments onto individual lines."""),
SPLIT_PENALTY_AFTER_OPENING_BRACKET=textwrap.dedent("""\
The penalty for splitting right after the opening bracket."""),
SPLIT_PENALTY_AFTER_UNARY_OPERATOR=textwrap.dedent("""\
The penalty for splitting the line after a unary operator."""),
SPLIT_PENALTY_BEFORE_IF_EXPR=textwrap.dedent("""\
The penalty for splitting right before an if expression."""),
SPLIT_PENALTY_BITWISE_OPERATOR=textwrap.dedent("""\
The penalty of splitting the line around the '&', '|', and '^'
operators."""),
SPLIT_PENALTY_EXCESS_CHARACTER=textwrap.dedent("""\
The penalty for characters over the column limit."""),
SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=textwrap.dedent("""\
The penalty incurred by adding a line split to the unwrapped line. The
more line splits added the higher the penalty."""),
SPLIT_PENALTY_IMPORT_NAMES=textwrap.dedent("""\
The penalty of splitting a list of "import as" names. For example:
from a_very_long_or_indented_module_name_yada_yad import (long_argument_1,
long_argument_2,
long_argument_3)
would reformat to something like:
from a_very_long_or_indented_module_name_yada_yad import (
long_argument_1, long_argument_2, long_argument_3)
"""),
SPLIT_PENALTY_LOGICAL_OPERATOR=textwrap.dedent("""\
The penalty of splitting the line around the 'and' and 'or'
operators."""),
USE_TABS=textwrap.dedent("""\
Use the Tab character for indentation."""),
# BASED_ON_STYLE='Which predefined style this style is based on',
)
def CreatePEP8Style():
  """Return the base PEP 8 style as a mapping of option name to value.

  All other predefined styles are derived from this dict by overriding
  individual entries.
  """
  return {
      'ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT': True,
      'ALLOW_MULTILINE_LAMBDAS': False,
      'ALLOW_MULTILINE_DICTIONARY_KEYS': False,
      'BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF': False,
      'BLANK_LINE_BEFORE_CLASS_DOCSTRING': False,
      'COALESCE_BRACKETS': False,
      'COLUMN_LIMIT': 79,
      'CONTINUATION_INDENT_WIDTH': 4,
      'DEDENT_CLOSING_BRACKETS': False,
      'EACH_DICT_ENTRY_ON_SEPARATE_LINE': True,
      'I18N_COMMENT': '',
      'I18N_FUNCTION_CALL': '',
      'INDENT_DICTIONARY_VALUE': False,
      'INDENT_WIDTH': 4,
      'JOIN_MULTIPLE_LINES': True,
      'SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET': True,
      'SPACES_AROUND_POWER_OPERATOR': False,
      'SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN': False,
      'SPACES_BEFORE_COMMENT': 2,
      'SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED': False,
      'SPLIT_BEFORE_BITWISE_OPERATOR': False,
      'SPLIT_BEFORE_DICT_SET_GENERATOR': True,
      'SPLIT_BEFORE_FIRST_ARGUMENT': False,
      'SPLIT_BEFORE_LOGICAL_OPERATOR': False,
      'SPLIT_BEFORE_NAMED_ASSIGNS': True,
      'SPLIT_PENALTY_AFTER_OPENING_BRACKET': 30,
      'SPLIT_PENALTY_AFTER_UNARY_OPERATOR': 10000,
      'SPLIT_PENALTY_BEFORE_IF_EXPR': 0,
      'SPLIT_PENALTY_BITWISE_OPERATOR': 300,
      'SPLIT_PENALTY_EXCESS_CHARACTER': 4500,
      'SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT': 30,
      'SPLIT_PENALTY_IMPORT_NAMES': 0,
      'SPLIT_PENALTY_LOGICAL_OPERATOR': 300,
      'USE_TABS': False,
  }
def CreateGoogleStyle():
  """Return the Google style: the PEP 8 base with Google-specific overrides."""
  style = CreatePEP8Style()
  style.update(
      ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=False,
      BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=True,
      COLUMN_LIMIT=80,
      INDENT_WIDTH=4,
      I18N_COMMENT=r'#\..*',
      I18N_FUNCTION_CALL=['N_', '_'],
      SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=False)
  return style
def CreateChromiumStyle():
  """Return the Chromium style: the Google style with Chromium overrides."""
  style = CreateGoogleStyle()
  style.update(
      ALLOW_MULTILINE_DICTIONARY_KEYS=True,
      INDENT_DICTIONARY_VALUE=True,
      INDENT_WIDTH=2,
      JOIN_MULTIPLE_LINES=False,
      SPLIT_BEFORE_BITWISE_OPERATOR=True)
  return style
def CreateFacebookStyle():
  """Return the Facebook style: the PEP 8 base with Facebook overrides."""
  style = CreatePEP8Style()
  style.update(
      ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=False,
      COLUMN_LIMIT=80,
      DEDENT_CLOSING_BRACKETS=True,
      JOIN_MULTIPLE_LINES=False,
      SPACES_BEFORE_COMMENT=2,
      SPLIT_PENALTY_AFTER_OPENING_BRACKET=0,
      SPLIT_PENALTY_BEFORE_IF_EXPR=30,
      SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=30)
  return style
# Maps a lowercase style name (as given on the command line or in a config
# file's based_on_style setting) to the factory that builds it.
_STYLE_NAME_TO_FACTORY = {
    'pep8': CreatePEP8Style,
    'chromium': CreateChromiumStyle,
    'google': CreateGoogleStyle,
    'facebook': CreateFacebookStyle,
}

# Pairs of (predefined style dict, factory that produced it). Used to
# recognize whether a given style dict equals one of the predefined styles.
_DEFAULT_STYLE_TO_FACTORY = [
    (CreateChromiumStyle(), CreateChromiumStyle),
    (CreateFacebookStyle(), CreateFacebookStyle),
    (CreateGoogleStyle(), CreateGoogleStyle),
    (CreatePEP8Style(), CreatePEP8Style),
]
def _GetStyleFactory(style):
  """Return the factory whose predefined style dict equals 'style', else None."""
  return next((factory
               for predefined, factory in _DEFAULT_STYLE_TO_FACTORY
               if style == predefined), None)
def _StringListConverter(s):
  """Option value converter for a comma-separated list of strings."""
  return list(map(str.strip, s.split(',')))
def _BoolConverter(s):
  """Option value converter for a boolean."""
  # Accepts ConfigParser's canonical truthy/falsy spellings ('1', 'yes',
  # 'true', 'on', ...); raises KeyError for anything else.
  states = py3compat.CONFIGPARSER_BOOLEAN_STATES
  return states[s.lower()]
# Different style options need to have their values interpreted differently
# when read from the config file. This dict maps an option name to a
# "converter" function that accepts the string read for the option's value
# from the file and returns it wrapped in the actual Python type that's going
# to be meaningful to yapf.
#
# Note: this dict has to map all the supported style options.
_STYLE_OPTION_VALUE_CONVERTER = {
    'ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT': _BoolConverter,
    'ALLOW_MULTILINE_LAMBDAS': _BoolConverter,
    'ALLOW_MULTILINE_DICTIONARY_KEYS': _BoolConverter,
    'BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF': _BoolConverter,
    'BLANK_LINE_BEFORE_CLASS_DOCSTRING': _BoolConverter,
    'COALESCE_BRACKETS': _BoolConverter,
    'COLUMN_LIMIT': int,
    'CONTINUATION_INDENT_WIDTH': int,
    'DEDENT_CLOSING_BRACKETS': _BoolConverter,
    'EACH_DICT_ENTRY_ON_SEPARATE_LINE': _BoolConverter,
    'I18N_COMMENT': str,
    'I18N_FUNCTION_CALL': _StringListConverter,
    'INDENT_DICTIONARY_VALUE': _BoolConverter,
    'INDENT_WIDTH': int,
    'JOIN_MULTIPLE_LINES': _BoolConverter,
    'SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET': _BoolConverter,
    'SPACES_AROUND_POWER_OPERATOR': _BoolConverter,
    'SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN': _BoolConverter,
    'SPACES_BEFORE_COMMENT': int,
    'SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED': _BoolConverter,
    'SPLIT_BEFORE_BITWISE_OPERATOR': _BoolConverter,
    'SPLIT_BEFORE_DICT_SET_GENERATOR': _BoolConverter,
    'SPLIT_BEFORE_FIRST_ARGUMENT': _BoolConverter,
    'SPLIT_BEFORE_LOGICAL_OPERATOR': _BoolConverter,
    'SPLIT_BEFORE_NAMED_ASSIGNS': _BoolConverter,
    'SPLIT_PENALTY_AFTER_OPENING_BRACKET': int,
    'SPLIT_PENALTY_AFTER_UNARY_OPERATOR': int,
    'SPLIT_PENALTY_BEFORE_IF_EXPR': int,
    'SPLIT_PENALTY_BITWISE_OPERATOR': int,
    'SPLIT_PENALTY_EXCESS_CHARACTER': int,
    'SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT': int,
    'SPLIT_PENALTY_IMPORT_NAMES': int,
    'SPLIT_PENALTY_LOGICAL_OPERATOR': int,
    'USE_TABS': _BoolConverter,
}
def CreateStyleFromConfig(style_config):
  """Create a style dict from the given config.

  Arguments:
    style_config: either a style name or a file name. The file is expected to
      contain settings. It can have a special BASED_ON_STYLE setting naming the
      style which it derives from. If no such setting is found, it derives from
      the default style. When style_config is None, the _GLOBAL_STYLE_FACTORY
      config is created.

  Returns:
    A style dict.

  Raises:
    StyleConfigError: if an unknown style option was encountered.
  """
  if style_config is None:
    # No explicit config: if the currently active global style is a custom
    # one (not any of the predefined styles), keep it; otherwise recreate
    # the default global style.
    predefined = (s for s, _ in _DEFAULT_STYLE_TO_FACTORY)
    if any(_style == s for s in predefined):
      return _GLOBAL_STYLE_FACTORY()
    return _style

  factory = _STYLE_NAME_TO_FACTORY.get(style_config.lower())
  if factory is not None:
    return factory()

  if style_config.startswith('{'):
    # Most likely a style specification from the command line.
    parser = _CreateConfigParserFromConfigString(style_config)
  else:
    # Unknown config name: assume it's a file name then.
    parser = _CreateConfigParserFromConfigFile(style_config)
  return _CreateStyleFromConfigParser(parser)
def _CreateConfigParserFromConfigString(config_string):
  """Given a config string from the command line, return a config parser."""
  # Keep the index checks (rather than startswith/endswith) so an empty
  # string still raises IndexError, exactly as before.
  if not (config_string[0] == '{' and config_string[-1] == '}'):
    raise StyleConfigError(
        "Invalid style dict syntax: '{}'.".format(config_string))
  parser = py3compat.ConfigParser()
  parser.add_section('style')
  pairs = re.findall(r'([a-zA-Z0-9_]+)\s*[:=]\s*([a-zA-Z0-9_]+)', config_string)
  for option, value in pairs:
    parser.set('style', option, value)
  return parser
def _CreateConfigParserFromConfigFile(config_filename):
  """Read the file and return a ConfigParser object.

  Arguments:
    config_filename: path to a setup.cfg, .style.yapf, or other ini-style
      style file.

  Returns:
    A ConfigParser with the file's contents.

  Raises:
    StyleConfigError: if the file does not exist or lacks the expected
      section ([yapf] for setup.cfg, [style] for everything else).
  """
  if not os.path.exists(config_filename):
    # Provide a more meaningful error here.
    raise StyleConfigError(
        '"{0}" is not a valid style or file path'.format(config_filename))
  with open(config_filename) as style_file:
    config = py3compat.ConfigParser()
    config.read_file(style_file)
    if config_filename.endswith(SETUP_CONFIG):
      # setup.cfg keeps its yapf settings in a [yapf] section.
      if not config.has_section('yapf'):
        raise StyleConfigError(
            'Unable to find section [yapf] in {0}'.format(config_filename))
    elif not config.has_section('style'):
      # .style.yapf and any other style file both require a [style] section;
      # the original elif/else branches were identical, so they are merged.
      raise StyleConfigError(
          'Unable to find section [style] in {0}'.format(config_filename))
    return config
def _CreateStyleFromConfigParser(config):
  """Create a style dict from a configuration file.

  Arguments:
    config: a ConfigParser object.

  Returns:
    A style dict.

  Raises:
    StyleConfigError: if an unknown style option was encountered.
  """
  # Initialize the base style.
  # NOTE: options are read from the [yapf] section when present, otherwise
  # from [style] -- but based_on_style is looked up in [style] FIRST, so if a
  # file (unusually) had both sections the [style] setting would win here.
  section = 'yapf' if config.has_section('yapf') else 'style'
  if config.has_option('style', 'based_on_style'):
    based_on = config.get('style', 'based_on_style').lower()
    base_style = _STYLE_NAME_TO_FACTORY[based_on]()
  elif config.has_option('yapf', 'based_on_style'):
    based_on = config.get('yapf', 'based_on_style').lower()
    base_style = _STYLE_NAME_TO_FACTORY[based_on]()
  else:
    # No based_on_style setting: derive from the default global style.
    base_style = _GLOBAL_STYLE_FACTORY()

  # Read all options specified in the file and update the style.
  for option, value in config.items(section):
    if option.lower() == 'based_on_style':
      # Now skip this one - we've already handled it and it's not one of the
      # recognized style options.
      continue
    option = option.upper()
    if option not in _STYLE_OPTION_VALUE_CONVERTER:
      raise StyleConfigError('Unknown style option "{0}"'.format(option))
    try:
      # Convert the raw string to the option's real Python type.
      base_style[option] = _STYLE_OPTION_VALUE_CONVERTER[option](value)
    except ValueError:
      raise StyleConfigError(
          "'{}' is not a valid setting for {}.".format(value, option))
  return base_style
# The default style - used if yapf is invoked without specifically requesting
# a formatting style.
DEFAULT_STYLE = 'pep8'
DEFAULT_STYLE_FACTORY = CreatePEP8Style
_GLOBAL_STYLE_FACTORY = CreatePEP8Style

# The name of the file to use for global style definition.
GLOBAL_STYLE = (os.path.join(
    os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'), 'yapf',
    'style'))

# The name of the file to use for directory-local style definition.
LOCAL_STYLE = '.style.yapf'

# Alternative place for directory-local style definition. Style should be
# specified in the '[yapf]' section.
SETUP_CONFIG = 'setup.cfg'

# TODO(eliben): For now we're preserving the global presence of a style dict.
# Refactor this so that the style is passed around through yapf rather than
# being global.
_style = None
# Install the default style at import time so Get() works immediately.
SetGlobalStyle(_GLOBAL_STYLE_FACTORY())

View File

@ -0,0 +1,416 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Subtype assigner for lib2to3 trees.
This module assigns extra type information to the lib2to3 trees. This
information is more specific than whether something is an operator or an
identifier. For instance, it can specify if a node in the tree is part of a
subscript.
AssignSubtypes(): the main function exported by this module.
Annotations:
subtype: The subtype of a pytree token. See 'format_token' module for a list
of subtypes.
"""
from lib2to3 import pytree
from lib2to3.pgen2 import token
from lib2to3.pygram import python_symbols as syms
from yapf.yapflib import format_token
from yapf.yapflib import pytree_utils
from yapf.yapflib import pytree_visitor
from yapf.yapflib import style
def AssignSubtypes(tree):
  """Run the subtype assigner visitor over the tree, modifying it in place.

  Arguments:
    tree: the top-level pytree node to annotate with subtypes.
  """
  _SubtypeAssigner().Visit(tree)
# Map tokens in argument lists to their respective subtype.
_ARGLIST_TOKEN_TO_SUBTYPE = {
    '=': format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN,  # default / kwarg
    ':': format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN,  # annotation colon
    '*': format_token.Subtype.VARARGS_STAR,  # *args
    '**': format_token.Subtype.KWARGS_STAR_STAR,  # **kwargs
}
class _SubtypeAssigner(pytree_visitor.PyTreeVisitor):
  """_SubtypeAssigner - see file-level docstring for detailed description.

  The subtype is added as an annotation to the pytree token.

  Each Visit_* method handles one lib2to3 grammar production (reproduced in a
  comment at the top of the method) and tags the relevant leaves.
  """

  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
    # dictsetmaker ::= (test ':' test (comp_for |
    #                   (',' test ':' test)* [','])) |
    #                  (test (comp_for | (',' test)* [',']))
    for child in node.children:
      self.Visit(child)

    # First pass: classify the construct (generator vs. dict literal).
    comp_for = False
    dict_maker = False
    for child in node.children:
      if pytree_utils.NodeName(child) == 'comp_for':
        comp_for = True
        _AppendFirstLeafTokenSubtype(child,
                                     format_token.Subtype.DICT_SET_GENERATOR)
      elif pytree_utils.NodeName(child) in ('COLON', 'DOUBLESTAR'):
        dict_maker = True

    # Second pass: tag keys and values of a plain (non-generator) dict.
    if not comp_for and dict_maker:
      last_was_colon = False
      for child in node.children:
        if dict_maker:
          if pytree_utils.NodeName(child) == 'DOUBLESTAR':
            _AppendFirstLeafTokenSubtype(child,
                                         format_token.Subtype.KWARGS_STAR_STAR)
          if last_was_colon:
            if style.Get('INDENT_DICTIONARY_VALUE'):
              _InsertPseudoParentheses(child)
            else:
              _AppendFirstLeafTokenSubtype(
                  child, format_token.Subtype.DICTIONARY_VALUE)
          elif (child is not None and
                (isinstance(child, pytree.Node) or
                 (not child.value.startswith('#') and
                  child.value not in '{:,'))):
            # Mark the first leaf of a key entry as a DICTIONARY_KEY. We
            # normally want to split before them if the dictionary cannot exist
            # on a single line.
            _AppendFirstLeafTokenSubtype(child,
                                         format_token.Subtype.DICTIONARY_KEY)
            _AppendSubtypeRec(child, format_token.Subtype.DICTIONARY_KEY_PART)
        last_was_colon = pytree_utils.NodeName(child) == 'COLON'

  def Visit_expr_stmt(self, node):  # pylint: disable=invalid-name
    # expr_stmt ::= testlist_star_expr (augassign (yield_expr|testlist)
    #             | ('=' (yield_expr|testlist_star_expr))*)
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '=':
        _AppendTokenSubtype(child, format_token.Subtype.ASSIGN_OPERATOR)

  def Visit_or_test(self, node):  # pylint: disable=invalid-name
    # or_test ::= and_test ('or' and_test)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == 'or':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_and_test(self, node):  # pylint: disable=invalid-name
    # and_test ::= not_test ('and' not_test)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == 'and':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_not_test(self, node):  # pylint: disable=invalid-name
    # not_test ::= 'not' not_test | comparison
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == 'not':
        _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR)

  def Visit_comparison(self, node):  # pylint: disable=invalid-name
    # comparison ::= expr (comp_op expr)*
    # comp_op ::= '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not in'|'is'|'is not'
    for child in node.children:
      self.Visit(child)
      if (isinstance(child, pytree.Leaf) and
          child.value in {'<', '>', '==', '>=', '<=', '<>', '!=', 'in', 'is'}):
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)
      elif pytree_utils.NodeName(child) == 'comp_op':
        # Multi-token operators ('not in', 'is not') arrive as a comp_op node.
        for grandchild in child.children:
          _AppendTokenSubtype(grandchild, format_token.Subtype.BINARY_OPERATOR)

  def Visit_star_expr(self, node):  # pylint: disable=invalid-name
    # star_expr ::= '*' expr
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '*':
        _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR)

  def Visit_expr(self, node):  # pylint: disable=invalid-name
    # expr ::= xor_expr ('|' xor_expr)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '|':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_xor_expr(self, node):  # pylint: disable=invalid-name
    # xor_expr ::= and_expr ('^' and_expr)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '^':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_and_expr(self, node):  # pylint: disable=invalid-name
    # and_expr ::= shift_expr ('&' shift_expr)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '&':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_shift_expr(self, node):  # pylint: disable=invalid-name
    # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value in {'<<', '>>'}:
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_arith_expr(self, node):  # pylint: disable=invalid-name
    # arith_expr ::= term (('+'|'-') term)*
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value in '+-':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_term(self, node):  # pylint: disable=invalid-name
    # term ::= factor (('*'|'/'|'%'|'//') factor)*
    for child in node.children:
      self.Visit(child)
      if (isinstance(child, pytree.Leaf) and
          child.value in {'*', '/', '%', '//'}):
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_factor(self, node):  # pylint: disable=invalid-name
    # factor ::= ('+'|'-'|'~') factor | power
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value in '+-~':
        _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR)

  def Visit_power(self, node):  # pylint: disable=invalid-name
    # power ::= atom trailer* ['**' factor]
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '**':
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)

  def Visit_trailer(self, node):  # pylint: disable=invalid-name
    # Tag subscript brackets so they can be split differently from call parens.
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value in '[]':
        _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_BRACKET)

  def Visit_subscript(self, node):  # pylint: disable=invalid-name
    # subscript ::= test | [test] ':' [test] [sliceop]
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == ':':
        _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_COLON)

  def Visit_sliceop(self, node):  # pylint: disable=invalid-name
    # sliceop ::= ':' [test]
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == ':':
        _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_COLON)

  def Visit_argument(self, node):  # pylint: disable=invalid-name
    # argument ::=
    #     test [comp_for] | test '=' test
    self._ProcessArgLists(node)

  def Visit_arglist(self, node):  # pylint: disable=invalid-name
    # arglist ::=
    #     (argument ',')* (argument [',']
    #     | '*' test (',' argument)* [',' '**' test]
    #     | '**' test)
    self._ProcessArgLists(node)
    _SetDefaultOrNamedAssignArgListSubtype(node)

  def Visit_tname(self, node):  # pylint: disable=invalid-name
    self._ProcessArgLists(node)
    _SetDefaultOrNamedAssignArgListSubtype(node)

  def Visit_decorator(self, node):  # pylint: disable=invalid-name
    # decorator ::=
    #     '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
    for child in node.children:
      if isinstance(child, pytree.Leaf) and child.value == '@':
        _AppendTokenSubtype(child, subtype=format_token.Subtype.DECORATOR)
      self.Visit(child)

  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
    # funcdef ::=
    #     'def' NAME parameters ['->' test] ':' suite
    # Tag only the function's name (the first NAME that isn't 'def').
    for child in node.children:
      if pytree_utils.NodeName(child) == 'NAME' and child.value != 'def':
        _AppendTokenSubtype(child, format_token.Subtype.FUNC_DEF)
        break
    for child in node.children:
      self.Visit(child)

  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
    # typedargslist ::=
    #     ((tfpdef ['=' test] ',')*
    #          ('*' [tname] (',' tname ['=' test])* [',' '**' tname]
    #          | '**' tname)
    #     | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
    self._ProcessArgLists(node)
    _SetDefaultOrNamedAssignArgListSubtype(node)

  def Visit_varargslist(self, node):  # pylint: disable=invalid-name
    # varargslist ::=
    #     ((vfpdef ['=' test] ',')*
    #          ('*' [vname] (',' vname ['=' test])* [',' '**' vname]
    #          | '**' vname)
    #     | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
    self._ProcessArgLists(node)
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value == '=':
        _AppendTokenSubtype(child, format_token.Subtype.VARARGS_LIST)

  def Visit_comp_for(self, node):  # pylint: disable=invalid-name
    # comp_for ::= 'for' exprlist 'in' testlist_safe [comp_iter]
    _AppendSubtypeRec(node, format_token.Subtype.COMP_FOR)
    self.DefaultNodeVisit(node)

  def Visit_comp_if(self, node):  # pylint: disable=invalid-name
    # comp_if ::= 'if' old_test [comp_iter]
    _AppendSubtypeRec(node, format_token.Subtype.COMP_IF)
    self.DefaultNodeVisit(node)

  def _ProcessArgLists(self, node):
    """Common method for processing argument lists."""
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf):
        # Leaves not in the map get Subtype.NONE.
        _AppendTokenSubtype(
            child,
            subtype=_ARGLIST_TOKEN_TO_SUBTYPE.get(child.value,
                                                  format_token.Subtype.NONE))
def _SetDefaultOrNamedAssignArgListSubtype(node):
  """Set named assign subtype on elements in a arg list."""

  def _HasNamedAssign(subnode):
    """Return True if any leaf under subnode carries the named-assign subtype.

    Nested 'arglist' children are excluded from the search.
    """
    if isinstance(subnode, pytree.Leaf):
      annotations = pytree_utils.GetNodeAnnotation(
          subnode, pytree_utils.Annotation.SUBTYPE, set())
      return format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in annotations
    return any(
        _HasNamedAssign(kid)
        for kid in subnode.children
        if pytree_utils.NodeName(kid) != 'arglist')

  if _HasNamedAssign(node):
    for kid in node.children:
      if pytree_utils.NodeName(kid) != 'COMMA':
        _AppendFirstLeafTokenSubtype(
            kid, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST)
def _AppendTokenSubtype(node, subtype):
  """Append the given subtype to the token's SUBTYPE annotation.

  NOTE(review): the original docstring said "only if it's not already set",
  but the code appends unconditionally; AppendNodeAnnotation's own
  deduplication (if any) is what prevents repeats -- confirm in pytree_utils.
  """
  pytree_utils.AppendNodeAnnotation(node, pytree_utils.Annotation.SUBTYPE,
                                    subtype)
def _AppendFirstLeafTokenSubtype(node, subtype):
  """Append the subtype to the first (left-most) leaf token under node."""
  # Walk down the left spine until a leaf is reached.
  while not isinstance(node, pytree.Leaf):
    node = node.children[0]
  _AppendTokenSubtype(node, subtype)
def _AppendSubtypeRec(node, subtype, force=True):
  """Append the subtype to every leaf token under node.

  NOTE(review): 'force' is threaded through the recursion but never consulted;
  it is kept to preserve the function's interface.
  """
  if isinstance(node, pytree.Leaf):
    _AppendTokenSubtype(node, subtype)
  else:
    for kid in node.children:
      _AppendSubtypeRec(kid, subtype, force=force)
def _InsertPseudoParentheses(node):
  """Insert pseudo parentheses so that dicts can be formatted correctly.

  Wraps the dictionary-value node in pseudo '(' / ')' leaves (marked with
  is_pseudo = True) and tags its first leaf as DICTIONARY_VALUE.

  Arguments:
    node: the pytree node (or leaf) holding a dictionary value.
  """
  comment_node = None
  if isinstance(node, pytree.Node):
    if node.children[-1].type == token.COMMENT:
      # Detach a trailing comment; it is re-appended after the pseudo rparen.
      comment_node = node.children[-1].clone()
      node.children[-1].remove()

  first = _GetFirstLeafNode(node)
  last = _GetLastLeafNode(node)

  if first == last and first.type == token.COMMENT:
    # A comment was inserted before the value, which is a pytree.Leaf.
    # Encompass the dictionary's value into an ATOM node.
    last = first.next_sibling
    new_node = pytree.Node(syms.atom, [first.clone(), last.clone()])
    node.replace(new_node)
    node = new_node
    last.remove()

    first = _GetFirstLeafNode(node)
    last = _GetLastLeafNode(node)

  lparen = pytree.Leaf(
      token.LPAR, u'(', context=('', (first.get_lineno(), first.column - 1)))

  # Position the pseudo rparen just past the value's last character. For a
  # multiline string that is on the string's final line. (The original code
  # tested "STRING with newline" twice; the two checks are merged here.)
  last_lineno = last.get_lineno()
  if last.type == token.STRING and '\n' in last.value:
    last_lineno += last.value.count('\n')
    last_column = len(last.value.split('\n')[-1]) + 1
  else:
    last_column = last.column + len(last.value) + 1

  rparen = pytree.Leaf(
      token.RPAR, u')', context=('', (last_lineno, last_column)))

  lparen.is_pseudo = True
  rparen.is_pseudo = True

  if isinstance(node, pytree.Node):
    node.insert_child(0, lparen)
    node.append_child(rparen)
    if comment_node:
      node.append_child(comment_node)
    _AppendFirstLeafTokenSubtype(node, format_token.Subtype.DICTIONARY_VALUE)
  else:
    # A bare leaf value: wrap it in a fresh atom node.
    clone = node.clone()
    new_node = pytree.Node(syms.atom, [lparen, clone, rparen])
    node.replace(new_node)
    _AppendFirstLeafTokenSubtype(clone, format_token.Subtype.DICTIONARY_VALUE)
def _GetFirstLeafNode(node):
  """Return the left-most leaf under (or equal to) node."""
  while not isinstance(node, pytree.Leaf):
    node = node.children[0]
  return node
def _GetLastLeafNode(node):
  """Return the right-most leaf under (or equal to) node."""
  while not isinstance(node, pytree.Leaf):
    node = node.children[-1]
  return node

View File

@ -0,0 +1,497 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""UnwrappedLine primitive for formatting.
An unwrapped line is the containing data structure produced by the parser. It
collects all nodes (stored in FormatToken objects) that could appear on a
single line if there were no line length restrictions. It's then used by the
parser to perform the wrapping required to comply with the style guide.
"""
from yapf.yapflib import format_token
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import split_penalty
from yapf.yapflib import style
class UnwrappedLine(object):
  """Represents a single unwrapped line in the output.

  Attributes:
    depth: indentation depth of this line. This is just a numeric value used to
      distinguish lines that are more deeply nested than others. It is not the
      actual amount of spaces, which is style-dependent.
  """

  def __init__(self, depth, tokens=None):
    """Constructor.

    Creates a new unwrapped line with the given depth and an initial list of
    tokens. Constructs the doubly-linked lists for format tokens using their
    built-in next_token and previous_token attributes.

    Arguments:
      depth: indentation depth of this line
      tokens: initial list of tokens
    """
    self.depth = depth
    self._tokens = tokens or []
    # When True, the formatter leaves this line as the user wrote it.
    self.disable = False

    if self._tokens:
      # Set up a doubly linked list.
      for index, tok in enumerate(self._tokens[1:]):
        # Note, 'index' is the index to the previous token.
        tok.previous_token = self._tokens[index]
        self._tokens[index].next_token = tok

  def CalculateFormattingInformation(self):
    """Calculate the split penalty and total length for the tokens."""
    # Say that the first token in the line should have a space before it. This
    # means only that if this unwrapped line is joined with a predecessor line,
    # then there will be a space between them.
    self.first.spaces_required_before = 1
    self.first.total_length = len(self.first.value)

    prev_token = self.first
    prev_length = self.first.total_length
    for token in self._tokens[1:]:
      if (token.spaces_required_before == 0 and
          _SpaceRequiredBetween(prev_token, token)):
        token.spaces_required_before = 1

      # Pseudo parens contribute no width to the line.
      tok_len = len(token.value) if not token.is_pseudo_paren else 0
      token.total_length = prev_length + tok_len + token.spaces_required_before

      # The split penalty has to be computed before {must|can}_break_before,
      # because these may use it for their decision.
      token.split_penalty += _SplitPenalty(prev_token, token)
      token.must_break_before = _MustBreakBefore(prev_token, token)
      token.can_break_before = (token.must_break_before or
                                _CanBreakBefore(prev_token, token))

      prev_length = token.total_length
      prev_token = token

  ############################################################################
  # Token Access and Manipulation Methods                                    #
  ############################################################################

  def AppendToken(self, token):
    """Append a new FormatToken to the tokens contained in this line."""
    if self._tokens:
      # Maintain the doubly linked list set up in __init__.
      token.previous_token = self.last
      self.last.next_token = token
    self._tokens.append(token)

  def AppendNode(self, node):
    """Convenience method to append a pytree node directly.

    Wraps the node with a FormatToken.

    Arguments:
      node: the node to append
    """
    self.AppendToken(format_token.FormatToken(node))

  @property
  def first(self):
    """Returns the first non-whitespace token."""
    return self._tokens[0]

  @property
  def last(self):
    """Returns the last non-whitespace token."""
    return self._tokens[-1]

  ############################################################################
  # Token -> String Methods                                                  #
  ############################################################################

  def AsCode(self, indent_per_depth=2):
    """Return a "code" representation of this line.

    The code representation shows how the line would be printed out as code.

    TODO(eliben): for now this is rudimentary for debugging - once we add
    formatting capabilities, this method will have other uses (not all tokens
    have spaces around them, for example).

    Arguments:
      indent_per_depth: how much spaces to indent per depth level.

    Returns:
      A string representing the line as code.
    """
    indent = ' ' * indent_per_depth * self.depth
    tokens_str = ' '.join(tok.value for tok in self._tokens)
    return indent + tokens_str

  def __str__(self):  # pragma: no cover
    return self.AsCode()

  def __repr__(self):  # pragma: no cover
    tokens_repr = ','.join(
        ['{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens])
    return 'UnwrappedLine(depth={0}, tokens=[{1}])'.format(
        self.depth, tokens_repr)

  ############################################################################
  # Properties                                                               #
  ############################################################################

  @property
  def tokens(self):
    """Access the tokens contained within this line.

    The caller must not modify the tokens list returned by this method.

    Returns:
      List of tokens in this line.
    """
    return self._tokens

  @property
  def lineno(self):
    """Return the line number of this unwrapped line.

    Returns:
      The line number of the first token in this unwrapped line.
    """
    return self.first.lineno

  @property
  def is_comment(self):
    # True when the line begins with (i.e. is) a comment token.
    return self.first.is_comment
def _IsIdNumberStringToken(tok):
  """Return True for keyword, identifier, number, and string tokens."""
  return any((tok.is_keyword, tok.is_name, tok.is_number, tok.is_string))
def _IsUnaryOperator(tok):
  """Return True if the token was tagged with the UNARY_OPERATOR subtype."""
  subtypes = tok.subtypes
  return format_token.Subtype.UNARY_OPERATOR in subtypes
def _SpaceRequiredBetween(left, right):
  """Return True if a space is required between the left and right token.

  The checks below form an ordered rule chain: the first rule that matches
  wins, so earlier rules take precedence over later ones.

  Arguments:
    left: the token preceding the potential space.
    right: the token following the potential space.

  Returns:
    True if a single space must separate the two tokens.
  """
  lval = left.value
  rval = right.value
  if (left.is_pseudo_paren and _IsIdNumberStringToken(right) and
      left.previous_token and _IsIdNumberStringToken(left.previous_token)):
    # Space between keyword... tokens and pseudo parens.
    return True
  if left.is_pseudo_paren or right.is_pseudo_paren:
    # There should be a space after the ':' in a dictionary.
    if left.OpensScope():
      return True
    # The closing pseudo-paren shouldn't affect spacing.
    return False
  if left.is_continuation or right.is_continuation:
    # The continuation node's value has all of the spaces it needs.
    return False
  if right.name in pytree_utils.NONSEMANTIC_TOKENS:
    # No space before a non-semantic token.
    return False
  if _IsIdNumberStringToken(left) and _IsIdNumberStringToken(right):
    # Spaces between keyword, string, number, and identifier tokens.
    return True
  if lval == ',' and rval == ':':
    # We do want a space between a comma and colon.
    return True
  if rval in ':,':
    # Otherwise, we never want a space before a colon or comma.
    return False
  if lval == ',' and rval in ']})':
    # Add a space between ending ',' and closing bracket if requested.
    return style.Get('SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET')
  if lval == ',':
    # We want a space after a comma.
    return True
  if lval == 'from' and rval == '.':
    # Space before the '.' in an import statement.
    return True
  if lval == '.' and rval == 'import':
    # Space after the '.' in an import statement.
    return True
  if lval == '=' and rval == '.':
    # Space between equal and '.' as in "X = ...".
    return True
  if ((right.is_keyword or right.is_name) and
      (left.is_keyword or left.is_name)):
    # Don't merge two keywords/identifiers.
    return True
  if left.is_string:
    if (rval == '=' and format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in
        right.subtypes):
      # If there is a type hint, then we don't want to add a space between the
      # equal sign and the hint.
      return False
    if rval not in '[)]}.':
      # A string followed by something other than a subscript, closing bracket,
      # or dot should have a space after it.
      return True
  if left.is_binary_op and lval != '**' and _IsUnaryOperator(right):
    # Space between the binary operator and the unary operator.
    return True
  if _IsUnaryOperator(left) and _IsUnaryOperator(right):
    # No space between two unary operators.
    return False
  if left.is_binary_op or right.is_binary_op:
    if lval == '**' or rval == '**':
      # Space around the "power" operator.
      return style.Get('SPACES_AROUND_POWER_OPERATOR')
    # Enforce spaces around binary operators.
    return True
  if (_IsUnaryOperator(left) and lval != 'not' and
      (right.is_name or right.is_number or rval == '(')):
    # The previous token was a unary op. No space is desired between it and
    # the current token.
    return False
  if (format_token.Subtype.SUBSCRIPT_COLON in left.subtypes or
      format_token.Subtype.SUBSCRIPT_COLON in right.subtypes):
    # A subscript shouldn't have spaces separating its colons.
    return False
  if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in left.subtypes or
      format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in right.subtypes):
    # A named argument or default parameter shouldn't have spaces around it.
    # However, a typed argument should have a space after the colon.
    return lval == ':' or style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
  if (format_token.Subtype.VARARGS_LIST in left.subtypes or
      format_token.Subtype.VARARGS_LIST in right.subtypes):
    # No space within a varargs parameter list.
    return False
  if (format_token.Subtype.VARARGS_STAR in left.subtypes or
      format_token.Subtype.KWARGS_STAR_STAR in left.subtypes):
    # Don't add a space after a vararg's star or a keyword's star-star.
    return False
  if lval == '@' and format_token.Subtype.DECORATOR in left.subtypes:
    # Decorators shouldn't be separated from the 'at' sign.
    return False
  if lval == '.' or rval == '.':
    # Don't place spaces between dots.
    return False
  if ((lval == '(' and rval == ')') or (lval == '[' and rval == ']') or
      (lval == '{' and rval == '}')):
    # Empty objects shouldn't be separated by spaces.
    return False
  if (lval in pytree_utils.OPENING_BRACKETS and
      rval in pytree_utils.OPENING_BRACKETS):
    # Nested objects' opening brackets shouldn't be separated.
    return False
  if (lval in pytree_utils.CLOSING_BRACKETS and
      rval in pytree_utils.CLOSING_BRACKETS):
    # Nested objects' closing brackets shouldn't be separated.
    return False
  if lval in pytree_utils.CLOSING_BRACKETS and rval in '([':
    # A call, set, dictionary, or subscript that has a call or subscript after
    # it shouldn't have a space between them.
    return False
  if lval in pytree_utils.OPENING_BRACKETS and _IsIdNumberStringToken(right):
    # Don't separate the opening bracket from the first item.
    return False
  if left.is_name and rval in '([':
    # Don't separate a call or array access from the name.
    return False
  if rval in pytree_utils.CLOSING_BRACKETS:
    # Don't separate the closing bracket from the last item.
    # FIXME(morbo): This might be too permissive.
    return False
  if lval == 'print' and rval == '(':
    # Special support for the 'print' function.
    return False
  if lval in pytree_utils.OPENING_BRACKETS and _IsUnaryOperator(right):
    # Don't separate a unary operator from the opening bracket.
    return False
  if (lval in pytree_utils.OPENING_BRACKETS and
      (format_token.Subtype.VARARGS_STAR in right.subtypes or
       format_token.Subtype.KWARGS_STAR_STAR in right.subtypes)):
    # Don't separate a '*' or '**' from the opening bracket.
    return False
  if rval == ';':
    # Avoid spaces before a semicolon. (Why is there a semicolon?!)
    return False
  if lval == '(' and rval == 'await':
    # Special support for the 'await' keyword. Don't separate the 'await'
    # keyword from an opening paren.
    return False
  return True
def _MustBreakBefore(prev_token, cur_token):
  """Return True if a line break is mandatory before the current token."""
  # A comment always terminates the line it is on.
  if prev_token.is_comment:
    return True
  # Keep consecutive string literals inside brackets on separate lines: had
  # the author wanted them joined, they would have used a single string or a
  # '+' expression.
  adjacent_strings = prev_token.is_string and cur_token.is_string
  if adjacent_strings and IsSurroundedByBrackets(cur_token):
    return True
  # Otherwise honor an explicit MUST_SPLIT annotation on the node, defaulting
  # to no forced break.
  return pytree_utils.GetNodeAnnotation(
      cur_token.node, pytree_utils.Annotation.MUST_SPLIT, default=False)
def _CanBreakBefore(prev_token, cur_token):
  """Return True if a line break may occur before the current token.

  Unlike _MustBreakBefore, this only says a break is permitted here; the
  reformatter decides whether to actually take it.
  """
  pval = prev_token.value
  cval = cur_token.value
  if py3compat.PY3:
    if pval == 'yield' and cval == 'from':
      # Don't break before a yield argument.
      return False
    if pval in {'async', 'await'} and cval in {'def', 'with', 'for'}:
      # Don't break after async keywords.
      return False
  if cur_token.split_penalty >= split_penalty.UNBREAKABLE:
    # The split-penalty pass marked this boundary as unbreakable.
    return False
  if pval == '@':
    # Don't break right after the beginning of a decorator.
    return False
  if cval == ':':
    # Don't break before the start of a block of code.
    return False
  if cval == ',':
    # Don't break before a comma.
    return False
  if prev_token.is_name and cval == '(':
    # Don't break in the middle of a function definition or call.
    return False
  if prev_token.is_name and cval == '[':
    # Don't break in the middle of an array dereference.
    return False
  if prev_token.is_name and cval == '.':
    # Don't break before the '.' in a dotted name.
    return False
  if cur_token.is_comment and prev_token.lineno == cur_token.lineno:
    # Don't break a comment at the end of the line.
    return False
  if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes:
    # Don't break after a unary token.
    return False
  return True
def IsSurroundedByBrackets(tok):
  """Return the opening bracket token enclosing *tok*, or None.

  Walks the token chain backwards, balancing each bracket kind separately;
  the first unmatched '(', '{', or '[' encountered is the enclosing bracket.
  """
  # Net open-count per bracket kind, seen walking right-to-left.
  counts = {'(': 0, '{': 0, '[': 0}
  # Map each closer to the opener it balances.
  closers = {')': '(', '}': '{', ']': '['}
  node = tok.previous_token
  while node:
    val = node.value
    if val in closers:
      # A closer to our left means one opener to its left is already matched.
      counts[closers[val]] -= 1
    elif val in counts:
      if counts[val] == 0:
        # Unmatched opener: this is the bracket enclosing `tok`.
        return node
      counts[val] += 1
    node = node.previous_token
  return None
# Keyword operators that join boolean clauses.
_LOGICAL_OPERATORS = frozenset({'and', 'or'})
# Binary bitwise operators.
_BITWISE_OPERATORS = frozenset({'&', '|', '^'})
# Multiplicative ("term") arithmetic operators.
_TERM_OPERATORS = frozenset({'*', '/', '%', '//'})
def _SplitPenalty(prev_token, cur_token):
  """Return the penalty for breaking the line before the current token.

  Higher values make the reformatter less willing to split at this boundary;
  0 means splitting here is free, and split_penalty.UNBREAKABLE forbids it.
  The checks are ordered: the first matching rule determines the penalty.
  """
  pval = prev_token.value
  cval = cur_token.value
  if pval == 'not':
    # Never split between 'not' and its operand.
    return split_penalty.UNBREAKABLE
  if cur_token.node_split_penalty > 0:
    # An explicit penalty computed by the split-penalty pass wins.
    return cur_token.node_split_penalty
  if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'):
    # Prefer to split before 'and' and 'or'.
    if pval in _LOGICAL_OPERATORS:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
    if cval in _LOGICAL_OPERATORS:
      return 0
  else:
    # Prefer to split after 'and' and 'or'.
    if pval in _LOGICAL_OPERATORS:
      return 0
    if cval in _LOGICAL_OPERATORS:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
  if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'):
    # Prefer to split before '&', '|', and '^'.
    if pval in _BITWISE_OPERATORS:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
    if cval in _BITWISE_OPERATORS:
      return 0
  else:
    # Prefer to split after '&', '|', and '^'.
    if pval in _BITWISE_OPERATORS:
      return 0
    if cval in _BITWISE_OPERATORS:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
  if (format_token.Subtype.COMP_FOR in cur_token.subtypes or
      format_token.Subtype.COMP_IF in cur_token.subtypes):
    # We don't mind breaking before the 'for' or 'if' of a list comprehension.
    return 0
  if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes:
    # Try not to break after a unary operator.
    return style.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR')
  if pval == ',':
    # Breaking after a comma is fine, if need be.
    return 0
  if prev_token.is_binary_op:
    # We would rather not split after an equality operator.
    return 20
  if (format_token.Subtype.VARARGS_STAR in prev_token.subtypes or
      format_token.Subtype.KWARGS_STAR_STAR in prev_token.subtypes):
    # Don't split after a varargs * or kwargs **.
    return split_penalty.UNBREAKABLE
  if prev_token.OpensScope() and cval != '(':
    # Slightly prefer keeping the first element on the same line as its
    # opening bracket; apply the configured penalty.
    return style.Get('SPLIT_PENALTY_AFTER_OPENING_BRACKET')
  if cval == ':':
    # Don't split before a colon.
    return split_penalty.UNBREAKABLE
  if cval == '=':
    # Don't split before an assignment.
    return split_penalty.UNBREAKABLE
  if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or
      format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes):
    # Don't break before or after a default or named assignment.
    return split_penalty.UNBREAKABLE
  if cval == '==':
    # We would rather not split before an equality operator.
    return split_penalty.STRONGLY_CONNECTED
  if cur_token.ClosesScope():
    # Give a slight penalty for splitting before the closing scope.
    return 100
  if pval in _TERM_OPERATORS or cval in _TERM_OPERATORS:
    # Mild penalty around '*', '/', '%', and '//'.
    return 50
  return 0

View File

@ -0,0 +1,93 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Verify that the generated code is valid code.
This takes a line of code and "normalizes" it. I.e., it transforms the snippet
into something that has the potential to compile.
VerifyCode(): the main function exported by this module.
"""
import ast
import re
import sys
import textwrap
class InternalError(Exception):
  """Raised when reformatted code cannot be verified as valid Python."""
def VerifyCode(code):
  """Verify that the reformatted code is syntactically correct.

  Arguments:
    code: (unicode) The reformatted code snippet.

  Raises:
    InternalError: if none of the compilation strategies succeed.
  """
  # Strategy 1: compile the dedented snippet as-is.
  try:
    compile(textwrap.dedent(code).encode('UTF-8'), '<string>', 'exec')
    return
  except SyntaxError:
    pass
  # Strategy 2: strip leading blank lines/indentation and parse the AST.
  try:
    ast.parse(textwrap.dedent(code.lstrip('\n')).lstrip(), '<string>', 'exec')
    return
  except SyntaxError:
    pass
  # Strategy 3: pad the snippet into a compilable unit before compiling.
  try:
    compile(_NormalizeCode(code).encode('UTF-8'), '<string>', 'exec')
  except SyntaxError:
    raise InternalError(sys.exc_info()[1])
def _NormalizeCode(code):
"""Make sure that the code snippet is compilable."""
code = textwrap.dedent(code.lstrip('\n')).lstrip()
# Split the code to lines and get rid of all leading full-comment lines as
# they can mess up the normalization attempt.
lines = code.split('\n')
i = 0
for i, line in enumerate(lines):
line = line.strip()
if line and not line.startswith('#'):
break
code = '\n'.join(lines[i:]) + '\n'
if re.match(r'(if|while|for|with|def|class|async|await)\b', code):
code += '\n pass'
elif re.match(r'(elif|else)\b', code):
try:
try_code = 'if True:\n pass\n' + code + '\n pass'
ast.parse(
textwrap.dedent(try_code.lstrip('\n')).lstrip(), '<string>', 'exec')
code = try_code
except SyntaxError:
# The assumption here is that the code is on a single line.
code = 'if True: pass\n' + code
elif code.startswith('@'):
code += '\ndef _():\n pass'
elif re.match(r'try\b', code):
code += '\n pass\nexcept:\n pass'
elif re.match(r'(except|finally)\b', code):
code = 'try:\n pass\n' + code + '\n pass'
elif re.match(r'(return|yield)\b', code):
code = 'def _():\n ' + code
elif re.match(r'(continue|break)\b', code):
code = 'while True:\n ' + code
elif re.match(r'print\b', code):
code = 'from __future__ import print_function\n' + code
return code + '\n'

View File

@ -0,0 +1,295 @@
# Copyright 2015-2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Entry points for YAPF.
The main APIs that YAPF exposes to drive the reformatting.
FormatFile(): reformat a file.
FormatCode(): reformat a string of code.
These APIs have some common arguments:
style_config: (string) Either a style name or a path to a file that contains
formatting style settings. If None is specified, use the default style
as set in style.DEFAULT_STYLE_FACTORY
lines: (list of tuples of integers) A list of tuples of lines, [start, end],
that we want to format. The lines are 1-based indexed. It can be used by
third-party code (e.g., IDEs) when reformatting a snippet of code rather
than a whole file.
print_diff: (bool) Instead of returning the reformatted source, return a
diff that turns the formatted source into reformatter source.
verify: (bool) True if reformatted code should be verified for syntax.
"""
import difflib
import re
import sys
from lib2to3.pgen2 import tokenize
from yapf.yapflib import blank_line_calculator
from yapf.yapflib import comment_splicer
from yapf.yapflib import continuation_splicer
from yapf.yapflib import file_resources
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_unwrapper
from yapf.yapflib import pytree_utils
from yapf.yapflib import reformatter
from yapf.yapflib import split_penalty
from yapf.yapflib import style
from yapf.yapflib import subtype_assigner
def FormatFile(filename,
               style_config=None,
               lines=None,
               print_diff=False,
               verify=False,
               in_place=False,
               logger=None):
  """Format a single Python file and return the formatted code.

  Arguments:
    filename: (unicode) The file to reformat.
    in_place: (bool) If True, write the reformatted code back to the file.
    logger: (io streamer) A stream to output logging.
    remaining arguments: see comment at the top of this module.

  Returns:
    Tuple of (reformatted_code, encoding, changed). reformatted_code is None if
    the file is successfully written to (having used in_place).
    reformatted_code is a diff if print_diff is True.

  Raises:
    IOError: raised if there was an error reading the file.
    ValueError: raised if in_place and print_diff are both specified.
  """
  _CheckPythonVersion()
  # in_place writes to the file; print_diff returns a diff -- both at once is
  # contradictory.
  if in_place and print_diff:
    raise ValueError('Cannot pass both in_place and print_diff.')
  original_source, newline, encoding = ReadFile(filename, logger)
  reformatted_source, changed = FormatCode(
      original_source,
      style_config=style_config,
      filename=filename,
      lines=lines,
      print_diff=print_diff,
      verify=verify)
  if reformatted_source.rstrip('\n'):
    # Re-join the output using the file's original line-ending style.
    lines = reformatted_source.rstrip('\n').split('\n')
    reformatted_source = newline.join(line for line in lines) + newline
  if in_place:
    # Only touch the file when the contents actually changed.
    if original_source and original_source != reformatted_source:
      file_resources.WriteReformattedCode(filename, reformatted_source,
                                          in_place, encoding)
    return None, encoding, changed
  return reformatted_source, encoding, changed
def FormatCode(unformatted_source,
               filename='<unknown>',
               style_config=None,
               lines=None,
               print_diff=False,
               verify=False):
  """Format a string of Python code.

  This provides an alternative entry point to YAPF.

  Arguments:
    unformatted_source: (unicode) The code to format.
    filename: (unicode) The name of the file being reformatted.
    remaining arguments: see comment at the top of this module.

  Returns:
    Tuple of (reformatted_source, changed). reformatted_source conforms to the
    desired formatting style. changed is True if the source changed.
  """
  _CheckPythonVersion()
  style.SetGlobalStyle(style.CreateStyleFromConfig(style_config))
  if not unformatted_source.endswith('\n'):
    # Ensure the source ends with a newline before parsing.
    unformatted_source += '\n'
  tree = pytree_utils.ParseCodeToTree(unformatted_source)
  # Run passes on the tree, modifying it in place.
  comment_splicer.SpliceComments(tree)
  continuation_splicer.SpliceContinuations(tree)
  subtype_assigner.AssignSubtypes(tree)
  split_penalty.ComputeSplitPenalties(tree)
  blank_line_calculator.CalculateBlankLines(tree)
  # Unwrap the tree into logical lines and compute per-line formatting info.
  uwlines = pytree_unwrapper.UnwrapPyTree(tree)
  for uwl in uwlines:
    uwl.CalculateFormattingInformation()
  # Honor the requested line ranges and any 'yapf: disable' markers.
  _MarkLinesToFormat(uwlines, lines)
  reformatted_source = reformatter.Reformat(uwlines, verify)
  if unformatted_source == reformatted_source:
    # Nothing changed: an empty diff, or the source itself, and changed=False.
    return '' if print_diff else reformatted_source, False
  code_diff = _GetUnifiedDiff(
      unformatted_source, reformatted_source, filename=filename)
  if print_diff:
    return code_diff, code_diff != ''
  return reformatted_source, True
def _CheckPythonVersion(): # pragma: no cover
errmsg = 'yapf is only supported for Python 2.7 or 3.4+'
if sys.version_info[0] == 2:
if sys.version_info[1] < 7:
raise RuntimeError(errmsg)
elif sys.version_info[0] == 3:
if sys.version_info[1] < 4:
raise RuntimeError(errmsg)
def ReadFile(filename, logger=None):
  """Read the contents of the file.

  An optional logger can be specified to emit messages to your favorite logging
  stream. If specified, then no exception is raised. This is external so that it
  can be used by third-party applications.

  Arguments:
    filename: (unicode) The name of the file.
    logger: (function) A function or lambda that takes a string and emits it.

  Returns:
    Tuple of (source, line_ending, encoding): the file contents normalized to
    '\n' line endings, the detected original line-ending style, and the
    detected encoding.

  Raises:
    IOError: raised if there was an error reading the file.
  """
  try:
    # Open in binary mode first so the source encoding (PEP 263 coding
    # cookie / BOM) can be detected.
    with open(filename, 'rb') as fd:
      encoding = tokenize.detect_encoding(fd.readline)[0]
  except IOError as err:
    if logger:
      logger(err)
    raise
  try:
    # Preserves line endings.
    with py3compat.open_with_encoding(
        filename, mode='r', encoding=encoding, newline='') as fd:
      lines = fd.readlines()
    line_ending = file_resources.LineEnding(lines)
    # Normalize to '\n' for processing; callers re-apply line_ending on write.
    source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n'
    return source, line_ending, encoding
  except IOError as err:  # pragma: no cover
    if logger:
      logger(err)
    raise
# Matches a comment of the form '# yapf: disable' (full line or suffix).
DISABLE_PATTERN = r'^#.*\byapf:\s*disable\b'
# Matches a comment of the form '# yapf: enable' (full line or suffix).
ENABLE_PATTERN = r'^#.*\byapf:\s*enable\b'
def _MarkLinesToFormat(uwlines, lines):
  """Skip sections of code that we shouldn't reformat.

  Sets uwline.disable on every unwrapped line that falls outside the
  requested line ranges or inside a 'yapf: disable' region.
  """
  if lines:
    # Start with everything disabled, then re-enable the requested ranges.
    for uwline in uwlines:
      uwline.disable = True
    # Sort and combine overlapping ranges.
    lines = sorted(lines)
    # NOTE(review): if lines[0] were an empty tuple, line_ranges starts empty
    # and line_ranges[-1] below would raise -- presumably callers never pass
    # empty tuples; verify against the command-line parsing.
    line_ranges = [lines[0]] if len(lines[0]) else []
    index = 1
    while index < len(lines):
      current = line_ranges[-1]
      if lines[index][0] <= current[1]:
        # The ranges overlap, so combine them.
        line_ranges[-1] = (current[0], max(lines[index][1], current[1]))
      else:
        line_ranges.append(lines[index])
      index += 1
    # Mark lines to format as not being disabled.
    index = 0
    for start, end in sorted(line_ranges):
      # Skip unwrapped lines that end before this range starts.
      while index < len(uwlines) and uwlines[index].last.lineno < start:
        index += 1
      if index >= len(uwlines):
        break
      while index < len(uwlines):
        if uwlines[index].lineno > end:
          break
        # Re-enable any unwrapped line that touches [start, end].
        if (uwlines[index].lineno >= start or
            uwlines[index].last.lineno >= start):
          uwlines[index].disable = False
        index += 1
  # Now go through the lines and disable any lines explicitly marked as
  # disabled.
  index = 0
  while index < len(uwlines):
    uwline = uwlines[index]
    if uwline.is_comment:
      if _DisableYAPF(uwline.first.value.strip()):
        # A standalone 'yapf: disable' comment: disable everything that
        # follows until a matching 'yapf: enable' comment is found.
        index += 1
        while index < len(uwlines):
          uwline = uwlines[index]
          if uwline.is_comment and _EnableYAPF(uwline.first.value.strip()):
            break
          uwline.disable = True
          index += 1
    elif re.search(DISABLE_PATTERN, uwline.last.value.strip(), re.IGNORECASE):
      # A trailing 'yapf: disable' comment disables just this line.
      uwline.disable = True
    index += 1
def _DisableYAPF(line):
  """Return a truthy match if *line* carries a 'yapf: disable' marker."""
  # Check both the first and last physical line of a possibly multi-line
  # comment value.
  first_line = line.split('\n')[0].strip()
  last_line = line.split('\n')[-1].strip()
  return (re.search(DISABLE_PATTERN, first_line, re.IGNORECASE) or
          re.search(DISABLE_PATTERN, last_line, re.IGNORECASE))
def _EnableYAPF(line):
  """Return a truthy match if *line* carries a 'yapf: enable' marker."""
  # Check both the first and last physical line of a possibly multi-line
  # comment value.
  first_line = line.split('\n')[0].strip()
  last_line = line.split('\n')[-1].strip()
  return (re.search(ENABLE_PATTERN, first_line, re.IGNORECASE) or
          re.search(ENABLE_PATTERN, last_line, re.IGNORECASE))
def _GetUnifiedDiff(before, after, filename='code'):
"""Get a unified diff of the changes.
Arguments:
before: (unicode) The original source code.
after: (unicode) The reformatted source code.
filename: (unicode) The code's filename.
Returns:
The unified diff text.
"""
before = before.splitlines()
after = after.splitlines()
return '\n'.join(
difflib.unified_diff(
before,
after,
filename,
filename,
'(original)',
'(reformatted)',
lineterm='')) + '\n'

26
tools/yapf_util.py Normal file
View File

@ -0,0 +1,26 @@
# Copyright (c) 2017 The Chromium Embedded Framework Authors. All rights
# reserved. Use of this source code is governed by a BSD-style license that
# can be found in the LICENSE file
from exec_util import exec_cmd
import os
import sys
# Script (tools/) directory.
script_dir = os.path.dirname(__file__)
# Repository root: the parent of the tools/ directory. yapf reads its
# .style.yapf configuration from here when formatting via stdin.
root_dir = os.path.join(script_dir, os.pardir)
def yapf_format(file_name, file_contents):
# Reads .style.yapf in the root_dir when specifying contents via stdin.
result = exec_cmd("%s %s/yapf" % (sys.executable, script_dir), root_dir,
file_contents)
if result['err'] != '':
print "yapf error: %s" % result['err']
if result['out'] != '':
output = result['out']
if sys.platform == 'win32':
# Convert to Unix line endings.
output = output.replace("\r", "")
return output
return None