Qual: Backport/pre commit / log annotation simplification (#28614)

* Qual: ci: Run pre-commit/php-cs with cache (#28079)

This adds a hook to .pre-commit-config.yaml and updates the workflow
to run php-cs with cache when it is run for all files.
When running on changed files only, the cache is not useful.

The php-codesniffer ruleset.xml was cleaned up (duplicates removal/formatted)

* Fix: Make all 'relative paths' absolute (#28196)

# Fix: Make all 'relative paths' absolute

The phpcs ruleset xml file's relative exclude patterns are relative to
the filename(s) provided on the command line.
Hence with partial verifications, the path exclusion does not function
as we would like.
Removing the relative-path attribute from the patterns the exclusion
works.
At the same time, the patterns were optimized and a comment was added.

* Qual: Optimize workflow (#28386)

# Qual: Optimize workflow

The log annotation based on the pre-commit logs is now simplified.
This commit is contained in:
MDW 2024-03-04 18:21:32 +01:00 committed by GitHub
parent ce4dc610ed
commit f166f15f7d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 601 deletions

588
.github/logToCs.py vendored
View File

@ -1,588 +0,0 @@
#!/usr/bin/env python3
# pylint: disable=invalid-name
"""
Convert a log to CheckStyle format.
Url: https://github.com/mdeweerd/LogToCheckStyle
The log can then be used for generating annotations in a github action.
Note: this script is very young and "quick and dirty".
Patterns can be added to "PATTERNS" to match more messages.
# Examples
Assumes that logToCs.py is available as .github/logToCs.py.
## Example 1:
```yaml
- run: |
pre-commit run -all-files | tee pre-commit.log
.github/logToCs.py pre-commit.log pre-commit.xml
- uses: staabm/annotate-pull-request-from-checkstyle-action@v1
with:
files: pre-commit.xml
notices-as-warnings: true # optional
```
## Example 2:
```yaml
- run: |
pre-commit run --all-files | tee pre-commit.log
- name: Add results to PR
if: ${{ always() }}
run: |
.github/logToCs.py pre-commit.log | cs2pr
```
Author(s):
- https://github.com/mdeweerd
License: MIT License
"""
import argparse
import datetime as dt
import json
import os
import re
import sys
import xml.etree.ElementTree as ET # nosec
def remove_prefix(string, prefix):
"""
Remove prefix from string
Provided for backward compatibility.
"""
if prefix and string.startswith(prefix):
return string[len(prefix) :]
return string
def convert_notices_to_checkstyle(notices, root_path=None):
"""
Convert annotation list to CheckStyle xml string
"""
root = ET.Element("checkstyle")
for fields in notices:
add_error_entry(root, **fields, root_path=root_path)
return ET.tostring(root, encoding="utf_8").decode("utf_8")
def convert_lines_to_notices(lines):
"""
Convert provided message to CheckStyle format.
"""
notices = []
for line in lines:
fields = parse_message(line)
if fields:
notices.append(fields)
return notices
def convert_text_to_notices(text):
"""
Convert provided message to CheckStyle format.
"""
return parse_file(text)
# Initial version for Checkrun from:
# https://github.com/tayfun/flake8-your-pr/blob/50a175cde4dd26a656734c5b64ba1e5bb27151cb/src/main.py#L7C1-L123C36
# MIT Licence
class CheckRun:
"""
Represents the check run
"""
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", None)
GITHUB_EVENT_PATH = os.environ.get("GITHUB_EVENT_PATH", None)
URI = "https://api.github.com"
API_VERSION = "2022-11-28"
ACCEPT_HEADER_VALUE = "application/vnd.github+json"
AUTH_HEADER_VALUE = f"Bearer {GITHUB_TOKEN}"
# This is the max annotations Github API accepts in one go.
MAX_ANNOTATIONS = 50
def __init__(self):
"""
Initialise Check Run object with information from checkrun
"""
self.read_event_file()
self.read_meta_data()
def read_event_file(self):
"""
Read the event file to get the event information later.
"""
if self.GITHUB_EVENT_PATH is None:
raise ValueError("Not running in github workflow")
with open(self.GITHUB_EVENT_PATH, encoding="utf_8") as event_file:
self.event = json.loads(event_file.read())
def read_meta_data(self):
"""
Get meta data from event information
"""
self.repo_full_name = self.event["repository"]["full_name"]
pull_request = self.event.get("pull_request")
print("%r", self.event)
if pull_request:
self.head_sha = pull_request["head"]["sha"]
else:
print("%r", self.event)
check_suite = self.event.get("check_suite", None)
if check_suite is not None:
self.head_sha = check_suite["pull_requests"][0]["base"]["sha"]
else:
self.head_sha = None # Can't annotate?
def submit( # pylint: disable=too-many-arguments
self,
notices,
title=None,
summary=None,
text=None,
conclusion=None,
):
"""
Submit annotations to github
See:
https://docs.github.com/en/rest/checks/runs?apiVersion=2022-11-28
#update-a-check-run
:param conclusion: success, failure
"""
# pylint: disable=import-outside-toplevel
import requests # Import here to not impose presence of module
if self.head_sha is None:
return
output = {
"annotations": notices[: CheckRun.MAX_ANNOTATIONS],
}
if title is not None:
output["title"] = title
if summary is not None:
output["summary"] = summary
if text is not None:
output["text"] = text
if conclusion is None:
# action_required, cancelled, failure, neutral, success
# skipped, stale, timed_out
if bool(notices):
conclusion = "failure"
else:
conclusion = "success"
payload = {
"name": "log-to-pr-annotation",
"head_sha": self.head_sha,
"status": "completed", # queued, in_progress, completed
"conclusion": conclusion,
# "started_at": dt.datetime.now(dt.timezone.utc).isoformat(),
"completed_at": dt.datetime.now(dt.timezone.utc).isoformat(),
"output": output,
}
# Create the check-run
response = requests.post(
f"{self.URI}/repos/{self.repo_full_name}/check-runs",
headers={
"Accept": self.ACCEPT_HEADER_VALUE,
"Authorization": self.AUTH_HEADER_VALUE,
"X-GitHub-Api-Version": self.API_VERSION,
},
json=payload,
timeout=30,
)
print(response.content)
response.raise_for_status()
ANY_REGEX = r".*?"
FILE_REGEX = r"\s*(?P<file_name>\S.*?)\s*?"
FILEGROUP_REGEX = r"\s*(?P<file_group>\S.*?)\s*?"
EOL_REGEX = r"[\r\n]"
LINE_REGEX = r"\s*(?P<line>\d+?)\s*?"
COLUMN_REGEX = r"\s*(?P<column>\d+?)\s*?"
SEVERITY_REGEX = r"\s*(?P<severity>error|warning|notice|style|info)\s*?"
MSG_REGEX = r"\s*(?P<message>.+?)\s*?"
MULTILINE_MSG_REGEX = r"\s*(?P<message>(?:.|.[\r\n])+)"
# cpplint confidence index
CONFIDENCE_REGEX = r"\s*\[(?P<confidence>\d+)\]\s*?"
# List of message patterns, add more specific patterns earlier in the list
# Creating patterns by using constants makes them easier to define and read.
PATTERNS = [
# beautysh
# File ftp.sh: error: "esac" before "case" in line 90.
re.compile(
f"^File {FILE_REGEX}:{SEVERITY_REGEX}:"
f" {MSG_REGEX} in line {LINE_REGEX}.$"
),
# beautysh
# File socks4echo.sh: error: indent/outdent mismatch: -2.
re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
# yamllint
# ##[group].pre-commit-config.yaml
# ##[error]97:14 [trailing-spaces] trailing spaces
# ##[endgroup]
re.compile(rf"^##\[group\]{FILEGROUP_REGEX}$"), # Start file group
re.compile(
rf"^##\[{SEVERITY_REGEX}\]{LINE_REGEX}:{COLUMN_REGEX}{MSG_REGEX}$"
), # Msg
re.compile(r"^##(?P<file_endgroup>\[endgroup\])$"), # End file group
# File socks4echo.sh: error: indent/outdent mismatch: -2.
re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
# Emacs style
# path/to/file:845:5: error - Expected 1 space after closing brace
re.compile(
rf"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}:{SEVERITY_REGEX}"
rf"-?\s{MSG_REGEX}$"
),
# ESLint (JavaScript Linter), RoboCop, shellcheck
# path/to/file.js:10:2: Some linting issue
# path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected
# path/to/script.sh:10:1: SC2034: Some shell script issue
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"),
# Cpplint default output:
# '%s:%s: %s [%s] [%d]\n'
# % (filename, linenum, message, category, confidence)
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"),
# MSVC
# file.cpp(10): error C1234: Some error message
re.compile(
f"^{FILE_REGEX}\\({LINE_REGEX}\\):{SEVERITY_REGEX}{MSG_REGEX}$"
),
# Java compiler
# File.java:10: error: Some error message
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"),
# Python
# File ".../logToCs.py", line 90 (note: code line follows)
re.compile(f'^File "{FILE_REGEX}", line {LINE_REGEX}$'),
# Pylint, others
# path/to/file.py:10: [C0111] Missing docstring
# others
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"),
# Shellcheck:
# In script.sh line 76:
re.compile(
f"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?"
f"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}"
),
# eslint:
# /path/to/filename
# 14:5 error Unexpected trailing comma comma-dangle
re.compile(
f"^{FILE_REGEX}{EOL_REGEX}"
rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$"
),
]
# Exceptionnaly some regexes match messages that are not error.
# This pattern matches those exceptions
EXCLUDE_MSG_PATTERN = re.compile(
r"^("
r"Placeholder pattern" # To remove on first message pattern
r")"
)
# Exceptionnaly some regexes match messages that are not error.
# This pattern matches those exceptions
EXCLUDE_FILE_PATTERN = re.compile(
r"^("
# Codespell: (appears as a file name):
r"Used config files\b"
r")"
)
# Severities available in CodeSniffer report format
SEVERITY_NOTICE = "notice"
SEVERITY_WARNING = "warning"
SEVERITY_ERROR = "error"
def strip_ansi(text: str):
"""
Strip ANSI escape sequences from string (colors, etc)
"""
return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)
def parse_file(text):
"""
Parse all messages in a file
Returns the fields in a dict.
"""
# pylint: disable=too-many-branches,too-many-statements
# regex required to allow same group names
try:
import regex # pylint: disable=import-outside-toplevel
except ImportError as exc:
raise ImportError(
"The 'parsefile' method requires 'python -m pip install regex'"
) from exc
patterns = [pattern.pattern for pattern in PATTERNS]
# patterns = [PATTERNS[0].pattern]
file_group = None # The file name for the group (if any)
full_regex = "(?:(?:" + (")|(?:".join(patterns)) + "))"
results = []
for fields in regex.finditer(
full_regex, strip_ansi(text), regex.MULTILINE | regex.IGNORECASE
):
if not fields:
continue
result = fields.groupdict()
if len(result) == 0:
continue
severity = result.get("severity", None)
file_name = result.get("file_name", None)
confidence = result.pop("confidence", None)
new_file_group = result.pop("file_group", None)
file_endgroup = result.pop("file_endgroup", None)
message = result.get("message", None)
if new_file_group is not None:
# Start of file_group, just store file
file_group = new_file_group
continue
if file_endgroup is not None:
file_group = None
continue
if file_name is None:
if file_group is not None:
file_name = file_group
result["file_name"] = file_name
else:
# No filename, skip
continue
else:
if EXCLUDE_FILE_PATTERN.search(file_name):
# This file_name is excluded
continue
if message is not None:
if EXCLUDE_MSG_PATTERN.search(message):
# This message is excluded
continue
if confidence is not None:
# Convert confidence level of cpplint
# to warning, etc.
confidence = int(confidence)
if confidence <= 1:
severity = SEVERITY_NOTICE
elif confidence >= 5:
severity = SEVERITY_ERROR
else:
severity = SEVERITY_WARNING
if severity is None:
severity = SEVERITY_ERROR
else:
severity = severity.lower()
if severity in ["info", "style"]:
severity = SEVERITY_NOTICE
result["severity"] = severity
results.append(result)
return results
def parse_message(message):
"""
Parse message until it matches a pattern.
Returns the fields in a dict.
"""
for pattern in PATTERNS:
fields = pattern.match(message, re.IGNORECASE)
if not fields:
continue
result = fields.groupdict()
if len(result) == 0:
continue
if "confidence" in result:
# Convert confidence level of cpplint
# to warning, etc.
confidence = int(result["confidence"])
del result["confidence"]
if confidence <= 1:
severity = SEVERITY_NOTICE
elif confidence >= 5:
severity = SEVERITY_ERROR
else:
severity = SEVERITY_WARNING
result["severity"] = severity
if "severity" not in result:
result["severity"] = SEVERITY_ERROR
else:
result["severity"] = result["severity"].lower()
if result["severity"] in ["info", "style"]:
result["severity"] = SEVERITY_NOTICE
return result
# Nothing matched
return None
def add_error_entry( # pylint: disable=too-many-arguments
root,
severity,
file_name,
line=None,
column=None,
message=None,
source=None,
root_path=None,
):
"""
Add error information to the CheckStyle output being created.
"""
file_element = find_or_create_file_element(
root, file_name, root_path=root_path
)
error_element = ET.SubElement(file_element, "error")
error_element.set("severity", severity)
if line:
error_element.set("line", line)
if column:
error_element.set("column", column)
if message:
error_element.set("message", message)
if source:
# To verify if this is a valid attribute
error_element.set("source", source)
def find_or_create_file_element(root, file_name: str, root_path=None):
"""
Find/create file element in XML document tree.
"""
if root_path is not None:
file_name = remove_prefix(file_name, root_path)
for file_element in root.findall("file"):
if file_element.get("name") == file_name:
return file_element
file_element = ET.SubElement(root, "file")
file_element.set("name", file_name)
return file_element
def main():
"""
Parse the script arguments and get the conversion done.
"""
parser = argparse.ArgumentParser(
description="Convert messages to Checkstyle XML format."
)
parser.add_argument(
"input", help="Input file. Use '-' for stdin.", nargs="?", default="-"
)
parser.add_argument(
"output",
help="Output file. Use '-' for stdout.",
nargs="?",
default="-",
)
parser.add_argument(
"-i",
"--in",
dest="input_named",
help="Input filename. Overrides positional input.",
)
parser.add_argument(
"-o",
"--out",
dest="output_named",
help="Output filename. Overrides positional output.",
)
parser.add_argument(
"--root",
metavar="ROOT_PATH",
help="Root directory to remove from file paths."
" Defaults to working directory.",
default=os.getcwd(),
)
parser.add_argument(
"--github-annotate",
action=argparse.BooleanOptionalAction,
help="Annotate when in Github workflow.",
# Currently disabled,
# Future: (os.environ.get("GITHUB_EVENT_PATH", None) is not None),
default=False,
)
args = parser.parse_args()
if args.input == "-" and args.input_named:
with open(
args.input_named, encoding="utf_8", errors="surrogateescape"
) as input_file:
text = input_file.read()
elif args.input != "-":
with open(
args.input, encoding="utf_8", errors="surrogateescape"
) as input_file:
text = input_file.read()
else:
text = sys.stdin.read()
root_path = os.path.join(args.root, "")
try:
notices = convert_text_to_notices(text)
except ImportError:
notices = convert_lines_to_notices(re.split(r"[\r\n]+", text))
checkstyle_xml = convert_notices_to_checkstyle(
notices, root_path=root_path
)
if args.output == "-" and args.output_named:
with open(args.output_named, "w", encoding="utf_8") as output_file:
output_file.write(checkstyle_xml)
elif args.output != "-":
with open(args.output, "w", encoding="utf_8") as output_file:
output_file.write(checkstyle_xml)
else:
print(checkstyle_xml)
if args.github_annotate:
checkrun = CheckRun()
checkrun.submit(notices)
if __name__ == "__main__":
main()

View File

@ -7,7 +7,6 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
env:
LOG_TO_CS: .github/logToCs.py
RAW_LOG: pre-commit.log
CS_XML: pre-commit.xml
steps:
@ -37,7 +36,7 @@ jobs:
with:
cache: pip
python-version: "3.11"
- run: python -m pip install pre-commit regex
- run: python -m pip install pre-commit
# Restore previous cache of precommit
- uses: actions/cache/restore@v4
with:
@ -102,19 +101,12 @@ jobs:
pre-commit run --hook-stage manual -a php-cs-with-cache | tee -a ${RAW_LOG}
ls -l ~/.cache/pre-commit/
# If error, we convert log in the checkstyle format
- name: Convert Raw Log to CheckStyle format
if: ${{ failure() }}
run: |
python ${LOG_TO_CS} ${RAW_LOG} ${CS_XML}
# Annotate the git sources with the log messages
- name: Annotate Source Code with Messages
uses: staabm/annotate-pull-request-from-checkstyle-action@v1
- name: Convert Raw Log to Annotations
uses: mdeweerd/logToCheckStyle@v2024.2.9
if: ${{ failure() }}
with:
files: ${{ env.CS_XML }}
notices-as-warnings: true # optional
prepend-filename: true # optional
in: ${{ env.RAW_LOG }}
# Save the precommit cache
- uses: actions/cache/save@v4
if: ${{ ! cancelled() }}