From f166f15f7d8dfd15ae2e53fec92935e4b815c6ab Mon Sep 17 00:00:00 2001
From: MDW
Date: Mon, 4 Mar 2024 18:21:32 +0100
Subject: [PATCH] Qual: Backport/pre commit / log annotation simplification
 (#28614)

* Qual: ci: Run pre-commit/php-cs with cache (#28079)

This adds a hook to .pre-commit-config.yaml and updates the workflow to
run php-cs with cache when it is run for all files.
When running on changed files only, the cache is not useful.

The php-codesniffer ruleset.xml was cleaned up (duplicates removed,
file reformatted).

* Fix: Make all 'relative paths' absolute (#28196)

# Fix: Make all 'relative paths' absolute

The phpcs ruleset xml file's relative exclude patterns are relative to
the filename(s) provided on the command line. Hence, with partial
verifications, the path exclusion does not work as intended.
After removing the relative-path attribute from the patterns, the
exclusion works as expected.

At the same time, the patterns were optimized and a comment was added.

* Qual: Optimize workflow (#28386)

# Qual: Optimize workflow

The log annotation based on the pre-commit logs is now simplified.
---
 .github/logToCs.py               | 588 -------------------------------
 .github/workflows/pre-commit.yml |  18 +-
 2 files changed, 5 insertions(+), 601 deletions(-)
 delete mode 100755 .github/logToCs.py

diff --git a/.github/logToCs.py b/.github/logToCs.py
deleted file mode 100755
index 7befd310a0a..00000000000
--- a/.github/logToCs.py
+++ /dev/null
@@ -1,588 +0,0 @@
-#!/usr/bin/env python3
-# pylint: disable=invalid-name
-"""
-Convert a log to CheckStyle format.
-
-Url: https://github.com/mdeweerd/LogToCheckStyle
-
-The log can then be used for generating annotations in a github action.
-
-Note: this script is very young and "quick and dirty".
-      Patterns can be added to "PATTERNS" to match more messages.
-
-# Examples
-
-Assumes that logToCs.py is available as .github/logToCs.py.
-
-## Example 1:
-
-
-```yaml
-  - run: |
-      pre-commit run --all-files | tee pre-commit.log
-      .github/logToCs.py pre-commit.log pre-commit.xml
-  - uses: staabm/annotate-pull-request-from-checkstyle-action@v1
-    with:
-      files: pre-commit.xml
-      notices-as-warnings: true # optional
-```
-
-## Example 2:
-
-
-```yaml
-  - run: |
-      pre-commit run --all-files | tee pre-commit.log
-  - name: Add results to PR
-    if: ${{ always() }}
-    run: |
-      .github/logToCs.py pre-commit.log | cs2pr
-```
-
-Author(s):
-  - https://github.com/mdeweerd
-
-License: MIT License
-
-"""
-
-import argparse
-import datetime as dt
-import json
-import os
-import re
-import sys
-import xml.etree.ElementTree as ET  # nosec
-
-
-def remove_prefix(string, prefix):
-    """
-    Remove prefix from string
-
-    Provided for backward compatibility.
-    """
-    if prefix and string.startswith(prefix):
-        return string[len(prefix) :]
-    return string
-
-
-def convert_notices_to_checkstyle(notices, root_path=None):
-    """
-    Convert annotation list to CheckStyle xml string
-    """
-    root = ET.Element("checkstyle")
-    for fields in notices:
-        add_error_entry(root, **fields, root_path=root_path)
-    return ET.tostring(root, encoding="utf_8").decode("utf_8")
-
-
-def convert_lines_to_notices(lines):
-    """
-    Convert provided message to CheckStyle format.
-    """
-    notices = []
-    for line in lines:
-        fields = parse_message(line)
-        if fields:
-            notices.append(fields)
-    return notices
-
-
-def convert_text_to_notices(text):
-    """
-    Convert provided message to CheckStyle format.
-    """
-    return parse_file(text)
-
-
-# Initial version for Checkrun from:
-# https://github.com/tayfun/flake8-your-pr/blob/50a175cde4dd26a656734c5b64ba1e5bb27151cb/src/main.py#L7C1-L123C36
-# MIT Licence
-class CheckRun:
-    """
-    Represents the check run
-    """
-
-    GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", None)
-    GITHUB_EVENT_PATH = os.environ.get("GITHUB_EVENT_PATH", None)
-
-    URI = "https://api.github.com"
-    API_VERSION = "2022-11-28"
-    ACCEPT_HEADER_VALUE = "application/vnd.github+json"
-    AUTH_HEADER_VALUE = f"Bearer {GITHUB_TOKEN}"
-    # This is the max annotations Github API accepts in one go.
-    MAX_ANNOTATIONS = 50
-
-    def __init__(self):
-        """
-        Initialise Check Run object with information from checkrun
-        """
-        self.read_event_file()
-        self.read_meta_data()
-
-    def read_event_file(self):
-        """
-        Read the event file to get the event information later.
-        """
-        if self.GITHUB_EVENT_PATH is None:
-            raise ValueError("Not running in github workflow")
-        with open(self.GITHUB_EVENT_PATH, encoding="utf_8") as event_file:
-            self.event = json.loads(event_file.read())
-
-    def read_meta_data(self):
-        """
-        Get meta data from event information
-        """
-        self.repo_full_name = self.event["repository"]["full_name"]
-        pull_request = self.event.get("pull_request")
-        print("%r", self.event)
-        if pull_request:
-            self.head_sha = pull_request["head"]["sha"]
-        else:
-            print("%r", self.event)
-            check_suite = self.event.get("check_suite", None)
-            if check_suite is not None:
-                self.head_sha = check_suite["pull_requests"][0]["base"]["sha"]
-            else:
-                self.head_sha = None  # Can't annotate?
-
-    def submit(  # pylint: disable=too-many-arguments
-        self,
-        notices,
-        title=None,
-        summary=None,
-        text=None,
-        conclusion=None,
-    ):
-        """
-        Submit annotations to github
-
-        See:
-          https://docs.github.com/en/rest/checks/runs?apiVersion=2022-11-28
-          #update-a-check-run
-
-        :param conclusion: success, failure
-        """
-        # pylint: disable=import-outside-toplevel
-        import requests  # Import here to not impose presence of module
-
-        if self.head_sha is None:
-            return
-
-        output = {
-            "annotations": notices[: CheckRun.MAX_ANNOTATIONS],
-        }
-        if title is not None:
-            output["title"] = title
-        if summary is not None:
-            output["summary"] = summary
-        if text is not None:
-            output["text"] = text
-        if conclusion is None:
-            # action_required, cancelled, failure, neutral, success
-            # skipped, stale, timed_out
-            if bool(notices):
-                conclusion = "failure"
-            else:
-                conclusion = "success"
-
-        payload = {
-            "name": "log-to-pr-annotation",
-            "head_sha": self.head_sha,
-            "status": "completed",  # queued, in_progress, completed
-            "conclusion": conclusion,
-            # "started_at": dt.datetime.now(dt.timezone.utc).isoformat(),
-            "completed_at": dt.datetime.now(dt.timezone.utc).isoformat(),
-            "output": output,
-        }
-
-        # Create the check-run
-        response = requests.post(
-            f"{self.URI}/repos/{self.repo_full_name}/check-runs",
-            headers={
-                "Accept": self.ACCEPT_HEADER_VALUE,
-                "Authorization": self.AUTH_HEADER_VALUE,
-                "X-GitHub-Api-Version": self.API_VERSION,
-            },
-            json=payload,
-            timeout=30,
-        )
-        print(response.content)
-        response.raise_for_status()
-
-
-ANY_REGEX = r".*?"
-FILE_REGEX = r"\s*(?P<file_name>\S.*?)\s*?"
-FILEGROUP_REGEX = r"\s*(?P<file_group>\S.*?)\s*?"
-EOL_REGEX = r"[\r\n]"
-LINE_REGEX = r"\s*(?P<line>\d+?)\s*?"
-COLUMN_REGEX = r"\s*(?P<column>\d+?)\s*?"
-SEVERITY_REGEX = r"\s*(?P<severity>error|warning|notice|style|info)\s*?"
-MSG_REGEX = r"\s*(?P<message>.+?)\s*?"
-MULTILINE_MSG_REGEX = r"\s*(?P<message>(?:.|.[\r\n])+)"
-# cpplint confidence index
-CONFIDENCE_REGEX = r"\s*\[(?P<confidence>\d+)\]\s*?"
-
-
-# List of message patterns, add more specific patterns earlier in the list
-# Creating patterns by using constants makes them easier to define and read.
-PATTERNS = [
-    # beautysh
-    # File ftp.sh: error: "esac" before "case" in line 90.
-    re.compile(
-        f"^File {FILE_REGEX}:{SEVERITY_REGEX}:"
-        f" {MSG_REGEX} in line {LINE_REGEX}.$"
-    ),
-    # beautysh
-    # File socks4echo.sh: error: indent/outdent mismatch: -2.
-    re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
-    # yamllint
-    # ##[group].pre-commit-config.yaml
-    # ##[error]97:14 [trailing-spaces] trailing spaces
-    # ##[endgroup]
-    re.compile(rf"^##\[group\]{FILEGROUP_REGEX}$"),  # Start file group
-    re.compile(
-        rf"^##\[{SEVERITY_REGEX}\]{LINE_REGEX}:{COLUMN_REGEX}{MSG_REGEX}$"
-    ),  # Msg
-    re.compile(r"^##(?P<file_endgroup>\[endgroup\])$"),  # End file group
-    # File socks4echo.sh: error: indent/outdent mismatch: -2.
-    re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
-    # Emacs style
-    # path/to/file:845:5: error - Expected 1 space after closing brace
-    re.compile(
-        rf"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}:{SEVERITY_REGEX}"
-        rf"-?\s{MSG_REGEX}$"
-    ),
-    # ESLint (JavaScript Linter), RoboCop, shellcheck
-    # path/to/file.js:10:2: Some linting issue
-    # path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected
-    # path/to/script.sh:10:1: SC2034: Some shell script issue
-    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"),
-    # Cpplint default output:
-    #   '%s:%s: %s [%s] [%d]\n'
-    #   % (filename, linenum, message, category, confidence)
-    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"),
-    # MSVC
-    # file.cpp(10): error C1234: Some error message
-    re.compile(
-        f"^{FILE_REGEX}\\({LINE_REGEX}\\):{SEVERITY_REGEX}{MSG_REGEX}$"
-    ),
-    # Java compiler
-    # File.java:10: error: Some error message
-    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"),
-    # Python
-    # File ".../logToCs.py", line 90 (note: code line follows)
-    re.compile(f'^File "{FILE_REGEX}", line {LINE_REGEX}$'),
-    # Pylint, others
-    # path/to/file.py:10: [C0111] Missing docstring
-    # others
-    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"),
-    # Shellcheck:
-    # In script.sh line 76:
-    re.compile(
-        f"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?"
-        f"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}"
-    ),
-    # eslint:
-    # /path/to/filename
-    #    14:5  error  Unexpected trailing comma  comma-dangle
-    re.compile(
-        f"^{FILE_REGEX}{EOL_REGEX}"
-        rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$"
-    ),
-]
-
-# Exceptionally some regexes match messages that are not errors.
-# This pattern matches those exceptions
-EXCLUDE_MSG_PATTERN = re.compile(
-    r"^("
-    r"Placeholder pattern"  # To remove on first message pattern
-    r")"
-)
-
-# Exceptionally some regexes match messages that are not errors.
-# This pattern matches those exceptions
-EXCLUDE_FILE_PATTERN = re.compile(
-    r"^("
-    # Codespell: (appears as a file name):
-    r"Used config files\b"
-    r")"
-)
-
-# Severities available in CodeSniffer report format
-SEVERITY_NOTICE = "notice"
-SEVERITY_WARNING = "warning"
-SEVERITY_ERROR = "error"
-
-
-def strip_ansi(text: str):
-    """
-    Strip ANSI escape sequences from string (colors, etc)
-    """
-    return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)
-
-
-def parse_file(text):
-    """
-    Parse all messages in a file
-
-    Returns the fields in a dict.
-    """
-    # pylint: disable=too-many-branches,too-many-statements
-    # regex required to allow same group names
-    try:
-        import regex  # pylint: disable=import-outside-toplevel
-    except ImportError as exc:
-        raise ImportError(
-            "The 'parsefile' method requires 'python -m pip install regex'"
-        ) from exc
-
-    patterns = [pattern.pattern for pattern in PATTERNS]
-    # patterns = [PATTERNS[0].pattern]
-
-    file_group = None  # The file name for the group (if any)
-    full_regex = "(?:(?:" + (")|(?:".join(patterns)) + "))"
-    results = []
-
-    for fields in regex.finditer(
-        full_regex, strip_ansi(text), regex.MULTILINE | regex.IGNORECASE
-    ):
-        if not fields:
-            continue
-        result = fields.groupdict()
-
-        if len(result) == 0:
-            continue
-
-        severity = result.get("severity", None)
-        file_name = result.get("file_name", None)
-        confidence = result.pop("confidence", None)
-        new_file_group = result.pop("file_group", None)
-        file_endgroup = result.pop("file_endgroup", None)
-        message = result.get("message", None)
-
-        if new_file_group is not None:
-            # Start of file_group, just store file
-            file_group = new_file_group
-            continue
-
-        if file_endgroup is not None:
-            file_group = None
-            continue
-
-        if file_name is None:
-            if file_group is not None:
-                file_name = file_group
-                result["file_name"] = file_name
-            else:
-                # No filename, skip
-                continue
-        else:
-            if EXCLUDE_FILE_PATTERN.search(file_name):
-                # This file_name is excluded
-                continue
-
-        if message is not None:
-            if EXCLUDE_MSG_PATTERN.search(message):
-                # This message is excluded
-                continue
-
-        if confidence is not None:
-            # Convert confidence level of cpplint
-            # to warning, etc.
-            confidence = int(confidence)
-
-            if confidence <= 1:
-                severity = SEVERITY_NOTICE
-            elif confidence >= 5:
-                severity = SEVERITY_ERROR
-            else:
-                severity = SEVERITY_WARNING
-
-        if severity is None:
-            severity = SEVERITY_ERROR
-        else:
-            severity = severity.lower()
-
-        if severity in ["info", "style"]:
-            severity = SEVERITY_NOTICE
-
-        result["severity"] = severity
-
-        results.append(result)
-
-    return results
-
-
-def parse_message(message):
-    """
-    Parse message until it matches a pattern.
-
-    Returns the fields in a dict.
-    """
-    for pattern in PATTERNS:
-        fields = pattern.match(message, re.IGNORECASE)
-        if not fields:
-            continue
-        result = fields.groupdict()
-        if len(result) == 0:
-            continue
-
-        if "confidence" in result:
-            # Convert confidence level of cpplint
-            # to warning, etc.
-            confidence = int(result["confidence"])
-            del result["confidence"]
-
-            if confidence <= 1:
-                severity = SEVERITY_NOTICE
-            elif confidence >= 5:
-                severity = SEVERITY_ERROR
-            else:
-                severity = SEVERITY_WARNING
-            result["severity"] = severity
-
-        if "severity" not in result:
-            result["severity"] = SEVERITY_ERROR
-        else:
-            result["severity"] = result["severity"].lower()
-
-        if result["severity"] in ["info", "style"]:
-            result["severity"] = SEVERITY_NOTICE
-
-        return result
-
-    # Nothing matched
-    return None
-
-
-def add_error_entry(  # pylint: disable=too-many-arguments
-    root,
-    severity,
-    file_name,
-    line=None,
-    column=None,
-    message=None,
-    source=None,
-    root_path=None,
-):
-    """
-    Add error information to the CheckStyle output being created.
-    """
-    file_element = find_or_create_file_element(
-        root, file_name, root_path=root_path
-    )
-    error_element = ET.SubElement(file_element, "error")
-    error_element.set("severity", severity)
-    if line:
-        error_element.set("line", line)
-    if column:
-        error_element.set("column", column)
-    if message:
-        error_element.set("message", message)
-    if source:
-        # To verify if this is a valid attribute
-        error_element.set("source", source)
-
-
-def find_or_create_file_element(root, file_name: str, root_path=None):
-    """
-    Find/create file element in XML document tree.
-    """
-
-    if root_path is not None:
-        file_name = remove_prefix(file_name, root_path)
-    for file_element in root.findall("file"):
-        if file_element.get("name") == file_name:
-            return file_element
-    file_element = ET.SubElement(root, "file")
-    file_element.set("name", file_name)
-    return file_element
-
-
-def main():
-    """
-    Parse the script arguments and get the conversion done.
-    """
-    parser = argparse.ArgumentParser(
-        description="Convert messages to Checkstyle XML format."
-    )
-    parser.add_argument(
-        "input", help="Input file. Use '-' for stdin.", nargs="?", default="-"
-    )
-    parser.add_argument(
-        "output",
-        help="Output file. Use '-' for stdout.",
-        nargs="?",
-        default="-",
-    )
-    parser.add_argument(
-        "-i",
-        "--in",
-        dest="input_named",
-        help="Input filename. Overrides positional input.",
-    )
-    parser.add_argument(
-        "-o",
-        "--out",
-        dest="output_named",
-        help="Output filename. Overrides positional output.",
-    )
-    parser.add_argument(
-        "--root",
-        metavar="ROOT_PATH",
-        help="Root directory to remove from file paths."
-        " Defaults to working directory.",
-        default=os.getcwd(),
-    )
-    parser.add_argument(
-        "--github-annotate",
-        action=argparse.BooleanOptionalAction,
-        help="Annotate when in Github workflow.",
-        # Currently disabled,
-        # Future: (os.environ.get("GITHUB_EVENT_PATH", None) is not None),
-        default=False,
-    )
-
-    args = parser.parse_args()
-
-    if args.input == "-" and args.input_named:
-        with open(
-            args.input_named, encoding="utf_8", errors="surrogateescape"
-        ) as input_file:
-            text = input_file.read()
-    elif args.input != "-":
-        with open(
-            args.input, encoding="utf_8", errors="surrogateescape"
-        ) as input_file:
-            text = input_file.read()
-    else:
-        text = sys.stdin.read()
-
-    root_path = os.path.join(args.root, "")
-
-    try:
-        notices = convert_text_to_notices(text)
-    except ImportError:
-        notices = convert_lines_to_notices(re.split(r"[\r\n]+", text))
-
-    checkstyle_xml = convert_notices_to_checkstyle(
-        notices, root_path=root_path
-    )
-
-    if args.output == "-" and args.output_named:
-        with open(args.output_named, "w", encoding="utf_8") as output_file:
-            output_file.write(checkstyle_xml)
-    elif args.output != "-":
-        with open(args.output, "w", encoding="utf_8") as output_file:
-            output_file.write(checkstyle_xml)
-    else:
-        print(checkstyle_xml)
-
-    if args.github_annotate:
-        checkrun = CheckRun()
-        checkrun.submit(notices)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index d0cdcb90ef3..1ae4d3387f8 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -7,7 +7,6 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     env:
-      LOG_TO_CS: .github/logToCs.py
       RAW_LOG: pre-commit.log
       CS_XML: pre-commit.xml
     steps:
@@ -37,7 +36,7 @@ jobs:
         with:
           cache: pip
           python-version: "3.11"
-      - run: python -m pip install pre-commit regex
+      - run: python -m pip install pre-commit
       # Restore previous cache of precommit
       - uses: actions/cache/restore@v4
         with:
@@ -102,19 +101,12 @@ jobs:
           pre-commit run --hook-stage manual -a php-cs-with-cache | tee -a ${RAW_LOG}
           ls -l ~/.cache/pre-commit/
-      # If error, we convert log in the checkstyle format
-      - name: Convert Raw Log to CheckStyle format
-        if: ${{ failure() }}
-        run: |
-          python ${LOG_TO_CS} ${RAW_LOG} ${CS_XML}
-      # Annotate the git sources with the log messages
-      - name: Annotate Source Code with Messages
-        uses: staabm/annotate-pull-request-from-checkstyle-action@v1
+      - name: Convert Raw Log to Annotations
+        uses: mdeweerd/logToCheckStyle@v2024.2.9
         if: ${{ failure() }}
         with:
-          files: ${{ env.CS_XML }}
-          notices-as-warnings: true # optional
-          prepend-filename: true # optional
+          in: ${{ env.RAW_LOG }}
+
       # Save the precommit cache
       - uses: actions/cache/save@v4
         if: ${{ ! cancelled() }}
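
The resulting annotation step, assembled from the hunks above, looks roughly like the sketch below. Only names that appear in the diff are used (job name, RAW_LOG, the mdeweerd/logToCheckStyle@v2024.2.9 action and its `in` input); the checkout, cache and pre-commit run steps are elided, so this is an illustration rather than the complete workflow file.

```yaml
# Sketch of the simplified tail of .github/workflows/pre-commit.yml
# (reconstructed from the hunks above; unrelated steps elided).
jobs:
  pre-commit:
    runs-on: ubuntu-latest
    env:
      RAW_LOG: pre-commit.log
    steps:
      # ... earlier steps run pre-commit and tee its output into ${RAW_LOG} ...
      # A single action now converts the raw log and annotates the PR,
      # replacing the former logToCs.py + checkstyle-action pair.
      - name: Convert Raw Log to Annotations
        uses: mdeweerd/logToCheckStyle@v2024.2.9
        if: ${{ failure() }}
        with:
          in: ${{ env.RAW_LOG }}
```

Because the annotation is handled by that single action, the first two hunks can drop the LOG_TO_CS variable and the extra `regex` install that only the deleted script needed.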