NEW: Script for fast identification of missing/unused/duplicate translations

# NEW: Script for fast identification of missing/unused/duplicate translations

Fast analysis of files to identify missing, unused and duplicate translations.
Fast = less than 3 seconds when there are no exceptions (on my development machine).
This commit is contained in:
MDW 2024-01-22 02:57:10 +01:00
parent ec1b8284c9
commit c23794a23c
No known key found for this signature in database
6 changed files with 5358 additions and 2 deletions

View File

@ -62,7 +62,9 @@ repos:
rev: v6.2.1
hooks:
- id: beautysh
exclude: (?x)^(dev/setup/git/hooks/pre-commit)$
exclude: |
(?x)^(dev/setup/git/hooks/pre-commit
)$
args: [--tab]
# Run local script
@ -77,6 +79,13 @@ repos:
# ```
- repo: local
hooks:
- name: Find missing/unused/duplicate language keys
id: check-translations
files: (?x)^(htdocs/langs/en_US/.*\.lang)
language: script
entry: ./dev/translation/sanity_check_trans_missing_unused.sh
pass_filenames: false
args: [list]
- id: local-precommit-script
name: Run local script before commit if it exists
language: system

View File

@ -0,0 +1,265 @@
# List of translation keys that are known to be defined more than once.
# The keys listed here are ignored when reporting duplicates.
# FYI: Headers generated on https://manytools.org/hacker-tools/ascii-banner/ - DOS Rebel or ANSI Shadow
# Can also use 'figlet' tool (installable with `apt-get install figlet`)
#
# ██████████
# ░░███░░░░░█
# ░███ █ ░ █████ █████ ████████ ██████ ████████ █████ ██████ █████
# ░██████ ░░███ ░░███ ░░███░░███ ███░░███░░███░░███ ███░░ ███░░███ ███░░
# ░███░░█ ░░░█████░ ░███ ░███░███████ ░███ ░███ ░░█████ ░███████ ░░█████
# ░███ ░ █ ███░░░███ ░███ ░███░███░░░ ░███ ░███ ░░░░███░███░░░ ░░░░███
# ██████████ █████ █████ ░███████ ░░██████ ████ █████ ██████ ░░██████ ██████
# ░░░░░░░░░░ ░░░░░ ░░░░░ ░███░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░░ ░░░░░░
# ░███
# █████
# ░░░░░
# Expenses and trips overlap quite a bit, which is by design
#
AUTHOR
AUTHORPAIEMENT
AddTrip
AllExpenseReport
AllExpenseReports
AnyOtherInThisListCanValidate
AttachTheNewLineToTheDocument
AucuneLigne
BrouillonnerTrip
CANCEL_USER
CarCategory
ClassifyRefunded
CompanyVisited
ConfirmBrouillonnerTrip
ConfirmCancelTrip
ConfirmCloneExpenseReport
ConfirmDeleteTrip
ConfirmPaidTrip
ConfirmRefuseTrip
ConfirmSaveTrip
ConfirmValideTrip
DATE_CANCEL
DATE_PAIEMENT
DATE_REFUS
DATE_SAVE
DefaultCategoryCar
DefaultRangeNumber
DeleteTrip
EX_BRE
EX_CAM
EX_CAM_VP
EX_CAR
EX_CUR
EX_DOC
EX_EMM
EX_FUE
EX_FUE_VP
EX_GUM
EX_HOT
EX_IND
EX_KME
EX_OTR
EX_PAR
EX_PAR_VP
EX_POS
EX_SUM
EX_SUO
EX_TAX
EX_TOL
EX_TOL_VP
ErrorBadValueForParameter
ErrorDoubleDeclaration
ErrorRecordNotFound
Error_EXPENSEREPORT_ADDON_NotDefined
ExpenseRangeOffset
ExpenseReportApplyTo
ExpenseReportApproved
ExpenseReportApprovedMessage
ExpenseReportCanceled
ExpenseReportCanceledMessage
ExpenseReportConstraintViolationError
ExpenseReportConstraintViolationWarning
ExpenseReportDateEnd
ExpenseReportDateStart
ExpenseReportDomain
ExpenseReportIkDesc
ExpenseReportLimitAmount
ExpenseReportLimitOn
ExpenseReportLine
ExpenseReportPaid
ExpenseReportPaidMessage
ExpenseReportPayment
ExpenseReportRef
ExpenseReportRefused
ExpenseReportRefusedMessage
ExpenseReportRestrictive
ExpenseReportRuleErrorOnSave
ExpenseReportRuleSave
ExpenseReportRulesDesc
ExpenseReportWaitingForApproval
ExpenseReportWaitingForApprovalMessage
ExpenseReportWaitingForReApproval
ExpenseReportWaitingForReApprovalMessage
ExpenseReportsIk
ExpenseReportsRules
ExpenseReportsToApprove
ExpenseReportsToPay
ExpensesArea
FeesKilometersOrAmout
LastExpenseReports
ListOfFees
ListOfTrips
ListToApprove
ListTripsAndExpenses
MOTIF_CANCEL
MOTIF_REFUS
ModePaiement
NOT_AUTHOR
NewTrip
NoTripsToExportCSV
OnExpense
PDFStandardExpenseReports
PaidTrip
REFUSEUR
RangeIk
RangeNum
SaveTrip
ShowExpenseReport
ShowTrip
TF_BUS
TF_CAR
TF_ESSENCE
TF_HOTEL
TF_LUNCH
TF_METRO
TF_OTHER
TF_PEAGE
TF_TAXI
TF_TRAIN
TF_TRIP
TripCard
TripId
TripNDF
TripSociete
Trips
TripsAndExpenses
TripsAndExpensesStatistics
TypeFees
UploadANewFileNow
VALIDATOR
VALIDOR
ValidateAndSubmit
ValidatedWaitingApproval
ValideTrip
byEX_DAY
byEX_EXP
byEX_MON
byEX_YEA
expenseReportCatDisabled
expenseReportCoef
expenseReportCoefUndefined
expenseReportOffset
expenseReportPrintExample
expenseReportRangeDisabled
expenseReportRangeFromTo
expenseReportRangeMoreThan
expenseReportTotalForFive
nolimitbyEX_DAY
nolimitbyEX_EXP
nolimitbyEX_MON
nolimitbyEX_YEA
#
# █████████ █████ ███
# ███░░░░░███ ░░███ ░░░
# ░███ ░░░ ███████ ████████ ████ ████████ ██████
# ░░█████████ ░░░███░ ░░███░░███░░███ ░░███░░███ ███░░███
# ░░░░░░░░███ ░███ ░███ ░░░ ░███ ░███ ░███░███████
# ███ ░███ ░███ ███ ░███ ░███ ░███ ░███░███░░░
# ░░█████████ ░░█████ █████ █████ ░███████ ░░██████
# ░░░░░░░░░ ░░░░░ ░░░░░ ░░░░░ ░███░░░ ░░░░░░
# ░███
# █████
# ░░░░░
# Stripe is similar to Paybox and has some keys in common
#
AccountParameter
CSSUrlForPaymentForm
Continue
Creditor
FollowingUrlAreAvailableToMakePayments
InformationToFindParameters
PaymentCode
PaymentForm
ThisIsInformationOnPayment
ThisScreenAllowsYouToPay
ToComplete
UsageParameter
WelcomeOnPaymentPage
YourEMail
#############################################################
#############################################################
AccountancyCode
AffectedTo
AvailableFormats
BIC
BankTransferAmount
Buy
ByDefaultInList
ByYear
CashDesk
ChooseFileToImport
ConfirmCloneAsk
ContractSigned
ContractStatusClosed
CreateUser
CreatedBy
Customer
CustomerInvoicePayment
DatabaseName
DatabaseServer
DeleteFromCat
DeleteType
DriverType
ExportableDatas
ExportsArea
History
IBAN
IdModule
InterventionSentByEMail
InvoiceRef
InvoiceSubtype
LanguageFile
LineId
ListOfStockMovements
Location
MinimumAmount
Movements
NewSubscription
NewUser
NoSupplierOrder
NoticePeriod
OrderWaiting
PriceFormatInCurrentLanguage
Prospect
Prospect
ReOpen
ReceptionClassifyClosedInDolibarr
Rejects
Salaries
Sell
Server
ShowCompany
ShowTask
ShowTypeCard
StatusInterInvoiced
StatusToPay
Stock
Stocks
SubscriptionPayment
Suppliers
Type
Unit
Upgrade
WithdrawalReceipt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,212 @@
#!/bin/bash
#
# Find missing, unused and duplicate translations pretty fast...
#
# Principle:
#
# 1. Generate two files:
# - one for available translations keys,
# - one for expected keys.
# 2. Make the difference between the files.
#
# Find expected translation keys:
# 1. Find all occurrences that look like `->trans("` or `->trans('`
# with fast grep.
# 2. Split result to have only one '->trans(' on each line
# 3. Filter the text between the single or double quotes.
#
# Find available translation keys:
# 1. Get all strings before '=' token in the language files
#
# Notes:
# - Some side effects from translations on variables.
# - Some other minor side effects to be examined (#, %).
#
# Copyright (C) 2024 MDW <mdeweerd@users.noreply.github.com>
# Reference language directory (relative to the current directory).
LANG_DIR="htdocs/langs/en_US/"
# Directory of this script; the key list files (*.lst) are located next to it.
MYDIR="$(dirname "$(realpath "$0")")"
# Directory for the intermediate files: TMP from the environment, or /tmp
# when TMP is unset or empty.
: "${TMP:=/tmp}"

# Intermediate files.
EXPECTED_FILE="${TMP}/expected_translations"
AVAILABLE_FILE_NODEDUP="${TMP}/available_translations_no_dedup"
AVAILABLE_FILE="${TMP}/available_translations"
DUPLICATE_KEYS_FILE="${TMP}/duplicate_keys"
DYNAMIC_KEYS_FILE="${TMP}/dynamic_keys"
MISSING_AND_UNUSED_FILE="${TMP}/missing_and_unused"
MISSING_FILE="${TMP}/missing"
UNUSED_FILE="${TMP}/unused"

# Expression (PCRE) for available keys that are not reported as unused.
EXPECTED_REGEX='(Country..|Language_.._..|MonthVeryShort\d\d|PaperFormat.*|ProfId\d(..)?|unit.*)'

# Hand-maintained key lists.
DYNAMIC_KEYS_SRC_FILE="${MYDIR}/dynamic_translation_keys.lst"
EXCLUDE_KEYS_SRC_FILE="${MYDIR}/ignore_translation_keys.lst"
DUPLICATE_KEYS_SRC_FILE="${MYDIR}/duplicate_translation_keys.lst"

# Options for the plain (non-git) grep. The value starts with a space
# because every option is appended with a leading separator.
GREP_OPTS=" --exclude=htdocs/theme/common/fontawe*/"
GREP_OPTS+=" --exclude-dir=.cache --exclude-dir=.git"
GREP_OPTS+=" --exclude=*.phar --exclude=*.webp --exclude=*.z"
GREP_OPTS+=" --exclude=*.sw? --exclude=*.json"

# Note: 'git grep' is used to restrict the search to version controlled
#       files and for its more flexible globbing.
# TODO/to ignore:
#  transnoentities(), transnoentitiesnoconv(),
#  formSetup->newItem()

# Exit status of the script; set to 1 as soon as something is reported.
exit_code=0
# Collect the translation keys defined in the language files: the text in
# front of the first '=' (and of any whitespace preceding it) on lines that
# have no '#' before that '='. Keys listed in the ignore file are dropped.
# Duplicates are kept in this first, sorted, list.
grep -rhoP -- '^([^#=]+?)(?=\s*=.*)' "${LANG_DIR}" \
	| grep -xvF -f "${EXCLUDE_KEYS_SRC_FILE}" \
	| sort > "${AVAILABLE_FILE_NODEDUP}"
# Same list, with each key appearing only once.
sort -u "${AVAILABLE_FILE_NODEDUP}" > "${AVAILABLE_FILE}"
# Combine strings found in sources with pre-determined dynamic string values.
#
# Build the PCRE expressions used to find translation calls:
# - MATCH_STR: non-capturing alternation of the call prefixes. The group is
#   required because '\(' is appended to it when it is used: without the
#   group the parenthesis would only apply to the last alternative and any
#   line containing '->trans' (e.g. '->transaction') would match.
# - EXTRACT_STR: one alternative per prefix, matching the text between the
#   quote that follows '<prefix>(' and a closing quote at the end of the line.
#
# MATCH_STR is reset explicitly so that a value inherited from the
# environment can not leak into the expression.
MATCH_STR=""
EXTRACT_STR=""
JOIN_STR=""
for t in '->trans' '->transnoentities' '->transnoentitiesnoconv' 'formSetup->newItem' ; do
	MATCH_STR="${MATCH_STR}${JOIN_STR}${t}"
	EXTRACT_STR="${EXTRACT_STR}${JOIN_STR}(?<=${t}\\([\"'])([^\"']+)(?=[\"']\$)"
	JOIN_STR="|"
done
MATCH_STR="(?:${MATCH_STR})"
# Build the list of expected keys: the keys found in translation calls in the
# sources, followed by the fixed list of keys used in dynamic expressions.
{
# Find static strings that are translated in the sources (comments stripped)
# Steps of the pipeline below:
# 1. `git grep` prints the matching lines of the version controlled *.php
#    and *.html files (unversioned files are not searched).
#    With std grep: `grep --no-filename -r ${GREP_OPTS} -- '->trans(' . `
# 2. The first `sed` removes everything from a comment start up to the end
#    of the line: '#' at the start of the line, '//' not preceded by ':'
#    (so '://' is kept), '/*', or a line starting with optional blanks and '*'.
# 3. The second `sed` replaces each ')' with a newline and each quote followed
#    by ',' with that quote and a newline, so that a quoted key ends its line.
# 4. The last `grep` prints only the text between the quotes.
# shellcheck disable=2086
git grep -h -r -P -- "${MATCH_STR}\\(" ':*.php' ':*.html' \
| sed 's@\(^#\|[^:]//\|/\*\|^\s*\*\).*@@' \
| sed 's@)\|\(['"'"'"]\)\(,\)@\1\n@g' \
| grep -aPo "$EXTRACT_STR(?=.$)"
# "Append" the list of strings that are used in dynamic expressions.
# (Fixed list: needs to be updated if the dynamic strings evolve.)
cat "${DYNAMIC_KEYS_SRC_FILE}"
} \
| grep -x -v -F -f "${EXCLUDE_KEYS_SRC_FILE}" \
| sort -u \
| grep -v -P '^(#|$)' \
> "${EXPECTED_FILE}"
# The filters after the group drop the keys listed in the ignore file (whole
# line, fixed string match), sort and de-duplicate the result, and remove
# empty lines and lines starting with '#'.
# Disabled code (the test below is always false), kept for reference.
# It writes the arguments of ->trans() calls that are not a quoted string
# to ${DYNAMIC_KEYS_FILE}.
# shellcheck disable=2050
if [ 0 = 1 ] ; then
# Find dynamic keys for call to trans.
# The lines containing '->trans(' are split at each ')'; the fragments where
# '->trans(' is followed by a quoted string are dropped, and the text
# following '->trans(' is printed for the remaining ones.
# shellcheck disable=2086
grep --no-filename ${GREP_OPTS} -r -- '->trans(' . \
| tr ')' '\n' \
| grep -- '->trans(' \
| grep -v -P '(?<=->trans\(["'"'"'])([^"'"'"']*)(?=["'"'"'])' \
| grep -Po '(?<=->trans\()(.*)' \
| sort -u \
> "${DYNAMIC_KEYS_FILE}"
fi
# Produce reports on STDOUT.
# Some output is already compatible with message extraction for github annotation (logToCs.py)
#
# Compare the available keys with the expected keys. Only the diff lines
# starting with '<' (key is available but not expected: apparently unused)
# or '>' (key is expected but not available: missing) are kept.
# Available keys matching EXPECTED_REGEX are not reported as unused.
diff "${AVAILABLE_FILE}" "${EXPECTED_FILE}" \
| grep -E "^[<>]" \
| grep -v -P "^< ${EXPECTED_REGEX}$" \
| sort \
> "${MISSING_AND_UNUSED_FILE}"
# Print the raw list, with some hints, only when it is not empty.
if [ -s "${MISSING_AND_UNUSED_FILE}" ] ; then
echo "##[group]List Apparently Unused Translations (<) and Missing Translations (>)"
echo
echo "## :warning: Unused Translations may match ->trans(\$key.'SomeString')."
echo "## You can add such dynamic keys to $(basename "$DYNAMIC_KEYS_SRC_FILE")"
echo "## so that they are ignored for this report."
echo "## :warning: Unused Translations may be commented in the code"
echo "## You can add such 'disabled' keys to $(basename "$EXCLUDE_KEYS_SRC_FILE")"
echo "## so that they are ignored for this report."
echo
cat "${MISSING_AND_UNUSED_FILE}"
echo "##[endgroup]"
echo
fi
# Turn each unused key ('< key' line) into an expression matching its
# definition ('^key\s*=') and store the result for use with `git grep -f`.
sed -n 's@< \(.*\)@^\1\\s*=@p' \
< "${MISSING_AND_UNUSED_FILE}" \
> "${UNUSED_FILE}.grep"
# Too many results, git grep is slow
#sed -n 's@> \(.*\)@trans.["'"'"']\1["'"'"'].@p' \
#	< "${MISSING_AND_UNUSED_FILE}" \
#	> "${MISSING_FILE}.grep"
#
# Prepare file with exact matches for use with `git grep -F`.
# Each missing key ('> key' line) is expanded to one line per call form:
#   ->trans("key",   ->trans('key',   ->trans("key")   ->trans('key')
# and the same for transnoentities and transnoentitiesnoconv.
#
REPL_STR=""
for t in trans transnoentities transnoentitiesnoconv ; do
REPL_STR="${REPL_STR}\n->${t}(\"\\1\","
REPL_STR="${REPL_STR}\n->${t}('\\1',"
REPL_STR="${REPL_STR}\n->${t}(\"\\1\")"
REPL_STR="${REPL_STR}\n->${t}('\\1')"
done
# The replacement text starts with a newline, which produces an empty line
# per key; these empty lines are removed by the `grep -v`.
sed -n 's@> \(.*\)'"@${REPL_STR}@p" \
< "${MISSING_AND_UNUSED_FILE}" \
| grep -v -E '^$' \
> "${MISSING_FILE}.grep"
# Report the location of each unused key in the language files.
# Any reported item makes the script exit with status 1.
if [ -s "${UNUSED_FILE}.grep" ] ; then
exit_code=1
# Report unused translation in recognizable format
# (`git grep -n --column` prefixes each match with file, line and column;
# the output is sorted from the 4th ':'-separated field on and the message
# is inserted after the first three fields.)
git grep -n --column -r -f "${UNUSED_FILE}.grep" -- "${LANG_DIR}"'/*.lang' \
| sort -t: -k 4 \
| sed 's@^\([^:]*:[^:]*:[^:]*:\)\s*@\1 Not used, translated; @'
fi
# Report the location of each call using a missing key in the *.php and
# *.html sources (fixed string search).
if [ -s "${MISSING_FILE}.grep" ] ; then
exit_code=1
# Report missing translation in recognizable format
git grep -n --column -r -F -f "${MISSING_FILE}.grep" -- ':*.php' ':*.html' \
| sort -t: -k 4 \
| sed 's@^\([^:]*:[^:]*:[^:]*:\)\s*@\1 Missing translation; @'
fi
# Find duplicate keys: the lines that are in the sorted key list with
# duplicates but not in the de-duplicated list are repeated definitions.
# - Keys listed in the accepted duplicates file are dropped.
# - `uniq` avoids listing a key several times when it is defined more than
#   twice (the input is sorted).
# - Each remaining key is turned into an expression matching its definition,
#   allowing whitespace before '=' like the key extraction and the unused key
#   patterns do ('&' is the portable form of the matched text).
diff "${AVAILABLE_FILE_NODEDUP}" "${AVAILABLE_FILE}" \
	| grep -Po '(?<=^\< )(.*)$' \
	| uniq \
	| grep -x -v -F -f "${DUPLICATE_KEYS_SRC_FILE}" \
	| sed 's/.*/^&\\s*=/' \
	> "${DUPLICATE_KEYS_FILE}"

if [ -s "${DUPLICATE_KEYS_FILE}" ] ; then
	exit_code=1
	echo
	echo "##[group]List Duplicate Keys"
	echo "## :warning:"
	echo "## Duplicate keys may be expected across language files."
	echo "## You may want to avoid them or they could be a copy/paste mistake."
	echo "## You can add valid duplicates to $(basename "$DUPLICATE_KEYS_SRC_FILE")"
	echo "## so that they are ignored for this report."
	cat "${DUPLICATE_KEYS_FILE}"
	echo "##[endgroup]"
	echo
	# Report every definition of a duplicate key (file and line), sorted from
	# the 3rd ':'-separated field on so that identical keys are grouped.
	# A trailing '/' of LANG_DIR is stripped to avoid '//' in the pathspec.
	git grep -n -r -f "${DUPLICATE_KEYS_FILE}" -- "${LANG_DIR%/}/*.lang" \
		| sort -t: -k 3 \
		| sed 's@^\([^:]*:[^:]*:\)\s*@\1 Is/Has duplicate @'
fi

# Status is 1 when unused, missing or duplicate keys were reported.
exit "${exit_code}"

View File

@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
# `codespell` can be run as a standalone program from the CLI
# with the appropriate default options.
skip = "*/.*/*,*/langs/*,*/build/exe/*,**.log,*.pdf,*dev/resources/*,*.phar,*.z,*.gz,*.sql,*.svg,*htdocs/includes/*,*/textiso.txt,*.js,*README-*,*build/rpm/*spec,*build/pad/*ml,*htdocs/includes/phpoffice/*,*htdocs/includes/tecnickcom/*,*dev/initdemo/removeconfdemo.sh,*dev/tools/codespell/*,*pyproject.toml,*build/exe/*,*fontawe*,*htdocs/theme/*/flags-sprite.inc.php,*dev/setup/codetemplates/codetemplates.xml,*/php.ini,*/html_cerfafr.*,*/lessc.class.php,*.asciidoc,*.xml,*opensurvey/css/style.css,*dev/tools/phan/stubs/*,*/documents"
skip = "*/.*/*,*/langs/*,*/build/exe/*,**.log,*.pdf,*dev/resources/*,*.phar,*.z,*.gz,*.sql,*.svg,*htdocs/includes/*,*/textiso.txt,*.js,*README-*,*build/rpm/*spec,*build/pad/*ml,*htdocs/includes/phpoffice/*,*htdocs/includes/tecnickcom/*,*dev/initdemo/removeconfdemo.sh,*dev/tools/codespell/*,*dev/trans*/ignore_translation_keys.lst,*pyproject.toml,*build/exe/*,*fontawe*,*htdocs/theme/*/flags-sprite.inc.php,*dev/setup/codetemplates/codetemplates.xml,*/php.ini,*/html_cerfafr.*,*/lessc.class.php,*.asciidoc,*.xml,*opensurvey/css/style.css,*dev/tools/phan/stubs/*,*/documents"
check-hidden = true
quiet-level=2