2015-12-11 05:08:32 +01:00
#!/usr/bin/env php
2014-05-22 14:33:42 +02:00
< ? php
2014-11-30 16:20:58 +01:00
/* Copyright ( C ) 2014 by FromDual GmbH , licensed under GPL v2
2014-12-09 00:02:43 +01:00
* Copyright ( C ) 2014 Laurent Destailleur < eldy @ users . sourceforge . net >
2024-02-10 17:26:34 +01:00
* Copyright ( C ) 2024 MDW < mdeweerd @ users . noreply . github . com >
2014-12-09 00:02:43 +01:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
2021-11-09 03:01:48 +01:00
* the Free Software Foundation ; either version 3 of the License , or
2014-12-09 00:02:43 +01:00
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
2019-06-23 15:35:12 +02:00
* along with this program . If not , see < https :// www . gnu . org / licenses />.
2014-05-22 14:33:42 +02:00
*
2014-11-30 16:20:58 +01:00
* -----
2014-05-22 14:33:42 +02:00
*
* Compares a secondary language translation file with its primary
* language file and strips redundant translations .
*
* Todo : Check if it works with multi byte ( mb_ * ) character sets !
*
* Usage :
* cd <dolibarr root directory> (the script resolves htdocs/langs/<lang_dir>/ relative to the current directory)
2015-03-13 17:42:50 +01:00
* ./ dev / translation / strip_language_file . php < primary_lang_dir > < secondary_lang_dir > [ file . lang | all ]
2014-05-22 14:33:42 +02:00
*
2014-11-30 16:20:58 +01:00
* To rename all . delta files , you can do
* for fic in `ls *.delta` ; do f = `echo $fic | sed -e 's/\.delta//'` ; echo $f ; mv $f . delta $f ; done
2014-05-22 14:33:42 +02:00
*
* Rules :
* secondary string == primary string -> strip
* secondary string redundant -> strip and warning
* secondary string not in primary -> strip and warning
* secondary string has no value -> strip and warning
* secondary string != primary string -> secondary . lang . delta
*/
/**
* \file dev / translation / strip_language_file . php
* \ingroup dev
* \brief This script cleans sub-languages of duplicate key-value pairs
*/
// Identify how PHP was invoked and where this script lives.
$sapi_type = php_sapi_name();
$script_file = basename(__FILE__);
$path = __DIR__ . '/';

// Return code of the script; each error path below assigns its own value.
$rc = 0;

// This tool is a batch script: refuse to run when PHP is used through a CGI SAPI.
if (strncmp($sapi_type, 'cgi', 3) === 0) {
    print "Error: You are using PHP for CGI. To execute " . $script_file . " from command line, you must use PHP for CLI mode.\n";
    exit();
}
// Read the positional arguments. A missing argument is kept as an empty string
// so that the check below can report it.
$lPrimary = '';
if (isset($argv[1])) {
    $lPrimary = $argv[1];
}
$lSecondary = '';
if (isset($argv[2])) {
    $lSecondary = $argv[2];
}
$filesToProcess = '';
if (isset($argv[3])) {
    $filesToProcess = $argv[3];
}

// The English files are always used as an additional reference.
$lEnglish = 'en_US';

// All three arguments are required: print the usage and stop with rc=1 otherwise.
$missingArgument = empty($lPrimary) || empty($lSecondary) || empty($filesToProcess);
if ($missingArgument) {
    $rc = 1;
    $usage = "***** Script to clean language files *****\n"
        . "Usage: ./dev/translation/strip_language_file.php xx_XX xx_YY [file.lang|all]\n";
    print $usage . " (rc=$rc).\n";
    exit($rc);
}
// Key => value maps filled while the language files are parsed.
$aPrimary = array();
$aSecondary = array();
$aEnglish = array();

// Turn $filesToProcess (the third command line argument) into an array of file names:
// - 'all'         : every regular, non-hidden file of the primary language directory;
// - anything else : a comma separated list of file names.
if ($filesToProcess == 'all') {
    // Start from an empty list so that a directory without any matching file
    // gives an empty array instead of an undefined variable.
    $files = array();
    $dir = new DirectoryIterator('htdocs/langs/' . $lPrimary);
    foreach ($dir as $entry) {
        // Skip '.' and '..', sub-directories and hidden files.
        if ($entry->isDot() || !$entry->isFile() || preg_match('/^\./', $entry->getFilename())) {
            continue;
        }
        $files[] = $entry->getFilename();
    }
    $filesToProcess = $files;
} else {
    $filesToProcess = explode(',', $filesToProcess);
}
// Arguments should be OK here.

// Keys that are always written to the .delta file, whatever their value
// (keys starting with FormatDate or FormatHour are always kept too, see below).
$arrayofkeytoalwayskeep = array('DIRECTION', 'FONTFORPDF', 'FONTSIZEFORPDF', 'SeparatorDecimal', 'SeparatorThousand');

// Loop on each file.
// For every file, the secondary and the English versions are loaded in memory, then the
// primary file is read line by line and the secondary translations that are worth keeping
// are written into "<secondary file>.delta". See the rules in the file header.
//
// NOTE(review): $aPrimary and $aSecondary are not reset between files (only $aEnglish is),
// so a key already met in a file processed earlier in the same run is reported as
// redundant. Confirm that this cross-file detection is intended.
foreach ($filesToProcess as $fileToProcess) {
    $lPrimaryFile = 'htdocs/langs/' . $lPrimary . '/' . $fileToProcess;
    $lSecondaryFile = 'htdocs/langs/' . $lSecondary . '/' . $fileToProcess;
    $lEnglishFile = 'htdocs/langs/' . $lEnglish . '/' . $fileToProcess;
    $output = $lSecondaryFile . '.delta';

    print "---- Process language file " . $lSecondaryFile . "\n";

    // A missing primary file is fatal; a missing secondary or English file only skips this file.
    if (!is_readable($lPrimaryFile)) {
        $rc = 2;
        $msg = "Cannot read primary language file $lPrimaryFile.";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }

    if (!is_readable($lSecondaryFile)) {
        $rc = 3;
        $msg = "Cannot read secondary language file $lSecondaryFile. We discard this file.";
        print $msg . "\n";
        continue;
    }

    if (!is_readable($lEnglishFile)) {
        $rc = 3;
        $msg = "Cannot read english language file $lEnglishFile. We discard this file.";
        print $msg . "\n";
        continue;
    }

    // Start reading and parsing Secondary
    $handle = fopen($lSecondaryFile, 'r');
    if (!$handle) {
        $rc = 6;
        $msg = "Cannot open file $lSecondaryFile";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }

    print "Read Secondary File $lSecondaryFile:\n";
    $cnt = 0;
    while (($line = fgets($handle)) !== false) {
        $cnt++;

        // strip comments (optional leading whitespace, then '#')
        if (preg_match('/^\s*#/', $line)) {
            continue;
        }
        // strip empty lines (nothing but whitespace)
        if (preg_match('/^\s*$/', $line)) {
            continue;
        }

        // Split on the first '=' only: the value may itself contain '='.
        $a = mb_split('=', trim($line), 2);
        if (count($a) != 2) {
            print "File $lSecondaryFile:ERROR: " . trim($line) . " in line $cnt.\n";
            continue;
        }
        list($key, $value) = $a;

        // key is redundant
        if (array_key_exists($key, $aSecondary)) {
            print "File $lSecondaryFile:WARNING: Key $key is redundant in line $cnt.\n";
            continue;
        }

        // String has no value
        if ($value === '') {
            print "File $lSecondaryFile:WARNING: Key $key has no value in line: $cnt.\n";
            continue;
        }

        $aSecondary[$key] = trim($value);
    }
    // fgets() returned false: this is only normal at end of file.
    if (!feof($handle)) {
        $rc = 5;
        $msg = "Unexpected fgets() fail";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }
    fclose($handle);

    // Start reading and parsing English
    $aEnglish = array();
    $handle = fopen($lEnglishFile, 'r');
    if (!$handle) {
        $rc = 6;
        $msg = "Cannot open file $lEnglishFile";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }

    print "Read English File $lEnglishFile:\n";
    $cnt = 0;
    while (($line = fgets($handle)) !== false) {
        $cnt++;

        // strip comments (optional leading whitespace, then '#')
        if (preg_match('/^\s*#/', $line)) {
            continue;
        }
        // strip empty lines (nothing but whitespace)
        if (preg_match('/^\s*$/', $line)) {
            continue;
        }

        $a = mb_split('=', trim($line), 2);
        if (count($a) != 2) {
            print "File $lEnglishFile:ERROR: " . trim($line) . " in line $cnt.\n";
            continue;
        }
        list($key, $value) = $a;

        // key is redundant
        if (array_key_exists($key, $aEnglish)) {
            print "File $lEnglishFile:WARNING: Key $key is redundant in line $cnt.\n";
            continue;
        }

        // String has no value
        if ($value === '') {
            print "File $lEnglishFile:WARNING: Key $key has no value in line $cnt.\n";
            continue;
        }

        $aEnglish[$key] = trim($value);
    }
    if (!feof($handle)) {
        $rc = 5;
        $msg = "Unexpected fgets() fail";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }
    fclose($handle);

    // Start reading and parsing Primary. See rules in header!
    $handle = fopen($lPrimaryFile, 'r');
    if (!$handle) {
        $rc = 8;
        $msg = "Cannot open file $lPrimaryFile";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }

    $oh = fopen($output, 'w');
    if (!$oh) {
        // Report the failure to the caller with a non-zero exit status.
        $rc = 4;
        print "ERROR writing to file " . $output . "\n";
        exit($rc);
    }

    print "Read Primary File " . $lPrimaryFile . " and write " . $output . ":\n";

    // Header line of the generated file: the file name without its .lang extension.
    fwrite($oh, "# Dolibarr language file - Source file is en_US - " . (preg_replace('/\.lang$/', '', $fileToProcess)) . "\n");

    // File and line where each key of the current primary file was first met.
    $fileFirstFound = array();
    $lineFirstFound = array();
    $cnt = 0;
    while (($line = fgets($handle)) !== false) {
        $cnt++;

        // strip comments (optional leading whitespace, then '#')
        if (preg_match('/^\s*#/', $line)) {
            continue;
        }
        // strip empty lines (nothing but whitespace)
        if (preg_match('/^\s*$/', $line)) {
            continue;
        }

        $a = mb_split('=', trim($line), 2);
        if (count($a) != 2) {
            print "File $lPrimaryFile:ERROR: " . trim($line) . " in line $cnt.\n";
            continue;
        }
        list($key, $value) = $a;

        // key is redundant
        if (array_key_exists($key, $aPrimary)) {
            $prefix = "File $lPrimaryFile:WARNING: Key $key is redundant";
            $postfix = " in line $cnt.\n";
            if (!empty($fileFirstFound[$key])) {
                print "$prefix [Already found in '" . $fileFirstFound[$key];
                print "' (line: " . $lineFirstFound[$key] . ")]$postfix";
            } else {
                // Known in $aPrimary but not met in the current file yet.
                $fileFirstFound[$key] = $fileToProcess;
                $lineFirstFound[$key] = $cnt;
                print "$prefix [Already found in main file]$postfix";
            }
            continue;
        }

        // First time this key is met: remember where.
        $fileFirstFound[$key] = $fileToProcess;
        $lineFirstFound[$key] = $cnt;

        // String has no value
        if ($value === '') {
            print "File $lPrimaryFile:WARNING: Key $key has no value in line $cnt.\n";
            continue;
        }

        $aPrimary[$key] = trim($value);

        // ----- Process output now -----
        //print "Found primary key = ".$key."\n";

        // Keys we always want to see in the secondary file.
        $forceKeep = in_array($key, $arrayofkeytoalwayskeep)
            || preg_match('/^FormatDate/', $key)
            || preg_match('/^FormatHour/', $key);

        // Key not in other file
        if (!$forceKeep && !array_key_exists($key, $aSecondary)) {
            //print "Key $key does NOT exist in secondary language (line: $cnt).\n";
            continue;
        }

        // String exists in both files and value into alternative language differs from main language
        // but also from english files, so we keep it.
        // All values are strings: compare them strictly so that numeric looking texts
        // such as "10" and "10.0" are not considered equal.
        $differs = !empty($aSecondary[$key]) && $aSecondary[$key] !== $aPrimary[$key]
            && !empty($aEnglish[$key]) && $aSecondary[$key] !== $aEnglish[$key];

        if ($differs || $forceKeep) {
            //print "Key $key differs (aSecondary=".$aSecondary[$key].", aPrimary=".$aPrimary[$key].", aEnglish=".$aEnglish[$key].") so we add it into new secondary language (line: $cnt).\n";
            // A forced key without secondary value falls back to the primary value.
            fwrite($oh, $key . "=" . (empty($aSecondary[$key]) ? $aPrimary[$key] : $aSecondary[$key]) . "\n");
        }
    }
    if (!feof($handle)) {
        $rc = 7;
        $msg = "Unexpected fgets() fail";
        print $msg . " (rc=$rc).\n";
        exit($rc);
    }
    fclose($oh);
    fclose($handle);

    print "Output can be found at $output.\n";

    print "To rename all .delta files, you can do:\n";
    print '> for fic in `ls htdocs/langs/' . $lSecondary . '/*.delta`; do f=`echo $fic | sed -e \'s/\.delta//\'`; echo $f; mv $f.delta $f; done' . "\n";
}

// NOTE(review): the script still ends with status 0 when files were discarded (rc=3 above).
return 0;