2020-02-10 20:35:18 +01:00
#!/usr/bin/env php
< ? php
2020-02-11 11:48:10 +01:00
/*
* Copyright ( C ) 2005 - 2011 James Grant < james @ lightbox . org > Lightbox Technologies Inc .
* Copyright ( C ) 2020 Laurent Destailleur < eldy @ users . sourceforge . net >
2020-02-10 20:35:18 +01:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
2020-02-11 11:48:10 +01:00
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
2020-02-10 20:35:18 +01:00
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program . If not , see < https :// www . gnu . org / licenses />.
*
2020-02-11 11:48:10 +01:00
 * This file is based on pg2mysql, provided as open source by lightbox.org.
* It was enhanced and updated by the Dolibarr team .
2020-02-10 20:35:18 +01:00
*/
/**
2023-06-30 10:45:58 +02:00
* \file dev / tools / dolibarr - postgres2mysql . php
* \brief Script to migrate a postgresql dump into a mysql dump
2020-02-10 20:35:18 +01:00
*/
$script_file = basename(__FILE__);
$path = dirname(__FILE__).'/';

// Check that the script is not run through the CGI SAPI (batch/CLI mode is required)
$sapi_type = php_sapi_name();
if (substr($sapi_type, 0, 3) === 'cgi') {
	echo "Error: You are using PHP for CGI. To execute ".$script_file." from command line, you must use PHP for CLI mode.\n";
	exit(1);
}

error_reporting(E_ALL & ~E_DEPRECATED);

define('PRODUCT', "pg2mysql");
define('VERSION', "2.0");

// this is the default, it can be overridden here, or specified as the third parameter on the command line
$config = array('engine' => "InnoDB");

// Both the input and the output file names are mandatory.
// empty() is used so that missing arguments do not raise "undefined array key" warnings.
if (empty($argv[1]) || empty($argv[2])) {
	echo "Usage: php ".$script_file." <inputfilename> <outputfilename> [engine]\n";
	exit(1);
}

if (isset($argv[3])) {
	$config['engine'] = $argv[3];
}

$result = pg2mysql_large($argv[1], $argv[2]);

echo <<<XHTML
Notes:
 - No its not perfect
 - Yes it discards ALL stored procedures
 - Yes it discards ALL queries except for CREATE TABLE and INSERT INTO
 - If you're having problems creating your postgres dump, make sure you use "--format p --inserts"
 - Default output engine if not specified is InnoDB

XHTML;

// Report a conversion failure to the calling shell through the exit status
if ($result < 0) {
	exit(1);
}
2020-02-11 11:48:10 +01:00
/**
 * Extract the column name from one line of a CREATE TABLE body.
 *
 * The name is taken from the first pair of backquotes found on the line. When the line
 * contains no backquoted identifier, the first space-delimited word of the trimmed line
 * is used instead.
 *
 * @param	string		$l		Line of a table definition
 * @return	string|null			Field name, or null when no name can be extracted
 */
function getfieldname($l)
{
	$regs = array();

	// First check if the name is in nice quotes for us. The match stops at the first closing
	// backquote so that a second quoted identifier on the same line is not swallowed.
	if (preg_match('/`([^`]*)`/', $l, $regs)) {
		return ($regs[1] !== '') ? $regs[1] : null;
	}

	// If it is not in quotes, then it should (we hope!) be the first "word" on the line, up to the first space.
	if (preg_match('/^([^ ]*)/', trim($l), $regs) && $regs[1] !== '') {
		return $regs[1];
	}

	return null;
}
2020-02-11 11:48:10 +01:00
/**
 * Render a byte count as a short human readable string.
 *
 * Values below 16384 are printed as a plain byte count followed by "B". Larger values are
 * divided by 1024 once, twice or three times and printed with one decimal followed by
 * "K", "M" or "G".
 *
 * @param	int		$s		Size in bytes
 * @return	string			Formatted size
 */
function formatsize($s)
{
	// Small sizes are shown as is
	if ($s < 16384) {
		return $s.'B';
	}

	$scaled = $s / 1024;
	$suffix = 'K';

	if ($s >= 1048576) {
		$scaled = $scaled / 1024;
		$suffix = 'M';
	}
	if ($s >= 1073741824) {
		$scaled = $scaled / 1024;
		$suffix = 'G';
	}

	return sprintf("%.1f", round($scaled, 1)).$suffix;
}
2020-02-11 11:48:10 +01:00
/**
 * Convert a PostgreSQL dump file into a MySQL dump file, one chunk of statements at a time.
 *
 * Input lines are accumulated until a line longer than 3 characters ending with ");\n" is read
 * outside of a quoted string. The accumulated chunk is then converted with pg2mysql() and written
 * to the output file. The statements collected in 'outputatend' by pg2mysql() are written after
 * all chunks, followed by the statements restoring the MySQL session variables.
 *
 * NOTE: lines read after the last chunk terminator are not converted.
 *
 * @param	string	$infilename		Input filename
 * @param	string	$outfilename	Output filename
 * @return	int						<0 if KO, >=0 if OK
 */
function pg2mysql_large($infilename, $outfilename)
{
	// Open the input first: the output file is truncated on open, so it must not be touched
	// when the input cannot be read.
	$infp = fopen($infilename, "rt");
	if (empty($infp)) {
		print 'Failed to open file '.$infilename."\n";
		return -1;
	}

	$outfp = fopen($outfilename, "wt");
	if (empty($outfp)) {
		print 'Failed to open file '.$outfilename." for writing\n";
		fclose($infp);
		return -1;
	}

	$outputatend = '';
	$arrayofprimaryalreadyintabledef = array();

	// we read until we get a semicolon followed by a newline (;\n);
	$pgsqlchunk = array();
	$chunkcount = 1;	// 1-based number of the chunk currently being read
	$linenum = 0;
	$inquotes = false;
	$first = true;

	$fs = filesize($infilename);
	echo "Filesize: ".formatsize($fs)."\n";

	// Compare with false explicitly so that a last line such as "0" does not stop the loop
	while (($instr = fgets($infp)) !== false) {
		$linenum++;
		$len = strlen($instr);
		$pgsqlchunk[] = $instr;

		// An odd number of ' marks on the line toggles the "inside a quoted string" state
		if (substr_count($instr, "'") % 2 != 0) {
			$inquotes = !$inquotes;
		}

		// Progress values are only computed on the lines where they are displayed
		$showprogress = ($linenum % 10000 == 0);
		$percent = 0;
		$position = '';
		$memusage = 0;
		if ($showprogress) {
			$memusage = round(memory_get_usage(true) / 1024 / 1024);
			$currentpos = ftell($infp);
			// filesize() may fail (false) or return 0: do not divide by it in that case
			$percent = ($fs > 0) ? round($currentpos / $fs * 100) : 0;
			$position = formatsize($currentpos);
			printf("Reading progress: %3d%% position: %7s line: %9d sql chunk: %9d mem usage: %4dM\r", $percent, $position, $linenum, $chunkcount, $memusage);
		}

		$endofchunk = ($len > 3 && substr($instr, -3) == ");\n" && !$inquotes);
		if (!$endofchunk) {
			continue;
		}

		$chunkcount++;
		if ($showprogress) {
			printf("Processing progress: %3d%% position: %7s line: %9d sql chunk: %9d mem usage: %4dM\r", $percent, $position, $linenum, $chunkcount, $memusage);
		}

		// The MySQL header is only generated for the first chunk
		$mysqlchunk = pg2mysql($pgsqlchunk, $arrayofprimaryalreadyintabledef, $first);
		fputs($outfp, $mysqlchunk['output']);
		$outputatend .= $mysqlchunk['outputatend'];

		$first = false;
		$pgsqlchunk = array();
	}

	echo "\n\n";

	fputs($outfp, $outputatend);
	fputs($outfp, "\n");

	fputs($outfp, '/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;'."\n");
	fputs($outfp, '/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;'."\n");
	fputs($outfp, '/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;'."\n");
	fputs($outfp, '/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;'."\n");
	fputs($outfp, '/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;'."\n");
	fputs($outfp, '/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;'."\n");
	fputs($outfp, '/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;'."\n");

	// $chunkcount is the number of the chunk being read, so one more than the chunks processed
	printf("Completed! %9d lines %9d sql chunks\n\n", $linenum, $chunkcount - 1);

	fclose($infp);
	fclose($outfp);

	return 0;
}
2020-02-11 11:48:10 +01:00
/**
 * Convert the first line of an INSERT statement from PostgreSQL to MySQL syntax.
 *
 * Double quotes are replaced by backquotes only in the part located before " VALUES "
 * (table and column names), so that quotes inside the data are kept untouched.
 * In the part after " VALUES ", the PostgreSQL escape string prefix (E'...') is removed.
 * It is possible that these sequences exist in the data as well; this is not detected.
 *
 * @param	string	$line	First line of the INSERT statement
 * @return	string			Converted line
 */
function pg2mysql_convert_insert_head($line)
{
	$parts = explode(" VALUES ", $line, 2);
	$before = str_replace("\"", "`", $parts[0]);

	// No VALUES keyword on this line: there is no data part to convert
	if (!isset($parts[1])) {
		return $before;
	}

	$after = $parts[1];
	// The space located before the first opening parenthesis belongs to the delimiter,
	// so an escape string used as the very first value must be handled explicitly.
	if (substr($after, 0, 3) == "(E'") {
		$after = "('".substr($after, 3);
	}
	$after = str_replace(" (E'", " ('", $after);
	$after = str_replace(", E'", ", '", $after);

	return $before." VALUES ".$after;
}

/**
 * Convert a chunk of a PostgreSQL dump into MySQL statements.
 *
 * Handles CREATE SEQUENCE (turned into an ALTER TABLE ... AUTO_INCREMENT appended to 'outputatend'),
 * CREATE TABLE, INSERT INTO, ALTER TABLE ONLY (primary keys), CREATE [UNIQUE] INDEX, DROP DATABASE,
 * CREATE DATABASE, \connect and COPY ... FROM stdin. Any other line produces no output.
 *
 * @param	array|string	$input								Array of lines (each line ends with "\n"), or a string that will be split on "\n"
 * @param	array			$arrayofprimaryalreadyintabledef	Array of table already output with a primary key set into definition (completed by this function)
 * @param	boolean			$header								True to start the output with the MySQL dump header
 * @return	string[]											Array with keys 'output' (converted statements) and 'outputatend' (statements to write after all chunks)
 */
function pg2mysql(&$input, &$arrayofprimaryalreadyintabledef, $header = true)
{
	global $config;

	if (is_array($input)) {
		$lines = $input;
	} else {
		$lines = explode("\n", $input);
	}

	$output = "";
	$outputatend = "";
	if ($header) {
		$output = "-- Converted with ".PRODUCT."-".VERSION."\n";
		$output .= "-- Converted on ".date("r")."\n";
		$output .= "\n";

		$output .= "/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;\n";
		$output .= "/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;\n";
		$output .= "/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;\n";
		$output .= "/*!40101 SET NAMES utf8 */;\n";
		$output .= "/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;\n";
		$output .= "/*!40103 SET TIME_ZONE='+00:00' */;\n";
		$output .= "/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;\n";
		$output .= "/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;\n";
		$output .= "/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;\n";
		$output .= "/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;\n";
		$output .= "\n";
	}

	// $in_create_table is false, or the name of the table being defined
	$in_create_table = false;
	// $in_insert is true while reading the data rows of a COPY ... FROM stdin block
	$in_insert = false;
	$heads = '';
	$values = array();

	$linenumber = 0;
	$tbl_extra = "";
	while (isset($lines[$linenumber])) {
		$line = $lines[$linenumber];

		$reg = array();
		if (preg_match('/CREATE SEQUENCE (?:public\.)(.*)_(id|rowid|id_comment)_seq/', $line, $reg)) {
			$outputatend .= '-- Make field '.$reg[2].' auto_increment for table '.$reg[1]."\n";
			$outputatend .= 'ALTER TABLE '.$reg[1].' CHANGE COLUMN '.$reg[2].' '.$reg[2].' INTEGER NOT NULL AUTO_INCREMENT;'."\n\n";
		}

		if (substr($line, 0, 12) == "CREATE TABLE") {
			$in_create_table = true;
			$line = str_replace("\"", "`", $line);
			$line = str_replace('public.', '', $line);

			$reg2 = array();
			if (preg_match('/CREATE TABLE ([^\s]+)/', $line, $reg2)) {
				$in_create_table = $reg2[1];
				$output .= 'DROP TABLE IF EXISTS `'.$reg2[1].'`;'."\n";
			}

			$output .= $line;
			$linenumber++;
			continue;
		}

		if (substr($line, 0, 2) == ");" && $in_create_table) {
			$in_create_table = false;
			$line = ") ENGINE={$config['engine']};\n\n";

			// Primary keys collected while reading the columns are added before the closing parenthesis
			$output .= $tbl_extra;
			$output .= $line;

			$linenumber++;
			$tbl_extra = "";
			continue;
		}

		if ($in_create_table) {
			$regs = array();
			$line = str_replace("\"", "`", $line);
			$line = str_replace(" integer", " int(11)", $line);
			$line = str_replace(" int_unsigned", " int(11) UNSIGNED", $line);
			$line = str_replace(" smallint_unsigned", " smallint UNSIGNED", $line);
			$line = str_replace(" bigint_unsigned", " bigint UNSIGNED", $line);
			$line = str_replace(" serial ", " int(11) auto_increment ", $line);
			$line = str_replace(" bytea", " BLOB", $line);
			$line = str_replace(" boolean", " bool", $line);
			$line = str_replace(" bool DEFAULT true", " bool DEFAULT 1", $line);
			$line = str_replace(" bool DEFAULT false", " bool DEFAULT 0", $line);

			if (preg_match("/ character varying\(([0-9]*)\)/", $line, $regs)) {
				$num = $regs[1];
				if ((int) $num <= 255) {
					$line = preg_replace("/ character varying\([0-9]*\)/", " varchar($num)", $line);
				} else {
					$line = preg_replace("/ character varying\([0-9]*\)/", " text", $line);
				}
			}
			// character varying with no size, we will default to varchar(255)
			if (preg_match("/ character varying/", $line)) {
				$line = preg_replace("/ character varying/", " varchar(255)", $line);
			}

			// DEFAULT ('123'::int...) / ::smallint / ::bigint => DEFAULT 123
			if (preg_match("/ DEFAULT \('([0-9]*)'::(?:int|smallint|bigint)/", $line, $regs)) {
				$num = $regs[1];
				$line = preg_replace("/ DEFAULT \('([0-9]*)'[^ ,]*/", " DEFAULT $num ", $line);
			}
			// DEFAULT (-1) => DEFAULT -1
			if (preg_match("/ DEFAULT \(([0-9\-]*)\)/", $line, $regs)) {
				$num = $regs[1];
				$line = preg_replace("/ DEFAULT \(([0-9\-]*)\)/", " DEFAULT $num ", $line);
			}

			$line = preg_replace("/ DEFAULT nextval\(.*\) /", " auto_increment ", $line);
			// Remove the remaining PostgreSQL type casts (::type)
			$line = preg_replace("/::.*,/", ",", $line);
			$line = preg_replace("/::.*$/", "\n", $line);

			if (preg_match("/character\(([0-9]*)\)/", $line, $regs)) {
				$num = $regs[1];
				if ((int) $num <= 255) {
					$line = preg_replace("/ character\([0-9]*\)/", " varchar($num)", $line);
				} else {
					$line = preg_replace("/ character\([0-9]*\)/", " text", $line);
				}
			}

			// timestamps
			$line = str_replace(" timestamp with time zone", " datetime", $line);
			$line = str_replace(" timestamp without time zone", " datetime", $line);

			// time
			$line = str_replace(" time with time zone", " time", $line);
			$line = str_replace(" time without time zone", " time", $line);

			$line = str_replace(" timestamp DEFAULT now()", " timestamp DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP", $line);
			// NOTE(review): this replacement can no longer match because " timestamp without time zone"
			// was already converted to " datetime" above - kept as is, confirm the intended result.
			$line = str_replace(" timestamp without time zone DEFAULT now()", " timestamp DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP", $line);

			$field = getfieldname($line);

			if (strstr($line, "auto_increment") || preg_match('/ rowid int/', $line) || preg_match('/ id int/', $line)) {
				$tbl_extra .= ", PRIMARY KEY(`$field`)\n";
				// Remember that the primary key is part of the table definition, so the later
				// ALTER TABLE ... PRIMARY KEY statement is written as a comment only.
				$arrayofprimaryalreadyintabledef[$in_create_table] = $in_create_table;
			}

			// Field names that must be quoted in the output
			$specialfields = array("repeat", "status", "type", "call");
			if (in_array($field, $specialfields, true)) {
				$line = str_replace("$field ", "`$field` ", $line);
			}

			// text/blob fields are not allowed to have a default, so if we find a text DEFAULT, change it to varchar(255) DEFAULT
			if (strstr($line, "text DEFAULT")) {
				$line = str_replace(" text DEFAULT ", " varchar(255) DEFAULT ", $line);
			}

			// just skip a CONSTRAINT line
			if (strstr($line, " CONSTRAINT ")) {
				$line = "";
				// and if the previous output ended with a , remove the ,
				$lastchr = substr($output, -2, 1);
				if ($lastchr == ",") {
					$output = substr($output, 0, -2)."\n";
				}
			}

			$output .= $line;
		}

		if (substr($line, 0, 11) == "INSERT INTO") {
			$line = str_replace('public.', '', $line);
			$output .= pg2mysql_convert_insert_head($line);

			if (substr($line, -3, -1) == ");") {
				// we have a complete insert on one line
				$linenumber++;
				continue;
			}

			// this insert spans multiple lines, so keep dumping the lines until we reach a line
			// that ends with ");" outside of a quoted string
			$inquotes = (substr_count($line, "'") % 2 != 0);
			while (true) {
				$linenumber++;
				if (!isset($lines[$linenumber])) {
					// Unterminated statement: stop at the end of the input instead of looping forever
					break;
				}

				// after the first line, we only need to check for escape strings in the middle,
				// not at the beginning of an insert (because the beginning will be on the first line)
				$line = str_replace("', E'", "', '", $lines[$linenumber]);
				$output .= $line;

				// an odd number of ' marks toggles the quoted state
				if (substr_count($line, "'") % 2 != 0) {
					$inquotes = !$inquotes;
				}
				if (substr($lines[$linenumber], -3, -1) == ");" && !$inquotes) {
					break;
				}
			}

			// The continuation lines are data: they must not be parsed as statements
			$linenumber++;
			continue;
		}

		if (substr($line, 0, 16) == "ALTER TABLE ONLY") {
			$line = preg_replace('/ ONLY/', '', $line);
			$line = str_replace("\"", "`", $line);
			$line = str_replace("public.", "", $line);
			$pkey = $line;

			// The action of the ALTER TABLE is read from the next line
			$linenumber++;
			$line = empty($lines[$linenumber]) ? '' : $lines[$linenumber];

			if (strstr($line, " PRIMARY KEY ") && substr($line, -3, -1) == ");") {
				$reg2 = array();
				if (preg_match('/ALTER TABLE ([^\s]+)/', $pkey, $reg2) && empty($arrayofprimaryalreadyintabledef[$reg2[1]])) {
					// looks like we have a single line PRIMARY KEY definition, lets go ahead and add it
					$output .= str_replace("\n", "", $pkey);
					// the postgres and mysql syntax for this is (at least, in the example im looking at)
					// identical, so we can just add it as is.
					$output .= $line."\n";
				} else {
					// Primary key already part of the table definition (or table name not found): keep it as a comment
					$output .= '-- '.str_replace("\n", "", $pkey);
					$output .= '-- '.$line."\n";
				}
			}
		}

		// while we're here, we might as well catch CREATE INDEX as well
		if (substr($line, 0, 12) == "CREATE INDEX") {
			$matches = array();
			preg_match('/CREATE INDEX "?([a-zA-Z0-9_]*)"? ON "?([a-zA-Z0-9_\.]*)"? USING btree \((.*)\);/', $line, $matches);
			if (!empty($matches[3])) {
				$indexname = $matches[1];
				$tablename = str_replace('public.', '', $matches[2]);
				// Double quotes around column names are removed, like for unique indexes
				$columns = str_replace('"', '', $matches[3]);
				if ($tablename && $columns) {
					$output .= "ALTER TABLE `".$tablename."` ADD INDEX ".$indexname."( {$columns} ) ;\n";
				}
			}
		}

		if (substr($line, 0, 19) == "CREATE UNIQUE INDEX") {
			$matches = array();
			preg_match('/CREATE UNIQUE INDEX "?([a-zA-Z0-9_]*)"? ON "?([a-zA-Z0-9_\.]*)"? USING btree \((.*)\);/', $line, $matches);
			if (!empty($matches[3])) {
				$indexname = $matches[1];
				$tablename = str_replace('public.', '', $matches[2]);
				$columns = str_replace('"', '', $matches[3]);
				if ($tablename && $columns) {
					$output .= "ALTER TABLE `".$tablename."` ADD UNIQUE INDEX ".$indexname." ( {$columns} ) ;\n";
				}
			}
		}

		if (substr($line, 0, 13) == 'DROP DATABASE') {
			$output .= $line;
		}

		if (substr($line, 0, 15) == 'CREATE DATABASE') {
			$matches = array();
			// Only generate the statement when both the name and the encoding were found
			if (preg_match('/CREATE DATABASE ([a-zA-Z0-9_]*) .* ENCODING = \'(.*)\'/', $line, $matches)) {
				$output .= "CREATE DATABASE `$matches[1]` DEFAULT CHARACTER SET $matches[2];\n\n";
			}
		}

		if (substr($line, 0, 8) == '\\connect') {
			$matches = array();
			if (preg_match('/connect ([a-zA-Z0-9_]*)/', $line, $matches)) {
				$output .= "USE `$matches[1]`;\n\n";
			}
		}

		if (substr($line, 0, 5) == 'COPY ') {
			$matches = array();
			if (preg_match('/COPY (.*) FROM stdin/', $line, $matches)) {
				$heads = str_replace('"', "`", str_replace('public.', '', $matches[1]));
				$values = array();
				$in_insert = true;
			}
		} elseif ($in_insert) {
			// Remove the end of line first, otherwise a \N in the last column is not recognized as NULL
			$data = rtrim($line, "\r\n");
			if ($data === '\\.') {
				// End of data marker
				$in_insert = false;
				if ($values) {
					$output .= "INSERT INTO $heads VALUES\n".implode(",\n", $values).";\n\n";
				}
				$values = array();
			} else {
				// Columns of the COPY text format are separated by a tab
				$vals = explode("\t", $data);
				foreach ($vals as $i => $val) {
					$vals[$i] = ($val === '\\N') ? 'NULL' : "'".str_replace("'", "\\'", trim($val))."'";
				}
				$values[] = '('.implode(',', $vals).')';
				// Write the rows by packets of 1000 to keep the statements to a reasonable size
				if (count($values) >= 1000) {
					$output .= "INSERT INTO $heads VALUES\n".implode(",\n", $values).";\n";
					$values = array();
				}
			}
		}

		$linenumber++;
	}

	return array('output' => $output, 'outputatend' => $outputatend);
}