2013-06-03 17:44:55 +02:00
< ? php
2013-06-07 20:09:15 +02:00
/* Copyright ( C ) 2008 - 2013 Laurent Destailleur < eldy @ users . sourceforge . net >
2013-06-03 17:44:55 +02:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
2019-09-23 21:55:30 +02:00
* along with this program . If not , see < https :// www . gnu . org / licenses />.
* or see https :// www . gnu . org /
2013-06-03 17:44:55 +02:00
*/
/**
2013-09-18 23:39:53 +02:00
* \file htdocs / core / lib / geturl . lib . php
* \brief This file contains functions to fetch the content of a URL and to extract parts of a URL.
2013-06-03 17:44:55 +02:00
*/
/**
 * Function to get a content from an URL (use proxy if proxy defined)
 *
 * The request is sent with cURL. Proxy settings, the forced SSL/TLS version and the
 * timeouts are read from $conf->global (MAIN_PROXY_*, MAIN_CURL_SSLVERSION,
 * MAIN_USE_CONNECT_TIMEOUT defaulting to 5, MAIN_USE_RESPONSE_TIMEOUT defaulting to 30).
 *
 * @param	string		$url				URL to call.
 * @param	string		$postorget			'POST', 'GET', 'HEAD', 'PUT', 'PUTALREADYFORMATED', 'POSTALREADYFORMATED', 'DELETE'.
 *											Any other value is sent as a GET.
 * @param	string		$param				Parameters of URL (x=value1&y=value2) or may be a formated content with PUTALREADYFORMATED/POSTALREADYFORMATED.
 *											An array is tolerated with 'PUT' (a warning is logged).
 * @param	integer		$followlocation		1=Follow location, 0=Do not follow
 * @param	string[]	$addheaders			Array of string to add into header. Example: ('Accept: application/xrds+xml', ....)
 * @return	array							Associative array describing the result.
 *											On success: every key returned by curl_getinfo() (http_code, header_size, ...) plus
 *											'content' => response, 'curl_error_no' => '', 'curl_error_msg' => ''.
 *											On cURL error: only 'content', 'curl_error_no' => errno and 'curl_error_msg' => errmsg.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array())
{
	global $conf;

	// Proxy setup (0 when the setting is not defined)
	$USE_PROXY = empty($conf->global->MAIN_PROXY_USE) ? 0 : $conf->global->MAIN_PROXY_USE;
	$PROXY_HOST = empty($conf->global->MAIN_PROXY_HOST) ? 0 : $conf->global->MAIN_PROXY_HOST;
	$PROXY_PORT = empty($conf->global->MAIN_PROXY_PORT) ? 0 : $conf->global->MAIN_PROXY_PORT;
	$PROXY_USER = empty($conf->global->MAIN_PROXY_USER) ? 0 : $conf->global->MAIN_PROXY_USER;
	$PROXY_PASS = empty($conf->global->MAIN_PROXY_PASS) ? 0 : $conf->global->MAIN_PROXY_PASS;

	// $param may be an array (see the PUT branch): serialize it so the log line does not
	// trigger an array-to-string conversion.
	$paramforlog = is_array($param) ? http_build_query($param) : $param;
	dol_syslog(" getURLContent postorget= ".$postorget." URL= ".$url." param= ".$paramforlog);

	// Setting the curl parameters
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_VERBOSE, 1);
	curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

	// We use @ here because this may return warning if safe mode is on or open_basedir is on
	@curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ($followlocation ? true : false));

	// Only send extra headers when a non-empty array was really given
	if (is_array($addheaders) && count($addheaders)) {
		curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
	}
	curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it

	// By default use the TLS version decided by PHP.
	// You can force, if supported, a version like TLSv1 or TLSv1.2 (6 = TLS 1.2)
	if (!empty($conf->global->MAIN_CURL_SSLVERSION)) {
		curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);
	}

	// Turning off the server and peer verification (TrustManager Concept).
	// NOTE(review): this disables TLS certificate and host name checks for every call;
	// kept as is because callers may rely on it, but it should be made configurable.
	curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
	curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, empty($conf->global->MAIN_USE_CONNECT_TIMEOUT) ? 5 : $conf->global->MAIN_USE_CONNECT_TIMEOUT);
	curl_setopt($ch, CURLOPT_TIMEOUT, empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT) ? 30 : $conf->global->MAIN_USE_RESPONSE_TIMEOUT);

	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want response

	if ($postorget == 'POST') {
		curl_setopt($ch, CURLOPT_POST, 1); // POST
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
	} elseif ($postorget == 'POSTALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
	} elseif ($postorget == 'PUT') {
		$array_param = null;
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
		if (!is_array($param)) {
			parse_str($param, $array_param);
		} else {
			dol_syslog(" parameter param must be a string ", LOG_WARNING);
			$array_param = $param;
		}
		curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
	} elseif ($postorget == 'PUTALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of put, like a xml string
	} elseif ($postorget == 'HEAD') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
		curl_setopt($ch, CURLOPT_NOBODY, true);
	} elseif ($postorget == 'DELETE') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
	} else {
		curl_setopt($ch, CURLOPT_POST, 0); // GET
	}

	// If the proxy usage was enabled in setup (read at the beginning of this function)
	if ($USE_PROXY) {
		// The proxy password is deliberately masked: credentials must not end up in log files
		dol_syslog(" getURLContent set proxy to ".$PROXY_HOST." : ".$PROXY_PORT." - ".$PROXY_USER." : ".($PROXY_PASS ? '*****' : ''));
		// cURL expects "host:port" and "user:password" without any space around the colon
		curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.':'.$PROXY_PORT);
		if ($PROXY_USER) {
			curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.':'.$PROXY_PASS);
		}
	}

	// Getting response from server
	$response = curl_exec($ch);

	$request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

	dol_syslog(" getURLContent request= ".$request);
	// The response may contain binary data, so only its size is logged
	dol_syslog(" getURLContent response size= ".strlen($response));

	$rep = array();
	if (curl_errno($ch)) {
		// Add keys to $rep
		$rep['content'] = $response;

		// Report the curl error to the caller
		$rep['curl_error_no'] = curl_errno($ch);
		$rep['curl_error_msg'] = curl_error($ch);

		dol_syslog(" getURLContent response array is ".join(',', $rep));
	} else {
		// Start from all the transfer information given by curl (http_code, header_size, ...)
		$rep = curl_getinfo($ch);

		dol_syslog(" getURLContent http_code= ".$rep['http_code']);

		// Add more keys to $rep
		$rep['content'] = $response;
		$rep['curl_error_no'] = '';
		$rep['curl_error_msg'] = '';
	}

	// Closing the curl handle in every case (it was leaked on the error path before)
	curl_close($ch);

	return $rep;
}
2017-10-01 18:27:54 +02:00
/**
 * Extract a domain name from a URL.
 * For example: https://www.abc.mydomain.com/dir/page.html returns 'mydomain' with the default mode.
 *
 * The leading http:// or https:// and everything from the first '/' after it are dropped,
 * then only the last labels of the host are kept, depending on $mode.
 * A host that has no more labels than requested is left untouched by that step.
 *
 * @param	string	$url		Full URL.
 * @param	int		$mode		0=return 'mydomain', 1=return 'mydomain.com', 2=return 'abc.mydomain.com'
 * @return	string				Returns the domain name
 */
function getDomainFromURL($url, $mode = 0)
{
	// Isolate the host: strip the scheme first, then the path
	$host = preg_replace('/\/.*$/i', '', preg_replace('/^https?:\/\//i', '', $url));

	// Choose how many trailing labels survive
	if ($mode == 2) {
		// 'www.abc.mydomain.com' => 'abc.mydomain.com'
		$pattern = '/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/';
		$replacement = '\1.\2.\3';
	} else {
		// 'www.abc.mydomain.com' => 'mydomain.com'
		$pattern = '/^.*\.([^\.]+)\.([^\.]+)$/';
		$replacement = '\1.\2';
	}
	$host = preg_replace($pattern, $replacement, $host);

	// Modes 1 and 2 keep the first level domain
	if (!empty($mode)) {
		return $host;
	}

	// Mode 0: also remove the first level domain (.com, .net, ...)
	return preg_replace('/\.[^\.]+$/', '', $host);
}
2017-12-10 17:59:19 +01:00
/**
 * Return the root of a URL: the scheme (when it is http:// or https://) followed by the host,
 * without any path.
 * For example: https://www.abc.mydomain.com/dir/page.html returns 'https://www.abc.mydomain.com'
 * For example: http://www.abc.mydomain.com/ returns 'http://www.abc.mydomain.com'
 *
 * @param	string	$url		Full URL.
 * @return	string				Returns the root url
 */
function getRootURLFromURL($url)
{
	// Remember the scheme exactly as it was written (empty when the URL has none)
	$matches = null;
	$scheme = preg_match('/^(https?:\/\/)/i', $url, $matches) ? $matches[1] : '';

	// Host only: remove the scheme, then everything from the first '/'
	$host = preg_replace('/^https?:\/\//i', '', $url);
	$host = preg_replace('/\/.*$/i', '', $host);

	return $scheme.$host;
}
/**
 * Function to remove comments from an HTML content.
 *
 * Every "<!-- ... -->" sequence is removed, including comments that span several lines,
 * comments containing hyphens (for example "<!-- foo-bar -->") and empty comments.
 * An opening "<!--" with no closing "-->" is left untouched.
 *
 * @param	string	$content	Text content
 * @return	string				Returns text without HTML comments (the input unchanged if the regex engine fails)
 */
function removeHtmlComment($content)
{
	// Lazy match up to the first '-->'; the 's' modifier lets '.' cross line breaks.
	// The previous pattern '[^\-]+' skipped any comment that contained a '-' character.
	$cleaned = preg_replace('/<!--.*?-->/s', '', $content);

	// preg_replace() returns null on error (for example backtrack limit reached): keep the original text
	return ($cleaned === null) ? $content : $cleaned;
}