dolibarr/htdocs/core/lib/geturl.lib.php

217 lines
8.8 KiB
PHP
Raw Normal View History

2013-06-03 17:44:55 +02:00
<?php
/* Copyright (C) 2008-2013 Laurent Destailleur <eldy@users.sourceforge.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 * or see https://www.gnu.org/
 */
/**
 * \file    htdocs/core/lib/geturl.lib.php
 * \brief   This file contains functions dedicated to getting the content of a URL and to parsing URLs.
 */
/**
 * Function to get a content from an URL (use proxy if proxy defined).
 *
 * The request is sent with cURL. Proxy settings, TLS version and timeouts are read from
 * the global $conf->global->MAIN_* values (connect timeout defaults to 5, response timeout to 30).
 *
 * @param   string          $url            URL to call.
 * @param   string          $postorget      'POST', 'GET', 'HEAD', 'PUT', 'PUTALREADYFORMATED', 'POSTALREADYFORMATED', 'DELETE'.
 *                                          Any other value is sent as a GET.
 * @param   string          $param          Parameters of URL (x=value1&y=value2) or may be an already formatted content
 *                                          with PUTALREADYFORMATED / POSTALREADYFORMATED. With 'PUT' an array is tolerated
 *                                          (a warning is logged).
 * @param   int             $followlocation 1=Follow location, 0=Do not follow
 * @param   string[]        $addheaders     Array of string to add into header. Example: ('Accept: application/xrds+xml', ....)
 * @return  array                           Associative array describing the response.
 *                                          On cURL error: array('content'=>response, 'curl_error_no'=>errno, 'curl_error_msg'=>errmsg).
 *                                          On success: every key returned by curl_getinfo() (http_code, header_size, ...)
 *                                          plus 'content'=>response, 'curl_error_no'=>'' and 'curl_error_msg'=>''.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array())
{
    global $conf;

    // Proxy configuration (0 when the setting is not defined)
    $USE_PROXY = empty($conf->global->MAIN_PROXY_USE) ? 0 : $conf->global->MAIN_PROXY_USE;
    $PROXY_HOST = empty($conf->global->MAIN_PROXY_HOST) ? 0 : $conf->global->MAIN_PROXY_HOST;
    $PROXY_PORT = empty($conf->global->MAIN_PROXY_PORT) ? 0 : $conf->global->MAIN_PROXY_PORT;
    $PROXY_USER = empty($conf->global->MAIN_PROXY_USER) ? 0 : $conf->global->MAIN_PROXY_USER;
    $PROXY_PASS = empty($conf->global->MAIN_PROXY_PASS) ? 0 : $conf->global->MAIN_PROXY_PASS;

    // $param may be an array in 'PUT' mode: flatten it so the log line does not trigger an array to string conversion
    dol_syslog("getURLContent postorget=".$postorget." URL=".$url." param=".(is_array($param) ? http_build_query($param) : $param));

    // Setting the curl parameters
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

    @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ($followlocation ? true : false)); // We use @ here because this may return warning if safe mode is on or open_basedir is on

    // Only forward extra headers when a non empty array was really provided
    if (is_array($addheaders) && count($addheaders)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
    }
    curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it

    // By default use the TLS version decided by PHP.
    // You can force, if supported, a version like TLSv1 or TLSv1.2 (for example 6 for TLS 1.2)
    if (!empty($conf->global->MAIN_CURL_SSLVERSION)) {
        curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);
    }

    // Turning off the server and peer verification (TrustManager Concept).
    // NOTE(review): with both checks disabled the remote certificate is not validated. Kept as is
    // because existing callers may rely on it, but this should be made configurable.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, empty($conf->global->MAIN_USE_CONNECT_TIMEOUT) ? 5 : $conf->global->MAIN_USE_CONNECT_TIMEOUT);
    curl_setopt($ch, CURLOPT_TIMEOUT, empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT) ? 30 : $conf->global->MAIN_USE_RESPONSE_TIMEOUT);

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want response

    if ($postorget == 'POST') {
        curl_setopt($ch, CURLOPT_POST, 1); // POST
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
    } elseif ($postorget == 'POSTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
    } elseif ($postorget == 'PUT') {
        $array_param = null;
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        if (!is_array($param)) {
            parse_str($param, $array_param);
        } else {
            dol_syslog("parameter param must be a string", LOG_WARNING);
            $array_param = $param;
        }
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
    } elseif ($postorget == 'PUTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of put, like a xml string
    } elseif ($postorget == 'HEAD') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
        curl_setopt($ch, CURLOPT_NOBODY, true);
    } elseif ($postorget == 'DELETE') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
    } else {
        curl_setopt($ch, CURLOPT_POST, 0); // GET
    }

    // Route the request through the proxy when one is enabled in setup
    if ($USE_PROXY) {
        // The proxy password is masked: credentials must never be written in clear into the log
        dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
        curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
        if ($PROXY_USER) {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
        }
    }

    // Getting response from server
    $response = curl_exec($ch);

    $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

    dol_syslog("getURLContent request=".$request);
    // The response may contain binary data, so only its size is logged (curl_exec returns false on failure)
    dol_syslog("getURLContent response size=".(is_string($response) ? strlen($response) : 0));

    $rep = array();
    if (curl_errno($ch)) {
        // Error case: only the content and the curl error are returned
        $rep['content'] = $response;
        $rep['curl_error_no'] = curl_errno($ch);
        $rep['curl_error_msg'] = curl_error($ch);

        dol_syslog("getURLContent response array is ".implode(',', $rep));
    } else {
        // Success case: start from all the information given by curl (http_code, header_size, ...)
        $rep = curl_getinfo($ch);

        dol_syslog("getURLContent http_code=".$rep['http_code']);

        // Add more keys to $rep
        $rep['content'] = $response;
        $rep['curl_error_no'] = '';
        $rep['curl_error_msg'] = '';
    }

    // Closing the curl handle in every case (it was previously leaked when the request failed)
    curl_close($ch);

    return $rep;
}
/**
 * Function get second level domain name.
 * For example: https://www.abc.mydomain.com/dir/page.html return 'mydomain'
 *
 * The scheme (http:// or https://), the path, the query string, the fragment and the port
 * are removed before the domain is extracted.
 *
 * @param   string  $url    Full URL.
 * @param   int     $mode   0=return 'mydomain', 1=return 'mydomain.com', 2=return 'abc.mydomain.com'
 * @return  string          Returns domain name
 */
function getDomainFromURL($url, $mode = 0)
{
    $tmpdomain = preg_replace('/^https?:\/\//i', '', $url); // Remove http(s)://
    // Remove everything after the host. A query string or a fragment can follow the host
    // directly (http://mydomain.com?x=1), so '/' is not the only possible delimiter.
    $tmpdomain = preg_replace('/[\/?#].*$/', '', $tmpdomain);
    $tmpdomain = preg_replace('/:\d+$/', '', $tmpdomain); // Remove port (':8080'), it is not part of the domain name

    if ($mode == 2) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.' before 'abc.mydomain.com'
    } else {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
    }

    if (empty($mode)) {
        $tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove first level domain (.com, .net, ...)
    }

    return $tmpdomain;
}
/**
 * Function root url from a long url
 * For example: https://www.abc.mydomain.com/dir/page.html return 'https://www.abc.mydomain.com'
 * For example: http://www.abc.mydomain.com/ return 'http://www.abc.mydomain.com'
 *
 * The scheme is kept exactly as provided (no scheme gives a result without scheme) and the
 * port, if any, is preserved. Path, query string and fragment are removed.
 *
 * @param   string  $url    Full URL.
 * @return  string          Returns root url
 */
function getRootURLFromURL($url)
{
    $prefix = '';
    $tmpurl = $url;

    // Keep the scheme apart so it can be put back unchanged in front of the host
    $reg = null;
    if (preg_match('/^(https?:\/\/)/i', $tmpurl, $reg)) {
        $prefix = $reg[1];
    }

    $tmpurl = preg_replace('/^https?:\/\//i', '', $tmpurl); // Remove http(s)://
    // Remove everything after the host. A query string or a fragment can follow the host
    // directly (http://mydomain.com?x=1), so '/' is not the only possible delimiter.
    $tmpurl = preg_replace('/[\/?#].*$/', '', $tmpurl);

    return $prefix.$tmpurl;
}
/**
 * Function to remove comments into HTML content
 *
 * Every '<!-- ... -->' sequence is removed, including comments that contain hyphens,
 * comments spanning several lines and the empty comment '<!---->'.
 *
 * @param   string  $content    Text content
 * @return  string              Returns text without HTML comments
 */
function removeHtmlComment($content)
{
    // Lazy match up to the first '-->' so that two separate comments are never merged into one;
    // the 's' modifier lets a comment span several lines.
    $cleaned = preg_replace('/<!--.*?-->/s', '', $content);

    // preg_replace returns null on PCRE failure (for example backtrack limit reached): keep the content untouched
    return ($cleaned === null) ? $content : $cleaned;
}