|
经常做数据抓取的技术人员应该知道,优化好的爬虫程序能事半功倍,在工作量增加或业务类型调整时也能方便快捷地应对。下面给出几种在 PHP 中使用爬虫代理 IP 的示例代码,供大家参考:
PHP curl
/**
 * Fetch a URL through an authenticated HTTP proxy using the cURL extension.
 *
 * NOTE: TLS peer and host-name verification are switched off below, so the
 * identity of an HTTPS target is not checked. Re-enable both options if the
 * proxy does not require them to be disabled.
 *
 * @param string     $targetUrl     URL of the target site
 * @param string     $proxyIp       Proxy host name or IP address
 * @param int|string $proxyPort     Proxy port
 * @param string     $proxyUser     Proxy auth key (user name)
 * @param string     $proxyPassword Proxy auth password
 * @return string|false Response body, or false when the cURL handle cannot
 *                      be created or the transfer fails
 */
function sendRequest($targetUrl, $proxyIp, $proxyPort, $proxyUser, $proxyPassword)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $targetUrl,
        // Return the body only (no response headers), as a string.
        CURLOPT_HEADER         => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_PROXY          => $proxyIp,
        CURLOPT_PROXYPORT      => (int) $proxyPort,
        // Must be one of the CURLPROXY_* constants, not the string 'HTTP'.
        CURLOPT_PROXYTYPE      => CURLPROXY_HTTP,
        CURLOPT_PROXYUSERPWD   => $proxyUser . ':' . $proxyPassword,
        CURLOPT_SSL_VERIFYPEER => false,
        // This option takes 0 or 2 rather than a boolean.
        CURLOPT_SSL_VERIFYHOST => 0,
    ]);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
// Example call: target URL, proxy IP, proxy port, auth key, auth password.
// NOTE(review): the proxy address and credentials are literals here, presumably samples; substitute your own.
$data = sendRequest('http://jshk.com.cn/ip', '219.151.125.106', 31615, '895314XY', '24D6YB309ZCB');
var_dump($data);
PHP stream
/**
 * Fetch a URL through an authenticated HTTP proxy using PHP's http stream
 * wrapper (file_get_contents with a stream context).
 *
 * @param string     $targetUrl     URL of the target site
 * @param string     $proxyIp       Proxy host name or IP address
 * @param int|string $proxyPort     Proxy port
 * @param string     $proxyUser     Proxy auth key (user name)
 * @param string     $proxyPassword Proxy auth password
 * @return string|false Response body, or false when the request fails
 */
function sendRequest($targetUrl, $proxyIp, $proxyPort, $proxyUser, $proxyPassword)
{
    // Basic credentials for the proxy, sent as a Proxy-Authorization header.
    $proxyAuth = base64_encode($proxyUser . ':' . $proxyPassword);

    // The "proxy" context option is documented as a URI such as
    // tcp://host:port; add the scheme unless the caller already supplied one.
    $proxyUri = $proxyIp . ':' . $proxyPort;
    if (strpos($proxyUri, '://') === false) {
        $proxyUri = 'tcp://' . $proxyUri;
    }

    // Needed when the target site is https
    // $sniServer = parse_url($targetUrl, PHP_URL_HOST);
    $options = [
        'http' => [
            'proxy'           => $proxyUri,
            'header'          => "Proxy-Authorization: Basic {$proxyAuth}",
            'method'          => 'GET',
            // Put the absolute URL in the request line; some proxies require it.
            'request_fulluri' => true,
        ],
        // Needed when the target site is https
        // 'ssl' => array(
        //     'SNI_enabled' => true,
        //     'SNI_server_name' => $sniServer
        // )
    ];

    return file_get_contents($targetUrl, false, stream_context_create($options));
}
// Example call: target URL, proxy IP, proxy port, auth key, auth password.
// NOTE(review): the proxy address and credentials are literals here, presumably samples; substitute your own.
$data = sendRequest('http://jshk.com.cn/ip', '219.151.125.106', 31615, '895314XY', '24D6YB309ZCB');
var_dump($data);
PHP GuzzleHttp
/**
 * Fetch a URL through an authenticated HTTP proxy using GuzzleHttp.
 *
 * The proxy credentials are embedded in the proxy URL rather than sent as a
 * "Proxy-Authorization" request header. A request header is part of the
 * request addressed to the target site, so for tunnelled HTTPS it would not
 * authenticate against the proxy and would expose the credentials to the
 * target.
 *
 * @param string     $targetUrl     URL of the target site
 * @param string     $proxyIp       Proxy host name or IP address (an
 *                                  optional "scheme://" prefix is honoured)
 * @param int|string $proxyPort     Proxy port
 * @param string     $proxyUser     Proxy auth key (user name)
 * @param string     $proxyPassword Proxy auth password
 * @return string Response body
 * @throws \GuzzleHttp\Exception\GuzzleException When the request fails
 */
function sendRequest($targetUrl, $proxyIp, $proxyPort, $proxyUser, $proxyPassword)
{
    // Default to an HTTP proxy unless the caller supplied a scheme.
    $scheme = 'http';
    $host = $proxyIp;
    if (strpos($proxyIp, '://') !== false) {
        list($scheme, $host) = explode('://', $proxyIp, 2);
    }

    // Percent-encode the credentials so reserved characters (":", "@", "/")
    // cannot break the proxy URL.
    $proxyUrl = sprintf(
        '%s://%s:%s@%s:%s',
        $scheme,
        rawurlencode($proxyUser),
        rawurlencode($proxyPassword),
        $host,
        $proxyPort
    );

    $client = new \GuzzleHttp\Client();
    $result = $client->request('GET', $targetUrl, [
        'proxy' => $proxyUrl,
    ]);

    return $result->getBody()->getContents();
}
// Example call: target URL, proxy IP, proxy port, auth key, auth password.
// NOTE(review): the proxy address and credentials are literals here, presumably samples; substitute your own.
$data = sendRequest('http://jshk.com.cn/ip', '219.151.125.106', 31615, '895314XY', '24D6YB309ZCB');
var_dump($data); |
|