header = $this->agent[rand(0,count($this->agent) - 1)];
        $this->referer = empty($referer)?'http://weixin.sogou.com/' : $referer;
        $this->host = empty($host)?'weixin.sogou.com' : $host;
        /** @var string URL prefix of the script that works around WeChat image hotlink protection */
        $this->antiLeech = '/Public/Home/GetWeChatImg.php?url=';
        $this->antiLeech = 'http://'.$_SERVER['SERVER_NAME'].'/Public/Home/GetWeChatImg.php?url=';
    }

    /**
     * Fetch the raw content found at a URL.
     *
     * The request goes through file_get_contents() with the default stream
     * context, so the referer and host configured on this object are not
     * sent with it.
     *
     * @author bignerd
     * @since 2016-08-16T10:13:58+0800
     * @param string $url Address of the page to download.
     * @return string|false The downloaded body, or false when the fetch fails.
     */
    public function _get($url)
    {
        return file_get_contents($url);
    }

    /**
     * Crawl a single article by URL.
     *
     * Downloads the page, extracts the basic article information and the
     * article body, and merges everything into one array. A non-empty cover
     * image URL is prefixed with the hotlink-protection proxy prefix stored
     * in $this->antiLeech.
     *
     * @param string $url Address of the article to crawl.
     * @return array The fields returned by articleBasicInfo() plus
     *               'content_html' and 'content_text'; 'cover' holds the
     *               proxied cover URL, or "" when no cover is available.
     */
    public function crawByUrl($url)
    {
        $content = $this->_get($url);
        $basicInfo = $this->articleBasicInfo($content);
        list($content_html, $content_text) = $this->contentHandle($content);

        $result = array_merge(
            $basicInfo,
            array(
                'content_html' => $content_html,
                'content_text' => $content_text,
            )
        );

        // The key must be a quoted string: a bare `cover` is an undefined
        // constant, which is a fatal Error on PHP 8. empty() also covers the
        // case where the basic info carries no cover entry at all.
        // NOTE(review): the cover URL is appended without URL-encoding;
        // confirm that the proxy script expects the raw value.
        $result['cover'] = empty($result['cover']) ? "" : $this->antiLeech . $result['cover'];

        return $result;
    }

    /**
     * Process the source of a WeChat article: extract the article body and
     * handle the image links.
     *
     * @author bignerd
     * @since 2016-08-16T15:59:27+0800
     * @param $content Source of the fetched WeChat article.
     * @return [HTML text with images, HTML text without images]
     */
    public function contentHandle($content)
    {
        $content_html_pattern = '/