吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 3440|回复: 1
收起左侧

[其他转载] PHP二次开发:下载指定网页中尺寸合适的图片

[复制链接]
zhan170 发表于 2015-2-14 20:54
[PHP] 纯文本查看 复制代码
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<?php
/**
 * 下载远程文件类支持断点续传 
 */
/**
 * Minimal HTTP GET client built on a raw socket.
 *
 * Typical use: OpenUrl($url) sends the request and parses the response
 * headers; GetHtml() or SaveToBin() then consumes the response body.
 *
 * Limitations visible in this class: no Range/resume support, no redirect
 * following, no chunked-transfer decoding (requests are sent as HTTP/1.0
 * unless a previous response on this object announced HTTP/1.1), and the
 * user/password parsed from the URL are stored but never sent.
 */
class HttpDownload {
	private $m_url = "";
	private $m_urlpath = "";
	private $m_scheme = "http";
	private $m_host = "";
	private $m_port = "80";
	private $m_user = "";
	private $m_pass = "";
	private $m_path = "/";
	private $m_query = "";
	private $m_fp = "";
	private $m_error = "";
	// Response headers keyed by lower-cased name, plus the pseudo entries
	// "http-edition", "http-state" and "http-describe" from the status line.
	private $m_httphead = array();
	private $m_html = "";

	/**
	 * Split $url into its components and store them on the object.
	 *
	 * Every component is set deterministically (missing parts fall back to
	 * their defaults), so a stale host or port from an earlier URL is never
	 * reused. The default port is 443 for https and 80 otherwise.
	 *
	 * @param string $url Absolute URL to fetch.
	 */
	public function PrivateInit($url){
		$urls = @parse_url($url);
		if (!is_array($urls)) {
			// Malformed URL: treat every component as missing.
			$urls = array();
		}
		$this->m_url = $url;
		$this->m_scheme = !empty($urls["scheme"]) ? strtolower($urls["scheme"]) : "http";
		$this->m_host = isset($urls["host"]) ? $urls["host"] : "";
		$this->m_user = !empty($urls["user"]) ? $urls["user"] : "";
		$this->m_pass = !empty($urls["pass"]) ? $urls["pass"] : "";
		if (!empty($urls["port"])) {
			$this->m_port = $urls["port"];
		} else {
			$this->m_port = ($this->m_scheme === "https") ? "443" : "80";
		}
		$this->m_path = !empty($urls["path"]) ? $urls["path"] : "/";
		$this->m_urlpath = $this->m_path;
		if (!empty($urls["query"])) {
			$this->m_query = $urls["query"];
			$this->m_urlpath .= "?".$this->m_query;
		} else {
			$this->m_query = "";
		}
	}

	/**
	 * Reset all state, then connect to $url and read the response headers.
	 *
	 * @param string $url Absolute URL to fetch.
	 * @return bool True when the request was sent and a status line was read.
	 */
	function OpenUrl($url) {
		// Close any previous connection before the state is wiped.
		$this->Close();
		$this->m_url = "";
		$this->m_urlpath = "";
		$this->m_scheme = "http";
		$this->m_host = "";
		$this->m_port = "80";
		$this->m_user = "";
		$this->m_pass = "";
		$this->m_path = "/";
		$this->m_query = "";
		$this->m_error = "";
		$this->m_httphead = array();
		$this->m_html = "";
		$this->PrivateInit($url);
		return $this->PrivateStartSession();
	}

	/**
	 * Echo the accumulated error text followed by every response header.
	 *
	 * Header names and values come from the remote server, so they are
	 * HTML-escaped; the error text already contains intentional <br> markup
	 * and is printed as-is.
	 */
	public function printError() {
		echo "错误信息:".$this->m_error;
		echo "具体返回头:<br>";
		foreach ($this->m_httphead as $k => $v) {
			echo htmlspecialchars($k, ENT_QUOTES, "UTF-8")." => ".htmlspecialchars($v, ENT_QUOTES, "UTF-8")." <br>\r\n";
		}
	}

	/**
	 * Tell whether the response status code is in the 2xx range.
	 *
	 * On failure the status code and reason phrase are appended to the
	 * error text.
	 *
	 * @return bool
	 */
	public function IsGetOK() {
		if ($this->isSuccessState()) {
			return true;
		}
		$this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br>";
		return false;
	}

	/**
	 * Tell whether the response is 2xx and its Content-Type starts with
	 * "text" (case-insensitive).
	 *
	 * @return bool
	 */
	public function IsText() {
		if ($this->isSuccessState() && stripos($this->GetHead("content-type"), "text") === 0) {
			return true;
		}
		$this->m_error .= "内容为非文本类型<br>";
		return false;
	}

	/**
	 * Tell whether the response is 2xx and has the given content type.
	 *
	 * The comparison is case-insensitive and succeeds when $ctype equals
	 * either the whole Content-Type header or just its media-type part
	 * (the text before any ";charset=..." parameter).
	 *
	 * @param string $ctype Expected content type, e.g. "image/jpeg".
	 * @return bool
	 */
	public function IsContentType($ctype) {
		$actual = strtolower($this->GetHead("content-type"));
		$wanted = strtolower($ctype);
		$semi = strpos($actual, ";");
		$mediaType = ($semi === false) ? $actual : trim(substr($actual, 0, $semi));
		if ($this->isSuccessState() && ($actual === $wanted || $mediaType === $wanted)) {
			return true;
		}
		$this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br>";
		return false;
	}

	/**
	 * Stream the response body into the file img/$savefilename.
	 *
	 * The img/ directory is created when missing. The connection is closed
	 * afterwards, so the body can be consumed only once.
	 *
	 * @param string $savefilename File name relative to the img/ directory.
	 *                             Callers must pass a trusted/sanitised name;
	 *                             no path filtering is done here.
	 * @return bool True on success; false (with the error text set) when the
	 *              response is not 2xx, the connection is already closed or
	 *              the target file cannot be opened.
	 */
	public function SaveToBin($savefilename) {
		if (!$this->IsGetOK()) return false;
		if (!is_resource($this->m_fp) || feof($this->m_fp)) {
			$this->m_error = "连接已经关闭!";
			return false;
		}
		if (!is_dir("img")) {
			@mkdir("img");
		}
		$fp = @fopen("img/".$savefilename, "wb");
		if (!$fp) {
			// Report the failure instead of terminating the whole script,
			// so a batch of downloads can continue with the next file.
			$this->m_error .= "写入文件 $savefilename 失败!";
			$this->Close();
			return false;
		}
		while (($chunk = $this->readBodyChunk()) !== false) {
			fwrite($fp, $chunk);
		}
		fclose($fp);
		$this->Close();
		return true;
	}

	/**
	 * Save the response body to img/$savefilename when it is a text response.
	 *
	 * @param string $savefilename File name relative to the img/ directory.
	 * @return bool|string Result of SaveToBin() for text responses, or ""
	 *                     when the response is not text.
	 */
	public function SaveToText($savefilename) {
		if ($this->IsText()) {
			return $this->SaveToBin($savefilename);
		}
		return "";
	}

	/**
	 * Return the response body of a text response.
	 *
	 * The body is read once, cached, and the connection is then closed.
	 *
	 * @return string Body text, or "" when the response is not a 2xx text
	 *                response or nothing could be read.
	 */
	public function GetHtml() {
		if (!$this->IsText()) return "";
		if ($this->m_html != "") return $this->m_html;
		while (($chunk = $this->readBodyChunk()) !== false) {
			$this->m_html .= $chunk;
		}
		$this->Close();
		return $this->m_html;
	}

	/**
	 * Open the connection, send the GET request and parse the status line
	 * and response headers into the header table.
	 *
	 * @return bool False when the host cannot be reached or sends no status
	 *              line; true otherwise.
	 */
	public function PrivateStartSession() {
		if (!$this->PrivateOpenHost()) {
			$this->m_error .= "打开远程主机出错!";
			return false;
		}
		// HTTP/1.1 is used only when a previous response on this object
		// announced it; after OpenUrl() the table is empty, so HTTP/1.0 is sent.
		$httpv = ($this->GetHead("http-edition") == "HTTP/1.1") ? "HTTP/1.1" : "HTTP/1.0";
		// Drop headers of any earlier response so they cannot leak into this one.
		$this->m_httphead = array();

		$request = "GET ".$this->m_urlpath." $httpv\r\n"
			."Host: ".$this->m_host."\r\n"
			."Accept: */*\r\n"
			."User-Agent: Mozilla/4.0+(compatible;+MSIE+10.0;+Windows+NT+5.2)\r\n";
		if ($httpv == "HTTP/1.1") {
			// HTTP/1.1 keeps connections alive by default; ask the server to
			// close it so feof() can detect the end of the body.
			$request .= "Connection: Close\r\n";
		}
		$request .= "\r\n";
		fwrite($this->m_fp, $request);

		$statusLine = fgets($this->m_fp);
		if ($statusLine === false) {
			$this->m_error .= "远程主机没有返回数据!";
			$this->Close();
			return false;
		}
		// Status line format: "<version> <code> <reason phrase>".
		$parts = explode(" ", trim($statusLine), 3);
		$this->m_httphead["http-edition"] = trim($parts[0]);
		$this->m_httphead["http-state"] = isset($parts[1]) ? trim($parts[1]) : "";
		$this->m_httphead["http-describe"] = isset($parts[2]) ? trim($parts[2]) : "";

		while (!feof($this->m_fp)) {
			$raw = fgets($this->m_fp);
			if ($raw === false) break;
			$line = str_replace("\"", "", trim($raw));
			// An empty line separates the headers from the body.
			if ($line == "") break;
			if (strpos($line, ":") === false) continue;
			// Split on the first colon only, so values that themselves
			// contain colons (dates, URLs) are kept intact.
			list($name, $value) = explode(":", $line, 2);
			$this->m_httphead[strtolower(trim($name))] = trim($value);
		}
		return true;
	}

	/**
	 * Look up a response header by case-insensitive name.
	 *
	 * @param string $headname Header name, or one of the pseudo entries
	 *                         "http-edition", "http-state", "http-describe".
	 * @return string Header value, or "" when absent.
	 */
	public function GetHead($headname) {
		$headname = strtolower($headname);
		if (isset($this->m_httphead[$headname])) {
			return $this->m_httphead[$headname];
		}
		return "";
	}

	/**
	 * Open the socket to the parsed host and port (10 second connect timeout).
	 *
	 * https URLs are connected through the ssl:// transport, which needs the
	 * OpenSSL extension.
	 *
	 * @return bool False when no host is set or the connection fails (the
	 *              socket error text is stored as the error message).
	 */
	public function PrivateOpenHost() {
		if ($this->m_host == "") return false;
		$errno = 0;
		$errstr = "";
		$target = (($this->m_scheme === "https") ? "ssl://" : "").$this->m_host;
		$this->m_fp = @fsockopen($target, (int) $this->m_port, $errno, $errstr, 10);
		if (!$this->m_fp) {
			$this->m_error = $errstr;
			return false;
		}
		return true;
	}

	/**
	 * Close the connection if one is open. Safe to call repeatedly.
	 */
	public function Close(){
		// is_resource() is false for both the initial "" placeholder and an
		// already-closed stream, either of which would make fclose() fail.
		if (is_resource($this->m_fp)) {
			fclose($this->m_fp);
		}
		$this->m_fp = "";
	}

	/**
	 * Tell whether the stored status code starts with "2".
	 */
	private function isSuccessState() {
		return strncmp($this->GetHead("http-state"), "2", 1) === 0;
	}

	/**
	 * Read the next piece of the response body.
	 *
	 * @return string|bool Up to 8192 bytes, or false at end of stream, on a
	 *                     read error, or when the read timed out.
	 */
	private function readBodyChunk() {
		if (!is_resource($this->m_fp) || feof($this->m_fp)) return false;
		$chunk = fread($this->m_fp, 8192);
		if ($chunk === false) return false;
		if ($chunk === "") {
			// An empty read without EOF may be a timeout; stop instead of
			// looping on a stalled connection.
			$meta = stream_get_meta_data($this->m_fp);
			if (!empty($meta["timed_out"])) {
				$this->m_error .= "读取超时!<br>";
				return false;
			}
		}
		return $chunk;
	}
}

/**
 * Read the leading bytes of an image and report its pixel dimensions,
 * optionally together with the size announced by the server.
 *
 * Only the first ~500 KB are fetched; that is enough for getimagesize() to
 * find the dimensions without downloading the whole file.
 *
 * @param string $url           Image location. In 'fread' mode anything
 *                              fopen() accepts works (http URL, local path).
 * @param string $type          'curl' (default) or 'fread'.
 * @param bool   $isGetFilesize When true, 'fread' mode is forced and the
 *                              Content-Length response header, if present,
 *                              is returned under the key 'size'.
 * @return array|false array('width' => int, 'height' => int[, 'size' => string]),
 *                     or false when the data cannot be fetched or is not a
 *                     recognisable image.
 */
function myGetImageSize($url, $type = 'curl', $isGetFilesize = false)
{
    // The file size is taken from the fopen stream's response headers,
    // so asking for it forces 'fread' mode.
    $type = $isGetFilesize ? 'fread' : $type;
    $handle = null;

    if ($type == 'fread') {
        $handle = fopen($url, 'rb');

        if (! $handle) return false;

        // A single fread() on a network stream returns as soon as one packet
        // has arrived, which can be too short for images with large metadata
        // blocks. stream_get_contents() keeps reading up to the limit or EOF.
        $dataBlock = stream_get_contents($handle, 512000);
    }
    else {
        $ch = curl_init($url);

        if (! $ch) return false;

        // Give up after 5 seconds.
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        // Request only the leading bytes of the file.
        curl_setopt($ch, CURLOPT_RANGE, '0-512000');
        // Follow redirects.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        // Return the body instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        $dataBlock = curl_exec($ch);

        curl_close($ch);
    }

    $result = false;

    // Feed the bytes to getimagesize() through a data: URI. The declared
    // image/jpeg type does not matter; the format is detected from the bytes.
    // Images that browsers still display may nevertheless be too damaged to parse.
    $size = $dataBlock ? getimagesize('data://image/jpeg;base64,'. base64_encode($dataBlock)) : false;

    if (! empty($size)) {
        $result = array('width' => $size[0], 'height' => $size[1]);

        if ($isGetFilesize) {
            $meta = stream_get_meta_data($handle);
            // Response headers are either under wrapper_data['headers'] or
            // directly in wrapper_data; non-HTTP streams have neither.
            $dataInfo = array();
            if (isset($meta['wrapper_data']['headers']) && is_array($meta['wrapper_data']['headers'])) {
                $dataInfo = $meta['wrapper_data']['headers'];
            } elseif (isset($meta['wrapper_data']) && is_array($meta['wrapper_data'])) {
                $dataInfo = $meta['wrapper_data'];
            }

            foreach ($dataInfo as $va) {
                // No break: after redirects the list holds the headers of every
                // response, and the last Content-Length belongs to the image.
                if (is_string($va) && preg_match('/^content-length\s*:\s*(\d+)/i', $va, $m)) {
                    $result['size'] = $m[1];
                }
            }
        }
    }

    // Single exit point so the stream is released on failure as well.
    if (is_resource($handle)) fclose($handle);

    return $result;
}

// Crawling many pages and images can take long; lift the execution time limit.
set_time_limit(0);

// One downloader instance is reused for every page and image request.
$file = new HttpDownload();

// NOTE(review): this script fetches arbitrary user-supplied URLs from the
// server; restrict who can reach it before deploying it publicly.

// The form field is absent on the initial GET request.
$wenjian = (isset($_POST["wenjian"]) && is_string($_POST["wenjian"])) ? trim($_POST["wenjian"]) : "";
if ($wenjian != "") {

	// One page URL per line; accept CRLF, LF and CR line endings.
	$urls = preg_split('/\r\n|\r|\n/', $wenjian);
	foreach ($urls as $url) {
		$url = trim($url);
		if ($url == "") continue;

		// Prepend a scheme only when the line does not already start with one.
		$iu = preg_match('#^https?://#i', $url) ? $url : "http://".$url;

		// Components of the page URL, used to resolve relative image paths.
		$base = parse_url($iu);
		if (!is_array($base) || empty($base['host'])) continue;
		$baseScheme = !empty($base['scheme']) ? strtolower($base['scheme']) : 'http';
		$baseHost = $base['host'].(!empty($base['port']) ? ':'.$base['port'] : '');
		$basePath = isset($base['path']) ? $base['path'] : '/';
		$slash = strrpos($basePath, '/');
		$baseDir = ($slash === false) ? '/' : substr($basePath, 0, $slash + 1);

		$file->OpenUrl($iu);
		$content = $file->GetHtml();

		// Capture the src of every <img> tag whose URL ends in a known image
		// extension. Matching is case-insensitive on the original markup, so
		// case-sensitive URLs are kept intact.
		preg_match_all('/<img\b[^>]*?\ssrc\s*=\s*["\']([^"\'>]+\.(?:gif|jpe?g|png|bmp))["\']/i', $content, $tupian);
		$srcs = array_unique($tupian[1]);

		foreach ($srcs as $img) {
			$img = trim($img);
			if ($img == '') continue;

			if (preg_match('#^https?://#i', $img)) {
				// Already absolute.
				$imim = $img;
			} elseif (substr($img, 0, 2) == '//') {
				// Protocol-relative: reuse the page's scheme.
				$imim = $baseScheme.':'.$img;
			} elseif ($img[0] == '/') {
				// Host-relative path.
				$imim = $baseScheme.'://'.$baseHost.$img;
			} else {
				// Path relative to the page's directory ("../" is not normalised).
				$imim = $baseScheme.'://'.$baseHost.$baseDir.$img;
			}

			$result = myGetImageSize($imim);
			// false means the image could not be fetched or parsed.
			if (!is_array($result)) continue;

			$ww = (int) $result['width'];
			$hh = (int) $result['height'];
			// Keep only images that are at least 300x300 pixels.
			if ($ww >= 300 && $hh >= 300) {
				echo $ww."-".$hh.";";
				$file->OpenUrl($imim);
				// Name the file after the last segment of the URL path only.
				$wjm = basename((string) parse_url($imim, PHP_URL_PATH));
				if ($wjm != '') {
					$file->SaveToBin($wjm);
				}
			}
		}

	}
	echo "完毕";
}
// Release the connection, if any is still open.
$file->Close();
?>
<body>
<!-- Submission form: one page URL per line. Posts to index.php, which assumes this script is deployed under that name. -->
<form action="index.php" method="post">
  <p><textarea name="wenjian" cols="20" rows="6" style="width:500px;"></textarea></p>
  <p><input type="submit" value="提交" /></p>
</form>
</body>

这个脚本还不够完善:正则只能提取 img 标签的 src 链接,因此微信图文里的图片抓不到。

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

l2430478 发表于 2015-2-24 19:27
能做成工具吗?
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-15 03:52

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表