php二次开发:下载当前网页合适图片
<meta http-equiv="content-type" content="text/html; charset=utf-8"><?php
/**
 * Minimal HTTP/HTTPS GET client built directly on fsockopen().
 *
 * Usage: call OpenUrl($url) to send the request and parse the response
 * headers, then consume the body with GetHtml() or SaveToBin(). Every
 * OpenUrl() call resets all state, so one instance can be reused.
 *
 * Limitations: redirects are not followed and range/resume requests are
 * not implemented.
 */
class HttpDownload {
    private $m_url = "";
    private $m_urlpath = "";
    private $m_scheme = "http";
    private $m_host = "";
    private $m_port = "80";
    // User/password are parsed from the URL but are not sent with the request.
    private $m_user = "";
    private $m_pass = "";
    private $m_path = "/";
    private $m_query = "";
    // Socket resource while a response is open, otherwise null.
    private $m_fp = null;
    // Accumulated error text; fragments end in "<br>" because it is echoed as HTML.
    private $m_error = "";
    // Parsed response headers, keyed by lower-cased header name.
    private $m_httphead = array();
    // Response body cached by GetHtml().
    private $m_html = "";

    /**
     * Split $url into its components and store them on the instance.
     *
     * Missing components keep their current values (the defaults after
     * OpenUrl() has reset the instance). An https URL without an explicit
     * port defaults to 443.
     *
     * @param string $url Absolute URL to parse.
     */
    public function PrivateInit($url) {
        $this->m_url = $url;
        $urls = @parse_url($url);
        if (!is_array($urls)) {
            return;
        }
        if (!empty($urls["host"])) $this->m_host = $urls["host"];
        if (!empty($urls["scheme"])) $this->m_scheme = strtolower($urls["scheme"]);
        if (!empty($urls["user"])) $this->m_user = $urls["user"];
        if (!empty($urls["pass"])) $this->m_pass = $urls["pass"];
        if (!empty($urls["port"])) {
            $this->m_port = $urls["port"];
        } elseif ($this->m_scheme == "https") {
            $this->m_port = "443";
        }
        if (!empty($urls["path"])) $this->m_path = $urls["path"];
        $this->m_urlpath = $this->m_path;
        if (!empty($urls["query"])) {
            $this->m_query = $urls["query"];
            $this->m_urlpath .= "?".$this->m_query;
        }
    }

    /**
     * Reset the instance, connect to $url, send a GET request and parse the
     * response headers.
     *
     * @param string $url Absolute URL to open.
     * @return bool True when the request was sent and a status line was read.
     */
    public function OpenUrl($url) {
        // Reset every field so state from a previous request cannot leak in.
        $this->m_url = "";
        $this->m_urlpath = "";
        $this->m_scheme = "http";
        $this->m_host = "";
        $this->m_port = "80";
        $this->m_user = "";
        $this->m_pass = "";
        $this->m_path = "/";
        $this->m_query = "";
        $this->m_error = "";
        $this->m_httphead = array();
        $this->m_html = "";
        $this->Close();

        $this->PrivateInit($url);
        return $this->PrivateStartSession();
    }

    /**
     * Echo the accumulated error text followed by every response header.
     *
     * Header names and values come from the remote server, so they are
     * HTML-escaped before being written to the page.
     */
    public function printError() {
        echo "错误信息:".$this->m_error;
        echo "具体返回头:<br>";
        foreach ($this->m_httphead as $k => $v) {
            echo $this->escapeHtml($k)." => ".$this->escapeHtml($v)." <br>\r\n";
        }
    }

    /**
     * Report whether the response status is 2xx; otherwise record the status
     * in the error text.
     *
     * @return bool
     */
    public function IsGetOK() {
        if ($this->isSuccessStatus()) {
            return true;
        }
        $this->m_error .= $this->escapeHtml($this->GetHead("http-state")." - ".$this->GetHead("http-describe"))."<br>";
        return false;
    }

    /**
     * Report whether the response is 2xx with a Content-Type starting with
     * "text" (case-insensitive); otherwise record an error.
     *
     * @return bool
     */
    public function IsText() {
        if ($this->isSuccessStatus() && stripos($this->GetHead("content-type"), "text") === 0) {
            return true;
        }
        $this->m_error .= "内容为非文本类型<br>";
        return false;
    }

    /**
     * Report whether the response is 2xx with the given Content-Type.
     *
     * The comparison is case-insensitive and matches either the complete
     * header value or just its media type, so "text/html" matches
     * "text/html; charset=utf-8".
     *
     * @param string $ctype Expected content type.
     * @return bool
     */
    public function IsContentType($ctype) {
        $expected = strtolower(trim($ctype));
        $actual = strtolower(trim($this->GetHead("content-type")));
        $semi = strpos($actual, ";");
        $mediaType = ($semi === false) ? $actual : trim(substr($actual, 0, $semi));
        if ($this->isSuccessStatus() && ($actual === $expected || $mediaType === $expected)) {
            return true;
        }
        $this->m_error .= "类型不对 ".$this->escapeHtml($this->GetHead("content-type"))."<br>";
        return false;
    }

    /**
     * Stream the response body into "img/$savefilename".
     *
     * The file name is used as given; callers passing names derived from
     * remote data must sanitise them first (e.g. with basename()). The target
     * directory is created when missing. The connection is closed afterwards.
     *
     * @param string $savefilename File name relative to the img/ directory.
     * @return bool True when the body was written, false on any failure
     *              (details are available through printError()).
     */
    public function SaveToBin($savefilename) {
        if (!$this->IsGetOK()) return false;
        if (!is_resource($this->m_fp) || feof($this->m_fp)) {
            $this->m_error = "连接已经关闭!";
            return false;
        }
        $target = "img/".$savefilename;
        $dir = dirname($target);
        if (!is_dir($dir)) {
            // Best effort: a failure here surfaces through fopen() below.
            @mkdir($dir, 0755, true);
        }
        // "wb" keeps binary data intact on platforms that translate line endings.
        $out = @fopen($target, "wb");
        if (!$out) {
            $this->m_error = "写入文件 $savefilename 失败!";
            $this->Close();
            return false;
        }
        $ok = true;
        while (!feof($this->m_fp)) {
            $chunk = fread($this->m_fp, 8192);
            // An empty read on a blocking socket means EOF or timeout; stop
            // instead of spinning.
            if ($chunk === false || $chunk === "") {
                break;
            }
            if (fwrite($out, $chunk) === false) {
                $this->m_error = "写入文件 $savefilename 失败!";
                $ok = false;
                break;
            }
        }
        fclose($out);
        $this->Close();
        return $ok;
    }

    /**
     * Save the response body to "img/$savefilename" when it is a text response.
     *
     * @param string $savefilename File name relative to the img/ directory.
     * @return bool|string Result of SaveToBin() for text responses, "" otherwise.
     */
    public function SaveToText($savefilename) {
        if ($this->IsText()) {
            return $this->SaveToBin($savefilename);
        }
        return "";
    }

    /**
     * Read and return the complete body of a text response.
     *
     * The body is cached, so repeated calls return the same string. The
     * connection is closed once the body has been read.
     *
     * @return string Body text, or "" when the response is not text or the
     *                connection is not readable.
     */
    public function GetHtml() {
        if (!$this->IsText()) return "";
        if ($this->m_html != "") return $this->m_html;
        if (!is_resource($this->m_fp) || feof($this->m_fp)) return "";
        while (!feof($this->m_fp)) {
            $chunk = fread($this->m_fp, 8192);
            if ($chunk === false || $chunk === "") {
                break;
            }
            $this->m_html .= $chunk;
        }
        $this->Close();
        return $this->m_html;
    }

    /**
     * Connect, send the GET request and parse the status line and headers.
     *
     * The request is always sent as HTTP/1.0 with "Connection: close": the
     * body readers rely on the server closing the socket to detect the end
     * of the body, and an HTTP/1.0 request keeps the server from answering
     * with chunked transfer encoding, which this class does not decode.
     *
     * @return bool False when the host cannot be reached or no status line
     *              is received, true otherwise.
     */
    public function PrivateStartSession() {
        if (!$this->PrivateOpenHost()) {
            $this->m_error .= "打开远程主机出错!";
            return false;
        }

        // Include the port in Host only when it is not the scheme default.
        $defaultPort = ($this->m_scheme == "https") ? "443" : "80";
        $hostHeader = $this->m_host;
        if ((string)$this->m_port !== $defaultPort) {
            $hostHeader .= ":".$this->m_port;
        }
        // A raw space would terminate the request target early.
        $requestTarget = str_replace(" ", "%20", $this->m_urlpath);

        $request = "GET ".$requestTarget." HTTP/1.0\r\n"
            ."Host: ".$hostHeader."\r\n"
            ."Accept: */*\r\n"
            ."User-Agent: Mozilla/4.0+(compatible;+MSIE+10.0;+Windows+NT+5.2)\r\n"
            ."Connection: close\r\n\r\n";
        fwrite($this->m_fp, $request);

        $statusLine = fgets($this->m_fp, 8192);
        if ($statusLine === false) {
            $this->m_error .= "打开远程主机出错!";
            $this->Close();
            return false;
        }
        // "HTTP/1.1 200 OK" -> protocol version, status code, reason phrase.
        $parts = explode(" ", trim($statusLine), 3);
        $this->m_httphead["http-edition"] = trim($parts[0]);
        $this->m_httphead["http-state"] = isset($parts[1]) ? trim($parts[1]) : "";
        $this->m_httphead["http-describe"] = isset($parts[2]) ? trim($parts[2]) : "";

        while (!feof($this->m_fp)) {
            $line = fgets($this->m_fp, 8192);
            if ($line === false) break;
            $line = str_replace("\"", "", trim($line));
            // A blank line separates the headers from the body.
            if ($line == "") break;
            // Split on the first colon only so values such as URLs stay intact.
            $colon = strpos($line, ":");
            if ($colon !== false) {
                $name = strtolower(trim(substr($line, 0, $colon)));
                $this->m_httphead[$name] = trim(substr($line, $colon + 1));
            }
        }
        return true;
    }

    /**
     * Return the value of a response header, or "" when it was not sent.
     *
     * Besides real headers, the pseudo-headers "http-edition", "http-state"
     * and "http-describe" hold the parts of the status line.
     *
     * @param string $headname Header name (case-insensitive).
     * @return string
     */
    public function GetHead($headname) {
        $headname = strtolower($headname);
        if (isset($this->m_httphead[$headname])) {
            return $this->m_httphead[$headname];
        }
        return "";
    }

    /**
     * Open the socket to the configured host and port (10 second connect
     * timeout). https URLs are connected through the ssl:// transport.
     *
     * @return bool True on success; on failure the socket error text is
     *              stored as the error message.
     */
    public function PrivateOpenHost() {
        if ($this->m_host == "") return false;
        $errno = 0;
        $errstr = "";
        $target = ($this->m_scheme == "https" ? "ssl://" : "").$this->m_host;
        // Warnings are suppressed because the failure is reported via $errstr.
        $fp = @fsockopen($target, (int)$this->m_port, $errno, $errstr, 10);
        if (!$fp) {
            $this->m_fp = null;
            $this->m_error = $errstr;
            return false;
        }
        $this->m_fp = $fp;
        return true;
    }

    /**
     * Close the connection if one is open. Safe to call repeatedly.
     */
    public function Close() {
        if (is_resource($this->m_fp)) {
            fclose($this->m_fp);
        }
        $this->m_fp = null;
    }

    /**
     * True when the parsed status code starts with "2".
     */
    private function isSuccessStatus() {
        return strncmp($this->GetHead("http-state"), "2", 1) === 0;
    }

    /**
     * HTML-escape text that originates from the remote server.
     */
    private function escapeHtml($text) {
        return htmlspecialchars((string)$text, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
    }
}
/**
 * Read the pixel dimensions of an image from (at most) its leading bytes.
 *
 * Up to 512000 bytes are fetched and handed to PHP's image header parser;
 * the format is detected from the data, so one code path serves JPEG, PNG,
 * GIF and the other supported types. Damaged images may not be parseable
 * even when a browser can still display them.
 *
 * @param string $url           Image location. In 'fread' mode anything
 *                              fopen() accepts works, including local paths.
 * @param string $type          'curl' (default) or 'fread'. 'fread' is also
 *                              used when cURL is not available.
 * @param bool   $isGetFilesize When true, 'fread' mode is forced and the
 *                              Content-Length response header, if present,
 *                              is returned under 'size'.
 * @return array|false array('width' => int, 'height' => int[, 'size' => string])
 *                     or false when the data cannot be fetched or parsed.
 */
function myGetImageSize($url, $type = 'curl', $isGetFilesize = false)
{
    $maxBytes = 512000;

    // Response headers are only exposed by the stream wrapper, not by the
    // cURL branch below, so asking for the size forces 'fread' mode.
    if ($isGetFilesize || !function_exists('curl_init')) {
        $type = 'fread';
    }

    $headers = array();
    if ($type == 'fread') {
        $handle = fopen($url, 'rb');
        if (! $handle) return false;
        // A single fread() on a network stream returns as soon as one packet
        // has arrived; stream_get_contents() keeps reading up to the limit.
        $dataBlock = stream_get_contents($handle, $maxBytes);
        $meta = stream_get_meta_data($handle);
        fclose($handle);
        if (isset($meta['wrapper_data'])) {
            // Some setups expose the header list under a nested 'headers' key,
            // others directly in wrapper_data.
            $wrapper = $meta['wrapper_data'];
            if (is_array($wrapper)) {
                $headers = (isset($wrapper['headers']) && is_array($wrapper['headers']))
                    ? $wrapper['headers']
                    : $wrapper;
            }
        }
    } else {
        $ch = curl_init($url);
        if (! $ch) return false;
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        // Ask only for the leading bytes; servers that ignore Range send the
        // whole file, which still parses.
        curl_setopt($ch, CURLOPT_RANGE, '0-' . $maxBytes);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $dataBlock = curl_exec($ch);
        curl_close($ch);
    }

    if (! is_string($dataBlock) || $dataBlock === '') {
        return false;
    }

    $size = getimagesizefromstring($dataBlock);
    if (empty($size)) {
        return false;
    }

    // Index 0 is the width and index 1 the height, both in pixels.
    $result = array(
        'width'  => $size[0],
        'height' => $size[1],
    );

    if ($isGetFilesize) {
        foreach ($headers as $header) {
            // No break: after redirects the list holds several responses and
            // the last Content-Length belongs to the image itself.
            if (is_string($header) && preg_match('/^content-length\s*:\s*(\d+)/i', $header, $m)) {
                $result['size'] = $m[1];
            }
        }
    }

    return $result;
}
set_time_limit(0); // crawling may take arbitrarily long

/**
 * Collect the src value of every <img> tag whose path ends in a supported
 * image extension (gif, jpg, jpeg, png, bmp; case-insensitive).
 *
 * The HTML is matched as-is rather than lower-cased, because URL paths are
 * case-sensitive on many servers.
 *
 * @param string $html Page markup.
 * @return array Unique src values in document order.
 */
function extractImageSources($html)
{
    if (!is_string($html) || $html === '') {
        return array();
    }
    // \s before "src" keeps attributes such as data-src from matching.
    if (!preg_match_all('/<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1/is', $html, $found)) {
        return array();
    }
    $sources = array();
    foreach ($found[2] as $src) {
        $src = trim($src);
        // Check the extension on the path only, ignoring ?query and #fragment.
        $path = preg_replace('/[?#].*$/s', '', $src);
        if (preg_match('/\.(?:gif|jpe?g|png|bmp)$/i', $path)) {
            $sources[] = $src;
        }
    }
    return array_values(array_unique($sources));
}

/**
 * Turn an <img src> value into an absolute http(s) URL relative to the page
 * it was found on.
 *
 * Handles absolute, protocol-relative ("//host/x"), root-relative ("/x") and
 * document-relative ("x", "../x") references. "../" segments are passed
 * through unnormalised.
 *
 * @param string $pageUrl Absolute URL of the page.
 * @param string $src     Raw attribute value.
 * @return string Absolute URL, or "" when $src cannot be fetched over HTTP.
 */
function resolveImageUrl($pageUrl, $src)
{
    // Attribute values are HTML-encoded (e.g. "&amp;" in query strings).
    $src = trim(html_entity_decode($src, ENT_QUOTES, 'UTF-8'));
    if ($src === '') {
        return '';
    }
    if (preg_match('#^https?://#i', $src)) {
        return $src;
    }
    $base = parse_url($pageUrl);
    if (!is_array($base) || empty($base['host'])) {
        return '';
    }
    $scheme = isset($base['scheme']) ? $base['scheme'] : 'http';
    if (substr($src, 0, 2) === '//') {
        return $scheme . ':' . $src;
    }
    // Any other scheme (data:, javascript:, ftp:, ...) is not downloadable here.
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $src)) {
        return '';
    }
    $origin = $scheme . '://' . $base['host'] . (isset($base['port']) ? ':' . $base['port'] : '');
    if ($src[0] === '/') {
        return $origin . $src;
    }
    $path = isset($base['path']) ? $base['path'] : '/';
    $slash = strrpos($path, '/');
    $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);
    return $origin . $dir . $src;
}

// NOTE(security): this script makes the server fetch arbitrary URLs supplied
// by the visitor. Do not expose it publicly without access control.
$file = new HttpDownload();

// The field is absent on the initial GET request that just shows the form.
$wenjian = (isset($_POST["wenjian"]) && is_string($_POST["wenjian"])) ? trim($_POST["wenjian"]) : "";
if ($wenjian != "") {
    // One page URL per line; accept any line-ending style.
    foreach (preg_split('/\r\n|\r|\n/', $wenjian) as $url) {
        $url = trim($url);
        if ($url === '') {
            continue;
        }
        // Only add a scheme when none was given; keep https:// URLs intact.
        $iu = preg_match('#^https?://#i', $url) ? $url : "http://" . $url;

        $file->OpenUrl($iu);
        $content = $file->GetHtml();

        $seen = array();
        foreach (extractImageSources($content) as $img) {
            $imim = resolveImageUrl($iu, $img);
            if ($imim === '' || isset($seen[$imim])) {
                continue;
            }
            $seen[$imim] = true;

            $result = myGetImageSize($imim);
            if (!is_array($result) || !isset($result['width'], $result['height'])) {
                continue;
            }
            $ww = (int) $result['width'];
            $hh = (int) $result['height'];
            // Keep only images that are at least 300x300 pixels.
            if ($ww >= 300 && $hh >= 300) {
                // Name the file after the URL path only (no query string);
                // basename() also keeps remote data from choosing a directory.
                $wjm = basename((string) parse_url($imim, PHP_URL_PATH));
                if ($wjm === '') {
                    continue;
                }
                echo $ww."-".$hh.";";
                $file->OpenUrl($imim);
                $file->SaveToBin($wjm);
            }
        }
    }
    echo "完毕";
}
$file->Close(); // release the connection
?>
<body>
<!-- Input form: one page URL per line; posts back to the download script. -->
<form action="index.php" method="post">
<p><textarea name="wenjian" cols="20" rows="6" style="width:500px;"></textarea></p>
<p><input type="submit" value="提交" /></p>
</form>
</body>
这个还不够,正则只能取img标签的src链接,微信图文上就抓不到了
能做成工具吗?
页:
[1]