自动解析网页中图片链接并下载到本地
做 CMS 的时候,经常需要采集别人的文章,但文章里的图片很多都是动态链接,所以最好把图片下载到本地。下面是基本代码。
using System;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
namespace zhang.Common
{
public class HanlerFiles
{
// Matches one complete <img ...> tag. The pattern literal was truncated in the
// published listing (the tag text was stripped), so this is a reconstruction.
// [^>]* is used rather than .+? so that tags broken across lines still match.
private static readonly Regex ImgTagRegex =
    new Regex("<img\\b[^>]*>", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Scans an HTML fragment for &lt;img&gt; tags and extracts the image URL from each one.
/// </summary>
/// <param name="htmlStr">HTML text to scan; may be null or empty.</param>
/// <returns>
/// One entry per &lt;img&gt; tag found, in document order. Each entry is whatever
/// <c>GetImgUrl</c> returns for that tag. The array is empty when the input is
/// null, empty, or contains no &lt;img&gt; tags.
/// </returns>
private string[] GetImgTag(string htmlStr)
{
    if (string.IsNullOrEmpty(htmlStr))
    {
        return new string[0];
    }

    // Run the match once; the original evaluated Matches() twice on the same input.
    MatchCollection imgTags = ImgTagRegex.Matches(htmlStr);
    string[] strAry = new string[imgTags.Count];
    for (int i = 0; i < imgTags.Count; i++)
    {
        strAry[i] = GetImgUrl(imgTags[i].Value);
    }
    return strAry;
}
// Absolute http:// URL ending in a known image extension. The alternation bars
// were lost in the published listing and are restored here. The dot before the
// extension is escaped, and the URL body excludes whitespace, quotes and angle
// brackets so a match cannot run past the end of the attribute value.
private static readonly Regex ImgUrlRegex =
    new Regex(@"http://[^\s""'<>]+\.(?:jpg|gif|bmp|png)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Extracts an absolute http:// image URL (jpg, gif, bmp or png, case-insensitive)
/// from the text of a single &lt;img&gt; tag.
/// </summary>
/// <param name="imgTagStr">Text of one &lt;img&gt; tag; may be null or empty.</param>
/// <returns>
/// The last matching URL in the tag, or an empty string when the input is null,
/// empty, or contains no matching URL.
/// </returns>
private string GetImgUrl(string imgTagStr)
{
    if (string.IsNullOrEmpty(imgTagStr))
    {
        return "";
    }

    MatchCollection urls = ImgUrlRegex.Matches(imgTagStr);

    // Keep the original "last match wins" behavior.
    return urls.Count > 0 ? urls[urls.Count - 1].Value : "";
}
/**////
……