using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using HtmlAgilityPack;
using System.Net.Http;
using System.Threading.Tasks;
using System.Threading;
namespace Reptile
{
class Program
{
// Pool of User-Agent header values; filled by userAgentAdd() and sampled at random by HttpGet().
public static List<string> UserAgent = new List<string>();
// NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
public static string Url = @"https://www.biquge.info/wanjiexiaoshuo/";
// Base output directory; run() sets it to the current working directory before downloading.
public static string FilePath = string.Empty;
// Characters treated as forbidden in file names; consulted by HandlingName().
public static List<char> FileNoName = new List<char>(){ '#', '/', '\\', ':', '*', '?', '\"', '<', '>', '|' };
/// <summary>
/// Program entry point: fills the User-Agent pool, runs the download,
/// then waits for one console read before returning.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static async Task Main(string[] args)
{
    // The pool has to be populated before the first request is issued.
    userAgentAdd();

    Task<dynamic> download = run();
    await download;

    // Wait for console input before the process ends.
    Console.Read();
}
/// <summary>
/// Starts the download: points <see cref="FilePath"/> at the current working
/// directory, downloads the whole book through its chapter-list page, and then
/// waits for one console read.
/// </summary>
/// <returns>A task whose result is always <c>null</c>.</returns>
public static async Task<dynamic> run()
{
    // Everything downloaded is stored beneath the current working directory.
    string workingDirectory = Directory.GetCurrentDirectory();
    FilePath = workingDirectory;

    // Read the whole book, starting from its table of contents.
    const string catalogUrl = @"https://www.xbiquwx.la/10_10240/";
    await ReadLists(catalogUrl);

    Console.Read();
    return null;
}
/// <summary>
/// Downloads the chapter-list page at <paramref name="url"/>, creates a folder
/// named after the book beneath <see cref="FilePath"/>, and saves every listed
/// chapter into that folder, one after another.
/// </summary>
/// <param name="url">Address of the book's chapter-list page.</param>
/// <returns>A task whose result is always <c>null</c>.</returns>
/// <exception cref="InvalidOperationException">
/// The page does not contain the expected book-title element.
/// </exception>
public async static Task<dynamic> ReadLists(string url)
{
    var doc = new HtmlDocument();
    string pageContent = await HttpGet(url);
    doc.LoadHtml(pageContent);

    // Book title; it becomes the name of the output folder.
    var titleNode = doc.DocumentNode.SelectSingleNode("//div[@class=\"box_con\"]/div[@id=\"maininfo\"]/div[@id=\"info\"]/h1");
    if (titleNode == null)
    {
        throw new InvalidOperationException("Book title not found on page: " + url);
    }

    // Run the title through the same sanitiser used for chapter file names.
    var bookName = HandlingName(titleNode.InnerText.Trim());

    // Output folder. The platform separator replaces the hard-coded backslash;
    // CreateDirectory is already a no-op when the folder exists.
    var path = Path.Combine(FilePath, bookName) + Path.DirectorySeparatorChar;
    Directory.CreateDirectory(path);

    // Chapter list. SelectNodes returns null (not an empty collection) when nothing matches.
    var nodes = doc.DocumentNode.SelectNodes("//div[@class=\"box_con\"]/div[@id=\"list\"]/dl/dd");
    if (nodes == null)
    {
        Console.WriteLine("No chapters found on page: " + url);
        return null;
    }

    var baseUri = new Uri(url);
    foreach (var node in nodes)
    {
        // Skip <dd> entries that carry no usable link.
        string href = node.SelectSingleNode("a")?.Attributes["href"]?.Value;
        if (string.IsNullOrWhiteSpace(href))
        {
            continue;
        }

        // Resolve the link as a URL (handles relative, root-relative and absolute
        // hrefs); Path.Combine is meant for file-system paths, not URLs.
        var chapterUri = new Uri(baseUri, href);
        await ReadChapter(chapterUri.AbsoluteUri, path);

        // Pause between requests without blocking the thread.
        await Task.Delay(200);
    }
    return null;
}
/// <summary>
/// Downloads one chapter page, extracts its title and body text, and writes
/// the text to "&lt;title&gt;.txt" inside <paramref name="path"/>.
/// </summary>
/// <param name="url">Address of the chapter page.</param>
/// <param name="path">Folder the chapter file is written to (combined with <see cref="FilePath"/>).</param>
/// <returns>A task whose result is <c>true</c> once the file has been written.</returns>
/// <exception cref="InvalidOperationException">
/// The page does not contain the expected title or content elements.
/// </exception>
public async static Task<dynamic> ReadChapter(string url,string path)
{
    var doc = new HtmlDocument();
    string pageContent = await HttpGet(url);
    doc.LoadHtml(pageContent);

    // The earlier predicate ["box_con"] was a bare string, which XPath treats as
    // always true; the class attribute test is what was meant.
    var node = doc.DocumentNode.SelectSingleNode("//div[@class=\"content_read\"]/div[@class=\"box_con\"]");
    var contentNode = node?.SelectSingleNode("div[@id=\"content\"]");
    var titleNode = node?.SelectSingleNode("div[@class=\"bookname\"]/h1");
    if (contentNode == null || titleNode == null)
    {
        throw new InvalidOperationException("Chapter title or content not found on page: " + url);
    }

    // InnerText drops tags, so a textual replace of "<br>" afterwards can never
    // match. Turn each <br> element into a line-break text node first.
    var lineBreaks = contentNode.SelectNodes(".//br");
    if (lineBreaks != null)
    {
        // Copy the result list so the tree can be edited while iterating.
        foreach (var br in new List<HtmlNode>(lineBreaks))
        {
            br.ParentNode.ReplaceChild(doc.CreateTextNode("\r\n"), br);
        }
    }

    // Decode HTML entities (&nbsp;, &amp;, ...) so they are not written out literally.
    // NOTE(review): the space removal is kept as written; it presumably targets the
    // non-breaking-space paragraph padding, which is stripped as well — confirm.
    var data = HtmlEntity.DeEntitize(contentNode.InnerText)
        .Replace("\u00A0", "")
        .Replace(" ", "");

    var title = HandlingName(HtmlEntity.DeEntitize(titleNode.InnerText).Trim());

    var filePath = Path.Combine(Path.Combine(FilePath, path), title) + ".txt";
    await WriteData(data, filePath);
    return true;
}
/// <summary>
/// Removes characters that must not appear in a file name: every character
/// listed in <see cref="FileNoName"/> plus whatever the current platform
/// reports through <see cref="Path.GetInvalidFileNameChars"/>.
/// </summary>
/// <param name="str">Raw name, e.g. a chapter title; may be <c>null</c>.</param>
/// <returns>
/// <paramref name="str"/> without the forbidden characters, or an empty string
/// when <paramref name="str"/> is <c>null</c> or empty.
/// </returns>
public static string HandlingName(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    char[] platformInvalid = Path.GetInvalidFileNameChars();
    var cleaned = new StringBuilder(str.Length);
    foreach (char current in str)
    {
        // Previously the character was appended even when it was forbidden,
        // so nothing was ever removed; skip it instead.
        if (FileNoName.Contains(current) || Array.IndexOf(platformInvalid, current) >= 0)
        {
            continue;
        }
        cleaned.Append(current);
    }
    return cleaned.ToString();
}
/// <summary>
/// Issues an HTTP GET for <paramref name="url"/> and returns the response body
/// decoded as UTF-8. A transport failure is retried once; a second failure
/// propagates to the caller.
/// </summary>
/// <param name="url">Address to download.</param>
/// <returns>A task whose result is the response body as a string.</returns>
public async static Task<dynamic> HttpGet(string url)
{
    try
    {
        return await DownloadString(url);
    }
    catch (Exception ex) when (ex is WebException || ex is IOException)
    {
        // Keep the single second attempt the earlier try/catch provided.
        return await DownloadString(url);
    }
}

// One shared generator instead of a new Random per request.
private static readonly Random UserAgentPicker = new Random();

/// <summary>
/// Performs one GET request and reads the whole body as UTF-8 text.
/// </summary>
private static async Task<string> DownloadString(string url)
{
    var request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "GET";
    request.Timeout = 8000;

    // Let the framework decode gzip/deflate bodies according to the response
    // headers, instead of guessing gzip first and re-requesting on failure.
    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    // Choose a random User-Agent; an empty pool leaves the header unset.
    if (UserAgent.Count > 0)
    {
        request.UserAgent = UserAgent[UserAgentPicker.Next(UserAgent.Count)];
    }

    // using blocks release the connection and streams on failure as well.
    using (var response = (HttpWebResponse)await request.GetResponseAsync())
    using (var body = response.GetResponseStream())
    using (var reader = new StreamReader(body, Encoding.UTF8))
    {
        return await reader.ReadToEndAsync();
    }
}
/// <summary>
/// Writes <paramref name="content"/> to the file at <paramref name="path"/> as
/// UTF-8, creating the file or overwriting an existing one, and prints the
/// path to the console.
/// </summary>
/// <param name="content">Text to write.</param>
/// <param name="path">Full path of the target file.</param>
/// <returns>A task whose result is <c>true</c> once the text has been written.</returns>
/// <remarks>Any exception is reported on the console and then rethrown.</remarks>
public static async Task<dynamic> WriteData(string content,string path)
{
    try
    {
        // useAsync: true opens the handle for asynchronous I/O so the awaits
        // below do not simply wrap a blocking write.
        using (var file = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None, 4096, true))
        using (var textWriter = new StreamWriter(file, Encoding.UTF8))
        {
            Console.WriteLine(path);
            await textWriter.WriteAsync(content);
            // Flush asynchronously so disposing the writer has nothing left to write.
            await textWriter.FlushAsync();
        }
        return true;
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        throw;
    }
}
/// <summary>
/// Appends the built-in browser User-Agent strings to <see cref="UserAgent"/>.
/// </summary>
public static void userAgentAdd()
{
    string[] builtInAgents =
    {
        @"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62",
        @"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36",
        @"Mozilla/5.0 (Windows NT 10; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0",
        @"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
        @"Mozilla/5.0 (Windows NT 10; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0",
        @"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        @"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        @"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36",
        @"Mozilla/5.0 (Linux; Android 10; HLK-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.92 Mobile Safari/537.36",
        @"Mozilla/5.0 (Linux; Android 10; V1962A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.92 Mobile Safari/537.36",
        @"Mozilla/5.0 (Linux; Android 10; ELS-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.92 Mobile Safari/537.36",
        @"Mozilla/5.0 (Linux; Android 10; MED-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.92 Mobile Safari/537.36",
    };

    // Appended in the order listed above.
    UserAgent.AddRange(builtInAgents);
}
}
}
代码如上，运行起来后，异步和没用异步的效果一样。
代码应该没错吧？应该是网站本身的原因吧。