1、项目中安装引用AngleSharp
AngleSharp:http://anglesharp.github.io/
通过NuGet获取AngleSharp
1)使用Nuget管理控制台
将AngleSharp集成到项目中的最简单方法是使用NuGet。您可以通过打开包管理器控制台(PM)并键入以下语句来安装AngleSharp:
Install-Package AngleSharp
2)使用Nuget图形管理器
使用Nuget的界面的管理器搜索"AngleSharp"
=> 找到点出点击"安装"
。
3)使用.NET CLI命令安装
> dotnet add TodoApi.csproj package AngleSharp
相关文档:VS(Visual Studio)中Nuget的使用
2、下载网页代码及引用的静态文件
/// <summary> /// MD5字符串加密 /// </summary> /// <param name="txt"></param> /// <returns>加密后字符串</returns> public static string GenerateMD5(string txt) { using (MD5 mi = MD5.Create()) { byte[] buffer = Encoding.Default.GetBytes(txt); //开始加密 byte[] newBuffer = mi.ComputeHash(buffer); StringBuilder sb = new StringBuilder(); for (int i = 0; i < newBuffer.Length; i++) { sb.Append(newBuffer[i].ToString("x2")); } return sb.ToString(); } } //下载文件 public static bool DwonFile(IElement item, string fileName, string sourceUrl, string filePath, string sub, string attr, string dir, IDocument document) { //原文:https://www.cjavapy.com/article/696/ sourceUrl = item.GetAttribute(attr); if (string.IsNullOrEmpty(sourceUrl)) return true; if (dir == "jpg" && sourceUrl.IndexOf(";base64,") > -1 || (dir == "css" && sourceUrl.ToLower().IndexOf(".css") == 0)) return true; WebClient webClient = new WebClient(); sub = Path.Combine(filePath, dir); if (!Directory.Exists(sub)) Directory.CreateDirectory(sub); sourceUrl = fixUrl(sourceUrl, document.Origin); fileName = GenerateMD5(sourceUrl) + "." + dir; Console.WriteLine(sourceUrl); log.Info(sourceUrl + " = " + sourceUrl); //https://www.cjavapy.com/article/696/ if (!File.Exists(Path.Combine(sub, fileName))) try { webClient.DownloadFile(sourceUrl, Path.Combine(sub, fileName)); } catch (Exception ex) { log.Info("sourceUrl = " + sourceUrl + " dir = " + dir); log.Error(ex); } item.SetAttribute(attr, "/static/" + dir + "/" + fileName); item.SetAttribute("referrerPolicy", "no-referrer"); return false; } public static string GetHtml(string url, string filePath) { var requester = new DefaultHttpRequester("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"); //https://www.cjavapy.com/article/696/ requester.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"); requester.Headers.Add("Referer", ""); requester.Headers.Add("Accept-Language", "zh-Hans-CN,zh-Hans;q=0.8,en-US;q=0.5,en;q=0.3"); var context = BrowsingContext.New(Configuration.Default.WithLocaleBasedEncoding().WithDefaultLoader().WithDefaultCookies().With(requester)); //根据虚拟请求/响应模式创建文档 //https://www.cjavapy.com/article/696/ var document = context.OpenAsync(url).Result; //var blueListItemsLinq = document.All.Where(m => m.LocalName == "li" && m.ClassList.Contains("blue")); //或者直接使用CSS选择器 string sourceUrl = string.Empty; var scripts = document.QuerySelectorAll("script"); string fileName = string.Empty; string sub = string.Empty; foreach (var item in scripts) { if (DwonFile(item, fileName, sourceUrl, filePath, sub, "src", "js", document)) continue; } var links = document.QuerySelectorAll("link"); foreach (var item in links) { if (DwonFile(item, fileName, sourceUrl, filePath, sub, "href", "css", document)) continue; } var imgs = document.QuerySelectorAll("img"); foreach (var item in imgs) { if (DwonFile(item, fileName, sourceUrl, filePath, sub, "src", "jpg", document)) continue; } return document.ToHtml(); }
相关文档: