1、项目中安装引用AngleSharp
AngleSharp:http://anglesharp.github.io/
1)使用Nuget管理控制台
将AngleSharp集成到项目中的最简单方法是使用NuGet。您可以通过打开包管理器控制台(PM)并键入以下语句来安装AngleSharp:
Install-Package AngleSharp
2)使用Nuget图形管理器
使用Nuget图形界面的管理器搜索"AngleSharp" => 在搜索结果中找到它并点击"安装"。
3)使用.NET CLI命令安装
> dotnet add TodoApi.csproj package AngleSharp
相关文档:VS(Visual Studio)中Nuget的使用
2、提取解析百度的搜索结果的html
/// <summary>
/// Downloads one page of Baidu search results for a keyword and extracts the
/// title, description, target URL, displayed URL text and snapshot link of each entry.
/// </summary>
/// <param name="wd">Search keyword; it is URL-escaped before being placed in the query string.</param>
/// <param name="p">1-based page number; values less than or equal to 0 are treated as page 1.</param>
/// <returns>
/// One <c>SearchResult</c> per element matching <c>#content_left .result</c> that contains
/// every expected child element; entries missing any of them are skipped.
/// </returns>
public static List<SearchResult> GetBaiduResult(string wd, int p)
{
    if (p <= 0)
    {
        p = 1;
    }

    // The "pn" query parameter is sent as (p - 1) * 10, i.e. ten results per page are assumed.
    string url = string.Format("https://www.baidu.com/s?wd={0}&pn={1}", Uri.EscapeDataString(wd), (p - 1) * 10);

    // Send browser-like headers with the request.
    var requester = new DefaultHttpRequester("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36");
    requester.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
    requester.Headers.Add("Referer", "");
    requester.Headers.Add("Accept-Language", "zh-Hans-CN,zh-Hans;q=0.8,en-US;q=0.5,en;q=0.3");

    var context = BrowsingContext.New(Configuration.Default.WithLocaleBasedEncoding().WithDefaultLoader().WithDefaultCookies().With(requester));

    // Create the document from a virtual request/response pattern.
    // NOTE: .Result blocks the calling thread until the page has loaded; it is kept so that
    // this method stays synchronous and failures still surface as AggregateException.
    var document = context.OpenAsync(url).Result;

    // Select the result entries directly with a CSS selector.
    var itemsCssSelector = document.QuerySelectorAll("#content_left .result");

    List<SearchResult> results = new List<SearchResult>();
    foreach (var item in itemsCssSelector)
    {
        // Evaluate each selector once and reuse the element for both the null check and the extraction.
        var titleLink = item.QuerySelector("h3 a");
        var summary = item.QuerySelector("div.c-abstract");
        var shownUrl = item.QuerySelector("div.f13 a.c-showurl");
        var snapshotLink = item.QuerySelector("div.f13 a.m");

        // Skip entries that do not have the full set of expected child elements.
        if (titleLink == null || summary == null || shownUrl == null || snapshotLink == null)
        {
            continue;
        }

        results.Add(new SearchResult()
        {
            title = titleLink.Html(),
            desc = summary.Html(),
            url = titleLink.GetAttribute("href"),
            urlText = shownUrl.Html(),
            snapshot = snapshotLink.GetAttribute("href")
        });
    }

    return results;
}
3、提取解析谷歌的搜索结果的html
/// <summary>
/// Downloads one page of Google search results for a keyword and extracts the
/// title, description, target URL, displayed URL text and snapshot link of each entry.
/// </summary>
/// <param name="wd">Search keyword; it is URL-escaped before being placed in the query string.</param>
/// <param name="p">1-based page number; values less than or equal to 0 are treated as page 1.</param>
/// <returns>
/// One <c>SearchResult</c> per element matching <c>#rso &gt; div</c> that contains every
/// expected child element; entries missing any of them are skipped, and entries whose
/// processing throws are logged and skipped.
/// </returns>
public static List<SearchResult> GetGoogleResult(string wd, int p)
{
    if (p <= 0)
    {
        p = 1;
    }

    // The "start" query parameter is sent as (p - 1) * 10, i.e. ten results per page are assumed.
    string url = string.Format("https://www.google.com/search?q={0}&start={1}", Uri.EscapeDataString(wd), (p - 1) * 10);

    // Send browser-like headers with the request.
    var requester = new DefaultHttpRequester("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36");
    requester.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
    requester.Headers.Add("Referer", "");
    requester.Headers.Add("Accept-Language", "zh-Hans-CN,zh-Hans;q=0.8,en-US;q=0.5,en;q=0.3");

    var context = BrowsingContext.New(Configuration.Default.WithLocaleBasedEncoding().WithDefaultLoader().WithDefaultCookies().With(requester));

    // Create the document from a virtual request/response pattern.
    // NOTE: .Result blocks the calling thread until the page has loaded; it is kept so that
    // this method stays synchronous and failures still surface as AggregateException.
    var document = context.OpenAsync(url).Result;

    // Select the result entries directly with a CSS selector.
    var itemsCssSelector = document.QuerySelectorAll("#rso > div");

    List<SearchResult> results = new List<SearchResult>();
    foreach (var item in itemsCssSelector)
    {
        try
        {
            // Evaluate each selector once and reuse the element for both the null check and the extraction.
            var heading = item.QuerySelector("div > div.r > a > h3");
            var summary = item.QuerySelector("div > div.s > div");
            var link = item.QuerySelector("div > div.r > a");
            var cite = item.QuerySelector("div > div.r > a > div > cite");
            var snapshotLink = item.QuerySelector("div > div.r > div > div.eFM0qc > span > div > ol > li > a");

            // Skip entries that do not have the full set of expected child elements.
            if (heading == null || summary == null || link == null || cite == null || snapshotLink == null)
            {
                continue;
            }

            results.Add(new SearchResult()
            {
                title = heading.Html(),
                desc = summary.Html(),
                url = link.GetAttribute("href"),
                urlText = cite.Html(),
                snapshot = snapshotLink.GetAttribute("href")
            });
        }
        catch (Exception ex)
        {
            // Best effort: one malformed entry must not abort the whole page, so log it and move on.
            log.Error(ex);
        }
    }

    return results;
}