Skip to content

Commit

Permalink
Fix a fatal error caused by out-of-sync code
Browse files Browse the repository at this point in the history
  • Loading branch information
sqybi committed May 7, 2015
1 parent d4e6b82 commit fb99a49
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 25 deletions.
44 changes: 20 additions & 24 deletions BaiduHiCrawler/BaiduHiCrawler/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,31 +111,27 @@ private async void ButtonStartCrawling_Click(object sender, RoutedEventArgs e)

try
{
//// Get URI of hi space
//Logger.LogVerbose("Getting URL of personal space home page");

//var spaceLinkRegex = new Regex(
// @"<A href=""(http://hi.baidu.com/[^""]+)"">我的主页</A>",
// RegexOptions.IgnoreCase | RegexOptions.Compiled);

//var htmlDoc =
// await
// this.NavigateAndGetHtmlDocumentWithCheck(
// this.webBrowserCrawler,
// Constants.HomeUri,
// d => spaceLinkRegex.IsMatch(d.DocumentNode.OuterHtml),
// null);
//if (htmlDoc == null)
//{
// throw new Exception("Cannot get space home page URL");
//}

//var spaceLink = spaceLinkRegex.Match(htmlDoc.DocumentNode.OuterHtml).Groups[1].Value;
//var spaceUri = new Uri(spaceLink);

var spaceLink = "http://hi.baidu.com/xinr_kazemai";
// Get URI of hi space
Logger.LogVerbose("Getting URL of personal space home page");

var spaceLinkRegex = new Regex(
@"<A href=""(http://hi.baidu.com/[^""]+)"">我的主页</A>",
RegexOptions.IgnoreCase | RegexOptions.Compiled);

var htmlDoc =
await
this.NavigateAndGetHtmlDocumentWithCheck(
this.webBrowserCrawler,
Constants.HomeUri,
d => spaceLinkRegex.IsMatch(d.DocumentNode.OuterHtml),
null);
if (htmlDoc == null)
{
throw new Exception("Cannot get space home page URL");
}

var spaceLink = spaceLinkRegex.Match(htmlDoc.DocumentNode.OuterHtml).Groups[1].Value;
var spaceUri = new Uri(spaceLink);
HtmlAgilityPack.HtmlDocument htmlDoc;

// Get pages count
Logger.LogVerbose("Getting total page count of space");
Expand Down
2 changes: 1 addition & 1 deletion BaiduHiCrawler/BaiduHiCrawlerUpdater/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public static class Constants
{
Major = 0,
Minor = 2,
Revision = 1
Revision = 2
};

public const string MainProcessName = "BaiduHiCrawler";
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@

# 更新日志

## v0.2.2
* 修正了一个由代码同步导致的致命错误。

## v0.2.1
* 针对部分模板修正了无法抓取的bug。

Expand Down

0 comments on commit fb99a49

Please sign in to comment.