Skip to content

Commit e17955d

Browse files
committed
解耦了下载器、数据管道、解析器
1 parent 0d27df1 commit e17955d

File tree

232 files changed

+1782
-2578
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

232 files changed

+1782
-2578
lines changed

Design.zh-CN.md

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# DESIGN
2+
3+
在此重大更新之前, 下载器 & 选择器 & 数据管理是耦合在一起的, 经过许久思考我决定把它们解耦出来, 这样用户可以自由的选择他们喜爱的组件. 比如说, 下载器使用框架自带的Downloader、WebClientApi、苏菲的HttpHelper等; 解析器可以使用框架自带的Extraction、AngleSharp等;
4+
5+
### Downloader
6+
7+
Downloader is an independent module that helps users download data from the target website. There are a number of details to be aware of; see below:
8+
9+
1. There are two ways to set a cookie. One is to call the AddCookie method on the downloader; it adds the cookie to the CookieContainer, so it affects every request.
10+
The other is to set the Cookie header on the request; the result is the combination of your Cookie header and the cookies in the CookieContainer.
11+
2. The CookieInjector in the downloader is invoked only once, and it injects cookies into the CookieContainer.
12+
13+
### Scheduler
14+
15+
#### Request hash
16+
17+
1. Requests with the same URL but different headers are different requests, so headers are a factor.
18+
2. Each request has a CycleRetryTimes property; if the values differ, the requests are different. The Depth property is not
19+
a factor.
20+
21+

DotnetSpider.sln

+3-8
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00
33
# Visual Studio 15
44
VisualStudioVersion = 15.0.27703.2042
55
MinimumVisualStudioVersion = 10.0.40219.1
6-
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Common", "src\DotnetSpider.Common\DotnetSpider.Common.csproj", "{F1C6C272-A72A-4A5B-95EE-846643A29A3A}"
7-
EndProject
86
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Extraction", "src\DotnetSpider.Extraction\DotnetSpider.Extraction.csproj", "{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}"
97
EndProject
108
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.HtmlAgilityPack.Css", "src\DotnetSpider.HtmlAgilityPack.Css\DotnetSpider.HtmlAgilityPack.Css.csproj", "{38DFF949-761C-4DC1-ADC6-D3F535E84AEF}"
@@ -36,11 +34,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
3634
.editorconfig = .editorconfig
3735
.gitignore = .gitignore
3836
.travis.yml = .travis.yml
37+
Design.md = Design.md
38+
Design.zh-CN.md = Design.zh-CN.md
3939
DistributeDesign.md = DistributeDesign.md
4040
publishToNuget.bat = publishToNuget.bat
4141
README.md = README.md
4242
runtests.sh = runtests.sh
43-
Design.md = Design.md
4443
EndProjectSection
4544
EndProject
4645
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Worker", "src\DotnetSpider.Worker\DotnetSpider.Worker.csproj", "{C416B779-5018-42AF-A1A5-98186389CCED}"
@@ -49,18 +48,14 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Migrator", "sr
4948
EndProject
5049
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Broker.Test", "src\DotnetSpider.Broker.Test\DotnetSpider.Broker.Test.csproj", "{6CAEECB0-0BD0-4A32-B057-99C7DADE3F4C}"
5150
EndProject
52-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DotnetSpider.Broker", "src\DotnetSpider.Broker\DotnetSpider.Broker.csproj", "{AAD552D8-0D0A-43B0-9C5D-E542AA8998CE}"
51+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Broker", "src\DotnetSpider.Broker\DotnetSpider.Broker.csproj", "{AAD552D8-0D0A-43B0-9C5D-E542AA8998CE}"
5352
EndProject
5453
Global
5554
GlobalSection(SolutionConfigurationPlatforms) = preSolution
5655
Debug|Any CPU = Debug|Any CPU
5756
Release|Any CPU = Release|Any CPU
5857
EndGlobalSection
5958
GlobalSection(ProjectConfigurationPlatforms) = postSolution
60-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
61-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Debug|Any CPU.Build.0 = Debug|Any CPU
62-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
63-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Release|Any CPU.Build.0 = Release|Any CPU
6459
{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
6560
{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}.Debug|Any CPU.Build.0 = Debug|Any CPU
6661
{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}.Release|Any CPU.ActiveCfg = Release|Any CPU

src/DotnetSpider.Broker.Test/BaseTest.cs

-5
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
11
using DotnetSpider.Broker.Data;
22
using DotnetSpider.Broker.Hubs;
3-
using Microsoft.AspNetCore.Builder.Internal;
43
using Microsoft.EntityFrameworkCore;
54
using Microsoft.Extensions.Configuration;
65
using Microsoft.Extensions.DependencyInjection;
76
using System;
8-
using System.Collections.Generic;
9-
using System.Data;
10-
using System.Data.SqlClient;
117
using System.IO;
12-
using System.Text;
138
using DotnetSpider.Broker.Services;
149

1510
namespace DotnetSpider.Broker.Test

src/DotnetSpider.Broker.Test/WorkerServiceTest.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
using DotnetSpider.Broker.Data;
2-
using DotnetSpider.Broker.Hubs;
32
using DotnetSpider.Broker.Services;
43
using Microsoft.Extensions.DependencyInjection;
54
using System;
6-
using System.Collections.Generic;
7-
using System.Text;
85
using Xunit;
96

107
namespace DotnetSpider.Broker.Test

src/DotnetSpider.Broker/ApiAuthorizeMiddleware.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
using Microsoft.AspNetCore.Http;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
52
using System.Net;
63
using System.Threading.Tasks;
74

src/DotnetSpider.Broker/BrokerOptions.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
1+
using System.Collections.Generic;
52

63
namespace DotnetSpider.Broker
74
{

src/DotnetSpider.Broker/Controllers/BrokerController.cs

-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
using Microsoft.AspNetCore.Mvc;
22
using Microsoft.Extensions.Logging;
3-
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
6-
using System.Threading.Tasks;
73

84
namespace DotnetSpider.Broker.Controllers
95
{

src/DotnetSpider.Broker/Controllers/HomeController.cs

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Diagnostics;
4-
using System.Linq;
5-
using System.Threading.Tasks;
1+
using System.Diagnostics;
62
using Microsoft.AspNetCore.Mvc;
73
using DotnetSpider.Broker.Models;
84

src/DotnetSpider.Broker/Data/Block.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
52

63
namespace DotnetSpider.Broker.Data
74
{

src/DotnetSpider.Broker/Data/BrokerDbContext.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Text;
4-
using Microsoft.AspNetCore.Identity.EntityFrameworkCore;
1+
using Microsoft.AspNetCore.Identity.EntityFrameworkCore;
52
using Microsoft.EntityFrameworkCore;
63

74
namespace DotnetSpider.Broker.Data

src/DotnetSpider.Broker/Data/Entity.cs

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.Linq;
43
using System.Reflection;
5-
using System.Threading.Tasks;
64

75
namespace DotnetSpider.Broker.Data
86
{

src/DotnetSpider.Broker/Data/Job.cs

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
using System;
22
using System.Collections.Generic;
33
using System.ComponentModel.DataAnnotations;
4-
using System.Linq;
5-
using System.Threading.Tasks;
64

75
namespace DotnetSpider.Broker.Data
86
{

src/DotnetSpider.Broker/Data/JobProperty.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
using System;
2-
using System.Collections.Generic;
32
using System.ComponentModel.DataAnnotations;
4-
using System.Linq;
5-
using System.Threading.Tasks;
63

74
namespace DotnetSpider.Broker.Data
85
{

src/DotnetSpider.Broker/Data/JobStatus.cs

+35-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,42 @@
1-
using DotnetSpider.Common;
1+
using Newtonsoft.Json;
2+
using Newtonsoft.Json.Converters;
23
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
5-
using System.Threading.Tasks;
64

75
namespace DotnetSpider.Broker.Data
86
{
7+
/// <summary>
8+
/// Spider status
9+
/// </summary>
10+
[System.Flags]
11+
[JsonConverter(typeof(StringEnumConverter))]
12+
public enum Status
13+
{
14+
/// <summary>
15+
/// Initializing
16+
/// </summary>
17+
Init = 1,
18+
19+
/// <summary>
20+
/// Running
21+
/// </summary>
22+
Running = 2,
23+
24+
/// <summary>
25+
/// Paused
26+
/// </summary>
27+
Paused = 4,
28+
29+
/// <summary>
30+
/// Finished
31+
/// </summary>
32+
Finished = 8,
33+
34+
/// <summary>
35+
/// Exited
36+
/// </summary>
37+
Exited = 16
38+
}
39+
940
public class JobStatus : Entity<Guid>, IHasModificationTime
1041
{
1142
public virtual Guid Identity { get; set; }

src/DotnetSpider.Broker/Data/NodeStatus.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
using System;
2-
using System.Collections.Generic;
32
using System.ComponentModel.DataAnnotations;
4-
using System.Linq;
5-
using System.Threading.Tasks;
63

74
namespace DotnetSpider.Broker.Data
85
{

src/DotnetSpider.Broker/Data/Running.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
52

63
namespace DotnetSpider.Broker.Data
74
{

src/DotnetSpider.Broker/Data/Worker.cs

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.ComponentModel.DataAnnotations;
4-
using System.Linq;
5-
using System.Threading.Tasks;
1+
using System.ComponentModel.DataAnnotations;
62

73
namespace DotnetSpider.Broker.Data
84
{

src/DotnetSpider.Broker/DotnetSpider.Broker.csproj

-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
</ItemGroup>
1818

1919
<ItemGroup>
20-
<ProjectReference Include="..\DotnetSpider.Common\DotnetSpider.Common.csproj" />
2120
<ProjectReference Include="..\DotnetSpider.Downloader\DotnetSpider.Downloader.csproj" />
2221
</ItemGroup>
2322

src/DotnetSpider.Broker/Dtos/AddNodeStatusDto.cs

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.ComponentModel.DataAnnotations;
4-
using System.Linq;
5-
using System.Threading.Tasks;
1+
using System.ComponentModel.DataAnnotations;
62

73
namespace DotnetSpider.Broker.Dtos
84
{

src/DotnetSpider.Broker/Dtos/BlockDto.cs

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
1-
using DotnetSpider.Common;
2-
using DotnetSpider.Downloader;
3-
using System;
1+
using DotnetSpider.Downloader;
42
using System.Collections.Generic;
5-
using System.Linq;
6-
using System.Threading.Tasks;
73

84
namespace DotnetSpider.Broker.Dtos
95
{

src/DotnetSpider.Broker/HttpGlobalExceptionFilter.cs

-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
using Microsoft.AspNetCore.Mvc.Filters;
22
using Microsoft.Extensions.Logging;
3-
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
63
using System.Net;
7-
using System.Threading.Tasks;
84

95
namespace DotnetSpider.Broker
106
{

src/DotnetSpider.Broker/Hubs/NodeHub.cs

-3
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@
22
using DotnetSpider.Broker.Dtos;
33
using DotnetSpider.Broker.Services;
44
using Microsoft.AspNetCore.SignalR;
5-
using Microsoft.EntityFrameworkCore;
65
using System;
7-
using System.Collections.Generic;
8-
using System.Linq;
96
using System.Threading.Tasks;
107

118
namespace DotnetSpider.Broker.Hubs

src/DotnetSpider.Broker/Hubs/WorkerHub.cs

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
using DotnetSpider.Broker.Data;
2-
using DotnetSpider.Broker.Services;
1+
using DotnetSpider.Broker.Services;
32
using Microsoft.AspNetCore.SignalR;
4-
using Microsoft.EntityFrameworkCore;
53
using System;
6-
using System.Collections.Generic;
7-
using System.Linq;
84
using System.Threading.Tasks;
95

106
namespace DotnetSpider.Broker.Hubs

src/DotnetSpider.Broker/Models/ErrorViewModel.cs

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
using System;
2-
31
namespace DotnetSpider.Broker.Models
42
{
53
public class ErrorViewModel

src/DotnetSpider.Broker/Program.cs

-4
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
using System;
2-
using System.Collections.Generic;
32
using System.IO;
4-
using System.Linq;
5-
using System.Threading.Tasks;
63
using Microsoft.AspNetCore;
74
using Microsoft.AspNetCore.Hosting;
85
using Microsoft.Extensions.Configuration;
9-
using Microsoft.Extensions.Logging;
106
using Serilog;
117
using Serilog.Events;
128

src/DotnetSpider.Broker/ServiceCollectionExtensions.cs

-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
using DotnetSpider.Broker.Services;
22
using Microsoft.Extensions.DependencyInjection;
3-
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
6-
using System.Threading.Tasks;
73

84
namespace DotnetSpider.Broker
95
{

src/DotnetSpider.Broker/Services/INodeService.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using DotnetSpider.Broker.Data;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
1+
using System;
52
using System.Threading.Tasks;
63

74
namespace DotnetSpider.Broker.Services

src/DotnetSpider.Broker/Services/INodeStatusService.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
using DotnetSpider.Broker.Data;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
52
using System.Threading.Tasks;
63

74
namespace DotnetSpider.Broker.Services

src/DotnetSpider.Broker/Services/IWorkerService.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
1+
using System.Threading.Tasks;
52

63
namespace DotnetSpider.Broker.Services
74
{

src/DotnetSpider.Broker/Services/NodeService.cs

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
using DotnetSpider.Broker.Data;
22
using Microsoft.EntityFrameworkCore;
33
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
64
using System.Threading.Tasks;
75

86
namespace DotnetSpider.Broker.Services

src/DotnetSpider.Broker/Services/NodeStatusService.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
1+
using System.Threading.Tasks;
52
using DotnetSpider.Broker.Data;
63

74
namespace DotnetSpider.Broker.Services

src/DotnetSpider.Broker/Services/WorkerService.cs

-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
using DotnetSpider.Broker.Data;
22
using Microsoft.EntityFrameworkCore;
3-
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
63
using System.Threading.Tasks;
74

85
namespace DotnetSpider.Broker.Services

0 commit comments

Comments
 (0)