@@ -75,24 +75,65 @@ https://github.com/dotnetcore/DotnetSpider/wiki
75
75
[View complete code](https://github.com/zlzforever/DotnetSpider/blob/master/src/DotnetSpider.Sample/samples/EntitySpider.cs)
76
76
77
77
```` csharp
78
- public class EntitySpider : Spider
78
+ public class EntitySpider (
79
+ IOptions <SpiderOptions > options ,
80
+ DependenceServices services ,
81
+ ILogger <Spider > logger )
82
+ : Spider (options , services , logger )
79
83
{
80
- public EntitySpider (IOptions <SpiderOptions > options , SpiderServices services , ILogger <Spider > logger ) : base (
81
- options , services , logger )
84
/// <summary>
/// Entry point of the sample: creates a builder for <c>EntitySpider</c> via
/// <c>Builder.CreateDefaultBuilder</c>, calls <c>UseSerilog</c> and
/// <c>IgnoreServerCertificateError</c> on it, then builds and awaits <c>RunAsync</c>
/// on the result.
/// </summary>
+ public static async Task RunAsync ()
85
+ {
86
+ var builder = Builder .CreateDefaultBuilder <EntitySpider >(options =>
87
+ {
88
// NOTE(review): Speed is presumably a request-rate setting on SpiderOptions - confirm its unit.
+ options .Speed = 1 ;
89
+ });
90
+ builder .UseSerilog ();
91
+ builder .IgnoreServerCertificateError ();
92
+ await builder .Build ().RunAsync ();
93
+ }
94
+
95
/// <summary>
/// Same start-up sequence as <c>RunAsync</c> (default builder with <c>Speed = 1</c>,
/// <c>UseSerilog</c>, <c>IgnoreServerCertificateError</c>), plus a call to
/// <c>UseMySqlQueueBfsScheduler</c> whose connection string is read from configuration.
/// </summary>
+ public static async Task RunMySqlQueueAsync ()
96
+ {
97
+ var builder = Builder .CreateDefaultBuilder <EntitySpider >(options =>
98
+ {
99
+ options .Speed = 1 ;
100
+ });
101
+ builder .UseSerilog ();
102
+ builder .IgnoreServerCertificateError ();
103
+ builder .UseMySqlQueueBfsScheduler ((context , options ) =>
104
+ {
105
// The scheduler's connection string comes from the host configuration.
// NOTE(review): the key literal is rendered here with a leading space - confirm the real key name.
+ options .ConnectionString = context .Configuration [" SchedulerConnectionString" ];
106
+ });
107
+ await builder .Build ().RunAsync ();
108
+ }
109
+
110
/// <summary>
/// Registers the data flows and enqueues the seed request for this spider.
/// </summary>
/// <param name="stoppingToken">Cancellation token; not referenced in the visible body.</param>
+ protected override async Task InitializeAsync (CancellationToken stoppingToken = default )
82
111
{
112
// First data flow: a DataParser typed on the CnblogsEntry entity.
+ AddDataFlow <DataParser <CnblogsEntry >>();
113
// Second data flow: GetDefaultStorage is passed as a method group, not invoked here.
+ AddDataFlow (GetDefaultStorage );
114
// A single seed request (page 1) carrying one property: key "网站" ("website") with value "博客园" (Cnblogs).
+ await AddRequestsAsync (
115
+ new Request (
116
+ " https://news.cnblogs.com/n/page/1" , new Dictionary <string , object > { { " 网站" , " 博客园" } }));
83
117
}
84
118
85
- #region Nested type: CnblogsEntry
119
/// <summary>
/// Produces the identity of this spider.
/// </summary>
/// <returns>
/// A new <c>SpiderId</c> built from the string form of <c>ObjectId.CreateId()</c>
/// and the name literal "博客园" (Cnblogs).
/// </returns>
+ protected override SpiderId GenerateSpiderId ()
120
+ {
121
+ return new (ObjectId .CreateId ().ToString (), " 博客园" );
122
+ }
86
123
87
124
[Schema (" cnblogs" , " news" )]
88
125
[EntitySelector (Expression = " .//div[@class='news_block']" , Type = SelectorType .XPath )]
89
126
[GlobalValueSelector (Expression = " .//a[@class='current']" , Name = " 类别" , Type = SelectorType .XPath )]
90
- [FollowRequestSelector (XPaths = new []
91
- {
92
- " //div[@class='pager']"
93
- })]
127
+ [GlobalValueSelector (Expression = " //title" , Name = " Title" , Type = SelectorType .XPath )]
128
+ [FollowRequestSelector (Expressions = [" //div[@class='pager']" ])]
94
129
public class CnblogsEntry : EntityBase <CnblogsEntry >
95
130
{
131
/// <summary>
/// Declares the indexes for this entity: one on <c>Title</c> and one composite
/// index on <c>WebSite</c> + <c>Guid</c>.
/// </summary>
+ protected override void Configure ()
132
+ {
133
+ HasIndex (x => x .Title );
134
// NOTE(review): the second argument (true) presumably marks the composite index as unique - confirm against HasIndex.
+ HasIndex (x => new { x .WebSite , x .Guid }, true );
135
+ }
136
+
96
137
public int Id { get ; set ; }
97
138
98
139
[Required ]
@@ -106,7 +147,7 @@ public class EntitySpider : Spider
106
147
public string WebSite { get ; set ; }
107
148
108
149
[StringLength (200 )]
109
- [ValueSelector (Expression = " //title " )]
150
+ [ValueSelector (Expression = " Title " , Type = SelectorType . Environment )]
110
151
[ReplaceFormatter (NewValue = " " , OldValue = " - 博客园" )]
111
152
public string Title { get ; set ; }
112
153
@@ -121,55 +162,15 @@ public class EntitySpider : Spider
121
162
public string Url { get ; set ; }
122
163
123
164
[ValueSelector (Expression = " .//div[@class='entry_summary']" )]
165
+ [TrimFormatter ]
124
166
public string PlainText { get ; set ; }
125
167
126
168
[ValueSelector (Expression = " DATETIME" , Type = SelectorType .Environment )]
127
169
public DateTime CreationTime { get ; set ; }
128
-
129
- protected override void Configure ()
130
- {
131
- HasIndex (x => x .Title );
132
- HasIndex (x => new
133
- {
134
- x .WebSite ,
135
- x .Guid
136
- }, true );
137
- }
138
- }
139
-
140
- #endregion
141
-
142
- public static async Task RunAsync ()
143
- {
144
- var builder = Builder .CreateDefaultBuilder <EntitySpider >();
145
- builder .UseSerilog ();
146
- await builder .Build ()
147
- .RunAsync ();
148
- }
149
-
150
- protected override async Task InitializeAsync (CancellationToken stoppingToken )
151
- {
152
- AddDataFlow (new DataParser <CnblogsEntry >());
153
- AddDataFlow (GetDefaultStorage ());
154
- await AddRequestsAsync (new Request (" https://news.cnblogs.com/n/page/1/" , new Dictionary <string , string >
155
- {
156
- {
157
- " 网站" , " 博客园"
158
- }
159
- }), new Request (" https://news.cnblogs.com/n/page/2/" , new Dictionary <string , string >
160
- {
161
- {
162
- " 网站" , " 博客园"
163
- }
164
- }));
165
- }
166
-
167
- protected override (string Id , string Name ) GetIdAndName ()
168
- {
169
- return (ObjectId .NewId .ToString (), " 博客园" );
170
170
}
171
171
}
172
172
173
+
173
174
````
174
175
175
176
#### Distributed spider
0 commit comments