Skip to content

Commit c6e032e

Browse files
authored
Update README.md
1 parent 808ba3f commit c6e032e

File tree

1 file changed

+52
-51
lines changed

1 file changed

+52
-51
lines changed

README.md

+52 -51
Original file line number | Diff line number | Diff line change
@@ -75,24 +75,65 @@ https://github.com/dotnetcore/DotnetSpider/wiki
7575
[View the complete code](https://github.com/zlzforever/DotnetSpider/blob/master/src/DotnetSpider.Sample/samples/EntitySpider.cs)
7676

7777
````csharp
78-
public class EntitySpider : Spider
78+
public class EntitySpider(
79+
IOptions<SpiderOptions> options,
80+
DependenceServices services,
81+
ILogger<Spider> logger)
82+
: Spider(options, services, logger)
7983
{
80-
public EntitySpider(IOptions<SpiderOptions> options, SpiderServices services, ILogger<Spider> logger) : base(
81-
options, services, logger)
84+
public static async Task RunAsync()
85+
{
86+
var builder = Builder.CreateDefaultBuilder<EntitySpider>(options =>
87+
{
88+
options.Speed = 1;
89+
});
90+
builder.UseSerilog();
91+
builder.IgnoreServerCertificateError();
92+
await builder.Build().RunAsync();
93+
}
94+
95+
public static async Task RunMySqlQueueAsync()
96+
{
97+
var builder = Builder.CreateDefaultBuilder<EntitySpider>(options =>
98+
{
99+
options.Speed = 1;
100+
});
101+
builder.UseSerilog();
102+
builder.IgnoreServerCertificateError();
103+
builder.UseMySqlQueueBfsScheduler((context, options) =>
104+
{
105+
options.ConnectionString = context.Configuration["SchedulerConnectionString"];
106+
});
107+
await builder.Build().RunAsync();
108+
}
109+
110+
protected override async Task InitializeAsync(CancellationToken stoppingToken = default)
82111
{
112+
AddDataFlow<DataParser<CnblogsEntry>>();
113+
AddDataFlow(GetDefaultStorage);
114+
await AddRequestsAsync(
115+
new Request(
116+
"https://news.cnblogs.com/n/page/1", new Dictionary<string, object> { { "网站", "博客园" } }));
83117
}
84118

85-
#region Nested type: CnblogsEntry
119+
protected override SpiderId GenerateSpiderId()
120+
{
121+
return new(ObjectId.CreateId().ToString(), "博客园");
122+
}
86123

87124
[Schema("cnblogs", "news")]
88125
[EntitySelector(Expression = ".//div[@class='news_block']", Type = SelectorType.XPath)]
89126
[GlobalValueSelector(Expression = ".//a[@class='current']", Name = "类别", Type = SelectorType.XPath)]
90-
[FollowRequestSelector(XPaths = new[]
91-
{
92-
"//div[@class='pager']"
93-
})]
127+
[GlobalValueSelector(Expression = "//title", Name = "Title", Type = SelectorType.XPath)]
128+
[FollowRequestSelector(Expressions = ["//div[@class='pager']"])]
94129
public class CnblogsEntry : EntityBase<CnblogsEntry>
95130
{
131+
protected override void Configure()
132+
{
133+
HasIndex(x => x.Title);
134+
HasIndex(x => new { x.WebSite, x.Guid }, true);
135+
}
136+
96137
public int Id { get; set; }
97138

98139
[Required]
@@ -106,7 +147,7 @@ public class EntitySpider : Spider
106147
public string WebSite { get; set; }
107148

108149
[StringLength(200)]
109-
[ValueSelector(Expression = "//title")]
150+
[ValueSelector(Expression = "Title", Type = SelectorType.Environment)]
110151
[ReplaceFormatter(NewValue = "", OldValue = " - 博客园")]
111152
public string Title { get; set; }
112153

@@ -121,55 +162,15 @@ public class EntitySpider : Spider
121162
public string Url { get; set; }
122163

123164
[ValueSelector(Expression = ".//div[@class='entry_summary']")]
165+
[TrimFormatter]
124166
public string PlainText { get; set; }
125167

126168
[ValueSelector(Expression = "DATETIME", Type = SelectorType.Environment)]
127169
public DateTime CreationTime { get; set; }
128-
129-
protected override void Configure()
130-
{
131-
HasIndex(x => x.Title);
132-
HasIndex(x => new
133-
{
134-
x.WebSite,
135-
x.Guid
136-
}, true);
137-
}
138-
}
139-
140-
#endregion
141-
142-
public static async Task RunAsync()
143-
{
144-
var builder = Builder.CreateDefaultBuilder<EntitySpider>();
145-
builder.UseSerilog();
146-
await builder.Build()
147-
.RunAsync();
148-
}
149-
150-
protected override async Task InitializeAsync(CancellationToken stoppingToken)
151-
{
152-
AddDataFlow(new DataParser<CnblogsEntry>());
153-
AddDataFlow(GetDefaultStorage());
154-
await AddRequestsAsync(new Request("https://news.cnblogs.com/n/page/1/", new Dictionary<string, string>
155-
{
156-
{
157-
"网站", "博客园"
158-
}
159-
}), new Request("https://news.cnblogs.com/n/page/2/", new Dictionary<string, string>
160-
{
161-
{
162-
"网站", "博客园"
163-
}
164-
}));
165-
}
166-
167-
protected override (string Id, string Name) GetIdAndName()
168-
{
169-
return (ObjectId.NewId.ToString(), "博客园");
170170
}
171171
}
172172

173+
173174
````
174175

175176
#### Distributed spider

0 commit comments

Comments
 (0)