Skip to content

Commit 2d9d220

Browse files
committed
更新包 (Update packages)
1 parent 1b9f029 commit 2d9d220

File tree

18 files changed

+86
-126
lines changed

18 files changed

+86
-126
lines changed

package.props

+4 -4
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
55
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
66
<PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
7-
<Version>5.1.0</Version>
8-
<FileVersion>5.1.0</FileVersion>
9-
<AssemblyVersion>5.1.0</AssemblyVersion>
7+
<Version>5.1.1</Version>
8+
<FileVersion>5.1.1</FileVersion>
9+
<AssemblyVersion>5.1.1</AssemblyVersion>
1010
<Authors>[email protected];</Authors>
11-
<Copyright>Copyright 2018 Lewis Zou</Copyright>
11+
<Copyright>Copyright 2023 Lewis Zou</Copyright>
1212
<Description>DotnetSpider, a .NET Standard web crawling library. It is lightweight, efficient and fast high-level web crawling &amp; scraping framework</Description>
1313
<PackageTags>DotnetSpider;crawler;dotnet core</PackageTags>
1414
<PackageProjectUrl>https://github.com/dotnetcore/DotnetSpider</PackageProjectUrl>

publish_package.sh

+6 -6
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,9 @@ rm -rf src/DotnetSpider.PostgreSql/bin/Release
88
rm -rf src/DotnetSpider.RabbitMQ/bin/Release
99
dotnet build -c Release
1010
dotnet pack -c Release
11-
dotnet nuget push src/DotnetSpider/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
12-
dotnet nuget push src/DotnetSpider.HBase/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
13-
dotnet nuget push src/DotnetSpider.Mongo/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
14-
dotnet nuget push src/DotnetSpider.MySql/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
15-
dotnet nuget push src/DotnetSpider.PostgreSql/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
16-
dotnet nuget push src/DotnetSpider.RabbitMQ/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
11+
dotnet nuget push src/DotnetSpider/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
12+
dotnet nuget push src/DotnetSpider.HBase/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
13+
dotnet nuget push src/DotnetSpider.Mongo/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
14+
dotnet nuget push src/DotnetSpider.MySql/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
15+
dotnet nuget push src/DotnetSpider.PostgreSql/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
16+
dotnet nuget push src/DotnetSpider.RabbitMQ/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate

src/DotnetSpider.Agent/DotnetSpider.Agent.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
</ItemGroup>
2121

2222
<ItemGroup>
23-
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
23+
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
2424
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
2525
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
2626
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />

src/DotnetSpider.AgentCenter/DotnetSpider.AgentCenter.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
<ItemGroup>
1111
<PackageReference Include="Microsoft.Extensions.Hosting" Version="7.0.0" />
12-
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
12+
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
1313
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
1414
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
1515
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />

src/DotnetSpider.Mongo/DotnetSpider.Mongo.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
</PropertyGroup>
88

99
<ItemGroup>
10-
<PackageReference Include="MongoDB.Driver" Version="2.18.0" />
10+
<PackageReference Include="MongoDB.Driver" Version="2.19.0" />
1111
</ItemGroup>
1212

1313
<ItemGroup>

src/DotnetSpider.MySql/DotnetSpider.MySql.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,6 @@
1111
</ItemGroup>
1212

1313
<ItemGroup>
14-
<PackageReference Include="MySqlConnector" Version="2.2.0" />
14+
<PackageReference Include="MySqlConnector" Version="2.2.5" />
1515
</ItemGroup>
1616
</Project>

src/DotnetSpider.PostgreSql/DotnetSpider.PostgreSql.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
</PropertyGroup>
88

99
<ItemGroup>
10-
<PackageReference Include="Npgsql" Version="7.0.0" />
10+
<PackageReference Include="Npgsql" Version="7.0.1" />
1111
</ItemGroup>
1212

1313
<ItemGroup>

src/DotnetSpider.Sample/DotnetSpider.Sample.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
</ItemGroup>
1515

1616
<ItemGroup>
17-
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
17+
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
1818
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
1919
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
2020
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />

src/DotnetSpider.Sample/Program.cs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ static async Task Main(string[] args)
2828
// // await DistributedSpider.RunAsync();
2929
// await ProxySpider.RunAsync();
3030
// await EntitySpider.RunMySqlQueueAsync();
31-
await ImageSpider.RunAsync();
31+
await CnblogsSpider.RunAsync();
3232

3333
Console.WriteLine("Bye!");
3434
}

src/DotnetSpider.Sample/samples/CnblogsSpider.cs

+19 -41
Original file line number | Diff line number | Diff line change
@@ -40,54 +40,26 @@ protected override async Task InitializeAsync(CancellationToken stoppingToken =
4040
{
4141
AddDataFlow(new ListNewsParser());
4242
AddDataFlow(new NewsParser());
43-
AddDataFlow(new MyConsoleStorage());
44-
await AddRequestsAsync(new Request("https://news.cnblogs.com/n/page/1/"));
43+
var request = new Request("https://news.cnblogs.com/n/page/1")
44+
{
45+
};
46+
request.Headers.UserAgent = "";
47+
await AddRequestsAsync(request);
4548
}
4649

4750
protected override SpiderId GenerateSpiderId()
4851
{
4952
return new(ObjectId.CreateId().ToString(), "博客园");
5053
}
5154

52-
protected class MyConsoleStorage : DataFlowBase
53-
{
54-
public override Task InitializeAsync()
55-
{
56-
return Task.CompletedTask;
57-
}
58-
59-
public override Task HandleAsync(DataFlowContext context)
60-
{
61-
if (IsNullOrEmpty(context))
62-
{
63-
Logger.LogWarning("数据流上下文不包含解析结果");
64-
return Task.CompletedTask;
65-
}
66-
67-
var typeName = typeof(News).FullName;
68-
var data = context.GetData(typeName);
69-
if (data is News news)
70-
{
71-
Console.WriteLine($"URL: {news.Url}, TITLE: {news.Title}, VIEWS: {news.Views}");
72-
}
73-
74-
return Task.CompletedTask;
75-
}
76-
}
7755

7856
protected class ListNewsParser : DataParser
7957
{
8058
public override Task InitializeAsync()
8159
{
82-
// AddRequiredValidator("news\\.cnblogs\\.com/n/page");
83-
AddRequiredValidator((request =>
84-
{
85-
var host = request.RequestUri.Host;
86-
var regex = host + "/$";
87-
return Regex.IsMatch(request.RequestUri.ToString(), regex);
88-
}));
60+
AddRequiredValidator("news\\.cnblogs\\.com/n/page");
8961
// if you want to collect every pages
90-
// AddFollowRequestQuerier(Selectors.XPath(".//div[@class='pager']"));
62+
AddFollowRequestQuerier(Selectors.XPath(".//div[@class='pager']"));
9163
return Task.CompletedTask;
9264
}
9365

@@ -128,16 +100,22 @@ public override Task InitializeAsync()
128100
protected override Task ParseAsync(DataFlowContext context)
129101
{
130102
var typeName = typeof(News).FullName;
103+
var url = context.Request.RequestUri.ToString();
104+
var title = context.Request.Properties["title"]?.ToString()?.Trim();
105+
var summary = context.Request.Properties["summary"]?.ToString()?.Trim();
106+
var views = int.Parse(context.Request.Properties["views"]?.ToString()?.Trim() ?? "0");
107+
var content = context.Selectable.Select(Selectors.XPath(".//div[@id='news_body']"))?.Value
108+
?.Trim();
131109
context.AddData(typeName,
132110
new News
133111
{
134-
Url = context.Request.RequestUri.ToString(),
135-
Title = context.Request.Properties["title"]?.ToString()?.Trim(),
136-
Summary = context.Request.Properties["summary"]?.ToString()?.Trim(),
137-
Views = int.Parse(context.Request.Properties["views"]?.ToString()?.Trim() ?? "0"),
138-
Content = context.Selectable.Select(Selectors.XPath(".//div[@id='news_body']")).Value
139-
?.Trim()
112+
Url = url,
113+
Title = title,
114+
Summary = summary,
115+
Views = views,
116+
Content = content
140117
});
118+
141119
return Task.CompletedTask;
142120
}
143121
}

src/DotnetSpider.Spiders/DotnetSpider.Spiders.csproj

+1 -1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
</ItemGroup>
1313

1414
<ItemGroup>
15-
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
15+
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
1616
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
1717
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
1818
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />

src/DotnetSpider.Tests/DotnetSpider.Tests.csproj

+2 -2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
</PropertyGroup>
77

88
<ItemGroup>
9-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.4.0" />
9+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.4.1" />
1010
<PackageReference Include="xunit" Version="2.4.2" />
1111
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
1212
<PrivateAssets>all</PrivateAssets>
@@ -16,7 +16,7 @@
1616
<PrivateAssets>all</PrivateAssets>
1717
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
1818
</PackageReference>
19-
<PackageReference Include="Moq" Version="4.18.2" />
19+
<PackageReference Include="Moq" Version="4.18.4" />
2020
</ItemGroup>
2121

2222
<ItemGroup>
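(File path not captured in this extract; the hunk below declares class AgentOptions in namespace DotnetSpider.Agent — confirm the path against the commit.)

+11 -12
Original file line number | Diff line number | Diff line change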
@@ -1,16 +1,15 @@
11
namespace DotnetSpider.Agent
22
{
3-
public class AgentOptions
4-
{
3+
public class AgentOptions
4+
{
5+
/// <summary>
6+
/// 节点标识
7+
/// </summary>
8+
public string AgentId { get; set; }
59

6-
/// <summary>
7-
/// 节点标识
8-
/// </summary>
9-
public string AgentId { get; set; }
10-
11-
/// <summary>
12-
/// 节点名称
13-
/// </summary>
14-
public string AgentName { get; set; }
15-
}
10+
/// <summary>
11+
/// 节点名称
12+
/// </summary>
13+
public string AgentName { get; set; }
14+
}
1615
}

src/DotnetSpider/Agent/AgentService.cs

+17 -20
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ public AgentService(
4646

4747
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
4848
{
49-
_logger.LogInformation(
49+
_logger.LogDebug(
5050
_messageQueue.IsDistributed
5151
? $"Agent {_options.AgentId}, {_options.AgentName} is starting"
5252
: "Agent is starting");
@@ -66,26 +66,23 @@ await _messageQueue.PublishAsBytesAsync(Topics.AgentCenter,
6666
});
6767
}
6868

69-
// 同类型下载器注册于相同的 topic,用于负载均衡
69+
// 同类型下载器注册于相同的 topic, 用于负载均衡
7070
await RegisterAgentAsync(_downloader.Name, stoppingToken);
7171

7272
if (_messageQueue.IsDistributed)
7373
{
7474
// 注册 agent_{id} 用于固定节点下载
7575
await RegisterAgentAsync(string.Format(Topics.Spider, _options.AgentId), stoppingToken);
76-
}
7776

78-
// 分布式才需要发送心跳
79-
if (_messageQueue.IsDistributed)
80-
{
81-
await Task.Factory.StartNew(async () =>
77+
// 分布式才需要发送心跳
78+
Task.Factory.StartNew(async () =>
8279
{
8380
while (!stoppingToken.IsCancellationRequested)
8481
{
8582
await HeartbeatAsync();
8683
await Task.Delay(5000, stoppingToken);
8784
}
88-
}, stoppingToken);
85+
}, stoppingToken).ConfigureAwait(true).GetAwaiter();
8986
}
9087

9188
_logger.LogInformation(_messageQueue.IsDistributed
@@ -121,14 +118,14 @@ private async Task HandleMessageAsync(byte[] bytes)
121118
switch (message)
122119
{
123120
case Messages.Agent.Exit exit:
124-
{
125-
if (exit.AgentId == _options.AgentId)
126121
{
127-
_applicationLifetime.StopApplication();
128-
}
122+
if (exit.AgentId == _options.AgentId)
123+
{
124+
_applicationLifetime.StopApplication();
125+
}
129126

130-
break;
131-
}
127+
break;
128+
}
132129
case Request request:
133130
Task.Factory.StartNew(async () =>
134131
{
@@ -150,11 +147,11 @@ private async Task HandleMessageAsync(byte[] bytes)
150147
}).ConfigureAwait(false).GetAwaiter();
151148
break;
152149
default:
153-
{
154-
var msg = JsonSerializer.Serialize(message);
155-
_logger.LogWarning($"Message not supported: {msg}");
156-
break;
157-
}
150+
{
151+
var msg = JsonSerializer.Serialize(message);
152+
_logger.LogWarning($"Message not supported: {msg}");
153+
break;
154+
}
158155
}
159156
}
160157

@@ -167,7 +164,7 @@ await _messageQueue.PublishAsBytesAsync(Topics.AgentCenter,
167164
{
168165
AgentId = _options.AgentId,
169166
AgentName = _options.AgentName,
170-
AvailableMemory = MachineInfo.Current.AvailableMemory,
167+
AvailableMemory = MachineInfo.Current.AvailableMemory,
171168
CpuLoad = 0
172169
});
173170
}

src/DotnetSpider/DotnetSpider.csproj

+3 -3
Original file line number | Diff line number | Diff line change
@@ -10,18 +10,18 @@
1010

1111
<ItemGroup>
1212
<PackageReference Include="Bert.RateLimiters" Version="1.0.15" />
13-
<PackageReference Include="MessagePack" Version="2.4.35" />
13+
<PackageReference Include="MessagePack" Version="2.4.59" />
1414
<PackageReference Include="Microsoft.CSharp" Version="4.7.0" />
1515
<PackageReference Include="Microsoft.Extensions.Hosting" Version="7.0.0" />
1616
<PackageReference Include="Microsoft.Extensions.Http" Version="7.0.0" />
1717
<PackageReference Include="Microsoft.VisualBasic" Version="10.3.0" />
18-
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
18+
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
1919
<PackageReference Include="Dapper" Version="2.0.123" />
2020
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
2121
<PackageReference Include="System.Collections.Immutable" Version="7.0.0" />
2222
<PackageReference Include="System.ComponentModel.Annotations" Version="5.0.0" />
2323
<PackageReference Include="System.Data.SqlClient" Version="4.8.5" />
24-
<PackageReference Include="System.Text.Json" Version="7.0.0" />
24+
<PackageReference Include="System.Text.Json" Version="7.0.1" />
2525
<PackageReference Include="System.Threading.Channels" Version="7.0.0" />
2626
<PackageReference Include="System.Threading.Tasks.Dataflow" Version="7.0.0" />
2727
<PackageReference Include="ZCJ.HashedWheelTimer" Version="0.10.2" />

0 commit comments

Comments
 (0)