@@ -181,6 +181,48 @@ protected override async Task<Page> DowloadContent(Request request, ISpider spid
181
181
}
182
182
}
183
183
184
/// <summary>
/// Reads the body of <paramref name="response"/> as bytes and decodes it to text.
/// </summary>
/// <param name="site">
/// Site settings. When <c>EncodingName</c> is not blank, <c>site.Encoding</c> is used for decoding.
/// </param>
/// <param name="response">HTTP response whose content is read.</param>
/// <returns>The decoded body text.</returns>
protected virtual string ReadContent(Site site, HttpResponseMessage response)
{
    // The read is awaited synchronously via .Result because this method is synchronous.
    // The bytes are passed through PreventCutOff before decoding
    // (NOTE(review): its exact effect is not visible here — confirm).
    byte[] body = response.Content.ReadAsByteArrayAsync().Result;
    body = PreventCutOff(body);

    // An explicitly configured site encoding takes precedence.
    if (!string.IsNullOrWhiteSpace(site.EncodingName))
    {
        return site.Encoding.GetString(body, 0, body.Length);
    }

    // Otherwise let EncodingExtensions pick an encoding from the Content-Type charset and the raw bytes.
    Encoding resolved = EncodingExtensions.GetEncoding(response.Content.Headers.ContentType?.CharSet, body);
    return resolved.GetString(body, 0, body.Length);
}
199
+
200
/// <summary>
/// Builds a <see cref="Page"/> for <paramref name="request"/> from the text body of <paramref name="response"/>.
/// </summary>
/// <param name="request">Request the page is created for.</param>
/// <param name="response">HTTP response whose body is read through <c>ReadContent</c>.</param>
/// <param name="site">Site settings passed to <c>ReadContent</c> and used to pick the URL-decoding encoding on NET45.</param>
/// <returns>A new page whose <c>Content</c> is the body text, HTML- and URL-decoded when <c>_decodeHtml</c> is set.</returns>
private Page HandleResponse(Request request, HttpResponseMessage response, Site site)
{
    string content = ReadContent(site, response);

    if (_decodeHtml)
    {
#if NET45
        // Use the same "is a site encoding configured?" test as ReadContent (IsNullOrWhiteSpace),
        // so a blank EncodingName falls back to the default in both places instead of only one.
        Encoding urlEncoding = string.IsNullOrWhiteSpace(site.EncodingName) ? Encoding.Default : site.Encoding;
        content = HttpUtility.UrlDecode(HttpUtility.HtmlDecode(content), urlEncoding);
#else
        content = System.Net.WebUtility.UrlDecode(System.Net.WebUtility.HtmlDecode(content));
#endif
    }

    // Response headers are not copied onto the request extras.
    return new Page(request)
    {
        Content = content
    };
}
225
+
184
226
private void PrepareHttpClient ( HttpClientEntry httpClientEntry )
185
227
{
186
228
httpClientEntry . Init ( AllowAutoRedirect , ( ) =>
@@ -278,48 +320,6 @@ private HttpRequestMessage GenerateHttpRequestMessage(Request request, Site site
278
320
return httpRequestMessage ;
279
321
}
280
322
281
/// <summary>
/// Builds a <see cref="Page"/> for <paramref name="request"/> from the text body of <paramref name="response"/>.
/// </summary>
/// <param name="request">Request the page is created for.</param>
/// <param name="response">HTTP response whose body is read through <c>ReadContent</c>.</param>
/// <param name="site">Site settings passed to <c>ReadContent</c> and used to pick the URL-decoding encoding on NET45.</param>
/// <returns>A new page whose <c>Content</c> is the body text, HTML- and URL-decoded when <c>_decodeHtml</c> is set.</returns>
private Page HandleResponse(Request request, HttpResponseMessage response, Site site)
{
    string content = ReadContent(site, response);

    if (_decodeHtml)
    {
#if NET45
        // Use the same "is a site encoding configured?" test as ReadContent (IsNullOrWhiteSpace),
        // so a blank EncodingName falls back to the default in both places instead of only one.
        Encoding urlEncoding = string.IsNullOrWhiteSpace(site.EncodingName) ? Encoding.Default : site.Encoding;
        content = HttpUtility.UrlDecode(HttpUtility.HtmlDecode(content), urlEncoding);
#else
        content = System.Net.WebUtility.UrlDecode(System.Net.WebUtility.HtmlDecode(content));
#endif
    }

    // Response headers are not copied onto the request extras.
    return new Page(request)
    {
        Content = content
    };
}
306
-
307
/// <summary>
/// Reads the body of <paramref name="response"/> as bytes and decodes it to text.
/// </summary>
/// <param name="site">
/// Site settings. When <c>EncodingName</c> is not blank, <c>site.Encoding</c> is used for decoding.
/// </param>
/// <param name="response">HTTP response whose content is read.</param>
/// <returns>The decoded body text.</returns>
private string ReadContent(Site site, HttpResponseMessage response)
{
    // The read is awaited synchronously via .Result because this method is synchronous.
    // The bytes are passed through PreventCutOff before decoding
    // (NOTE(review): its exact effect is not visible here — confirm).
    byte[] body = response.Content.ReadAsByteArrayAsync().Result;
    body = PreventCutOff(body);

    // An explicitly configured site encoding takes precedence.
    if (!string.IsNullOrWhiteSpace(site.EncodingName))
    {
        return site.Encoding.GetString(body, 0, body.Length);
    }

    // Otherwise let EncodingExtensions pick an encoding from the Content-Type charset and the raw bytes.
    Encoding resolved = EncodingExtensions.GetEncoding(response.Content.Headers.ContentType?.CharSet, body);
    return resolved.GetString(body, 0, body.Length);
}
322
-
323
323
private Page SaveFile ( Request request , HttpResponseMessage response , ISpider spider )
324
324
{
325
325
var intervalPath = new Uri ( request . Url ) . LocalPath . Replace ( "//" , "/" ) . Replace ( "/" , Env . PathSeperator ) ;
0 commit comments