File tree Expand file tree Collapse file tree 3 files changed +13
-5
lines changed Expand file tree Collapse file tree 3 files changed +13
-5
lines changed Original file line number Diff line number Diff line change 23
23
p fetch_article ( 'https://www.dw.com/zh/%E4%B8%AD%E5%9B%BD%E7%BB%AD%E6%89%93%E5%8E%8B%E6%96%B0%E5%86%A0%E8%A8%80%E8%AE%BA-%E5%8C%97%E4%BA%AC%E4%B8%89%E5%BF%97%E6%84%BF%E8%80%85%E9%81%AD%E6%8D%95/a-53255271' )
24
24
25
25
# scmp
26
- p fetch_article ( 'https://www.scmp.com/news/china/politics/article/3081569/chinese-activists-detained-after-sharing-censored-coronavirus' ) [ :content ]
26
+ p fetch_article ( 'https://www.scmp.com/news/china/politics/article/3081569/chinese-activists-detained-after-sharing-censored-coronavirus' )
27
27
28
28
# default article
29
29
p fetch_article ( 'http://www.rfi.fr/cn/%E4%B8%AD%E5%9B%BD/20200426-%E7%96%91%E5%9B%A0%E5%A4%87%E4%BB%BD%E9%81%AD%E5%88%A0%E9%99%A4%E6%96%B0%E5%86%A0%E7%96%AB%E6%83%85%E6%8A%A5%E9%81%93-%E5%8C%97%E4%BA%AC3%E5%90%8D90%E5%90%8E2%E4%BA%BA%E9%81%AD%E6%8B%98%E6%8A%BC1%E4%BA%BA%E5%A4%B1%E8%81%94' )
Original file line number Diff line number Diff line change 8
8
document = Nokogiri ::HTML ( html )
9
9
title = document . css ( '#bodyContent h1' ) . first . content
10
10
author = ''
11
- content = document . css ( '#bodyContent' ) . first
11
+ content = document . css ( '#bodyContent .longText ' ) . first
12
12
13
- content . css ( 'h1' ) . first . remove
14
- content . css ( '.artikel' ) . first . remove
15
- content . css ( '#sharing-bar' ) . first . remove
13
+ content . css ( '.picBox' ) . each do |el |
14
+ if el . css ( 'a' ) . first
15
+ img = el . css ( 'img' ) . first
16
+ el . css ( 'a' ) . first . replace ( img )
17
+ else
18
+ end
19
+ end
20
+
21
+ content . css ( 'script' ) . each ( &:remove )
16
22
17
23
{
18
24
title : title ,
Original file line number Diff line number Diff line change 7
7
process : -> ( html ) {
8
8
document = Nokogiri ::HTML ( html )
9
9
title = document . css ( 'h1.info__headline.headline' ) . first . content . strip
10
+ subheadline = document . css ( '.info__subHeadline' ) . first . to_html
10
11
author = document . css ( '.main-info__name a' ) . first . content
11
12
apollo = document . css ( 'script' ) . find { |x | x . content . start_with? ( 'window.__APOLLO_STATE__=' ) } . content
12
13
data = apollo . split ( '\"more-on-this\"' )
49
50
end
50
51
}
51
52
content = recur . ( arr )
53
+ content = "#{ subheadline } #{ content } "
52
54
53
55
{
54
56
title : title ,
You can’t perform that action at this time.
0 commit comments