Skip to content

Commit d774824

Browse files
author
duty-machine
committed
fix dw
1 parent b137b84 commit d774824

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

run_test.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
p fetch_article('https://www.dw.com/zh/%E4%B8%AD%E5%9B%BD%E7%BB%AD%E6%89%93%E5%8E%8B%E6%96%B0%E5%86%A0%E8%A8%80%E8%AE%BA-%E5%8C%97%E4%BA%AC%E4%B8%89%E5%BF%97%E6%84%BF%E8%80%85%E9%81%AD%E6%8D%95/a-53255271')
2424

2525
# scmp
26-
p fetch_article('https://www.scmp.com/news/china/politics/article/3081569/chinese-activists-detained-after-sharing-censored-coronavirus')[:content]
26+
p fetch_article('https://www.scmp.com/news/china/politics/article/3081569/chinese-activists-detained-after-sharing-censored-coronavirus')
2727

2828
# default article
2929
p fetch_article('http://www.rfi.fr/cn/%E4%B8%AD%E5%9B%BD/20200426-%E7%96%91%E5%9B%A0%E5%A4%87%E4%BB%BD%E9%81%AD%E5%88%A0%E9%99%A4%E6%96%B0%E5%86%A0%E7%96%AB%E6%83%85%E6%8A%A5%E9%81%93-%E5%8C%97%E4%BA%AC3%E5%90%8D90%E5%90%8E2%E4%BA%BA%E9%81%AD%E6%8B%98%E6%8A%BC1%E4%BA%BA%E5%A4%B1%E8%81%94')

websites/dw.rb

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,17 @@
88
document = Nokogiri::HTML(html)
99
title = document.css('#bodyContent h1').first.content
1010
author = ''
11-
content = document.css('#bodyContent').first
11+
content = document.css('#bodyContent .longText').first
1212

13-
content.css('h1').first.remove
14-
content.css('.artikel').first.remove
15-
content.css('#sharing-bar').first.remove
13+
content.css('.picBox').each do |el|
14+
if el.css('a').first
15+
img = el.css('img').first
16+
el.css('a').first.replace(img)
17+
else
18+
end
19+
end
20+
21+
content.css('script').each(&:remove)
1622

1723
{
1824
title: title,

websites/scmp.rb

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
process: -> (html) {
88
document = Nokogiri::HTML(html)
99
title = document.css('h1.info__headline.headline').first.content.strip
10+
subheadline = document.css('.info__subHeadline').first.to_html
1011
author = document.css('.main-info__name a').first.content
1112
apollo = document.css('script').find{|x| x.content.start_with?('window.__APOLLO_STATE__=') }.content
1213
data = apollo.split('\"more-on-this\"')
@@ -49,6 +50,7 @@
4950
end
5051
}
5152
content = recur.(arr)
53+
content = "#{subheadline}#{content}"
5254

5355
{
5456
title: title,

0 commit comments

Comments
 (0)