Skip to content

Commit 51e89df

Browse files
committed
Fix None title
1 parent: 45c9acd · commit: 51e89df

File tree

1 file changed

+21
-20
lines changed

1 file changed

+21
-20
lines changed

goose/extractors/title.py

Lines changed: 21 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -76,28 +76,29 @@ def get_title(self):
76 76
title_ = self.article.opengraph.get('title', '')
77 77
if title_:
78 78
# handle tags without any title: <meta property="og:title" />
79-
return self.clean_title(title_)
80-
81-
# try to fetch the meta headline
82-
meta_headline = self.parser.getElementsByTag(
83-
self.article.doc,
84-
tag="meta",
85-
attr="name",
86-
value="headline")
87-
if meta_headline:
88-
title_ = self.parser.getAttribute(meta_headline[0], 'content')
89-
if title_:
90-
return self.clean_title(title_)
91-
92-
# otherwise use the title meta
93-
title_element = self.parser.getElementsByTag(self.article.doc, tag='title')
94-
if title_element:
95-
title_ = self.parser.getText(title_element[0])
96-
if title_:
97-
return self.clean_title(title_)
79+
title = self.clean_title(title_)
80+
else:
81+
# try to fetch the meta headline
82+
meta_headline = self.parser.getElementsByTag(
83+
self.article.doc,
84+
tag="meta",
85+
attr="name",
86+
value="headline")
87+
if meta_headline:
88+
title_ = self.parser.getAttribute(meta_headline[0], 'content')
89+
if title_:
90+
title = self.clean_title(title_)
91+
else:
92+
# otherwise use the title meta
93+
title_element = self.parser.getElementsByTag(self.article.doc, tag='title')
94+
if title_element:
95+
title_ = self.parser.getText(title_element[0])
96+
if title_:
97+
title = self.clean_title(title_)
98 98
except:
99 99
print >> sys.stderr, 'ERROR when getting title: ', traceback.format_exec()
100-
return title
100+
101+
return title
101 102

102 103
def extract(self):
103 104
return self.get_title()

0 commit comments

Comments
 (0)