Skip to content

Commit fa2ea77

Browse files
committed
[IMP] website, *: introduce JsonLd builder and structured data
*=website_blog

Body: This commit introduces a reusable JsonLd builder for Schema.org payloads and integrates structured data generation in website and website_blog.

- add a JsonLd helper with snake_case to camelCase normalization, nested schema support, datetime normalization, and safe rendering for single or multiple schemas
- add website structured data foundations (organization schema default and breadcrumb helper) through a dedicated mixin
- expose website-level structured data generation and inject structured_data in template rendering context
- render JSON-LD payload in website layout head
- add images_from_html utility to collect post images from blog content
- generate blog schemas for listing and detail pages (Blog, CollectionPage, BlogPosting, BreadcrumbList)
- pass structured_data from blog controllers for both list and detail routes
- add dedicated tests validating JsonLd behavior and serialization rules

This change enables consistent, extensible structured-data generation across website and blog pages.

task-4655276

[IMP] website_blog: WIP
1 parent 6848b06 commit fa2ea77

9 files changed

Lines changed: 118 additions & 135 deletions

File tree

addons/website/helpers/jsonld_builder.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,7 @@ def to_iso_datetime(dt) -> str | None:
113113
return as_datetime.isoformat()
114114

115115
def get(self, key: str, default=None):
116-
"""Retrieve a stored value by its snake_case key.
117-
The key is normalised exactly like :meth:`set` / :meth:`add_nested`
118-
so callers never have to know the internal camelCase representation.
116+
"""Retrieve a stored value by key.
119117
Args:
120118
key: Property name
121119
default: Value returned when the key is absent.
@@ -144,7 +142,7 @@ def set(self, values: dict[str, Any]) -> JsonLd:
144142
self.values[key] = value
145143
return self
146144

147-
def add_nested(self, values: dict[str, JsonLd | list[JsonLd | None] | None]) -> JsonLd:
145+
def add_nested(self, values: dict[str, JsonLd | list[JsonLd] | None]) -> JsonLd:
148146
"""Add nested schema builder(s).
149147
A single nested value is stored as-is; values are converted to a list
150148
only when multiple nested values exist for the same key. None values

addons/website/models/mixins.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ class WebsiteStructuredDataMixin(models.AbstractModel):
877877
_name = 'website.structured_data.mixin'
878878
_description = 'Website Structured Data Mixin'
879879

880-
def get_jsonLD(self, is_detail_page=False):
880+
def get_json_ld(self, is_detail_page=False):
881881
"""Return the JSON-LD structured data for this record.
882882
:param is_detail_page: whether the structured data is for a detail page
883883
:return: string containing the JSON-LD structured data

addons/website/models/website.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2493,7 +2493,7 @@ def _is_tag_domains_watchlisted(self, tagName, atts):
24932493
def _is_tag_classes_watchlisted(self, tagName, atts):
24942494
return self._get_blocked_iframe_containers_classes().intersection((atts.get('class') or '').split(' '))
24952495

2496-
def get_jsonLD(self):
2496+
def get_json_ld(self):
24972497
"""Generate structured data for the website."""
24982498
self.ensure_one()
24992499
return JsonLd.render_structured_data([self.organization_structured_data()])
@@ -2504,7 +2504,9 @@ def organization_structured_data(self):
25042504
base_url = self.get_base_url()
25052505
logo_url = f"{base_url}/logo.png?company={self.company_id.id}"
25062506
return JsonLd("Organization",
2507-
{"name": self.name,
2508-
"url": base_url,
2509-
"@id": f"{base_url}/#organization"},
2507+
{
2508+
"name": self.name,
2509+
"url": base_url,
2510+
"@id": f"{base_url}/#organization",
2511+
},
25102512
).add_nested({"logo": JsonLd("ImageObject", {"url": logo_url})})

addons/website/tests/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from . import test_iap
2020
from . import test_import_files
2121
from . import test_ir_asset
22+
from . import test_jsonld_builder
2223
from . import test_lang_url
2324
from . import test_menu
2425
from . import test_multi_website
@@ -32,7 +33,6 @@
3233
from . import test_sitemap
3334
from . import test_skip_website_configurator
3435
from . import test_snippets
35-
from . import test_structure_data_defination
3636
from . import test_theme
3737
from . import test_ui
3838
from . import test_unsplash_beacon
File renamed without changes.

addons/website/tools.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -94,27 +94,24 @@ def text_from_html(html_fragment, collapse_whitespace=False):
9494
return content
9595

9696

97-
def images_from_html(html_fragment, base_url):
    """
    Extract unique image URLs from an HTML fragment.

    Every ``<img>`` element carrying a ``src`` attribute is considered. Each
    ``src`` is resolved against ``base_url`` with ``urljoin``. Duplicates are
    dropped and the remaining URLs are returned in document order, so the
    result is deterministic from one call to the next.

    :param html_fragment: document from which image URLs must be extracted
    :param base_url: base URL of the website to resolve relative URLs
    :return: list of image URLs extracted from the html
    """
    if not html_fragment:
        return []

    tree = html.fromstring(html_fragment)
    # dict keys keep insertion order, whereas iterating a set yields an
    # arbitrary order; dict.fromkeys deduplicates without reordering.
    return list(dict.fromkeys(
        urljoin(base_url, img.get("src")) for img in tree.xpath("//img[@src]")
    ))
118115

119116

120117
def get_base_domain(url, strip_www=False):

addons/website/views/website_templates.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@
177177
<link rel="preconnect" href="https://fonts.gstatic.com/" crossorigin=""/>
178178
<link rel="apple-touch-icon" t-att-href="x_icon"/>
179179
<!-- Render structured data from context or fallback to website-level schema -->
180-
<script type="application/ld+json" t-out="structured_data or website.get_jsonLD()"/>
180+
<script type="application/ld+json" t-out="structured_data or website.get_json_ld()"/>
181181
</xpath>
182182

183183
<xpath expr="//head/script" position="before">

addons/website_blog/controllers/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def blog(self, blog=None, tag=None, page=1, search=None, **opt):
227227
posts = values['posts']
228228
if blog:
229229
posts = posts.with_context(blog_id=blog.id)
230-
values['structured_data'] = posts.get_jsonLD()
230+
values['structured_data'] = posts.get_json_ld()
231231

232232
return request.render("website_blog.blog_post_short", values)
233233

@@ -339,7 +339,7 @@ def blog_post(self, blog, blog_post, tag_id=None, page=1, enable_editor=None, **
339339
'is_next_post_recommended': is_next_post_recommended,
340340
'date': date_begin,
341341
'blog_url': blog_url,
342-
'structured_data': blog_post.get_jsonLD(is_detail_page=True),
342+
'structured_data': blog_post.get_json_ld(is_detail_page=True),
343343
}
344344
response = request.render("website_blog.blog_post_complete", values)
345345

addons/website_blog/models/website_blog.py

Lines changed: 99 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -106,46 +106,42 @@ def all_tags(self, join=False, min_limit=1):
106106

107107
return tag_by_blog
108108

109-
def _get_breadcrumb_items(self, is_detail_page=None):
    """Return breadcrumb items for a blog page.

    The trail always starts with the current website root followed by the
    generic ``/blog`` listing. An entry for this blog is appended only when
    ``is_detail_page`` is truthy; that is the only branch reading fields of
    ``self``, so it is the only one requiring a single record.

    :param is_detail_page: when truthy, append an entry for this blog
    :return: list of ``(name, url)`` tuples ordered from root to leaf
    """
    website = self.env['website'].get_current_website()
    base_url = website.get_base_url()
    items = [
        (website.name, base_url),
        (f"{self.env._('Blog Posts')} | {website.name}", f"{base_url}/blog"),
    ]
    if is_detail_page:
        # Field reads below need exactly one blog; check it explicitly here
        # so the listing case can still be called on several blogs at once.
        self.ensure_one()
        items.append((f"{self.name} | {website.name}", f"{base_url}{self.website_url}"))
    return items
119120

120121
def _build_blog_schema(self, blog_details=False):
121122
"""Return the Blog schema for a single blog.
122123
123-
:param posts: Optional paginated post records for hasPart.
124124
:param blog_details: Whether to include blog details (description, image)
125125
:return: Blog structured data.
126126
:rtype: JsonLd
127127
"""
128128
self.ensure_one()
129-
website = self.env['website'].get_current_website()
130-
base_url = website.get_base_url()
131-
slug = self.env["ir.http"]._slug(self)
132-
blog_url = f"{base_url}/blog/{slug}"
133-
description = None
134-
image_url = None
135-
organization = JsonLd("Organization", {"@id": f"{base_url}/#organization"})
136-
if blog_details:
137-
description = self.subtitle
138-
if image_url := self._get_image_url():
139-
image_url = f"{base_url}{image_url}"
140-
return JsonLd(
141-
"Blog",
142-
{
143-
"@id": f"{blog_url}/#blog",
144-
"name": self.name,
145-
"url": blog_url,
146-
"description": description,
147-
},
148-
).add_nested({"image": JsonLd("ImageObject", {"url": image_url}) if image_url else None, "publisher": organization})
129+
base_url = self.get_base_url()
130+
blog_slug = self.env["ir.http"]._slug(self)
131+
blog_url = f"{base_url}/blog/{blog_slug}"
132+
schema_data = {
133+
"@id": f"{blog_url}/#blog",
134+
"name": self.name,
135+
"url": blog_url,
136+
}
137+
if blog_details and self.subtitle:
138+
schema_data["description"] = self.subtitle
139+
nested_schema_data = {
140+
"publisher": JsonLd("Organization", {"@id": f"{base_url}/#organization"}),
141+
}
142+
if blog_details and (image_url := self._get_image_url()):
143+
nested_schema_data["image"] = JsonLd("ImageObject", {"url": f"{base_url}{image_url}"})
144+
return JsonLd("Blog", schema_data).add_nested(nested_schema_data)
149145

150146

151147
class BlogTagCategory(models.Model):
@@ -225,84 +221,84 @@ def _default_content(self):
225221
website_id = fields.Many2one(related='blog_id.website_id', readonly=True, store=True)
226222

227223
def _build_summary_blog_post_schema(self):
228-
"""Return summary structured data for a single post.
229-
230-
:return: BlogPosting summary schema.
231-
:rtype: JsonLd
232-
"""
224+
"""Return summary structured data for a single post."""
233225
self.ensure_one()
234226
website = self.env['website'].get_current_website()
235-
base_url = website.get_base_url()
227+
base_url = self.get_base_url()
236228
post_url = f"{base_url}{self.website_url}"
237-
teaser = None
238-
if website.is_view_active('website_blog.opt_posts_loop_show_teaser'):
239-
teaser = self.teaser
240-
messages_count = (
241-
len(self.website_message_ids)
242-
if website.is_view_active('website_blog.opt_posts_loop_show_stats')
243-
else None
244-
)
245-
blog_slug = self.env['ir.http']._slug(self.blog_id)
246-
image_jsonld = None
247-
if website.is_view_active('website_blog.opt_posts_loop_show_cover') and (image_url := self._get_image_url()):
248-
image_jsonld = JsonLd("ImageObject", {"url": base_url + image_url})
249-
organization_jsonld = JsonLd("Organization", {"@id": f"{base_url}/#organization"})
250-
author_sudo = self.author_id.sudo()
251-
author_jsonld = organization_jsonld
252-
if not author_sudo.is_company:
253-
# public user don't have access to user profile, avoid including profile url
254-
author_jsonld = JsonLd("Person", {"name": author_sudo.display_name})
255-
is_part_of_jsonld = JsonLd("Blog", {"@id": f"{base_url}/blog/{blog_slug}/#blog"})
256-
nested_schema = {
257-
"image": image_jsonld,
258-
"publisher": organization_jsonld,
259-
"author": author_jsonld,
260-
"isPartOf": is_part_of_jsonld,
229+
schema_data = {
230+
"headline": self.name,
231+
"url": post_url,
232+
"datePublished": JsonLd.to_iso_datetime(self.published_date),
233+
"dateModified": JsonLd.to_iso_datetime(self.write_date),
261234
}
262-
return JsonLd(
263-
"BlogPosting",
264-
{
265-
"headline": self.name,
266-
"url": post_url,
267-
"datePublished": JsonLd.to_iso_datetime(self.published_date) if self.published_date else None,
268-
"keywords": ", ".join(self.tag_ids.mapped("name")) or None,
269-
"description": teaser,
270-
"commentCount": messages_count,
271-
},
272-
).add_nested(nested_schema)
235+
if tags := self.tag_ids.mapped("name"):
236+
schema_data["keywords"] = ", ".join(tags)
237+
if website.is_view_active('website_blog.opt_posts_loop_show_teaser') and self.teaser:
238+
schema_data["description"] = self.teaser
239+
if website.is_view_active('website_blog.opt_posts_loop_show_stats') and self.website_message_ids:
240+
schema_data["commentCount"] = len(self.website_message_ids)
241+
nested_schema_data = {}
242+
if (
243+
website.is_view_active('website_blog.opt_posts_loop_show_cover')
244+
and (image_url := self._get_image_url())
245+
):
246+
nested_schema_data["image"] = JsonLd("ImageObject", {"url": base_url + image_url})
247+
organization = JsonLd("Organization", {"@id": f"{base_url}/#organization"})
248+
nested_schema_data["publisher"] = organization
249+
author_sudo = self.author_id.sudo()
250+
if author_sudo.is_company:
251+
nested_schema_data["author"] = organization
252+
else:
253+
nested_schema_data["author"] = JsonLd("Person", {"name": author_sudo.display_name})
254+
blog_slug = self.env['ir.http']._slug(self.blog_id)
255+
nested_schema_data["isPartOf"] = JsonLd(
256+
"Blog", {"@id": f"{base_url}/blog/{blog_slug}/#blog"},
257+
)
258+
return JsonLd("BlogPosting", schema_data).add_nested(nested_schema_data)
273259

274260
def _get_breadcrumb_items(self):
    """Return breadcrumb items for a blog post page.

    The trail is the one built by the post's blog, with a final entry for
    the post itself appended.

    :return: list of ``(name, url)`` tuples ordered from root to leaf
    """
    self.ensure_one()
    website = self.env['website'].get_current_website()
    base_url = self.get_base_url()
    # The blog helper takes a boolean flag, not a record: pass it by keyword
    # with the same truthiness the recordset argument used to provide.
    items = self.blog_id._get_breadcrumb_items(is_detail_page=bool(self.blog_id))
    items.append((f"{self.name} | {website.name}", f"{base_url}{self.website_url}"))
    return items
282268

283269
def _build_blog_post_schema(self):
284270
"""Return full BlogPosting schema for a post detail page."""
285271
self.ensure_one()
286272
website = self.env['website'].get_current_website()
287-
website_url = website.get_base_url()
288-
blog_post_sd = self._build_summary_blog_post_schema()
289-
lang_code = self.env.lang
290-
in_language = lang_code.replace("_", "-") if lang_code else None
291-
content_text = text_from_html(self.content, True) if self.content else None
292-
word_count = len(content_text.split()) if content_text else None
273+
website_url = self.get_base_url()
274+
blog_post_jsonld = self._build_summary_blog_post_schema()
293275
image_urls = []
294-
if blog_post_sd.get("image") is None and (image_url := self._get_image_url()):
295-
image_urls.append(f"{website_url}{image_url}")
296-
image_urls.extend(dict.fromkeys(images_from_html(self.content, website_url)))
297-
if blog_post_sd.get("commentCount") is None and website.is_view_active('website_blog.opt_blog_post_comment'):
298-
blog_post_sd.set({"commentCount": len(self.website_message_ids)})
299-
return blog_post_sd.set({
300-
"dateModified": JsonLd.to_iso_datetime(self.write_date),
301-
"inLanguage": in_language,
302-
"wordCount": word_count,
303-
}).add_nested({
304-
"image": [JsonLd("ImageObject", {"url": image_url}) for image_url in image_urls],
305-
})
276+
if not blog_post_jsonld.get("image"):
277+
if image_url := self._get_image_url():
278+
image_urls.append(f"{website_url}{image_url}")
279+
if html_images := images_from_html(self.content, website_url):
280+
image_urls.extend(dict.fromkeys(html_images))
281+
schema_data = {}
282+
if (
283+
not blog_post_jsonld.get("commentCount")
284+
and website.is_view_active('website_blog.opt_blog_post_comment')
285+
):
286+
schema_data["commentCount"] = len(self.website_message_ids)
287+
if self.env.lang:
288+
schema_data["inLanguage"] = self.env.lang.replace("_", "-")
289+
if self.content:
290+
if content_text := text_from_html(self.content, True):
291+
schema_data["wordCount"] = len(content_text.split())
292+
if schema_data:
293+
blog_post_jsonld.set(schema_data)
294+
if image_urls:
295+
blog_post_jsonld.add_nested({
296+
"image": [
297+
JsonLd("ImageObject", {"url": url})
298+
for url in image_urls
299+
],
300+
})
301+
return blog_post_jsonld
306302

307303
def _build_structured_data(self, is_detail_page=False):
308304
"""Build structured data schemas for blog post pages.
@@ -320,40 +316,30 @@ def _build_structured_data(self, is_detail_page=False):
320316
self._build_breadcrumb_schema(self._get_breadcrumb_items()),
321317
])
322318
return schemas
323-
324-
if blog := self.env['blog.blog'].browse(self.env.context.get('blog_id')).exists():
325-
schemas.append(blog._build_blog_schema(blog_details=True))
326-
collection_blog = blog
327-
breadcrumb_items = blog._get_breadcrumb_items()
328-
else:
329-
for blog_record in self.mapped('blog_id'):
330-
schemas.append(blog_record._build_blog_schema())
331-
collection_blog = None
332-
website = self.env['website'].get_current_website()
333-
base_url = website.get_base_url()
334-
breadcrumb_items = [
335-
(website.name, base_url),
336-
(self.env._("Blog"), f"{base_url}/blog"),
337-
]
319+
current_blog = self.env['blog.blog'].browse(self.env.context.get('blog_id')).exists()
320+
blogs = current_blog or self.mapped('blog_id')
321+
breadcrumb_items = blogs._get_breadcrumb_items(bool(current_blog))
322+
for blog_record in blogs:
323+
schemas.append(blog_record._build_blog_schema())
338324
schemas.extend([
339-
self._to_structured_data_collectionpage(blog=collection_blog),
325+
self._to_structured_data_collectionpage(blog=current_blog),
340326
self._build_breadcrumb_schema(breadcrumb_items)
341327
])
342328
return schemas
343329

344330
def _to_structured_data_collectionpage(self, blog=None):
345331
"""Return the generic /blog CollectionPage schema."""
346332
website = self.env['website'].get_current_website()
347-
if blog:
348-
placeholder_name = blog.name
349-
else:
350-
placeholder_name = self.env._("Blog Posts")
333+
collectionpage_name = f"{blog.name}" if blog else self.env._('Blog Posts')
351334
base_url = website.get_base_url()
352-
return JsonLd(
353-
"CollectionPage", {"name": f"{placeholder_name} | {website.name}", "url": f"{base_url}/blog"},
354-
).add_nested({
355-
"hasPart": [post._build_summary_blog_post_schema() for post in self],
356-
"isPartOf": JsonLd("Organization", {"@id": f"{base_url}/#organization"}),
335+
haspart_jsonld = [post._build_summary_blog_post_schema() for post in self]
336+
organization_jsonld = JsonLd("Organization", {"@id": f"{base_url}/#organization"})
337+
return JsonLd("CollectionPage", {
338+
"name": collectionpage_name,
339+
"url": f"{base_url}/blog",
340+
}).add_nested({
341+
"hasPart": haspart_jsonld,
342+
"isPartOf": organization_jsonld,
357343
})
358344

359345
@api.depends('content', 'teaser_manual')

0 commit comments

Comments
 (0)