diff --git a/extruct/w3cmicrodata.py b/extruct/w3cmicrodata.py index a254a5c6..cb8c9fa7 100644 --- a/extruct/w3cmicrodata.py +++ b/extruct/w3cmicrodata.py @@ -18,11 +18,32 @@ from urllib.parse import urljoin import lxml.etree +from lxml.html.clean import Cleaner from w3lib.html import strip_html5_whitespace +import html_text from extruct.utils import parse_html +# Cleaner which is similar to html_text cleaner, but is less aggressive +cleaner = Cleaner( + scripts=True, + javascript=False, # onclick attributes are fine + comments=True, + style=True, + links=True, + meta=True, + page_structure=False, #
We’re sorry, but there seems to be an error with some of the information provided. Please check the highlighted fields
++ + + + + From only £5.29 - £9.56 +
++ Save 10% on your first Repeat Delivery + +
+Johnsons 4 Fleas Cats & Kittens - 3 Treatment Pack, 6 Treatment Pack
For use with Cats and Kittens over 4 weeks of age between 1 and 11kg.