thp · naquad · Apr 18, 2025 · Apr 30, 2025 · May 2, 2025 · May 14, 2025
diff --git a/docs/source/reporters.rst b/docs/source/reporters.rst
@@ -60,6 +60,7 @@ The list of built-in reporters can be retrieved using::
 
 At the moment, the following reporters are built-in:
 
+- **atom**: Store summaries as Atom feed
 - **discord**: Send a message to a Discord channel
 - **email**: Send summary via e-mail / SMTP / sendmail
 - **gotify**: Send a message to a gotify server
@@ -80,6 +81,33 @@ At the moment, the following reporters are built-in:
    sed -e 's/^  \* \(.*\) - \(.*\)$/- **\1**: \2/'
 
 
+Atom
+----
+
+You can configure urlwatch to store changes in an Atom 1.0 feed.
+To enable this feature, run ``urlwatch --edit-config`` to edit your configuration
+file. Enable the Atom reporter and specify the path where the feed should be
+saved.
+
+The available configuration options are:
+
+.. code:: yaml
+
+   atom:
+     # REQUIRED: Writable path where the Atom feed will be stored
+     path: /var/www/html/feed.xml
+     # Optional: Unique feed ID (automatically generated if omitted)
+     id: "urn:uuid:ffa6dc6e-7436-48f6-bc99-020ab1e7d429"
+     # Optional: Title of the feed
+     title: "URLWatch"
-     title: "URLWatch"
+     title: "urlwatch changes"
-     title: "URLWatch"
+     title: "urlwatch changes"
+     # Optional: Subtitle of the feed
+     subtitle: ""
+     # Optional: URL of your site (no relation to the particular job)
+     link: "https://www.example.com/"
+     # Optional: URL of the feed itself
+     linkself: "https://www.example.com/feed.xml"
+
+
 Pushover
 --------
 

diff --git a/lib/urlwatch/reporters.py b/lib/urlwatch/reporters.py
@@ -38,6 +38,9 @@
 import html
 import functools
 import subprocess
+import uuid
+from lxml import etree
+from datetime import datetime, timezone
 
 import requests
 
@@ -1166,3 +1169,183 @@ def submit(self):
             'priority': self.config['priority'],
             'title': self.config['title'],
         })
+
+
+class AtomReporter(HtmlReporter):
+    """Store summaries as Atom feed"""
+
+    # https://validator.w3.org/feed/docs/atom.html
+    NSMAP = {None: "http://www.w3.org/2005/Atom"}
+
+    __kind__ = 'atom'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.feed = self._read()
+
+        if self.feed.find('./id') is None or self.config.get('id'):
+            self._declare(self.feed, 'id', default=self._mkuuid)
+
+        self._declare(self.feed, 'title')
+        self._declare(self.feed, 'subtitle')
+        self._declare(self.feed, 'link', target='href')
+        self._declare(self.feed, 'linkself', tag='link', target='href', rel='self')
+
+        self._write(self.feed)
+
+    def _read(self):
+        """
+        Tries to load existing feed from the path given in configuration.
+        If the feed can't be loaded, a new feed is created.
+        """
+        nspfx = f'{{{self.NSMAP[None]}}}'
+
+        try:
+            with open(self.config['path'], 'rb') as f:
+                tree = etree.parse(f)
+
+                # fix the namespaces
+                for elem in tree.iter():
+                    if hasattr(elem, 'tag') and elem.tag.startswith(nspfx):
+                        elem.tag = elem.tag[len(nspfx):]
+
+                root = tree.getroot()
+                if root.tag == 'feed':
+                    return root
+
+                logger.warning("%s: invalid atom feed", self.config['path'])
+        except etree.LxmlError as e:
+            logger.warning("failed to parse %s: %s", self.config['path'], e)
+        except FileNotFoundError:
+            pass
+
+        return etree.Element("feed", nsmap=self.NSMAP)
+
+    def _write(self, feed):
+        with open(self.config['path'], 'wb') as f:
+            tree = etree.ElementTree(feed)
+            tree.write(f, encoding='utf-8', xml_declaration=True)
+
+    def _attrs_equal(self, a, b, exist):
+        for k in a.keys() | b.keys():
+            if (
+                k not in exist and a.get(k) != b.get(k)
+                or k in exist and k not in a
+            ):
+                return False
+
+        return True
+
+    def _e(self, parent, tag, value, target='text', create=True, remove=True, single=True, **attrs):
+        """A multi-tool for creating, updating, and deleting XML elements"""
+
+        # find existing elements
+        present = set()
+        if target not in ('text', 'raw', 'cdata'):
+            present.add(target)  # ignore the updated attribute's value but check it exists
+
+        elems = []
+        for child in parent.iterchildren(tag):
+            if self._attrs_equal(child.attrib, attrs, present):
+                elems.append(child)
+
+        # if value is None there's nothing to update or even a cleanup should be made
+        if value is None:
+            while remove and elems:
+                parent.remove(elems.pop())
+            return
+
+        # if no elements exist, then create one or stop
+        if not elems:
+            if not create:
+                return
+
+            elem = etree.Element(tag, attrs)
+            parent.append(elem)
+            elems.append(elem)
+
+        # when there are multiple elements and single=True remove all existing
+        # elements except one
+        while single and len(elems) > 1:
+            parent.remove(elems.pop())
+
+        # finally, update the value
+        for elem in elems:
+            if target == 'text':
+                elem.text = value
+            elif target == 'cdata':
+                elem.text = etree.CDATA(value)
+            elif target == 'raw':
+                while len(elem) > 0:
+                    elem.remove(elem[0])
+
+                elem.append(etree.XML(value))
+            else:
+                elem.attrib[target] = value
+
+    def _declare(self, parent, name, target='text', tag=None, default=None, **attrs):
+        """Creates an element for the configuration parameter"""
+        value = self.config.get(name, None)
+        if not value:
+            value = default
+            if callable(value):
+                value = value()
+
+        self._e(parent, tag or name, value, target, **attrs)
+
+    def _entry_updated(self, entry):
+        """Tries to fetch the updated timestamp from the entry"""
+        updated = entry.find('./updated')
+        return updated is not None and updated.text or '2099-01-01T00:00:00Z'
+
+    def _mkuuid(self):
+        """UUID4 generator"""
+        return f'urn:uuid:{uuid.uuid4()}'
+
+    def _tsfmt(self, ts):
+        """Format the given timestamp as an ISO8601 UTC datetime"""
+        return datetime.fromtimestamp(ts).replace(microsecond=0).\
+            astimezone(timezone.utc).isoformat()
+
+    def _entry(self, feed, job_state, timestamp):
+        """Entry construction"""
+        job = job_state.job
+        cfg = self.get_base_config(self.report)
+
+        entry = etree.Element("entry")
+        feed.append(entry)
+        e = functools.partial(self._e, entry)
+
+        e("id", self._mkuuid())
+        e("title", f'{job_state.verb}: {job.pretty_name()}')
+
+        if job.location_is_url():
+            e("link", job.get_location(), target='href')
+        else:
+            e("summary", job.get_location())
+
+        content = self._format_content(job_state, cfg['diff'])
+        e("content", str(content), target='cdata', type='html')
+        e("updated", self._tsfmt(timestamp))
+
+    def submit(self):
+        last = None
+        now = int(datetime.now().timestamp())
+        for job_state in self.report.get_filtered_job_states(self.job_states):
+            dt = job_state.timestamp or now  # errors have no timestamp
+            self._entry(self.feed, job_state, dt)
+            last = max(dt, last or dt)
+
+        if last is not None:
+            self._e(self.feed, "updated", self._tsfmt(last))
+
+        maxitems = self.config.get('maxitems', 0)
+        if maxitems < 0:
+            logger.warning("atom: maxitems can't be negative")
+        elif maxitems > 0:
+            items = self.feed.findall('./entry')
+            items.sort(key=self._entry_updated, reverse=True)
+            while len(items) > maxitems:
+                self.feed.remove(items.pop())
+
+        self._write(self.feed)
diff --git a/lib/urlwatch/storage.py b/lib/urlwatch/storage.py
@@ -198,6 +198,15 @@
             'ignore_stdout': True,
             'ignore_stderr': False,
         },
+        'atom': {
+            'enabled': False,
+            'maxitems': 50,
+            'path': '/path/to/feed.xml',
+            'title': 'URLWatch Updates',
-            'title': 'URLWatch Updates',
+            'title': 'urlwatch Updates',
-            'title': 'URLWatch Updates',
+            'title': 'urlwatch Updates',
+            'subtitle': '',
+            'link': 'https://www.example.com/',
+            'linkself': 'https://www.example.com/feed.xml',
+        }
     },
 
     'job_defaults': {