diff --git a/.gitignore b/.gitignore index c72797a..f73f3c4 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ bot_spiders/ .coverage.* htmlcov/ .scrapy +docs/_build diff --git a/Dockerfile b/Dockerfile index 9dcedb4..6651a80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,12 +35,12 @@ RUN npm install # install arachnado COPY . /app +RUN pip install --editable /app # npm install is executed again because node_modules can be overwritten # if .dockerignore is not active (may happen with docker-compose or DockerHub) RUN npm install RUN npm run build -RUN pip3 install . # use e.g. -v /path/to/my/arachnado/config.conf:/etc/arachnado.conf # docker run option to override arachnado parameters @@ -51,7 +51,7 @@ RUN pip3 install . # this folder is added to PYTHONPATH, so modules from there are available # for spider_packages Arachnado option VOLUME /python-packages -ENV PYTHONPATH $PYTHONPATH:/python-packages +ENV PYTHONPATH $PYTHONPATH:/python-packages:/app EXPOSE 8888 -ENTRYPOINT ["arachnado"] +CMD ["arachnado"] diff --git a/README.rst b/README.rst index d95fa01..388a4f1 100644 --- a/README.rst +++ b/README.rst @@ -13,7 +13,7 @@ License is MIT. Install ------- -Arachnado requires Python 2.7. +Arachnado requires Python 2.7 or Python 3.5. To install Arachnado use pip:: pip install arachnado @@ -41,6 +41,14 @@ the server:: For available options check https://github.com/TeamHG-Memex/arachnado/blob/master/arachnado/config/defaults.conf. +Tests +----- + +To run tests make sure tox_ is installed, then +execute ``tox`` command from the source root. + +.. _tox: https://testrun.org/tox/latest/ + Development ----------- diff --git a/arachnado/__main__.py b/arachnado/__main__.py index 6f7d864..3738cc3 100755 --- a/arachnado/__main__.py +++ b/arachnado/__main__.py @@ -83,8 +83,11 @@ def main(port, host, start_manhole, manhole_port, manhole_host, loglevel, opts): }) job_storage = MongoTailStorage(jobs_uri, cache=True) + job_storage.ensure_index("urls") site_storage = MongoStorage(sites_uri, cache=True) item_storage = MongoTailStorage(items_uri) + item_storage.ensure_index("url") + item_storage.ensure_index("_job_id") crawler_process = ArachnadoCrawlerProcess(settings) diff --git a/arachnado/config/defaults.conf b/arachnado/config/defaults.conf index 0357e35..b91fe69 100644 --- a/arachnado/config/defaults.conf +++ b/arachnado/config/defaults.conf @@ -5,7 +5,8 @@ [arachnado] ; General Arachnado server options. -; Event loop to use. Allowed values are "twisted", "tornado" and "auto". +; Event loop to use. Allowed values are +; "twisted", "tornado" and "auto". reactor = auto ; Host/port to listen to @@ -30,9 +31,11 @@ DEPTH_LIMIT = 10 ; Packages to load spiders from (separated by whitespace) spider_packages = -; Name of the default spider. It is used for crawling if no custom spider -; is specified or detected. It should support API similar to -; arachnado.spider.CrawlWebsiteSpider (which is the default here) + +; Name of the default spider. It is used for crawling if +; no custom spider is specified or detected. It should support +; API similar to arachnado.spider.CrawlWebsiteSpider +; (which is the default here). 
default_spider_name = generic [arachnado.storage] diff --git a/arachnado/cron.py b/arachnado/cron.py index e192c1e..a4b85d6 100644 --- a/arachnado/cron.py +++ b/arachnado/cron.py @@ -17,7 +17,7 @@ def __init__(self, domain_crawlers, site_storage): self.waiting_calls = {} self.domain_crawlers = domain_crawlers self.site_storage = site_storage - self.site_storage.subscribe(self.site_storage.available_subscriptions, + self.site_storage.subscribe(self.site_storage.available_events, self.rerun) def start(self): diff --git a/arachnado/handlers.py b/arachnado/handlers.py index 62ba6a6..af5afd6 100644 --- a/arachnado/handlers.py +++ b/arachnado/handlers.py @@ -8,6 +8,9 @@ from arachnado.utils.misc import json_encode from arachnado.monitor import Monitor from arachnado.handler_utils import ApiHandler, NoEtagsMixin + +from arachnado.rpc.data import PagesDataRpcWebsocketHandler, JobsDataRpcWebsocketHandler + from arachnado.rpc import RpcHttpHandler from arachnado.rpc.ws import RpcWebsocketHandler @@ -28,16 +31,23 @@ def get_application(crawler_process, domain_crawlers, debug = opts['arachnado']['debug'] handlers = [ + # UI url(r"/", Index, context, name="index"), url(r"/help", Help, context, name="help"), + + # simple API used by UI url(r"/crawler/start", StartCrawler, context, name="start"), url(r"/crawler/stop", StopCrawler, context, name="stop"), url(r"/crawler/pause", PauseCrawler, context, name="pause"), url(r"/crawler/resume", ResumeCrawler, context, name="resume"), url(r"/crawler/status", CrawlerStatus, context, name="status"), url(r"/ws-updates", Monitor, context, name="ws-updates"), + + # RPC API url(r"/ws-rpc", RpcWebsocketHandler, context, name="ws-rpc"), url(r"/rpc", RpcHttpHandler, context, name="rpc"), + url(r"/ws-pages-data", PagesDataRpcWebsocketHandler, context, name="ws-pages-data"), + url(r"/ws-jobs-data", JobsDataRpcWebsocketHandler, context, name="ws-jobs-data"), ] return Application( handlers=handlers, @@ -143,6 +153,8 @@ def control_job(self, job_id): class CrawlerStatus(BaseRequestHandler): """ Status for one or more jobs. """ + # FIXME: does it work? Can we remove it? It is not used + # by Arachnado UI. def get(self): crawl_ids_arg = self.get_argument('crawl_ids', '') diff --git a/arachnado/manhole.py b/arachnado/manhole.py index 03d4ce0..f120b4f 100644 --- a/arachnado/manhole.py +++ b/arachnado/manhole.py @@ -3,13 +3,13 @@ An interactive Python interpreter available through telnet. 
""" from __future__ import absolute_import -from twisted.conch.manhole import ColoredManhole -from twisted.conch.insults import insults from twisted.conch.telnet import TelnetTransport, TelnetBootstrapProtocol from twisted.internet import protocol def start(port=None, host=None, telnet_vars=None): + from twisted.conch.manhole import ColoredManhole + from twisted.conch.insults import insults from twisted.internet import reactor port = int(port) if port else 6023 diff --git a/arachnado/pipelines/mongoexport.py b/arachnado/pipelines/mongoexport.py index 17b519c..4429e45 100644 --- a/arachnado/pipelines/mongoexport.py +++ b/arachnado/pipelines/mongoexport.py @@ -81,6 +81,14 @@ def __init__(self, crawler): def from_crawler(cls, crawler): return cls(crawler) + @classmethod + def get_spider_urls(cls, spider): + options = getattr(spider.crawler, 'start_options', None) + if options and "domain" in options: + return [options["domain"]] + else: + return spider.start_urls + @tt_coroutine def open_spider(self, spider): try: @@ -94,6 +102,7 @@ def open_spider(self, spider): 'started_at': datetime.datetime.utcnow(), 'status': 'running', 'spider': spider.name, + "urls": self.get_spider_urls(spider), 'options': getattr(spider.crawler, 'start_options', {}), }, upsert=True, new=True) self.job_id = str(job['_id']) diff --git a/arachnado/rpc/__init__.py b/arachnado/rpc/__init__.py index 9975e0b..e1d5289 100644 --- a/arachnado/rpc/__init__.py +++ b/arachnado/rpc/__init__.py @@ -13,12 +13,22 @@ class ArachnadoRPC(object): """ Base class for all Arachnado RPC resources. + Use it as a mixin for tornado.web.RequestHandler subclasses. + + It provides :meth:`handle_request` method which handles + Jobs, Sites and Pages RPC requests. """ + rpc_objects = tuple() + def initialize(self, *args, **kwargs): + jobs = Jobs(self, *args, **kwargs) + sites = Sites(self, *args, **kwargs) + pages = Pages(self, *args, **kwargs) + self.rpc_objects = [jobs, sites, pages] + self.dispatcher = Dispatcher() - self.dispatcher.add_object(Jobs(self, *args, **kwargs)) - self.dispatcher.add_object(Sites(self, *args, **kwargs)) - self.dispatcher.add_object(Pages(self, *args, **kwargs)) + for obj in self.rpc_objects: + self.dispatcher.add_object(obj) def handle_request(self, body): response = JSONRPCResponseManager.handle(body, self.dispatcher) diff --git a/arachnado/rpc/data.py b/arachnado/rpc/data.py new file mode 100644 index 0000000..bd67164 --- /dev/null +++ b/arachnado/rpc/data.py @@ -0,0 +1,368 @@ +import logging +import json +import sys +from collections import deque +from tornado import gen +import tornado.ioloop +from bson.objectid import ObjectId +from bson.errors import InvalidId +from jsonrpc.dispatcher import Dispatcher + +from arachnado.rpc.jobs import Jobs +from arachnado.rpc.sites import Sites +from arachnado.rpc.pages import Pages + +from arachnado.crawler_process import agg_stats_changed, CrawlerProcessSignals as CPS +from arachnado.rpc.ws import RpcWebsocketHandler +from arachnado.utils.misc import json_encode + +logger = logging.getLogger(__name__) + + +class DataRpcWebsocketHandler(RpcWebsocketHandler): + """ basic class for Data API handlers""" + stored_data = None + delay_mode = False + heartbeat_data = None + i_args = None + i_kwargs = None + storages = None + max_msg_size = 2**20 + + def _send_event(self, data): + return super(DataRpcWebsocketHandler, self).write_event(data, max_message_size=self.max_msg_size) + + def init_heartbeat(self, update_delay): + if update_delay > 0 and not self.heartbeat_data: + 
self.delay_mode = True + self.heartbeat_data = tornado.ioloop.PeriodicCallback( + lambda: self.send_updates(), + update_delay + ) + self.heartbeat_data.start() + + def cancel_subscription(self, subscription_id): + storage = self.storages.pop(subscription_id, None) + if storage: + storage.on_close() + return True + else: + return False + + def set_max_message_size(self, max_size): + self.max_msg_size = max_size + return True + + def initialize(self, *args, **kwargs): + self.stored_data = deque() + self.storages = {} + self.i_args = args + self.i_kwargs = kwargs + self.cp = kwargs.get("crawler_process", None) + self.dispatcher = Dispatcher() + self.dispatcher["cancel_subscription"] = self.cancel_subscription + self.dispatcher["set_max_message_size"] = self.set_max_message_size + + def on_close(self): + logger.info("connection closed") + for storage in self.storages.values(): + storage.on_close() + if self.heartbeat_data: + self.heartbeat_data.stop() + super(DataRpcWebsocketHandler, self).on_close() + + def open(self): + logger.info("new connection") + super(DataRpcWebsocketHandler, self).open() + + def send_updates(self): + while len(self.stored_data): + item = self.stored_data.popleft() + self._send_event(item) + + +class JobsDataRpcWebsocketHandler(DataRpcWebsocketHandler): + mongo_id_mapping = None + job_url_mapping = None + stored_jobs_stats = None + + @gen.coroutine + def subscribe_to_jobs(self, include=None, exclude=None, update_delay=0, last_job_id=None): + self.init_heartbeat(update_delay) + stor_id, storage = self.add_storage() + jobs_storage = Jobs(self, *self.i_args, **self.i_kwargs) + jobs_storage.callback_meta = stor_id + jobs_storage.callback = self.on_jobs_tailed + storage.add_jobs_subscription(jobs_storage, include=include, exclude=exclude, last_id=last_job_id) + return {"datatype": "job_subscription_id", + "id": stor_id} + + def add_storage(self): + new_id = str(len(self.storages)) + storage = DataSubscription() + self.storages[new_id] = storage + return new_id, storage + + @gen.coroutine + def write_event(self, data, aggregate=False): + event_data = dict(data) + if 'stats' in event_data: + if not isinstance(event_data['stats'], dict): + try: + event_data['stats'] = json.loads(event_data['stats']) + except Exception as ex: + logger.warning("Invalid stats field in job {}".format(event_data.get("_id", "MISSING MONGO ID"))) + if aggregate and self.delay_mode: + item_id = event_data.get("_id", None) + if item_id: + if item_id in self.stored_jobs_stats: + self.stored_jobs_stats[item_id]["stats"].update(event_data["stats"]) + else: + item = event_data + self.stored_jobs_stats[item_id] = item + else: + logger.warning("Job data without _id field from event {}".format(event)) + else: + return self._send_event(event_data) + + def send_updates(self): + super(JobsDataRpcWebsocketHandler, self).send_updates() + for job_id in set(self.stored_jobs_stats.keys()): + item = self.stored_jobs_stats.pop(job_id, None) + if item: + self._send_event(item) + + def initialize(self, *args, **kwargs): + super(JobsDataRpcWebsocketHandler, self).initialize(*args, **kwargs) + self.dispatcher["subscribe_to_jobs"] = self.subscribe_to_jobs + self.mongo_id_mapping = {} + self.job_url_mapping = {} + self.stored_jobs_stats = {} + + def on_close(self): + logger.debug("connection closed") + if self.cp: + self.cp.signals.disconnect(self.on_stats_changed, agg_stats_changed) + self.cp.signals.disconnect(self.on_spider_closed, CPS.spider_closed) + super(JobsDataRpcWebsocketHandler, self).on_close() + + def 
open(self): + logger.debug("new connection") + super(JobsDataRpcWebsocketHandler, self).open() + if self.cp: + self.cp.signals.connect(self.on_stats_changed, agg_stats_changed) + self.cp.signals.connect(self.on_spider_closed, CPS.spider_closed) + + def on_stats_changed(self, changes, crawler): + job_id = crawler.spider.crawl_id + data = {"stats": changes} + # same as crawl_id + data["id"] = job_id + # mongo id + data["_id"] = self.mongo_id_mapping.get(job_id, "") + # job url + data["urls"] = self.job_url_mapping.get(job_id, "") + allowed = False + for storage in self.storages.values(): + allowed = allowed or job_id in storage.job_ids + if allowed: + self.write_event(data, aggregate=True) + + def on_spider_closed(self, spider): + if self.cp: + for job in self.cp.jobs: + job_id = job["id"] + allowed = False + if job_id: + for storage in self.storages.values(): + allowed = allowed or job_id in storage.job_ids + if allowed: + self.write_event(job) + + def on_jobs_tailed(self, data, callback_meta=None): + if "id" in data and callback_meta: + self.storages[callback_meta].job_ids.add(data["id"]) + self.mongo_id_mapping[data["id"]] = data.get("_id", None) + self.job_url_mapping[data["id"]] = data.get("urls", None) + self.write_event(data) + + +class PagesDataRpcWebsocketHandler(DataRpcWebsocketHandler): + """ pages API""" + + @gen.coroutine + def subscribe_to_pages(self, urls=None, url_groups=None): + result = { + "datatype": "pages_subscription_id", + "single_subscription_id": "", + "id": {}, + } + if urls: + result["single_subscription_id"] = yield self.create_subscribtion_to_urls(urls) + if url_groups: + res = {} + for group_id in url_groups: + res[group_id] = yield self.create_subscribtion_to_urls(url_groups[group_id]) + result["id"] = res + if not urls and not url_groups: + stor_id, storage = self.add_storage() + result["single_subscription_id"] = stor_id + storage.pages.subscribe(query={}) + raise gen.Return(result) + + @gen.coroutine + def create_subscribtion_to_urls(self, urls): + jobs_to_subscribe = [] + stor_id, storage = self.add_storage() + result = stor_id + for url in urls: + last_id = urls[url] + jobs = Jobs(self, *self.i_args, **self.i_kwargs) + jobs.callback_meta = { + "subscription_id":stor_id, + "last_id":last_id + } + jobs.callback = self.job_query_callback + jobs_q = self.create_jobs_query(url) + jobs_ds = yield jobs.storage.fetch(jobs_q) + job_ids =[x["_id"] for x in jobs_ds] + if job_ids: + storage.job_ids.update(job_ids) + pages_query = storage.create_pages_query(job_ids, last_id) + storage.filters.append(pages_query) + storage.jobs.append(jobs) + jobs_to_subscribe.append([jobs_q, jobs]) + else: + logger.info("No jobs found for url {}".format(url)) + storage.subscribe_to_pages() + for jobs_q, jobs in jobs_to_subscribe: + jobs.subscribe(query=jobs_q) + raise gen.Return(result) + + @gen.coroutine + def write_event(self, data, aggregate=False): + if aggregate and self.delay_mode: + self.stored_data.append(data) + else: + return self._send_event(data) + + def initialize(self, *args, **kwargs): + super(PagesDataRpcWebsocketHandler, self).initialize(*args, **kwargs) + self.dispatcher["subscribe_to_pages"] = self.subscribe_to_pages + + @gen.coroutine + def job_query_callback(self, data, callback_meta=None): + if "_id" in data and callback_meta: + storage = self.storages[callback_meta["subscription_id"]] + job_id = data["_id"] + storage.update_pages_subscription(job_id, callback_meta["last_id"]) + else: + logger.warning("Jobs callback with incomplete data") + + def 
on_pages_tailed(self, data, callback_meta=None): + self.write_event(data) + + def create_jobs_query(self, url): + if url: + return {"urls":{'$regex': url }} + else: + return {} + + def add_storage(self): + new_id = str(len(self.storages)) + pages = Pages(self, *self.i_args, **self.i_kwargs) + pages.callback = self.on_pages_tailed + self.storages[new_id] = DataSubscription(pages) + return new_id, self.storages[new_id] + + def cancel_subscription(self, subscription_id): + storage = self.storages.pop(subscription_id, None) + if storage: + storage.on_close() + return True + else: + return False + + +class DataSubscription(object): + + def __init__(self, pages_storage=None): + self.pages = pages_storage + self.jobs = [] + self.job_ids = set([]) + self.filters = [] + + def on_close(self): + for jobs in self.jobs: + jobs._on_close() + if self.pages: + self.pages._on_close() + + def subscribe_to_pages(self, require_filters=True): + if self.filters: + if len(self.filters) == 1: + self.pages.subscribe(query=self.filters[0]) + elif len(self.filters) > 1: + self.pages.subscribe(query={"$or": self.filters}) + elif not require_filters: + self.pages.subscribe(query={}) + else: + logger.warning("No subscription - empty filter list") + + def add_jobs_subscription(self, jobs_storage, include=None, exclude=None, last_id=None): + jobs_query = self.create_jobs_subscription_query(include=include, exclude=exclude, last_id=last_id) + self.jobs.append(jobs_storage) + jobs_storage.subscribe(query=jobs_query) + + def update_pages_subscription(self, job_id, last_id): + if job_id not in self.job_ids: + # stop pages subscription + self.pages.unsubscribe() + # create new pages query + pages_query = self.create_pages_query([job_id], last_id) + self.filters.append(pages_query) + # subscribe to pages + self.subscribe_to_pages() + else: + logger.debug("Already subscribed to job {}".format(job_id)) + + def create_pages_query(self, job_ids=None, last_id=None): + filters = [] + job_conditions_lst = [] + if job_ids: + for job_id in job_ids: + job_conditions_lst.append({"_job_id":{'$eq': str(job_id) }}) + if job_conditions_lst: + if len(job_conditions_lst) > 1: + filters.append({"$or": job_conditions_lst}) + else: + filters.append(job_conditions_lst[0]) + if last_id: + try: + page_id = ObjectId(last_id) + filters.append({"_id":{"$gt":page_id}}) + except InvalidId: + logger.warning("Invalid ObjectID: {}, will use job ids filter only.".format(last_id)) + items_q = {} + if len(filters) == 1: + items_q = filters[0] + elif len(filters) > 1: + items_q = {"$and": filters} + return items_q + + def create_jobs_subscription_query(self, include, exclude, last_id): + conditions = [] + if last_id: + conditions.append({"_id":{"$gt":last_id}}) + if include: + for inc_str in include: + conditions.append({"urls":{'$regex': inc_str }}) + if exclude: + for exc_str in exclude: + conditions.append({"urls":{'$regex': '^((?!' + exc_str + ').)*$'}}) + jobs_q = {} + if len(conditions) == 1: + jobs_q = conditions[0] + elif len(conditions): + jobs_q = {"$and": conditions } + return jobs_q \ No newline at end of file diff --git a/arachnado/rpc/jobs.py b/arachnado/rpc/jobs.py index 8757ee2..e59ee30 100644 --- a/arachnado/rpc/jobs.py +++ b/arachnado/rpc/jobs.py @@ -1,15 +1,23 @@ import logging +from arachnado.storages.mongotail import MongoTailStorage -class Jobs(object): +class Jobs(object): + """ + This object is exposed for RPC requests. + It allows to subscribe for scraping job updates. 
+ """ + callback_meta = None + callback = None logger = logging.getLogger(__name__) def __init__(self, handler, job_storage, **kwargs): self.handler = handler - self.storage = job_storage + self.storage = job_storage # type: MongoTailStorage def subscribe(self, last_id=0, query=None, fields=None): + """ Subscribe for job updates. """ self.storage.subscribe('tailed', self._publish, last_id=last_id, query=query, fields=fields) @@ -17,8 +25,12 @@ def _on_close(self): self.storage.unsubscribe('tailed') def _publish(self, data): - # print("=============================================================") - # print("jobs rpc :") - # print(data) + if self.callback: + _callback = self.callback + else: + _callback = self.handler.write_event if self.storage.tailing: - self.handler.write_event('jobs.tailed', data) + if self.callback_meta: + _callback(data, callback_meta=self.callback_meta) + else: + _callback(data) diff --git a/arachnado/rpc/pages.py b/arachnado/rpc/pages.py index 1f7e891..014031d 100644 --- a/arachnado/rpc/pages.py +++ b/arachnado/rpc/pages.py @@ -2,6 +2,9 @@ class Pages(object): + """ Pages (scraped items) object exposed via JSON RPC """ + handler_id = None + callback = None def __init__(self, handler, item_storage, **kwargs): self.handler = handler @@ -17,6 +20,13 @@ def subscribe(self, last_id=0, query=None, fields=None, fetch_delay=None): def _on_close(self): self.storage.unsubscribe('tailed') + def unsubscribe(self): + self.storage.unsubscribe('tailed') + def _publish(self, data): + if self.callback: + _callback = self.callback + else: + _callback = self.handler.write_event if self.storage.tailing: - self.handler.write_event('pages.tailed', data) + _callback(data) diff --git a/arachnado/rpc/sites.py b/arachnado/rpc/sites.py index a547811..c475cdf 100644 --- a/arachnado/rpc/sites.py +++ b/arachnado/rpc/sites.py @@ -1,13 +1,16 @@ import logging +from functools import partial +from arachnado.storages.mongotail import MongoTailStorage -class Sites(object): +class Sites(object): + """ 'Known sites' object exposed via JSON-RPC """ logger = logging.getLogger(__name__) def __init__(self, handler, site_storage, **kwargs): self.handler = handler - self.storage = site_storage + self.storage = site_storage # type: MongoTailStorage def list(self): return self.storage.fetch() @@ -22,15 +25,14 @@ def delete(self, site): self.storage.delete(site) def subscribe(self): - for subscription in self.storage.available_subscriptions: + for event_name in self.storage.available_events: self.storage.subscribe( - subscription, - lambda data, subscription=subscription: - self._publish(data, subscription) + event_name, + partial(self._publish, event=event_name) ) def _on_close(self): - self.storage.unsubscribe(self.storage.available_subscriptions) + self.storage.unsubscribe(self.storage.available_events) - def _publish(self, data, subscription): - self.handler.write_event('sites.{}'.format(subscription), data) + def _publish(self, event, data): + self.handler.write_event('sites.{}'.format(event), data) diff --git a/arachnado/rpc/stats.py b/arachnado/rpc/stats.py deleted file mode 100644 index 07fa5a5..0000000 --- a/arachnado/rpc/stats.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging - - -class Stats(object): - - logger = logging.getLogger(__name__) - - def __init__(self, handler, stats_storage, **kwargs): - self.handler = handler - self.storage = stats_storage - - def list(self): - return self.storage.cache.values() - - def post(self, site): - self.storage.create(site) - - def patch(self, site): - 
self.storage.update(site) - - def subscribe(self): - for subscription in self.storage.available_subscriptions: - self.storage.subscribe( - subscription, - lambda data, subscription=subscription: - self._publish(data, subscription) - ) - - def _on_close(self): - self.storage.unsubscribe(self.storage.available_subscriptions) - - def _publish(self, data, subscription): - self.handler.write_event('stats.{}'.format(subscription), data) diff --git a/arachnado/rpc/ws.py b/arachnado/rpc/ws.py index 0653058..834edbf 100644 --- a/arachnado/rpc/ws.py +++ b/arachnado/rpc/ws.py @@ -1,3 +1,4 @@ +import sys import json import logging import six @@ -19,49 +20,41 @@ class RpcWebsocketHandler(ArachnadoRPC, websocket.WebSocketHandler): """ def on_message(self, message): - try: - msg = json.loads(message) - event, data = msg['event'], msg['data'] - except (TypeError, ValueError): - logger.warn('Invalid message skipped: {!r}'.format(message[:500])) - return - if event == 'rpc:request': - self.handle_request(json.dumps(data)) - else: - logger.warn('Unsupported event type: {!r}'.format(event)) + self.handle_request(message) def send_data(self, data): - self.write_event('rpc:response', data) + self.write_event(data) @gen.coroutine - def write_event(self, event, data): + def write_event(self, data, max_message_size=0): if isinstance(data, six.string_types): - data = json.loads(data) - message = json_encode({'event': event, 'data': data}) + message = data + else: + message = json_encode(data) try: - self.write_message(message) + if sys.getsizeof(message) < max_message_size or not max_message_size: + self.write_message(message) + else: + logger.info("Message size exceeded. Message wasn't sent.") except websocket.WebSocketClosedError: pass def open(self): """ Forward open event to resource objects. """ - for resource in self._resources(): + logger.debug("Connection opened %s", self) + for resource in self.rpc_objects: if hasattr(resource, '_on_open'): resource._on_open() self._pinger = PeriodicCallback(lambda: self.ping(b'PING'), 1000 * 15) self._pinger.start() + logger.debug("Pinger initiated %s", self) def on_close(self): """ Forward on_close event to resource objects. """ - for resource in self._resources(): + logger.debug("Connection closed %s", self) + for resource in self.rpc_objects: if hasattr(resource, '_on_close'): resource._on_close() self._pinger.stop() - - def _resources(self): - for resource_name, resource in self.__dict__.items(): - if hasattr(RequestHandler, resource_name): - continue - yield resource diff --git a/arachnado/storages/mongo.py b/arachnado/storages/mongo.py index 68eebda..a253365 100644 --- a/arachnado/storages/mongo.py +++ b/arachnado/storages/mongo.py @@ -9,58 +9,64 @@ class MongoStorage(object): - + """ + Utility class for working with MongoDB data. + It supports CRUD operations and allows to subscribe to + created/updated/deleted events. + """ def __init__(self, mongo_uri, cache=False): self.mongo_uri = mongo_uri - self.cache_flag = cache _, _, _, _, self.col = motor_from_uri(mongo_uri) self.signal_manager = SignalManager() # Used for unsubscribe # disconnect() requires reference to original callback - self.subscription_callbacks = {} - if cache: - self.cache = defaultdict(dict) - else: - self.cache = None + self._callbacks = {} self.fetching = False self.signals = { 'created': object(), 'updated': object(), 'deleted': object(), } + # XXX: cache is used in arachnado.cron and arachnado.site_checker. + # Is it needed? 
+ self.cache_flag = cache + if cache: + self.cache = defaultdict(dict) + else: + self.cache = None - def subscribe(self, subscriptions=None, callback=None): - if subscriptions is None: - subscriptions = self.available_subscriptions - if not isinstance(subscriptions, list): - subscriptions = [subscriptions] - for subscription in subscriptions: - try: - self.signal_manager.connect(callback, - self.signals[subscription], - weak=False) - self.subscription_callbacks[subscription] = callback - except KeyError as exc: - raise ValueError('Invalid subscription type: {}'.format(exc)) + def subscribe(self, events=None, callback=None): + if events is None: + events = self.available_events + if not isinstance(events, list): + events = [events] + for event_name in events: + if event_name not in self.signals: + raise ValueError('Invalid event name: {}'.format(event_name)) + self.signal_manager.connect(callback, + self.signals[event_name], + weak=False) + self._callbacks[event_name] = callback - def unsubscribe(self, subscriptions=None): - if subscriptions is None: - subscriptions = self.available_subscriptions - if not isinstance(subscriptions, list): - subscriptions = [subscriptions] - for subscription in subscriptions: + def unsubscribe(self, events=None): + if events is None: + events = self.available_events + if not isinstance(events, list): + events = [events] + for event_name in events: try: self.signal_manager.disconnect( - self.subscription_callbacks[subscription], - self.signals[subscription], + self._callbacks[event_name], + self.signals[event_name], weak=False ) - self.subscription_callbacks.pop(subscription, None) + self._callbacks.pop(event_name, None) except KeyError: + # FIXME: when can it happen? pass @property - def available_subscriptions(self): + def available_events(self): return list(self.signals.keys()) @coroutine @@ -91,6 +97,11 @@ def create(self, doc): self.signal_manager.send_catch_log(self.signals['created'], data=doc) raise Return(result) + @coroutine + def ensure_index(self, key_or_list): + result = yield self.col.ensure_index(key_or_list) + raise Return(result) + @coroutine def update(self, doc): doc = replace_dots(doc) diff --git a/arachnado/storages/mongotail.py b/arachnado/storages/mongotail.py index 61c429d..d30f68d 100644 --- a/arachnado/storages/mongotail.py +++ b/arachnado/storages/mongotail.py @@ -6,6 +6,9 @@ class MongoTailStorage(MongoStorage): + """ + This MongoStorage subclass allows to subscribe to a mongo query. + """ fetch_delay = 0 def __init__(self, mongo_uri, *args, **kwargs): @@ -13,18 +16,24 @@ def __init__(self, mongo_uri, *args, **kwargs): self.tailing = False self.signals['tailed'] = object() - def subscribe(self, subscriptions, callback, last_id=None, query=None, + def subscribe(self, events, callback, last_id=None, query=None, fields=None): - if 'tailed' in subscriptions: + if 'tailed' in events: self.tail(query, fields, last_id) - super(MongoTailStorage, self).subscribe(subscriptions, callback) + super(MongoTailStorage, self).subscribe(events, callback) - def unsubscribe(self, subscriptions): - if 'tailed' in subscriptions: + def unsubscribe(self, events): + if 'tailed' in events: self.untail() + # FIXME: shouldn't it unsubscribe from other events, i.e. call super()? @coroutine def tail(self, query=None, fields=None, last_object_id=None): + """ + Execute ``query`` periodically, fetching new results. + ``self.signals['tailed']`` signal with each result is sent + when a new document appears. 
+ """ if self.tailing: raise RuntimeError('This storage is already tailing') self.tailing = True diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..e8c61ee --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,225 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " epub3 to make an epub3" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + @echo " dummy to check syntax errors of document sources" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: html +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." 
+ +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Arachnado.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Arachnado.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Arachnado" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Arachnado" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +.PHONY: epub3 +epub3: + $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 + @echo + @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." 
+ +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + +.PHONY: dummy +dummy: + $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy + @echo + @echo "Build finished. Dummy builder generates no files." diff --git a/docs/_static/img/arachnado-0.png b/docs/_static/img/arachnado-0.png new file mode 100644 index 0000000..0a87676 Binary files /dev/null and b/docs/_static/img/arachnado-0.png differ diff --git a/docs/_static/img/arachnado-1.png b/docs/_static/img/arachnado-1.png new file mode 100644 index 0000000..212eb5b Binary files /dev/null and b/docs/_static/img/arachnado-1.png differ diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..6d3c3f0 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Arachnado documentation build configuration file, created by +# sphinx-quickstart on Fri Jul 1 16:48:37 2016. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +# +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Arachnado' +copyright = '2016, TeamHG' +author = 'TeamHG' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. 
+# +# The short X.Y version. +version = '0.2' +# The full version, including alpha/beta/rc tags. +release = '0.2' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# +# today = '' +# +# Else, today_fmt is used as the format for a strftime call. +# +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +import sphinx_rtd_theme +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. +# " v documentation" by default. +# +# html_title = 'Arachnado v0.2' + +# A shorter title for the navigation bar. Default is the same as html_title. +# +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# +# html_logo = None + +# The name of an image file (relative to this directory) to use as a favicon of +# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. 
+# +# html_extra_path = [] + +# If not None, a 'Last updated on:' timestamp is inserted at every page +# bottom, using the given strftime format. +# The empty string is equivalent to '%b %d, %Y'. +# +# html_last_updated_fmt = None + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# +# html_additional_pages = {} + +# If false, no module index is generated. +# +# html_domain_indices = True + +# If false, no index is generated. +# +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' +# +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# 'ja' uses this config value. +# 'zh' user can custom change `jieba` dictionary path. +# +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Arachnadodoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'Arachnado.tex', 'Arachnado Documentation', + 'TeamHG', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# +# latex_use_parts = False + +# If true, show page references after internal links. +# +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# +# latex_appendices = [] + +# If false, no module index is generated. 
+#
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'arachnado', 'Arachnado Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'Arachnado', 'Arachnado Documentation',
+     author, 'Arachnado', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+#
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#
+# texinfo_no_detailmenu = False
diff --git a/docs/config.rst b/docs/config.rst
new file mode 100644
index 0000000..65d3608
--- /dev/null
+++ b/docs/config.rst
@@ -0,0 +1,20 @@
+.. _config:
+
+Configuration
+=============
+
+Arachnado can be configured using a config file. Put it in one of the common
+locations:
+
+* `/etc/arachnado.conf`
+* `~/.config/arachnado.conf`
+* `~/.arachnado.conf`
+
+or pass the file name as an argument when starting the server::
+
+    arachnado --config ./my-config.conf
+
+Available options and their default values:
+
+.. literalinclude::
+    ../arachnado/config/defaults.conf
diff --git a/docs/http-api.rst b/docs/http-api.rst
new file mode 100644
index 0000000..b7d78c6
--- /dev/null
+++ b/docs/http-api.rst
@@ -0,0 +1,62 @@
+HTTP API
+========
+
+Arachnado provides an HTTP API for starting/stopping crawls.
+
+To use the HTTP API, send a POST request with a
+``Content-Type: application/json`` header; parameters should be in the
+JSON-encoded POST body.
+
+/crawler/start
+--------------
+
+Start a crawling job. Parameters::
+
+    {
+        "domain": "",
+        "args": {},
+        "settings": {}
+    }
+
+If the job is started successfully, the endpoint returns
+a ``{"status": "ok", "job_id": ""}`` object with the ID of the started job.
+
+In case of errors ``{"status": "error"}`` is returned.
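+
+For example, a start request could be sent with ``curl`` like this (the
+host, port and domain values below are only an illustration)::
+
+    curl -X POST http://127.0.0.1:8888/crawler/start \
+         -H 'Content-Type: application/json' \
+         -d '{"domain": "example.com", "args": {}, "settings": {}}'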
+
+/crawler/stop
+-------------
+
+Stop a job. Parameters::
+
+    {"job_id": ""}
+
+If the job is stopped successfully, the endpoint returns
+``{"status": "ok"}``, otherwise it returns ``{"status": "error"}``.
+
+
+/crawler/pause
+--------------
+
+Pause a job. Parameters::
+
+    {"job_id": ""}
+
+If the job is paused successfully, the endpoint returns
+``{"status": "ok"}``, otherwise it returns ``{"status": "error"}``.
+
+
+/crawler/resume
+---------------
+
+Resume a paused job. Parameters::
+
+    {"job_id": ""}
+
+If the job is resumed successfully, the endpoint returns
+``{"status": "ok"}``, otherwise it returns ``{"status": "error"}``.
+
+
+/crawler/status
+---------------
+
+TODO
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..12070a9
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,37 @@
+Arachnado
+=========
+
+Arachnado is a tool to crawl a specific website.
+It provides a Tornado_-based HTTP API and a web UI for a
+Scrapy_-based crawler.
+
+License is MIT.
+
+.. _Tornado: http://www.tornadoweb.org
+.. _Scrapy: http://scrapy.org/
+
+.. toctree::
+   :maxdepth: 2
+
+   intro
+   config
+   http-api
+   json-rpc-api
+
+Screenshots
+-----------
+
+.. image::
+    _static/img/arachnado-0.png
+
+
+.. image::
+    _static/img/arachnado-1.png
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/intro.rst b/docs/intro.rst
new file mode 100644
index 0000000..a8534c3
--- /dev/null
+++ b/docs/intro.rst
@@ -0,0 +1,22 @@
+Getting Started
+===============
+
+Install
+-------
+
+Arachnado requires Python 2.7 or Python 3.5.
+To install Arachnado use pip::
+
+    pip install arachnado
+
+Run
+---
+
+To start Arachnado, execute the ``arachnado`` command::
+
+    arachnado
+
+and then visit http://0.0.0.0:8888.
+
+Run ``arachnado --help`` to see available command-line options.
+See also: :ref:`config`.
diff --git a/docs/json-rpc-api.rst b/docs/json-rpc-api.rst
new file mode 100644
index 0000000..e642ff8
--- /dev/null
+++ b/docs/json-rpc-api.rst
@@ -0,0 +1,227 @@
+JSON RPC API
+============
+
+Arachnado provides a JSON-RPC_ API for working with jobs and crawled items
+(pages). The API works over WebSocket transport.
+
+JSON-RPC requests have the following format::
+
+    {
+        "jsonrpc": "2.0",
+
+        # pass a unique request id here; this id will be included in the response
+        "id": 362810,
+
+        # command to execute
+        "method": "",
+        "params": {"name": "value"},
+    }
+
+JSON-RPC responses::
+
+    {
+        "jsonrpc": "2.0",
+
+        # id of the request
+        "id": 362810,
+
+        # what the command returns
+        "result": ...
+    }
+
+Working with jobs and pages
+---------------------------
+
+The JSON-RPC API allows you to
+
+* get information about scraping jobs;
+* start new crawls;
+* subscribe to crawled pages;
+* subscribe to job updates.
+
+jobs.subscribe
+    Get information about jobs and subscribe to new jobs.
+
+    Parameters:
+
+    * last_id - optional, ObjectID value of the last previously seen job;
+      when passed, only new job data is returned;
+    * query - optional, MongoDB query;
+    * fields - optional, set of fields to return.
+
+pages.subscribe
+    Get crawled pages and subscribe to new pages.
+
+    Parameters:
+
+    * last_id - optional, ObjectID value of the last previously seen page.
+      When passed, only new page data is returned;
+    * query - optional, MongoDB query;
+    * fields - optional, set of fields to return.
+
+
+New API
+=======
+
+Working with jobs
+-----------------
+
+Open a websocket connection to ``/ws-jobs-data`` in order to use
+the jobs JSON-RPC API for scraping jobs.
+
+subscribe_to_jobs
+    Get information about jobs and subscribe to new jobs.
+    Parameters:
+
+    * include - an array of regexes which should match URLs to include;
+    * exclude - an array of regexes; URLs matched by these regexes are excluded
+      from the result;
+    * update_delay - optional int, a minimum number of ms between websocket messages. If this parameter is set, Arachnado aggregates job statistics;
+    * last_job_id - optional, ObjectID value of the last previously seen job.
+      When passed, only new job data is returned.
+
+
+    The response contains the subscription ID in the ``['result']['id']`` field::
+
+        {
+            'id': '',
+            'jsonrpc': '2.0',
+            'result': {'datatype': 'job_subscription_id', 'id': '0'}
+        }
+
+    Use this ID to cancel the subscription.
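+
+    For reference, a complete ``subscribe_to_jobs`` request in the request
+    format shown above might look like this (the parameter values below are
+    only an illustration)::
+
+        {
+            "jsonrpc": "2.0",
+            "id": 362810,
+            "method": "subscribe_to_jobs",
+            "params": {
+                "include": ["example.com"],
+                "exclude": [],
+                "update_delay": 1000
+            }
+        }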
+
+    After the subscription is created, Arachnado starts sending information
+    about new jobs. Messages look like this::
+
+        {
+            '_id': '574718bba7a4edb9b026f248',
+            'finished_at': '2016-05-26 16:03:17',
+            'id': '97ca610fa8c347dbafeca9fcd02213dd',
+            'options': {
+                'args': {},
+                'crawl_id': '97ca610fa8c347dbafeca9fcd02213dd',
+                'domain': 'scrapy.org',
+                'settings': {}
+            },
+            'spider': 'generic',
+            'started_at': '2016-05-26 16:03:16',
+            'stats': {...},
+            'status': 'finished'
+        }
+
+cancel_subscription
+    Stop receiving updates about jobs. Parameters:
+
+    * subscription_id
+
+
+set_max_message_size
+    Set the maximum message size in bytes for the websocket channel.
+    Messages larger than the specified limit are dropped.
+    The default value is 2**20.
+    To disable this check, set the max size to zero.
+    Parameters:
+
+    * max_size - maximum message size in bytes.
+
+    The response contains the result (true/false) in the ``result`` field::
+
+        {
+            "id": '',
+            "result": true,
+            "jsonrpc": "2.0"
+        }
+
+
+Working with pages (crawled items)
+----------------------------------
+
+Open a websocket connection to ``/ws-pages-data`` in order to use
+the pages JSON-RPC API for crawled pages.
+
+subscribe_to_pages
+    Get crawled pages (items) for specific urls.
+    URL values are used as regexes, without any modification on the Arachnado side.
+    It allows you to get all pages, or only pages crawled since the last update.
+    The search uses job start urls, not page urls.
+    For example, if a job was started for www.mysite.com and then went to www.example.com (by a redirect, etc.),
+    all of its pages will be returned by a www.mysite.com search query.
+    To search pages by their own urls, use the pages.subscribe method described above.
+    To get only new pages, set the last seen page id (from the "id" field of the page record) for a url.
+    To get all pages, set the page id to None.
+
+    Parameters:
+
+    * urls - a dictionary that maps a url to the last seen page id for that url (or to None). Arachnado will create one subscription id for all these urls;
+    * url_groups - a dictionary that maps a group id to such a url dictionary. Arachnado will create one subscription id for each url group.
+
+    Command example::
+
+        {
+            'id': '',
+            'jsonrpc': '2.0',
+            'method': 'subscribe_to_pages',
+            'params': {
+                'urls': {'http://example.com': None},
+                'url_groups': {
+                    'gr1': {'http://example1.com': None},
+                    'gr2': {'http://example2.com': "57863974a8cb9c15e8f3d53a"}}
+            }
+        }
+
+    Response example for the above command::
+
+        {
+            "result": {
+                "datatype": "pages_subscription_id",
+                "single_subscription_id": "112",  # subscription id for http://example.com subscription
+                "id": {
+                    "gr1": "113",  # subscription id for http://example1.com subscription
+                    "gr2": "114",  # subscription id for http://example2.com subscription
+                }
+            },
+            "id": '',  # command request id
+            "jsonrpc": "2.0"
+        }
+
+    Use these IDs to cancel subscriptions.
+
+    After the subscription is created, Arachnado starts sending information
+    about crawled pages. Messages look like this::
+
+        {
+            "status": 200,
+            "items": [],
+            "_id": "57863974a8cb9c15e8f3d53a",
+            "url": "http://example.com/index.php",
+            "headers": {},
+            "_type": "page",
+            "body": ""
+        }
+
+
+cancel_subscription
+    Stop receiving updates. Parameters:
+
+    * subscription_id
+
+set_max_message_size
+    Set the maximum message size in bytes for the websocket channel.
+    Messages larger than the specified limit are dropped.
+    The default value is 2**20.
+    To disable this check, set the max size to zero.
+    Parameters:
+
+    * max_size - maximum message size in bytes.
+
+    The response contains the result (true/false) in the ``result`` field::
+
+        {
+            "id": '',
+            "result": true,
+            "jsonrpc": "2.0"
+        }
+
+
+.. 
_JSON-RPC: http://www.jsonrpc.org/specification diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..03d149a --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,281 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. epub3 to make an epub3 + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + echo. coverage to run coverage check of the documentation if enabled + echo. dummy to check syntax errors of document sources + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 1>NUL 2>NUL +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Arachnado.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Arachnado.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "epub3" ( + %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. 
+ echo.Testing of coverage in the sources finished, look at the ^ +results in %BUILDDIR%/coverage/python.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +if "%1" == "dummy" ( + %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. Dummy builder generates no files. + goto end +) + +:end diff --git a/requirements.txt b/requirements.txt index 8b5c728..5b795c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,6 @@ pymongo==2.8 # required for motor 0.6.2 docopt == 0.6.2 service_identity json-rpc==1.10.3 -croniter == 0.3.8 +croniter == 0.3.12 autopager == 0.2 autologin-middleware == 0.1.1 diff --git a/arachnado/storages/memory.py b/tests/__init__.py similarity index 100% rename from arachnado/storages/memory.py rename to tests/__init__.py diff --git a/tests/items.jl b/tests/items.jl new file mode 100644 index 0000000..c39786c --- /dev/null +++ b/tests/items.jl @@ -0,0 +1,2 @@ +{"_job_id": "5749d89da8cb9c1f286e3a90","status" : 200, "body" : "", "_type" : "page", "url" : "http://mysite.com/index.php", "items" : [ ], "headers" : { "Cache-Control" : [ "private, no-cache=\"set-cookie\"" ], "X-Powered-By" : [ "PHP/5.5.9-1ubuntu4.14" ], "Date" : [ "Sat, 28 May 2016 17:43:05 GMT" ], "Content-Type" : [ "text/html; charset=UTF-8" ], "Expires" : [ "Sat, 28 May 2016 17:43:05 GMT" ], "Vary" : [ "Accept-Encoding" ], "Server" : [ "Apache/2.4.7 (Ubuntu)" ] }, "meta" : { "download_timeout" : 180, "depth" : 2}} +{"_job_id": "5749d89da8cb9c1f286e3fff","status" : 200, "body" : "", "_type" : "page", "url" : "http://mysite.com", "items" : [ ], "headers" : { "Cache-Control" : [ "private, no-cache=\"set-cookie\"" ], "X-Powered-By" : [ "PHP/5.5.9-1ubuntu4.14" ], "Date" : [ "Sat, 28 May 2016 17:43:05 GMT" ], "Content-Type" : [ "text/html; charset=UTF-8" ], "Expires" : [ "Sat, 28 May 2016 17:43:05 GMT" ], "Vary" : [ "Accept-Encoding" ], "Server" : [ "Apache/2.4.7 (Ubuntu)" ] }, "meta" : { "download_timeout" : 180, "depth" : 2}} \ No newline at end of file diff --git a/tests/jobs.jl b/tests/jobs.jl new file mode 100644 index 0000000..1338406 --- /dev/null +++ b/tests/jobs.jl @@ -0,0 +1,2 @@ +{"_id": "5749d45fa8cb9c1df532a98a", "started_at": "2016-05-28 17:42:53", "urls": "http://127.0.0.1/", "spider": "generic", "status": "finished", "stats": "{\"memusage/startup\": 42012672, \"arachnado/domain\": \"127.0.0.1\", \"log_count/INFO\": 11, \"downloader/response_count\": 58, \"downloader/response_bytes\": 220480, \"finish_reason\": \"finished\", \"arachnado/start_url\": \"http://127.0.0.1/\", \"mongo_export/items_stored_count\": 58, \"scheduler/dequeued\": 58, \"request_depth_max\": 4, \"start_time\": \"2016-05-28 17:42:53\", \"request_depth_count/2\": 351, \"scheduler/enqueued/disk\": 58, \"downloader/request_bytes\": 27767, \"downloader/request_method_count/GET\": 58, \"memusage/max\": 42012672, \"dupefilter/filtered\": 1054, \"scheduler/dequeued/disk\": 58, \"downloader/response_status_count/200\": 58, \"response_received_count\": 58, \"finish_time\": \"2016-05-28 17:43:19\", \"item_scraped_count\": 58, \"scheduler/enqueued\": 58, \"request_depth_count/1\": 19, 
\"request_depth_count/0\": 1, \"request_depth_count/3\": 629, \"downloader/request_count\": 58, \"request_depth_count/4\": 112}", "id": "99f123a74e814fd09a9954265595eac0", "options": {"args": {}, "settings": {}, "crawl_id": "99f123a74e814fd09a9954265595eac0", "domain": "http://127.0.0.1/"}, "finished_at": "2016-05-28 17:43:19"} +{"_id": "5749d89da8cb9c1f286e3a90", "started_at": "2016-05-30 12:29:18", "stats_dict": {"downloader/request_method_count/GET": 107, "log_count/INFO": 18, "mongo_export/items_stored_count": 103, "request_depth_count/2": 9718, "scheduler/enqueued": 3711, "scheduler/dequeued": 107, "downloader/request_count": 107, "memusage/max": 97583104, "start_time": "2016-05-30 12:29:18", "downloader/request_bytes": 43053, "dupefilter/filtered": 7451, "scheduler/initial": 0, "downloader/response_count": 107, "arachnado/start_url": "http://example.com/", "scheduler/enqueued/disk": 3711, "request_depth_count/1": 86, "downloader/response_status_count/200": 103, "response_received_count": 103, "request_depth_count/3": 1353, "request_depth_count/0": 1, "memusage/startup": 41951232, "finish_time": "2016-05-30 12:30:27", "item_scraped_count": 103, "scheduler/remaining": 3604, "request_depth_max": 3, "scheduler/dequeued/disk": 107, "finish_reason": "stopped", "log_count/WARNING": 3, "downloader/response_bytes": 3336279, "downloader/response_status_count/301": 3, "downloader/response_status_count/302": 1}, "urls": "http://example.com/", "spider": "generic", "status": "finished", "stats": "{\"memusage/startup\": 41951232, \"scheduler/remaining\": 3604, \"scheduler/initial\": 0, \"log_count/INFO\": 18, \"downloader/response_count\": 107, \"downloader/response_bytes\": 3336279, \"finish_reason\": \"stopped\", \"arachnado/start_url\": \"http://example.com/\", \"mongo_export/items_stored_count\": 103, \"scheduler/dequeued\": 107, \"log_count/WARNING\": 3, \"request_depth_max\": 3, \"start_time\": \"2016-05-30 12:29:18\", \"request_depth_count/2\": 9718, \"scheduler/enqueued/disk\": 3711, \"downloader/request_bytes\": 43053, \"downloader/request_method_count/GET\": 107, \"memusage/max\": 97583104, \"dupefilter/filtered\": 7451, \"scheduler/dequeued/disk\": 107, \"downloader/response_status_count/200\": 103, \"response_received_count\": 103, \"finish_time\": \"2016-05-30 12:30:27\", \"item_scraped_count\": 103, \"scheduler/enqueued\": 3711, \"request_depth_count/1\": 86, \"request_depth_count/0\": 1, \"request_depth_count/3\": 1353, \"downloader/request_count\": 107, \"downloader/response_status_count/301\": 3, \"downloader/response_status_count/302\": 1}", "id": "6c8be393f12a442ca2ec94ec3f89b72e", "options": {"args": {}, "settings": {}, "domain": "http://example.com/"}, "finished_at": "2016-05-30 12:30:27"} \ No newline at end of file diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 0000000..d60408f --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +import tornado +import json +from tornado import web, websocket +import tornado.testing +from tornado.ioloop import TimeoutError + +import tests.utils as u + + +class TestDataAPI(tornado.testing.AsyncHTTPTestCase): + pages_uri = r"/ws-pages-data" + jobs_uri = r"/ws-jobs-data" + + @classmethod + def setUpClass(cls): + u.init_db() + + @classmethod + def tearDownClass(cls): + u.clear_db() + + def get_app(self): + return u.get_app(self.pages_uri, self.jobs_uri) + + @tornado.testing.gen_test + def test_set_message_size(self): + test_command = 
self.get_command("test_set_0",'set_max_message_size', {"max_size":10000}) + ws_url = "ws://localhost:" + str(self.get_http_port()) + self.jobs_uri + ws_client = yield tornado.websocket.websocket_connect(ws_url) + ws_client.write_message(json.dumps(test_command)) + response = yield ws_client.read_message() + json_response = json.loads(response) + res = json_response.get("result", False) + self.assertTrue(res) + + @tornado.testing.gen_test + def test_jobs_no_filter(self): + jobs_command = self.get_command("test_jobs_0",'subscribe_to_jobs', {}) + yield self.execute_jobs_command(jobs_command, wait_result=True) + + @tornado.testing.gen_test + def test_jobs_filter_include(self): + jobs_command = self.get_command("test_jobs_1",'subscribe_to_jobs', {"include":["127.0.0.1"],}) + yield self.execute_jobs_command(jobs_command, wait_result=True) + + @tornado.gen.coroutine + def execute_jobs_command(self, jobs_command, wait_result=True): + ws_url = "ws://localhost:" + str(self.get_http_port()) + self.jobs_uri + ws_client = yield tornado.websocket.websocket_connect(ws_url) + ws_client.write_message(json.dumps(jobs_command)) + response = yield ws_client.read_message() + json_response = json.loads(response) + subs_id = json_response.get("result").get("id", -1) + self.assertNotEqual(subs_id, -1) + if wait_result: + response = yield ws_client.read_message() + json_response = json.loads(response) + if json_response is None: + self.fail("incorrect response") + else: + self.assertTrue('stats' in json_response) + self.assertTrue(isinstance(json_response["stats"], dict)) + yield self.execute_cancel(ws_client, subs_id, True) + + def test_jobs_filter_include_not_exists(self): + @tornado.gen.coroutine + def f(): + jobs_command = self.get_command("test_jobs_2",'subscribe_to_jobs', {"include":["notexists.com"],}) + yield self.execute_jobs_command(jobs_command, wait_result=True) + self.assertRaises(TimeoutError, self.io_loop.run_sync, f, timeout=3) + + @tornado.testing.gen_test + def test_pages_filter_url_groups(self): + url_value = 'http://example.com' + pages_command = self.get_command("test_pages_0",'subscribe_to_pages', {'url_groups': {1: {url_value: None}}}) + yield self.execute_pages_command(pages_command, wait_result=True, required_url=url_value) + + def test_pages_no_result(self): + @tornado.gen.coroutine + def f(): + url_value = 'http://mysite.com' + pages_command = self.get_command("test_pages_3",'subscribe_to_pages', {'url_groups': {1: {url_value: None}}}) + yield self.execute_pages_command(pages_command, + wait_result=True, + required_url=url_value, + max_count=0) + self.assertRaises(TimeoutError, self.io_loop.run_sync, f, timeout=3) + + def test_pages_exact_count(self): + @tornado.gen.coroutine + def f(): + url_value = 'http://example.com' + pages_command = self.get_command("test_pages_4",'subscribe_to_pages', {'url_groups': {1: {url_value: None}}}) + yield self.execute_pages_command(pages_command, + wait_result=True, + required_url=url_value, + max_count=1) + self.assertRaises(TimeoutError, self.io_loop.run_sync, f, timeout=3) + + @tornado.testing.gen_test + def test_pages_no_filter(self): + pages_command = self.get_command("test_pages_1",'subscribe_to_pages', {}) + yield self.execute_pages_command(pages_command, wait_result=True) + + @tornado.testing.gen_test + def test_pages_filter_urls(self): + url_value = 'http://example.com' + pages_command = self.get_command("test_pages_2",'subscribe_to_pages', {'urls': {url_value: None}}) + yield self.execute_pages_command(pages_command, wait_result=True, 
required_url=url_value) + + def get_command(self, id, method, params): + return { + 'id': id, + 'jsonrpc': '2.0', + 'method': method, + 'params': params + } + + @tornado.gen.coroutine + def execute_pages_command(self, pages_command, wait_result=False, required_url=None, max_count=None): + ws_url = "ws://localhost:" + str(self.get_http_port()) + self.pages_uri + ws_client = yield tornado.websocket.websocket_connect(ws_url) + ws_client.write_message(json.dumps(pages_command)) + response = yield ws_client.read_message() + json_response = json.loads(response) + subs_id = json_response.get("result").get("single_subscription_id", -1) + if not subs_id: + group_sub_ids = json_response.get("result").get("id", {}) + for group_id in group_sub_ids.keys(): + if group_sub_ids[group_id] != -1: + subs_id = group_sub_ids[group_id] + self.assertNotEqual(subs_id, -1) + if wait_result: + if max_count is None: + response = yield ws_client.read_message() + json_response = json.loads(response) + if json_response is None: + self.fail("incorrect response") + else: + cnt = 0 + while True: + response = yield ws_client.read_message() + json_response = json.loads(response) + if json_response is None: + self.fail("incorrect response") + cnt += 1 + if cnt > max_count: + self.fail("max count of pages exceeded") + yield self.execute_cancel(ws_client, subs_id, True) + + @tornado.testing.gen_test + def test_wrong_cancel(self): + ws_url = "ws://localhost:" + str(self.get_http_port()) + self.pages_uri + ws_client = yield tornado.websocket.websocket_connect(ws_url) + yield self.execute_cancel(ws_client, -1, False) + + @tornado.gen.coroutine + def execute_cancel(self, ws_client, subscription_id, expected): + cmd_id = "test_cancel" + cancel_command = self.get_command(cmd_id,'cancel_subscription', {"subscription_id": subscription_id}) + ws_client.write_message(json.dumps(cancel_command)) + while True: + response = yield ws_client.read_message() + json_response = json.loads(response) + if json_response.get("id", None) == cmd_id: + self.assertEqual(json_response.get("result"), expected) + break diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..578c6b5 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +import os +import tornado +import json +from pymongo import MongoClient +from pymongo.errors import DuplicateKeyError + +from arachnado.rpc.data import PagesDataRpcWebsocketHandler, JobsDataRpcWebsocketHandler +from arachnado.storages.mongotail import MongoTailStorage +from arachnado.utils.mongo import motor_from_uri + + +def get_mongo_db(): + client = MongoClient('mongodb://localhost:27017/') + return client["arachnado-test"] + + +def get_db_uri(): + return "mongodb://localhost:27017/arachnado-test" + + +def get_app(ws_pages_uri, ws_jobs_uri): + db_uri = get_db_uri() + items_uri = "{}/items".format(db_uri) + jobs_uri = "{}/jobs".format(db_uri) + job_storage = MongoTailStorage(jobs_uri, cache=True) + item_storage = MongoTailStorage(items_uri) + context = { + 'crawler_process': None, + 'job_storage': job_storage, + 'item_storage': item_storage, + } + app = tornado.web.Application([ + (ws_pages_uri, PagesDataRpcWebsocketHandler, context), + (ws_jobs_uri, JobsDataRpcWebsocketHandler, context), + ]) + return app + + +def init_db(): + db = get_mongo_db() + collections = ["jobs", "items"] + for collection in collections: + col_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "{}.jl".format(collection)) + col = db[collection] + with open(col_path, "r") as fin: + for 
text_line in fin: + record = json.loads(text_line) + try: + col.insert(record) + except DuplicateKeyError: + pass + + +def clear_db(): + db = get_mongo_db() + collections = ["jobs", "items"] + for collection in collections: + col = db[collection] + col.drop() \ No newline at end of file diff --git a/tox.ini b/tox.ini index 1f6d62a..f143631 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27 +envlist = py27,py35 [testenv] deps = @@ -7,6 +7,6 @@ deps = pytest-cov commands = pip install -r requirements.txt - py.test --doctest-modules --cov=arachnado {posargs:arachnado} + py.test --doctest-modules --cov=arachnado {posargs:arachnado tests}