diff --git a/COPYING b/COPYING
index c128f62..02ba3d4 100644
--- a/COPYING
+++ b/COPYING
@@ -1,5 +1,6 @@
 WeighWords: a Python library for creating word weights/word clouds from text
-Copyright 2011 University of Amsterdam
+Copyright 2011-2019 University of Amsterdam
+Copyright 2019 TinQwise Stamkracht
 
 This program is free software: you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License as published by the
diff --git a/README.rst b/README.rst
index 8fc2365..c3888cb 100644
--- a/README.rst
+++ b/README.rst
@@ -9,28 +9,88 @@ Rather than use simple word frequency, it weighs words by statistical models
 known as *parsimonious language models*. These models are good at picking up
 the words that distinguish a text document from other documents in a
 collection. The downside to this is that you can't use WeighWords to make a
-word cloud of a single document; you need a bunch of document to compare to.
+word cloud of a single document; you need a bunch of documents (i.e. a
+background collection) to compare to.
 
 
 Installation
 ------------
 
-Either::
+Either install the latest release from PyPI::
 
     pip install weighwords
 
-or::
+or clone this git repository, and::
 
     python setup.py install
 
+or::
+
+    pip install -e .
+
+Usage
+-----
+>>> quotes = [
+...     "Love all, trust a few, Do wrong to none",
+...     ...
+...     "A lover's eyes will gaze an eagle blind. "
+...     "A lover's ear will hear the lowest sound.",
+... ]
+>>> doc_tokens = [
+...     re.sub(r"[.,:;!?\"‘’]|'s\b", " ", quote).lower().split()
+...     for quote in quotes
+... ]
+
+The ``ParsimoniousLM`` is initialized with all document tokens as a
+background corpus, and subsequently takes a single document's tokens
+as input. Its ``top`` method returns the top terms and their log-probabilities:
+
+>>> from weighwords import  ParsimoniousLM
+>>> plm = ParsimoniousLM(doc_tokens, w=.1)
+>>> plm.top(10, doc_tokens[-1])
+[('lover', -1.871802261651365),
+ ('will', -1.871802261651365),
+ ('eyes', -2.5649494422113044),
+ ('gaze', -2.5649494422113044),
+ ('an', -2.5649494422113044),
+ ('eagle', -2.5649494422113044),
+ ('blind', -2.5649494422113044),
+ ('ear', -2.5649494422113044),
+ ('hear', -2.5649494422113044),
+ ('lowest', -2.5649494422113044)]
+
+The ``SignificantWordsLM`` is similarly initialized with a background corpus,
+but subsequently takes a group of document tokens as input. Its ``group_top``
+method returns the top terms and their probabilities:
+
+>>> from weighwords import SignificantWordsLM
+>>> swlm = SignificantWordsLM(doc_tokens, lambdas=(.7, .1, .2))
+>>> swlm.group_top(10, doc_tokens[-3:])
+[('in', 0.37875318027881),
+ ('is', 0.07195732361699828),
+ ('mortal', 0.07195732361699828),
+ ('nature', 0.07195732361699828),
+ ('all', 0.07110584778711342),
+ ('we', 0.03597866180849914),
+ ('true', 0.03597866180849914),
+ ('lovers', 0.03597866180849914),
+ ('strange', 0.03597866180849914),
+ ('capers', 0.03597866180849914)]
+
+See ``example/dickens.py`` for a running example with more realistic data.
 
 References
 ----------
-D. Hiemstra, S. Robertson and H. Zaragoza (2004). `Parsimonious Language Models
+D. Hiemstra, S. Robertson, and H. Zaragoza (2004). `Parsimonious Language Models
 for Information Retrieval
 <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.5806>`_.
 Proc. SIGIR'04.
 
-R. Kaptein, D. Hiemstra and J. Kamps (2010). `How different are Language Models
-and word clouds? <http://riannekaptein.woelmuis.nl/2010/kapt-how10.pdf>`_
-Proc. ECIR.
+R. Kaptein, D. Hiemstra, and J. Kamps (2010). `How different are Language Models
+and word clouds? <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.189.822>`_.
+Proc. ECIR'10.
+
+M. Dehghani, H. Azarbonyad, J. Kamps, D. Hiemstra, and M. Marx (2016).
+`Luhn Revisited: Significant Words Language Models
+<https://djoerdhiemstra.com/wp-content/uploads/cikm2016.pdf>`_.
+Proc. CKIM'16.
diff --git a/example/dickens.py b/example/dickens.py
index 3328f6f..0b334b3 100755
--- a/example/dickens.py
+++ b/example/dickens.py
@@ -1,13 +1,16 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Find terms that distinguish various novels by Charles Dickens.
 # Note: if the w parameter is set wisely, no stop list is needed.
-
-from weighwords import ParsimoniousLM
 import gzip
 import logging
-import numpy as np
+import math
 import re
+from itertools import zip_longest
+
+import numpy as np
+
+from weighwords import ParsimoniousLM, SignificantWordsLM
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -21,28 +24,49 @@
 ]
 
 startbook = """*** START OF THIS PROJECT GUTENBERG EBOOK """
+endbook = """*** END OF THIS PROJECT GUTENBERG EBOOK """
 
 
 def read_book(title, num):
     """Returns generator over words in book num"""
 
-    logger.info("Fetching terms from %s" % title)
-    path = "%s.txt.utf8.gz" % num
+    logger.info(f"Fetching terms from {title}")
+    path = f"{num}.txt.utf8.gz"
     in_book = False
-    for ln in gzip.open(path):
-        if in_book:
-            for w in re.sub(r"[.,:;!?\"']", " ", ln).lower().split():
+    for ln in gzip.open(path, 'rt', encoding='utf8'):
+        if in_book and ln.startswith(endbook):
+            break
+        elif in_book:
+            for w in re.sub(r"[.,:;!?\"'‘’]", " ", ln).lower().split():
                 yield w
         elif ln.startswith(startbook):
             in_book = True
 
 
+def grouper(iterable, n, filler=None):
+    """Source: https://docs.python.org/3/library/itertools.html#itertools-recipes"""
+    args = [iter(iterable)] * n
+    return zip_longest(*args, fillvalue=filler)
+
+
 book_contents = [(title, list(read_book(title, num))) for title, num in books]
+corpus = [terms for title, terms in book_contents]
 
-model = ParsimoniousLM([terms for title, terms in book_contents], w=.01)
+plm = ParsimoniousLM(corpus, w=.01)
+swlm = SignificantWordsLM(corpus, lambdas=(.9, .01, .09))
 
 for title, terms in book_contents:
-    print("Top %d words in %s:" % (top_k, title))
-    for term, p in model.top(top_k, terms):
-        print("    %s %.4f" % (term, np.exp(p)))
+    plm_top = plm.top(top_k, terms)
+    swlm_top = swlm.group_top(
+        top_k,
+        grouper(terms, math.ceil(len(terms) / 10)),
+        fix_lambdas=True,
+    )
+    print(f"\nTop {top_k} words in {title}:")
+    print(f"\n{'PLM term':<16} {'PLM p':<12} {'SWLM term':<16} {'SWLM p':<6}")
+    for (plm_t, plm_p), (swlm_t, swlm_p) in zip(plm_top, swlm_top):
+        print(f"{plm_t:<16} {np.exp(plm_p):<12.4f} {swlm_t:<16} {swlm_p:.4f}")
     print("")
+
+
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..7670e0e
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,9 @@
+# install the weighwords package for convenience
+-e .
+
+# testing framework
+pytest ~= 4.5
+
+# static type checking
+mypy >= 0.701
+https://github.com/numpy/numpy-stubs/archive/master.tar.gz
diff --git a/setup.py b/setup.py
index 601f5da..3552010 100644
--- a/setup.py
+++ b/setup.py
@@ -8,11 +8,14 @@
     description = "Python library for creating word weights/word clouds from text",
     keywords = "word cloud nlp language model",
     license = "LGPL",
+    package_data = {"weighwords": ["py.typed"]},
     packages = ["weighwords"],
-    install_requires = ["numpy>=1.4.0"],
+    install_requires = ["numpy>=1.15.0"],
+    tests_require = ["pytest"],
     classifiers = [
         "Development Status :: 4 - Beta",
         "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
+        "Programming Language :: Python :: 3.7",
         "Topic :: Text Processing",
     ]
 )
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..3b68efa
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,43 @@
+import re
+
+import pytest
+
+
+@pytest.fixture(scope="module")
+def uniform_doc():
+    return ['one', 'two', 'three', 'four', 'five']
+
+
+@pytest.fixture(scope="module")
+def number_corpus():
+    return [
+        ['one'],
+        ['two', 'two'],
+        ['three', 'three', 'three'],
+        ['four', 'four', 'four', 'four'],
+        ['five', 'five', 'five', 'five', 'five']
+    ]
+
+
+@pytest.fixture(scope="module")
+def shakespeare_quotes():
+    quotes = [
+        "Love all, trust a few, Do wrong to none",
+        "But love that comes too late, "
+        "Like a remorseful pardon slowly carried, "
+        "To the great sender turns a sour offence.",
+        "If thou remember'st not the slightest folly "
+        "That ever love did make thee run into, "
+        "Thou hast not lov'd.",
+        "We that are true lovers run into strange capers; "
+        "but as all is mortal in nature, "
+        "so is all nature in love mortal in folly.",
+        "But are you so much in love as your rhymes speak? "
+        "Neither rhyme nor reason can express how much.",
+        "A lover's eyes will gaze an eagle blind. "
+        "A lover's ear will hear the lowest sound.",
+    ]
+    return [
+        re.sub(r"[.,:;!?\"‘’]|'s\b", " ", quote).lower().split()
+        for quote in quotes
+    ]
diff --git a/tests/test_equivalence.py b/tests/test_equivalence.py
new file mode 100644
index 0000000..4c934e6
--- /dev/null
+++ b/tests/test_equivalence.py
@@ -0,0 +1,58 @@
+from itertools import chain
+
+from weighwords import ParsimoniousLM, SignificantWordsLM
+from weighwords.logsum import logsum
+
+
+def test_model_equivalence(shakespeare_quotes):
+    weight = .1
+    plm = ParsimoniousLM(shakespeare_quotes, w=weight)
+    # initialize SWLM with weights that make it equivalent to PLM
+    swlm = SignificantWordsLM(
+        shakespeare_quotes,
+        lambdas=(1 - weight, weight, 0.)
+    )
+    plm_terms, swlm_terms = fit_models(plm, swlm, shakespeare_quotes)
+
+    assert plm_terms == swlm_terms, 'PLM and SWLM are not functionally equivalent'
+
+
+def test_model_non_equivalence(shakespeare_quotes):
+    weight = .1
+    plm = ParsimoniousLM(shakespeare_quotes, w=weight)
+    # initialize SWLM with weights that make it non-equivalent to PLM
+    swlm = SignificantWordsLM(
+        shakespeare_quotes,
+        lambdas=(1 - 2 * weight, weight, weight)
+    )
+    plm_terms, swlm_terms = fit_models(plm, swlm, shakespeare_quotes)
+
+    assert plm_terms != swlm_terms, 'PLM and SWLM should not be functionally equivalent'
+
+
+def get_p_corpus(language_model):
+    p_corpus = language_model.p_corpus.copy()
+    vocab = language_model.vocab
+    term_tiers = [
+        (1.5, ['love', 'folly', "lov'd", 'lovers', 'lover']),
+        (1.3, ['trust', 'remorseful', 'sour', 'offence', 'gaze']),
+        (1.1, ["remember'st", 'capers', 'rhyme', 'rhymes', 'eagle']),
+    ]
+    for multiplier, terms in term_tiers:
+        for t in terms:
+            p_corpus[vocab[t]] *= multiplier
+
+    return p_corpus - logsum(p_corpus)
+
+
+def fit_models(plm, swlm, docs):
+    # artificially reduce the corpus probability of selected terms
+    plm.p_corpus = swlm.p_corpus = get_p_corpus(plm)
+
+    top_k = 15
+    plm_top = plm.top(top_k, chain(*docs))
+    swlm_top = swlm.group_top(top_k, docs, fix_lambdas=True)
+    plm_terms = [term for term, log_prob in plm_top]
+    swlm_terms = [term for term, prob in swlm_top]
+
+    return plm_terms, swlm_terms
diff --git a/tests/test_plm.py b/tests/test_plm.py
new file mode 100644
index 0000000..ceb9f90
--- /dev/null
+++ b/tests/test_plm.py
@@ -0,0 +1,21 @@
+import numpy as np
+
+from weighwords import ParsimoniousLM
+
+
+def test_document_model(number_corpus, uniform_doc):
+    plm = ParsimoniousLM(number_corpus, w=0.1, thresh=3)
+    tf, p_term = plm._document_model(uniform_doc)
+    assert (tf[:2] == 0).all(), \
+        "Terms with a corpus frequency < thresh should not be counted"
+    assert tf.sum() == 3, f"Expected tf.sum() to be 3, got {tf.sum()} instead"
+    linear_p_term = np.exp(p_term)
+    assert (linear_p_term[2:].sum() - 1) < 1e-10, \
+        f"All probability mass should be on the last 3 terms, got {linear_p_term} instead"
+
+
+def test_document_model_out_of_vocabulary(number_corpus):
+    plm = ParsimoniousLM(number_corpus, w=0.1)
+    doc = ['two', 'or', 'three', 'unseen', 'words']
+    tf, p_term = plm._document_model(doc)
+    assert tf.sum() == 2, f"Unseen words should be ignored, got {tf} instead"
diff --git a/tests/test_swlm.py b/tests/test_swlm.py
new file mode 100644
index 0000000..e6b82bc
--- /dev/null
+++ b/tests/test_swlm.py
@@ -0,0 +1,37 @@
+import logging
+
+from weighwords import SignificantWordsLM
+
+logging.basicConfig(level=logging.INFO)
+
+
+def test_model_fit_fixed(number_corpus, uniform_doc):
+    swlm = SignificantWordsLM([uniform_doc], lambdas=(1/3, 1/3, 1/3))
+    doc_group = [l + r for l, r in zip(number_corpus, reversed(number_corpus))]
+    term_probs = swlm.fit_parsimonious_group(doc_group, fix_lambdas=True)
+    expected_probs = {
+        "one": 0.0,
+        "two": 0.12373,
+        "three": 2e-5,
+        "four": 0.50303,
+        "five": 0.37322,
+    }
+    for term, p in expected_probs.items():
+        diff = abs(term_probs[term] - p)
+        assert diff < 1e-5, f"P({term}) != {p} with difference {diff}"
+
+
+def test_model_fit_shifty(number_corpus, uniform_doc):
+    swlm = SignificantWordsLM([uniform_doc], lambdas=(1/3, 1/3, 1/3))
+    doc_group = [l + r for l, r in zip(number_corpus, reversed(number_corpus))]
+    term_probs = swlm.fit_parsimonious_group(doc_group, fix_lambdas=False)
+    expected_probs = {
+        "one": 0.0,
+        "two": 0.33322,
+        "three": 0.0,
+        "four": 0.66678,
+        "five": 0.0,
+    }
+    for term, p in expected_probs.items():
+        diff = abs(term_probs[term] - p)
+        assert diff < 1e-5, f"P({term}) != {p} with difference {diff}"
diff --git a/weighwords/__init__.py b/weighwords/__init__.py
index f725693..40c7742 100644
--- a/weighwords/__init__.py
+++ b/weighwords/__init__.py
@@ -1 +1,2 @@
 from .parsimonious import ParsimoniousLM
+from .significant_words import SignificantWordsLM
diff --git a/weighwords/logsum.py b/weighwords/logsum.py
index 6df59cd..350bd26 100644
--- a/weighwords/logsum.py
+++ b/weighwords/logsum.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 
-def logsum(x):
+def logsum(x: np.ndarray) -> np.ndarray:
     """Computes the sum of x assuming x is in the log domain.
 
     Returns log(sum(exp(x))) while minimizing the possibility of
@@ -24,7 +24,7 @@ def logsum(x):
     """
     # Use the max to normalize, as with the log this is what accumulates
     # the less errors
-    vmax = x.max(axis=0)
-    out = np.log(np.sum(np.exp(x - vmax), axis=0))
+    vmax = np.nanmax(x, axis=0)
+    out = np.log(np.nansum(np.exp(x - vmax), axis=0))
     out += vmax
     return out
diff --git a/weighwords/parsimonious.py b/weighwords/parsimonious.py
index becaa02..60ef281 100644
--- a/weighwords/parsimonious.py
+++ b/weighwords/parsimonious.py
@@ -1,21 +1,28 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
-# Copyright 2011-2013 University of Amsterdam
+# Copyright 2011-2019 University of Amsterdam
 # Author: Lars Buitinck
 
+from __future__ import annotations
+# TODO: remove redundant typing imports once PEP 585 is finalized
+
 from collections import defaultdict
 from heapq import nlargest
 import logging
+from operator import itemgetter
+from typing import Iterable, Optional, Dict, List, Tuple
+
 import numpy as np
 
-from .logsum import logsum
+from weighwords.logsum import logsum
 
 
 logger = logging.getLogger(__name__)
 
 
-class ParsimoniousLM(object):
-    """Language model for a set of documents.
+class ParsimoniousLM:
+    """
+    Language model for a set of documents.
 
     Constructing an object of this class fits a background model. The top
     method can then be used to fit document-specific models, also for unseen
@@ -23,34 +30,51 @@ class ParsimoniousLM(object):
 
     Parameters
     ----------
-    documents : iterable over iterable over terms
+    documents : iterable over iterable of str terms
+        All documents that should be included in the corpus model.
     w : float
-        Weight of document model (1 - weight of corpus model)
+        Weight of document model (1 - weight of corpus model).
     thresh : int
-        Don't include words that occur < thresh times
+        Don't include words that occur fewer than `thresh` times.
 
     Attributes
     ----------
     vocab : dict of term -> int
         Mapping of terms to numeric indices
     p_corpus : array of float
-        Log prob of terms
+        Log probability of terms in background model (indexed by `vocab`)
+
+    References
+    ----------
+    D. Hiemstra, S. Robertson, and H. Zaragoza (2004).
+    `Parsimonious Language Models for Information Retrieval
+    <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.5806>`_.
+    Proc. SIGIR'04.
     """
 
-    def __init__(self, documents, w, thresh=0):
+    def __init__(
+            self,
+            documents: Iterable[Iterable[str]],
+            w: np.floating,
+            thresh: int = 0
+    ):
+        """Collect the vocabulary and fit the background model."""
         logger.info('Building corpus model')
 
         self.w = w
-        self.vocab = vocab = {}     # Vocabulary: maps terms to numeric indices
-        count = defaultdict(int)    # Corpus frequency
+        # Vocabulary: maps terms to numeric indices
+        vocab: Dict[str, int]
+        self.vocab = vocab = {}
+        # Corpus frequency
+        count: Dict[int, int] = defaultdict(int)
 
         for d in documents:
             for tok in d:
                 i = vocab.setdefault(tok, len(vocab))
                 count[i] += 1
 
-        cf = np.empty(len(count), dtype=np.float)
-        for i, f in count.iteritems():
+        cf = np.empty(len(count), dtype=np.double)
+        for i, f in count.items():
             cf[i] = f
         rare = (cf < thresh)
         cf -= rare * cf
@@ -59,60 +83,79 @@ def __init__(self, documents, w, thresh=0):
             old_error_settings = np.seterr(divide='ignore')
 
             # lg P(t|C)
-            self.p_corpus = np.log(cf) - np.log(np.sum(cf))
+            self.p_corpus: np.ndarray = np.log(cf) - np.log(np.sum(cf))
         finally:
             np.seterr(**old_error_settings)
 
-    def top(self, k, d, max_iter=50, eps=1e-5, w=None):
-        '''Get the top k terms of a document d and their log probabilities.
+    def top(
+            self,
+            k: int,
+            d: Iterable[str],
+            max_iter: int = 50,
+            eps: float = 1e-5,
+            w: Optional[np.floating] = None
+    ) -> List[Tuple[str, float]]:
+        """
+        Get the top `k` terms of a document `d` and their log probabilities.
 
         Uses the Expectation Maximization (EM) algorithm to estimate term
         probabilities.
 
         Parameters
         ----------
+        k : int
+            Number of top terms to return.
+        d : iterable of str terms
+            Terms that make up the document.
         max_iter : int, optional
             Maximum number of iterations of EM algorithm to run.
         eps : float, optional
-            Convergence threshold for EM algorithm.
+            Epsilon: convergence threshold for EM algorithm.
         w : float, optional
             Weight of document model; overrides value given to __init__
 
         Returns
         -------
         t_p : list of (str, float)
-        '''
+            Terms and their log-probabilities in the parsimonious model.
+        """
 
         tf, p_term = self._document_model(d)
         p_term = self._EM(tf, p_term, w, max_iter, eps)
 
-        terms = [(t, p_term[i]) for t, i in self.vocab.iteritems()]
-        return nlargest(k, terms, lambda tp: tp[1])
+        terms = [(t, p_term[i]) for t, i in self.vocab.items()]
+        return nlargest(k, terms, itemgetter(1))
 
-    def _document_model(self, d):
-        '''Build document model.
+    def _document_model(self, d: Iterable[str]) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Build document model.
 
         Parameters
         ----------
-        d : array of terms
+        d : iterable of str terms
 
         Returns
         -------
-        tf : array of int
+        tf : array of float
             Term frequencies
         p_term : array of float
             Term log probabilities
 
         Initial p_term is 1/n_distinct for terms with non-zero tf,
         0 for terms with 0 tf.
-        '''
+        """
 
         logger.info('Gathering term probabilities')
 
-        tf = np.zeros(len(self.vocab), dtype=np.float)  # Term frequency
+        tf = np.zeros(len(self.vocab), dtype=np.double)  # Term frequency
 
         for tok in d:
-            tf[self.vocab[tok]] += 1
+            term_id = self.vocab.get(tok)
+            if term_id:
+                tf[term_id] += 1
+
+        # ignore counts of terms with zero corpus probability
+        tf *= np.isfinite(self.p_corpus)
 
         n_distinct = (tf > 0).sum()
 
@@ -124,8 +167,16 @@ def _document_model(self, d):
 
         return tf, p_term
 
-    def _EM(self, tf, p_term, w, max_iter, eps):
-        '''Expectation maximization.
+    def _EM(
+            self,
+            tf: np.ndarray,
+            p_term: np.ndarray,
+            w: Optional[np.floating],
+            max_iter: int,
+            eps: float
+    ) -> np.ndarray:
+        """
+        Expectation maximization.
 
         Parameters
         ----------
@@ -135,14 +186,16 @@ def _EM(self, tf, p_term, w, max_iter, eps):
             Term probabilities, as returned by document_model
         max_iter : int
             Number of iterations to run.
+        eps : float
+            Epsilon: convergence threshold for EM algorithm.
 
         Returns
         -------
         p_term : array of float
             A posteriori term probabilities.
-        '''
+        """
 
-        logger.info('EM with max_iter=%d, eps=%g' % (max_iter, eps))
+        logger.info(f'EM with max_iter={max_iter}, eps={eps}')
 
         if w is None:
             w = self.w
@@ -155,7 +208,7 @@ def _EM(self, tf, p_term, w, max_iter, eps):
         try:
             old_error_settings = np.seterr(divide='ignore')
             p_term = np.asarray(p_term)
-            for i in xrange(1, max_iter + 1):
+            for i in range(1, max_iter + 1):
                 # E-step
                 p_term += w
                 E = tf + p_term - np.logaddexp(p_corpus, p_term)
@@ -165,9 +218,8 @@ def _EM(self, tf, p_term, w, max_iter, eps):
 
                 diff = new_p_term - p_term
                 p_term = new_p_term
-                if (diff < eps).all():
-                    logger.info('EM: convergence reached after %d iterations'
-                                % i)
+                if (diff[np.isfinite(diff)] < eps).all():
+                    logger.info(f'EM: convergence reached after {i} iterations')
                     break
         finally:
             np.seterr(**old_error_settings)
diff --git a/weighwords/py.typed b/weighwords/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/weighwords/significant_words.py b/weighwords/significant_words.py
new file mode 100644
index 0000000..3e9d761
--- /dev/null
+++ b/weighwords/significant_words.py
@@ -0,0 +1,390 @@
+#!/usr/bin/env python3
+
+# Copyright 2019 TinQwise Stamkracht, University of Amsterdam
+# Author: Alex Olieman
+
+from __future__ import annotations
+# TODO: remove redundant typing imports once PEP 585 is finalized
+
+import logging
+from heapq import nlargest
+from operator import itemgetter
+from typing import Iterable, Optional, Sequence, Tuple, List, Dict, cast
+
+import numpy as np
+
+from weighwords import ParsimoniousLM
+from weighwords.logsum import logsum
+from weighwords.specific_term_estimators import (
+    SpecificTermEstimator,
+    RequiresMultipleDocuments,
+    mutual_exclusion,
+)
+
+logger = logging.getLogger(__name__)
+
+InitialLambdas = Tuple[np.floating, np.floating, np.floating]
+
+
+class SignificantWordsLM(ParsimoniousLM):
+    """
+    Language model that consists of three sub-models:
+
+    - Corpus model: represents term probabilities in a (large) background collection;
+    - Group model: parsimonious term probabilities in a group of documents;
+    - Specific model: represents the same group, but is biased towards terms that
+      occur with a high frequency in single docs, and a low frequency in others.
+
+    Parameters
+    ----------
+    documents : iterable over iterable of str terms
+        All documents that should be included in the corpus model.
+    lambdas : 3-tuple of float
+        Weight of corpus, group, and specific models. Will be normalized
+        if the weights in the tuple don't sum to one.
+    thresh : int
+        Don't include words that occur fewer than `thresh` times.
+
+    Attributes
+    ----------
+    vocab : dict of term -> int
+        Mapping of terms to numeric indices
+    p_corpus : array of float
+        Log probability of terms in background model (indexed by `vocab`)
+    p_group : array of float
+        Log probability of terms in background model (indexed by `vocab`)
+    p_specific : array of float
+        Log probability of terms in background model (indexed by `vocab`)
+    lambda_corpus : array of float
+        Log probability (weight) of corpus model for documents
+    lambda_group : array of float
+        Log probability (weight) of group model for documents
+    lambda_specific : array of float
+        Log probability (weight) of specific model for documents
+
+    Methods
+    -------
+    fit_parsimonious_group(document_group, ...)
+        Estimates a document group model, parsimonized against the corpus
+        and specific models. The documents may be unseen, but terms that
+        are not in the vocabulary will be ignored.
+    group_top(k, document_group, ...)
+        Shortcut to fit the group model and retrieve the top `k` terms.
+    get_term_probabilities(log_prob_distribution)
+        Aligns a term distribution with the vocabulary, and transforms
+        the term log probabilities to linear probabilities.
+
+    See Also
+    --------
+    parsimonious.ParsimoniousLM : one-sided parsimonious model
+
+    References
+    ----------
+    M. Dehghani, H. Azarbonyad, J. Kamps, D. Hiemstra, and M. Marx (2016).
+    `Luhn Revisited: Significant Words Language Models
+    <https://djoerdhiemstra.com/wp-content/uploads/cikm2016.pdf>`_.
+    Proc. CKIM'16.
+    """
+
+    def __init__(
+            self,
+            documents: Iterable[Iterable[str]],
+            lambdas: InitialLambdas,
+            thresh: int = 0
+    ):
+        """Collect the vocabulary and fit the background model."""
+        self.initial_lambdas = self.normalize_lambdas(lambdas)
+        super().__init__(documents, self.initial_lambdas[1], thresh=thresh)
+        self.lambda_corpus: Optional[np.ndarray] = None
+        self.lambda_group: Optional[np.ndarray] = None
+        self.lambda_specific: Optional[np.ndarray] = None
+        self.p_group: Optional[np.ndarray] = None
+        self.p_specific: Optional[np.ndarray] = None
+        self.fix_lambdas = False
+
+    def group_top(
+            self,
+            k: int,
+            document_group: Iterable[Iterable[str]],
+            **kwargs
+    ) -> List[Tuple[str, float]]:
+        """
+        Get the top `k` terms of a `document_group` and their probabilities.
+        This is a shortcut to retrieve the top terms found by `fit_parsimonious_group`.
+
+        Parameters
+        ----------
+        k : int
+            Number of top terms to return.
+        document_group : iterable over iterable of str terms
+            All documents that should be included in the group model.
+        kwargs
+            Optional keyword arguments for `fit_parsimonious_group`.
+
+        Returns
+        -------
+        t_p : list of (str, float)
+            Terms and their probabilities in the group model.
+
+        See Also
+        --------
+        SignificantWordsLM.fit_parsimonious_group
+        """
+        term_probabilities = self.fit_parsimonious_group(document_group, **kwargs)
+        return nlargest(k, term_probabilities.items(), itemgetter(1))
+
+    def fit_parsimonious_group(
+            self,
+            document_group: Iterable[Iterable[str]],
+            max_iter: int = 50,
+            eps: float = 1e-5,
+            lambdas: Optional[InitialLambdas] = None,
+            fix_lambdas: bool = False,
+            parsimonize_specific: bool = False,
+            post_parsimonize: bool = False,
+            specific_estimator: SpecificTermEstimator = mutual_exclusion
+    ) -> Dict[str, float]:
+        """
+        Estimate a document group model, and parsimonize it against fixed
+        corpus and specific models. The documents may be unseen, but any terms
+        that are not in the vocabulary will be ignored.
+
+        Parameters
+        ----------
+        document_group : iterable over iterable of str terms
+            All documents that should be included in the group model.
+        max_iter : int, optional
+            Maximum number of iterations of EM algorithm to run.
+        eps : float, optional
+            Epsilon: convergence threshold for EM algorithm.
+        lambdas : 3-tuple of float, optional
+            Weight of corpus, group, and specific models. Will be normalized
+            if the weights in the tuple don't sum to one.
+        fix_lambdas : bool, optional
+            Fix the weights of the three sub-models (i.e. don't estimate
+            lambdas as part of the M-step).
+        parsimonize_specific : bool, optional
+            Bias the specific model towards uncommon terms before applying
+            the EM algorithm to the group model. This generally results in
+            a group model that stands out less from the corpus model.
+        post_parsimonize : bool, optional
+            Bias the group model towards uncommon terms after applying
+            the EM algorithm. This may be used to compensate when the
+            frequency of common terms varies much between the documents
+            in the group.
+        specific_estimator : callable, optional
+            Function that estimates the specific terms model based on
+            the document term frequencies of the doc group.
+
+        Returns
+        -------
+        t_p_map : dict of term -> float
+            Dictionary of terms and their probabilities in the group model.
+        """
+        if lambdas is None:
+            lambdas = self.initial_lambdas
+        else:
+            lambdas = self.normalize_lambdas(lambdas)
+
+        self.fix_lambdas = fix_lambdas
+
+        document_models = [
+            self._document_model(doc)
+            for doc in document_group
+        ]
+        del document_group
+
+        doc_term_frequencies = [tf for tf, _ in document_models]
+        group_tf, p_group = self._group_model(
+            doc_term_frequencies
+        )
+        try:
+            self.p_specific = specific_estimator(doc_term_frequencies)
+        except RequiresMultipleDocuments:
+            logger.warning(
+                'Cannot calculate `p_specific` for a single document, '
+                'using `p_corpus` as replacement.'
+            )
+            self.p_specific = self.p_corpus
+
+        if parsimonize_specific:
+            self.p_specific = self._EM(
+                group_tf,
+                self.p_specific,
+                cast(np.floating, 1/3),
+                max_iter,
+                eps
+            )
+
+        weights_shape = len(document_models)
+        if self.fix_lambdas:
+            weights_shape = 1
+
+        general_w, group_w, specific_w = np.log(lambdas)
+        self.lambda_corpus = np.full(weights_shape, general_w, dtype=np.double)
+        self.lambda_specific = np.full(weights_shape, specific_w, dtype=np.double)
+        self.lambda_group = np.full(weights_shape, group_w, dtype=np.double)
+        logger.info(
+            f'Lambdas initialized to: Corpus={lambdas[0]:.4f}, '
+            f'Group={lambdas[1]:.4f}, Specific={lambdas[2]:.4f}'
+        )
+        self.p_group = self._estimate(
+            p_group, self.p_specific, doc_term_frequencies, max_iter, eps
+        )
+        if post_parsimonize:
+            self.p_group = self._EM(group_tf, self.p_group, self.w, max_iter, eps)
+
+        if self.fix_lambdas is False:
+            logger.info(
+                f'Final lambdas (mean): '
+                f'Corpus={np.mean(np.exp(self.lambda_corpus)):.4f}, '
+                f'Group={np.mean(np.exp(self.lambda_group)):.4f}, '
+                f'Specific={np.mean(np.exp(self.lambda_specific)):.4f}'
+            )
+        return self.get_term_probabilities(self.p_group)
+
+    def get_term_probabilities(
+            self,
+            log_prob_distribution: np.ndarray
+    ) -> Dict[str, float]:
+        """
+        Align a term distribution with the vocabulary, and transform
+        the term log probabilities to linear probabilities.
+
+        Parameters
+        ----------
+        log_prob_distribution : array of float
+            Log probability of terms which is indexed by the vocabulary.
+
+        Returns
+        -------
+        t_p_map : dict of term -> float
+            Dictionary of terms and their probabilities in the (sub-)model.
+        """
+        probabilities = np.exp(log_prob_distribution)
+        probabilities[np.isnan(probabilities)] = 0.
+        return {t: probabilities[i] for t, i in self.vocab.items()}
+
+    def _estimate(
+            self,
+            p_group: np.ndarray,
+            p_specific: np.ndarray,
+            doc_tf: Sequence[np.ndarray],
+            max_iter: int,
+            eps: float
+    ) -> np.ndarray:
+        """Apply the Expectation Maximization algorithm."""
+        try:
+            old_error_settings = np.seterr(divide='ignore')
+            log_doc_tf = np.log(doc_tf)
+            for i in range(1, 1 + max_iter):
+                expectation = self._e_step(p_group, p_specific)
+                new_p_group = self._m_step(expectation, log_doc_tf)
+
+                diff = new_p_group - p_group
+                p_group = new_p_group
+                if (diff[np.isfinite(diff)] < eps).all():
+                    logger.info(f'EM: convergence reached after {i} iterations')
+                    break
+        finally:
+            np.seterr(**old_error_settings)
+
+        return p_group
+
+    def _e_step(
+            self,
+            p_group: np.ndarray,
+            p_specific: np.ndarray
+    ) -> Dict[str, np.ndarray]:
+        """Run an E-step."""
+        corpus_numerator = np.add.outer(self.lambda_corpus, self.p_corpus)
+        specific_numerator = np.add.outer(self.lambda_specific, p_specific)
+        group_numerator = np.add.outer(self.lambda_group, p_group)
+        denominator = [
+            logsum(np.asarray(doc_numerators))
+            for doc_numerators in zip(
+                corpus_numerator,
+                specific_numerator,
+                group_numerator
+            )
+        ]
+        out = {
+            'corpus': corpus_numerator - denominator,
+            'specific': specific_numerator - denominator,
+            'group': group_numerator - denominator
+        }
+        # prevent NaNs from causing downstream errors
+        for v in out.values():
+            v[np.isnan(v)] = np.NINF
+
+        return out
+
+    def _m_step(
+            self,
+            expectation: Dict[str, np.ndarray],
+            log_doc_tf: Sequence[np.ndarray]
+    ) -> np.ndarray:
+        """Run an M-step."""
+        term_weighted_group = log_doc_tf + expectation['group']
+        group_numerator = logsum(term_weighted_group)
+        p_group = group_numerator - logsum(group_numerator)
+
+        if self.fix_lambdas is False:
+            # estimate lambdas
+            corpus_numerator = logsum(
+                np.transpose(log_doc_tf + expectation['corpus'])
+            )
+            specific_numerator = logsum(
+                np.transpose(log_doc_tf + expectation['specific'])
+            )
+            group_numerator = logsum(np.transpose(term_weighted_group))
+            denominator = logsum(
+                np.asarray([corpus_numerator, specific_numerator, group_numerator])
+            )
+            self.lambda_corpus = corpus_numerator - denominator
+            self.lambda_specific = specific_numerator - denominator
+            self.lambda_group = group_numerator - denominator
+
+        return p_group
+
+    @staticmethod
+    def _group_model(
+            document_term_frequencies: Sequence[np.ndarray]
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """Create the initial group model."""
+        group_tf = np.array(document_term_frequencies).sum(axis=0)
+
+        try:
+            old_error_settings = np.seterr(divide='ignore')
+            p_group = np.log(group_tf) - np.log(np.sum(group_tf))
+        finally:
+            np.seterr(**old_error_settings)
+
+        return group_tf, p_group
+
+    @staticmethod
+    def normalize_lambdas(lambdas: InitialLambdas) -> InitialLambdas:
+        """
+        Check and normalize the initial lambdas of the three sub-models.
+
+        Parameters
+        ----------
+        lambdas : 3-tuple of float
+            Weight of corpus, group, and specific models.
+
+        Returns
+        -------
+        lambdas : 3-tuple of float
+            Normalized probability of corpus, group, and specific models.
+        """
+        assert len(lambdas) == 3, f'lambdas should be a 3-tuple, not {lambdas}'
+        total_weight = sum(lambdas)
+        if abs(total_weight - 1) > 1e-10:
+            lambdas = cast(
+                InitialLambdas,
+                tuple(
+                    w / total_weight
+                    for w in lambdas
+                )
+            )
+        return lambdas
diff --git a/weighwords/specific_term_estimators.py b/weighwords/specific_term_estimators.py
new file mode 100644
index 0000000..158abd9
--- /dev/null
+++ b/weighwords/specific_term_estimators.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+
+# Copyright 2019 TinQwise Stamkracht, University of Amsterdam
+# Author: Alex Olieman
+
+from __future__ import annotations
+# TODO: remove redundant typing imports once PEP 585 is finalized
+
+import functools
+import logging
+from typing import Sequence, Callable
+
+import numpy as np
+from weighwords.logsum import logsum
+
+logger = logging.getLogger(__name__)
+
+SpecificTermEstimator = Callable[[Sequence[np.ndarray]], np.ndarray]
+
+
+class RequiresMultipleDocuments(Exception):
+    pass
+
+
+def requires_multiple_docs(estimator_func: SpecificTermEstimator):
+    """
+    Do not let the decorated function be called with fewer than two docs.
+
+    Parameters
+    ----------
+    estimator_func : SpecificTermEstimator
+
+    Raises
+    ------
+    RequiresMultipleDocuments
+
+    Returns
+    -------
+    decorated_func : SpecificTermEstimator
+    """
+    @functools.wraps(estimator_func)
+    def wrapper_func(document_term_frequencies):
+        if len(document_term_frequencies) < 2:
+            raise RequiresMultipleDocuments
+
+        return estimator_func(document_term_frequencies)
+
+    return wrapper_func
+
+
+@requires_multiple_docs
+def mutual_exclusion(
+        document_term_frequencies: Sequence[np.ndarray]
+) -> np.ndarray:
+    """Estimate the fixed specific model with the mutual exclusion method."""
+    doc_term_probs = [
+        np.log(tf) - np.log(np.sum(tf))
+        for tf in document_term_frequencies
+    ]
+    # complement events: 1 - p
+    complements = [
+        np.log1p(-np.exp(p_doc))
+        for p_doc in doc_term_probs
+    ]
+    # probability of term to be important in one doc, and not others
+    complement_products = np.array([
+        dlm + complement
+        for i, dlm in enumerate(doc_term_probs)
+        for j, complement in enumerate(complements)
+        if i != j
+    ])
+    # marginalize over all documents
+    p_specific = (
+        logsum(complement_products)
+        - np.log(
+            np.count_nonzero(complement_products > np.NINF, axis=0)
+        )
+    )
+    # prevent NaNs from causing downstream errors
+    p_specific[np.isnan(p_specific)] = np.NINF
+
+    return p_specific
+
+
+@requires_multiple_docs
+def inverse_doc_frequency(
+        document_term_frequencies: Sequence[np.ndarray]
+) -> np.ndarray:
+    """Estimate the fixed specific model with the inverse doc frequency method."""
+    idf = 1 / np.count_nonzero(document_term_frequencies, axis=0)
+    idf[~np.isfinite(idf)] = 0.
+
+    # calculate normalized idf as log-probabilities
+    p_specific = np.log(idf) - np.log(np.sum(idf))
+
+    return p_specific
+
+
+def idf_fallback_for_many_docs(
+        document_term_frequencies: Sequence[np.ndarray],
+        primary_estimator: SpecificTermEstimator,
+        fallback_thresh: int
+):
+    if len(document_term_frequencies) < fallback_thresh:
+        estimator_func = primary_estimator
+    else:
+        estimator_func = inverse_doc_frequency
+        logger.warning(
+            f'Estimator got more than {fallback_thresh} docs:'
+            ' falling back to IDF for the current doc group.'
+        )
+
+    return estimator_func(document_term_frequencies)
+
+
+me_up_to_40_docs = functools.partial(
+    idf_fallback_for_many_docs,
+    primary_estimator=mutual_exclusion,
+    fallback_thresh=40
+)