diff --git a/.gitignore b/.gitignore
index 33dab8536..5241ecd73 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 *.egg-info
 *.so
 *.o
+MANIFEST
 README.html
 build/
 dist/
diff --git a/CHANGE_LOG b/CHANGE_LOG
index fe6621a54..58afbd8c5 100644
--- a/CHANGE_LOG
+++ b/CHANGE_LOG
@@ -1,5 +1,75 @@
-2013-XX-XX   0.8.2:
+2019-XX-XX   1.1.0:
 -------------------
+  * add optional start and stop parameters to .count() method
+  * add official Python 3.8 support
+  * update documentation to use positional-only syntax in docstrings
+  * update readme to pass Python 3 doctest
+
+
+2019-07-19   1.0.1:
+-------------------
+  * fix readme to pass ``twine check``
+
+
+2019-07-15   1.0.0:
+-------------------
+  * fix bitarrays being created from unicode in Python 2
+  * use ``PyBytes_*`` in C code, treating the Py3k function names as default,
+    which also removes all redefinitions of ``PyString_*``
+  * handle negative arguments of .index() method consistently with how
+    they are treated for lists
+  * add a few more comments to the C code
+  * move imports outside tests: pickle, io, etc.
+  * drop Python 2.5 support
+
+
+2019-05-20   0.9.3:
+-------------------
+  * refactor resize() - only shrink allocated memory if new size falls
+    lower than half the allocated size
+  * improve error message when trying to initialize from float or complex
+
+
+2019-04-29   0.9.2:
+-------------------
+  * fix failure to compile on Windows with VS 2015, issue #72
+
+
+2019-04-28   0.9.1:
+-------------------
+  * fix types to actually be types, #29
+  * check for ambiguous prefix codes when building binary tree for decoding
+  * remove Python level methods: encode, decode, iterdecode (in favor of
+    having these implemented on the C-level along with check_codedict)
+  * fix self tests for Python 2.5 and 2.6
+  * move all Huffman code related example code into examples/huffman
+  * add code to generate graphviz .dot file of Huffman tree to examples
+
+
+2019-04-22   0.9.0:
+-------------------
+  * more efficient decode and iterdecode by using C-level binary tree
+    instead of a python one, #54
+  * added buffer protocol support for Python 3, #55
+  * fixed invalid pointer exceptions in pypy, #47
+  * made all examples Py3k compatible
+  * add gene sequence example
+  * add official Python 3.7 support
+  * drop Python 2.4, 3.1 and 3.2 support
+
+
+2018-07-06   0.8.3:
+-------------------
+  * add exception to setup.py when README.rst cannot be opened
+
+
+2018-05-30   0.8.2:
+-------------------
+  * add official Python 3.6 support (although it was already working)
+  * fix description of fill(), #52
+  * handle extending self correctly, #28
+  * copy_n: fast copy with memmove fixed, #43
+  * minor clarity/wording changes to README, #23
 
 
 2013-03-30   0.8.1:
@@ -75,7 +145,7 @@
 2009-01-15   0.3.4:
 -------------------
   * Made C code less ambiguous, such that the package compiles on
-    Visual Studio, will all tests passing.
+    Visual Studio, with all tests passing.
 
 
 2008-12-14   0.3.3:
@@ -99,7 +169,7 @@
 
 2008-09-30   0.3.0:
 -------------------
-  * Fixed a severe bug for 64bit machines.  Implemented all methods in C,
+  * Fixed a severe bug for 64-bit machines.  Implemented all methods in C,
     improved tests.
   * Removed deprecated methods from01 and fromlist.
 
diff --git a/Makefile b/Makefile
index cf7a8ed95..9a71f2c24 100644
--- a/Makefile
+++ b/Makefile
@@ -8,11 +8,14 @@ test: bitarray/_bitarray.so
 
 doc: bitarray/_bitarray.so
 	$(PYTHON) update_readme.py
+	$(PYTHON) setup.py sdist
+	twine check dist/*
 
 
 clean:
 	rm -rf build dist
 	rm -f bitarray/*.o bitarray/*.so
 	rm -f bitarray/*.pyc
+	rm -f examples/*.pyc
 	rm -rf bitarray/__pycache__ *.egg-info
-	rm -f README.html
+	rm -rf examples/__pycache__
diff --git a/README.rst b/README.rst
index b98efea73..39b073353 100644
--- a/README.rst
+++ b/README.rst
@@ -5,7 +5,7 @@ bitarray: efficient arrays of booleans
 This module provides an object type which efficiently represents an array
 of booleans.  Bitarrays are sequence types and behave very much like usual
 lists.  Eight bits are represented by one byte in a contiguous block of
-memory.  The user can select between two representations; little-endian
+memory.  The user can select between two representations: little-endian
 and big-endian.  All of the functionality is implemented in C.
 Methods for accessing the machine representation are provided.
 This can be useful when bit level access to binary files is required,
@@ -24,25 +24,22 @@ Key features
 
  * The bit endianness can be specified for each bitarray object, see below.
 
- * On 32bit systems, a bitarray object can contain up to 2^34 elements,
-   that is 16 Gbits (on 64bit machines up to 2^63 elements in theory --
-   on Python 2.4 only 2^31 elements,
-   see `PEP 353 <http://www.python.org/dev/peps/pep-0353/>`_
-   (added in Python 2.5)).
-
  * Packing and unpacking to other binary data formats,
    e.g. `numpy.ndarray <http://www.scipy.org/Tentative_NumPy_Tutorial>`_,
    is possible.
 
  * Fast methods for encoding and decoding variable bit length prefix codes
 
- * Sequential search (as list or iterator)
-
  * Bitwise operations: ``&, |, ^, &=, |=, ^=, ~``
 
+ * Sequential search
+
  * Pickling and unpickling of bitarray objects possible.
 
- * Bitarray objects support the buffer protocol (Python 2.7 only)
+ * Bitarray objects support the buffer protocol (Python 2.7 and above)
+
+ * On 32-bit systems, a bitarray object can contain up to 2^34 elements,
+   that is 16 Gbits (on 64-bit machines up to 2^63 elements in theory).
 
 
 Installation
@@ -50,29 +47,27 @@ Installation
 
 bitarray can be installed from source::
 
-   $ tar xzf bitarray-0.8.2.tar.gz
-   $ cd bitarray-0.8.2
+   $ tar xzf bitarray-1.1.0.tar.gz
+   $ cd bitarray-1.1.0
    $ python setup.py install
 
 On Unix systems, the latter command may have to be executed with root
-privileges.
-If you have `distribute <http://pypi.python.org/pypi/distribute/>`_
-installed, you can easy_install bitarray.
+privileges.  You can also pip install bitarray.
 Once you have installed the package, you may want to test it::
 
    $ python -c 'import bitarray; bitarray.test()'
    bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray
-   bitarray version: 0.8.2
-   2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)]
+   bitarray version: 1.1.0
+   3.7.4 (r271:86832, Dec 29 2019) [GCC 4.2.1 (SUSE Linux)]
    .........................................................................
-   ...........................................
+   .................................................................
    ----------------------------------------------------------------------
-   Ran 134 tests in 1.396s
+   Ran 148 tests in 2.229s
    
    OK
 
 You can always import the function test,
-and ``test().wasSuccessful()`` will return True when the test went well.
+and ``test().wasSuccessful()`` will return ``True`` when the test went well.
 
 
 
@@ -80,9 +75,9 @@ Using the module
 ----------------
 
 As mentioned above, bitarray objects behave very much like lists, so
-there is not too new to learn.  The biggest difference to list objects
+there is not too much to learn.  The biggest difference from list objects
 is the ability to access the machine representation of the object.
-When doing so, the bit endianness is of importance, this issue is
+When doing so, the bit endianness is of importance; this issue is
 explained in detail in the section below.  Here, we demonstrate the
 basic usage of bitarray objects:
 
@@ -111,7 +106,7 @@ being applied, whenever casting an object:
    bitarray('101010')
    >>> a.append(a)      # note that bool(a) is True
    >>> a.count(42)      # counts occurrences of True (not 42)
-   4L
+   4
    >>> a.remove('')     # removes first occurrence of False
    >>> a
    bitarray('110101')
@@ -156,8 +151,8 @@ Bit endianness
 --------------
 
 Since a bitarray allows addressing of individual bits, where the machine
-represents 8 bits in one byte, there two obvious choices for this mapping;
-little- and big-endian.
+represents 8 bits in one byte, there are two obvious choices for this
+mapping: little- and big-endian.
 When creating a new bitarray object, the endianness can always be
 specified explicitly:
 
@@ -167,7 +162,7 @@ specified explicitly:
    bitarray('10000010')
    >>> b = bitarray('11000010', endian='little')
    >>> b.tobytes()
-   'C'
+   b'C'
 
 Here, the low-bit comes first because little-endian means that increasing
 numeric significance corresponds to an increasing address (index).
@@ -180,7 +175,7 @@ and most significant bit.
    bitarray('01000001')
    >>> a[6] = 1
    >>> a.tobytes()
-   'C'
+   b'C'
 
 Here, the high-bit comes first because big-endian
 means "most-significant first".
@@ -233,11 +228,11 @@ endianness:
    >>> a.tobytes() == b.tobytes()
    True
 
-The default bit endianness is currently big-endian, however this may change
+The default bit endianness is currently big-endian; however, this may change
 in the future, and when dealing with the machine representation of bitarray
 objects, it is recommended to always explicitly specify the endianness.
 
-Unless, explicitly converting to machine representation, using
+Unless explicitly converting to machine representation, using
 the ``tobytes``, ``frombytes``, ``tofile`` and ``fromfile`` methods,
 the bit endianness will have no effect on any computation, and one
 can safely ignore setting the endianness, and other details of this section.
@@ -256,12 +251,12 @@ interpreted as simple bytes.
    >>> len(v)
    3
    >>> v[-1]
-   'C'
+   67
    >>> v[:2].tobytes()
-   'AB'
+   b'AB'
    >>> v.readonly  # changing a bitarray's memory is also possible
    False
-   >>> v[1] = 'o'
+   >>> v[1] = 111
    >>> a
    bitarray('010000010110111101000011')
 
@@ -340,7 +335,7 @@ Reference
    Returns True when any bit in the array is True.
 
 
-``append(item)``
+``append(item, /)``
    Append the value bool(item) to the end of the bitarray.
 
 
@@ -362,26 +357,26 @@ Reference
    Return a copy of the bitarray.
 
 
-``count([value])`` -> int
+``count(value=True, start=0, stop=<end of array>, /)`` -> int
    Return number of occurrences of value (defaults to True) in the bitarray.
 
 
-``decode(code)`` -> list
+``decode(code, /)`` -> list
    Given a prefix code (a dict mapping symbols to bitarrays),
-   decode the content of the bitarray and return the list of symbols.
+   decode the content of the bitarray and return it as a list of symbols.
 
 
-``encode(code, iterable)``
+``encode(code, iterable, /)``
    Given a prefix code (a dict mapping symbols to bitarrays),
-   iterates over iterable object with symbols, and extends the bitarray
+   iterate over the iterable object with symbols, and extend the bitarray
    with the corresponding bitarray for each symbols.
 
 
-``endian()`` -> string
+``endian()`` -> str
    Return the bit endianness as a string (either 'little' or 'big').
 
 
-``extend(object)``
+``extend(iterable, /)``
    Append bits to the end of the bitarray.  The objects which can be passed
    to this method are the same iterable objects which can given to a bitarray
    object upon initialization.
@@ -389,30 +384,30 @@ Reference
 
 ``fill()`` -> int
    Adds zeros to the end of the bitarray, such that the length of the bitarray
-   is not a multiple of 8.  Returns the number of bits added (0..7).
+   will be a multiple of 8.  Returns the number of bits added (0..7).
 
 
-``frombytes(bytes)``
+``frombytes(bytes, /)``
    Append from a byte string, interpreted as machine values.
 
 
-``fromfile(f, [n])``
+``fromfile(f, n=<till EOF>, /)``
    Read n bytes from the file object f and append them to the bitarray
    interpreted as machine values.  When n is omitted, as many bytes are
    read until EOF is reached.
 
 
-``fromstring(string)``
+``fromstring(str)``
    Append from a string, interpreting the string as machine values.
    Deprecated since version 0.4.0, use ``frombytes()`` instead.
 
 
-``index(value, [start, [stop]])`` -> int
+``index(value, start=0, stop=<end of array>, /)`` -> int
    Return index of the first occurrence of bool(value) in the bitarray.
    Raises ValueError if the value is not present.
 
 
-``insert(i, item)``
+``insert(i, item, /)``
    Insert bool(item) into the bitarray before position i.
 
 
@@ -421,12 +416,13 @@ Reference
    i.e. convert each 1-bit into a 0-bit and vice versa.
 
 
-``iterdecode(code)`` -> iterator
+``iterdecode(code, /)`` -> iterator
    Given a prefix code (a dict mapping symbols to bitarrays),
-   decode the content of the bitarray and iterate over the symbols.
+   decode the content of the bitarray and return an iterator over
+   the symbols.
 
 
-``itersearch(bitarray)`` -> iterator
+``itersearch(bitarray, /)`` -> iterator
    Searches for the given a bitarray in self, and return an iterator over
    the start positions where bitarray matches self.
 
@@ -439,7 +435,7 @@ Reference
    on 32bit and 64bit machines.
 
 
-``pack(bytes)``
+``pack(bytes, /)``
    Extend the bitarray from a byte string, where each characters corresponds to
    a single bit.  The character b'\x00' maps to bit 0 and all other characters
    map to bit 1.
@@ -448,12 +444,12 @@ Reference
    (for example NumPy's ndarray object) which have a different view of memory.
 
 
-``pop([i])`` -> item
+``pop(index=-1, /)`` -> item
    Return the i-th (default last) element and delete it from the bitarray.
    Raises IndexError if bitarray is empty or index is out of range.
 
 
-``remove(item)``
+``remove(item, /)``
    Remove the first occurrence of bool(item) in the bitarray.
    Raises ValueError if item is not present.
 
@@ -462,14 +458,14 @@ Reference
    Reverse the order of bits in the array (in-place).
 
 
-``search(bitarray, [limit])`` -> list
-   Searches for the given a bitarray in self, and returns the start positions
-   where bitarray matches self as a list.
+``search(bitarray, limit=<none>, /)`` -> list
+   Search for the given bitarray in self, and return the list of start
+   positions.
    The optional argument limits the number of search results to the integer
    specified.  By default, all search results are returned.
 
 
-``setall(value)``
+``setall(value, /)``
    Set all bits in the bitarray to bool(value).
 
 
@@ -477,7 +473,7 @@ Reference
    Sort the bits in the array (in-place).
 
 
-``to01()`` -> string
+``to01()`` -> str
    Return a string containing '0's and '1's, representing the bits in the
    bitarray object.
    Note: To extend a bitarray from a string containing '0's and '1's,
@@ -490,7 +486,7 @@ Reference
    bits (1..7) are set to 0.
 
 
-``tofile(f)``
+``tofile(f, /)``
    Write all bits (as machine values) to the file object f.
    When the length of the bitarray is not a multiple of 8,
    the remaining bits (1..7) are set to 0.
@@ -505,7 +501,7 @@ Reference
    use the extend method.
 
 
-``tostring()`` -> string
+``tostring()`` -> str
    Return the string representing (machine values) of the bitarray.
    When the length of the bitarray is not a multiple of 8, the few remaining
    bits (1..7) are set to 0.
@@ -524,42 +520,48 @@ Reference
    Run self-test, and return unittest.runner.TextTestResult object.
 
 
-``bitdiff(a, b)`` -> int
+``bitdiff(a, b, /)`` -> int
    Return the difference between two bitarrays a and b.
    This is function does the same as (a ^ b).count(), but is more memory
-   efficient, as no intermediate bitarray object gets created
+   efficient, as no intermediate bitarray object gets created.
 
 
-``bits2bytes(n)`` -> int
+``bits2bytes(n, /)`` -> int
    Return the number of bytes necessary to store n bits.
 
 
 Change log
 ----------
 
-2013-XX-XX   0.8.2:
+2019-XX-XX   1.1.0:
 
+  * add optional start and stop parameters to .count() method
+  * update documentation to use positional-only syntax in docstrings
+  * update readme to pass Python 3 doctest
 
 
-**0.8.1** (2013-03-30):
+**1.0.1** (2019-07-19):
 
-  * fix issue #10, i.e. int(bitarray()) segfault
-  * added tests for using a bitarray object as an argument to functions
-    like int, long (on Python 2), float, list, tuple, dict
+  * fix readme to pass ``twine check``
 
 
-**0.8.0** (2012-04-04):
+**1.0.0** (2019-07-15):
 
-  * add Python 2.4 support
-  * add (module level) function bitdiff for calculating the difference
-    between two bitarrays
+  * fix bitarrays being created from unicode in Python 2
+  * use ``PyBytes_*`` in C code, treating the Py3k function names as default,
+    which also removes all redefinitions of ``PyString_*``
+  * handle negative arguments of .index() method consistently with how
+    they are treated for lists
+  * add a few more comments to the C code
+  * move imports outside tests: pickle, io, etc.
+  * drop Python 2.5 support
 
 
-**0.7.0** (2012-02-15):
+**0.9.3** (2019-05-20):
 
-  * add iterdecode method (C level), which returns an iterator but is
-    otherwise like the decode method
-  * improve memory efficiency and speed of pickling large bitarray objects
+  * refactor resize() - only shrink allocated memory if new size falls
+    lower than half the allocated size
+  * improve error message when trying to initialize from float or complex
 
 
 Please find the complete change log
diff --git a/TODO b/TODO
index 99243f1c2..7ea955040 100644
--- a/TODO
+++ b/TODO
@@ -40,9 +40,25 @@ for i in xrange(1000000):
 print sorted(sizes)
 
 
+/* walk the binary tree, and display information for each node */
+static void
+display_binode_tree(binode *tree)
+{
+    int k;
 
-If I should ever decide to use pre-calculated tables:
+    printf("id: %p child0: %14p child1: %14p symbol: %s\n",
+           tree,
+           tree->child[0],
+           tree->child[1],
+           tree->symbol ? PyString_AsString(tree->symbol) : "NULL");
 
+    for (k = 0; k < 2; k++)
+        if (tree->child[k])
+            display_binode_tree(tree->child[k]);
+}
+
+
+# If I should ever decide to use pre-calculated tables:
 static char bytereverse_trans[256] = {
     0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
     0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
diff --git a/bitarray/__init__.py b/bitarray/__init__.py
index 7ccea7c64..708eb8957 100644
--- a/bitarray/__init__.py
+++ b/bitarray/__init__.py
@@ -10,40 +10,7 @@
 """
 from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo
 
-__version__ = '0.8.2'
-
-
-def _tree_insert(tree, sym, ba):
-    """
-    Insert symbol which is mapped to bitarray into tree
-    """
-    v = ba[0]
-    if len(ba) > 1:
-        if tree[v] == []:
-            tree[v] = [[], []]
-        _tree_insert(tree[v], sym, ba[1:])
-    else:
-        if tree[v] != []:
-            raise ValueError("prefix code ambiguous")
-        tree[v] = sym
-
-def _mk_tree(codedict):
-    # Generate tree from codedict
-    tree = [[], []]
-    for sym, ba in codedict.items():
-        _tree_insert(tree, sym, ba)
-    return tree
-
-def _check_codedict(codedict):
-    if not isinstance(codedict, dict):
-        raise TypeError("dictionary expected")
-    if len(codedict) == 0:
-        raise ValueError("prefix code empty")
-    for k, v in codedict.items():
-        if not isinstance(v, bitarray):
-            raise TypeError("bitarray expected for dictionary value")
-        if v.length() == 0:
-            raise ValueError("non-empty bitarray expected")
+__version__ = '1.1.0'
 
 
 class bitarray(_bitarray):
@@ -78,14 +45,14 @@ class bitarray(_bitarray):
 fromfile, tobytes, frombytes."""
 
     def fromstring(self, string):
-        """fromstring(string)
+        """fromstring(str)
 
 Append from a string, interpreting the string as machine values.
 Deprecated since version 0.4.0, use ``frombytes()`` instead."""
         return self.frombytes(string.encode())
 
     def tostring(self):
-        """tostring() -> string
+        """tostring() -> str
 
 Return the string representing (machine values) of the bitarray.
 When the length of the bitarray is not a multiple of 8, the few remaining
@@ -93,39 +60,14 @@ def tostring(self):
 Deprecated since version 0.4.0, use ``tobytes()`` instead."""
         return self.tobytes().decode()
 
-    def decode(self, codedict):
-        """decode(code) -> list
-
-Given a prefix code (a dict mapping symbols to bitarrays),
-decode the content of the bitarray and return the list of symbols."""
-        _check_codedict(codedict)
-        return self._decode(_mk_tree(codedict))
-
-    def iterdecode(self, codedict):
-        """iterdecode(code) -> iterator
-
-Given a prefix code (a dict mapping symbols to bitarrays),
-decode the content of the bitarray and iterate over the symbols."""
-        _check_codedict(codedict)
-        return self._iterdecode(_mk_tree(codedict))
-
-    def encode(self, codedict, iterable):
-        """encode(code, iterable)
-
-Given a prefix code (a dict mapping symbols to bitarrays),
-iterates over iterable object with symbols, and extends the bitarray
-with the corresponding bitarray for each symbols."""
-        _check_codedict(codedict)
-        self._encode(codedict, iterable)
-
     def __int__(self):
-        raise TypeError("int() argument cannot be a bitarray")
+        raise TypeError("int() cannot take bitarray as argument")
 
     def __long__(self):
-        raise TypeError("long() argument cannot be a bitarray")
+        raise TypeError("long() cannot take bitarray as argument")
 
     def __float__(self):
-        raise TypeError("float() argument cannot be a bitarray")
+        raise TypeError("float() cannot take bitarray as argument")
 
 
 def test(verbosity=1, repeat=1):
diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c
index 48be51cea..8ad155a22 100644
--- a/bitarray/_bitarray.c
+++ b/bitarray/_bitarray.c
@@ -1,6 +1,9 @@
 /*
-   This file is the C part of the bitarray package.  Almost all
-   functionality is implemented here.
+   Copyright (c) 2008 - 2019, Ilan Schnell
+   bitarray is published under the PSF license.
+
+   This file is the C part of the bitarray package.
+   All functionality is implemented here.
 
    Author: Ilan Schnell
 */
@@ -13,42 +16,10 @@
 #endif
 
 #ifdef IS_PY3K
-#include "bytesobject.h"
-#define PyString_FromStringAndSize  PyBytes_FromStringAndSize
-#define PyString_FromString  PyBytes_FromString
-#define PyString_Check  PyBytes_Check
-#define PyString_Size  PyBytes_Size
-#define PyString_AsString  PyBytes_AsString
-#define PyString_ConcatAndDel  PyBytes_ConcatAndDel
 #define Py_TPFLAGS_HAVE_WEAKREFS  0
 #endif
 
-#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 5
-/* Py_ssize_t was introduced in Python 2.5, substitute long for it */
-typedef long Py_ssize_t;
-#define PY_SSIZE_T_MAX  LONG_MAX
-#define PY_SSIZE_T_MIN  LONG_MIN
-Py_ssize_t PyNumber_AsSsize_t(PyObject *o, PyObject *exc)
-{
-    return PyLong_AsLong(o);
-}
-int PyIndex_Check(PyObject *o)
-{
-    return 0;
-}
-#define PY_SSIZE_T_FMT  "l"
-#else
-/* Python 2.5 and up uses 'n' as the format char for Py_ssize_t */
-#define PY_SSIZE_T_FMT  "n"
-#endif
-
-#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6
-/* backward compatibility with Python 2.5 */
-#define Py_TYPE(ob)   (((PyObject *) (ob))->ob_type)
-#define Py_SIZE(ob)   (((PyVarObject *) (ob))->ob_size)
-#endif
-
-#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7
+#if PY_MAJOR_VERSION == 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7)
 /* (new) buffer protocol */
 #define WITH_BUFFER
 #endif
@@ -67,6 +38,10 @@ typedef long long int idx_t;
 /* throughout:  0 = little endian   1 = big endian */
 #define DEFAULT_ENDIAN  1
 
+/* Unlike the normal convention, ob_size is the byte count, not the number
+   of elements.  The reason for doing this is that we can use our own
+   special idx_t for the number of bits (which can exceed 2^32 on a 32-bit
+   machine).  */
 typedef struct {
     PyObject_VAR_HEAD
 #ifdef WITH_BUFFER
@@ -74,7 +49,7 @@ typedef struct {
 #endif
     char *ob_item;
     Py_ssize_t allocated;       /* how many bytes allocated */
-    idx_t nbits;                /* length og bitarray */
+    idx_t nbits;                /* length of bitarray, i.e. elements */
     int endian;                 /* bit endianness of bitarray */
     PyObject *weakreflist;      /* list of weak references */
 } bitarrayobject;
@@ -130,50 +105,46 @@ static int
 resize(bitarrayobject *self, idx_t nbits)
 {
     Py_ssize_t newsize;
-    size_t _new_size;       /* for allocation */
+    size_t new_allocated;
+    Py_ssize_t allocated = self->allocated;
 
     if (check_overflow(nbits) < 0)
         return -1;
-
     newsize = (Py_ssize_t) BYTES(nbits);
 
     /* Bypass realloc() when a previous overallocation is large enough
-       to accommodate the newsize.  If the newsize is 16 smaller than the
-       current size, then proceed with the realloc() to shrink the list.
+       to accommodate the newsize.  If the newsize falls lower than half
+       the allocated size, then proceed with the realloc() to shrink.
     */
-    if (self->allocated >= newsize &&
-        Py_SIZE(self) < newsize + 16 &&
-        self->ob_item != NULL)
-    {
+    if (allocated >= newsize && newsize >= (allocated >> 1)) {
+        assert(self->ob_item != NULL || newsize == 0);
         Py_SIZE(self) = newsize;
         self->nbits = nbits;
         return 0;
     }
 
-    if (newsize >= Py_SIZE(self) + 65536)
-        /* Don't overallocate when the size increase is very large. */
-        _new_size = newsize;
-    else
-        /* This over-allocates proportional to the bitarray size, making
-           room for additional growth.  The over-allocation is mild, but is
-           enough to give linear-time amortized behavior over a long
-           sequence of appends() in the presence of a poorly-performing
-           system realloc().
+    new_allocated = (size_t) newsize;
+    if (newsize < Py_SIZE(self) + 65536)
+        /* Over-allocate unless the size increase is very large.
+           This over-allocates proportional to the bitarray size, making
+           room for additional growth.
            The growth pattern is:  0, 4, 8, 16, 25, 34, 44, 54, 65, 77, ...
            Note, the pattern starts out the same as for lists but then
            grows at a smaller rate so that larger bitarrays only overallocate
            by about 1/16th -- this is done because bitarrays are assumed
            to be memory critical.
         */
-        _new_size = (newsize >> 4) + (Py_SIZE(self) < 8 ? 3 : 7) + newsize;
+        new_allocated += (newsize >> 4) + (newsize < 8 ? 3 : 7);
 
-    self->ob_item = PyMem_Realloc(self->ob_item, _new_size);
+    if (newsize == 0)
+        new_allocated = 0;
+    self->ob_item = PyMem_Realloc(self->ob_item, new_allocated);
     if (self->ob_item == NULL) {
         PyErr_NoMemory();
         return -1;
     }
     Py_SIZE(self) = newsize;
-    self->allocated = _new_size;
+    self->allocated = new_allocated;
     self->nbits = nbits;
     return 0;
 }
@@ -200,7 +171,7 @@ newbitarrayobject(PyTypeObject *type, idx_t nbits, int endian)
         obj->ob_item = NULL;
     }
     else {
-        obj->ob_item = PyMem_Malloc((size_t) nbytes);
+        obj->ob_item = (char *) PyMem_Malloc((size_t) nbytes);
         if (obj->ob_item == NULL) {
             PyObject_Del(obj);
             PyErr_NoMemory();
@@ -234,25 +205,34 @@ copy_n(bitarrayobject *self, idx_t a,
     assert(0 <= n && n <= self->nbits && n <= other->nbits);
     assert(0 <= a && a <= self->nbits - n);
     assert(0 <= b && b <= other->nbits - n);
+    if (n == 0)
+        return;
 
-    /* XXX
+    /* When the start positions are at byte positions, we can copy whole
+       bytes using memmove, and copy the remaining few bits individually.
+       Note that the order of these two operations matters when copying
+       self to self. */
     if (self->endian == other->endian && a % 8 == 0 && b % 8 == 0 && n >= 8)
     {
-        Py_ssize_t bytes;
-        idx_t bits;
+        const Py_ssize_t bytes = (Py_ssize_t) n / 8;
+        const idx_t bits = bytes * 8;
+
+        assert(bits <= n && n < bits + 8);
+        if (a <= b)
+            memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes);
+
+        if (n != bits)
+            copy_n(self, bits + a, other, bits + b, n - bits);
+
+        if (a > b)
+            memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes);
 
-        bytes = n / 8;
-        bits = 8 * bytes;
-        copy_n(self, bits + a, other, bits + b, n - bits);
-        memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes);
         return;
     }
-    */
 
-    /* the different type of looping is only relevant when other and self
-       are the same object, i.e. when copying a piece of an bitarrayobject
-       onto itself */
-    if (a < b) {
+    /* The different type of looping is only relevant when copying self to
+       self, i.e. when copying a piece of a bitarrayobject onto itself. */
+    if (a <= b) {
         for (i = 0; i < n; i++)             /* loop forward (delete) */
             setbit(self, i + a, GETBIT(other, i + b));
     }
@@ -367,6 +347,8 @@ bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper)
         for (i = 0; i < Py_SIZE(self); i++)
             self->ob_item[i] ^= other->ob_item[i];
         break;
+    default:  /* should never happen */
+        return -1;
     }
     return 0;
 }
@@ -443,16 +425,37 @@ static int bitcount_lookup[256] = {
 
 /* returns number of 1 bits */
 static idx_t
-count(bitarrayobject *self)
+count(bitarrayobject *self, idx_t start, idx_t stop)
 {
-    Py_ssize_t i;
-    idx_t res = 0;
+    Py_ssize_t byte_start, byte_stop, j;
+    idx_t i, res = 0;
     unsigned char c;
 
-    setunused(self);
-    for (i = 0; i < Py_SIZE(self); i++) {
-        c = self->ob_item[i];
-        res += bitcount_lookup[c];
+    assert(0 <= start && start <= self->nbits);
+    assert(0 <= stop && stop <= self->nbits);
+    assert(BYTES(stop) <= Py_SIZE(self));
+
+    if (self->nbits == 0 || start >= stop)
+        return 0;
+
+    if (stop >= start + 8) {
+        byte_start = BYTES(start);
+        byte_stop = stop / 8;
+        for (i = start; i < byte_start * 8; i++)
+            if (GETBIT(self, i))
+                res++;
+        for (j = byte_start; j < byte_stop; j++) {
+            c = self->ob_item[j];
+            res += bitcount_lookup[c];
+        }
+        for (i = byte_stop * 8; i < stop; i++)
+            if (GETBIT(self, i))
+                res++;
+    }
+    else {
+        for (i = start; i < stop; i++)
+            if (GETBIT(self, i))
+                res++;
     }
     return res;
 }
@@ -465,16 +468,14 @@ findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop)
     idx_t i;
     char c;
 
-    if (Py_SIZE(self) == 0)
-        return -1;
-    if (start < 0 || start > self->nbits)
-        start = 0;
-    if (stop < 0 || stop > self->nbits)
-        stop = self->nbits;
-    if (start >= stop)
+    assert(0 <= start && start <= self->nbits);
+    assert(0 <= stop && stop <= self->nbits);
+    assert(BYTES(stop) <= Py_SIZE(self));
+
+    if (self->nbits == 0 || start >= stop)
         return -1;
 
-    if (stop > start + 8) {
+    if (stop >= start + 8) {
         /* seraching for 1 means: break when byte is not 0x00
            searching for 0 means: break when byte is not 0xff */
         c = vi ? 0x00 : 0xff;
@@ -484,10 +485,6 @@ findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop)
             if (c ^ self->ob_item[j])
                 break;
 
-        if (j == Py_SIZE(self))
-            j--;
-        assert(0 <= j && j < Py_SIZE(self));
-
         if (start < BITS(j))
             start = BITS(j);
     }
@@ -500,8 +497,8 @@ findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop)
     return -1;
 }
 
-/* search for the first occurrence bitarray xa (in self), starting at p,
-   and return its position (-1 when not found)
+/* search for the first occurrence of bitarray xa (in self), starting at p,
+   and return its position (or -1 when not found)
 */
 static idx_t
 search(bitarrayobject *self, bitarrayobject *xa, idx_t p)
@@ -545,7 +542,7 @@ append_item(bitarrayobject *self, PyObject *item)
 static PyObject *
 unpack(bitarrayobject *self, char zero, char one)
 {
-    PyObject *res;
+    PyObject *result;
     Py_ssize_t i;
     char *str;
 
@@ -553,7 +550,7 @@ unpack(bitarrayobject *self, char zero, char one)
         PyErr_SetString(PyExc_OverflowError, "bitarray too large to unpack");
         return NULL;
     }
-    str = PyMem_Malloc((size_t) self->nbits);
+    str = (char *) PyMem_Malloc((size_t) self->nbits);
     if (str == NULL) {
         PyErr_NoMemory();
         return NULL;
@@ -561,24 +558,29 @@ unpack(bitarrayobject *self, char zero, char one)
     for (i = 0; i < self->nbits; i++) {
         *(str + i) = GETBIT(self, i) ? one : zero;
     }
-    res = PyString_FromStringAndSize(str, (Py_ssize_t) self->nbits);
+    result = PyBytes_FromStringAndSize(str, (Py_ssize_t) self->nbits);
     PyMem_Free((void *) str);
-    return res;
+    return result;
 }
 
 static int
 extend_bitarray(bitarrayobject *self, bitarrayobject *other)
 {
     idx_t n_sum;
+    idx_t n_other_bits;
 
     if (other->nbits == 0)
         return 0;
 
+    /* Note that other may be self.  Thus we take the size before we resize,
+       ensuring we only copy the right parts of the array. */
+    n_other_bits = other->nbits;
     n_sum = self->nbits + other->nbits;
+
     if (resize(self, n_sum) < 0)
         return -1;
 
-    copy_n(self, n_sum - other->nbits, other, 0, other->nbits);
+    copy_n(self, n_sum - n_other_bits, other, 0, n_other_bits);
     return 0;
 }
 
@@ -649,30 +651,30 @@ extend_tuple(bitarrayobject *self, PyObject *tuple)
     return 0;
 }
 
-/* extend_string(): extend the bitarray from a string, where each whole
-   characters is converted to a single bit
+/* extend_bytes(): extend the bitarray from a PyBytes object, where each
+   whole character is converted to a single bit
 */
-enum conv_tp {
+enum conv_t {
     STR_01,    /*  '0' -> 0    '1'  -> 1   no other characters allowed */
     STR_RAW,   /*  0x00 -> 0   other -> 1                              */
 };
 
 static int
-extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv)
+extend_bytes(bitarrayobject *self, PyObject *bytes, enum conv_t conv)
 {
     Py_ssize_t strlen, i;
     char c, *str;
     int vi = 0;
 
-    assert(PyString_Check(string));
-    strlen = PyString_Size(string);
+    assert(PyBytes_Check(bytes));
+    strlen = PyBytes_Size(bytes);
     if (strlen == 0)
         return 0;
 
     if (resize(self, self->nbits + strlen) < 0)
         return -1;
 
-    str = PyString_AsString(string);
+    str = PyBytes_AsString(bytes);
 
     for (i = 0; i < strlen; i++) {
         c = *(str + i);
@@ -691,6 +693,8 @@ extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv)
         case STR_RAW:
             vi = c ? 1 : 0;
             break;
+        default:  /* should never happen */
+            return -1;
         }
         setbit(self, self->nbits - strlen + i, vi);
     }
@@ -698,20 +702,20 @@ extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv)
 }
 
 static int
-extend_rawstring(bitarrayobject *self, PyObject *string)
+extend_rawbytes(bitarrayobject *self, PyObject *bytes)
 {
     Py_ssize_t strlen;
     char *str;
 
-    assert(PyString_Check(string) && self->nbits % 8 == 0);
-    strlen = PyString_Size(string);
+    assert(PyBytes_Check(bytes) && self->nbits % 8 == 0);
+    strlen = PyBytes_Size(bytes);
     if (strlen == 0)
         return 0;
 
     if (resize(self, self->nbits + BITS(strlen)) < 0)
         return -1;
 
-    str = PyString_AsString(string);
+    str = PyBytes_AsString(bytes);
     memcpy(self->ob_item + (Py_SIZE(self) - strlen), str, strlen);
     return 0;
 }
@@ -732,18 +736,16 @@ extend_dispatch(bitarrayobject *self, PyObject *obj)
     if (PyTuple_Check(obj))                                  /* tuple */
         return extend_tuple(self, obj);
 
-    if (PyString_Check(obj))                                 /* str01 */
-        return extend_string(self, obj, STR_01);
+    if (PyBytes_Check(obj))                              /* string 01 */
+        return extend_bytes(self, obj, STR_01);
 
-#ifdef IS_PY3K
-    if (PyUnicode_Check(obj)) {                               /* str01 */
-        PyObject *string;
-        string = PyUnicode_AsEncodedString(obj, NULL, NULL);
-        ret = extend_string(self, string, STR_01);
-        Py_DECREF(string);
+    if (PyUnicode_Check(obj)) {                         /* unicode 01 */
+        PyObject *bytes = PyUnicode_AsEncodedString(obj, NULL, NULL);
+        /* encoding may fail (NULL with exception set): report an error */
+        ret = bytes ? extend_bytes(self, bytes, STR_01) : -1;
+        Py_XDECREF(bytes);
         return ret;
     }
-#endif
 
     if (PyIter_Check(obj))                                    /* iter */
         return extend_iter(self, obj);
@@ -766,7 +768,7 @@ extend_dispatch(bitarrayobject *self, PyObject *obj)
 #ifdef IS_PY3K
 #define IS_INDEX(x)  (PyLong_Check(x) || PyIndex_Check(x))
 #define IS_INT_OR_BOOL(x)  (PyBool_Check(x) || PyLong_Check(x))
-#else
+#else  /* Py 2 */
 #define IS_INDEX(x)  (PyInt_Check(x) || PyLong_Check(x) || PyIndex_Check(x))
 #define IS_INT_OR_BOOL(x)  (PyBool_Check(x) || PyInt_Check(x) || \
                                                PyLong_Check(x))
@@ -804,6 +806,19 @@ IntBool_AsInt(PyObject *v)
     return (int) x;
 }
 
+/* Normalize index (which may be negative), such that 0 <= i <= n */
+static void
+normalize_index(idx_t n, idx_t *i)
+{
+    if (*i < 0) {
+        *i += n;
+        if (*i < 0)
+            *i = 0;
+    }
+    if (*i > n)
+        *i = n;
+}
+
 /* Extract a slice index from a PyInt or PyLong or an object with the
    nb_index slot defined, and store in *i.
    However, this function returns -1 on error and 0 on success.
@@ -943,18 +958,24 @@ Return a copy of the bitarray.");
 static PyObject *
 bitarray_count(bitarrayobject *self, PyObject *args)
 {
-    idx_t n1;
+    idx_t n1, start = 0, stop = self->nbits;
     long x = 1;
 
-    if (!PyArg_ParseTuple(args, "|i:count", &x))
+    if (!PyArg_ParseTuple(args, "|lLL:count", &x, &start, &stop))
         return NULL;
 
-    n1 = count(self);
-    return PyLong_FromLongLong(x ? n1 : (self->nbits - n1));
+    normalize_index(self->nbits, &start);   /* clamp into [0, nbits] */
+    normalize_index(self->nbits, &stop);
+
+    if (self->nbits == 0 || start >= stop)  /* empty range */
+        return PyLong_FromLongLong(0);
+    /* n1 = number of 1 bits in [start, stop); the 0 bits are the rest */
+    n1 = count(self, start, stop);
+    return PyLong_FromLongLong(x ? n1 : (stop - start - n1));
 }
 
 PyDoc_STRVAR(count_doc,
-"count([value]) -> int\n\
+"count(value=True, start=0, stop=<end of array>, /) -> int\n\
 \n\
-Return number of occurrences of value (defaults to True) in the bitarray.");
+Count occurrences of value (defaults to True) in bitarray[start:stop].");
 
@@ -963,7 +984,7 @@ static PyObject *
 bitarray_index(bitarrayobject *self, PyObject *args)
 {
     PyObject *x;
-    idx_t i, start = 0, stop = -1;
+    idx_t i, start = 0, stop = self->nbits;
     long vi;
 
     if (!PyArg_ParseTuple(args, "O|LL:index", &x, &start, &stop))
@@ -973,6 +994,9 @@ bitarray_index(bitarrayobject *self, PyObject *args)
     if (vi < 0)
         return NULL;
 
+    normalize_index(self->nbits, &start);
+    normalize_index(self->nbits, &stop);
+
     i = findfirst(self, vi, start, stop);
     if (i < 0) {
         PyErr_SetString(PyExc_ValueError, "index(x): x not in bitarray");
@@ -982,7 +1006,7 @@ bitarray_index(bitarrayobject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(index_doc,
-"index(value, [start, [stop]]) -> int\n\
+"index(value, start=0, stop=<end of array>, /) -> int\n\
 \n\
 Return index of the first occurrence of bool(value) in the bitarray.\n\
 Raises ValueError if the value is not present.");
@@ -997,7 +1021,7 @@ bitarray_extend(bitarrayobject *self, PyObject *obj)
 }
 
 PyDoc_STRVAR(extend_doc,
-"extend(object)\n\
+"extend(iterable, /)\n\
 \n\
 Append bits to the end of the bitarray.  The objects which can be passed\n\
 to this method are the same iterable objects which can given to a bitarray\n\
@@ -1015,7 +1039,7 @@ bitarray_contains(bitarrayobject *self, PyObject *x)
         vi = IntBool_AsInt(x);
         if (vi < 0)
             return NULL;
-        res = findfirst(self, vi, 0, -1) >= 0;
+        res = findfirst(self, vi, 0, self->nbits) >= 0;
     }
     else if (bitarray_Check(x)) {
         res = search(self, (bitarrayobject *) x, 0) >= 0;
@@ -1028,10 +1052,10 @@ bitarray_contains(bitarrayobject *self, PyObject *x)
 }
 
 PyDoc_STRVAR(contains_doc,
-"__contains__(x) -> bool\n\
+"__contains__(value, /) -> bool\n\
 \n\
-Return True if bitarray contains x, False otherwise.\n\
-The value x may be a boolean (or integer between 0 and 1), or a bitarray.");
+Return True if bitarray contains value, False otherwise.\n\
+The value may be a boolean (or integer between 0 and 1), or a bitarray.");
 
 
 static PyObject *
@@ -1043,7 +1067,7 @@ bitarray_search(bitarrayobject *self, PyObject *args)
     bitarrayobject *xa;
     idx_t p;
 
-    if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":_search", &x, &limit))
+    if (!PyArg_ParseTuple(args, "O|n:_search", &x, &limit))
         return NULL;
 
     if (!bitarray_Check(x)) {
@@ -1081,10 +1105,10 @@ bitarray_search(bitarrayobject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(search_doc,
-"search(bitarray, [limit]) -> list\n\
+"search(bitarray, limit=<none>, /) -> list\n\
 \n\
-Searches for the given a bitarray in self, and returns the start positions\n\
-where bitarray matches self as a list.\n\
+Searches for the given bitarray in self, and returns the list of start\n\
+positions.\n\
 The optional argument limits the number of search results to the integer\n\
 specified.  By default, all search results are returned.");
 
@@ -1126,7 +1150,7 @@ bitarray_endian(bitarrayobject *self)
 }
 
 PyDoc_STRVAR(endian_doc,
-"endian() -> string\n\
+"endian() -> str\n\
 \n\
 Return the bit endianness as a string (either 'little' or 'big').");
 
@@ -1141,7 +1165,7 @@ bitarray_append(bitarrayobject *self, PyObject *v)
 }
 
 PyDoc_STRVAR(append_doc,
-"append(item)\n\
+"append(item, /)\n\
 \n\
 Append the value bool(item) to the end of the bitarray.");
 
@@ -1149,7 +1173,7 @@ Append the value bool(item) to the end of the bitarray.");
 static PyObject *
 bitarray_all(bitarrayobject *self)
 {
-    if (findfirst(self, 0, 0, -1) >= 0)
+    if (findfirst(self, 0, 0, self->nbits) >= 0)
         Py_RETURN_FALSE;
     else
         Py_RETURN_TRUE;
@@ -1164,7 +1188,7 @@ Returns True when all bits in the array are True.");
 static PyObject *
 bitarray_any(bitarrayobject *self)
 {
-    if (findfirst(self, 1, 0, -1) >= 0)
+    if (findfirst(self, 1, 0, self->nbits) >= 0)
         Py_RETURN_TRUE;
     else
         Py_RETURN_FALSE;
@@ -1190,14 +1214,14 @@ bitarray_reduce(bitarrayobject *self)
     }
     /* the first byte indicates the number of unused bits at the end, and
        the rest of the bytes consist of the raw binary data */
-    str = PyMem_Malloc(Py_SIZE(self) + 1);
+    str = (char *) PyMem_Malloc(Py_SIZE(self) + 1);
     if (str == NULL) {
         PyErr_NoMemory();
         goto error;
     }
     str[0] = (char) setunused(self);
     memcpy(str + 1, self->ob_item, Py_SIZE(self));
-    repr = PyString_FromStringAndSize(str, Py_SIZE(self) + 1);
+    repr = PyBytes_FromStringAndSize(str, Py_SIZE(self) + 1);
     if (repr == NULL)
         goto error;
     PyMem_Free((void *) str);
@@ -1267,7 +1291,7 @@ PyDoc_STRVAR(fill_doc,
 "fill() -> int\n\
 \n\
 Adds zeros to the end of the bitarray, such that the length of the bitarray\n\
-is not a multiple of 8.  Returns the number of bits added (0..7).");
+will be a multiple of 8.  Returns the number of bits added (0..7).");
 
 
 static PyObject *
@@ -1313,7 +1337,7 @@ bitarray_setall(bitarrayobject *self, PyObject *v)
 }
 
 PyDoc_STRVAR(setall_doc,
-"setall(value)\n\
+"setall(value, /)\n\
 \n\
 Set all bits in the bitarray to bool(value).");
 
@@ -1329,7 +1353,7 @@ bitarray_sort(bitarrayobject *self, PyObject *args, PyObject *kwds)
         return NULL;
 
     n = self->nbits;
-    n1 = count(self);
+    n1 = count(self, 0, self->nbits);
 
     if (reverse) {
         setrange(self, 0, n1, 1);
@@ -1349,6 +1373,10 @@ PyDoc_STRVAR(sort_doc,
 Sort the bits in the array (in-place).");
 
 
+/* since too many details differ between the Python 2 and 3 implementations
+   of this function, we choose to have two separate function implementations,
+   even though this means some of the code is duplicated in the two versions
+*/
 #ifdef IS_PY3K
 static PyObject *
 bitarray_fromfile(bitarrayobject *self, PyObject *args)
@@ -1387,7 +1415,6 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args)
             Py_DECREF(reader);
             return NULL;
         }
-
         nread = PyBytes_Size(result);
 
         t = self->nbits;
@@ -1402,7 +1429,6 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args)
             Py_DECREF(reader);
             return NULL;
         }
-
         memcpy(self->ob_item + (Py_SIZE(self) - nread),
                PyBytes_AS_STRING(result), nread);
 
@@ -1417,10 +1443,9 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args)
 
     Py_DECREF(rargs);
     Py_DECREF(reader);
-
     Py_RETURN_NONE;
 }
-#else
+#else  /* Python 2 */
 static PyObject *
 bitarray_fromfile(bitarrayobject *self, PyObject *args)
 {
@@ -1431,7 +1456,7 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args)
     idx_t t, p;
     long cur;
 
-    if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":fromfile", &f, &nbytes))
+    if (!PyArg_ParseTuple(args, "O|n:fromfile", &f, &nbytes))
         return NULL;
 
     fp = PyFile_AsFile(f);
@@ -1484,13 +1509,16 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args)
 #endif
 
 PyDoc_STRVAR(fromfile_doc,
-"fromfile(f, [n])\n\
+"fromfile(f, n=<till EOF>, /)\n\
 \n\
 Read n bytes from the file object f and append them to the bitarray\n\
 interpreted as machine values.  When n is omitted, as many bytes are\n\
 read until EOF is reached.");
 
 
+/* since too many details differ between the Python 2 and 3 implementations
+   of this function, we choose to have two separate function implementations
+*/
 #ifdef IS_PY3K
 static PyObject *
 bitarray_tofile(bitarrayobject *self, PyObject *f)
@@ -1528,7 +1556,7 @@ bitarray_tofile(bitarrayobject *self, PyObject *f)
     Py_DECREF(result);
     Py_RETURN_NONE;
 }
-#else
+#else  /* Python 2 */
 static PyObject *
 bitarray_tofile(bitarrayobject *self, PyObject *f)
 {
@@ -1555,7 +1583,7 @@ bitarray_tofile(bitarrayobject *self, PyObject *f)
 #endif
 
 PyDoc_STRVAR(tofile_doc,
-"tofile(f)\n\
+"tofile(f, /)\n\
 \n\
 Write all bits (as machine values) to the file object f.\n\
 When the length of the bitarray is not a multiple of 8,\n\
@@ -1591,19 +1619,25 @@ use the extend method.");
 
 
 static PyObject *
-bitarray_frombytes(bitarrayobject *self, PyObject *string)
+bitarray_frombytes(bitarrayobject *self, PyObject *bytes)
 {
     idx_t t, p;
 
-    if (!PyString_Check(string)) {
+    if (!PyBytes_Check(bytes)) {
         PyErr_SetString(PyExc_TypeError, "byte string expected");
         return NULL;
     }
+
+    /* Before we extend the raw bytes with the new data, we need to store
+       the current size and pad the last byte, as our bitarray size might
+       not be a multiple of 8.  After extending, we remove the padding
+       bits again.  The same is done in bitarray_fromfile().
+    */
     t = self->nbits;
     p = setunused(self);
     self->nbits += p;
 
-    if (extend_rawstring(self, string) < 0)
+    if (extend_rawbytes(self, bytes) < 0)
         return NULL;
     if (delete_n(self, t, p) < 0)
         return NULL;
@@ -1611,7 +1645,7 @@ bitarray_frombytes(bitarrayobject *self, PyObject *string)
 }
 
 PyDoc_STRVAR(frombytes_doc,
-"frombytes(bytes)\n\
+"frombytes(bytes, /)\n\
 \n\
 Append from a byte string, interpreted as machine values.");
 
@@ -1620,7 +1654,7 @@ static PyObject *
 bitarray_tobytes(bitarrayobject *self)
 {
     setunused(self);
-    return PyString_FromStringAndSize(self->ob_item, Py_SIZE(self));
+    return PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self));
 }
 
 PyDoc_STRVAR(tobytes_doc,
@@ -1635,9 +1669,12 @@ static PyObject *
 bitarray_to01(bitarrayobject *self)
 {
 #ifdef IS_PY3K
-    PyObject *string, *unpacked;
+    PyObject *string;
+    PyObject *unpacked;
 
     unpacked = unpack(self, '0', '1');
+    if (unpacked == NULL)
+        return NULL;
     string = PyUnicode_FromEncodedObject(unpacked, NULL, NULL);
     Py_DECREF(unpacked);
     return string;
@@ -1647,7 +1684,7 @@ bitarray_to01(bitarrayobject *self)
 }
 
 PyDoc_STRVAR(to01_doc,
-"to01() -> string\n\
+"to01() -> str\n\
 \n\
 Return a string containing '0's and '1's, representing the bits in the\n\
 bitarray object.\n\
@@ -1677,20 +1714,20 @@ See also the pack method.");
 
 
 static PyObject *
-bitarray_pack(bitarrayobject *self, PyObject *string)
+bitarray_pack(bitarrayobject *self, PyObject *bytes)
 {
-    if (!PyString_Check(string)) {
+    if (!PyBytes_Check(bytes)) {
         PyErr_SetString(PyExc_TypeError, "byte string expected");
         return NULL;
     }
-    if (extend_string(self, string, STR_RAW) < 0)
+    if (extend_bytes(self, bytes, STR_RAW) < 0)
         return NULL;
 
     Py_RETURN_NONE;
 }
 
 PyDoc_STRVAR(pack_doc,
-"pack(bytes)\n\
+"pack(bytes, /)\n\
 \n\
 Extend the bitarray from a byte string, where each characters corresponds to\n\
 a single bit.  The character b'\\x00' maps to bit 0 and all other characters\n\
@@ -1703,29 +1740,30 @@ transfer of data between bitarray objects to other python objects\n\
 static PyObject *
 bitarray_repr(bitarrayobject *self)
 {
-    PyObject *string;
+    PyObject *bytes;
+    PyObject *unpacked;
 #ifdef IS_PY3K
     PyObject *decoded;
 #endif
 
     if (self->nbits == 0) {
-        string = PyString_FromString("bitarray()");
-        if (string == NULL)
-            return NULL;
+        bytes = PyBytes_FromString("bitarray()");
     }
     else {
-        string = PyString_FromString("bitarray(\'");
-        if (string == NULL)
+        unpacked = unpack(self, '0', '1');  /* first: nothing to leak yet */
+        if (unpacked == NULL)
             return NULL;
-        PyString_ConcatAndDel(&string, unpack(self, '0', '1'));
-        PyString_ConcatAndDel(&string, PyString_FromString("\')"));
+        bytes = PyBytes_FromString("bitarray(\'");
+        PyBytes_ConcatAndDel(&bytes, unpacked);
+        PyBytes_ConcatAndDel(&bytes, PyBytes_FromString("\')"));
     }
 #ifdef IS_PY3K
-    decoded = PyUnicode_FromEncodedObject(string, NULL, NULL);
-    Py_DECREF(string);
-    string = decoded;
+    decoded = bytes ? PyUnicode_FromEncodedObject(bytes, NULL, NULL) : NULL;
+    Py_XDECREF(bytes);  /* bytes is NULL if an allocation failed above */
+    return decoded;
+#else
+    return bytes;  /* really a string in Python 2 */
 #endif
-    return string;
 }
 
 
@@ -1754,7 +1792,7 @@ bitarray_insert(bitarrayobject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(insert_doc,
-"insert(i, item)\n\
+"insert(i, item, /)\n\
 \n\
 Insert bool(item) into the bitarray before position i.");
 
@@ -1787,7 +1825,7 @@ bitarray_pop(bitarrayobject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(pop_doc,
-"pop([i]) -> item\n\
+"pop(index=-1, /) -> item\n\
 \n\
 Return the i-th (default last) element and delete it from the bitarray.\n\
 Raises IndexError if bitarray is empty or index is out of range.");
@@ -1803,7 +1841,7 @@ bitarray_remove(bitarrayobject *self, PyObject *v)
     if (vi < 0)
         return NULL;
 
-    i = findfirst(self, vi, 0, -1);
+    i = findfirst(self, vi, 0, self->nbits);
     if (i < 0) {
         PyErr_SetString(PyExc_ValueError, "remove(x): x not in bitarray");
         return NULL;
@@ -1814,7 +1852,7 @@ bitarray_remove(bitarrayobject *self, PyObject *v)
 }
 
 PyDoc_STRVAR(remove_doc,
-"remove(item)\n\
+"remove(item, /)\n\
 \n\
 Remove the first occurrence of bool(item) in the bitarray.\n\
 Raises ValueError if item is not present.");
@@ -2103,12 +2141,43 @@ BITWISE_IFUNC(xor)
 
 /******************* variable length encoding and decoding ***************/
 
+static int
+check_codedict(PyObject *codedict)
+{
+    PyObject *key, *value;
+    Py_ssize_t pos = 0;
+
+    if (!PyDict_Check(codedict)) {
+        PyErr_SetString(PyExc_TypeError, "dict expected");
+        return -1;
+    }
+    if (PyDict_Size(codedict) == 0) {
+        PyErr_SetString(PyExc_ValueError, "prefix code dict empty");
+        return -1;
+    }
+    while (PyDict_Next(codedict, &pos, &key, &value)) {
+        if (!bitarray_Check(value)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "bitarray expected for dict value");
+            return -1;
+        }
+        if (((bitarrayobject *) value)->nbits == 0) {
+            PyErr_SetString(PyExc_ValueError, "non-empty bitarray expected");
+            return -1;
+        }
+    }
+    return 0;
+}
+
 static PyObject *
 bitarray_encode(bitarrayobject *self, PyObject *args)
 {
     PyObject *codedict, *iterable, *iter, *symbol, *bits;
 
-    if (!PyArg_ParseTuple(args, "OO:_encode", &codedict, &iterable))
+    if (!PyArg_ParseTuple(args, "OO:encode", &codedict, &iterable))
+        return NULL;
+
+    if (check_codedict(codedict) < 0)
         return NULL;
 
     iter = PyObject_GetIter(iterable);
@@ -2121,7 +2190,8 @@ bitarray_encode(bitarrayobject *self, PyObject *args)
         bits = PyDict_GetItem(codedict, symbol);
         Py_DECREF(symbol);
         if (bits == NULL) {
-            PyErr_SetString(PyExc_ValueError, "symbol not in prefix code");
+            PyErr_SetString(PyExc_ValueError,
+                            "symbol not defined in prefix code");
             goto error;
         }
         if (extend_bitarray(self, (bitarrayobject *) bits) < 0)
@@ -2137,72 +2207,194 @@ bitarray_encode(bitarrayobject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(encode_doc,
-"_encode(code, iterable)\n\
+"encode(code, iterable, /)\n\
 \n\
-like the encode method without code checking");
+Given a prefix code (a dict mapping symbols to bitarrays),\n\
+iterate over the iterable object with symbols, and extend the bitarray\n\
+with the corresponding bitarray for each symbol.");
 
 
-/* return the leave node resulting from traversing the (binary) tree,
-   or, when the iteration is finished, NULL
-*/
-static PyObject *
-tree_traverse(bitarrayobject *self, idx_t *indexp, PyObject *tree)
+/* Binary tree definition */
+typedef struct _bin_node
 {
-    PyObject *subtree;
-    long vi;
+    struct _bin_node *child[2];
+    PyObject *symbol;
+} binode;
 
-    if (*indexp == self->nbits)  /* stop iterator */
+
+static binode *
+new_binode(void)
+{
+    binode *nd;
+
+    nd = (binode *) PyMem_Malloc(sizeof(binode));
+    if (nd == NULL) {
+        PyErr_NoMemory();
         return NULL;
+    }
+    nd->child[0] = NULL;
+    nd->child[1] = NULL;
+    nd->symbol = NULL;
+    return nd;
+}
 
-    vi = GETBIT(self, *indexp);
-    (*indexp)++;
-    subtree = PyList_GetItem(tree, vi);
+static void
+delete_binode_tree(binode *tree)
+{
+    if (tree == NULL)
+        return;
 
-    if (PyList_Check(subtree) && PyList_Size(subtree) == 2)
-        return tree_traverse(self, indexp, subtree);
-    else
-        return subtree;
+    delete_binode_tree(tree->child[0]);
+    delete_binode_tree(tree->child[1]);
+    Py_XDECREF(tree->symbol);  /* drop the reference taken on insertion */
+    PyMem_Free(tree);
 }
 
-#define IS_EMPTY_LIST(x)  (PyList_Check(x) && PyList_Size(x) == 0)
+static int
+insert_symbol(binode *tree, bitarrayobject *ba, PyObject *symbol)
+{
+    binode *nd = tree, *prev;
+    idx_t i;  /* same type as ba->nbits */
+    int k;
+
+    for (i = 0; i < ba->nbits; i++) {
+        k = GETBIT(ba, i);
+        prev = nd;
+        nd = nd->child[k];
+        /* we cannot have already a symbol when branching to the new leaf */
+        if (nd && nd->symbol)
+            goto ambiguity;
 
+        if (!nd) {
+            nd = new_binode();
+            if (nd == NULL)
+                return -1;
+            prev->child[k] = nd;
+        }
+    }
+    /* the new leaf node cannot already have a symbol or children */
+    if (nd->symbol || nd->child[0] || nd->child[1])
+        goto ambiguity;
+
+    Py_INCREF(symbol);  /* the tree owns a reference to each symbol */
+    nd->symbol = symbol;
+    return 0;
+ ambiguity:
+    PyErr_SetString(PyExc_ValueError, "prefix code ambiguous");
+    return -1;
+}
+
+static binode *
+make_tree(PyObject *codedict)
+{
+    binode *tree;
+    PyObject *symbol, *array;
+    Py_ssize_t pos = 0;
+
+    tree = new_binode();
+    if (tree == NULL)
+        return NULL;
+
+    while (PyDict_Next(codedict, &pos, &symbol, &array)) {
+        if (insert_symbol(tree, (bitarrayobject *) array, symbol) < 0) {
+            delete_binode_tree(tree);
+            return NULL;
+        }
+    }
+    return tree;
+}
+
+/*
+  Traverse tree using the branches corresponding to the bitarray `ba`,
+  starting at *indexp, and return the symbol at the leaf node (or NULL
+  when the end of the bitarray has been reached).
+*/
 static PyObject *
-bitarray_decode(bitarrayobject *self, PyObject *tree)
+traverse_tree(binode *tree, bitarrayobject *ba, idx_t *indexp)
 {
-    PyObject *symbol, *list;
-    idx_t index = 0;
+    binode *nd = tree;
+    int k;
 
+    while (*indexp < ba->nbits) {
+        k = GETBIT(ba, *indexp);
+        (*indexp)++;
+        nd = nd->child[k];
+        if (nd == NULL) {
+            PyErr_SetString(PyExc_ValueError,
+                            "prefix code does not match data in bitarray");
+            return NULL;
+        }
+        if (nd->symbol)  /* leaf */
+            return nd->symbol;
+    }
+    if (nd != tree)
+        PyErr_SetString(PyExc_ValueError, "decoding not terminated");
+
+    return NULL;
+}
+
+static PyObject *
+bitarray_decode(bitarrayobject *self, PyObject *codedict)
+{
+    binode *tree, *nd;
+    PyObject *list;
+    idx_t i;  /* same type as self->nbits */
+    int k;
+
+    if (check_codedict(codedict) < 0)
+        return NULL;
+
+    tree = make_tree(codedict);
+    if (tree == NULL)  /* exception already set by make_tree() */
+        return NULL;
+
+    nd = tree;
     list = PyList_New(0);
-    if (list == NULL)
+    if (list == NULL) {
+        delete_binode_tree(tree);
         return NULL;
-    /* traverse binary tree and append symbols to the result list */
-    while ((symbol = tree_traverse(self, &index, tree)) != NULL) {
-        if (IS_EMPTY_LIST(symbol)) {
+    }
+    /* traverse tree (just like above) */
+    for (i = 0; i < self->nbits; i++) {
+        k = GETBIT(self, i);
+        nd = nd->child[k];
+        if (nd == NULL) {
             PyErr_SetString(PyExc_ValueError,
                             "prefix code does not match data in bitarray");
             goto error;
         }
-        if (PyList_Append(list, symbol) < 0)
-            goto error;
+        if (nd->symbol) {  /* leaf */
+            if (PyList_Append(list, nd->symbol) < 0)
+                goto error;
+            nd = tree;
+        }
     }
+    if (nd != tree) {
+        PyErr_SetString(PyExc_ValueError, "decoding not terminated");
+        goto error;
+    }
+    delete_binode_tree(tree);
     return list;
+
 error:
+    delete_binode_tree(tree);
     Py_DECREF(list);
     return NULL;
 }
 
 PyDoc_STRVAR(decode_doc,
-"_decode(tree) -> list\n\
+"decode(code, /) -> list\n\
 \n\
-Given a tree, decode the content of the bitarray and return the list of\n\
-symbols.");
+Given a prefix code (a dict mapping symbols to bitarrays),\n\
+decode the content of the bitarray and return it as a list of symbols.");
 
 /*********************** (Bitarray) Decode Iterator *********************/
 
+
 typedef struct {
     PyObject_HEAD
     bitarrayobject *bao;        /* bitarray we're searching in */
-    PyObject *tree;             /* prefix tree containing symbols */
+    binode *tree;               /* prefix tree containing symbols */
     idx_t index;                /* current index in bitarray */
 } decodeiterobject;
 
@@ -2210,30 +2402,41 @@ static PyTypeObject DecodeIter_Type;
 
 #define DecodeIter_Check(op)  PyObject_TypeCheck(op, &DecodeIter_Type)
 
+
+
 /* create a new initialized bitarray search iterator object */
 static PyObject *
-bitarray_iterdecode(bitarrayobject *self, PyObject *tree)
+bitarray_iterdecode(bitarrayobject *self, PyObject *codedict)
 {
     decodeiterobject *it;  /* iterator to be returned */
+    binode *tree;
+
+    if (check_codedict(codedict) < 0)
+        return NULL;
+    tree = make_tree(codedict);
+    if (tree == NULL)  /* exception already set by make_tree() */
+        return NULL;
 
     it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type);
-    if (it == NULL)
+    if (it == NULL) {
+        delete_binode_tree(tree);  /* not yet owned by an iterator */
         return NULL;
+    }
 
+    it->tree = tree;
     Py_INCREF(self);
     it->bao = self;
-    Py_INCREF(tree);
-    it->tree = tree;
     it->index = 0;
     PyObject_GC_Track(it);
     return (PyObject *) it;
 }
 
 PyDoc_STRVAR(iterdecode_doc,
-"_iterdecode(tree) -> iterator\n\
+"iterdecode(code, /) -> iterator\n\
 \n\
-Given a tree, decode the content of the bitarray and iterate over the\n\
-symbols.");
+Given a prefix code (a dict mapping symbols to bitarrays),\n\
+decode the content of the bitarray and return an iterator over\n\
+the symbols.");
 
 static PyObject *
 decodeiter_next(decodeiterobject *it)
@@ -2241,14 +2444,9 @@ decodeiter_next(decodeiterobject *it)
     PyObject *symbol;
 
     assert(DecodeIter_Check(it));
-    symbol = tree_traverse(it->bao, &(it->index), it->tree);
-    if (symbol == NULL)  /* stop iteration */
-        return NULL;
-    if (IS_EMPTY_LIST(symbol)) {
-        PyErr_SetString(PyExc_ValueError,
-                        "prefix code does not match data in bitarray");
+    symbol = traverse_tree(it->tree, it->bao, &(it->index));
+    if (symbol == NULL)  /* stop iteration OR error occurred */
         return NULL;
-    }
     Py_INCREF(symbol);
     return symbol;
 }
@@ -2256,9 +2454,9 @@ decodeiter_next(decodeiterobject *it)
 static void
 decodeiter_dealloc(decodeiterobject *it)
 {
+    delete_binode_tree(it->tree);
     PyObject_GC_UnTrack(it);
     Py_XDECREF(it->bao);
-    Py_XDECREF(it->tree);
     PyObject_GC_Del(it);
 }
 
@@ -2271,7 +2469,7 @@ decodeiter_traverse(decodeiterobject *it, visitproc visit, void *arg)
 
 static PyTypeObject DecodeIter_Type = {
 #ifdef IS_PY3K
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    PyVarObject_HEAD_INIT(NULL, 0)
 #else
     PyObject_HEAD_INIT(NULL)
     0,                                        /* ob_size */
@@ -2350,7 +2548,7 @@ bitarray_itersearch(bitarrayobject *self, PyObject *x)
 }
 
 PyDoc_STRVAR(itersearch_doc,
-"itersearch(bitarray) -> iterator\n\
+"itersearch(bitarray, /) -> iterator\n\
 \n\
 Searches for the given a bitarray in self, and return an iterator over\n\
 the start positions where bitarray matches self.");
@@ -2386,7 +2584,7 @@ searchiter_traverse(searchiterobject *it, visitproc visit, void *arg)
 
 static PyTypeObject SearchIter_Type = {
 #ifdef IS_PY3K
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    PyVarObject_HEAD_INIT(NULL, 0)
 #else
     PyObject_HEAD_INIT(NULL)
     0,                                        /* ob_size */
@@ -2439,11 +2637,11 @@ bitarray_methods[] = {
      copy_doc},
     {"count",        (PyCFunction) bitarray_count,       METH_VARARGS,
      count_doc},
-    {"_decode",      (PyCFunction) bitarray_decode,      METH_O,
+    {"decode",       (PyCFunction) bitarray_decode,      METH_O,
      decode_doc},
-    {"_iterdecode",  (PyCFunction) bitarray_iterdecode,  METH_O,
+    {"iterdecode",   (PyCFunction) bitarray_iterdecode,  METH_O,
      iterdecode_doc},
-    {"_encode",      (PyCFunction) bitarray_encode,      METH_VARARGS,
+    {"encode",       (PyCFunction) bitarray_encode,      METH_VARARGS,
      encode_doc},
     {"endian",       (PyCFunction) bitarray_endian,      METH_NOARGS,
      endian_doc},
@@ -2585,16 +2783,16 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         return a;
     }
 
-    /* string */
-    if (PyString_Check(initial)) {
+    /* bytes */
+    if (PyBytes_Check(initial)) {
         Py_ssize_t strlen;
         char *str;
 
-        strlen = PyString_Size(initial);
+        strlen = PyBytes_Size(initial);
         if (strlen == 0)        /* empty string */
             return newbitarrayobject(type, 0, endian);
 
-        str = PyString_AsString(initial);
+        str = PyBytes_AsString(initial);
         if (0 <= str[0] && str[0] < 8) {
             /* when the first character is smaller than 8, it indicates the
                number of unused bits at the end, and rest of the bytes
@@ -2613,6 +2811,16 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         }
     }
 
+#define CHECK_TYPE(type)  \
+    if (Py ## type ## _Check(initial)) {                                  \
+        PyErr_SetString(PyExc_TypeError,                                  \
+                        "cannot create bitarray from " #type " object");  \
+        return NULL;                                                      \
+    }
+CHECK_TYPE(Float)
+CHECK_TYPE(Complex)
+#undef CHECK_TYPE
+
     /* leave remaining type dispatch to the extend method */
     a = newbitarrayobject(type, 0, endian);
     if (a == NULL)
@@ -2750,7 +2958,7 @@ bitarrayiter_traverse(bitarrayiterobject *it, visitproc visit, void *arg)
 
 static PyTypeObject BitarrayIter_Type = {
 #ifdef IS_PY3K
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    PyVarObject_HEAD_INIT(NULL, 0)
 #else
     PyObject_HEAD_INIT(NULL)
     0,                                        /* ob_size */
@@ -2787,6 +2995,8 @@ static PyTypeObject BitarrayIter_Type = {
 
 /********************* Bitarray Buffer Interface ************************/
 #ifdef WITH_BUFFER
+
+#if PY_MAJOR_VERSION == 2
 static Py_ssize_t
 bitarray_buffer_getreadbuf(bitarrayobject *self,
                            Py_ssize_t index, const void **ptr)
@@ -2831,6 +3041,8 @@ bitarray_buffer_getcharbuf(bitarrayobject *self,
     return Py_SIZE(self);
 }
 
+#endif
+
 static int
 bitarray_getbuffer(bitarrayobject *self, Py_buffer *view, int flags)
 {
@@ -2857,19 +3069,23 @@ bitarray_releasebuffer(bitarrayobject *self, Py_buffer *view)
 }
 
 static PyBufferProcs bitarray_as_buffer = {
+#if PY_MAJOR_VERSION == 2   /* old buffer protocol */
     (readbufferproc) bitarray_buffer_getreadbuf,
     (writebufferproc) bitarray_buffer_getwritebuf,
     (segcountproc) bitarray_buffer_getsegcount,
     (charbufferproc) bitarray_buffer_getcharbuf,
+#endif
     (getbufferproc) bitarray_getbuffer,
     (releasebufferproc) bitarray_releasebuffer,
 };
+
 #endif  /* WITH_BUFFER */
+
 /************************** Bitarray Type *******************************/
 
 static PyTypeObject Bitarraytype = {
 #ifdef IS_PY3K
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    PyVarObject_HEAD_INIT(NULL, 0)
 #else
     PyObject_HEAD_INIT(NULL)
     0,                                        /* ob_size */
@@ -2898,7 +3114,7 @@ static PyTypeObject Bitarraytype = {
     0,                                        /* tp_as_buffer */
 #endif
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS
-#ifdef WITH_BUFFER
+#if defined(WITH_BUFFER) && PY_MAJOR_VERSION == 2
     | Py_TPFLAGS_HAVE_NEWBUFFER
 #endif
     ,                                         /* tp_flags */
@@ -2959,11 +3175,11 @@ bitdiff(PyObject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(bitdiff_doc,
-"bitdiff(a, b) -> int\n\
+"bitdiff(a, b, /) -> int\n\
 \n\
 Return the difference between two bitarrays a and b.\n\
 This is function does the same as (a ^ b).count(), but is more memory\n\
-efficient, as no intermediate bitarray object gets created");
+efficient, as no intermediate bitarray object gets created.");
 
 
 static PyObject *
@@ -2978,14 +3194,14 @@ bits2bytes(PyObject *self, PyObject *v)
     if (getIndex(v, &n) < 0)
         return NULL;
     if (n < 0) {
-        PyErr_SetString(PyExc_ValueError, "positive value expected");
+        PyErr_SetString(PyExc_ValueError, "non-negative integer expected");
         return NULL;
     }
     return PyLong_FromLongLong(BYTES(n));
 }
 
 PyDoc_STRVAR(bits2bytes_doc,
-"bits2bytes(n) -> int\n\
+"bits2bytes(n, /) -> int\n\
 \n\
 Return the number of bytes necessary to store n bits.");
 
@@ -3034,6 +3250,8 @@ init_bitarray(void)
     PyObject *m;
 
     Py_TYPE(&Bitarraytype) = &PyType_Type;
+    Py_TYPE(&SearchIter_Type) = &PyType_Type;
+    Py_TYPE(&DecodeIter_Type) = &PyType_Type;
     Py_TYPE(&BitarrayIter_Type) = &PyType_Type;
 #ifdef IS_PY3K
     m = PyModule_Create(&moduledef);
diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py
index f558b8625..c24cb1054 100644
--- a/bitarray/test_bitarray.py
+++ b/bitarray/test_bitarray.py
@@ -12,10 +12,21 @@
 
 is_py3k = bool(sys.version_info[0] == 3)
 
+# imports needed inside tests
+import copy
+import pickle
+import itertools
+
+try:
+    import shelve, hashlib
+except ImportError:
+    shelve = hashlib = None
+
 if is_py3k:
     from io import StringIO
 else:
     from cStringIO import StringIO
+    import cPickle
 
 
 from bitarray import bitarray, bitdiff, bits2bytes, __version__
@@ -23,24 +34,25 @@
 
 tests = []
 
-if sys.version_info[:2] < (2, 6):
-    def next(x):
-        return x.next()
-
 
 def to_bytes(s):
     if is_py3k:
         return bytes(s.encode('latin1'))
-    elif sys.version_info[:2] >= (2, 6):
-        return bytes(s)
     else:
-        return s
+        return bytes(s)  # which is str for Python 2
+
+
+if is_py3k:
+    def unicode(*args):
+        if len(args) == 0:
+            return ''
+        return args[0]
 
 
 class Util(object):
 
-    def randombitarrays(self):
-        for n in list(range(25)) + [randint(1000, 2000)]:
+    def randombitarrays(self, start=0):
+        for n in list(range(start, 25)) + [randint(1000, 2000)]:
             a = bitarray(endian=['little', 'big'][randint(0, 1)])
             a.frombytes(os.urandom(bits2bytes(n)))
             del a[n:]
@@ -150,6 +162,8 @@ def test_bits2bytes(self):
         for arg in ['foo', [], None, {}]:
             self.assertRaises(TypeError, bits2bytes, arg)
 
+        self.assertRaises(TypeError, bits2bytes, 187.0)
+        self.assertRaises(TypeError, bits2bytes, -4.0)
         self.assertRaises(TypeError, bits2bytes)
         self.assertRaises(TypeError, bits2bytes, 1, 2)
 
@@ -185,11 +199,13 @@ def test_endian1(self):
         a = bitarray(endian='little')
         a.fromstring('A')
         self.assertEqual(a.endian(), 'little')
+        self.assert_(isinstance(a.endian(), str))
         self.check_obj(a)
 
         b = bitarray(endian='big')
         b.fromstring('A')
         self.assertEqual(b.endian(), 'big')
+        self.assert_(isinstance(b.endian(), str))  # check b, not a again
         self.check_obj(b)
 
         self.assertEqual(a.tostring(), b.tostring())
@@ -475,10 +491,8 @@ def test_setitem1(self):
         self.assertRaises(IndexError, a.__setitem__, -3, False)
 
     def test_setitem2(self):
-        for a in self.randombitarrays():
+        for a in self.randombitarrays(start=1):
             la = len(a)
-            if la == 0:
-                continue
             i = randint(0, la - 1)
             aa = a.tolist()
             ida = id(a)
@@ -512,10 +526,9 @@ def test_setitem3(self):
         self.assertRaises(IndexError, a.__setitem__, -6, 'bar')
 
     def test_setitem4(self):
-        for a in self.randombitarrays():
+        for a in self.randombitarrays(start=1):
             la = len(a)
-            if la == 0: continue
-            for dum in range(3):
+            for dum in range(50):
                 step = self.rndsliceidx(la)
                 if step == 0: step = None
                 s = slice(self.rndsliceidx(la),
@@ -571,10 +584,9 @@ def test_delitem1(self):
         self.assertRaises(IndexError, a.__delitem__, -4)
 
     def test_delitem2(self):
-        for a in self.randombitarrays():
+        for a in self.randombitarrays(start=1):
             la = len(a)
-            if la == 0: continue
-            for dum in range(10):
+            for dum in range(50):
                 step = self.rndsliceidx(la)
                 if step == 0: step = None
                 s = slice(self.rndsliceidx(la),
@@ -605,6 +617,15 @@ def test_booleanness(self):
         self.assertEqual(bool(bitarray('0')), True)
         self.assertEqual(bool(bitarray('1')), True)
 
+    def test_to01(self):
+        a = bitarray()
+        self.assertEqual(a.to01(), '')
+        self.assert_(isinstance(a.to01(), str))
+
+        a = bitarray('101')
+        self.assertEqual(a.to01(), '101')
+        self.assert_(isinstance(a.to01(), str))
+
     def test_iterate(self):
         for lst in self.randomlists():
             acc = []
@@ -633,17 +654,17 @@ def test_assignment(self):
         self.assertEqual(a, b)
 
     def test_compare(self):
-        for a in self.randombitarrays():
+        for a in self.randombitarrays(start=1):
             aa = a.tolist()
-
-            for b in self.randombitarrays():
-                bb = b.tolist()
-                self.assertEqual(a == b, aa == bb)
-                self.assertEqual(a != b, aa != bb)
-                self.assertEqual(a <= b, aa <= bb)
-                self.assertEqual(a <  b, aa <  bb)
-                self.assertEqual(a >= b, aa >= bb)
-                self.assertEqual(a >  b, aa >  bb)
+            b = a.copy()
+            b[randint(0, len(a)-1)] = randint(0, 1)
+            bb = b.tolist()
+            self.assertEqual(a == b, aa == bb)
+            self.assertEqual(a != b, aa != bb)
+            self.assertEqual(a <= b, aa <= bb)
+            self.assertEqual(a <  b, aa <  bb)
+            self.assertEqual(a >= b, aa >= bb)
+            self.assertEqual(a >  b, aa >  bb)
 
     def test_subclassing(self):
         class ExaggeratingBitarray(bitarray):
@@ -657,12 +678,10 @@ def __init__(self, data, offset):
             def __getitem__(self, i):
                 return bitarray.__getitem__(self, i - self.offset)
 
-        for a in self.randombitarrays():
-            if len(a) == 0:
-                continue
+        for a in self.randombitarrays(start=1):
             b = ExaggeratingBitarray(a, 1234)
             for i in range(len(a)):
-                self.assertEqual(a[i], b[i+1234])
+                self.assertEqual(a[i], b[i + 1234])
 
     def test_endianness1(self):
         a = bitarray(endian='little')
@@ -720,10 +739,9 @@ def test_endianness5(self):
         self.assertEqual(a.tobytes(), b.tobytes())
 
     def test_pickle(self):
-        from pickle import loads, dumps
         for v in range(3):
             for a in self.randombitarrays():
-                b = loads(dumps(a, v))
+                b = pickle.loads(pickle.dumps(a, v))
                 self.assert_(b is not a)
                 self.assertEQUAL(a, b)
 
@@ -731,16 +749,13 @@ def test_cPickle(self):
         if is_py3k:
             return
         for v in range(3):
-            from cPickle import loads, dumps
             for a in self.randombitarrays():
-                b = loads(dumps(a, v))
+                b = cPickle.loads(cPickle.dumps(a, v))
                 self.assert_(b is not a)
                 self.assertEQUAL(a, b)
 
     def test_overflow(self):
-        from platform import architecture
-
-        if architecture()[0] == '64bit':
+        if tuple.__itemsize__ == 8:
             return
 
         self.assertRaises(OverflowError, bitarray.__new__,
@@ -749,6 +764,30 @@ def test_overflow(self):
         a = bitarray(10 ** 6)
         self.assertRaises(OverflowError, a.__imul__, 17180)
 
+    def test_unicode1(self):
+        a = bitarray(unicode())
+        self.assertEqual(a, bitarray())
+
+        a = bitarray(unicode('111001'))
+        self.assertEqual(a, bitarray('111001'))
+
+        for a in self.randombitarrays():
+            b = bitarray(unicode(a.to01()))
+            self.assertEqual(a, b)
+
+    def test_unicode2(self):
+        a = bitarray()
+        a.extend(unicode())
+        self.assertEqual(a, bitarray())
+
+        a = bitarray()
+        a.extend(unicode('001011'))
+        self.assertEqual(a, bitarray('001011'))
+
+        for a in self.randombitarrays():
+            b = bitarray()
+            b.extend(unicode(a.to01()))
+            self.assertEqual(a, b)
 
 tests.append(MiscTests)
 
@@ -760,9 +799,6 @@ def test_all(self):
         a = bitarray()
         self.assertTrue(a.all())
 
-        if sys.version_info[:2] < (2, 5):
-            return
-
         for a in self.randombitarrays():
             self.assertEqual(all(a),          a.all())
             self.assertEqual(all(a.tolist()), a.all())
@@ -772,20 +808,19 @@ def test_any(self):
         a = bitarray()
         self.assertFalse(a.any())
 
-        if sys.version_info[:2] < (2, 5):
-            return
-
         for a in self.randombitarrays():
             self.assertEqual(any(a),          a.any())
             self.assertEqual(any(a.tolist()), a.any())
 
 
     def test_repr(self):
-        a = bitarray()
-        self.assertEqual(repr(a), "bitarray()")
+        r = repr(bitarray())
+        self.assertEqual(r, "bitarray()")
+        self.assert_(isinstance(r, str))
 
-        a = bitarray('10111')
-        self.assertEqual(repr(a), "bitarray('10111')")
+        r = repr(bitarray('10111'))
+        self.assertEqual(r, "bitarray('10111')")
+        self.assert_(isinstance(r, str))
 
         for a in self.randombitarrays():
             b = eval(repr(a))
@@ -795,7 +830,6 @@ def test_repr(self):
 
 
     def test_copy(self):
-        import copy
         for a in self.randombitarrays():
             b = a.copy()
             self.assert_(b is not a)
@@ -1189,10 +1223,6 @@ def test_iterator1(self):
                 self.check_obj(c)
 
     def test_iterator2(self):
-        try:
-            import itertools
-        except ImportError:
-            return
         a = bitarray()
         a.extend(itertools.repeat(True, 23))
         self.assertEqual(a, bitarray(23 * '1'))
@@ -1211,6 +1241,24 @@ def test_string01(self):
                 self.assertEqual(c.tolist(), a + b)
                 self.check_obj(c)
 
+    def test_extend_self(self):
+        a = bitarray()
+        a.extend(a)
+        self.assertEqual(a, bitarray())
+
+        a = bitarray('1')
+        a.extend(a)
+        self.assertEqual(a, bitarray('11'))
+
+        a = bitarray('110')
+        a.extend(a)
+        self.assertEqual(a, bitarray('110110'))
+
+        for a in self.randombitarrays():
+            b = bitarray(a)
+            a.extend(a)
+            self.assertEqual(a, b + b)
+
 
 tests.append(ExtendTests)
 
@@ -1265,6 +1313,13 @@ def test_index(self):
         self.assertRaises(ValueError, a.index, True)
         a[20] = a[27] = 1
         self.assertEqual(a.index(42), 20)
+        self.assertEqual(a.index(1, 21), 27)
+        self.assertEqual(a.index(1, 27), 27)
+        self.assertEqual(a.index(1, -73), 27)
+        self.assertRaises(ValueError, a.index, 1, 5, 17)
+        self.assertRaises(ValueError, a.index, 1, 5, -83)
+        self.assertRaises(ValueError, a.index, 1, 21, 27)
+        self.assertRaises(ValueError, a.index, 1, 28)
         self.assertEqual(a.index(0), 0)
 
         a = bitarray(200 * [True])
@@ -1286,7 +1341,7 @@ def test_index(self):
                 a[m] = 0
                 self.assertEqual(a.index(0), m)
 
-    def test_index2(self):
+    def test_index2a(self):
         a = bitarray('00001000' '00000000' '0010000')
         self.assertEqual(a.index(1), 4)
         self.assertEqual(a.index(1, 1), 4)
@@ -1295,6 +1350,15 @@ def test_index2(self):
         self.assertRaises(ValueError, a.index, 1, 5, 18)
         self.assertRaises(ValueError, a.index, 1, 19)
 
+    def test_index2b(self):
+        a = bitarray('11110111' '11111111' '1101111')
+        self.assertEqual(a.index(0), 4)
+        self.assertEqual(a.index(0, 1), 4)
+        self.assertEqual(a.index(1, 4), 5)
+        self.assertEqual(a.index(0, 5), 18)
+        self.assertRaises(ValueError, a.index, 0, 5, 18)
+        self.assertRaises(ValueError, a.index, 0, 19)
+
     def test_index3(self):
         a = bitarray(2000)
         a.setall(0)
@@ -1314,6 +1378,27 @@ def test_index3(self):
                 res2 = None
             self.assertEqual(res1, res2)
 
+    def test_index4(self):
+        for n in range(1, 50):
+            a = bitarray(n)
+            i = randint(0, 1)
+            a.setall(i)
+            for unused in range(randint(1, 4)):
+                a[randint(0, n-1)] = 1-i
+            aa = a.tolist()
+            for unused in range(100):
+                start = randint(-50, n+50)
+                stop = randint(-50, n+50)
+                try:
+                    res1 = a.index(1-i, start, stop)
+                except ValueError:
+                    res1 = None
+                try:
+                    res2 = aa.index(1-i, start, stop)
+                except ValueError:
+                    res2 = None
+                self.assertEqual(res1, res2)
+
 
     def test_count(self):
         a = bitarray('10011')
@@ -1330,9 +1415,35 @@ def test_count(self):
             self.assertEqual(a.count(), a.to01().count('1'))
 
         for a in self.randombitarrays():
-            self.assertEqual(a.count(), a.count(1))
-            self.assertEqual(a.count(1), a.to01().count('1'))
-            self.assertEqual(a.count(0), a.to01().count('0'))
+            s = a.to01()
+            self.assertEqual(a.count(1), s.count('1'))
+            self.assertEqual(a.count(0), s.count('0'))
+
+    def test_count2(self):
+        N = 37
+        a = bitarray(N)
+        a.setall(1)
+        for i in range(N):
+            for j in range(i, N):
+                self.assertEqual(a.count(1, i, j), j - i)
+
+    def test_count3(self):
+        a = bitarray('01001100' '01110011' '01')
+        self.assertEqual(a.count(), 9)
+        self.assertEqual(a.count(0, 12), 3)
+        self.assertEqual(a.count(1, -5), 3)
+        self.assertEqual(a.count(1, 2, 17), 7)
+        self.assertEqual(a.count(1, 6, 11), 2)
+        self.assertEqual(a.count(0, 7, -3), 4)
+        self.assertEqual(a.count(1, 1, -1), 8)
+        self.assertEqual(a.count(1, 17, 14), 0)
+
+        for a in self.randombitarrays():
+            s = a.to01()
+            i = randint(-3, len(a)+1)
+            j = randint(-3, len(a)+1)
+            self.assertEqual(a.count(1, i, j), s[i:j].count('1'))
+            self.assertEqual(a.count(0, i, j), s[i:j].count('0'))
 
 
     def test_search(self):
@@ -1388,10 +1499,21 @@ def test_search3(self):
             self.assertEqual(list(a.itersearch(b)), res)
             self.assertEqual([p for p in a.itersearch(b)], res)
 
+    def test_search4(self):
+        for a in self.randombitarrays():
+            aa = a.to01()  # string form, used as the reference for find()
+            for sub in '0', '1', '01', '10', '11', '101', '1111111':
+                sr = a.search(bitarray(sub), 1)
+                try:
+                    p = sr[0]
+                except IndexError:
+                    p = -1  # empty result; str.find() also reports -1
+                self.assertEqual(p, aa.find(sub))
+
     def test_search_type(self):
         a = bitarray('10011')
         it = a.itersearch(bitarray('1'))
-        self.assertIsInstance(type(it), type)
+        self.assertTrue(isinstance(type(it), type))
 
     def test_fill(self):
         a = bitarray('')
@@ -1533,9 +1655,7 @@ def test_pop(self):
             self.check_obj(a)
             self.assertEqual(a.endian(), enda)
 
-        for a in self.randombitarrays():
-            if len(a) == 0:
-                continue
+        for a in self.randombitarrays(start=1):
             n = randint(-len(a), len(a)-1)
             aa = a.tolist()
             self.assertEqual(a.pop(n), aa[n])
@@ -1645,6 +1765,10 @@ def test_tobytes(self):
 
     def test_unpack(self):
         a = bitarray('01')
+        if is_py3k:
+            self.assert_(isinstance(a.unpack(), bytes))
+        else:
+            self.assert_(isinstance(a.unpack(), str))
         self.assertEqual(a.unpack(), to_bytes('\x00\xff'))
         self.assertEqual(a.unpack(to_bytes('A')), to_bytes('A\xff'))
         self.assertEqual(a.unpack(to_bytes('0'), to_bytes('1')),
@@ -1710,35 +1834,30 @@ def tearDown(self):
 
 
     def test_pickle(self):
-        from pickle import load, dump
-
         for v in range(3):
             for a in self.randombitarrays():
                 fo = open(self.tmpfname, 'wb')
-                dump(a, fo, v)
+                pickle.dump(a, fo, v)
                 fo.close()
-                b = load(open(self.tmpfname, 'rb'))
+                b = pickle.load(open(self.tmpfname, 'rb'))
                 self.assert_(b is not a)
                 self.assertEQUAL(a, b)
 
     def test_cPickle(self):
         if is_py3k:
             return
-        from cPickle import load, dump
-
         for v in range(3):
             for a in self.randombitarrays():
                 fo = open(self.tmpfname, 'wb')
-                dump(a, fo, v)
+                cPickle.dump(a, fo, v)
                 fo.close()
-                b = load(open(self.tmpfname, 'rb'))
+                b = cPickle.load(open(self.tmpfname, 'rb'))
                 self.assert_(b is not a)
                 self.assertEQUAL(a, b)
 
     def test_shelve(self):
-        if sys.version_info[:2] < (2, 5):
+        if not shelve or hasattr(sys, 'gettotalrefcount'):
             return
-        import shelve, hashlib
 
         d = shelve.open(self.tmpfname)
         stored = []
@@ -1823,6 +1942,7 @@ def test_fromfile_n(self):
 
         b = bitarray()
         f = open(self.tmpfname, 'rb')
+        b.fromfile(f, 0);     self.assertEqual(b.tostring(), '')
         b.fromfile(f, 1);     self.assertEqual(b.tostring(), 'A')
         f.read(1)
         b = bitarray()
@@ -1909,16 +2029,6 @@ def test_tofile(self):
 
 class PrefixCodeTests(unittest.TestCase, Util):
 
-    def test_encode_errors(self):
-        a = bitarray()
-        self.assertRaises(TypeError, a.encode, 0, '')
-        self.assertRaises(ValueError, a.encode, {}, '')
-        self.assertRaises(TypeError, a.encode, {'a':42}, '')
-        self.assertRaises(ValueError, a.encode, {'a': bitarray()}, '')
-        # 42 not iterable
-        self.assertRaises(TypeError, a.encode, {'a': bitarray('0')}, 42)
-        self.assertEqual(len(a), 0)
-
     def test_encode_string(self):
         a = bitarray()
         d = {'a': bitarray('0')}
@@ -1930,7 +2040,7 @@ def test_encode_string(self):
 
     def test_encode_list(self):
         a = bitarray()
-        d = {'a':bitarray('0')}
+        d = {'a': bitarray('0')}
         a.encode(d, [])
         self.assertEqual(a, bitarray())
         a.encode(d, ['a'])
@@ -1965,14 +2075,48 @@ def test_encode(self):
                              'a': bitarray('001'), 'n': bitarray('000')})
         self.assertRaises(ValueError, a.encode, d, 'arvin')
 
+    def test_encode_symbol_not_in_code(self):
+        d = {None : bitarray('0'),
+             0    : bitarray('10'),
+             'A'  : bitarray('11')}
+        a = bitarray()
+        a.encode(d, ['A', None, 0])
+        self.assertEqual(a, bitarray('11010'))
+        self.assertRaises(ValueError, a.encode, d, [1, 2])
+        self.assertRaises(ValueError, a.encode, d, 'ABCD')
+
+    def test_encode_not_iterable(self):
+        d = {'a': bitarray('0'), 'b': bitarray('1')}
+        a = bitarray()
+        a.encode(d, 'abba')
+        self.assertRaises(TypeError, a.encode, d, 42)
+        self.assertRaises(TypeError, a.encode, d, 1.3)
+        self.assertRaises(TypeError, a.encode, d, None)
+        self.assertEqual(a, bitarray('0110'))
 
-    def test_decode_check_codedict(self):
+    def test_check_codedict_encode(self):
         a = bitarray()
+        self.assertRaises(TypeError, a.encode, None, '')
+        self.assertRaises(ValueError, a.encode, {}, '')
+        self.assertRaises(TypeError, a.encode, {'a': 'b'}, '')
+        self.assertRaises(ValueError, a.encode, {'a': bitarray()}, '')
+        self.assertEqual(len(a), 0)
+
+    def test_check_codedict_decode(self):
+        a = bitarray('101')
         self.assertRaises(TypeError, a.decode, 0)
         self.assertRaises(ValueError, a.decode, {})
-        # 42 not iterable
-        self.assertRaises(TypeError, a.decode, {'a':42})
-        self.assertRaises(ValueError, a.decode, {'a':bitarray()})
+        self.assertRaises(TypeError, a.decode, {'a': 42})
+        self.assertRaises(ValueError, a.decode, {'a': bitarray()})
+        self.assertEqual(a, bitarray('101'))
+
+    def test_check_codedict_iterdecode(self):
+        a = bitarray('1100101')
+        self.assertRaises(TypeError, a.iterdecode, 0)
+        self.assertRaises(ValueError, a.iterdecode, {})
+        self.assertRaises(TypeError, a.iterdecode, {'a': []})
+        self.assertRaises(ValueError, a.iterdecode, {'a': bitarray()})
+        self.assertEqual(a, bitarray('1100101'))
 
     def test_decode_simple(self):
         d = {'I': bitarray('1'),
@@ -2006,6 +2150,13 @@ def test_decode_empty(self):
         self.assertEqual(d, {'a': bitarray('1')})
         self.assertEqual(len(a), 0)
 
+    def test_decode_no_term(self):
+        d = {'a': bitarray('0'), 'b': bitarray('111')}
+        a = bitarray('011')
+        self.assertRaises(ValueError, a.decode, d)
+        self.assertEqual(a, bitarray('011'))
+        self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('111')})
+
     def test_decode_buggybitarray(self):
         d = {'a': bitarray('0')}
         a = bitarray('1')
@@ -2013,6 +2164,16 @@ def test_decode_buggybitarray(self):
         self.assertEqual(a, bitarray('1'))
         self.assertEqual(d, {'a': bitarray('0')})
 
+    def test_iterdecode_no_term(self):
+        d = {'a': bitarray('0'), 'b': bitarray('111')}
+        a = bitarray('011')
+        it = a.iterdecode(d)
+        # builtin next() exists on Python 2.6+ and 3, unlike it.next()
+        self.assertEqual(next(it), 'a')
+        self.assertRaises(ValueError, next, it)
+        self.assertEqual(a, bitarray('011'))
+        self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('111')})
+
     def test_iterdecode_buggybitarray(self):
         d = {'a': bitarray('0')}
         a = bitarray('1')
@@ -2037,16 +2198,15 @@ def test_iterdecode_buggybitarray2(self):
         self.assertEqual(a, bitarray('1'))
 
     def test_decode_ambiguous_code(self):
-        d = {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')}
-        a = bitarray()
-        self.assertRaises(ValueError, a.decode, d)
-        self.assertRaises(ValueError, a.iterdecode, d)
-
-    def test_decode_ambiguous2(self):
-        d = {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')}
-        a = bitarray()
-        self.assertRaises(ValueError, a.decode, d)
-        self.assertRaises(ValueError, a.iterdecode, d)
+        for d in [
+            {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')},
+            {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')},
+            {'a': bitarray('0'), 'b': bitarray('01')},
+            {'a': bitarray('0'), 'b': bitarray('11'), 'c': bitarray('111')},
+        ]:
+            a = bitarray()
+            self.assertRaises(ValueError, a.decode, d)
+            self.assertRaises(ValueError, a.iterdecode, d)
 
     def test_miscitems(self):
         d = {None : bitarray('00'),
@@ -2068,34 +2228,34 @@ def test_miscitems(self):
         self.assertStopIteration(it)
 
     def test_real_example(self):
-        code = {' '  : bitarray('001'),
-                '.'  : bitarray('0101010'),
-                'a'  : bitarray('0110'),
-                'b'  : bitarray('0001100'),
-                'c'  : bitarray('000011'),
-                'd'  : bitarray('01011'),
-                'e'  : bitarray('111'),
-                'f'  : bitarray('010100'),
-                'g'  : bitarray('101000'),
-                'h'  : bitarray('00000'),
-                'i'  : bitarray('1011'),
-                'j'  : bitarray('0111101111'),
-                'k'  : bitarray('00011010'),
-                'l'  : bitarray('01110'),
-                'm'  : bitarray('000111'),
-                'n'  : bitarray('1001'),
-                'o'  : bitarray('1000'),
-                'p'  : bitarray('101001'),
-                'q'  : bitarray('00001001101'),
-                'r'  : bitarray('1101'),
-                's'  : bitarray('1100'),
-                't'  : bitarray('0100'),
-                'u'  : bitarray('000100'),
-                'v'  : bitarray('0111100'),
-                'w'  : bitarray('011111'),
-                'x'  : bitarray('0000100011'),
-                'y'  : bitarray('101010'),
-                'z'  : bitarray('00011011110')}
+        code = {' ': bitarray('001'),
+                '.': bitarray('0101010'),
+                'a': bitarray('0110'),
+                'b': bitarray('0001100'),
+                'c': bitarray('000011'),
+                'd': bitarray('01011'),
+                'e': bitarray('111'),
+                'f': bitarray('010100'),
+                'g': bitarray('101000'),
+                'h': bitarray('00000'),
+                'i': bitarray('1011'),
+                'j': bitarray('0111101111'),
+                'k': bitarray('00011010'),
+                'l': bitarray('01110'),
+                'm': bitarray('000111'),
+                'n': bitarray('1001'),
+                'o': bitarray('1000'),
+                'p': bitarray('101001'),
+                'q': bitarray('00001001101'),
+                'r': bitarray('1101'),
+                's': bitarray('1100'),
+                't': bitarray('0100'),
+                'u': bitarray('000100'),
+                'v': bitarray('0111100'),
+                'w': bitarray('011111'),
+                'x': bitarray('0000100011'),
+                'y': bitarray('101010'),
+                'z': bitarray('00011011110')}
         a = bitarray()
         message = 'the quick brown fox jumps over the lazy dog.'
         a.encode(code, message)
@@ -2117,10 +2277,10 @@ def test_read1(self):
         a = bitarray('01000001' '01000010' '01000011', endian='big')
         v = memoryview(a)
         self.assertEqual(len(v), 3)
-        self.assertEqual(v[0], 'A')
-        self.assertEqual(v[:].tobytes(), 'ABC')
+        self.assertEqual(v[0], 65 if is_py3k else 'A')
+        self.assertEqual(v[:].tobytes(), to_bytes('ABC'))
         a[13] = 1
-        self.assertEqual(v[:].tobytes(), 'AFC')
+        self.assertEqual(v[:].tobytes(), to_bytes('AFC'))
 
     def test_read2(self):
         a = bitarray([randint(0, 1) for d in range(8000)])
@@ -2135,22 +2295,22 @@ def test_write(self):
         a.setall(0)
         v = memoryview(a)
         self.assertFalse(v.readonly)
-        v[50000] = '\xff'
+        v[50000] = 255 if is_py3k else '\xff'
         self.assertEqual(a[399999:400009], bitarray('0111111110'))
         a[400003] = 0
         self.assertEqual(a[399999:400009], bitarray('0111011110'))
-        v[30001:30004] = 'ABC'
-        self.assertEqual(a[240000:240040].tobytes(), '\x00ABC\x00')
+        v[30001:30004] = to_bytes('ABC')
+        self.assertEqual(a[240000:240040].tobytes(), to_bytes('\x00ABC\x00'))
 
-if sys.version_info[:2] == (2, 7):
+if sys.version_info[:2] >= (2, 7):
     tests.append(BufferInterfaceTests)
 
 # ---------------------------------------------------------------------------
 
 def run(verbosity=1, repeat=1):
-    print('bitarray is installed in: ' + os.path.dirname(__file__))
-    print('bitarray version: ' + __version__)
-    print(sys.version)
+    print('bitarray is installed in: %s' % os.path.dirname(__file__))
+    print('bitarray version: %s' % __version__)
+    print('Python version: %s' % sys.version)
 
     suite = unittest.TestSuite()
     for cls in tests:
diff --git a/examples/README b/examples/README
index 668b5cdb5..94527c74f 100644
--- a/examples/README
+++ b/examples/README
@@ -1,32 +1,31 @@
 bloom.py:
-    Demonstrates the implementation of a Bloom filter, see:
+    Demonstrates the implementation of a "Bloom filter", see:
     http://en.wikipedia.org/wiki/Bloom_filter
 
 
 compress.py:
     Demonstrates how the bz2 module may be used to create a compressed
-    object which represents a bitarray
+    object which represents a bitarray.
 
 
-decoding.py
-    Bitarray's decode method is implemented in C.  Since the C code
-    might be hard to read, we have implemented exactly the same
-    algorithm in Python.  It is about 20 times slower than it's
-    C counterpart, since (recursive) function calls are more expensive
-    in Python than in C.
+gene.py:
+    shows how gene sequences (ATGC) can be very easily and efficiently
+    represented by bitarrays.
 
 
-huffman.py
-    Demonstrates building a Huffman tree.  Given an input file,
-    calculates the number of occurrences for each character;
-    from those frequencies, a Huffman tree is build; and by traversing
-    the tree, the Huffman code is evaluated.
-    Also allows encoding and decoding of a file, see -h option.
+helpers.py:
+    some useful helper functions which people have requested, but which are
+    not common enough to add to the library itself.
+
+
+huffman
+    Directory containing a library and examples for working with Huffman
+    trees and codes.
 
 
 mandel.py
-    Generates a .ppm image file of size 8000x6000 of the Mandelbrot set.
-    Despite it's size, the output image file has only a size of slightly
+    Generates a .ppm image file of size 8000 x 6000 of the Mandelbrot set.
+    Despite its size, the output image file has only a size of slightly
     over 6 Million bytes (uncompressed) because each pixel is stored in
     one bit.
     Requires numpy and scipy (see http://scipy.org/).
@@ -58,4 +57,3 @@ smallints.py
     For example, an array with 1000 5 bit integers can be created,
     allowing each element in the array to take values form 0 to 31,
     while the size of the object is 625 (5000/8) bytes.
-    Thanks to David Kammeyer for the idea to apply a bitarray in this way.
diff --git a/examples/bloom.py b/examples/bloom.py
index 44290ce7c..425d34167 100644
--- a/examples/bloom.py
+++ b/examples/bloom.py
@@ -2,8 +2,15 @@
 Demonstrates the implementation of a Bloom filter, see:
 http://en.wikipedia.org/wiki/Bloom_filter
 """
+from __future__ import print_function
+import sys
+
+if sys.version_info > (3,):
+    long = int
+    xrange = range
+
 import hashlib
-from math import exp, log
+from math import exp
 
 from bitarray import bitarray
 
@@ -29,9 +36,9 @@ def _hashes(self, key):
         the m array positions with a uniform random distribution
         """
         h = hashlib.new('md5')
-        h.update(str(key))
+        h.update(str(key).encode())
         x = long(h.hexdigest(), 16)
-        for _ in xrange(self.k):
+        for _unused in xrange(self.k):
             if x < self.m:
-                h.update('.')
+                h.update(b'.')
                 x = long(h.hexdigest(), 16)
@@ -46,11 +53,11 @@ def test_bloom(m, k, n):
         assert b.contains(i)
 
     p = (1.0 - exp(-k * (n + 0.5) / (m - 1))) ** k
-    print 100.0 * p, '%'
+    print(100.0 * p, '%')
 
     N = 100000
     false_pos = sum(b.contains(i) for i in xrange(n, n + N))
-    print 100.0 * false_pos / N, '%'
+    print(100.0 * false_pos / N, '%')
 
 
 if __name__ == '__main__':
diff --git a/examples/compress.py b/examples/compress.py
index 73b5bb045..83e018143 100644
--- a/examples/compress.py
+++ b/examples/compress.py
@@ -2,6 +2,8 @@
 Demonstrates how the bz2 module may be used to create a compressed object
 which represents a bitarray.
 """
+from __future__ import print_function
+
 import bz2
 
 from bitarray import bitarray
@@ -35,6 +37,6 @@ def decompress(obj):
     a.setall(0)
     a[::10] = True
     c = compress(a)
-    print c
+    print(c)
     b = decompress(c)
     assert a == b, a.endian() == b.endian()
diff --git a/examples/decoding.py b/examples/decoding.py
deleted file mode 100644
index b47c8ce58..000000000
--- a/examples/decoding.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import time
-from bitarray import bitarray
-from huffman import freq_string, huffCode
-
-
-def traverse(it, tree):
-    """
-    return False, when it has no more elements, or the leave node
-    resulting from traversing the tree
-    """
-    try:
-        subtree = tree[next(it)]
-    except StopIteration:
-        return False
-
-    if isinstance(subtree, list) and len(subtree)==2:
-        return traverse(it, subtree)
-    else: # leave node
-        return subtree
-
-
-def insert(tree, sym, ba):
-    """
-    insert symbol which is mapped to bitarray into tree
-    """
-    v = ba[0]
-    if len(ba) > 1:
-        if tree[v] == []:
-            tree[v] = [[], []]
-        insert(tree[v], sym, ba[1:])
-    else:
-        if tree[v] != []:
-            raise ValueError("prefix code ambiguous")
-        tree[v] = sym
-
-
-def decode(codedict, bitsequence):
-    """
-    this function does the same thing as the bitarray decode method
-    """
-    # generate tree from codedict
-    tree = [[], []]
-    for sym, ba in codedict.items():
-        insert(tree, sym, ba)
-
-    # actual decoding by traversing until StopIteration
-    res = []
-    it = iter(bitsequence)
-    while True:
-        r = traverse(it, tree)
-        if r is False:
-            break
-        else:
-            if r == []:
-                raise ValueError("prefix code does not match data")
-            res.append(r)
-    return res
-
-
-def main():
-    txt = open('README').read()
-    code = huffCode(freq_string(txt))
-
-    sample = 2000 * txt
-
-    a = bitarray()
-    a.encode(code, sample)
-
-    # Time the decode function above
-    start_time = time.time()
-    res = decode(code, a)
-    Py_time = time.time() - start_time
-    assert ''.join(res) == sample
-    print('Py_time: %.6f sec' % Py_time)
-
-    # Time the decode method which is implemented in C
-    start_time = time.time()
-    res = a.decode(code)
-    C_time = time.time() - start_time
-    assert ''.join(res) == sample
-    print('C_time: %.6f sec' % C_time)
-
-    print('Ratio: %f' % (Py_time / C_time))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/examples/gene.py b/examples/gene.py
new file mode 100644
index 000000000..30efbbe38
--- /dev/null
+++ b/examples/gene.py
@@ -0,0 +1,27 @@
+# gene sequence example from @yoch, see
+# https://github.com/ilanschnell/bitarray/pull/54
+
+from random import choice
+from timeit import timeit
+
+from bitarray import bitarray
+
+
+trans = {
+    "A": bitarray("00"),
+    "T": bitarray("01"),
+    "G": bitarray("10"),
+    "C": bitarray("11")
+}
+
+N = 10000
+seq = [choice("ATGC") for _ in range(N)]
+
+arr = bitarray()
+arr.encode(trans, seq)
+
+assert arr.decode(trans) == seq
+
+# time the decoding
+t = timeit(lambda: arr.decode(trans), number=1000)
+print(t)
diff --git a/examples/helpers.py b/examples/helpers.py
new file mode 100644
index 000000000..09cfd1abb
--- /dev/null
+++ b/examples/helpers.py
@@ -0,0 +1,47 @@
+from bitarray import bitarray
+
+
+def trim(a):
+    "return a bitarray, with zero bits removed from both beginning and end"
+    try:
+        first = a.index(1)
+    except ValueError:
+        return bitarray()
+    last = len(a) - 1
+    while not a[last]:
+        last -= 1
+    return a[first:last+1]
+
+def find_last(a, value=True):
+    "return the index of the last occurrence of value in bitarray a"
+    i = len(a) - 1
+    while not a[i] == bool(value):  # NOTE: i is not bounded if value is absent
+        i -= 1
+    return i
+
+def count_n(a, n):
+    "return the smallest index i for which a[:i].count() == n"
+    i, j = n, a.count(1, 0, n)  # a[:n] has at most n ones, so start at n
+    while j < n:
+        if a[i]:
+            j += 1
+        i += 1
+    return i
+
+if __name__ == '__main__':
+    # trim
+    assert trim(bitarray()) == bitarray()
+    assert trim(bitarray('000')) == bitarray()
+    assert trim(bitarray('111')) == bitarray('111')
+    assert trim(bitarray('00010100')) == bitarray('101')
+
+    # find_last
+    assert find_last(bitarray('00010100')) == 5
+    assert find_last(bitarray('00010111'), 0) == 4
+    assert find_last(bitarray('0000'), 0) == 3
+
+    # count_n
+    a = bitarray('11111011111011111011111001111011111011111011111010111010111')
+    for n in range(0, 48):
+        i = count_n(a, n)
+        assert a[:i].count() == n
diff --git a/examples/huffman.py b/examples/huffman.py
deleted file mode 100644
index e974bb976..000000000
--- a/examples/huffman.py
+++ /dev/null
@@ -1,141 +0,0 @@
-"""
-The non-trivial part of the code is derived from:
-http://en.literateprograms.org/Huffman_coding_(Python)
-
-The link also contains a good description of the algorithm.
-"""
-import os, sys
-from collections import defaultdict
-from bitarray import bitarray
-from heapq import heappush, heappop
-
-
-def huffCode(freq):
-    """
-    Given a dictionary mapping symbols to thier frequency,
-    return the Huffman code in the form of
-    a dictionary mapping the symbols to bitarrays.
-    """
-    minheap = []
-    for s in freq:
-        heappush(minheap, (freq[s], s))
-
-    while len(minheap) > 1:
-        childR, childL = heappop(minheap), heappop(minheap)
-        parent = (childL[0] + childR[0], childL, childR)
-        heappush(minheap, parent)
-
-    # Now minheap[0] is the root node of the Huffman tree
-
-    def traverse(tree, prefix=bitarray()):
-        if len(tree) == 2:
-            result[tree[1]] = prefix
-        else:
-            for i in range(2):
-                traverse(tree[i+1], prefix + bitarray([i]))
-
-    result = {}
-    traverse(minheap[0])
-    return result
-
-
-def freq_string(s):
-    """
-    Given a string, return a dictionary
-    mapping characters to thier frequency.
-    """
-    res = defaultdict(int)
-    for c in s:
-        res[c] += 1
-    return res
-
-
-def print_code(filename):
-    freq = freq_string(open(filename).read())
-    code = huffCode(freq)
-    print('   char    frequency    Huffman code')
-    print(70*'-')
-    for c in sorted(code, key=lambda c: freq[c], reverse=True):
-        print('%7r %8i        %s' % (c, freq[c], code[c].to01()))
-
-
-def encode(filename):
-    s = open(filename, 'rb').read()
-    code = huffCode(freq_string(s))
-    fo = open(filename + '.huff', 'wb')
-    fo.write(repr(code) + '\n')
-    a = bitarray(endian='little')
-    a.encode(code, s)
-    fo.write(str(a.buffer_info()[3])) # write unused bits as one char string
-    a.tofile(fo)
-    fo.close()
-    print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s)))
-
-
-def decode(filename):
-    fi = open(filename, 'rb')
-    code = eval(fi.readline())
-    u = int(fi.read(1)) # number of unused bits in last byte stored in file
-    a = bitarray(endian='little')
-    a.fromfile(fi)
-    fi.close()
-    if u: del a[-u:]
-
-    assert filename.endswith('.huff')
-    fo = open(filename[:-5] + '.out', 'wb')
-    fo.write(''.join(a.decode(code)))
-    fo.close()
-
-
-def usage():
-    print("""Usage: %s command FILE
-
-  print  --  calculate and display the Huffman code for the frequency
-             of characters in FILE.
-
-  encode --  encode FILE using the Huffman code calculated for the
-             frequency of characters in FILE itself.
-             The output is FILE.huff which contains both the Huffman
-             code and the bitarray resulting from the encoding.
-
-  decode --  decode FILE, which has .huff extension generated with the
-             encode command.  The output is written in a filename
-             where .huff is replaced by .out
-
-  test   --  encode FILE, decode FILE.huff, compare FILE with FILE.out,
-             and unlink created files.
-""" % sys.argv[0])
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    if len(sys.argv) != 3:
-        usage()
-
-    cmd, filename = sys.argv[1:3]
-
-    if cmd == 'print':
-        print_code(filename)
-
-    elif cmd == 'encode':
-        encode(filename)
-
-    elif cmd == 'decode':
-        if filename.endswith('.huff'):
-            decode(filename)
-        else:
-            print('Filename has no .huff extension')
-
-    elif cmd == 'test':
-        huff = filename + '.huff'
-        out = filename + '.out'
-
-        encode(filename)
-        decode(huff)
-        assert open(filename, 'rb').read() == open(out, 'rb').read()
-        os.unlink(huff)
-        os.unlink(out)
-
-    else:
-        print('Unknown command %r' % cmd)
-        usage()
diff --git a/examples/huffman/README b/examples/huffman/README
new file mode 100644
index 000000000..386946cb7
--- /dev/null
+++ b/examples/huffman/README
@@ -0,0 +1,16 @@
+compress.py:
+    Demonstrates how Huffman codes can be used to efficiently
+    compress and uncompress files (text or binary).
+    Given an input file, calculates the number of occurrences for each
+    character; from those frequencies, a Huffman tree is built.
+    Also allows encoding and decoding of a file, see -h option.
+
+
+decoding.py:
+    This example demonstrates how much faster bitarray's decoding is as
+    opposed to traversing the Huffman tree using Python.
+
+
+huffman.py:
+    Library containing useful functionality for working with Huffman trees
+    and codes.
diff --git a/examples/huffman/compress.py b/examples/huffman/compress.py
new file mode 100644
index 000000000..0051c2bef
--- /dev/null
+++ b/examples/huffman/compress.py
@@ -0,0 +1,126 @@
+"""
+This program demonstrates how Huffman codes can be used to efficiently
+compress and uncompress files (text or binary).
+"""
+import os
+from optparse import OptionParser
+from collections import Counter
+from bitarray import bitarray
+
+from huffman import is_py3k, huffCode, huffTree, print_code, write_dot
+
+
+def is_binary(s):
+    null = 0 if is_py3k else '\0'
+    return bool(null in s)
+
+
+def analyze(filename, printCode=False, writeDot=False):
+    with open(filename, 'rb') as fi:
+        s = fi.read()
+
+    freq = Counter(s)
+    tree = huffTree(freq)
+    if writeDot:
+        write_dot(tree, 'tree.dot', is_binary(s))
+    code = huffCode(tree)
+    if printCode:
+        print_code(freq, code)
+
+
+def encode(filename):
+    with open(filename, 'rb') as fi:
+        s = fi.read()
+
+    code = huffCode(huffTree(Counter(s)))
+    with open(filename + '.huff', 'wb') as fo:
+        for c in sorted(code):
+            fo.write(('%02x %s\n' % (c if is_py3k else ord(c),
+                                     code[c].to01())).encode())
+        a = bitarray(endian='little')
+        a.encode(code, s)
+        # write unused bits
+        fo.write(b'unused %s\n' % str(a.buffer_info()[3]).encode())
+        a.tofile(fo)
+    print('%d / %d' % (len(a), 8 * len(s)))
+    print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s)))
+
+
+def decode(filename):
+    assert filename.endswith('.huff')
+    code = {}
+
+    with open(filename, 'rb') as fi:
+        while 1:
+            line = fi.readline()
+            c, b = line.split()
+            if c == b'unused':
+                u = int(b)
+                break
+            i = int(c, 16)
+            code[i if is_py3k else chr(i)] = bitarray(b)
+        a = bitarray(endian='little')
+        a.fromfile(fi)
+
+    if u:
+        del a[-u:]
+
+    with open(filename[:-5] + '.out', 'wb') as fo:
+        for c in a.iterdecode(code):
+            fo.write(chr(c).encode('ISO-8859-1') if is_py3k else c)
+
+
+def main():
+    p = OptionParser("usage: %prog [options] FILE")
+    p.add_option(
+        '-s', '--show',
+        action="store_true",
+        help="calculate and print the Huffman code for the "
+             "frequency of characters in FILE")
+    p.add_option(
+        '-t', '--tree',
+        action="store_true",
+        help="calculate the Huffman tree (from the frequency of "
+             "characters in FILE) and write a .dot file")
+    p.add_option(
+        '-e', '--encode',
+        action="store_true",
+        help="encode (compress) FILE using the Huffman code calculated for "
+             "the frequency of characters in FILE itself. "
+             "The output is FILE.huff which contains both the Huffman "
+             "code and the bitarray resulting from the encoding.")
+    p.add_option(
+        '-d', '--decode',
+        action="store_true",
+        help="decode (decompress) FILE.huff and write the output to FILE.out")
+    p.add_option(
+        '--test',
+        action="store_true",
+        help="encode FILE, decode FILE.huff, compare FILE with FILE.out, "
+             "and unlink created files.")
+    opts, args = p.parse_args()
+    if len(args) != 1:
+        p.error('exactly one argument required')
+    filename = args[0]
+
+    if opts.show or opts.tree:
+        analyze(filename, printCode=opts.show, writeDot=opts.tree)
+
+    if opts.encode:
+        encode(filename)
+
+    if opts.decode:
+        decode(filename + '.huff')
+
+    if opts.test:
+        huff = filename + '.huff'
+        out = filename + '.out'
+        encode(filename)
+        decode(huff)
+        assert open(filename, 'rb').read() == open(out, 'rb').read()
+        os.unlink(huff)
+        os.unlink(out)
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/huffman/decoding.py b/examples/huffman/decoding.py
new file mode 100644
index 000000000..4c02f49e0
--- /dev/null
+++ b/examples/huffman/decoding.py
@@ -0,0 +1,49 @@
+from __future__ import print_function
+from time import time
+from collections import Counter
+from bitarray import bitarray
+
+from huffman import huffTree, huffCode, write_dot, make_tree, decode
+
+
+def main():
+    txt = 1000 * open('README').read()
+
+    t0 = time()
+    freq = Counter(txt)
+    print('count:     %9.6f sec' % (time() - t0))
+
+    t0 = time()
+    tree = huffTree(freq)
+    print('tree:      %9.6f sec' % (time() - t0))
+
+    write_dot(tree, 'tree.dot')
+    code = huffCode(tree)
+    # create tree from code (no frequencies)
+    write_dot(make_tree(code), 'tree_raw.dot')
+
+    a = bitarray()
+
+    t0 = time()
+    a.encode(code, txt)
+    print('C encode:  %9.6f sec' % (time() - t0))
+
+    # Time the pure Python decode function (imported from huffman.py)
+    t0 = time()
+    res = decode(tree, a)
+    Py_time = time() - t0
+    assert ''.join(res) == txt
+    print('Py decode: %9.6f sec' % Py_time)
+
+    # Time the decode method which is implemented in C
+    t0 = time()
+    res = a.decode(code)
+    C_time = time() - t0
+    assert ''.join(res) == txt
+    print('C decode:  %9.6f sec' % C_time)
+
+    print('Ratio: %f' % (Py_time / C_time))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py
new file mode 100644
index 000000000..57e674174
--- /dev/null
+++ b/examples/huffman/huffman.py
@@ -0,0 +1,222 @@
+"""
+This library contains useful functionality for working with Huffman trees
+and codes.
+"""
+from __future__ import print_function
+import sys
+from heapq import heappush, heappop
+from bitarray import bitarray
+
+is_py3k = bool(sys.version_info[0] == 3)
+
+
+class Node(object):
+    def __init__(self):
+        self.child = [None, None]
+        self.symbol = None
+        self.freq = None
+
+    def __lt__(self, other):
+        # heapq needs to be able to compare the nodes
+        return self.freq < other.freq
+
+
+def huffTree(freq):
+    """
+    Given a dictionary mapping symbols to their frequency, construct a Huffman
+    tree and return its root node.
+    """
+    minheap = []
+    # create all the leaf nodes and push them onto the queue
+    for c in sorted(freq):
+        nd = Node()
+        nd.symbol = c
+        nd.freq = freq[c]
+        heappush(minheap, nd)
+
+    # repeat the process until only one node remains
+    while len(minheap) > 1:
+        # take the nodes with smallest frequencies from the queue
+        childR = heappop(minheap)
+        childL = heappop(minheap)
+        # construct the new internal node and push it onto the queue
+        parent = Node()
+        parent.child = [childL, childR]
+        parent.freq = childL.freq + childR.freq
+        heappush(minheap, parent)
+
+    # return the one remaining node, which is the root of the Huffman tree
+    return minheap[0]
+
+
+def huffCode(tree):
+    """
+    Given a Huffman tree, traverse the tree and return the Huffman code, i.e.
+    a dictionary mapping symbol to bitarrays.
+    """
+    result = {}
+
+    def traverse(nd, prefix=bitarray()):
+        if nd.symbol is None: # parent, so traverse each of the children
+            for i in range(2):
+                traverse(nd.child[i], prefix + bitarray([i]))
+        else: # leaf
+            result[nd.symbol] = prefix
+
+    traverse(tree)
+    return result
+
+
+def insert_symbol(tree, ba, sym):
+    """
+    Insert symbol into a tree at the position described by the bitarray,
+    creating nodes as necessary; ValueError if the prefix code is ambiguous.
+    """
+    nd = tree
+    for k in ba:
+        prev = nd
+        nd = nd.child[k]
+        if nd and nd.symbol is not None:  # symbols may be falsy (e.g. 0)
+            raise ValueError("ambiguity")
+        if not nd:
+            nd = Node()
+            prev.child[k] = nd
+    if nd.symbol is not None or nd.child[0] or nd.child[1]:
+        raise ValueError("ambiguity")
+    nd.symbol = sym
+
+
+def make_tree(codedict):
+    """
+    Create a tree from the given code dictionary, and return its root node.
+    Unlike trees created by huffTree, all nodes will have .freq set to None.
+    """
+    tree = Node()
+    for sym, ba in codedict.items():
+        insert_symbol(tree, ba, sym)
+    return tree
+
+
+def traverse(tree, it):
+    """
+    Read bits from iterator it, walking the tree until a leaf is reached,
+    and return its symbol.  StopIteration means `it` was already exhausted.
+    """
+    nd = tree
+    while 1:
+        try:
+            nd = nd.child[next(it)]
+        except StopIteration:
+            if nd is not tree:  # bits ended in the middle of a symbol
+                raise ValueError("decoding not terminated")
+            raise  # clean end of data, lets the caller stop decoding
+        if not nd:
+            raise ValueError("prefix code does not match data in bitarray")
+        if nd.symbol is not None:
+            return nd.symbol
+
+
+def decode(tree, bitsequence):
+    """
+    Given a tree and a bitsequence, decode the bitsequence and return a
+    list of symbols.
+    """
+    res = []
+    it = iter(bitsequence)
+    while True:
+        try:
+            r = traverse(tree, it)
+        except StopIteration:
+            break
+        res.append(r)
+    return res
+
+
+def write_dot(tree, fn, binary=False):
+    """
+    Given a tree (which may or may not contain frequencies), write
+    a graphviz '.dot' file with a visual representation of the tree.
+    """
+    special_ascii = {' ': 'SPACE', '\n': 'LF', '\r': 'CR', '\t': 'TAB',
+                     '\\': r'\\', '"': r'\"'}
+    def disp_char(c):
+        if is_py3k and isinstance(c, int):
+            c = chr(c)
+        if binary:
+            return 'x%02x' % ord(c)
+        else:
+            if special_ascii:
+                res = special_ascii.get(c, c)
+                assert res.strip(), repr(c)
+                return res
+
+    def disp_freq(f):
+        if f is None:
+            return ''
+        return '%d' % f
+
+    with open(fn, 'w') as fo:    # dot -Tpng tree.dot -O
+        def write_nd(fo, nd):
+            if nd.symbol is not None:  # leaf node (symbol may be 0)
+                a, b = disp_freq(nd.freq), disp_char(nd.symbol)
+                fo.write('  %d  [label="%s%s%s"];\n' %
+                         (id(nd), a, ': ' if a and b else '', b))
+            else: # parent node
+                fo.write('  %d  [shape=circle, style=filled, '
+                         'fillcolor=grey, label="%s"];\n' %
+                         (id(nd), disp_freq(nd.freq)))
+
+            for k in range(2):
+                if nd.child[k]:
+                    fo.write('  %d->%d;\n' % (id(nd), id(nd.child[k])))
+
+            for k in range(2):
+                if nd.child[k]:
+                    write_nd(fo, nd.child[k])
+
+        fo.write('digraph BT {\n')
+        fo.write('  node [shape=box, fontsize=20, fontname="Arial"];\n')
+        write_nd(fo, tree)
+        fo.write('}\n')
+
+
+def print_code(freq, codedict):
+    """
+    Given a frequency map (dictionary mapping symbols to their frequency)
+    and a codedict, print them in a readable form.
+    """
+    special_ascii = {0: 'NULL', 9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'}
+    def disp_char(i):
+        if 32 <= i < 127:
+            return repr(chr(i))
+        return special_ascii.get(i, '')
+
+    print(' symbol     char    hex   frequency     Huffman code')
+    print(70 * '-')
+    for c in sorted(codedict, key=lambda c: (freq[c], c), reverse=True):
+        i = c if is_py3k else ord(c)
+        print('%7r     %-4s    0x%02x %10i     %s' % (
+            c, disp_char(i),
+            i, freq[c], codedict[c].to01()))
+
+
+def test():
+    freq = {'a': 10, 'b': 2, 'c': 1}
+    tree = huffTree(freq)
+    code = huffCode(tree)
+    assert len(code['a']) == 1
+    assert len(code['b']) == len(code['c']) == 2
+
+    code = {'a': bitarray('0'),
+            'b': bitarray('10'),
+            'c': bitarray('11')}
+    tree = make_tree(code)
+    txt = 'abca'
+    a = bitarray()
+    a.encode(code, txt)
+    assert a == bitarray('010110')
+    assert decode(tree, a) == ['a', 'b', 'c', 'a']
+
+
+if __name__ == '__main__':
+    test()
diff --git a/examples/ndarray.py b/examples/ndarray.py
index a426458b4..9193e1033 100644
--- a/examples/ndarray.py
+++ b/examples/ndarray.py
@@ -2,19 +2,20 @@
 # This example illusatrates how binary data can be efficiently be passed
 # between a bitarray object and an ndarray with dtype bool
 #
+from __future__ import print_function
+
 import bitarray
 import numpy
 
 a = bitarray.bitarray('100011001001')
-print a
+print(a)
 
 # bitarray  ->  ndarray
 b = numpy.fromstring(a.unpack(), dtype=bool)
-print repr(b)
+print(repr(b))
 
 # ndarray  ->  bitarray
 c = bitarray.bitarray()
 c.pack(b.tostring())
 
 assert a == c
-
diff --git a/examples/runall b/examples/runall
deleted file mode 100755
index f85427e9c..000000000
--- a/examples/runall
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash -x
-
-python huffman.py test README || exit 1
-
-for x in *.py
-do
-    echo Running: $x
-    python $x || exit 1
-done
-
-rm *.ppm *.pyc
diff --git a/examples/sieve.py b/examples/sieve.py
index 4241903a1..65281b998 100644
--- a/examples/sieve.py
+++ b/examples/sieve.py
@@ -1,3 +1,12 @@
+"""
+Demonstrates the implementation of "Sieve of Eratosthenes" algorithm for
+finding all prime numbers up to any given limit.
+"""
+from __future__ import print_function
+import sys
+if sys.version_info > (3,):
+    xrange = range
+
 import time
 
 import numpy
@@ -7,31 +16,31 @@ def primesToN1(n):
     # use numpy: 8-bit array of boolean flags
     if n < 2:
         return []
-    print 'init numpy'
+    print('init numpy')
     A = numpy.ones(n+1, numpy.bool) # set to 1 == True
     A[:2] = A[2*2::2] = 0
-    print 'sieve'
+    print('sieve')
     for i in xrange(3, int(n**.5)+1, 2): # odd numbers
         if A[i]:  # i is prime
             A[i*i::i*2] = 0
-    print 'counting'
-    print numpy.sum(A)
+    print('counting')
+    print(numpy.sum(A))
 
 
 def primesToN2(n):
     # use bitarray: 1-bit boolean flags
     if n < 2:
         return []
-    print 'init bitarray'
+    print('init bitarray')
     A = bitarray.bitarray(n+1)
     A.setall(1)
     A[:2] = A[2*2::2] = 0
-    print 'sieve'
+    print('sieve')
     for i in xrange(3, int(n**.5)+1, 2): # odd numbers
         if A[i]:  # i is prime
             A[i*i::i*2] = 0
-    print 'counting'
-    print A.count()
+    print('counting')
+    print(A.count())
 
 
 N = 100 * 1000 * 1000
@@ -39,7 +48,7 @@ def primesToN2(n):
 def run(func):
     start_time = time.time()
     func(N)
-    print 'time: %.6f sec\n' % (time.time() - start_time)
+    print('time: %.6f sec\n' % (time.time() - start_time))
 
 run(primesToN1)
 run(primesToN2)
diff --git a/setup.py b/setup.py
index 5eae52743..933793200 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,10 @@
 
 
 kwds = {}
-kwds['long_description'] = open('README.rst').read()
+try:
+    kwds['long_description'] = open('README.rst').read()
+except IOError:
+    pass
 
 # Read version from bitarray/__init__.py
 pat = re.compile(r'__version__\s*=\s*(\S+)', re.M)
@@ -25,15 +28,15 @@
         "Operating System :: OS Independent",
         "Programming Language :: C",
         "Programming Language :: Python :: 2",
-        "Programming Language :: Python :: 2.4",
-        "Programming Language :: Python :: 2.5",
         "Programming Language :: Python :: 2.6",
         "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.1",
-        "Programming Language :: Python :: 3.2",
         "Programming Language :: Python :: 3.3",
         "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
         "Topic :: Utilities",
     ],
     description = "efficient arrays of booleans -- C extension",
diff --git a/update_readme.py b/update_readme.py
index c4bbc7dba..a08caccd0 100644
--- a/update_readme.py
+++ b/update_readme.py
@@ -1,7 +1,10 @@
-import os
+import sys
+if not sys.version_info[0] == 3:
+    sys.exit("This program only runs with Python 3, sorry :-(")
+
 import re
 import doctest
-from cStringIO import StringIO
+from io import StringIO
 
 import bitarray
 
@@ -85,13 +88,12 @@ def main():
     fo.close()
 
     if new_data == data:
-        print "already up-to-date"
+        print("already up-to-date")
     else:
         with open('README.rst', 'w') as f:
             f.write(new_data)
 
     doctest.testfile('README.rst')
-    os.system('rst2html.py README.rst >README.html')
 
 
 if __name__ == '__main__':