From 5ac5b54b16b6a28bb68b55f5adda9b8dfc62b20d Mon Sep 17 00:00:00 2001 From: flowerhack Date: Tue, 7 Oct 2014 13:40:54 -0400 Subject: [PATCH 001/185] Minor clarity/wording changes to README --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index b98efea73..5b105dd68 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ bitarray: efficient arrays of booleans This module provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of -memory. The user can select between two representations; little-endian +memory. The user can select between two representations: little-endian and big-endian. All of the functionality is implemented in C. Methods for accessing the machine representation are provided. This can be useful when bit level access to binary files is required, @@ -80,9 +80,9 @@ Using the module ---------------- As mentioned above, bitarray objects behave very much like lists, so -there is not too new to learn. The biggest difference to list objects +there is not too much to learn. The biggest difference from list objects is the ability to access the machine representation of the object. -When doing so, the bit endianness is of importance, this issue is +When doing so, the bit endianness is of importance; this issue is explained in detail in the section below. Here, we demonstrate the basic usage of bitarray objects: @@ -233,11 +233,11 @@ endianness: >>> a.tobytes() == b.tobytes() True -The default bit endianness is currently big-endian, however this may change +The default bit endianness is currently big-endian; however, this may change in the future, and when dealing with the machine representation of bitarray objects, it is recommended to always explicitly specify the endianness. 
-Unless, explicitly converting to machine representation, using +Unless explicitly converting to machine representation, using the ``tobytes``, ``frombytes``, ``tofile`` and ``fromfile`` methods, the bit endianness will have no effect on any computation, and one can safely ignore setting the endianness, and other details of this section. From 60864a63a842b1dd9799a7a908bb842d2166da48 Mon Sep 17 00:00:00 2001 From: "David R. MacIver" Date: Fri, 3 Jul 2015 15:05:20 +0100 Subject: [PATCH 002/185] Handle extending self correctly Pythons list implementation has a special case to ensure that x.extend(x) is the same as x.extend(list(x)). i.e. it doesn't notice changes to the size of the list during the extend. Previously this case would not have been handled correctly by bitarray. This changes that by checking what the size of the other array is before we make any changes. This ensures both that this works correctly and also (importantly) that we don't try to write past the end of the array. --- bitarray/_bitarray.c | 9 ++++++++- bitarray/test_bitarray.py | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..fb9375c89 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -574,11 +574,18 @@ extend_bitarray(bitarrayobject *self, bitarrayobject *other) if (other->nbits == 0) return 0; + /* + Note: other may be self. Thus we take the size before we change the + size, ensuring we only copy the right parts of the array. 
+ */ + + idx_t n_other_bits = other->nbits; + n_sum = self->nbits + other->nbits; if (resize(self, n_sum) < 0) return -1; - copy_n(self, n_sum - other->nbits, other, 0, other->nbits); + copy_n(self, n_sum - other->nbits, other, 0, n_other_bits); return 0; } diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 83aa3487c..36504bfbb 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1211,6 +1211,11 @@ def test_string01(self): self.assertEqual(c.tolist(), a + b) self.check_obj(c) + def test_extend_self(self): + a = bitarray('1') + a.extend(a) + self.assertEqual(a, bitarray('11')) + tests.append(ExtendTests) From 1298bfcc2d99550172d60aa087977a8a06d3e50b Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sun, 29 Nov 2015 08:22:37 +0100 Subject: [PATCH 003/185] Fixed minor doc typo. --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index b98efea73..f33a16b2f 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ bitarray: efficient arrays of booleans This module provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of -memory. The user can select between two representations; little-endian +memory. The user can select between two representations: little-endian and big-endian. All of the functionality is implemented in C. Methods for accessing the machine representation are provided. This can be useful when bit level access to binary files is required, @@ -156,8 +156,8 @@ Bit endianness -------------- Since a bitarray allows addressing of individual bits, where the machine -represents 8 bits in one byte, there two obvious choices for this mapping; -little- and big-endian. +represents 8 bits in one byte, there are two obvious choices for this +mapping: little- and big-endian. 
When creating a new bitarray object, the endianness can always be specified explicitly: From 1241055ef0b6df373a71a7678d13cb4ab034e052 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 20 Jan 2016 09:38:11 -0600 Subject: [PATCH 004/185] add official support for Python 3.5 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5eae52743..20f98ef52 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", From a1646c01c977894df68ab2e7e94d0de82d9b9a37 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 7 Mar 2016 00:39:25 -0600 Subject: [PATCH 005/185] improve test output --- bitarray/test_bitarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 83aa3487c..44de2f025 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2144,9 +2144,9 @@ def test_write(self): # --------------------------------------------------------------------------- def run(verbosity=1, repeat=1): - print('bitarray is installed in: ' + os.path.dirname(__file__)) - print('bitarray version: ' + __version__) - print(sys.version) + print('bitarray is installed in: %s' % os.path.dirname(__file__)) + print('bitarray version: %s' % __version__) + print('Python version: %s' % sys.version) suite = unittest.TestSuite() for cls in tests: From e0ee1d3ea862cabf618a491e09a68a3d3f4fe40d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 25 Dec 2016 00:27:35 -0600 Subject: [PATCH 006/185] add official Python 3.6 support --- CHANGE_LOG | 3 ++- setup.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index fe6621a54..d485ed31d 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,5 +1,6 
@@ -2013-XX-XX 0.8.2: +2017-XX-XX 0.8.2: ------------------- + * add official Python 3.6 support (although it was already working) 2013-03-30 0.8.1: diff --git a/setup.py b/setup.py index 20f98ef52..cb9f06739 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", From 779159affb9120ac1b8a4e21322559c7d6729ca7 Mon Sep 17 00:00:00 2001 From: Dusan Klinec Date: Tue, 17 Jan 2017 14:51:57 +0100 Subject: [PATCH 007/185] copy_n: fast copy with memmove fixed - fixed a problem in cases a < b when self == other. Source memory was overwritten. --- bitarray/_bitarray.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..187679670 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -235,24 +235,30 @@ copy_n(bitarrayobject *self, idx_t a, assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); - /* XXX if (self->endian == other->endian && a % 8 == 0 && b % 8 == 0 && n >= 8) { - Py_ssize_t bytes; - idx_t bits; + const Py_ssize_t bytes = (Py_ssize_t) n / 8; + const idx_t bits = bytes * 8; + + if (a <= b){ + memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); + } + + if (n != bits) { + copy_n(self, bits + a, other, bits + b, n - bits); + } + + if (a > b){ + memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); + } - bytes = n / 8; - bits = 8 * bytes; - copy_n(self, bits + a, other, bits + b, n - bits); - memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); return; } - */ /* the different type of looping is only relevant when other and self are the same object, i.e. 
when copying a piece of an bitarrayobject onto itself */ - if (a < b) { + if (a <= b) { for (i = 0; i < n; i++) /* loop forward (delete) */ setbit(self, i + a, GETBIT(other, i + b)); } From f2332d434fb8b4c3db68362acf44ed5d79d84f19 Mon Sep 17 00:00:00 2001 From: Dusan Klinec Date: Tue, 17 Jan 2017 14:55:33 +0100 Subject: [PATCH 008/185] copy_n: if n is null return immediately --- bitarray/_bitarray.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 187679670..ad7b3bcc7 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -234,6 +234,9 @@ copy_n(bitarrayobject *self, idx_t a, assert(0 <= n && n <= self->nbits && n <= other->nbits); assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); + if (n == 0){ + return; + } if (self->endian == other->endian && a % 8 == 0 && b % 8 == 0 && n >= 8) { From 736239ccb904a85f2cc6c304cb850387ed9b93b0 Mon Sep 17 00:00:00 2001 From: Dusan Klinec Date: Tue, 17 Jan 2017 14:59:26 +0100 Subject: [PATCH 009/185] randomised tests - more rounds, helped with debugging fast_copy in copy_n --- bitarray/test_bitarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 44de2f025..8c0b5f5dd 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -515,7 +515,7 @@ def test_setitem4(self): for a in self.randombitarrays(): la = len(a) if la == 0: continue - for dum in range(3): + for dum in range(50): step = self.rndsliceidx(la) if step == 0: step = None s = slice(self.rndsliceidx(la), @@ -574,7 +574,7 @@ def test_delitem2(self): for a in self.randombitarrays(): la = len(a) if la == 0: continue - for dum in range(10): + for dum in range(50): step = self.rndsliceidx(la) if step == 0: step = None s = slice(self.rndsliceidx(la), From cb4570de38efc5c5d46dfa065c5f8763637f5f4d Mon Sep 17 00:00:00 2001 From: Matthew Joyce Date: Wed, 26 Apr 2017 14:17:44 +0100 Subject: 
[PATCH 010/185] Fix invalid pointer exceptions in pypy --- bitarray/_bitarray.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..3181010c5 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -3034,6 +3034,8 @@ init_bitarray(void) PyObject *m; Py_TYPE(&Bitarraytype) = &PyType_Type; + Py_TYPE(&SearchIter_Type) = &PyType_Type; + Py_TYPE(&DecodeIter_Type) = &PyType_Type; Py_TYPE(&BitarrayIter_Type) = &PyType_Type; #ifdef IS_PY3K m = PyModule_Create(&moduledef); From 6fdfb105f5a75c1fb99933a2ffec81cce0c71cc7 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Sun, 14 Jan 2018 14:14:51 -0800 Subject: [PATCH 011/185] DOC: improve description of fill() The previous wording made it sound like the function did the opposite of what it actually does. --- README.rst | 2 +- bitarray/_bitarray.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index f33a16b2f..7c866c21a 100644 --- a/README.rst +++ b/README.rst @@ -389,7 +389,7 @@ Reference ``fill()`` -> int Adds zeros to the end of the bitarray, such that the length of the bitarray - is not a multiple of 8. Returns the number of bits added (0..7). + will be a multiple of 8. Returns the number of bits added (0..7). ``frombytes(bytes)`` diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..1d3ab86d7 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1267,7 +1267,7 @@ PyDoc_STRVAR(fill_doc, "fill() -> int\n\ \n\ Adds zeros to the end of the bitarray, such that the length of the bitarray\n\ -is not a multiple of 8. Returns the number of bits added (0..7)."); +will be a multiple of 8. 
Returns the number of bits added (0..7)."); static PyObject * From 2b5712f7ae103d64d7145181022e5d3a7422f6cd Mon Sep 17 00:00:00 2001 From: yoch Date: Fri, 11 May 2018 02:04:22 +0300 Subject: [PATCH 012/185] Efficient decode and iterdecode (use raw C tree) --- bitarray/__init__.py | 25 +------ bitarray/_bitarray.c | 175 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 143 insertions(+), 57 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 7ccea7c64..9cead18d4 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -13,27 +13,6 @@ __version__ = '0.8.2' -def _tree_insert(tree, sym, ba): - """ - Insert symbol which is mapped to bitarray into tree - """ - v = ba[0] - if len(ba) > 1: - if tree[v] == []: - tree[v] = [[], []] - _tree_insert(tree[v], sym, ba[1:]) - else: - if tree[v] != []: - raise ValueError("prefix code ambiguous") - tree[v] = sym - -def _mk_tree(codedict): - # Generate tree from codedict - tree = [[], []] - for sym, ba in codedict.items(): - _tree_insert(tree, sym, ba) - return tree - def _check_codedict(codedict): if not isinstance(codedict, dict): raise TypeError("dictionary expected") @@ -99,7 +78,7 @@ def decode(self, codedict): Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and return the list of symbols.""" _check_codedict(codedict) - return self._decode(_mk_tree(codedict)) + return self._decode(codedict) def iterdecode(self, codedict): """iterdecode(code) -> iterator @@ -107,7 +86,7 @@ def iterdecode(self, codedict): Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and iterate over the symbols.""" _check_codedict(codedict) - return self._iterdecode(_mk_tree(codedict)) + return self._iterdecode(codedict) def encode(self, codedict, iterable): """encode(code, iterable) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..08731df37 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ 
-2142,51 +2142,154 @@ PyDoc_STRVAR(encode_doc, like the encode method without code checking"); -/* return the leave node resulting from traversing the (binary) tree, - or, when the iteration is finished, NULL -*/ -static PyObject * -tree_traverse(bitarrayobject *self, idx_t *indexp, PyObject *tree) +/* Binary Tree definition */ +typedef struct _bin_node { - PyObject *subtree; - long vi; + PyObject * symbol; + struct _bin_node * child[2]; +} binode; + + +static binode * +new_binode(void) +{ + binode * nd = malloc(sizeof *nd); + nd->symbol = NULL; + nd->child[0] = NULL; + nd->child[1] = NULL; + return nd; +} + +static void +delete_binode_tree(binode * root) +{ + if (!root) return; + delete_binode_tree(root->child[0]); + delete_binode_tree(root->child[1]); + free(root); +} +static int +insert_symbol(binode * root, bitarrayobject * self, PyObject * symbol) +{ + binode * nd = root, * prev = NULL; + for (Py_ssize_t i=0; i < self->nbits; ++i) + { + unsigned char k = GETBIT(self, i); + prev = nd, nd = nd->child[k]; + if (!nd) + { + nd = prev->child[k] = new_binode(); + } + } + + if (nd->symbol) + { + PyErr_SetString(PyExc_ValueError, + "prefix code ambiguous"); + return -1; + } + nd->symbol = symbol; + return 0; +} + +static binode * +make_tree (PyObject * codedict) +{ + binode * root = new_binode(); + + PyObject *symbol; + PyObject * array; + Py_ssize_t pos = 0; + + while (PyDict_Next(codedict, &pos, &symbol, &array)) { + int ok = insert_symbol(root, (bitarrayobject*) array, symbol); + /* if an error occured */ + if (ok < 0) { + delete_binode_tree(root); + return NULL; + } + } + + return root; +} + +static PyObject * +tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) +{ if (*indexp == self->nbits) /* stop iterator */ return NULL; - vi = GETBIT(self, *indexp); - (*indexp)++; - subtree = PyList_GetItem(tree, vi); + binode * nd = tree; - if (PyList_Check(subtree) && PyList_Size(subtree) == 2) - return tree_traverse(self, indexp, subtree); - else - return 
subtree; + while (1) + { + unsigned char k = GETBIT(self, *indexp); + (*indexp)++; + + nd = nd->child[k]; + + if (!nd) + { + PyErr_SetString(PyExc_ValueError, + "prefix code does not match data in bitarray"); + return NULL; + } + + if (nd->symbol) // leaf + { + return nd->symbol; + } + } } -#define IS_EMPTY_LIST(x) (PyList_Check(x) && PyList_Size(x) == 0) static PyObject * -bitarray_decode(bitarrayobject *self, PyObject *tree) +bitarray_decode(bitarrayobject *self, PyObject * codedict) { - PyObject *symbol, *list; - idx_t index = 0; + binode * tree = make_tree(codedict); + if (PyErr_Occurred()) + { + return NULL; + } + + binode * nd = tree; + + PyObject *list; list = PyList_New(0); if (list == NULL) return NULL; - /* traverse binary tree and append symbols to the result list */ - while ((symbol = tree_traverse(self, &index, tree)) != NULL) { - if (IS_EMPTY_LIST(symbol)) { + + for (Py_ssize_t i=0; i < self->nbits; ++i) { + unsigned char k = GETBIT(self, i); + + nd = nd->child[k]; + + if (!nd) { PyErr_SetString(PyExc_ValueError, "prefix code does not match data in bitarray"); goto error; } - if (PyList_Append(list, symbol) < 0) - goto error; + + if (nd->symbol) { + if (PyList_Append(list, nd->symbol) < 0) + goto error; + nd = tree; + } } + + if (nd != tree) { + PyErr_SetString(PyExc_ValueError, + "decoding not terminated"); + goto error; + } + + delete_binode_tree(tree); return list; + error: + delete_binode_tree(tree); Py_DECREF(list); return NULL; } @@ -2199,10 +2302,11 @@ symbols."); /*********************** (Bitarray) Decode Iterator *********************/ + typedef struct { PyObject_HEAD bitarrayobject *bao; /* bitarray we're searching in */ - PyObject *tree; /* prefix tree containing symbols */ + binode *tree; /* prefix tree containing symbols */ idx_t index; /* current index in bitarray */ } decodeiterobject; @@ -2210,20 +2314,28 @@ static PyTypeObject DecodeIter_Type; #define DecodeIter_Check(op) PyObject_TypeCheck(op, &DecodeIter_Type) + + /* create a new 
initialized bitarray search iterator object */ static PyObject * -bitarray_iterdecode(bitarrayobject *self, PyObject *tree) +bitarray_iterdecode(bitarrayobject *self, PyObject * codedict) { decodeiterobject *it; /* iterator to be returned */ + + binode *tree = make_tree(codedict); + if (PyErr_Occurred()) + { + return NULL; + } it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type); if (it == NULL) return NULL; + it->tree = tree; + Py_INCREF(self); it->bao = self; - Py_INCREF(tree); - it->tree = tree; it->index = 0; PyObject_GC_Track(it); return (PyObject *) it; @@ -2242,13 +2354,8 @@ decodeiter_next(decodeiterobject *it) assert(DecodeIter_Check(it)); symbol = tree_traverse(it->bao, &(it->index), it->tree); - if (symbol == NULL) /* stop iteration */ - return NULL; - if (IS_EMPTY_LIST(symbol)) { - PyErr_SetString(PyExc_ValueError, - "prefix code does not match data in bitarray"); + if (symbol == NULL) /* stop iteration OR error occured */ return NULL; - } Py_INCREF(symbol); return symbol; } @@ -2256,9 +2363,9 @@ decodeiter_next(decodeiterobject *it) static void decodeiter_dealloc(decodeiterobject *it) { + delete_binode_tree(it->tree); PyObject_GC_UnTrack(it); Py_XDECREF(it->bao); - Py_XDECREF(it->tree); PyObject_GC_Del(it); } From c636f0cc386c9ded9f31947bbd74affccc93c21a Mon Sep 17 00:00:00 2001 From: yoch Date: Mon, 14 May 2018 21:55:00 +0300 Subject: [PATCH 013/185] Adding buffer protocol support for Python 3 --- bitarray/_bitarray.c | 12 ++++++++++-- bitarray/test_bitarray.py | 14 +++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..be6b379f8 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -48,7 +48,7 @@ int PyIndex_Check(PyObject *o) #define Py_SIZE(ob) (((PyVarObject *) (ob))->ob_size) #endif -#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7 +#if PY_MAJOR_VERSION == 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7) /* (new) buffer protocol */ #define 
WITH_BUFFER #endif @@ -2787,6 +2787,8 @@ static PyTypeObject BitarrayIter_Type = { /********************* Bitarray Buffer Interface ************************/ #ifdef WITH_BUFFER + +#if PY_MAJOR_VERSION == 2 static Py_ssize_t bitarray_buffer_getreadbuf(bitarrayobject *self, Py_ssize_t index, const void **ptr) @@ -2831,6 +2833,8 @@ bitarray_buffer_getcharbuf(bitarrayobject *self, return Py_SIZE(self); } +#endif + static int bitarray_getbuffer(bitarrayobject *self, Py_buffer *view, int flags) { @@ -2857,14 +2861,18 @@ bitarray_releasebuffer(bitarrayobject *self, Py_buffer *view) } static PyBufferProcs bitarray_as_buffer = { +#if PY_MAJOR_VERSION == 2 // old buffer protocol (readbufferproc) bitarray_buffer_getreadbuf, (writebufferproc) bitarray_buffer_getwritebuf, (segcountproc) bitarray_buffer_getsegcount, (charbufferproc) bitarray_buffer_getcharbuf, +#endif (getbufferproc) bitarray_getbuffer, (releasebufferproc) bitarray_releasebuffer, }; + #endif /* WITH_BUFFER */ + /************************** Bitarray Type *******************************/ static PyTypeObject Bitarraytype = { @@ -2898,7 +2906,7 @@ static PyTypeObject Bitarraytype = { 0, /* tp_as_buffer */ #endif Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS -#ifdef WITH_BUFFER +#if defined(WITH_BUFFER) && PY_MAJOR_VERSION == 2 | Py_TPFLAGS_HAVE_NEWBUFFER #endif , /* tp_flags */ diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 44de2f025..b72b55451 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2113,10 +2113,10 @@ def test_read1(self): a = bitarray('01000001' '01000010' '01000011', endian='big') v = memoryview(a) self.assertEqual(len(v), 3) - self.assertEqual(v[0], 'A') - self.assertEqual(v[:].tobytes(), 'ABC') + #self.assertEqual(v[0], 'A') + self.assertEqual(v[:].tobytes(), b'ABC') a[13] = 1 - self.assertEqual(v[:].tobytes(), 'AFC') + self.assertEqual(v[:].tobytes(), b'AFC') def test_read2(self): a = bitarray([randint(0, 1) for d in 
range(8000)]) @@ -2131,14 +2131,14 @@ def test_write(self): a.setall(0) v = memoryview(a) self.assertFalse(v.readonly) - v[50000] = '\xff' + v[50000] = 255 if is_py3k else '\xff' self.assertEqual(a[399999:400009], bitarray('0111111110')) a[400003] = 0 self.assertEqual(a[399999:400009], bitarray('0111011110')) - v[30001:30004] = 'ABC' - self.assertEqual(a[240000:240040].tobytes(), '\x00ABC\x00') + v[30001:30004] = b'ABC' + self.assertEqual(a[240000:240040].tobytes(), b'\x00ABC\x00') -if sys.version_info[:2] == (2, 7): +if sys.version_info[:2] >= (2, 7): tests.append(BufferInterfaceTests) # --------------------------------------------------------------------------- From 53fdf77218f3afb34c4167f6e873511ee46f40e8 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 13:44:27 -0500 Subject: [PATCH 014/185] update changelog --- CHANGE_LOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index d485ed31d..152a2e8a4 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,6 +1,7 @@ -2017-XX-XX 0.8.2: +2018-XX-XX 0.8.2: ------------------- * add official Python 3.6 support (although it was already working) + * fix description of fill(), #52 2013-03-30 0.8.1: From 1322a28bfba843d588f84503b69d69f422042ad0 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 13:52:36 -0500 Subject: [PATCH 015/185] update changelog --- CHANGE_LOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 152a2e8a4..852e63bbb 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -2,6 +2,7 @@ ------------------- * add official Python 3.6 support (although it was already working) * fix description of fill(), #52 + * handle extending self correctly, #28 2013-03-30 0.8.1: From 8f4d51ba8d1ba09511dc3812a73bf2a0ac6f732e Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 14:57:23 -0500 Subject: [PATCH 016/185] declare variable on the top of the function --- bitarray/_bitarray.c | 15 +++++++-------- 1 file changed, 7 
insertions(+), 8 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 4a2e339a7..e2e02f8da 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -570,22 +570,21 @@ static int extend_bitarray(bitarrayobject *self, bitarrayobject *other) { idx_t n_sum; + idx_t n_other_bits; if (other->nbits == 0) return 0; - /* - Note: other may be self. Thus we take the size before we change the - size, ensuring we only copy the right parts of the array. - */ - - idx_t n_other_bits = other->nbits; - n_sum = self->nbits + other->nbits; if (resize(self, n_sum) < 0) return -1; - copy_n(self, n_sum - other->nbits, other, 0, n_other_bits); + /* + Note: other may be self. Thus we take the size before we change the + size, ensuring we only copy the right parts of the array. + */ + n_other_bits = other->nbits; + copy_n(self, n_sum - n_other_bits, other, 0, n_other_bits); return 0; } From 2e98d1239b5cdbf3e6d5709f710584a4006e6da7 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 15:09:43 -0500 Subject: [PATCH 017/185] update changelog; cleanup --- CHANGE_LOG | 1 + bitarray/_bitarray.c | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 852e63bbb..5668c72ba 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -3,6 +3,7 @@ * add official Python 3.6 support (although it was already working) * fix description of fill(), #52 * handle extending self correctly, #28 + * copy_n: fast copy with memmove fixed, #43 2013-03-30 0.8.1: diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index df058b313..c07a553c2 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -243,18 +243,15 @@ copy_n(bitarrayobject *self, idx_t a, const Py_ssize_t bytes = (Py_ssize_t) n / 8; const idx_t bits = bytes * 8; - if (a <= b){ + if (a <= b) { memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); } - if (n != bits) { copy_n(self, bits + a, other, bits + b, n - bits); } - - if (a > b){ + if (a > b) { 
memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); } - return; } From f9126221c23672161f7602ad1e1b97f5a7d2628c Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 15:24:18 -0500 Subject: [PATCH 018/185] update changelog --- CHANGE_LOG | 1 + README.rst | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 5668c72ba..2e0a253f0 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -4,6 +4,7 @@ * fix description of fill(), #52 * handle extending self correctly, #28 * copy_n: fast copy with memmove fixed, #43 + * minor clarity/wording changes to README, #23 2013-03-30 0.8.1: diff --git a/README.rst b/README.rst index af8a415ce..169b238ea 100644 --- a/README.rst +++ b/README.rst @@ -537,8 +537,13 @@ Reference Change log ---------- -2013-XX-XX 0.8.2: +2018-XX-XX 0.8.2: + * add official Python 3.6 support (although it was already working) + * fix description of fill(), #52 + * handle extending self correctly, #28 + * copy_n: fast copy with memmove fixed, #43 + * minor clarity/wording changes to README, #23 **0.8.1** (2013-03-30): From 68c0233d252bea027172138300682492c97c1c92 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 15:47:59 -0500 Subject: [PATCH 019/185] we must take the size before resizing --- bitarray/_bitarray.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index c07a553c2..863d29f80 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -234,7 +234,7 @@ copy_n(bitarrayobject *self, idx_t a, assert(0 <= n && n <= self->nbits && n <= other->nbits); assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); - if (n == 0){ + if (n == 0) { return; } @@ -581,15 +581,16 @@ extend_bitarray(bitarrayobject *self, bitarrayobject *other) if (other->nbits == 0) return 0; + /* + Note: other may be self. 
Thus we take the size before we resize, + ensuring we only copy the right parts of the array. + */ + n_other_bits = other->nbits; n_sum = self->nbits + other->nbits; + if (resize(self, n_sum) < 0) return -1; - /* - Note: other may be self. Thus we take the size before we change the - size, ensuring we only copy the right parts of the array. - */ - n_other_bits = other->nbits; copy_n(self, n_sum - n_other_bits, other, 0, n_other_bits); return 0; } From f490541277e7731b2039f9b8f11378eed7fad319 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 15:48:08 -0500 Subject: [PATCH 020/185] add tests --- bitarray/test_bitarray.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 1b1f1ba8f..ee5e4469c 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1212,10 +1212,18 @@ def test_string01(self): self.check_obj(c) def test_extend_self(self): + a = bitarray('') + a.extend(a) + self.assertEqual(a, bitarray('')) + a = bitarray('1') a.extend(a) self.assertEqual(a, bitarray('11')) + a = bitarray('110') + a.extend(a) + self.assertEqual(a, bitarray('110110')) + tests.append(ExtendTests) From 9222bad47028f3bb19a583bd119a9284d4db9f98 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 19:47:46 -0500 Subject: [PATCH 021/185] add self extend test --- bitarray/test_bitarray.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index ee5e4469c..6ee3bdb16 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1224,6 +1224,11 @@ def test_extend_self(self): a.extend(a) self.assertEqual(a, bitarray('110110')) + for a in self.randombitarrays(): + b = bitarray(a) + a.extend(a) + self.assertEqual(a, b + b) + tests.append(ExtendTests) From 75376c6713a3b2b6ab503dba0d9fa0d84be15083 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 May 2018 19:49:32 -0500 Subject: [PATCH 022/185] simplify test 
--- bitarray/test_bitarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 6ee3bdb16..3a9cc8500 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1212,9 +1212,9 @@ def test_string01(self): self.check_obj(c) def test_extend_self(self): - a = bitarray('') + a = bitarray() a.extend(a) - self.assertEqual(a, bitarray('')) + self.assertEqual(a, bitarray()) a = bitarray('1') a.extend(a) From a4c8820eb5259650a89f2907b088569fab5e2257 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 30 May 2018 11:20:23 -0500 Subject: [PATCH 023/185] add release date --- CHANGE_LOG | 2 +- README.rst | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 2e0a253f0..aa432080c 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,4 +1,4 @@ -2018-XX-XX 0.8.2: +2018-05-30 0.8.2: ------------------- * add official Python 3.6 support (although it was already working) * fix description of fill(), #52 diff --git a/README.rst b/README.rst index 169b238ea..53b184c21 100644 --- a/README.rst +++ b/README.rst @@ -537,7 +537,7 @@ Reference Change log ---------- -2018-XX-XX 0.8.2: +**0.8.2** (2018-05-30): * add official Python 3.6 support (although it was already working) * fix description of fill(), #52 @@ -560,12 +560,5 @@ Change log between two bitarrays -**0.7.0** (2012-02-15): - - * add iterdecode method (C level), which returns an iterator but is - otherwise like the decode method - * improve memory efficiency and speed of pickling large bitarray objects - - Please find the complete change log `here `_. 
From 5fb0ecb9aa7725b70235f14665e3ce6923a2bc26 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 30 May 2018 13:12:38 -0500 Subject: [PATCH 024/185] update git ignore file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 33dab8536..5241ecd73 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.egg-info *.so *.o +MANIFEST README.html build/ dist/ From fb35735a7c090bcb61d338f3b9615d91b61cab06 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 30 May 2018 13:14:22 -0500 Subject: [PATCH 025/185] add exception handling --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb9f06739..785cbb3c4 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,10 @@ kwds = {} -kwds['long_description'] = open('README.rst').read() +try: + kwds['long_description'] = open('README.rst').read() +except FileNotFoundError: + pass # Read version from bitarray/__init__.py pat = re.compile(r'__version__\s*=\s*(\S+)', re.M) From 340c5fc4925e814bea57c26e98506eda12e41f05 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 30 May 2018 13:28:50 -0500 Subject: [PATCH 026/185] use IOError instead --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 785cbb3c4..5b4b9bef0 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ kwds = {} try: kwds['long_description'] = open('README.rst').read() -except FileNotFoundError: +except IOError: pass # Read version from bitarray/__init__.py From bfec9657e40a2465f61d54731746241f41595eeb Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 30 May 2018 14:49:27 -0500 Subject: [PATCH 027/185] prepare for next release --- CHANGE_LOG | 5 +++++ bitarray/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index aa432080c..461097495 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,8 @@ +2018-XX-XX 0.8.3: +------------------- + * + + 2018-05-30 0.8.2: 
------------------- * add official Python 3.6 support (although it was already working) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 7ccea7c64..0afa343eb 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.8.2' +__version__ = '0.8.3' def _tree_insert(tree, sym, ba): From e0d724550577344a49ab0af9a553df11c7dc8869 Mon Sep 17 00:00:00 2001 From: wiggin15 Date: Thu, 14 Jun 2018 18:39:43 +0300 Subject: [PATCH 028/185] Get rid of warning during installation Installing with "easy_install" causes an installation warning: "module references __file__". zip_safe eliminates this message. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5b4b9bef0..403354598 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ ], description = "efficient arrays of booleans -- C extension", packages = ["bitarray"], + zip_safe = False, ext_modules = [Extension(name = "bitarray._bitarray", sources = ["bitarray/_bitarray.c"])], **kwds From 97f9ad5d866c4977e6fcdd49bf18116b4ef52040 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 6 Jul 2018 10:17:36 -0500 Subject: [PATCH 029/185] remove non-distutils option --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 403354598..5b4b9bef0 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ ], description = "efficient arrays of booleans -- C extension", packages = ["bitarray"], - zip_safe = False, ext_modules = [Extension(name = "bitarray._bitarray", sources = ["bitarray/_bitarray.c"])], **kwds From 89ab5761d1fef9e1403caca088cb7f7ac00fc0c3 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 6 Jul 2018 11:11:50 -0500 Subject: [PATCH 030/185] update changelog --- CHANGE_LOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 461097495..ad77d4ce0 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,6 
+1,6 @@ -2018-XX-XX 0.8.3: +2018-07-06 0.8.3: ------------------- - * + * add exception to setup.py when README.rst cannot be opened 2018-05-30 0.8.2: From 1110ae724a3ba802a2aa146b987740f85061ec3e Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 6 Jul 2018 10:41:54 -0500 Subject: [PATCH 031/185] update readme --- README.rst | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 53b184c21..31d78e499 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-0.8.2.tar.gz - $ cd bitarray-0.8.2 + $ tar xzf bitarray-0.8.3.tar.gz + $ cd bitarray-0.8.3 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -62,7 +62,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.8.2 + bitarray version: 0.8.3 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ........................................... @@ -537,6 +537,11 @@ Reference Change log ---------- +**0.8.3** (2018-07-06): + + * add exception to setup.py when README.rst cannot be opened + + **0.8.2** (2018-05-30): * add official Python 3.6 support (although it was already working) @@ -553,12 +558,5 @@ Change log like int, long (on Python 2), float, list, tuple, dict -**0.8.0** (2012-04-04): - - * add Python 2.4 support - * add (module level) function bitdiff for calculating the difference - between two bitarrays - - Please find the complete change log `here `_. 
From 948db916da5453e1ee49f8af0cc343fe1c928ca3 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 13 Jul 2018 16:48:31 -0500 Subject: [PATCH 032/185] add official Python 3.7 support --- CHANGE_LOG | 5 +++++ setup.py | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index ad77d4ce0..a718cfeb2 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,8 @@ +2018-XX-XX 0.8.4: +------------------- + * add official Python 3.7 support (drop Python 3.1 and 3.2 support) + + 2018-07-06 0.8.3: ------------------- * add exception to setup.py when README.rst cannot be opened diff --git a/setup.py b/setup.py index 5b4b9bef0..646299912 100644 --- a/setup.py +++ b/setup.py @@ -33,12 +33,11 @@ "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.1", - "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", From e6c4d74da5a4590b96359fbf0b308439a421efb5 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 28 Feb 2019 23:31:49 -0600 Subject: [PATCH 033/185] add comment --- examples/ndarray.py | 1 - examples/sieve.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/ndarray.py b/examples/ndarray.py index a426458b4..d569aa898 100644 --- a/examples/ndarray.py +++ b/examples/ndarray.py @@ -17,4 +17,3 @@ c.pack(b.tostring()) assert a == c - diff --git a/examples/sieve.py b/examples/sieve.py index 4241903a1..327fc12f9 100644 --- a/examples/sieve.py +++ b/examples/sieve.py @@ -1,3 +1,7 @@ +""" +Demonstrates the implementation of "Sieve of Eratosthenes" algorithm for +finding all prime numbers up to any given limit. 
+""" import time import numpy From 5805b879910c67bd50976fda32f975481331e11a Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 17 Apr 2019 22:31:40 -0500 Subject: [PATCH 034/185] minor cleanup --- bitarray/_bitarray.c | 98 ++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 53 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a36d62134..a93662c71 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2158,101 +2158,97 @@ like the encode method without code checking"); /* Binary Tree definition */ typedef struct _bin_node { - PyObject * symbol; - struct _bin_node * child[2]; + PyObject *symbol; + struct _bin_node *child[2]; } binode; -static binode * +static binode * new_binode(void) { - binode * nd = malloc(sizeof *nd); + binode *nd = malloc(sizeof *nd); + nd->symbol = NULL; nd->child[0] = NULL; nd->child[1] = NULL; return nd; } -static void -delete_binode_tree(binode * root) +static void +delete_binode_tree(binode *root) { - if (!root) return; + if (root == NULL) + return; + delete_binode_tree(root->child[0]); delete_binode_tree(root->child[1]); free(root); } static int -insert_symbol(binode * root, bitarrayobject * self, PyObject * symbol) +insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) { - binode * nd = root, * prev = NULL; - for (Py_ssize_t i=0; i < self->nbits; ++i) + binode *nd = root, *prev = NULL; + unsigned char k; + + for (Py_ssize_t i = 0; i < self->nbits; i++) { - unsigned char k = GETBIT(self, i); - prev = nd, nd = nd->child[k]; + k = GETBIT(self, i); + prev = nd; + nd = nd->child[k]; if (!nd) - { nd = prev->child[k] = new_binode(); - } } if (nd->symbol) { - PyErr_SetString(PyExc_ValueError, - "prefix code ambiguous"); + PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); return -1; } nd->symbol = symbol; return 0; } -static binode * -make_tree (PyObject * codedict) +static binode * +make_tree (PyObject *codedict) { - binode * root = new_binode(); - - 
PyObject *symbol; - PyObject * array; + binode *root; + PyObject *symbol, *array; Py_ssize_t pos = 0; + root = new_binode(); while (PyDict_Next(codedict, &pos, &symbol, &array)) { - int ok = insert_symbol(root, (bitarrayobject*) array, symbol); - /* if an error occured */ - if (ok < 0) { + if (insert_symbol(root, (bitarrayobject*) array, symbol) < 0) { delete_binode_tree(root); return NULL; } } - return root; } static PyObject * tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) { + binode *nd = tree; + unsigned char k; + if (*indexp == self->nbits) /* stop iterator */ return NULL; - binode * nd = tree; - - while (1) - { - unsigned char k = GETBIT(self, *indexp); + while (1) { + k = GETBIT(self, *indexp); (*indexp)++; - + nd = nd->child[k]; - if (!nd) - { + if (!nd) { PyErr_SetString(PyExc_ValueError, "prefix code does not match data in bitarray"); return NULL; } if (nd->symbol) // leaf - { return nd->symbol; - } } } @@ -2260,31 +2256,28 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) static PyObject * bitarray_decode(bitarrayobject *self, PyObject * codedict) { - binode * tree = make_tree(codedict); + binode *tree, *nd; + PyObject *list; + Py_ssize_t i; + unsigned char k; + + tree = make_tree(codedict); if (PyErr_Occurred()) - { return NULL; - } - - binode * nd = tree; - - PyObject *list; + nd = tree; list = PyList_New(0); if (list == NULL) return NULL; - for (Py_ssize_t i=0; i < self->nbits; ++i) { - unsigned char k = GETBIT(self, i); - + for (i = 0; i < self->nbits; i++) { + k = GETBIT(self, i); nd = nd->child[k]; - if (!nd) { PyErr_SetString(PyExc_ValueError, "prefix code does not match data in bitarray"); goto error; } - if (nd->symbol) { if (PyList_Append(list, nd->symbol) < 0) goto error; @@ -2331,15 +2324,14 @@ static PyTypeObject DecodeIter_Type; /* create a new initialized bitarray search iterator object */ static PyObject * -bitarray_iterdecode(bitarrayobject *self, PyObject * codedict) 
+bitarray_iterdecode(bitarrayobject *self, PyObject *codedict) { decodeiterobject *it; /* iterator to be returned */ - - binode *tree = make_tree(codedict); + binode *tree; + + tree = make_tree(codedict); if (PyErr_Occurred()) - { return NULL; - } it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type); if (it == NULL) From 1b6a14819e7beda2c4b5aac1c6bbbfaf9fc3e9f4 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 17 Apr 2019 22:35:10 -0500 Subject: [PATCH 035/185] update changelog --- CHANGE_LOG | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index a718cfeb2..777c1a70e 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,5 +1,7 @@ -2018-XX-XX 0.8.4: +2019-XX-XX 0.9.0: ------------------- + * more efficient decode and iterdecode by using C-level binary tree + instead of a python one, #54 * add official Python 3.7 support (drop Python 3.1 and 3.2 support) From 5e4f5c0b3895802611ca8d1cab004aaf01d04eca Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 17 Apr 2019 23:02:32 -0500 Subject: [PATCH 036/185] declare variables in beginning of function, use int --- bitarray/_bitarray.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a93662c71..183e189d0 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2189,9 +2189,10 @@ static int insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) { binode *nd = root, *prev = NULL; - unsigned char k; + Py_ssize_t i; + int k; - for (Py_ssize_t i = 0; i < self->nbits; i++) + for (i = 0; i < self->nbits; i++) { k = GETBIT(self, i); prev = nd; @@ -2218,7 +2219,7 @@ make_tree (PyObject *codedict) root = new_binode(); while (PyDict_Next(codedict, &pos, &symbol, &array)) { - if (insert_symbol(root, (bitarrayobject*) array, symbol) < 0) { + if (insert_symbol(root, (bitarrayobject *) array, symbol) < 0) { delete_binode_tree(root); return NULL; } @@ -2230,7 +2231,7 @@ static PyObject * 
tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) { binode *nd = tree; - unsigned char k; + int k; if (*indexp == self->nbits) /* stop iterator */ return NULL; @@ -2252,14 +2253,13 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) } } - static PyObject * bitarray_decode(bitarrayobject *self, PyObject * codedict) { binode *tree, *nd; PyObject *list; Py_ssize_t i; - unsigned char k; + int k; tree = make_tree(codedict); if (PyErr_Occurred()) From a1ad325ef7a5abc3505968d1c2a3669aca05bc3e Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 00:23:03 -0500 Subject: [PATCH 037/185] Py3k fixes in bloom filer example --- examples/bloom.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/examples/bloom.py b/examples/bloom.py index 44290ce7c..425d34167 100644 --- a/examples/bloom.py +++ b/examples/bloom.py @@ -2,8 +2,15 @@ Demonstrates the implementation of a Bloom filter, see: http://en.wikipedia.org/wiki/Bloom_filter """ +from __future__ import print_function +import sys + +if sys.version_info > (3,): + long = int + xrange = range + import hashlib -from math import exp, log +from math import exp from bitarray import bitarray @@ -29,9 +36,9 @@ def _hashes(self, key): the m array positions with a uniform random distribution """ h = hashlib.new('md5') - h.update(str(key)) + h.update(str(key).encode()) x = long(h.hexdigest(), 16) - for _ in xrange(self.k): + for _unused in xrange(self.k): if x < self.m: h.update('.') x = long(h.hexdigest(), 16) @@ -46,11 +53,11 @@ def test_bloom(m, k, n): assert b.contains(i) p = (1.0 - exp(-k * (n + 0.5) / (m - 1))) ** k - print 100.0 * p, '%' + print(100.0 * p, '%') N = 100000 false_pos = sum(b.contains(i) for i in xrange(n, n + N)) - print 100.0 * false_pos / N, '%' + print(100.0 * false_pos / N, '%') if __name__ == '__main__': From 66ce1e5a5fcda4b4f4da6e882691b0af010a0d43 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 00:40:30 -0500 
Subject: [PATCH 038/185] make sieve example Py3k compatible --- examples/sieve.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/examples/sieve.py b/examples/sieve.py index 327fc12f9..65281b998 100644 --- a/examples/sieve.py +++ b/examples/sieve.py @@ -2,6 +2,11 @@ Demonstrates the implementation of "Sieve of Eratosthenes" algorithm for finding all prime numbers up to any given limit. """ +from __future__ import print_function +import sys +if sys.version_info > (3,): + xrange = range + import time import numpy @@ -11,31 +16,31 @@ def primesToN1(n): # use numpy: 8-bit array of boolean flags if n < 2: return [] - print 'init numpy' + print('init numpy') A = numpy.ones(n+1, numpy.bool) # set to 1 == True A[:2] = A[2*2::2] = 0 - print 'sieve' + print('sieve') for i in xrange(3, int(n**.5)+1, 2): # odd numbers if A[i]: # i is prime A[i*i::i*2] = 0 - print 'counting' - print numpy.sum(A) + print('counting') + print(numpy.sum(A)) def primesToN2(n): # use bitarray: 1-bit boolean flags if n < 2: return [] - print 'init bitarray' + print('init bitarray') A = bitarray.bitarray(n+1) A.setall(1) A[:2] = A[2*2::2] = 0 - print 'sieve' + print('sieve') for i in xrange(3, int(n**.5)+1, 2): # odd numbers if A[i]: # i is prime A[i*i::i*2] = 0 - print 'counting' - print A.count() + print('counting') + print(A.count()) N = 100 * 1000 * 1000 @@ -43,7 +48,7 @@ def primesToN2(n): def run(func): start_time = time.time() func(N) - print 'time: %.6f sec\n' % (time.time() - start_time) + print('time: %.6f sec\n' % (time.time() - start_time)) run(primesToN1) run(primesToN2) From 8944f9deb779a80d8fc87b4e5b25113ac2c251bc Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 00:45:17 -0500 Subject: [PATCH 039/185] make ndarray example Py3k compatible --- examples/ndarray.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/ndarray.py b/examples/ndarray.py index d569aa898..9193e1033 100644 --- 
a/examples/ndarray.py +++ b/examples/ndarray.py @@ -2,15 +2,17 @@ # This example illusatrates how binary data can be efficiently be passed # between a bitarray object and an ndarray with dtype bool # +from __future__ import print_function + import bitarray import numpy a = bitarray.bitarray('100011001001') -print a +print(a) # bitarray -> ndarray b = numpy.fromstring(a.unpack(), dtype=bool) -print repr(b) +print(repr(b)) # ndarray -> bitarray c = bitarray.bitarray() From 587aead9d75fdfb86665c9dc7771e51f0af29376 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 01:10:53 -0500 Subject: [PATCH 040/185] make compress example Py3k compatible --- examples/compress.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/compress.py b/examples/compress.py index 73b5bb045..83e018143 100644 --- a/examples/compress.py +++ b/examples/compress.py @@ -2,6 +2,8 @@ Demonstrates how the bz2 module may be used to create a compressed object which represents a bitarray. 
""" +from __future__ import print_function + import bz2 from bitarray import bitarray @@ -35,6 +37,6 @@ def decompress(obj): a.setall(0) a[::10] = True c = compress(a) - print c + print(c) b = decompress(c) assert a == b, a.endian() == b.endian() From f412869455bc577845f9dcf2a4515bca99012fdc Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 01:35:49 -0500 Subject: [PATCH 041/185] add gene sequence example --- examples/gene.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 examples/gene.py diff --git a/examples/gene.py b/examples/gene.py new file mode 100644 index 000000000..ebd5e93b1 --- /dev/null +++ b/examples/gene.py @@ -0,0 +1,27 @@ +# gene sequence example from @yoch, see +# https://github.com/ilanschnell/bitarray/pull/54 + +from random import choice +from timeit import timeit + +from bitarray import bitarray + + +trans = { + "A": bitarray("00"), + "T": bitarray("01"), + "G": bitarray("10"), + "C": bitarray("11") +} + +N = 10000 +seq = [choice("ATGC") for _ in range(N)] + +arr = bitarray() +arr.encode(trans, seq) + +assert arr.decode(trans) == seq + +# decodage +t = timeit(lambda: arr.decode(trans), number=1000) +print(t) From c1c31574af5001a2c3ba906ac57efadb5b98d62b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 15:55:54 -0500 Subject: [PATCH 042/185] update changelog --- CHANGE_LOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 777c1a70e..d415fea97 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -2,6 +2,8 @@ ------------------- * more efficient decode and iterdecode by using C-level binary tree instead of a python one, #54 + * made all examples Py3k compatible + * add gene sequence example * add official Python 3.7 support (drop Python 3.1 and 3.2 support) From 1ad03ead3a02d16f27719932b64a493ee4232a52 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 15:58:36 -0500 Subject: [PATCH 043/185] use Python memory manager and add error handling 
--- bitarray/_bitarray.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 183e189d0..7fb2202d6 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2166,8 +2166,13 @@ typedef struct _bin_node static binode * new_binode(void) { - binode *nd = malloc(sizeof *nd); + binode *nd; + nd = PyMem_Malloc(sizeof *nd); + if (nd == NULL) { + PyErr_NoMemory(); + return NULL; + } nd->symbol = NULL; nd->child[0] = NULL; nd->child[1] = NULL; @@ -2182,7 +2187,7 @@ delete_binode_tree(binode *root) delete_binode_tree(root->child[0]); delete_binode_tree(root->child[1]); - free(root); + PyMem_Free(root); } static int @@ -2192,17 +2197,19 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) Py_ssize_t i; int k; - for (i = 0; i < self->nbits; i++) - { + for (i = 0; i < self->nbits; i++) { k = GETBIT(self, i); prev = nd; nd = nd->child[k]; - if (!nd) - nd = prev->child[k] = new_binode(); + if (!nd) { + nd = new_binode(); + if (nd == NULL) + return -1; + prev->child[k] = nd; + } } - if (nd->symbol) - { + if (nd->symbol) { PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); return -1; } @@ -2218,6 +2225,9 @@ make_tree (PyObject *codedict) Py_ssize_t pos = 0; root = new_binode(); + if (root == NULL) + return NULL; + while (PyDict_Next(codedict, &pos, &symbol, &array)) { if (insert_symbol(root, (bitarrayobject *) array, symbol) < 0) { delete_binode_tree(root); From ad142a72e6873ce08c839eb2d41825c700bc2701 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 15:59:52 -0500 Subject: [PATCH 044/185] improve examples for Py3k --- examples/decoding.py | 2 +- examples/huffman.py | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/examples/decoding.py b/examples/decoding.py index b47c8ce58..f75fe47ed 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -13,7 +13,7 @@ def traverse(it, tree): 
except StopIteration: return False - if isinstance(subtree, list) and len(subtree)==2: + if isinstance(subtree, list) and len(subtree) == 2: return traverse(it, subtree) else: # leave node return subtree diff --git a/examples/huffman.py b/examples/huffman.py index e974bb976..dda04a271 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -23,6 +23,7 @@ def huffCode(freq): while len(minheap) > 1: childR, childL = heappop(minheap), heappop(minheap) parent = (childL[0] + childR[0], childL, childR) + #print(minheap, parent) heappush(minheap, parent) # Now minheap[0] is the root node of the Huffman tree @@ -54,37 +55,36 @@ def print_code(filename): freq = freq_string(open(filename).read()) code = huffCode(freq) print(' char frequency Huffman code') - print(70*'-') + print(70 * '-') for c in sorted(code, key=lambda c: freq[c], reverse=True): print('%7r %8i %s' % (c, freq[c], code[c].to01())) def encode(filename): - s = open(filename, 'rb').read() + with open(filename, 'rb') as fi: + s = fi.read() code = huffCode(freq_string(s)) - fo = open(filename + '.huff', 'wb') - fo.write(repr(code) + '\n') - a = bitarray(endian='little') - a.encode(code, s) - fo.write(str(a.buffer_info()[3])) # write unused bits as one char string - a.tofile(fo) - fo.close() + with open(filename + '.huff', 'wb') as fo: + fo.write(repr(code) + '\n') + a = bitarray(endian='little') + a.encode(code, s) + fo.write(str(a.buffer_info()[3])) # write unused bits as one char string + a.tofile(fo) print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) def decode(filename): - fi = open(filename, 'rb') - code = eval(fi.readline()) - u = int(fi.read(1)) # number of unused bits in last byte stored in file - a = bitarray(endian='little') - a.fromfile(fi) - fi.close() + with open(filename, 'rb') as fi: + code = eval(fi.readline()) + u = int(fi.read(1)) # number of unused bits in last byte stored in file + a = bitarray(endian='little') + a.fromfile(fi) + if u: del a[-u:] assert 
filename.endswith('.huff') - fo = open(filename[:-5] + '.out', 'wb') - fo.write(''.join(a.decode(code))) - fo.close() + with open(filename[:-5] + '.out', 'wb') as fo: + fo.write(''.join(a.decode(code))) def usage(): From 3ccf66a10eee2c662dab8204ce8a04b96eb50d79 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 16:09:04 -0500 Subject: [PATCH 045/185] handle NULL return value --- bitarray/_bitarray.c | 6 +++--- examples/gene.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 7fb2202d6..fdb2869fc 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2218,7 +2218,7 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) } static binode * -make_tree (PyObject *codedict) +make_tree(PyObject *codedict) { binode *root; PyObject *symbol, *array; @@ -2272,7 +2272,7 @@ bitarray_decode(bitarrayobject *self, PyObject * codedict) int k; tree = make_tree(codedict); - if (PyErr_Occurred()) + if (tree == NULL || PyErr_Occurred()) return NULL; nd = tree; @@ -2340,7 +2340,7 @@ bitarray_iterdecode(bitarrayobject *self, PyObject *codedict) binode *tree; tree = make_tree(codedict); - if (PyErr_Occurred()) + if (tree == NULL || PyErr_Occurred()) return NULL; it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type); diff --git a/examples/gene.py b/examples/gene.py index ebd5e93b1..30efbbe38 100644 --- a/examples/gene.py +++ b/examples/gene.py @@ -9,7 +9,7 @@ trans = { "A": bitarray("00"), - "T": bitarray("01"), + "T": bitarray("01"), "G": bitarray("10"), "C": bitarray("11") } From 46fbdc5b09287ac876cccd38d5136d4ef3c77385 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 18:02:32 -0500 Subject: [PATCH 046/185] no reason to set prev --- bitarray/_bitarray.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index fdb2869fc..b6bcd5d04 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ 
-2193,7 +2193,7 @@ delete_binode_tree(binode *root) static int insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) { - binode *nd = root, *prev = NULL; + binode *nd = root, *prev; Py_ssize_t i; int k; @@ -2249,7 +2249,6 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) while (1) { k = GETBIT(self, *indexp); (*indexp)++; - nd = nd->child[k]; if (!nd) { @@ -2257,7 +2256,6 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) "prefix code does not match data in bitarray"); return NULL; } - if (nd->symbol) // leaf return nd->symbol; } From a1b2ed7d31c412b219f9c8d55ba393fbb5e593cf Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 18:22:37 -0500 Subject: [PATCH 047/185] bump version, formatting --- bitarray/__init__.py | 2 +- bitarray/_bitarray.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index d29716d33..bbd26dcfa 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.8.3' +__version__ = '0.9.0' def _check_codedict(codedict): diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index b6bcd5d04..c8c454e08 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2262,7 +2262,7 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) } static PyObject * -bitarray_decode(bitarrayobject *self, PyObject * codedict) +bitarray_decode(bitarrayobject *self, PyObject *codedict) { binode *tree, *nd; PyObject *list; From 0e4475ad7b47da1f213a6674351b083490ca00f7 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 18 Apr 2019 18:34:57 -0500 Subject: [PATCH 048/185] update docstrings in C code --- bitarray/_bitarray.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index c8c454e08..939c24085 100644 --- a/bitarray/_bitarray.c +++ 
b/bitarray/_bitarray.c @@ -2309,10 +2309,10 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) } PyDoc_STRVAR(decode_doc, -"_decode(tree) -> list\n\ +"_decode(codedict) -> list\n\ \n\ -Given a tree, decode the content of the bitarray and return the list of\n\ -symbols."); +Given a code dictionary, decode the content of the bitarray and return\n\ +the list of symbols."); /*********************** (Bitarray) Decode Iterator *********************/ @@ -2355,10 +2355,10 @@ bitarray_iterdecode(bitarrayobject *self, PyObject *codedict) } PyDoc_STRVAR(iterdecode_doc, -"_iterdecode(tree) -> iterator\n\ +"_iterdecode(codedict) -> iterator\n\ \n\ -Given a tree, decode the content of the bitarray and iterate over the\n\ -symbols."); +Given a code dictionary, decode the content of the bitarray and iterate\n\ +over the represented symbols."); static PyObject * decodeiter_next(decodeiterobject *it) From 60706a369e849d829dc95cc4ecd3591cf01f0556 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 19 Apr 2019 01:06:46 -0500 Subject: [PATCH 049/185] wording --- bitarray/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index bbd26dcfa..6a97795d7 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -92,7 +92,7 @@ def encode(self, codedict, iterable): """encode(code, iterable) Given a prefix code (a dict mapping symbols to bitarrays), -iterates over iterable object with symbols, and extends the bitarray +iterate over the iterable object with symbols, and extend the bitarray with the corresponding bitarray for each symbols.""" _check_codedict(codedict) self._encode(codedict, iterable) From f1acf77e1f40ea8ed0ec74f308752ce583665090 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 19 Apr 2019 17:58:33 -0500 Subject: [PATCH 050/185] fix test for Py3k --- bitarray/test_bitarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/test_bitarray.py 
b/bitarray/test_bitarray.py index c0e83a543..ceccc7d9c 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2131,7 +2131,7 @@ def test_read1(self): a = bitarray('01000001' '01000010' '01000011', endian='big') v = memoryview(a) self.assertEqual(len(v), 3) - #self.assertEqual(v[0], 'A') + self.assertEqual(v[0], 65 if is_py3k else 'A') self.assertEqual(v[:].tobytes(), b'ABC') a[13] = 1 self.assertEqual(v[:].tobytes(), b'AFC') From b23625b07e58552d74a5f9802452d263be390c48 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 19 Apr 2019 18:27:22 -0500 Subject: [PATCH 051/185] update changelog --- CHANGE_LOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index d415fea97..77c33a233 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -2,6 +2,8 @@ ------------------- * more efficient decode and iterdecode by using C-level binary tree instead of a python one, #54 + * added buffer protocol support for Python 3, #55 + * fixed invalid pointer exceptions in pypy, #47 * made all examples Py3k compatible * add gene sequence example * add official Python 3.7 support (drop Python 3.1 and 3.2 support) From db9170657ff3f01742e403aa24a79d4f668669c6 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 20 Apr 2019 03:53:51 -0500 Subject: [PATCH 052/185] fixing huffman example for Py3k --- examples/huffman.py | 46 +++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index dda04a271..ae8b9e67a 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -4,10 +4,15 @@ The link also contains a good description of the algorithm. 
""" -import os, sys +from __future__ import print_function + +import os +import sys +import heapq from collections import defaultdict from bitarray import bitarray -from heapq import heappush, heappop + +is_py3k = bool(sys.version_info[0] == 3) def huffCode(freq): @@ -17,14 +22,14 @@ def huffCode(freq): a dictionary mapping the symbols to bitarrays. """ minheap = [] - for s in freq: - heappush(minheap, (freq[s], s)) + for i, c in enumerate(freq): + heapq.heappush(minheap, (freq[c] + 1E-5 * i, c)) while len(minheap) > 1: - childR, childL = heappop(minheap), heappop(minheap) + childR = heapq.heappop(minheap) + childL = heapq.heappop(minheap) parent = (childL[0] + childR[0], childL, childR) - #print(minheap, parent) - heappush(minheap, parent) + heapq.heappush(minheap, parent) # Now minheap[0] is the root node of the Huffman tree @@ -51,40 +56,49 @@ def freq_string(s): return res +def read_file(filename): + with open(filename, 'rb') as fi: + res = fi.read() + return res + + def print_code(filename): - freq = freq_string(open(filename).read()) + freq = freq_string(read_file(filename)) code = huffCode(freq) print(' char frequency Huffman code') print(70 * '-') - for c in sorted(code, key=lambda c: freq[c], reverse=True): + for c in sorted(code, key=lambda c: (freq[c], c), reverse=True): print('%7r %8i %s' % (c, freq[c], code[c].to01())) def encode(filename): - with open(filename, 'rb') as fi: - s = fi.read() + s = read_file(filename) code = huffCode(freq_string(s)) with open(filename + '.huff', 'wb') as fo: - fo.write(repr(code) + '\n') + fo.write(repr(code).encode() + b'\n') a = bitarray(endian='little') a.encode(code, s) - fo.write(str(a.buffer_info()[3])) # write unused bits as one char string + # write unused bits as one char string + fo.write(str(a.buffer_info()[3]).encode()) a.tofile(fo) print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) def decode(filename): + assert filename.endswith('.huff') + with open(filename, 'rb') as fi: code = 
eval(fi.readline()) u = int(fi.read(1)) # number of unused bits in last byte stored in file a = bitarray(endian='little') a.fromfile(fi) - if u: del a[-u:] + if u: + del a[-u:] - assert filename.endswith('.huff') with open(filename[:-5] + '.out', 'wb') as fo: - fo.write(''.join(a.decode(code))) + for c in a.iterdecode(code): + fo.write(chr(c).encode() if is_py3k else c) def usage(): From 4c96443f553fd435474dfbe6a31989335cfa64bd Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 20 Apr 2019 13:18:58 -0500 Subject: [PATCH 053/185] fix huffman example for binary files in Python 3 --- examples/huffman.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index ae8b9e67a..094e7f336 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -23,6 +23,8 @@ def huffCode(freq): """ minheap = [] for i, c in enumerate(freq): + # having the exact same frequency for different symbols causes + # problems with heapq in Python 3, so we simply add a small float heapq.heappush(minheap, (freq[c] + 1E-5 * i, c)) while len(minheap) > 1: @@ -65,7 +67,7 @@ def read_file(filename): def print_code(filename): freq = freq_string(read_file(filename)) code = huffCode(freq) - print(' char frequency Huffman code') + print(' symbol frequency Huffman code') print(70 * '-') for c in sorted(code, key=lambda c: (freq[c], c), reverse=True): print('%7r %8i %s' % (c, freq[c], code[c].to01())) @@ -98,7 +100,7 @@ def decode(filename): with open(filename[:-5] + '.out', 'wb') as fo: for c in a.iterdecode(code): - fo.write(chr(c).encode() if is_py3k else c) + fo.write(chr(c).encode('ISO-8859-1') if is_py3k else c) def usage(): From cf30fd9c2acb4c486b2f8fb008daa603db01b5d6 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 20 Apr 2019 19:22:57 -0500 Subject: [PATCH 054/185] add useful output --- examples/huffman.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index 
094e7f336..c5c625f71 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -67,10 +67,12 @@ def read_file(filename): def print_code(filename): freq = freq_string(read_file(filename)) code = huffCode(freq) - print(' symbol frequency Huffman code') + print(' symbol char frequency Huffman code') print(70 * '-') for c in sorted(code, key=lambda c: (freq[c], c), reverse=True): - print('%7r %8i %s' % (c, freq[c], code[c].to01())) + print('%7r %-7r %8i %s' % ( + c, chr(c).encode('ISO-8859-1') if is_py3k else c, + freq[c], code[c].to01())) def encode(filename): From 55ac5ab7ac3556ae8dbeaf9d18df12207fa9df8c Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 20 Apr 2019 19:43:45 -0500 Subject: [PATCH 055/185] update Makefile --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index cf7a8ed95..a81a962ee 100644 --- a/Makefile +++ b/Makefile @@ -14,5 +14,7 @@ clean: rm -rf build dist rm -f bitarray/*.o bitarray/*.so rm -f bitarray/*.pyc + rm -f examples/*.pyc rm -rf bitarray/__pycache__ *.egg-info + rm -rf examples/__pycache__ rm -f README.html From c40b5c3c0288361aa6a4b2154e177e521cba5aca Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 20 Apr 2019 21:09:16 -0500 Subject: [PATCH 056/185] ensure stable Huffman code by sorting frequency dict --- examples/huffman.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index c5c625f71..a6cefd72f 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -22,10 +22,10 @@ def huffCode(freq): a dictionary mapping the symbols to bitarrays. 
""" minheap = [] - for i, c in enumerate(freq): + for i, c in enumerate(sorted(freq)): # having the exact same frequency for different symbols causes # problems with heapq in Python 3, so we simply add a small float - heapq.heappush(minheap, (freq[c] + 1E-5 * i, c)) + heapq.heappush(minheap, (freq[c] + 1E-3 * i, c)) while len(minheap) > 1: childR = heapq.heappop(minheap) @@ -60,8 +60,7 @@ def freq_string(s): def read_file(filename): with open(filename, 'rb') as fi: - res = fi.read() - return res + return fi.read() def print_code(filename): From e29598d0531a7fa4a00b2217c91814d861203e2b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 20 Apr 2019 23:07:33 -0500 Subject: [PATCH 057/185] create output format which is now consistent between Python versions - add useful print output --- examples/huffman.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index a6cefd72f..5db747779 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -49,8 +49,7 @@ def traverse(tree, prefix=bitarray()): def freq_string(s): """ - Given a string, return a dictionary - mapping characters to thier frequency. + Given a string, return a dict mapping characters to thier frequency. 
""" res = defaultdict(int) for c in s: @@ -63,36 +62,52 @@ def read_file(filename): return fi.read() +special_ascii = {9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} +def disp_char(i): + if 32 <= i < 127: + return chr(i) + return special_ascii.get(i, '') + def print_code(filename): freq = freq_string(read_file(filename)) code = huffCode(freq) - print(' symbol char frequency Huffman code') + print(' symbol char hex frequency Huffman code') print(70 * '-') for c in sorted(code, key=lambda c: (freq[c], c), reverse=True): - print('%7r %-7r %8i %s' % ( - c, chr(c).encode('ISO-8859-1') if is_py3k else c, - freq[c], code[c].to01())) + i = c if is_py3k else ord(c) + print('%7r %-3s 0x%02x %10i %s' % ( + c, disp_char(i), + i, freq[c], code[c].to01())) def encode(filename): s = read_file(filename) code = huffCode(freq_string(s)) with open(filename + '.huff', 'wb') as fo: - fo.write(repr(code).encode() + b'\n') + for c in sorted(code): + fo.write(('%02x %s\n' % (c if is_py3k else ord(c), + code[c].to01())).encode()) a = bitarray(endian='little') a.encode(code, s) - # write unused bits as one char string - fo.write(str(a.buffer_info()[3]).encode()) + # write unused bits + fo.write(b'unused %s\n' % str(a.buffer_info()[3]).encode()) a.tofile(fo) print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) def decode(filename): assert filename.endswith('.huff') + code = {} with open(filename, 'rb') as fi: - code = eval(fi.readline()) - u = int(fi.read(1)) # number of unused bits in last byte stored in file + while 1: + line = fi.readline() + c, b = line.split() + if c == b'unused': + u = int(b) + break + i = int(c, 16) + code[i if is_py3k else chr(i)] = bitarray(b) a = bitarray(endian='little') a.fromfile(fi) From a1ccf46da240b863aa414367632289e108ee2226 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 01:40:03 -0500 Subject: [PATCH 058/185] add comments --- examples/huffman.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git 
a/examples/huffman.py b/examples/huffman.py index 5db747779..2a8d5f7d5 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -33,16 +33,17 @@ def huffCode(freq): parent = (childL[0] + childR[0], childL, childR) heapq.heappush(minheap, parent) - # Now minheap[0] is the root node of the Huffman tree + # minheap[0] is now the root node of the Huffman tree + # now traverse the tree to create the Huffman codes + result = {} def traverse(tree, prefix=bitarray()): - if len(tree) == 2: + if len(tree) == 2: # leave result[tree[1]] = prefix - else: + else: # parent, so traverse each of the children for i in range(2): traverse(tree[i+1], prefix + bitarray([i])) - result = {} traverse(minheap[0]) return result From b8a121560c1912c1c0e874f7afced966946a7f44 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 03:07:28 -0500 Subject: [PATCH 059/185] improve cli --- examples/huffman.py | 77 ++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index 2a8d5f7d5..559706bed 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -120,55 +120,54 @@ def decode(filename): fo.write(chr(c).encode('ISO-8859-1') if is_py3k else c) -def usage(): - print("""Usage: %s command FILE - - print -- calculate and display the Huffman code for the frequency - of characters in FILE. - - encode -- encode FILE using the Huffman code calculated for the - frequency of characters in FILE itself. - The output is FILE.huff which contains both the Huffman - code and the bitarray resulting from the encoding. - - decode -- decode FILE, which has .huff extension generated with the - encode command. The output is written in a filename - where .huff is replaced by .out - - test -- encode FILE, decode FILE.huff, compare FILE with FILE.out, - and unlink created files. 
-""" % sys.argv[0]) - sys.exit(0) - - -if __name__ == '__main__': - if len(sys.argv) != 3: - usage() - - cmd, filename = sys.argv[1:3] - - if cmd == 'print': +def main(): + from optparse import OptionParser + + p = OptionParser("usage: %prog [options] FILE") + p.add_option( + '-p', '--print', + action="store_true", + help="calculate and print the Huffman code for the " + "frequency of characters in FILE") + p.add_option( + '-e', '--encode', + action="store_true", + help="encode FILE using the Huffman code calculated for the " + "frequency of characters in FILE itself. " + "The output is FILE.huff which contains both the Huffman " + "code and the bitarray resulting from the encoding.") + p.add_option( + '-d', '--decode', + action="store_true", + help="decode FILE.huff and write the output to FILE.out") + p.add_option( + '-t', '--test', + action="store_true", + help="encode FILE, decode FILE.huff, compare FILE with FILE.out, " + "and unlink created files.") + opts, args = p.parse_args() + if len(args) != 1: + p.error('exactly one argument required') + filename = args[0] + + if opts.print: print_code(filename) - elif cmd == 'encode': + if opts.encode: encode(filename) - elif cmd == 'decode': - if filename.endswith('.huff'): - decode(filename) - else: - print('Filename has no .huff extension') + if opts.decode: + decode(filename + '.huff') - elif cmd == 'test': + if opts.test: huff = filename + '.huff' out = filename + '.out' - encode(filename) decode(huff) assert open(filename, 'rb').read() == open(out, 'rb').read() os.unlink(huff) os.unlink(out) - else: - print('Unknown command %r' % cmd) - usage() + +if __name__ == '__main__': + main() From a7ba3247167173d9764f4d1dc6a2c6474b334182 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 12:23:24 -0500 Subject: [PATCH 060/185] fix segfault when decoing invalid bitarrays --- bitarray/_bitarray.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bitarray/_bitarray.c 
b/bitarray/_bitarray.c index 839ee9fa5..691c2abce 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2243,10 +2243,12 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) binode *nd = tree; int k; - if (*indexp == self->nbits) /* stop iterator */ - return NULL; - while (1) { + /* stop iterator - we need to check constantly, as the encoded + bitarray may be invalid */ + if (*indexp >= self->nbits) + return NULL; + k = GETBIT(self, *indexp); (*indexp)++; nd = nd->child[k]; From 853479a68dc63b0938b787739db05d03ea3fd6be Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 12:32:17 -0500 Subject: [PATCH 061/185] minor style issue --- bitarray/_bitarray.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 691c2abce..6c80ee9ef 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2252,13 +2252,12 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) k = GETBIT(self, *indexp); (*indexp)++; nd = nd->child[k]; - - if (!nd) { + if (nd == NULL) { PyErr_SetString(PyExc_ValueError, "prefix code does not match data in bitarray"); return NULL; } - if (nd->symbol) // leaf + if (nd->symbol) /* leaf */ return nd->symbol; } } @@ -2983,7 +2982,7 @@ bitarray_releasebuffer(bitarrayobject *self, Py_buffer *view) } static PyBufferProcs bitarray_as_buffer = { -#if PY_MAJOR_VERSION == 2 // old buffer protocol +#if PY_MAJOR_VERSION == 2 /* old buffer protocol */ (readbufferproc) bitarray_buffer_getreadbuf, (writebufferproc) bitarray_buffer_getwritebuf, (segcountproc) bitarray_buffer_getsegcount, From a20ca97c7e9cc314251cc19ba5246429403a6f06 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 16:11:13 -0500 Subject: [PATCH 062/185] drop Python 2.4 support --- CHANGE_LOG | 3 ++- bitarray/_bitarray.c | 23 ++--------------------- setup.py | 1 - 3 files changed, 4 insertions(+), 23 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 
77c33a233..f9f40e095 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -6,7 +6,8 @@ * fixed invalid pointer exceptions in pypy, #47 * made all examples Py3k compatible * add gene sequence example - * add official Python 3.7 support (drop Python 3.1 and 3.2 support) + * add official Python 3.7 support + * drop Python 2.4, 3.1 and 3.2 support 2018-07-06 0.8.3: diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 6c80ee9ef..5577b15ba 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -23,25 +23,6 @@ #define Py_TPFLAGS_HAVE_WEAKREFS 0 #endif -#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 5 -/* Py_ssize_t was introduced in Python 2.5, substitute long for it */ -typedef long Py_ssize_t; -#define PY_SSIZE_T_MAX LONG_MAX -#define PY_SSIZE_T_MIN LONG_MIN -Py_ssize_t PyNumber_AsSsize_t(PyObject *o, PyObject *exc) -{ - return PyLong_AsLong(o); -} -int PyIndex_Check(PyObject *o) -{ - return 0; -} -#define PY_SSIZE_T_FMT "l" -#else -/* Python 2.5 and up uses 'n' as the format char for Py_ssize_t */ -#define PY_SSIZE_T_FMT "n" -#endif - #if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 /* backward compatibility with Python 2.5 */ #define Py_TYPE(ob) (((PyObject *) (ob))->ob_type) @@ -1056,7 +1037,7 @@ bitarray_search(bitarrayobject *self, PyObject *args) bitarrayobject *xa; idx_t p; - if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":_search", &x, &limit)) + if (!PyArg_ParseTuple(args, "O|n:_search", &x, &limit)) return NULL; if (!bitarray_Check(x)) { @@ -1444,7 +1425,7 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args) idx_t t, p; long cur; - if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":fromfile", &f, &nbytes)) + if (!PyArg_ParseTuple(args, "O|n:fromfile", &f, &nbytes)) return NULL; fp = PyFile_AsFile(f); diff --git a/setup.py b/setup.py index 646299912..0473b1f62 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,6 @@ "Operating System :: OS Independent", "Programming Language :: C", "Programming Language :: Python :: 2", - "Programming Language 
:: Python :: 2.4", "Programming Language :: Python :: 2.5", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", From c797465761595c439ab0fd78856245ae5634fa1f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 16:55:22 -0500 Subject: [PATCH 063/185] handle not terminated decoding properly when iterating, and add tests --- bitarray/_bitarray.c | 14 ++++++++------ bitarray/test_bitarray.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 5577b15ba..ab11b7b84 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2226,9 +2226,12 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) while (1) { /* stop iterator - we need to check constantly, as the encoded - bitarray may be invalid */ - if (*indexp >= self->nbits) - return NULL; + bitarray may be terminated */ + if (*indexp >= self->nbits) { + if (nd != tree) + PyErr_SetString(PyExc_ValueError, "decoding not terminated"); + return NULL; + } k = GETBIT(self, *indexp); (*indexp)++; @@ -2263,7 +2266,7 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) for (i = 0; i < self->nbits; i++) { k = GETBIT(self, i); nd = nd->child[k]; - if (!nd) { + if (nd == NULL) { PyErr_SetString(PyExc_ValueError, "prefix code does not match data in bitarray"); goto error; @@ -2276,8 +2279,7 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) } if (nd != tree) { - PyErr_SetString(PyExc_ValueError, - "decoding not terminated"); + PyErr_SetString(PyExc_ValueError, "decoding not terminated"); goto error; } diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index ceccc7d9c..3e66bffa2 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2020,6 +2020,13 @@ def test_decode_empty(self): self.assertEqual(d, {'a': bitarray('1')}) self.assertEqual(len(a), 0) + def test_decode_no_term(self): + d = {'a': bitarray('0'), 'b': bitarray('111')} + a = 
bitarray('011') + self.assertRaises(ValueError, a.decode, d) + self.assertEqual(a, bitarray('011')) + self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('111')}) + def test_decode_buggybitarray(self): d = {'a': bitarray('0')} a = bitarray('1') @@ -2027,6 +2034,16 @@ def test_decode_buggybitarray(self): self.assertEqual(a, bitarray('1')) self.assertEqual(d, {'a': bitarray('0')}) + def test_iterdecode_no_term(self): + d = {'a': bitarray('0'), 'b': bitarray('111')} + a = bitarray('011') + it = a.iterdecode(d) + if not is_py3k: + self.assertEqual(it.next(), 'a') + self.assertRaises(ValueError, it.next) + self.assertEqual(a, bitarray('011')) + self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('111')}) + def test_iterdecode_buggybitarray(self): d = {'a': bitarray('0')} a = bitarray('1') From 63eca8524c4537d4579772bc3708a74b783c9f90 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 17:31:48 -0500 Subject: [PATCH 064/185] simplify logic in tree_traverse --- bitarray/_bitarray.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index ab11b7b84..e4d0ae78c 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2224,15 +2224,7 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) binode *nd = tree; int k; - while (1) { - /* stop iterator - we need to check constantly, as the encoded - bitarray may be terminated */ - if (*indexp >= self->nbits) { - if (nd != tree) - PyErr_SetString(PyExc_ValueError, "decoding not terminated"); - return NULL; - } - + while (*indexp < self->nbits) { k = GETBIT(self, *indexp); (*indexp)++; nd = nd->child[k]; @@ -2244,6 +2236,10 @@ tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) if (nd->symbol) /* leaf */ return nd->symbol; } + if (nd != tree) + PyErr_SetString(PyExc_ValueError, "decoding not terminated"); + + return NULL; } static PyObject * From 3eac3414cc942614661a1261e288b5a61106eace Mon Sep 17 00:00:00 
2001 From: Ilan Schnell Date: Sun, 21 Apr 2019 18:09:09 -0500 Subject: [PATCH 065/185] deallocate tree when list creation fails --- bitarray/_bitarray.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index e4d0ae78c..d605f4cf1 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2256,9 +2256,10 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) nd = tree; list = PyList_New(0); - if (list == NULL) + if (list == NULL) { + delete_binode_tree(tree); return NULL; - + } for (i = 0; i < self->nbits; i++) { k = GETBIT(self, i); nd = nd->child[k]; @@ -2267,18 +2268,16 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) "prefix code does not match data in bitarray"); goto error; } - if (nd->symbol) { + if (nd->symbol) { /* leaf */ if (PyList_Append(list, nd->symbol) < 0) goto error; nd = tree; } } - if (nd != tree) { PyErr_SetString(PyExc_ValueError, "decoding not terminated"); goto error; } - delete_binode_tree(tree); return list; From ba55b84208b6a4e32319e4c9f17bbcc6433c7455 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 01:29:53 -0500 Subject: [PATCH 066/185] improve printed code table --- examples/huffman.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/huffman.py b/examples/huffman.py index 559706bed..6f8c0dd75 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -63,20 +63,20 @@ def read_file(filename): return fi.read() -special_ascii = {9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} +special_ascii = {0: 'NULL', 9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} def disp_char(i): if 32 <= i < 127: - return chr(i) + return repr(chr(i)) return special_ascii.get(i, '') def print_code(filename): freq = freq_string(read_file(filename)) code = huffCode(freq) - print(' symbol char hex frequency Huffman code') + print(' symbol char hex frequency Huffman code') print(70 * '-') for c in sorted(code, key=lambda c: (freq[c], c), 
reverse=True): i = c if is_py3k else ord(c) - print('%7r %-3s 0x%02x %10i %s' % ( + print('%7r %-4s 0x%02x %10i %s' % ( c, disp_char(i), i, freq[c], code[c].to01())) From 0ca4fe4da736f68f4ff84794b978251b24b6aee2 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 01:56:30 -0500 Subject: [PATCH 067/185] add release date, update readme --- CHANGE_LOG | 2 +- Makefile | 1 - README.rst | 27 ++++++++++++++++----------- update_readme.py | 2 +- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index f9f40e095..1367b598f 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,4 +1,4 @@ -2019-XX-XX 0.9.0: +2019-04-22 0.9.0: ------------------- * more efficient decode and iterdecode by using C-level binary tree instead of a python one, #54 diff --git a/Makefile b/Makefile index a81a962ee..7144a83d2 100644 --- a/Makefile +++ b/Makefile @@ -17,4 +17,3 @@ clean: rm -f examples/*.pyc rm -rf bitarray/__pycache__ *.egg-info rm -rf examples/__pycache__ - rm -f README.html diff --git a/README.rst b/README.rst index 31d78e499..bcb512cc5 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-0.8.3.tar.gz - $ cd bitarray-0.8.3 + $ tar xzf bitarray-0.9.0.tar.gz + $ cd bitarray-0.9.0 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -62,7 +62,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.8.3 + bitarray version: 0.9.0 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ........................................... 
@@ -373,7 +373,7 @@ Reference ``encode(code, iterable)`` Given a prefix code (a dict mapping symbols to bitarrays), - iterates over iterable object with symbols, and extends the bitarray + iterate over the iterable object with symbols, and extend the bitarray with the corresponding bitarray for each symbols. @@ -537,6 +537,18 @@ Reference Change log ---------- +**0.9.0** (2019-04-22): + + * more efficient decode and iterdecode by using C-level binary tree + instead of a python one, #54 + * added buffer protocol support for Python 3, #55 + * fixed invalid pointer exceptions in pypy, #47 + * made all examples Py3k compatible + * add gene sequence example + * add official Python 3.7 support + * drop Python 2.4, 3.1 and 3.2 support + + **0.8.3** (2018-07-06): * add exception to setup.py when README.rst cannot be opened @@ -551,12 +563,5 @@ Change log * minor clarity/wording changes to README, #23 -**0.8.1** (2013-03-30): - - * fix issue #10, i.e. int(bitarray()) segfault - * added tests for using a bitarray object as an argument to functions - like int, long (on Python 2), float, list, tuple, dict - - Please find the complete change log `here `_. 
diff --git a/update_readme.py b/update_readme.py index c4bbc7dba..93f523cab 100644 --- a/update_readme.py +++ b/update_readme.py @@ -91,7 +91,7 @@ def main(): f.write(new_data) doctest.testfile('README.rst') - os.system('rst2html.py README.rst >README.html') + #os.system('rst2html.py README.rst >README.html') if __name__ == '__main__': From 814eea7c7e187a17e960710ce56301eaf1887af7 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 15:39:49 -0500 Subject: [PATCH 068/185] update changelog --- CHANGE_LOG | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 1367b598f..692ddaaaf 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,8 @@ +2019-XX-XX 0.9.1: +------------------- + * fix types to actually be types, #29 + + 2019-04-22 0.9.0: ------------------- * more efficient decode and iterdecode by using C-level binary tree From 578568fc207f67727e12d0cef110ff4477da9b4a Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 16:12:29 -0500 Subject: [PATCH 069/185] update runall script --- examples/runall | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/runall b/examples/runall index f85427e9c..72a54e1d4 100755 --- a/examples/runall +++ b/examples/runall @@ -1,9 +1,12 @@ #!/bin/bash -x -python huffman.py test README || exit 1 +python huffman.py --test README || exit 1 for x in *.py do + if [ $x == 'huffman.py' ]; then + continue + fi echo Running: $x python $x || exit 1 done From 65a9ccc8eaea5ecee09cb9311012cbee9b4d9c13 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 19:03:11 -0500 Subject: [PATCH 070/185] improve check for ambiguous prefix codes --- bitarray/_bitarray.c | 12 ++++++++---- bitarray/test_bitarray.py | 19 +++++++++---------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 61d27e63f..79278ecc9 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2182,6 +2182,8 @@ 
insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) k = GETBIT(self, i); prev = nd; nd = nd->child[k]; + if (nd && nd->symbol) + goto ambiguous; if (!nd) { nd = new_binode(); if (nd == NULL) @@ -2189,13 +2191,15 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) prev->child[k] = nd; } } + if (nd->symbol || nd->child[0] || nd->child[1]) + goto ambiguous; - if (nd->symbol) { - PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); - return -1; - } nd->symbol = symbol; return 0; + + ambiguous: + PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); + return -1; } static binode * diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 9b4ce2ed7..38dd482dc 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2072,16 +2072,15 @@ def test_iterdecode_buggybitarray2(self): self.assertEqual(a, bitarray('1')) def test_decode_ambiguous_code(self): - d = {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')} - a = bitarray() - self.assertRaises(ValueError, a.decode, d) - self.assertRaises(ValueError, a.iterdecode, d) - - def test_decode_ambiguous2(self): - d = {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')} - a = bitarray() - self.assertRaises(ValueError, a.decode, d) - self.assertRaises(ValueError, a.iterdecode, d) + for d in [ + {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')}, + {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')}, + {'a': bitarray('0'), 'b': bitarray('01')}, + {'a': bitarray('0'), 'b': bitarray('11'), 'c': bitarray('111')}, + ]: + a = bitarray() + self.assertRaises(ValueError, a.decode, d) + self.assertRaises(ValueError, a.iterdecode, d) def test_miscitems(self): d = {None : bitarray('00'), From 5420a44edd98e2f00bd416eec85244de05108102 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 19:03:51 -0500 Subject: [PATCH 071/185] update changelog --- CHANGE_LOG | 1 + 1 file changed, 1 insertion(+) diff --git 
a/CHANGE_LOG b/CHANGE_LOG index 692ddaaaf..650b9885f 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,6 +1,7 @@ 2019-XX-XX 0.9.1: ------------------- * fix types to actually be types, #29 + * improve check for ambiguous prefix codes 2019-04-22 0.9.0: From ff034ec43302689963592fba568dc999da1c4ed5 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 19:20:14 -0500 Subject: [PATCH 072/185] add comment --- bitarray/_bitarray.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 79278ecc9..a17df3248 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2182,8 +2182,11 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) k = GETBIT(self, i); prev = nd; nd = nd->child[k]; + + /* we cannot have already a symbol when branching to the new leaf */ if (nd && nd->symbol) goto ambiguous; + if (!nd) { nd = new_binode(); if (nd == NULL) @@ -2191,6 +2194,7 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) prev->child[k] = nd; } } + /* the new leaf node cannot already have a symbol or children */ if (nd->symbol || nd->child[0] || nd->child[1]) goto ambiguous; From fde71bcb0380708cb24c48b22361ce6420ea2f56 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 20:04:11 -0500 Subject: [PATCH 073/185] add comment --- bitarray/_bitarray.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a17df3248..a4307c5a9 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2226,8 +2226,13 @@ make_tree(PyObject *codedict) return root; } +/* + Traverse tree using the branches corresponding to the bitarray self, + starting at *indexp, and return the symbol at the leaf node (or NULL + when the end of the bitarray has been reached). 
+*/ static PyObject * -tree_traverse(bitarrayobject *self, idx_t *indexp, binode *tree) +tree_traverse(binode *tree, bitarrayobject *self, idx_t *indexp) { binode *nd = tree; int k; @@ -2353,7 +2358,7 @@ decodeiter_next(decodeiterobject *it) PyObject *symbol; assert(DecodeIter_Check(it)); - symbol = tree_traverse(it->bao, &(it->index), it->tree); + symbol = tree_traverse(it->tree, it->bao, &(it->index)); if (symbol == NULL) /* stop iteration OR error occured */ return NULL; Py_INCREF(symbol); From 83de91866b2791efb9b76e6c37c039dd38f12e97 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 21:45:02 -0500 Subject: [PATCH 074/185] simplify decoding example --- examples/decoding.py | 56 ++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/examples/decoding.py b/examples/decoding.py index f75fe47ed..c602d8039 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -3,35 +3,21 @@ from huffman import freq_string, huffCode -def traverse(it, tree): - """ - return False, when it has no more elements, or the leave node - resulting from traversing the tree - """ - try: - subtree = tree[next(it)] - except StopIteration: - return False - - if isinstance(subtree, list) and len(subtree) == 2: - return traverse(it, subtree) - else: # leave node - return subtree +def insert(nd, ba, sym): + for k in ba: + prev = nd + nd = nd[k] + if not nd: + nd = [[], []] + prev[k] = nd + nd[0] = sym + del nd[1] -def insert(tree, sym, ba): - """ - insert symbol which is mapped to bitarray into tree - """ - v = ba[0] - if len(ba) > 1: - if tree[v] == []: - tree[v] = [[], []] - insert(tree[v], sym, ba[1:]) - else: - if tree[v] != []: - raise ValueError("prefix code ambiguous") - tree[v] = sym +def traverse(nd, it): + while len(nd) == 2: + nd = nd[next(it)] + return nd[0] def decode(codedict, bitsequence): @@ -39,21 +25,19 @@ def decode(codedict, bitsequence): this function does the same thing as the bitarray decode method 
""" # generate tree from codedict - tree = [[], []] + root = [[], []] for sym, ba in codedict.items(): - insert(tree, sym, ba) + insert(root, ba, sym) # actual decoding by traversing until StopIteration res = [] it = iter(bitsequence) while True: - r = traverse(it, tree) - if r is False: + try: + r = traverse(root, it) + except StopIteration: break - else: - if r == []: - raise ValueError("prefix code does not match data") - res.append(r) + res.append(r) return res @@ -61,7 +45,7 @@ def main(): txt = open('README').read() code = huffCode(freq_string(txt)) - sample = 2000 * txt + sample = 500 * txt a = bitarray() a.encode(code, sample) From 99096bea41559bc7fe50eeb4ba500ae87a6c3e21 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 22:02:21 -0500 Subject: [PATCH 075/185] more consistent naming --- bitarray/_bitarray.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a4307c5a9..d6766199a 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2161,20 +2161,20 @@ new_binode(void) } static void -delete_binode_tree(binode *root) +delete_binode_tree(binode *tree) { - if (root == NULL) + if (tree == NULL) return; - delete_binode_tree(root->child[0]); - delete_binode_tree(root->child[1]); - PyMem_Free(root); + delete_binode_tree(tree->child[0]); + delete_binode_tree(tree->child[1]); + PyMem_Free(tree); } static int -insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) +insert_symbol(binode *tree, bitarrayobject *self, PyObject *symbol) { - binode *nd = root, *prev; + binode *nd = tree, *prev; Py_ssize_t i; int k; @@ -2185,7 +2185,7 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) /* we cannot have already a symbol when branching to the new leaf */ if (nd && nd->symbol) - goto ambiguous; + goto ambiguity; if (!nd) { nd = new_binode(); @@ -2196,12 +2196,12 @@ insert_symbol(binode *root, bitarrayobject *self, 
PyObject *symbol) } /* the new leaf node cannot already have a symbol or children */ if (nd->symbol || nd->child[0] || nd->child[1]) - goto ambiguous; + goto ambiguity; nd->symbol = symbol; return 0; - ambiguous: + ambiguity: PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); return -1; } @@ -2209,21 +2209,21 @@ insert_symbol(binode *root, bitarrayobject *self, PyObject *symbol) static binode * make_tree(PyObject *codedict) { - binode *root; + binode *tree; PyObject *symbol, *array; Py_ssize_t pos = 0; - root = new_binode(); - if (root == NULL) + tree = new_binode(); + if (tree == NULL) return NULL; while (PyDict_Next(codedict, &pos, &symbol, &array)) { - if (insert_symbol(root, (bitarrayobject *) array, symbol) < 0) { - delete_binode_tree(root); + if (insert_symbol(tree, (bitarrayobject *) array, symbol) < 0) { + delete_binode_tree(tree); return NULL; } } - return root; + return tree; } /* From a5c6b444921326863193dda9a76ea14d367a1332 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 22:16:28 -0500 Subject: [PATCH 076/185] update example readme --- examples/README | 13 ++++++++----- examples/decoding.py | 6 +++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/examples/README b/examples/README index 668b5cdb5..ae3455a98 100644 --- a/examples/README +++ b/examples/README @@ -11,16 +11,19 @@ compress.py: decoding.py Bitarray's decode method is implemented in C. Since the C code might be hard to read, we have implemented exactly the same - algorithm in Python. It is about 20 times slower than it's - C counterpart, since (recursive) function calls are more expensive - in Python than in C. + algorithm in Python. It is at least 20 times slower than it's + C counterpart. + + +gene.py: + shows how gene sequences (ATGC) can be very easily and efficiently + represented by bitarrays. huffman.py Demonstrates building a Huffman tree. 
Given an input file, calculates the number of occurrences for each character; - from those frequencies, a Huffman tree is build; and by traversing - the tree, the Huffman code is evaluated. + from those frequencies, a Huffman tree is build. Also allows encoding and decoding of a file, see -h option. diff --git a/examples/decoding.py b/examples/decoding.py index c602d8039..c3ec18509 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -25,16 +25,16 @@ def decode(codedict, bitsequence): this function does the same thing as the bitarray decode method """ # generate tree from codedict - root = [[], []] + tree = [[], []] for sym, ba in codedict.items(): - insert(root, ba, sym) + insert(tree, ba, sym) # actual decoding by traversing until StopIteration res = [] it = iter(bitsequence) while True: try: - r = traverse(root, it) + r = traverse(tree, it) except StopIteration: break res.append(r) From ffab9638f964d61564da0bb139cad2b7d7317e50 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 23:42:19 -0500 Subject: [PATCH 077/185] move check_codedict to C-level --- bitarray/__init__.py | 15 --------------- bitarray/_bitarray.c | 40 ++++++++++++++++++++++++++++++++++++++- bitarray/test_bitarray.py | 20 ++++++++++++++++---- 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 6a97795d7..c15e4d512 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -13,18 +13,6 @@ __version__ = '0.9.0' -def _check_codedict(codedict): - if not isinstance(codedict, dict): - raise TypeError("dictionary expected") - if len(codedict) == 0: - raise ValueError("prefix code empty") - for k, v in codedict.items(): - if not isinstance(v, bitarray): - raise TypeError("bitarray expected for dictionary value") - if v.length() == 0: - raise ValueError("non-empty bitarray expected") - - class bitarray(_bitarray): """bitarray([initial], [endian=string]) @@ -77,7 +65,6 @@ def decode(self, codedict): Given a prefix 
code (a dict mapping symbols to bitarrays), decode the content of the bitarray and return the list of symbols.""" - _check_codedict(codedict) return self._decode(codedict) def iterdecode(self, codedict): @@ -85,7 +72,6 @@ def iterdecode(self, codedict): Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and iterate over the symbols.""" - _check_codedict(codedict) return self._iterdecode(codedict) def encode(self, codedict, iterable): @@ -94,7 +80,6 @@ def encode(self, codedict, iterable): Given a prefix code (a dict mapping symbols to bitarrays), iterate over the iterable object with symbols, and extend the bitarray with the corresponding bitarray for each symbols.""" - _check_codedict(codedict) self._encode(codedict, iterable) def __int__(self): diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d6766199a..2600be5ba 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2097,6 +2097,34 @@ BITWISE_IFUNC(xor) /******************* variable length encoding and decoding ***************/ +static int +check_codedict(PyObject *codedict) +{ + PyObject *key, *value; + Py_ssize_t pos = 0; + + if (!PyDict_Check(codedict)) { + PyErr_SetString(PyExc_TypeError, "dict expected"); + return -1; + } + if (PyDict_Size(codedict) == 0) { + PyErr_SetString(PyExc_ValueError, "prefix code dict empty"); + return -1; + } + while (PyDict_Next(codedict, &pos, &key, &value)) { + if (!bitarray_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "bitarray expected for dict value"); + return -1; + } + if (((bitarrayobject *) value)->nbits == 0) { + PyErr_SetString(PyExc_ValueError, "non-empty bitarray expected"); + return -1; + } + } + return 0; +} + static PyObject * bitarray_encode(bitarrayobject *self, PyObject *args) { @@ -2105,6 +2133,9 @@ bitarray_encode(bitarrayobject *self, PyObject *args) if (!PyArg_ParseTuple(args, "OO:_encode", &codedict, &iterable)) return NULL; + if (check_codedict(codedict) < 0) + return NULL; + iter 
= PyObject_GetIter(iterable); if (iter == NULL) { PyErr_SetString(PyExc_TypeError, "iterable object expected"); @@ -2115,7 +2146,8 @@ bitarray_encode(bitarrayobject *self, PyObject *args) bits = PyDict_GetItem(codedict, symbol); Py_DECREF(symbol); if (bits == NULL) { - PyErr_SetString(PyExc_ValueError, "symbol not in prefix code"); + PyErr_SetString(PyExc_ValueError, + "symbol not defined in prefix code"); goto error; } if (extend_bitarray(self, (bitarrayobject *) bits) < 0) @@ -2263,6 +2295,9 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) Py_ssize_t i; int k; + if (check_codedict(codedict) < 0) + return NULL; + tree = make_tree(codedict); if (tree == NULL || PyErr_Occurred()) return NULL; @@ -2329,6 +2364,9 @@ bitarray_iterdecode(bitarrayobject *self, PyObject *codedict) decodeiterobject *it; /* iterator to be returned */ binode *tree; + if (check_codedict(codedict) < 0) + return NULL; + tree = make_tree(codedict); if (tree == NULL || PyErr_Occurred()) return NULL; diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 38dd482dc..f5cdd27e1 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1983,14 +1983,26 @@ def test_encode(self): 'a': bitarray('001'), 'n': bitarray('000')}) self.assertRaises(ValueError, a.encode, d, 'arvin') + def test_check_codedict_encode(self): + a = bitarray() + self.assertRaises(TypeError, a.encode, None, 'asdf') + self.assertRaises(ValueError, a.encode, {}, 'asdf') + self.assertRaises(TypeError, a.encode, {'a': 'b'}, 'asdf') + self.assertRaises(ValueError, a.encode, {'a': bitarray()}, 'asdf') - def test_decode_check_codedict(self): + def test_check_codedict_decode(self): a = bitarray() self.assertRaises(TypeError, a.decode, 0) self.assertRaises(ValueError, a.decode, {}) - # 42 not iterable - self.assertRaises(TypeError, a.decode, {'a':42}) - self.assertRaises(ValueError, a.decode, {'a':bitarray()}) + self.assertRaises(TypeError, a.decode, {'a': 42}) + self.assertRaises(ValueError, 
a.decode, {'a': bitarray()}) + + def test_check_codedict_iterdecode(self): + a = bitarray() + self.assertRaises(TypeError, a.iterdecode, 0) + self.assertRaises(ValueError, a.iterdecode, {}) + self.assertRaises(TypeError, a.iterdecode, {'a': []}) + self.assertRaises(ValueError, a.iterdecode, {'a': bitarray()}) def test_decode_simple(self): d = {'I': bitarray('1'), From b63ea150dc0b9db275ec80fe2a09cd6feb6a5cc4 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 22 Apr 2019 23:59:43 -0500 Subject: [PATCH 078/185] remove Python level methods: encode, decode, iterdecode (in favor of having everthing on the C-level now --- bitarray/__init__.py | 22 ---------------------- bitarray/_bitarray.c | 27 +++++++++++++++------------ 2 files changed, 15 insertions(+), 34 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index c15e4d512..aa65e2a6b 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -60,28 +60,6 @@ def tostring(self): Deprecated since version 0.4.0, use ``tobytes()`` instead.""" return self.tobytes().decode() - def decode(self, codedict): - """decode(code) -> list - -Given a prefix code (a dict mapping symbols to bitarrays), -decode the content of the bitarray and return the list of symbols.""" - return self._decode(codedict) - - def iterdecode(self, codedict): - """iterdecode(code) -> iterator - -Given a prefix code (a dict mapping symbols to bitarrays), -decode the content of the bitarray and iterate over the symbols.""" - return self._iterdecode(codedict) - - def encode(self, codedict, iterable): - """encode(code, iterable) - -Given a prefix code (a dict mapping symbols to bitarrays), -iterate over the iterable object with symbols, and extend the bitarray -with the corresponding bitarray for each symbols.""" - self._encode(codedict, iterable) - def __int__(self): raise TypeError("int() argument cannot be a bitarray") diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 2600be5ba..058096615 100644 --- 
a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2130,7 +2130,7 @@ bitarray_encode(bitarrayobject *self, PyObject *args) { PyObject *codedict, *iterable, *iter, *symbol, *bits; - if (!PyArg_ParseTuple(args, "OO:_encode", &codedict, &iterable)) + if (!PyArg_ParseTuple(args, "OO:encode", &codedict, &iterable)) return NULL; if (check_codedict(codedict) < 0) @@ -2163,9 +2163,11 @@ bitarray_encode(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(encode_doc, -"_encode(code, iterable)\n\ +"encode(code, iterable)\n\ \n\ -like the encode method without code checking"); +Given a prefix code (a dict mapping symbols to bitarrays),\n\ +iterate over the iterable object with symbols, and extend the bitarray\n\ +with the corresponding bitarray for each symbols."); /* Binary Tree definition */ @@ -2336,10 +2338,10 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) } PyDoc_STRVAR(decode_doc, -"_decode(codedict) -> list\n\ +"decode(code) -> list\n\ \n\ -Given a code dictionary, decode the content of the bitarray and return\n\ -the list of symbols."); +Given a prefix code (a dict mapping symbols to bitarrays),\n\ +decode the content of the bitarray and return it as a list of symbols."); /*********************** (Bitarray) Decode Iterator *********************/ @@ -2385,10 +2387,11 @@ bitarray_iterdecode(bitarrayobject *self, PyObject *codedict) } PyDoc_STRVAR(iterdecode_doc, -"_iterdecode(codedict) -> iterator\n\ +"iterdecode(code) -> iterator\n\ \n\ -Given a code dictionary, decode the content of the bitarray and iterate\n\ -over the represented symbols."); +Given a prefix code (a dict mapping symbols to bitarrays),\n\ +decode the content of the bitarray and return an iterator over\n\ +the symbols."); static PyObject * decodeiter_next(decodeiterobject *it) @@ -2589,11 +2592,11 @@ bitarray_methods[] = { copy_doc}, {"count", (PyCFunction) bitarray_count, METH_VARARGS, count_doc}, - {"_decode", (PyCFunction) bitarray_decode, METH_O, + {"decode", (PyCFunction) 
bitarray_decode, METH_O, decode_doc}, - {"_iterdecode", (PyCFunction) bitarray_iterdecode, METH_O, + {"iterdecode", (PyCFunction) bitarray_iterdecode, METH_O, iterdecode_doc}, - {"_encode", (PyCFunction) bitarray_encode, METH_VARARGS, + {"encode", (PyCFunction) bitarray_encode, METH_VARARGS, encode_doc}, {"endian", (PyCFunction) bitarray_endian, METH_NOARGS, endian_doc}, From b849588460eaf9065c8a848a0cb88475f42d9e3d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 00:02:58 -0500 Subject: [PATCH 079/185] update changelog --- CHANGE_LOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 650b9885f..dba670f8f 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -2,6 +2,8 @@ ------------------- * fix types to actually be types, #29 * improve check for ambiguous prefix codes + * remove Python level methods: encode, decode, iterdecode (in favor of + having these implemented on the C-level along with check_codedict) 2019-04-22 0.9.0: From 0b390bc8bd07693a46772f2b36d074053aa18bda Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 00:33:19 -0500 Subject: [PATCH 080/185] add tests for non-iterable when encoding --- bitarray/__init__.py | 2 +- bitarray/test_bitarray.py | 34 ++++++++++++++++++---------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index aa65e2a6b..e07c90c2a 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.9.0' +__version__ = '0.9.1' class bitarray(_bitarray): diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index f5cdd27e1..edaf657d9 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1927,16 +1927,6 @@ def test_tofile(self): class PrefixCodeTests(unittest.TestCase, Util): - def test_encode_errors(self): - a = bitarray() - self.assertRaises(TypeError, a.encode, 0, '') - 
self.assertRaises(ValueError, a.encode, {}, '') - self.assertRaises(TypeError, a.encode, {'a':42}, '') - self.assertRaises(ValueError, a.encode, {'a': bitarray()}, '') - # 42 not iterable - self.assertRaises(TypeError, a.encode, {'a': bitarray('0')}, 42) - self.assertEqual(len(a), 0) - def test_encode_string(self): a = bitarray() d = {'a': bitarray('0')} @@ -1983,26 +1973,38 @@ def test_encode(self): 'a': bitarray('001'), 'n': bitarray('000')}) self.assertRaises(ValueError, a.encode, d, 'arvin') + def test_encode_not_iterable(self): + d = {'a': bitarray('0'), 'b': bitarray('1')} + a = bitarray() + a.encode(d, 'abba') + self.assertRaises(TypeError, a.encode, d, 42) + self.assertRaises(TypeError, a.encode, d, 1.3) + self.assertRaises(TypeError, a.encode, d, None) + self.assertEqual(a, bitarray('0110')) + def test_check_codedict_encode(self): a = bitarray() - self.assertRaises(TypeError, a.encode, None, 'asdf') - self.assertRaises(ValueError, a.encode, {}, 'asdf') - self.assertRaises(TypeError, a.encode, {'a': 'b'}, 'asdf') - self.assertRaises(ValueError, a.encode, {'a': bitarray()}, 'asdf') + self.assertRaises(TypeError, a.encode, None, '') + self.assertRaises(ValueError, a.encode, {}, '') + self.assertRaises(TypeError, a.encode, {'a': 'b'}, '') + self.assertRaises(ValueError, a.encode, {'a': bitarray()}, '') + self.assertEqual(len(a), 0) def test_check_codedict_decode(self): - a = bitarray() + a = bitarray('101') self.assertRaises(TypeError, a.decode, 0) self.assertRaises(ValueError, a.decode, {}) self.assertRaises(TypeError, a.decode, {'a': 42}) self.assertRaises(ValueError, a.decode, {'a': bitarray()}) + self.assertEqual(a, bitarray('101')) def test_check_codedict_iterdecode(self): - a = bitarray() + a = bitarray('1100101') self.assertRaises(TypeError, a.iterdecode, 0) self.assertRaises(ValueError, a.iterdecode, {}) self.assertRaises(TypeError, a.iterdecode, {'a': []}) self.assertRaises(ValueError, a.iterdecode, {'a': bitarray()}) + self.assertEqual(a, 
bitarray('1100101')) def test_decode_simple(self): d = {'I': bitarray('1'), From e8adf9fb6c06247f4606abe5552867a213ee0b59 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 00:50:33 -0500 Subject: [PATCH 081/185] update comment --- bitarray/_bitarray.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 058096615..f1d75046d 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1,6 +1,9 @@ /* - This file is the C part of the bitarray package. Almost all - functionality is implemented here. + Copyright (c) 2008 - 2019, Ilan Schnell + bitarray is published under the PDF license. + + This file is the C part of the bitarray package. + All functionality is implemented here. Author: Ilan Schnell */ From 264e72076a59bc7e396c810ea800bb2cb4a12dbc Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 00:58:04 -0500 Subject: [PATCH 082/185] wording --- bitarray/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index e07c90c2a..740a16c0d 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -61,13 +61,13 @@ def tostring(self): return self.tobytes().decode() def __int__(self): - raise TypeError("int() argument cannot be a bitarray") + raise TypeError("int() cannot take bitarray as argument") def __long__(self): - raise TypeError("long() argument cannot be a bitarray") + raise TypeError("long() cannot take bitarray as argument") def __float__(self): - raise TypeError("float() argument cannot be a bitarray") + raise TypeError("float() cannot take bitarray as argument") def test(verbosity=1, repeat=1): From 0dcc1f522c5359e3e8a5414bbfb18489bae85f0d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 01:20:27 -0500 Subject: [PATCH 083/185] add test for undefined symbol in code --- bitarray/test_bitarray.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index edaf657d9..2b00aab17 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1973,6 +1973,16 @@ def test_encode(self): 'a': bitarray('001'), 'n': bitarray('000')}) self.assertRaises(ValueError, a.encode, d, 'arvin') + def test_encode_symbol_not_in_code(self): + d = {None : bitarray('0'), + 0 : bitarray('10'), + 'A' : bitarray('11')} + a = bitarray() + a.encode(d, ['A', None, 0]) + self.assertEqual(a, bitarray('11010')) + self.assertRaises(ValueError, a.encode, d, [1, 2]) + self.assertRaises(ValueError, a.encode, d, 'ABCD') + def test_encode_not_iterable(self): d = {'a': bitarray('0'), 'b': bitarray('1')} a = bitarray() From 868b1ad69a7067bb9712c58dcf94b0c16a6378be Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 20:07:25 -0500 Subject: [PATCH 084/185] fix test for Python 2.6 --- bitarray/test_bitarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 2b00aab17..377839357 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1409,7 +1409,7 @@ def test_search3(self): def test_search_type(self): a = bitarray('10011') it = a.itersearch(bitarray('1')) - self.assertIsInstance(type(it), type) + self.assertTrue(isinstance(type(it), type)) def test_fill(self): a = bitarray('') From cc7a889a47a69cfb26d4ae956120d0ab1a284bea Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 21:20:58 -0500 Subject: [PATCH 085/185] typo --- bitarray/_bitarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index f1d75046d..419a2e33e 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1,6 +1,6 @@ /* Copyright (c) 2008 - 2019, Ilan Schnell - bitarray is published under the PDF license. + bitarray is published under the PSF license. This file is the C part of the bitarray package. 
All functionality is implemented here. From 4e3429ddbc9bc908772ca7023769b4c81715f5cb Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 21:41:56 -0500 Subject: [PATCH 086/185] fix some tests for Python 2.5 --- bitarray/test_bitarray.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 377839357..2299b970d 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2176,9 +2176,9 @@ def test_read1(self): v = memoryview(a) self.assertEqual(len(v), 3) self.assertEqual(v[0], 65 if is_py3k else 'A') - self.assertEqual(v[:].tobytes(), b'ABC') + self.assertEqual(v[:].tobytes(), to_bytes('ABC')) a[13] = 1 - self.assertEqual(v[:].tobytes(), b'AFC') + self.assertEqual(v[:].tobytes(), to_bytes('AFC')) def test_read2(self): a = bitarray([randint(0, 1) for d in range(8000)]) @@ -2197,8 +2197,8 @@ def test_write(self): self.assertEqual(a[399999:400009], bitarray('0111111110')) a[400003] = 0 self.assertEqual(a[399999:400009], bitarray('0111011110')) - v[30001:30004] = b'ABC' - self.assertEqual(a[240000:240040].tobytes(), b'\x00ABC\x00') + v[30001:30004] = to_bytes('ABC') + self.assertEqual(a[240000:240040].tobytes(), to_bytes('\x00ABC\x00')) if sys.version_info[:2] >= (2, 7): tests.append(BufferInterfaceTests) From c5004fdfda01eee59ada9a4d094c9e3ca77960d5 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 22:31:51 -0500 Subject: [PATCH 087/185] add debug function to notes --- TODO | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/TODO b/TODO index 99243f1c2..7ea955040 100644 --- a/TODO +++ b/TODO @@ -40,9 +40,25 @@ for i in xrange(1000000): print sorted(sizes) +/* walk the binary tree, and display information for each node */ +static void +display_binode_tree(binode *tree) +{ + int k; -If I should ever decide to use pre-calculated tables: + printf("id: %p child0: %14p child1: %14p symbol: %s\n", + tree, + 
tree->child[0], + tree->child[1], + tree->symbol ? PyString_AsString(tree->symbol) : "NULL"); + for (k = 0; k < 2; k++) + if (tree->child[k]) + display_binode_tree(tree->child[k]); +} + + +# If I should ever decide to use pre-calculated tables: static char bytereverse_trans[256] = { 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, From 0720dc1ad4bd9fb729c31bd4a1c45c4dffdf9762 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 22:49:54 -0500 Subject: [PATCH 088/185] formatting --- bitarray/test_bitarray.py | 56 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 2299b970d..8b830e500 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2126,34 +2126,34 @@ def test_miscitems(self): self.assertStopIteration(it) def test_real_example(self): - code = {' ' : bitarray('001'), - '.' : bitarray('0101010'), - 'a' : bitarray('0110'), - 'b' : bitarray('0001100'), - 'c' : bitarray('000011'), - 'd' : bitarray('01011'), - 'e' : bitarray('111'), - 'f' : bitarray('010100'), - 'g' : bitarray('101000'), - 'h' : bitarray('00000'), - 'i' : bitarray('1011'), - 'j' : bitarray('0111101111'), - 'k' : bitarray('00011010'), - 'l' : bitarray('01110'), - 'm' : bitarray('000111'), - 'n' : bitarray('1001'), - 'o' : bitarray('1000'), - 'p' : bitarray('101001'), - 'q' : bitarray('00001001101'), - 'r' : bitarray('1101'), - 's' : bitarray('1100'), - 't' : bitarray('0100'), - 'u' : bitarray('000100'), - 'v' : bitarray('0111100'), - 'w' : bitarray('011111'), - 'x' : bitarray('0000100011'), - 'y' : bitarray('101010'), - 'z' : bitarray('00011011110')} + code = {' ': bitarray('001'), + '.': bitarray('0101010'), + 'a': bitarray('0110'), + 'b': bitarray('0001100'), + 'c': bitarray('000011'), + 'd': bitarray('01011'), + 'e': bitarray('111'), + 'f': bitarray('010100'), + 'g': bitarray('101000'), + 'h': 
bitarray('00000'), + 'i': bitarray('1011'), + 'j': bitarray('0111101111'), + 'k': bitarray('00011010'), + 'l': bitarray('01110'), + 'm': bitarray('000111'), + 'n': bitarray('1001'), + 'o': bitarray('1000'), + 'p': bitarray('101001'), + 'q': bitarray('00001001101'), + 'r': bitarray('1101'), + 's': bitarray('1100'), + 't': bitarray('0100'), + 'u': bitarray('000100'), + 'v': bitarray('0111100'), + 'w': bitarray('011111'), + 'x': bitarray('0000100011'), + 'y': bitarray('101010'), + 'z': bitarray('00011011110')} a = bitarray() message = 'the quick brown fox jumps over the lazy dog.' a.encode(code, message) From 1b34c43247a3c35127fc01093ebc49a75cacdfd3 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 22:53:07 -0500 Subject: [PATCH 089/185] formatting --- bitarray/test_bitarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 8b830e500..edbef36c7 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1938,7 +1938,7 @@ def test_encode_string(self): def test_encode_list(self): a = bitarray() - d = {'a':bitarray('0')} + d = {'a': bitarray('0')} a.encode(d, []) self.assertEqual(a, bitarray()) a.encode(d, ['a']) From 9e8693adefb70cde4134403b9a3502464d8d458a Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 23 Apr 2019 23:31:51 -0500 Subject: [PATCH 090/185] update changelog --- CHANGE_LOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index dba670f8f..be60cdef6 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,9 +1,10 @@ 2019-XX-XX 0.9.1: ------------------- * fix types to actually be types, #29 - * improve check for ambiguous prefix codes + * check for ambiguous prefix codes when building binary tree for decoding * remove Python level methods: encode, decode, iterdecode (in favor of having these implemented on the C-level along with check_codedict) + * fix self tests for Python 2.5 and 2.6 2019-04-22 0.9.0: From 
a1eafa64413b2c21841c6acf7bf54bf10c9dd5f4 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 24 Apr 2019 03:37:59 -0500 Subject: [PATCH 091/185] rework example to be much closer to C counterpart --- examples/decoding.py | 59 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/examples/decoding.py b/examples/decoding.py index c3ec18509..2487e5a8d 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -1,23 +1,58 @@ +from __future__ import print_function import time +from pprint import pprint from bitarray import bitarray from huffman import freq_string, huffCode -def insert(nd, ba, sym): +CNT = 1 + +class Node: + def __init__(self): + global CNT + self.symbol = None + self.child = [None, None] + self.id = CNT # used in display_tree only + CNT += 1 + + +def insert(tree, ba, sym): + nd = tree for k in ba: prev = nd - nd = nd[k] + nd = nd.child[k] + if nd and nd.symbol: + print("ambiguity") + if not nd: + nd = Node() + prev.child[k] = nd + if nd.symbol or nd.child[0] or nd.child[1]: + print("ambiguity") + nd.symbol = sym + + +def traverse(tree, it): + nd = tree + while 1: + nd = nd.child[next(it)] if not nd: - nd = [[], []] - prev[k] = nd - nd[0] = sym - del nd[1] + print("prefix code does not match data in bitarray") + return None + if nd.symbol is not None: + return nd.symbol + if nd != tree: + print("decoding not terminated") -def traverse(nd, it): - while len(nd) == 2: - nd = nd[next(it)] - return nd[0] +def display_tree(nd): + print("id: %3d child0: %3d child1: %3d symbol: %r" % + (nd.id, + nd.child[0].id if nd.child[0] else 0, + nd.child[1].id if nd.child[1] else 0, + nd.symbol)) + for k in range(2): + if nd.child[k]: + display_tree(nd.child[k]) def decode(codedict, bitsequence): @@ -25,9 +60,10 @@ def decode(codedict, bitsequence): this function does the same thing as the bitarray decode method """ # generate tree from codedict - tree = [[], []] + tree = Node() for sym, ba in codedict.items(): 
insert(tree, ba, sym) + display_tree(tree) # actual decoding by traversing until StopIteration res = [] @@ -44,6 +80,7 @@ def decode(codedict, bitsequence): def main(): txt = open('README').read() code = huffCode(freq_string(txt)) + pprint(code) sample = 500 * txt From d8d094acf94d6aebe0f0a1bb427e45787fb1e699 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 24 Apr 2019 03:44:16 -0500 Subject: [PATCH 092/185] better naming, add comment --- bitarray/_bitarray.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 419a2e33e..b89383d6c 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2173,7 +2173,7 @@ iterate over the iterable object with symbols, and extend the bitarray\n\ with the corresponding bitarray for each symbols."); -/* Binary Tree definition */ +/* Binary tree definition */ typedef struct _bin_node { PyObject *symbol; @@ -2269,7 +2269,7 @@ make_tree(PyObject *codedict) when the end of the bitarray has been reached). 
*/ static PyObject * -tree_traverse(binode *tree, bitarrayobject *self, idx_t *indexp) +traverse_tree(binode *tree, bitarrayobject *self, idx_t *indexp) { binode *nd = tree; int k; @@ -2313,6 +2313,7 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) delete_binode_tree(tree); return NULL; } + /* traverse tree (just like above) */ for (i = 0; i < self->nbits; i++) { k = GETBIT(self, i); nd = nd->child[k]; @@ -2402,7 +2403,7 @@ decodeiter_next(decodeiterobject *it) PyObject *symbol; assert(DecodeIter_Check(it)); - symbol = tree_traverse(it->tree, it->bao, &(it->index)); + symbol = traverse_tree(it->tree, it->bao, &(it->index)); if (symbol == NULL) /* stop iteration OR error occured */ return NULL; Py_INCREF(symbol); From 293f886b3cf5feee96d2f06c3f806c5f10363385 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 24 Apr 2019 12:50:45 -0500 Subject: [PATCH 093/185] add creation of .dot file to example --- bitarray/_bitarray.c | 2 +- examples/decoding.py | 50 +++++++++++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index b89383d6c..4a674491d 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2176,8 +2176,8 @@ with the corresponding bitarray for each symbols."); /* Binary tree definition */ typedef struct _bin_node { - PyObject *symbol; struct _bin_node *child[2]; + PyObject *symbol; } binode; diff --git a/examples/decoding.py b/examples/decoding.py index 2487e5a8d..f0ec5a424 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -5,15 +5,11 @@ from huffman import freq_string, huffCode -CNT = 1 class Node: def __init__(self): - global CNT - self.symbol = None self.child = [None, None] - self.id = CNT # used in display_tree only - CNT += 1 + self.symbol = None def insert(tree, ba, sym): @@ -44,15 +40,35 @@ def traverse(tree, it): print("decoding not terminated") -def display_tree(nd): - print("id: %3d child0: %3d child1: %3d symbol: %r" % - 
(nd.id, - nd.child[0].id if nd.child[0] else 0, - nd.child[1].id if nd.child[1] else 0, - nd.symbol)) - for k in range(2): - if nd.child[k]: - display_tree(nd.child[k]) +def write_dot(tree): + + special_ascii = {' ': 'SPACE', '\n': 'LF', '\t': 'TAB'} + def disp_char(c): + res = special_ascii.get(c, c) + assert res.strip(), repr(c) + return res + + with open('tree.dot', 'w') as fo: # dot -Tpng tree.dot -O + def write_nd(fo, nd): + if nd.symbol: + fo.write(' %d [label="%s"];\n' % (id(nd), + disp_char(nd.symbol))) + else: + fo.write(' %d [label=""];\n' % (id(nd),)) + + if nd.child[0] and nd.child[1]: + for k in range(2): + fo.write(' %d->%d;\n' % (id(nd), id(nd.child[k]))) + + for k in range(2): + if nd.child[k]: + write_nd(fo, nd.child[k]) + + fo.write('digraph BT {\n') + fo.write('''node [shape=box, fontsize=20, fontname="Arial", + fontcolor=red];\n''') + write_nd(fo, tree) + fo.write('}\n') def decode(codedict, bitsequence): @@ -63,7 +79,7 @@ def decode(codedict, bitsequence): tree = Node() for sym, ba in codedict.items(): insert(tree, ba, sym) - display_tree(tree) + write_dot(tree) # actual decoding by traversing until StopIteration res = [] @@ -80,9 +96,9 @@ def decode(codedict, bitsequence): def main(): txt = open('README').read() code = huffCode(freq_string(txt)) - pprint(code) + #pprint(code) - sample = 500 * txt + sample = 100 * txt a = bitarray() a.encode(code, sample) From 81ecb98105bb58dc8159bf8e7e06370e6afe1abd Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 24 Apr 2019 17:18:41 -0500 Subject: [PATCH 094/185] use different shape for leaf nodes --- examples/README | 11 +++++------ examples/decoding.py | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/examples/README b/examples/README index ae3455a98..8cca447cc 100644 --- a/examples/README +++ b/examples/README @@ -1,17 +1,17 @@ bloom.py: - Demonstrates the implementation of a Bloom filter, see: + Demonstrates the implementation of a "Bloom filter", see: 
http://en.wikipedia.org/wiki/Bloom_filter compress.py: Demonstrates how the bz2 module may be used to create a compressed - object which represents a bitarray + object which represents a bitarray. decoding.py Bitarray's decode method is implemented in C. Since the C code might be hard to read, we have implemented exactly the same - algorithm in Python. It is at least 20 times slower than it's + algorithm in Python. It is at least 20 times slower than its C counterpart. @@ -28,8 +28,8 @@ huffman.py mandel.py - Generates a .ppm image file of size 8000x6000 of the Mandelbrot set. - Despite it's size, the output image file has only a size of slightly + Generates a .ppm image file of size 8000 x 6000 of the Mandelbrot set. + Despite its size, the output image file has only a size of slightly over 6 Million bytes (uncompressed) because each pixel is stored in one bit. Requires numpy and scipy (see http://scipy.org/). @@ -61,4 +61,3 @@ smallints.py For example, an array with 1000 5 bit integers can be created, allowing each element in the array to take values form 0 to 31, while the size of the object is 625 (5000/8) bytes. - Thanks to David Kammeyer for the idea to apply a bitarray in this way. 
diff --git a/examples/decoding.py b/examples/decoding.py index f0ec5a424..de422befd 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -42,7 +42,7 @@ def traverse(tree, it): def write_dot(tree): - special_ascii = {' ': 'SPACE', '\n': 'LF', '\t': 'TAB'} + special_ascii = {' ': 'SPACE', '\n': 'LF', '\t': 'TAB', '"': r'\"'} def disp_char(c): res = special_ascii.get(c, c) assert res.strip(), repr(c) @@ -54,7 +54,8 @@ def write_nd(fo, nd): fo.write(' %d [label="%s"];\n' % (id(nd), disp_char(nd.symbol))) else: - fo.write(' %d [label=""];\n' % (id(nd),)) + fo.write(' %d [shape=circle, style=filled, ' + 'fillcolor=grey, label=""];\n' % (id(nd),)) if nd.child[0] and nd.child[1]: for k in range(2): @@ -65,8 +66,7 @@ def write_nd(fo, nd): write_nd(fo, nd.child[k]) fo.write('digraph BT {\n') - fo.write('''node [shape=box, fontsize=20, fontname="Arial", - fontcolor=red];\n''') + fo.write('node [shape=box, fontsize=20, fontname="Arial"];\n') write_nd(fo, tree) fo.write('}\n') From 0f673189472358b2d7bf9def7f17826c85bdc184 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 24 Apr 2019 21:18:41 -0500 Subject: [PATCH 095/185] refactor Huffman example --- CHANGE_LOG | 1 + examples/decoding.py | 85 ++++++++++++++++++++++---------------------- examples/huffman.py | 56 +++++++++++++++++++---------- 3 files changed, 80 insertions(+), 62 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index be60cdef6..93b544a92 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -5,6 +5,7 @@ * remove Python level methods: encode, decode, iterdecode (in favor of having these implemented on the C-level along with check_codedict) * fix self tests for Python 2.5 and 2.6 + * add code to generate .dot file of Huffman tree to examples 2019-04-22 0.9.0: diff --git a/examples/decoding.py b/examples/decoding.py index de422befd..118119592 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -1,18 +1,10 @@ from __future__ import print_function import time -from pprint import pprint from bitarray 
import bitarray -from huffman import freq_string, huffCode +from huffman import freq_string, huffTree, huffCode, Node - -class Node: - def __init__(self): - self.child = [None, None] - self.symbol = None - - -def insert(tree, ba, sym): +def insert_symbol(tree, ba, sym): nd = tree for k in ba: prev = nd @@ -27,6 +19,14 @@ def insert(tree, ba, sym): nd.symbol = sym +def make_tree(codedict): + # generate tree from codedict + tree = Node() + for sym, ba in codedict.items(): + insert_symbol(tree, ba, sym) + return tree + + def traverse(tree, it): nd = tree while 1: @@ -40,22 +40,41 @@ def traverse(tree, it): print("decoding not terminated") -def write_dot(tree): +def decode(tree, bitsequence): + # actual decoding by traversing until StopIteration + res = [] + it = iter(bitsequence) + while True: + try: + r = traverse(tree, it) + except StopIteration: + break + res.append(r) + return res + +def write_dot(tree, fn): special_ascii = {' ': 'SPACE', '\n': 'LF', '\t': 'TAB', '"': r'\"'} def disp_char(c): res = special_ascii.get(c, c) assert res.strip(), repr(c) return res - with open('tree.dot', 'w') as fo: # dot -Tpng tree.dot -O + def disp_freq(f): + if f is None: + return '' + return '%d' % f + + with open(fn, 'w') as fo: # dot -Tpng tree.dot -O def write_nd(fo, nd): - if nd.symbol: - fo.write(' %d [label="%s"];\n' % (id(nd), - disp_char(nd.symbol))) - else: + if nd.symbol: # leaf node + a, b = disp_freq(nd.freq), disp_char(nd.symbol) + fo.write(' %d [label="%s%s%s"];\n' % + (id(nd), a, ': ' if a and b else '', b)) + else: # parent node fo.write(' %d [shape=circle, style=filled, ' - 'fillcolor=grey, label=""];\n' % (id(nd),)) + 'fillcolor=grey, label="%s"];\n' % + (id(nd), disp_freq(nd.freq))) if nd.child[0] and nd.child[1]: for k in range(2): @@ -66,37 +85,17 @@ def write_nd(fo, nd): write_nd(fo, nd.child[k]) fo.write('digraph BT {\n') - fo.write('node [shape=box, fontsize=20, fontname="Arial"];\n') + fo.write(' node [shape=box, fontsize=20, fontname="Arial"];\n') 
write_nd(fo, tree) fo.write('}\n') -def decode(codedict, bitsequence): - """ - this function does the same thing as the bitarray decode method - """ - # generate tree from codedict - tree = Node() - for sym, ba in codedict.items(): - insert(tree, ba, sym) - write_dot(tree) - - # actual decoding by traversing until StopIteration - res = [] - it = iter(bitsequence) - while True: - try: - r = traverse(tree, it) - except StopIteration: - break - res.append(r) - return res - - def main(): txt = open('README').read() - code = huffCode(freq_string(txt)) - #pprint(code) + tree = huffTree(freq_string(txt)) + write_dot(tree, 'tree1.dot') + code = huffCode(tree) + write_dot(make_tree(code), 'tree2.dot') sample = 100 * txt @@ -105,7 +104,7 @@ def main(): # Time the decode function above start_time = time.time() - res = decode(code, a) + res = decode(tree, a) Py_time = time.time() - start_time assert ''.join(res) == sample print('Py_time: %.6f sec' % Py_time) diff --git a/examples/huffman.py b/examples/huffman.py index 6f8c0dd75..fdd0e4873 100644 --- a/examples/huffman.py +++ b/examples/huffman.py @@ -15,36 +15,54 @@ is_py3k = bool(sys.version_info[0] == 3) -def huffCode(freq): +class Node: + def __init__(self): + self.child = [None, None] + self.symbol = None + self.freq = None + + def __lt__(self, other): + return self.freq < other.freq + + +def huffTree(freq): """ - Given a dictionary mapping symbols to thier frequency, - return the Huffman code in the form of - a dictionary mapping the symbols to bitarrays. + Given a dictionary mapping symbols to thier frequency, construct a Huffman + tree and return its root node. 
""" minheap = [] - for i, c in enumerate(sorted(freq)): - # having the exact same frequency for different symbols causes - # problems with heapq in Python 3, so we simply add a small float - heapq.heappush(minheap, (freq[c] + 1E-3 * i, c)) + for c in sorted(freq): + nd = Node() + nd.symbol = c + nd.freq = freq[c] + heapq.heappush(minheap, nd) while len(minheap) > 1: childR = heapq.heappop(minheap) childL = heapq.heappop(minheap) - parent = (childL[0] + childR[0], childL, childR) + parent = Node() + parent.child[0] = childL + parent.child[1] = childR + parent.freq = childL.freq + childR.freq heapq.heappush(minheap, parent) - # minheap[0] is now the root node of the Huffman tree - # now traverse the tree to create the Huffman codes + return minheap[0] + + +def huffCode(tree): + """ + Given a Huffman tree, traverse the tree and return the Huffman code. + """ result = {} - def traverse(tree, prefix=bitarray()): - if len(tree) == 2: # leave - result[tree[1]] = prefix - else: # parent, so traverse each of the children + def traverse(nd, prefix=bitarray()): + if nd.symbol is None: # parent, so traverse each of the children for i in range(2): - traverse(tree[i+1], prefix + bitarray([i])) + traverse(nd.child[i], prefix + bitarray([i])) + else: # leaf + result[nd.symbol] = prefix - traverse(minheap[0]) + traverse(tree) return result @@ -71,7 +89,7 @@ def disp_char(i): def print_code(filename): freq = freq_string(read_file(filename)) - code = huffCode(freq) + code = huffCode(huffTree(freq)) print(' symbol char hex frequency Huffman code') print(70 * '-') for c in sorted(code, key=lambda c: (freq[c], c), reverse=True): @@ -83,7 +101,7 @@ def print_code(filename): def encode(filename): s = read_file(filename) - code = huffCode(freq_string(s)) + code = huffCode(huffTree(freq_string(s))) with open(filename + '.huff', 'wb') as fo: for c in sorted(code): fo.write(('%02x %s\n' % (c if is_py3k else ord(c), From 464237f5d9680cb2c707c6ea877b1c6fafe74962 Mon Sep 17 00:00:00 2001 From: 
Ilan Schnell Date: Wed, 24 Apr 2019 21:37:05 -0500 Subject: [PATCH 096/185] ensure edges are always drawn, even from parent nodes with only one child --- examples/decoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/decoding.py b/examples/decoding.py index 118119592..21d9db05d 100644 --- a/examples/decoding.py +++ b/examples/decoding.py @@ -76,8 +76,8 @@ def write_nd(fo, nd): 'fillcolor=grey, label="%s"];\n' % (id(nd), disp_freq(nd.freq))) - if nd.child[0] and nd.child[1]: - for k in range(2): + for k in range(2): + if nd.child[k]: fo.write(' %d->%d;\n' % (id(nd), id(nd.child[k]))) for k in range(2): From 7bd71ab6e6042da6ff3394d24f3aff8d8050f030 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 25 Apr 2019 15:29:51 -0500 Subject: [PATCH 097/185] better naming --- bitarray/_bitarray.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 4a674491d..12d56ae19 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2209,14 +2209,14 @@ delete_binode_tree(binode *tree) } static int -insert_symbol(binode *tree, bitarrayobject *self, PyObject *symbol) +insert_symbol(binode *tree, bitarrayobject *ba, PyObject *symbol) { binode *nd = tree, *prev; Py_ssize_t i; int k; - for (i = 0; i < self->nbits; i++) { - k = GETBIT(self, i); + for (i = 0; i < ba->nbits; i++) { + k = GETBIT(ba, i); prev = nd; nd = nd->child[k]; @@ -2269,13 +2269,13 @@ make_tree(PyObject *codedict) when the end of the bitarray has been reached). 
*/ static PyObject * -traverse_tree(binode *tree, bitarrayobject *self, idx_t *indexp) +traverse_tree(binode *tree, bitarrayobject *ba, idx_t *indexp) { binode *nd = tree; int k; - while (*indexp < self->nbits) { - k = GETBIT(self, *indexp); + while (*indexp < ba->nbits) { + k = GETBIT(ba, *indexp); (*indexp)++; nd = nd->child[k]; if (nd == NULL) { From ba4b7f1bd02a39591b2ba6e305690c3d3e43e54f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 25 Apr 2019 23:19:54 -0500 Subject: [PATCH 098/185] refactor Huffman example into library --- examples/README | 8 +- examples/decoding.py | 123 ------------------- examples/huffman.py | 191 ----------------------------- examples/huffman/README | 11 ++ examples/huffman/decoding.py | 43 +++++++ examples/huffman/huffman.py | 230 +++++++++++++++++++++++++++++++++++ 6 files changed, 287 insertions(+), 319 deletions(-) delete mode 100644 examples/decoding.py delete mode 100644 examples/huffman.py create mode 100644 examples/huffman/README create mode 100644 examples/huffman/decoding.py create mode 100644 examples/huffman/huffman.py diff --git a/examples/README b/examples/README index 8cca447cc..20b765ad8 100644 --- a/examples/README +++ b/examples/README @@ -20,11 +20,9 @@ gene.py: represented by bitarrays. -huffman.py - Demonstrates building a Huffman tree. Given an input file, - calculates the number of occurrences for each character; - from those frequencies, a Huffman tree is build. - Also allows encoding and decoding of a file, see -h option. +huffman + Directory containing a library and examples for working with Huffman + trees and codes. 
mandel.py diff --git a/examples/decoding.py b/examples/decoding.py deleted file mode 100644 index 21d9db05d..000000000 --- a/examples/decoding.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import print_function -import time -from bitarray import bitarray -from huffman import freq_string, huffTree, huffCode, Node - - -def insert_symbol(tree, ba, sym): - nd = tree - for k in ba: - prev = nd - nd = nd.child[k] - if nd and nd.symbol: - print("ambiguity") - if not nd: - nd = Node() - prev.child[k] = nd - if nd.symbol or nd.child[0] or nd.child[1]: - print("ambiguity") - nd.symbol = sym - - -def make_tree(codedict): - # generate tree from codedict - tree = Node() - for sym, ba in codedict.items(): - insert_symbol(tree, ba, sym) - return tree - - -def traverse(tree, it): - nd = tree - while 1: - nd = nd.child[next(it)] - if not nd: - print("prefix code does not match data in bitarray") - return None - if nd.symbol is not None: - return nd.symbol - if nd != tree: - print("decoding not terminated") - - -def decode(tree, bitsequence): - # actual decoding by traversing until StopIteration - res = [] - it = iter(bitsequence) - while True: - try: - r = traverse(tree, it) - except StopIteration: - break - res.append(r) - return res - - -def write_dot(tree, fn): - special_ascii = {' ': 'SPACE', '\n': 'LF', '\t': 'TAB', '"': r'\"'} - def disp_char(c): - res = special_ascii.get(c, c) - assert res.strip(), repr(c) - return res - - def disp_freq(f): - if f is None: - return '' - return '%d' % f - - with open(fn, 'w') as fo: # dot -Tpng tree.dot -O - def write_nd(fo, nd): - if nd.symbol: # leaf node - a, b = disp_freq(nd.freq), disp_char(nd.symbol) - fo.write(' %d [label="%s%s%s"];\n' % - (id(nd), a, ': ' if a and b else '', b)) - else: # parent node - fo.write(' %d [shape=circle, style=filled, ' - 'fillcolor=grey, label="%s"];\n' % - (id(nd), disp_freq(nd.freq))) - - for k in range(2): - if nd.child[k]: - fo.write(' %d->%d;\n' % (id(nd), id(nd.child[k]))) - - for k in range(2): 
- if nd.child[k]: - write_nd(fo, nd.child[k]) - - fo.write('digraph BT {\n') - fo.write(' node [shape=box, fontsize=20, fontname="Arial"];\n') - write_nd(fo, tree) - fo.write('}\n') - - -def main(): - txt = open('README').read() - tree = huffTree(freq_string(txt)) - write_dot(tree, 'tree1.dot') - code = huffCode(tree) - write_dot(make_tree(code), 'tree2.dot') - - sample = 100 * txt - - a = bitarray() - a.encode(code, sample) - - # Time the decode function above - start_time = time.time() - res = decode(tree, a) - Py_time = time.time() - start_time - assert ''.join(res) == sample - print('Py_time: %.6f sec' % Py_time) - - # Time the decode method which is implemented in C - start_time = time.time() - res = a.decode(code) - C_time = time.time() - start_time - assert ''.join(res) == sample - print('C_time: %.6f sec' % C_time) - - print('Ratio: %f' % (Py_time / C_time)) - - -if __name__ == '__main__': - main() diff --git a/examples/huffman.py b/examples/huffman.py deleted file mode 100644 index fdd0e4873..000000000 --- a/examples/huffman.py +++ /dev/null @@ -1,191 +0,0 @@ -""" -The non-trivial part of the code is derived from: -http://en.literateprograms.org/Huffman_coding_(Python) - -The link also contains a good description of the algorithm. -""" -from __future__ import print_function - -import os -import sys -import heapq -from collections import defaultdict -from bitarray import bitarray - -is_py3k = bool(sys.version_info[0] == 3) - - -class Node: - def __init__(self): - self.child = [None, None] - self.symbol = None - self.freq = None - - def __lt__(self, other): - return self.freq < other.freq - - -def huffTree(freq): - """ - Given a dictionary mapping symbols to thier frequency, construct a Huffman - tree and return its root node. 
- """ - minheap = [] - for c in sorted(freq): - nd = Node() - nd.symbol = c - nd.freq = freq[c] - heapq.heappush(minheap, nd) - - while len(minheap) > 1: - childR = heapq.heappop(minheap) - childL = heapq.heappop(minheap) - parent = Node() - parent.child[0] = childL - parent.child[1] = childR - parent.freq = childL.freq + childR.freq - heapq.heappush(minheap, parent) - - return minheap[0] - - -def huffCode(tree): - """ - Given a Huffman tree, traverse the tree and return the Huffman code. - """ - result = {} - - def traverse(nd, prefix=bitarray()): - if nd.symbol is None: # parent, so traverse each of the children - for i in range(2): - traverse(nd.child[i], prefix + bitarray([i])) - else: # leaf - result[nd.symbol] = prefix - - traverse(tree) - return result - - -def freq_string(s): - """ - Given a string, return a dict mapping characters to thier frequency. - """ - res = defaultdict(int) - for c in s: - res[c] += 1 - return res - - -def read_file(filename): - with open(filename, 'rb') as fi: - return fi.read() - - -special_ascii = {0: 'NULL', 9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} -def disp_char(i): - if 32 <= i < 127: - return repr(chr(i)) - return special_ascii.get(i, '') - -def print_code(filename): - freq = freq_string(read_file(filename)) - code = huffCode(huffTree(freq)) - print(' symbol char hex frequency Huffman code') - print(70 * '-') - for c in sorted(code, key=lambda c: (freq[c], c), reverse=True): - i = c if is_py3k else ord(c) - print('%7r %-4s 0x%02x %10i %s' % ( - c, disp_char(i), - i, freq[c], code[c].to01())) - - -def encode(filename): - s = read_file(filename) - code = huffCode(huffTree(freq_string(s))) - with open(filename + '.huff', 'wb') as fo: - for c in sorted(code): - fo.write(('%02x %s\n' % (c if is_py3k else ord(c), - code[c].to01())).encode()) - a = bitarray(endian='little') - a.encode(code, s) - # write unused bits - fo.write(b'unused %s\n' % str(a.buffer_info()[3]).encode()) - a.tofile(fo) - print('Ratio =%6.2f%%' % (100.0 * 
a.buffer_info()[1] / len(s))) - - -def decode(filename): - assert filename.endswith('.huff') - code = {} - - with open(filename, 'rb') as fi: - while 1: - line = fi.readline() - c, b = line.split() - if c == b'unused': - u = int(b) - break - i = int(c, 16) - code[i if is_py3k else chr(i)] = bitarray(b) - a = bitarray(endian='little') - a.fromfile(fi) - - if u: - del a[-u:] - - with open(filename[:-5] + '.out', 'wb') as fo: - for c in a.iterdecode(code): - fo.write(chr(c).encode('ISO-8859-1') if is_py3k else c) - - -def main(): - from optparse import OptionParser - - p = OptionParser("usage: %prog [options] FILE") - p.add_option( - '-p', '--print', - action="store_true", - help="calculate and print the Huffman code for the " - "frequency of characters in FILE") - p.add_option( - '-e', '--encode', - action="store_true", - help="encode FILE using the Huffman code calculated for the " - "frequency of characters in FILE itself. " - "The output is FILE.huff which contains both the Huffman " - "code and the bitarray resulting from the encoding.") - p.add_option( - '-d', '--decode', - action="store_true", - help="decode FILE.huff and write the output to FILE.out") - p.add_option( - '-t', '--test', - action="store_true", - help="encode FILE, decode FILE.huff, compare FILE with FILE.out, " - "and unlink created files.") - opts, args = p.parse_args() - if len(args) != 1: - p.error('exactly one argument required') - filename = args[0] - - if opts.print: - print_code(filename) - - if opts.encode: - encode(filename) - - if opts.decode: - decode(filename + '.huff') - - if opts.test: - huff = filename + '.huff' - out = filename + '.out' - encode(filename) - decode(huff) - assert open(filename, 'rb').read() == open(out, 'rb').read() - os.unlink(huff) - os.unlink(out) - - -if __name__ == '__main__': - main() diff --git a/examples/huffman/README b/examples/huffman/README new file mode 100644 index 000000000..91f46167e --- /dev/null +++ b/examples/huffman/README @@ -0,0 +1,11 @@ 
+compress.py: + Demonstrates how Huffman codes can be used to efficiently + compress and uncompress files (text or binary). + Given an input file, calculates the number of occurrences for each + character; from those frequencies, a Huffman tree is build. + Also allows encoding and decoding of a file, see -h option. + + +huffman.py: + Library containing useful functionality for working with Huffman trees + and codes. diff --git a/examples/huffman/decoding.py b/examples/huffman/decoding.py new file mode 100644 index 000000000..920537a1e --- /dev/null +++ b/examples/huffman/decoding.py @@ -0,0 +1,43 @@ +from __future__ import print_function +import time +from bitarray import bitarray + +from huffman import (freq_string, huffTree, huffCode, write_dot, + make_tree, decode) + + +def main(): + txt = open('README').read() + tree = huffTree(freq_string(txt)) + write_dot(tree, 'tree1.dot') + code = huffCode(tree) + write_dot(make_tree(code), 'tree2.dot') + + sample = 100 * txt + + a = bitarray() + a.encode(code, 'Hooray') + print(a) + + a = bitarray() + a.encode(code, sample) + + # Time the decode function above + start_time = time.time() + res = decode(tree, a) + Py_time = time.time() - start_time + assert ''.join(res) == sample + print('Py_time: %.6f sec' % Py_time) + + # Time the decode method which is implemented in C + start_time = time.time() + res = a.decode(code) + C_time = time.time() - start_time + assert ''.join(res) == sample + print('C_time: %.6f sec' % C_time) + + print('Ratio: %f' % (Py_time / C_time)) + + +if __name__ == '__main__': + main() diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py new file mode 100644 index 000000000..bf1a5a394 --- /dev/null +++ b/examples/huffman/huffman.py @@ -0,0 +1,230 @@ +""" +This library contains useful functionality for working with Huffman trees +and codes. 
+""" +from __future__ import print_function +import sys +import heapq +from collections import defaultdict +from bitarray import bitarray + +is_py3k = bool(sys.version_info[0] == 3) + + +class Node: + def __init__(self): + self.child = [None, None] + self.symbol = None + self.freq = None + + def __lt__(self, other): + return self.freq < other.freq + + +def huffTree(freq): + """ + Given a dictionary mapping symbols to thier frequency, construct a Huffman + tree and return its root node. + """ + minheap = [] + for c in sorted(freq): + nd = Node() + nd.symbol = c + nd.freq = freq[c] + heapq.heappush(minheap, nd) + + while len(minheap) > 1: + childR = heapq.heappop(minheap) + childL = heapq.heappop(minheap) + parent = Node() + parent.child[0] = childL + parent.child[1] = childR + parent.freq = childL.freq + childR.freq + heapq.heappush(minheap, parent) + + return minheap[0] + + +def huffCode(tree): + """ + Given a Huffman tree, traverse the tree and return the Huffman code, i.e. + a dictionary mapping symbol to bitarrays. + """ + result = {} + + def traverse(nd, prefix=bitarray()): + if nd.symbol is None: # parent, so traverse each of the children + for i in range(2): + traverse(nd.child[i], prefix + bitarray([i])) + else: # leaf + result[nd.symbol] = prefix + + traverse(tree) + return result + + +def freq_string(s): + """ + Given a string, return a dict mapping characters to thier frequency. + """ + res = defaultdict(int) + for c in s: + res[c] += 1 + return res + + +def insert_symbol(tree, ba, sym): + """ + Insert symbol into a tree at the position described by the bitarray, + creating nodes as necessary. 
+ """ + nd = tree + for k in ba: + prev = nd + nd = nd.child[k] + if nd and nd.symbol: + raise ValueError("ambiguity") + if not nd: + nd = Node() + prev.child[k] = nd + if nd.symbol or nd.child[0] or nd.child[1]: + raise ValueError("ambiguity") + nd.symbol = sym + + +def make_tree(codedict): + """ + Create a tree from the given code dictionary, and return its root node. + """ + tree = Node() + for sym, ba in codedict.items(): + insert_symbol(tree, ba, sym) + return tree + + +def traverse(tree, it): + """ + Traverse tree until a leaf node is reached, and return its symbol. + This function takes an iterator whose .next() method is called during + each step of traversing. + """ + nd = tree + while 1: + nd = nd.child[it.next()] + if not nd: + raise ValueError("prefix code does not match data in bitarray") + return None + if nd.symbol is not None: + return nd.symbol + if nd != tree: + raise ValueError("decoding not terminated") + return None + + +def decode(tree, bitsequence): + """ + Given a tree and a bitsequence, decode the bitsequence and return a + list of symbols. + """ + res = [] + it = iter(bitsequence) + while True: + try: + r = traverse(tree, it) + except StopIteration: + break + res.append(r) + return res + + +def write_dot(tree, fn, binary=False): + """ + Given a tree (which may or may not contain frequencies), write + a graphviz '.dot' file with a visual representation of the tree. 
+ """ + special_ascii = {' ': 'SPACE', '\n': 'LF', '\r': 'CR', '\t': 'TAB', + '\\': r'\\', '"': r'\"'} + def disp_char(c): + if is_py3k: + c = chr(c) + if binary: + return 'x%02x' % ord(c) + else: + if special_ascii: + res = special_ascii.get(c, c) + assert res.strip(), repr(c) + return res + + def disp_freq(f): + if f is None: + return '' + return '%d' % f + + with open(fn, 'w') as fo: # dot -Tpng tree.dot -O + def write_nd(fo, nd): + if nd.symbol: # leaf node + a, b = disp_freq(nd.freq), disp_char(nd.symbol) + fo.write(' %d [label="%s%s%s"];\n' % + (id(nd), a, ': ' if a and b else '', b)) + else: # parent node + fo.write(' %d [shape=circle, style=filled, ' + 'fillcolor=grey, label="%s"];\n' % + (id(nd), disp_freq(nd.freq))) + + for k in range(2): + if nd.child[k]: + fo.write(' %d->%d;\n' % (id(nd), id(nd.child[k]))) + + for k in range(2): + if nd.child[k]: + write_nd(fo, nd.child[k]) + + fo.write('digraph BT {\n') + fo.write(' node [shape=box, fontsize=20, fontname="Arial"];\n') + write_nd(fo, tree) + fo.write('}\n') + + +def print_code(freq, codedict): + """ + Given a frequency map (dictionary mapping symbols to thier frequency) + and a codedict, print them in a readable form. 
+ """ + special_ascii = {0: 'NULL', 9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} + def disp_char(i): + if 32 <= i < 127: + return repr(chr(i)) + return special_ascii.get(i, '') + + print(' symbol char hex frequency Huffman code') + print(70 * '-') + for c in sorted(codedict, key=lambda c: (freq[c], c), reverse=True): + i = c if is_py3k else ord(c) + print('%7r %-4s 0x%02x %10i %s' % ( + c, disp_char(i), + i, freq[c], codedict[c].to01())) + + +def test(): + freq = {'a': 10, 'b': 2, 'c': 1} + tree = huffTree(freq) + code = huffCode(tree) + assert len(code['a']) == 1 + assert len(code['b']) == len(code['c']) == 2 + + d = freq_string('aabcbbbccca') + assert d['a'] == 3 + + code = {'a': bitarray('0'), + 'b': bitarray('10'), + 'c': bitarray('11')} + tree = make_tree(code) + txt = 'abca' + a = bitarray() + a.encode(code, txt) + assert a == bitarray('010110') + assert decode(tree, a) == ['a', 'b', 'c', 'a'] + + +if __name__ == '__main__': + test() From 53fd2d3424db578dcc5c4aa6c4385e8799328bcf Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 25 Apr 2019 23:20:36 -0500 Subject: [PATCH 099/185] add compress program --- examples/huffman/compress.py | 125 +++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 examples/huffman/compress.py diff --git a/examples/huffman/compress.py b/examples/huffman/compress.py new file mode 100644 index 000000000..6dd6ee9e8 --- /dev/null +++ b/examples/huffman/compress.py @@ -0,0 +1,125 @@ +""" +This program demonstrates how Huffman codes can be used to efficiently +compress and uncompress files (text or binary). 
+""" +import os +from optparse import OptionParser +from bitarray import bitarray + +from huffman import (is_py3k, huffCode, huffTree, freq_string, + print_code, write_dot) + + +def is_binary(s): + null = 0 if is_py3k else '\0' + return bool(null in s) + + +def analyze(filename, printCode=False, writeDot=False): + with open(filename, 'rb') as fi: + s = fi.read() + + freq = freq_string(s) + tree = huffTree(freq) + if writeDot: + write_dot(tree, 'tree.dot', is_binary(s)) + code = huffCode(tree) + if printCode: + print_code(freq, code) + + +def encode(filename): + with open(filename, 'rb') as fi: + s = fi.read() + + code = huffCode(huffTree(freq_string(s))) + with open(filename + '.huff', 'wb') as fo: + for c in sorted(code): + fo.write(('%02x %s\n' % (c if is_py3k else ord(c), + code[c].to01())).encode()) + a = bitarray(endian='little') + a.encode(code, s) + # write unused bits + fo.write(b'unused %s\n' % str(a.buffer_info()[3]).encode()) + a.tofile(fo) + print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) + + +def decode(filename): + assert filename.endswith('.huff') + code = {} + + with open(filename, 'rb') as fi: + while 1: + line = fi.readline() + c, b = line.split() + if c == b'unused': + u = int(b) + break + i = int(c, 16) + code[i if is_py3k else chr(i)] = bitarray(b) + a = bitarray(endian='little') + a.fromfile(fi) + + if u: + del a[-u:] + + with open(filename[:-5] + '.out', 'wb') as fo: + for c in a.iterdecode(code): + fo.write(chr(c).encode('ISO-8859-1') if is_py3k else c) + + +def main(): + p = OptionParser("usage: %prog [options] FILE") + p.add_option( + '-s', '--show', + action="store_true", + help="calculate and print the Huffman code for the " + "frequency of characters in FILE") + p.add_option( + '-t', '--tree', + action="store_true", + help="calculate and the Huffman tree (from the frequency of " + "characters in FILE) and write a .dot file") + p.add_option( + '-e', '--encode', + action="store_true", + help="encode (compress) FILE using 
the Huffman code calculated for " + "the frequency of characters in FILE itself. " + "The output is FILE.huff which contains both the Huffman " + "code and the bitarray resulting from the encoding.") + p.add_option( + '-d', '--decode', + action="store_true", + help="decode (decompress) FILE.huff and write the output to FILE.out") + p.add_option( + '--test', + action="store_true", + help="encode FILE, decode FILE.huff, compare FILE with FILE.out, " + "and unlink created files.") + opts, args = p.parse_args() + if len(args) != 1: + p.error('exactly one argument required') + filename = args[0] + + if opts.show or opts.tree: + analyze(filename, printCode=opts.show, writeDot=opts.tree) + + if opts.encode: + encode(filename) + + if opts.decode: + decode(filename + '.huff') + + if opts.test: + huff = filename + '.huff' + out = filename + '.out' + encode(filename) + decode(huff) + assert open(filename, 'rb').read() == open(out, 'rb').read() + os.unlink(huff) + os.unlink(out) + + +if __name__ == '__main__': + main() From cea1b614376e6e3cb676b4ea6795a42b9aabeaa0 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 25 Apr 2019 23:51:47 -0500 Subject: [PATCH 100/185] update work on Huffman example --- examples/huffman/README | 5 +++++ examples/huffman/decoding.py | 9 +++------ examples/huffman/huffman.py | 9 +++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/examples/huffman/README b/examples/huffman/README index 91f46167e..386946cb7 100644 --- a/examples/huffman/README +++ b/examples/huffman/README @@ -6,6 +6,11 @@ compress.py: Also allows encoding and decoding of a file, see -h option. +decoding.py: + This example demonstrates how much faster bitarray's decoing is as + opposed to traversing the Huffman tree using Python. + + huffman.py: Library containing useful functionality for working with Huffman trees and codes. 
diff --git a/examples/huffman/decoding.py b/examples/huffman/decoding.py index 920537a1e..58682de5e 100644 --- a/examples/huffman/decoding.py +++ b/examples/huffman/decoding.py @@ -9,16 +9,13 @@ def main(): txt = open('README').read() tree = huffTree(freq_string(txt)) - write_dot(tree, 'tree1.dot') + write_dot(tree, 'tree.dot') code = huffCode(tree) - write_dot(make_tree(code), 'tree2.dot') + # create tree from code (no frequencies) + write_dot(make_tree(code), 'tree_raw.dot') sample = 100 * txt - a = bitarray() - a.encode(code, 'Hooray') - print(a) - a = bitarray() a.encode(code, sample) diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py index bf1a5a394..025ed0312 100644 --- a/examples/huffman/huffman.py +++ b/examples/huffman/huffman.py @@ -95,6 +95,7 @@ def insert_symbol(tree, ba, sym): def make_tree(codedict): """ Create a tree from the given code dictionary, and return its root node. + Unlike trees created by huffTree, all nodes will have .freq set to None. """ tree = Node() for sym, ba in codedict.items(): @@ -105,12 +106,12 @@ def make_tree(codedict): def traverse(tree, it): """ Traverse tree until a leaf node is reached, and return its symbol. - This function takes an iterator whose .next() method is called during - each step of traversing. + This function takes an iterator on which next() is called during each + step of traversing. 
""" nd = tree while 1: - nd = nd.child[it.next()] + nd = nd.child[next(it)] if not nd: raise ValueError("prefix code does not match data in bitarray") return None @@ -145,7 +146,7 @@ def write_dot(tree, fn, binary=False): special_ascii = {' ': 'SPACE', '\n': 'LF', '\r': 'CR', '\t': 'TAB', '\\': r'\\', '"': r'\"'} def disp_char(c): - if is_py3k: + if is_py3k and isinstance(c, int): c = chr(c) if binary: return 'x%02x' % ord(c) From 863583557f3e3e0137edbf0df2a44c5e8fcbb593 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 26 Apr 2019 03:59:34 -0500 Subject: [PATCH 101/185] inherit from object --- examples/huffman/huffman.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py index 025ed0312..3ebdf4e61 100644 --- a/examples/huffman/huffman.py +++ b/examples/huffman/huffman.py @@ -11,7 +11,7 @@ is_py3k = bool(sys.version_info[0] == 3) -class Node: +class Node(object): def __init__(self): self.child = [None, None] self.symbol = None From 02357793337cf3aedf859ace9c37e456c23c8a12 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 26 Apr 2019 23:07:34 -0500 Subject: [PATCH 102/185] add comments --- examples/huffman/huffman.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py index 3ebdf4e61..74acc9849 100644 --- a/examples/huffman/huffman.py +++ b/examples/huffman/huffman.py @@ -4,7 +4,7 @@ """ from __future__ import print_function import sys -import heapq +from heapq import heappush, heappop from collections import defaultdict from bitarray import bitarray @@ -15,9 +15,9 @@ class Node(object): def __init__(self): self.child = [None, None] self.symbol = None - self.freq = None def __lt__(self, other): + # heapq needs to be able to compare the nodes return self.freq < other.freq @@ -27,21 +27,25 @@ def huffTree(freq): tree and return its root node. 
""" minheap = [] + # create all the leaf nodes and push them onto the queue for c in sorted(freq): nd = Node() nd.symbol = c nd.freq = freq[c] - heapq.heappush(minheap, nd) + heappush(minheap, nd) + # repeat the process until only one node remains while len(minheap) > 1: - childR = heapq.heappop(minheap) - childL = heapq.heappop(minheap) + # take the nodes with smallest frequencies from the queue + childR = heappop(minheap) + childL = heappop(minheap) + # construct the new internal node and push it onto the queue parent = Node() - parent.child[0] = childL - parent.child[1] = childR + parent.child = [childL, childR] parent.freq = childL.freq + childR.freq - heapq.heappush(minheap, parent) + heappush(minheap, parent) + # return the one remaining node, which is the root of the Huffman tree return minheap[0] From 89db59ebe863a0f77a6e50c1bded50ea900d6b4a Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 27 Apr 2019 00:30:16 -0500 Subject: [PATCH 103/185] add freq attribute again --- examples/huffman/huffman.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py index 74acc9849..908fe8754 100644 --- a/examples/huffman/huffman.py +++ b/examples/huffman/huffman.py @@ -15,6 +15,7 @@ class Node(object): def __init__(self): self.child = [None, None] self.symbol = None + self.freq = None def __lt__(self, other): # heapq needs to be able to compare the nodes From 819fdd6dcae299514917114ec6ff58331a6d6c6b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 27 Apr 2019 00:52:57 -0500 Subject: [PATCH 104/185] update comment --- bitarray/_bitarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 12d56ae19..a5acd3117 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2264,7 +2264,7 @@ make_tree(PyObject *codedict) } /* - Traverse tree using the branches corresponding to the bitarray self, + Traverse tree using the branches corresponding to 
the bitarray `ba`, starting at *indexp, and return the symbol at the leaf node (or NULL when the end of the bitarray has been reached). */ From 587f9037cf7c1be068ee94dd7a1e006e392edf84 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 27 Apr 2019 01:49:22 -0500 Subject: [PATCH 105/185] add useful output --- examples/huffman/compress.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/huffman/compress.py b/examples/huffman/compress.py index 6dd6ee9e8..fbda31a9f 100644 --- a/examples/huffman/compress.py +++ b/examples/huffman/compress.py @@ -42,6 +42,7 @@ def encode(filename): # write unused bits fo.write(b'unused %s\n' % str(a.buffer_info()[3]).encode()) a.tofile(fo) + print('%d / %d' % (len(a), 8 * len(s))) print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) From 94ec8d7dde458d0c598a927076d0d5f3d7e90672 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 27 Apr 2019 12:06:22 -0500 Subject: [PATCH 106/185] remove freq_string use collections.Counter instead --- bitarray/_bitarray.c | 2 +- examples/huffman/compress.py | 8 ++++---- examples/huffman/decoding.py | 31 +++++++++++++++++-------------- examples/huffman/huffman.py | 14 -------------- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a5acd3117..190feac35 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2191,9 +2191,9 @@ new_binode(void) PyErr_NoMemory(); return NULL; } - nd->symbol = NULL; nd->child[0] = NULL; nd->child[1] = NULL; + nd->symbol = NULL; return nd; } diff --git a/examples/huffman/compress.py b/examples/huffman/compress.py index fbda31a9f..0051c2bef 100644 --- a/examples/huffman/compress.py +++ b/examples/huffman/compress.py @@ -4,10 +4,10 @@ """ import os from optparse import OptionParser +from collections import Counter from bitarray import bitarray -from huffman import (is_py3k, huffCode, huffTree, freq_string, - print_code, write_dot) +from huffman import is_py3k, huffCode, huffTree, 
print_code, write_dot def is_binary(s): @@ -19,7 +19,7 @@ def analyze(filename, printCode=False, writeDot=False): with open(filename, 'rb') as fi: s = fi.read() - freq = freq_string(s) + freq = Counter(s) tree = huffTree(freq) if writeDot: write_dot(tree, 'tree.dot', is_binary(s)) @@ -32,7 +32,7 @@ def encode(filename): with open(filename, 'rb') as fi: s = fi.read() - code = huffCode(huffTree(freq_string(s))) + code = huffCode(huffTree(Counter(s))) with open(filename + '.huff', 'wb') as fo: for c in sorted(code): fo.write(('%02x %s\n' % (c if is_py3k else ord(c), diff --git a/examples/huffman/decoding.py b/examples/huffman/decoding.py index 58682de5e..a966012d8 100644 --- a/examples/huffman/decoding.py +++ b/examples/huffman/decoding.py @@ -1,36 +1,39 @@ from __future__ import print_function -import time +from time import time +from collections import Counter from bitarray import bitarray -from huffman import (freq_string, huffTree, huffCode, write_dot, - make_tree, decode) +from huffman import huffTree, huffCode, write_dot, make_tree, decode def main(): - txt = open('README').read() - tree = huffTree(freq_string(txt)) + txt = 1000 * open('README').read() + + t0 = time() + freq = Counter(txt) + print('count time: %.6f sec' % (time() - t0)) + + tree = huffTree(freq) write_dot(tree, 'tree.dot') code = huffCode(tree) # create tree from code (no frequencies) write_dot(make_tree(code), 'tree_raw.dot') - sample = 100 * txt - a = bitarray() - a.encode(code, sample) + a.encode(code, txt) # Time the decode function above - start_time = time.time() + t0 = time() res = decode(tree, a) - Py_time = time.time() - start_time - assert ''.join(res) == sample + Py_time = time() - t0 + assert ''.join(res) == txt print('Py_time: %.6f sec' % Py_time) # Time the decode method which is implemented in C - start_time = time.time() + t0 = time() res = a.decode(code) - C_time = time.time() - start_time - assert ''.join(res) == sample + assert ''.join(res) == txt + C_time = time() - t0 
print('C_time: %.6f sec' % C_time) print('Ratio: %f' % (Py_time / C_time)) diff --git a/examples/huffman/huffman.py b/examples/huffman/huffman.py index 908fe8754..57e674174 100644 --- a/examples/huffman/huffman.py +++ b/examples/huffman/huffman.py @@ -5,7 +5,6 @@ from __future__ import print_function import sys from heapq import heappush, heappop -from collections import defaultdict from bitarray import bitarray is_py3k = bool(sys.version_info[0] == 3) @@ -68,16 +67,6 @@ def traverse(nd, prefix=bitarray()): return result -def freq_string(s): - """ - Given a string, return a dict mapping characters to thier frequency. - """ - res = defaultdict(int) - for c in s: - res[c] += 1 - return res - - def insert_symbol(tree, ba, sym): """ Insert symbol into a tree at the position described by the bitarray, @@ -218,9 +207,6 @@ def test(): assert len(code['a']) == 1 assert len(code['b']) == len(code['c']) == 2 - d = freq_string('aabcbbbccca') - assert d['a'] == 3 - code = {'a': bitarray('0'), 'b': bitarray('10'), 'c': bitarray('11')} From ce3f308be8c05c2c42b0636b65f6867638908874 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 27 Apr 2019 14:36:59 -0500 Subject: [PATCH 107/185] add count time to output --- examples/huffman/decoding.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/huffman/decoding.py b/examples/huffman/decoding.py index a966012d8..f810d8f4d 100644 --- a/examples/huffman/decoding.py +++ b/examples/huffman/decoding.py @@ -11,9 +11,12 @@ def main(): t0 = time() freq = Counter(txt) - print('count time: %.6f sec' % (time() - t0)) + print('count: %9.6f sec' % (time() - t0)) + t0 = time() tree = huffTree(freq) + print('tree: %9.6f sec' % (time() - t0)) + write_dot(tree, 'tree.dot') code = huffCode(tree) # create tree from code (no frequencies) @@ -27,14 +30,14 @@ def main(): res = decode(tree, a) Py_time = time() - t0 assert ''.join(res) == txt - print('Py_time: %.6f sec' % Py_time) + print('Py decode: %9.6f sec' % Py_time) 
# Time the decode method which is implemented in C t0 = time() res = a.decode(code) assert ''.join(res) == txt C_time = time() - t0 - print('C_time: %.6f sec' % C_time) + print('C decode: %9.6f sec' % C_time) print('Ratio: %f' % (Py_time / C_time)) From 8d0bc9688bb62fb8a9fcf1964590cf7538f6dbad Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 27 Apr 2019 16:29:17 -0500 Subject: [PATCH 108/185] add encode time --- examples/huffman/decoding.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/huffman/decoding.py b/examples/huffman/decoding.py index f810d8f4d..4c02f49e0 100644 --- a/examples/huffman/decoding.py +++ b/examples/huffman/decoding.py @@ -23,7 +23,10 @@ def main(): write_dot(make_tree(code), 'tree_raw.dot') a = bitarray() + + t0 = time() a.encode(code, txt) + print('C encode: %9.6f sec' % (time() - t0)) # Time the decode function above t0 = time() From 17ba106be0192e5206c8f07d76f7cf6466f043cb Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 28 Apr 2019 01:38:29 -0500 Subject: [PATCH 109/185] add release date, update changelog and examples/README --- CHANGE_LOG | 5 +++-- examples/README | 7 ------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 93b544a92..10869fe75 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,11 +1,12 @@ -2019-XX-XX 0.9.1: +2019-04-28 0.9.1: ------------------- * fix types to actually be types, #29 * check for ambiguous prefix codes when building binary tree for decoding * remove Python level methods: encode, decode, iterdecode (in favor of having these implemented on the C-level along with check_codedict) * fix self tests for Python 2.5 and 2.6 - * add code to generate .dot file of Huffman tree to examples + * move all Huffman code related example code into examples/huffman + * add code to generate graphviz .dot file of Huffman tree to examples 2019-04-22 0.9.0: diff --git a/examples/README b/examples/README index 20b765ad8..1befe4f6c 100644 --- a/examples/README +++ 
b/examples/README @@ -8,13 +8,6 @@ compress.py: object which represents a bitarray. -decoding.py - Bitarray's decode method is implemented in C. Since the C code - might be hard to read, we have implemented exactly the same - algorithm in Python. It is at least 20 times slower than its - C counterpart. - - gene.py: shows how gene sequences (ATGC) can be very easily and efficiently represented by bitarrays. From fb5c461e3479f94d116fad1cac3c6c8eb654af6c Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 28 Apr 2019 01:41:23 -0500 Subject: [PATCH 110/185] update README.rst --- README.rst | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index bcb512cc5..677559739 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-0.9.0.tar.gz - $ cd bitarray-0.9.0 + $ tar xzf bitarray-0.9.1.tar.gz + $ cd bitarray-0.9.1 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -62,12 +62,12 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.9.0 + bitarray version: 0.9.1 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... - ........................................... + ................................................. ---------------------------------------------------------------------- - Ran 134 tests in 1.396s + Ran 140 tests in 2.164s OK @@ -368,7 +368,7 @@ Reference ``decode(code)`` -> list Given a prefix code (a dict mapping symbols to bitarrays), - decode the content of the bitarray and return the list of symbols. + decode the content of the bitarray and return it as a list of symbols. 
``encode(code, iterable)`` @@ -423,7 +423,8 @@ Reference ``iterdecode(code)`` -> iterator Given a prefix code (a dict mapping symbols to bitarrays), - decode the content of the bitarray and iterate over the symbols. + decode the content of the bitarray and return an iterator over + the symbols. ``itersearch(bitarray)`` -> iterator @@ -537,6 +538,17 @@ Reference Change log ---------- +**0.9.1** (2019-04-28): + + * fix types to actually be types, #29 + * check for ambiguous prefix codes when building binary tree for decoding + * remove Python level methods: encode, decode, iterdecode (in favor of + having these implemented on the C-level along with check_codedict) + * fix self tests for Python 2.5 and 2.6 + * move all Huffman code related example code into examples/huffman + * add code to generate graphviz .dot file of Huffman tree to examples + + **0.9.0** (2019-04-22): * more efficient decode and iterdecode by using C-level binary tree @@ -554,14 +566,5 @@ Change log * add exception to setup.py when README.rst cannot be opened -**0.8.2** (2018-05-30): - - * add official Python 3.6 support (although it was already working) - * fix description of fill(), #52 - * handle extending self correctly, #28 - * copy_n: fast copy with memmove fixed, #43 - * minor clarity/wording changes to README, #23 - - Please find the complete change log `here `_. 
From ce373f2c6acc3043b5e1f60c3a54febfd975aa18 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 29 Apr 2019 13:25:35 -0500 Subject: [PATCH 111/185] fix fail to compile on Windows with VS 2015, issue #72 --- bitarray/_bitarray.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 190feac35..638b80549 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2428,7 +2428,7 @@ decodeiter_traverse(decodeiterobject *it, visitproc visit, void *arg) static PyTypeObject DecodeIter_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&PyType_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ @@ -2543,7 +2543,7 @@ searchiter_traverse(searchiterobject *it, visitproc visit, void *arg) static PyTypeObject SearchIter_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&PyType_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ @@ -2907,7 +2907,7 @@ bitarrayiter_traverse(bitarrayiterobject *it, visitproc visit, void *arg) static PyTypeObject BitarrayIter_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&PyType_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ @@ -3034,7 +3034,7 @@ static PyBufferProcs bitarray_as_buffer = { static PyTypeObject Bitarraytype = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&PyType_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ From 1e3bd03ef084e94d71ff5a06fb3f6a587e4e3946 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 29 Apr 2019 13:28:27 -0500 Subject: [PATCH 112/185] update changelog --- CHANGE_LOG | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 10869fe75..267796030 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,8 @@ +2019-04-XX 0.9.2: +------------------- + * fix fail to compile on Windows with VS 2015, issue #72 + + 2019-04-28 0.9.1: ------------------- * fix types to 
actually be types, #29 From 0b77bdc5400c3e08d122708c8c0b4553a5c74c09 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 29 Apr 2019 14:34:15 -0500 Subject: [PATCH 113/185] prepare for 0.9.2 release --- CHANGE_LOG | 2 +- README.rst | 16 ++++++++-------- bitarray/__init__.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 267796030..2bbff1df8 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,4 +1,4 @@ -2019-04-XX 0.9.2: +2019-04-29 0.9.2: ------------------- * fix fail to compile on Windows with VS 2015, issue #72 diff --git a/README.rst b/README.rst index 677559739..0f06ff7a6 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-0.9.1.tar.gz - $ cd bitarray-0.9.1 + $ tar xzf bitarray-0.9.2.tar.gz + $ cd bitarray-0.9.2 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -62,7 +62,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.9.1 + bitarray version: 0.9.2 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ................................................. @@ -538,6 +538,11 @@ Reference Change log ---------- +**0.9.2** (2019-04-29): + + * fix fail to compile on Windows with VS 2015, issue #72 + + **0.9.1** (2019-04-28): * fix types to actually be types, #29 @@ -561,10 +566,5 @@ Change log * drop Python 2.4, 3.1 and 3.2 support -**0.8.3** (2018-07-06): - - * add exception to setup.py when README.rst cannot be opened - - Please find the complete change log `here `_. 
diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 740a16c0d..eab224a8f 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.9.1' +__version__ = '0.9.2' class bitarray(_bitarray): From 53b728bda89df9bc6a408615c1c4acd61fe1c933 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 12 May 2019 02:22:32 -0500 Subject: [PATCH 114/185] add better error message when trying to initialize from float or complex --- bitarray/_bitarray.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 638b80549..c9b1b7271 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2770,6 +2770,16 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } } + if (PyFloat_Check(initial)) { + PyErr_SetString(PyExc_TypeError, "cannot create bitarray from float"); + return NULL; + } + if (PyComplex_Check(initial)) { + PyErr_SetString(PyExc_TypeError, + "cannot create bitarray from complex number"); + return NULL; + } + /* leave remaining type dispatch to the extend method */ a = newbitarrayobject(type, 0, endian); if (a == NULL) From 1c34ba426c57f2903bc935d6609e12d3e019b490 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 12 May 2019 02:26:05 -0500 Subject: [PATCH 115/185] update changelog --- CHANGE_LOG | 5 +++++ bitarray/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 2bbff1df8..2b9e3e29d 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,8 @@ +2019-XX-XX 0.9.3: +------------------- + * improve error message when trying to initialize from float or complex + + 2019-04-29 0.9.2: ------------------- * fix fail to compile on Windows with VS 2015, issue #72 diff --git a/bitarray/__init__.py b/bitarray/__init__.py index eab224a8f..b57ed19a3 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ 
from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.9.2' +__version__ = '0.9.3' class bitarray(_bitarray): From 3b764699f7430e37d287687b30b2a2c07527fd37 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 12 May 2019 18:56:15 -0500 Subject: [PATCH 116/185] use macro to avoid duplicate code --- bitarray/_bitarray.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index c9b1b7271..9155ed11a 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2770,15 +2770,15 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } } - if (PyFloat_Check(initial)) { - PyErr_SetString(PyExc_TypeError, "cannot create bitarray from float"); - return NULL; - } - if (PyComplex_Check(initial)) { - PyErr_SetString(PyExc_TypeError, - "cannot create bitarray from complex number"); - return NULL; - } +#define CHECK_TYPE(type) \ + if (Py ## type ## _Check(initial)) { \ + PyErr_SetString(PyExc_TypeError, \ + "cannot create bitarray from " #type " object"); \ + return NULL; \ + } +CHECK_TYPE(Float) +CHECK_TYPE(Complex) +#undef CHECK /* leave remaining type dispatch to the extend method */ a = newbitarrayobject(type, 0, endian); From 96446f416a42c03da23f3720a41851f819fdf961 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 13 May 2019 17:34:02 -0500 Subject: [PATCH 117/185] fix typo, add comment, add assertion --- bitarray/_bitarray.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 9155ed11a..9463f7941 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -51,6 +51,7 @@ typedef long long int idx_t; /* throughout: 0 = little endian 1 = big endian */ #define DEFAULT_ENDIAN 1 +/* Note that ob_size is the byte count, not the number of elements */ typedef struct { PyObject_VAR_HEAD #ifdef WITH_BUFFER @@ -58,7 +59,7 @@ typedef struct { #endif char *ob_item; Py_ssize_t 
allocated; /* how many bytes allocated */ - idx_t nbits; /* length og bitarray */ + idx_t nbits; /* length of bitarray */ int endian; /* bit endianness of bitarray */ PyObject *weakreflist; /* list of weak references */ } bitarrayobject; @@ -125,10 +126,8 @@ resize(bitarrayobject *self, idx_t nbits) to accommodate the newsize. If the newsize is 16 smaller than the current size, then proceed with the realloc() to shrink the list. */ - if (self->allocated >= newsize && - Py_SIZE(self) < newsize + 16 && - self->ob_item != NULL) - { + if (self->allocated >= newsize && Py_SIZE(self) < newsize + 16) { + assert(self->ob_item != NULL || newsize == 0); Py_SIZE(self) = newsize; self->nbits = nbits; return 0; From 78f414282a708eb8607ee14ab6fcd77dc6847d95 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 14 May 2019 00:41:36 -0500 Subject: [PATCH 118/185] refactor resize() - only shrink if the newsize falls lower than half the allocated size --- bitarray/_bitarray.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 9463f7941..bc3801ee1 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -115,48 +115,46 @@ static int resize(bitarrayobject *self, idx_t nbits) { Py_ssize_t newsize; - size_t _new_size; /* for allocation */ + size_t new_allocated; + Py_ssize_t allocated = self->allocated; if (check_overflow(nbits) < 0) return -1; - newsize = (Py_ssize_t) BYTES(nbits); /* Bypass realloc() when a previous overallocation is large enough - to accommodate the newsize. If the newsize is 16 smaller than the - current size, then proceed with the realloc() to shrink the list. + to accommodate the newsize. If the newsize falls lower than half + the allocated size, then proceed with the realloc() to shrink. 
*/ - if (self->allocated >= newsize && Py_SIZE(self) < newsize + 16) { + if (allocated >= newsize && newsize >= (allocated >> 1)) { assert(self->ob_item != NULL || newsize == 0); Py_SIZE(self) = newsize; self->nbits = nbits; return 0; } - if (newsize >= Py_SIZE(self) + 65536) - /* Don't overallocate when the size increase is very large. */ - _new_size = newsize; - else - /* This over-allocates proportional to the bitarray size, making - room for additional growth. The over-allocation is mild, but is - enough to give linear-time amortized behavior over a long - sequence of appends() in the presence of a poorly-performing - system realloc(). + new_allocated = (size_t) newsize; + if (newsize < Py_SIZE(self) + 65536) + /* Over-allocate unless the size increase is very large. + This over-allocates proportional to the bitarray size, making + room for additional growth. The growth pattern is: 0, 4, 8, 16, 25, 34, 44, 54, 65, 77, ... Note, the pattern starts out the same as for lists but then grows at a smaller rate so that larger bitarrays only overallocate by about 1/16th -- this is done because bitarrays are assumed to be memory critical. */ - _new_size = (newsize >> 4) + (Py_SIZE(self) < 8 ? 3 : 7) + newsize; + new_allocated += (newsize >> 4) + (newsize < 8 ? 
3 : 7); - self->ob_item = PyMem_Realloc(self->ob_item, _new_size); + if (newsize == 0) + new_allocated = 0; + self->ob_item = PyMem_Realloc(self->ob_item, new_allocated); if (self->ob_item == NULL) { PyErr_NoMemory(); return -1; } Py_SIZE(self) = newsize; - self->allocated = _new_size; + self->allocated = new_allocated; self->nbits = nbits; return 0; } From 6a346bc66b768497694e2d536f4f7fbc397244ce Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 14 May 2019 00:44:08 -0500 Subject: [PATCH 119/185] update changelog --- CHANGE_LOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 2b9e3e29d..c4428e132 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,5 +1,7 @@ 2019-XX-XX 0.9.3: ------------------- + * refactor resize() - only shrink allocated memory if new size falls + lower than half the allocated size * improve error message when trying to initialize from float or complex From 9338868a3fcacf648b02a303bc2722870f09e03f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 14 May 2019 12:30:07 -0500 Subject: [PATCH 120/185] formatting --- bitarray/_bitarray.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index bc3801ee1..ae6f958bf 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -562,10 +562,8 @@ extend_bitarray(bitarrayobject *self, bitarrayobject *other) if (other->nbits == 0) return 0; - /* - Note: other may be self. Thus we take the size before we resize, - ensuring we only copy the right parts of the array. - */ + /* Note that other may be self. Thus we take the size before we resize, + ensuring we only copy the right parts of the array. 
*/ n_other_bits = other->nbits; n_sum = self->nbits + other->nbits; From 61682550e3866c6b2020bf8ff11753f516073341 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 18 May 2019 01:05:27 -0500 Subject: [PATCH 121/185] use sizeof(type) --- bitarray/_bitarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index ae6f958bf..9201d1f4f 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2181,7 +2181,7 @@ new_binode(void) { binode *nd; - nd = PyMem_Malloc(sizeof *nd); + nd = PyMem_Malloc(sizeof(binode)); if (nd == NULL) { PyErr_NoMemory(); return NULL; From ef3e65a24ecd1fa755a0e53f1f4f3de15684580f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 19 May 2019 18:43:31 -0500 Subject: [PATCH 122/185] add comment --- bitarray/_bitarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 9201d1f4f..bee5353ae 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -59,7 +59,7 @@ typedef struct { #endif char *ob_item; Py_ssize_t allocated; /* how many bytes allocated */ - idx_t nbits; /* length of bitarray */ + idx_t nbits; /* length of bitarray, i.e. 
elements */ int endian; /* bit endianness of bitarray */ PyObject *weakreflist; /* list of weak references */ } bitarrayobject; From 9a7bd5e23eb8eeedebc5f86a0da4e7469cfbf761 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sun, 19 May 2019 18:46:09 -0500 Subject: [PATCH 123/185] fix #undef --- bitarray/_bitarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index bee5353ae..85cf4d024 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2773,7 +2773,7 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } CHECK_TYPE(Float) CHECK_TYPE(Complex) -#undef CHECK +#undef CHECK_TYPE /* leave remaining type dispatch to the extend method */ a = newbitarrayobject(type, 0, endian); From ba8ca96e731b2999893ff0ac76cb310ebb974163 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 20 May 2019 10:58:32 -0500 Subject: [PATCH 124/185] add release date, update readme --- CHANGE_LOG | 2 +- README.rst | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index c4428e132..6d628e953 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,4 +1,4 @@ -2019-XX-XX 0.9.3: +2019-05-20 0.9.3: ------------------- * refactor resize() - only shrink allocated memory if new size falls lower than half the allocated size diff --git a/README.rst b/README.rst index 0f06ff7a6..73471495e 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-0.9.2.tar.gz - $ cd bitarray-0.9.2 + $ tar xzf bitarray-0.9.3.tar.gz + $ cd bitarray-0.9.3 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -62,7 +62,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.9.2 + bitarray version: 0.9.3 2.7.2 
(r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ................................................. @@ -538,6 +538,13 @@ Reference Change log ---------- +**0.9.3** (2019-05-20): + + * refactor resize() - only shrink allocated memory if new size falls + lower than half the allocated size + * improve error message when trying to initialize from float or complex + + **0.9.2** (2019-04-29): * fix fail to compile on Windows with VS 2015, issue #72 @@ -554,17 +561,5 @@ Change log * add code to generate graphviz .dot file of Huffman tree to examples -**0.9.0** (2019-04-22): - - * more efficient decode and iterdecode by using C-level binary tree - instead of a python one, #54 - * added buffer protocol support for Python 3, #55 - * fixed invalid pointer exceptions in pypy, #47 - * made all examples Py3k compatible - * add gene sequence example - * add official Python 3.7 support - * drop Python 2.4, 3.1 and 3.2 support - - Please find the complete change log `here `_. 
From 8254e0d49befd4634d660ac310914e27bc5515eb Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 20 May 2019 12:37:53 -0500 Subject: [PATCH 125/185] cast newly allocated memory pointers --- CHANGE_LOG | 4 ++++ bitarray/__init__.py | 2 +- bitarray/_bitarray.c | 8 ++++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 6d628e953..571e80f4b 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,7 @@ +2019-XX-XX 0.9.4: +------------------- + + 2019-05-20 0.9.3: ------------------- * refactor resize() - only shrink allocated memory if new size falls diff --git a/bitarray/__init__.py b/bitarray/__init__.py index b57ed19a3..dce44ee1f 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.9.3' +__version__ = '0.9.4' class bitarray(_bitarray): diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 85cf4d024..5d14dcfe9 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -181,7 +181,7 @@ newbitarrayobject(PyTypeObject *type, idx_t nbits, int endian) obj->ob_item = NULL; } else { - obj->ob_item = PyMem_Malloc((size_t) nbytes); + obj->ob_item = (char *) PyMem_Malloc((size_t) nbytes); if (obj->ob_item == NULL) { PyObject_Del(obj); PyErr_NoMemory(); @@ -540,7 +540,7 @@ unpack(bitarrayobject *self, char zero, char one) PyErr_SetString(PyExc_OverflowError, "bitarray too large to unpack"); return NULL; } - str = PyMem_Malloc((size_t) self->nbits); + str = (char *) PyMem_Malloc((size_t) self->nbits); if (str == NULL) { PyErr_NoMemory(); return NULL; @@ -1182,7 +1182,7 @@ bitarray_reduce(bitarrayobject *self) } /* the first byte indicates the number of unused bits at the end, and the rest of the bytes consist of the raw binary data */ - str = PyMem_Malloc(Py_SIZE(self) + 1); + str = (char *) PyMem_Malloc(Py_SIZE(self) + 1); if (str == NULL) { PyErr_NoMemory(); goto error; @@ -2181,7 +2181,7 @@ 
new_binode(void) { binode *nd; - nd = PyMem_Malloc(sizeof(binode)); + nd = (binode *) PyMem_Malloc(sizeof(binode)); if (nd == NULL) { PyErr_NoMemory(); return NULL; From c84f19668c66acb6a2da0236f0d9d7fdcc59ad97 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 20 May 2019 13:13:06 -0500 Subject: [PATCH 126/185] add comment --- bitarray/_bitarray.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 5d14dcfe9..12ba6b79e 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -51,7 +51,10 @@ typedef long long int idx_t; /* throughout: 0 = little endian 1 = big endian */ #define DEFAULT_ENDIAN 1 -/* Note that ob_size is the byte count, not the number of elements */ +/* Unlike the normal convention, ob_size is the byte count, not the number + of elements. The reason for doing this is that we can use our own + special idx_t for the number of bits (which can exceed 2^32 on a 32 bit + machine. */ typedef struct { PyObject_VAR_HEAD #ifdef WITH_BUFFER From b9ecfa6d514c8eb75aacffab9c3b7c0d0e860a1c Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 20 May 2019 18:55:32 -0500 Subject: [PATCH 127/185] add test for reading 0 bytes from file --- bitarray/test_bitarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index edbef36c7..35b221a59 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1841,6 +1841,7 @@ def test_fromfile_n(self): b = bitarray() f = open(self.tmpfname, 'rb') + b.fromfile(f, 0); self.assertEqual(b.tostring(), '') b.fromfile(f, 1); self.assertEqual(b.tostring(), 'A') f.read(1) b = bitarray() From 3830d4a137a67ec8f2dc64e7171fc020e0d0e87b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 00:08:25 -0500 Subject: [PATCH 128/185] add comment to bitarray_fromfile() --- bitarray/_bitarray.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git 
a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 12ba6b79e..1059d5a45 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1344,9 +1344,13 @@ PyDoc_STRVAR(sort_doc, Sort the bits in the array (in-place)."); -#ifdef IS_PY3K +/* since too many details differ between the Python 2 and 3 implementation + of this function, we choose to have two separate function implementation, + even though this means some of the code is duplicated in the two versions +*/ static PyObject * bitarray_fromfile(bitarrayobject *self, PyObject *args) +#ifdef IS_PY3K { PyObject *f; Py_ssize_t newsize, nbytes = -1; @@ -1382,7 +1386,6 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args) Py_DECREF(reader); return NULL; } - nread = PyBytes_Size(result); t = self->nbits; @@ -1397,7 +1400,6 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args) Py_DECREF(reader); return NULL; } - memcpy(self->ob_item + (Py_SIZE(self) - nread), PyBytes_AS_STRING(result), nread); @@ -1412,12 +1414,9 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args) Py_DECREF(rargs); Py_DECREF(reader); - Py_RETURN_NONE; } -#else -static PyObject * -bitarray_fromfile(bitarrayobject *self, PyObject *args) +#else /* Python 2 */ { PyObject *f; FILE *fp; From fd121d0e13399ea1245f7b62a0b40adeab85b29f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 00:13:01 -0500 Subject: [PATCH 129/185] add comment to bitarray_tofile() --- bitarray/_bitarray.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 1059d5a45..f6b1b4efc 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1348,9 +1348,9 @@ Sort the bits in the array (in-place)."); of this function, we choose to have two separate function implementation, even though this means some of the code is duplicated in the two versions */ +#ifdef IS_PY3K static PyObject * bitarray_fromfile(bitarrayobject *self, PyObject *args) -#ifdef IS_PY3K { PyObject *f; Py_ssize_t 
newsize, nbytes = -1; @@ -1417,6 +1417,8 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args) Py_RETURN_NONE; } #else /* Python 2 */ +static PyObject * +bitarray_fromfile(bitarrayobject *self, PyObject *args) { PyObject *f; FILE *fp; @@ -1485,6 +1487,9 @@ interpreted as machine values. When n is omitted, as many bytes are\n\ read until EOF is reached."); +/* since too many details differ between the Python 2 and 3 implementation + of this function, we choose to have two separate function implementation +*/ #ifdef IS_PY3K static PyObject * bitarray_tofile(bitarrayobject *self, PyObject *f) @@ -1522,7 +1527,7 @@ bitarray_tofile(bitarrayobject *self, PyObject *f) Py_DECREF(result); Py_RETURN_NONE; } -#else +#else /* Python 2 */ static PyObject * bitarray_tofile(bitarrayobject *self, PyObject *f) { From d2eee212e1dd252b63b0038f204c86b2356847cf Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 00:24:25 -0500 Subject: [PATCH 130/185] update changelog --- CHANGE_LOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 571e80f4b..81b58adf1 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,5 +1,6 @@ 2019-XX-XX 0.9.4: ------------------- + * add a few comments to the C code 2019-05-20 0.9.3: From c8ca7057edec93149901b25e379b64fe6eb97e5f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 00:59:32 -0500 Subject: [PATCH 131/185] add comment to bitarray_frombytes() --- bitarray/_bitarray.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index f6b1b4efc..2516a10d2 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1598,6 +1598,12 @@ bitarray_frombytes(bitarrayobject *self, PyObject *string) PyErr_SetString(PyExc_TypeError, "byte string expected"); return NULL; } + + /* Before we extend the raw bytes with the new data, we need store + the current size and pad the last byte, as our bitarray size might + not be a multiple of 8. 
After extending, we remove the padding + bits again. The same is done in bitarray_fromfile(). + */ t = self->nbits; p = setunused(self); self->nbits += p; From e1174a77bbc80fd529a508e54a611f8e9fa4e3ef Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 04:13:53 -0500 Subject: [PATCH 132/185] add assert inf copy_n() --- bitarray/_bitarray.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 2516a10d2..bb2255a56 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -218,24 +218,24 @@ copy_n(bitarrayobject *self, idx_t a, assert(0 <= n && n <= self->nbits && n <= other->nbits); assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); - if (n == 0) { + if (n == 0) return; - } if (self->endian == other->endian && a % 8 == 0 && b % 8 == 0 && n >= 8) { const Py_ssize_t bytes = (Py_ssize_t) n / 8; const idx_t bits = bytes * 8; - if (a <= b) { + assert(bits <= n && n < bits + 8); + if (a <= b) memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); - } - if (n != bits) { + + if (n != bits) copy_n(self, bits + a, other, bits + b, n - bits); - } - if (a > b) { + + if (a > b) memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); - } + return; } From ed2445abd7d2adfa6b949f39fa155d57d0efe43d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 19:54:55 -0500 Subject: [PATCH 133/185] add comment to copy_n() --- bitarray/_bitarray.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index bb2255a56..54902bc5d 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -221,6 +221,10 @@ copy_n(bitarrayobject *self, idx_t a, if (n == 0) return; + /* When the start positions are at byte positions, we can copy whole + bytes using memmove, and copy the remaining few bits individually. 
+ Note that the order of these two operations matters when copying + self to self. */ if (self->endian == other->endian && a % 8 == 0 && b % 8 == 0 && n >= 8) { const Py_ssize_t bytes = (Py_ssize_t) n / 8; @@ -239,9 +243,8 @@ copy_n(bitarrayobject *self, idx_t a, return; } - /* the different type of looping is only relevant when other and self - are the same object, i.e. when copying a piece of an bitarrayobject - onto itself */ + /* The different type of looping is only relevant copying self to self, + i.e. when copying a piece of an bitarrayobject onto itself. */ if (a <= b) { for (i = 0; i < n; i++) /* loop forward (delete) */ setbit(self, i + a, GETBIT(other, i + b)); From 7ea1aff566847c0af05b9d24249a9b364d4acfc7 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 20:08:48 -0500 Subject: [PATCH 134/185] typo in changelog --- CHANGE_LOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 81b58adf1..d397f2d48 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -125,7 +125,7 @@ 2009-01-15 0.3.4: ------------------- * Made C code less ambiguous, such that the package compiles on - Visual Studio, will all tests passing. + Visual Studio, with all tests passing. 2008-12-14 0.3.3: @@ -149,7 +149,7 @@ 2008-09-30 0.3.0: ------------------- - * Fixed a severe bug for 64bit machines. Implemented all methods in C, + * Fixed a severe bug for 64-bit machines. Implemented all methods in C, improved tests. * Removed deprecated methods from01 and fromlist. 
From a3bb2507940c7954d6f839d71e94865d9cf1cc01 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 20:32:31 -0500 Subject: [PATCH 135/185] add test to ensure repr is of type str --- bitarray/test_bitarray.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 35b221a59..4c1b3425d 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -781,11 +781,13 @@ def test_any(self): def test_repr(self): - a = bitarray() - self.assertEqual(repr(a), "bitarray()") + r = repr(bitarray()) + self.assertEqual(r, "bitarray()") + self.assert_(isinstance(r, str)) - a = bitarray('10111') - self.assertEqual(repr(a), "bitarray('10111')") + r = repr(bitarray('10111')) + self.assertEqual(r, "bitarray('10111')") + self.assert_(isinstance(r, str)) for a in self.randombitarrays(): b = eval(repr(a)) From f617d2e1a94590efc4b7a350b80920724b8e46e0 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 20:51:51 -0500 Subject: [PATCH 136/185] add a few tests --- bitarray/test_bitarray.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 4c1b3425d..bebae6d86 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -185,11 +185,13 @@ def test_endian1(self): a = bitarray(endian='little') a.fromstring('A') self.assertEqual(a.endian(), 'little') + self.assert_(isinstance(a.endian(), str)) self.check_obj(a) b = bitarray(endian='big') b.fromstring('A') self.assertEqual(b.endian(), 'big') + self.assert_(isinstance(a.endian(), str)) self.check_obj(b) self.assertEqual(a.tostring(), b.tostring()) @@ -605,6 +607,15 @@ def test_booleanness(self): self.assertEqual(bool(bitarray('0')), True) self.assertEqual(bool(bitarray('1')), True) + def test_to01(self): + a = bitarray() + self.assertEqual(a.to01(), '') + self.assert_(isinstance(a.to01(), str)) + + a = bitarray('101') + 
self.assertEqual(a.to01(), '101') + self.assert_(isinstance(a.to01(), str)) + def test_iterate(self): for lst in self.randomlists(): acc = [] @@ -1665,6 +1676,10 @@ def test_tobytes(self): def test_unpack(self): a = bitarray('01') + if is_py3k: + self.assert_(isinstance(a.unpack(), bytes)) + else: + self.assert_(isinstance(a.unpack(), str)) self.assertEqual(a.unpack(), to_bytes('\x00\xff')) self.assertEqual(a.unpack(to_bytes('A')), to_bytes('A\xff')) self.assertEqual(a.unpack(to_bytes('0'), to_bytes('1')), From de5bb90d729cc6fba63bc4964cdd37c36877e28b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 21 May 2019 22:17:36 -0500 Subject: [PATCH 137/185] use PyBytes_* in C code, treating the Py3k function names as default (instead of PyString_* --- bitarray/_bitarray.c | 75 ++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 54902bc5d..d5844b16d 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -17,13 +17,14 @@ #ifdef IS_PY3K #include "bytesobject.h" -#define PyString_FromStringAndSize PyBytes_FromStringAndSize -#define PyString_FromString PyBytes_FromString -#define PyString_Check PyBytes_Check -#define PyString_Size PyBytes_Size -#define PyString_AsString PyBytes_AsString -#define PyString_ConcatAndDel PyBytes_ConcatAndDel #define Py_TPFLAGS_HAVE_WEAKREFS 0 +#else /* Python 2 */ +#define PyBytes_FromStringAndSize PyString_FromStringAndSize +#define PyBytes_FromString PyString_FromString +#define PyBytes_Check PyString_Check +#define PyBytes_Size PyString_Size +#define PyBytes_AsString PyString_AsString +#define PyBytes_ConcatAndDel PyString_ConcatAndDel #endif #if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 @@ -554,7 +555,7 @@ unpack(bitarrayobject *self, char zero, char one) for (i = 0; i < self->nbits; i++) { *(str + i) = GETBIT(self, i) ? 
one : zero; } - res = PyString_FromStringAndSize(str, (Py_ssize_t) self->nbits); + res = PyBytes_FromStringAndSize(str, (Py_ssize_t) self->nbits); PyMem_Free((void *) str); return res; } @@ -647,8 +648,8 @@ extend_tuple(bitarrayobject *self, PyObject *tuple) return 0; } -/* extend_string(): extend the bitarray from a string, where each whole - characters is converted to a single bit +/* extend_bytes(): extend the bitarray from a PyBytes object (PyString in + Python 2), where each whole characters is converted to a single bit */ enum conv_tp { STR_01, /* '0' -> 0 '1' -> 1 no other characters allowed */ @@ -656,21 +657,21 @@ enum conv_tp { }; static int -extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv) +extend_bytes(bitarrayobject *self, PyObject *bytes, enum conv_tp conv) { Py_ssize_t strlen, i; char c, *str; int vi = 0; - assert(PyString_Check(string)); - strlen = PyString_Size(string); + assert(PyBytes_Check(bytes)); + strlen = PyBytes_Size(bytes); if (strlen == 0) return 0; if (resize(self, self->nbits + strlen) < 0) return -1; - str = PyString_AsString(string); + str = PyBytes_AsString(bytes); for (i = 0; i < strlen; i++) { c = *(str + i); @@ -696,20 +697,20 @@ extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv) } static int -extend_rawstring(bitarrayobject *self, PyObject *string) +extend_rawbytes(bitarrayobject *self, PyObject *bytes) { Py_ssize_t strlen; char *str; - assert(PyString_Check(string) && self->nbits % 8 == 0); - strlen = PyString_Size(string); + assert(PyBytes_Check(bytes) && self->nbits % 8 == 0); + strlen = PyBytes_Size(bytes); if (strlen == 0) return 0; if (resize(self, self->nbits + BITS(strlen)) < 0) return -1; - str = PyString_AsString(string); + str = PyBytes_AsString(bytes); memcpy(self->ob_item + (Py_SIZE(self) - strlen), str, strlen); return 0; } @@ -730,14 +731,14 @@ extend_dispatch(bitarrayobject *self, PyObject *obj) if (PyTuple_Check(obj)) /* tuple */ return extend_tuple(self, obj); - if 
(PyString_Check(obj)) /* str01 */ - return extend_string(self, obj, STR_01); + if (PyBytes_Check(obj)) /* str01 */ + return extend_bytes(self, obj, STR_01); #ifdef IS_PY3K if (PyUnicode_Check(obj)) { /* str01 */ PyObject *string; string = PyUnicode_AsEncodedString(obj, NULL, NULL); - ret = extend_string(self, string, STR_01); + ret = extend_bytes(self, string, STR_01); Py_DECREF(string); return ret; } @@ -1195,7 +1196,7 @@ bitarray_reduce(bitarrayobject *self) } str[0] = (char) setunused(self); memcpy(str + 1, self->ob_item, Py_SIZE(self)); - repr = PyString_FromStringAndSize(str, Py_SIZE(self) + 1); + repr = PyBytes_FromStringAndSize(str, Py_SIZE(self) + 1); if (repr == NULL) goto error; PyMem_Free((void *) str); @@ -1593,11 +1594,11 @@ use the extend method."); static PyObject * -bitarray_frombytes(bitarrayobject *self, PyObject *string) +bitarray_frombytes(bitarrayobject *self, PyObject *bytes) { idx_t t, p; - if (!PyString_Check(string)) { + if (!PyBytes_Check(bytes)) { PyErr_SetString(PyExc_TypeError, "byte string expected"); return NULL; } @@ -1611,7 +1612,7 @@ bitarray_frombytes(bitarrayobject *self, PyObject *string) p = setunused(self); self->nbits += p; - if (extend_rawstring(self, string) < 0) + if (extend_rawbytes(self, bytes) < 0) return NULL; if (delete_n(self, t, p) < 0) return NULL; @@ -1628,7 +1629,7 @@ static PyObject * bitarray_tobytes(bitarrayobject *self) { setunused(self); - return PyString_FromStringAndSize(self->ob_item, Py_SIZE(self)); + return PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); } PyDoc_STRVAR(tobytes_doc, @@ -1655,7 +1656,7 @@ bitarray_to01(bitarrayobject *self) } PyDoc_STRVAR(to01_doc, -"to01() -> string\n\ +"to01() -> str\n\ \n\ Return a string containing '0's and '1's, representing the bits in the\n\ bitarray object.\n\ @@ -1685,13 +1686,13 @@ See also the pack method."); static PyObject * -bitarray_pack(bitarrayobject *self, PyObject *string) +bitarray_pack(bitarrayobject *self, PyObject *bytes) { - if 
(!PyString_Check(string)) { + if (!PyBytes_Check(bytes)) { PyErr_SetString(PyExc_TypeError, "byte string expected"); return NULL; } - if (extend_string(self, string, STR_RAW) < 0) + if (extend_bytes(self, bytes, STR_RAW) < 0) return NULL; Py_RETURN_NONE; @@ -1717,16 +1718,16 @@ bitarray_repr(bitarrayobject *self) #endif if (self->nbits == 0) { - string = PyString_FromString("bitarray()"); + string = PyBytes_FromString("bitarray()"); if (string == NULL) return NULL; } else { - string = PyString_FromString("bitarray(\'"); + string = PyBytes_FromString("bitarray(\'"); if (string == NULL) return NULL; - PyString_ConcatAndDel(&string, unpack(self, '0', '1')); - PyString_ConcatAndDel(&string, PyString_FromString("\')")); + PyBytes_ConcatAndDel(&string, unpack(self, '0', '1')); + PyBytes_ConcatAndDel(&string, PyBytes_FromString("\')")); } #ifdef IS_PY3K decoded = PyUnicode_FromEncodedObject(string, NULL, NULL); @@ -2753,16 +2754,16 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return a; } - /* string */ - if (PyString_Check(initial)) { + /* bytes */ + if (PyBytes_Check(initial)) { Py_ssize_t strlen; char *str; - strlen = PyString_Size(initial); + strlen = PyBytes_Size(initial); if (strlen == 0) /* empty string */ return newbitarrayobject(type, 0, endian); - str = PyString_AsString(initial); + str = PyBytes_AsString(initial); if (0 <= str[0] && str[0] < 8) { /* when the first character is smaller than 8, it indicates the number of unused bits at the end, and rest of the bytes From 882ab20c05fc5fec47a5945ed255b415c72c1773 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 02:32:40 -0500 Subject: [PATCH 138/185] enable unpack() to return either PyString or PyBytes object --- bitarray/_bitarray.c | 85 +++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d5844b16d..1bf2dfb7b 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c 
@@ -15,8 +15,13 @@ #define IS_PY3K #endif +/* For bytes, we use PyBytes_*, treating the Py3k function names as default. + For strings, we use PyString_*. + */ #ifdef IS_PY3K #include "bytesobject.h" +#define PyString_FromString PyUnicode_FromString +#define PyString_FromStringAndSize PyUnicode_FromStringAndSize #define Py_TPFLAGS_HAVE_WEAKREFS 0 #else /* Python 2 */ #define PyBytes_FromStringAndSize PyString_FromStringAndSize @@ -361,6 +366,8 @@ bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper) for (i = 0; i < Py_SIZE(self); i++) self->ob_item[i] ^= other->ob_item[i]; break; + default: /* should never happen */ + return -1; } return 0; } @@ -536,10 +543,15 @@ append_item(bitarrayobject *self, PyObject *item) return set_item(self, self->nbits - 1, item); } +enum unpack_t { + UNPACK_PYSTRING, /* PyString */ + UNPACK_PYBYTES, /* PyBytes */ +}; + static PyObject * -unpack(bitarrayobject *self, char zero, char one) +unpack(bitarrayobject *self, char zero, char one, enum unpack_t unpack_type) { - PyObject *res; + PyObject *result; Py_ssize_t i; char *str; @@ -555,9 +567,18 @@ unpack(bitarrayobject *self, char zero, char one) for (i = 0; i < self->nbits; i++) { *(str + i) = GETBIT(self, i) ? 
one : zero; } - res = PyBytes_FromStringAndSize(str, (Py_ssize_t) self->nbits); + switch (unpack_type) { + case UNPACK_PYBYTES: + result = PyBytes_FromStringAndSize(str, (Py_ssize_t) self->nbits); + break; + case UNPACK_PYSTRING: + result = PyString_FromStringAndSize(str, (Py_ssize_t) self->nbits); + break; + default: /* should never happen */ + return NULL; + } PyMem_Free((void *) str); - return res; + return result; } static int @@ -651,13 +672,13 @@ extend_tuple(bitarrayobject *self, PyObject *tuple) /* extend_bytes(): extend the bitarray from a PyBytes object (PyString in Python 2), where each whole characters is converted to a single bit */ -enum conv_tp { +enum conv_t { STR_01, /* '0' -> 0 '1' -> 1 no other characters allowed */ STR_RAW, /* 0x00 -> 0 other -> 1 */ }; static int -extend_bytes(bitarrayobject *self, PyObject *bytes, enum conv_tp conv) +extend_bytes(bitarrayobject *self, PyObject *bytes, enum conv_t conv) { Py_ssize_t strlen, i; char c, *str; @@ -690,6 +711,8 @@ extend_bytes(bitarrayobject *self, PyObject *bytes, enum conv_tp conv) case STR_RAW: vi = c ? 
1 : 0; break; + default: /* should never happen */ + return -1; } setbit(self, self->nbits - strlen + i, vi); } @@ -736,10 +759,10 @@ extend_dispatch(bitarrayobject *self, PyObject *obj) #ifdef IS_PY3K if (PyUnicode_Check(obj)) { /* str01 */ - PyObject *string; - string = PyUnicode_AsEncodedString(obj, NULL, NULL); - ret = extend_bytes(self, string, STR_01); - Py_DECREF(string); + PyObject *bytes; + bytes = PyUnicode_AsEncodedString(obj, NULL, NULL); + ret = extend_bytes(self, bytes, STR_01); + Py_DECREF(bytes); return ret; } #endif @@ -1117,11 +1140,7 @@ contents, the bit endianness as a string, the number of unused bits\n\ static PyObject * bitarray_endian(bitarrayobject *self) { -#ifdef IS_PY3K - return PyUnicode_FromString(ENDIAN_STR(self)); -#else return PyString_FromString(ENDIAN_STR(self)); -#endif } PyDoc_STRVAR(endian_doc, @@ -1643,16 +1662,7 @@ bits (1..7) are set to 0."); static PyObject * bitarray_to01(bitarrayobject *self) { -#ifdef IS_PY3K - PyObject *string, *unpacked; - - unpacked = unpack(self, '0', '1'); - string = PyUnicode_FromEncodedObject(unpacked, NULL, NULL); - Py_DECREF(unpacked); - return string; -#else - return unpack(self, '0', '1'); -#endif + return unpack(self, '0', '1', UNPACK_PYSTRING); } PyDoc_STRVAR(to01_doc, @@ -1674,7 +1684,7 @@ bitarray_unpack(bitarrayobject *self, PyObject *args, PyObject *kwds) &zero, &one)) return NULL; - return unpack(self, zero, one); + return unpack(self, zero, one, UNPACK_PYBYTES); } PyDoc_STRVAR(unpack_doc, @@ -1712,29 +1722,30 @@ transfer of data between bitarray objects to other python objects\n\ static PyObject * bitarray_repr(bitarrayobject *self) { - PyObject *string; + PyObject *bytes; + PyObject *unpacked; #ifdef IS_PY3K PyObject *decoded; #endif if (self->nbits == 0) { - string = PyBytes_FromString("bitarray()"); - if (string == NULL) - return NULL; + bytes = PyBytes_FromString("bitarray()"); } else { - string = PyBytes_FromString("bitarray(\'"); - if (string == NULL) + bytes = 
PyBytes_FromString("bitarray(\'"); + unpacked = unpack(self, '0', '1', UNPACK_PYBYTES); + if (unpacked == NULL) return NULL; - PyBytes_ConcatAndDel(&string, unpack(self, '0', '1')); - PyBytes_ConcatAndDel(&string, PyBytes_FromString("\')")); + PyBytes_ConcatAndDel(&bytes, unpacked); + PyBytes_ConcatAndDel(&bytes, PyBytes_FromString("\')")); } #ifdef IS_PY3K - decoded = PyUnicode_FromEncodedObject(string, NULL, NULL); - Py_DECREF(string); - string = decoded; + decoded = PyUnicode_FromEncodedObject(bytes, NULL, NULL); + Py_DECREF(bytes); + return decoded; +#else + return bytes; /* really a string in Python 2 */ #endif - return string; } From d98ded3d02aed4091a34a96f80f91bd894dd517f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 11:08:40 -0500 Subject: [PATCH 139/185] add tests for creating bitarrys from unicode strings --- bitarray/test_bitarray.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index bebae6d86..18bd3ff2a 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -37,6 +37,13 @@ def to_bytes(s): return s +if is_py3k or sys.version_info[:2] < (2, 6): + def unicode(*args): + if len(args) == 0: + return '' + return args[0] + + class Util(object): def randombitarrays(self): @@ -760,6 +767,30 @@ def test_overflow(self): a = bitarray(10 ** 6) self.assertRaises(OverflowError, a.__imul__, 17180) + def test_unicode1(self): + a = bitarray(unicode()) + self.assertEqual(a, bitarray()) + + a = bitarray(unicode('111001')) + self.assertEqual(a, bitarray('111001')) + + for a in self.randombitarrays(): + b = bitarray(unicode(a.to01())) + self.assertEqual(a, b) + + def test_unicode2(self): + a = bitarray() + a.extend(unicode()) + self.assertEqual(a, bitarray()) + + a = bitarray() + a.extend(unicode('001011')) + self.assertEqual(a, bitarray('001011')) + + for a in self.randombitarrays(): + b = bitarray() + b.extend(unicode(a.to01())) + 
self.assertEqual(a, b) tests.append(MiscTests) From 3b2da2d7b6e8168a4cd6fcd41104ac95db0c0f4e Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 11:09:48 -0500 Subject: [PATCH 140/185] fix bitarrays beings created from unicode in Python 2 --- bitarray/_bitarray.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 1bf2dfb7b..fa4e34e7d 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -23,7 +23,7 @@ #define PyString_FromString PyUnicode_FromString #define PyString_FromStringAndSize PyUnicode_FromStringAndSize #define Py_TPFLAGS_HAVE_WEAKREFS 0 -#else /* Python 2 */ +#else /* Python 2 -- Hmm, really just Python 2.5? */ #define PyBytes_FromStringAndSize PyString_FromStringAndSize #define PyBytes_FromString PyString_FromString #define PyBytes_Check PyString_Check @@ -757,7 +757,7 @@ extend_dispatch(bitarrayobject *self, PyObject *obj) if (PyBytes_Check(obj)) /* str01 */ return extend_bytes(self, obj, STR_01); -#ifdef IS_PY3K +#if PY_MAJOR_VERSION == 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION >= 6) if (PyUnicode_Check(obj)) { /* str01 */ PyObject *bytes; bytes = PyUnicode_AsEncodedString(obj, NULL, NULL); From fb7c94ae4d7abbc3e1a183183669dd3f507292de Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 14:36:00 -0500 Subject: [PATCH 141/185] only define PyBytes_* when necessary --- bitarray/_bitarray.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index fa4e34e7d..acd01beb1 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -23,17 +23,16 @@ #define PyString_FromString PyUnicode_FromString #define PyString_FromStringAndSize PyUnicode_FromStringAndSize #define Py_TPFLAGS_HAVE_WEAKREFS 0 -#else /* Python 2 -- Hmm, really just Python 2.5? 
*/ +#endif + +#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 +/* backward compatibility with Python 2.5 */ #define PyBytes_FromStringAndSize PyString_FromStringAndSize #define PyBytes_FromString PyString_FromString #define PyBytes_Check PyString_Check #define PyBytes_Size PyString_Size #define PyBytes_AsString PyString_AsString #define PyBytes_ConcatAndDel PyString_ConcatAndDel -#endif - -#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 -/* backward compatibility with Python 2.5 */ #define Py_TYPE(ob) (((PyObject *) (ob))->ob_type) #define Py_SIZE(ob) (((PyVarObject *) (ob))->ob_size) #endif From 8377322ceae4253fc4a27535830243d37c60d59c Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 20:01:06 -0500 Subject: [PATCH 142/185] unpack() to only return PyBytes --- bitarray/_bitarray.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index acd01beb1..a825d331b 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -542,13 +542,8 @@ append_item(bitarrayobject *self, PyObject *item) return set_item(self, self->nbits - 1, item); } -enum unpack_t { - UNPACK_PYSTRING, /* PyString */ - UNPACK_PYBYTES, /* PyBytes */ -}; - static PyObject * -unpack(bitarrayobject *self, char zero, char one, enum unpack_t unpack_type) +unpack(bitarrayobject *self, char zero, char one) { PyObject *result; Py_ssize_t i; @@ -566,16 +561,7 @@ unpack(bitarrayobject *self, char zero, char one, enum unpack_t unpack_type) for (i = 0; i < self->nbits; i++) { *(str + i) = GETBIT(self, i) ? 
one : zero; } - switch (unpack_type) { - case UNPACK_PYBYTES: - result = PyBytes_FromStringAndSize(str, (Py_ssize_t) self->nbits); - break; - case UNPACK_PYSTRING: - result = PyString_FromStringAndSize(str, (Py_ssize_t) self->nbits); - break; - default: /* should never happen */ - return NULL; - } + result = PyBytes_FromStringAndSize(str, (Py_ssize_t) self->nbits); PyMem_Free((void *) str); return result; } @@ -1661,7 +1647,19 @@ bits (1..7) are set to 0."); static PyObject * bitarray_to01(bitarrayobject *self) { - return unpack(self, '0', '1', UNPACK_PYSTRING); +#ifdef IS_PY3K + PyObject *string; + PyObject *unpacked; + + unpacked = unpack(self, '0', '1'); + if (unpacked == NULL) + return NULL; + string = PyUnicode_FromEncodedObject(unpacked, NULL, NULL); + Py_DECREF(unpacked); + return string; +#else + return unpack(self, '0', '1'); +#endif } PyDoc_STRVAR(to01_doc, @@ -1683,7 +1681,7 @@ bitarray_unpack(bitarrayobject *self, PyObject *args, PyObject *kwds) &zero, &one)) return NULL; - return unpack(self, zero, one, UNPACK_PYBYTES); + return unpack(self, zero, one); } PyDoc_STRVAR(unpack_doc, @@ -1732,7 +1730,7 @@ bitarray_repr(bitarrayobject *self) } else { bytes = PyBytes_FromString("bitarray(\'"); - unpacked = unpack(self, '0', '1', UNPACK_PYBYTES); + unpacked = unpack(self, '0', '1'); if (unpacked == NULL) return NULL; PyBytes_ConcatAndDel(&bytes, unpacked); From 94458402e89cf8d765b8ce778de3216c6e343842 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 20:20:27 -0500 Subject: [PATCH 143/185] remove PyString_ redefinitions --- bitarray/_bitarray.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a825d331b..3f628fa52 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -20,13 +20,10 @@ */ #ifdef IS_PY3K #include "bytesobject.h" -#define PyString_FromString PyUnicode_FromString -#define PyString_FromStringAndSize PyUnicode_FromStringAndSize #define 
Py_TPFLAGS_HAVE_WEAKREFS 0 #endif -#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 -/* backward compatibility with Python 2.5 */ +#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 5 #define PyBytes_FromStringAndSize PyString_FromStringAndSize #define PyBytes_FromString PyString_FromString #define PyBytes_Check PyString_Check @@ -1125,7 +1122,11 @@ contents, the bit endianness as a string, the number of unused bits\n\ static PyObject * bitarray_endian(bitarrayobject *self) { +#ifdef IS_PY3K + return PyUnicode_FromString(ENDIAN_STR(self)); +#else return PyString_FromString(ENDIAN_STR(self)); +#endif } PyDoc_STRVAR(endian_doc, From 928887cee7c57b646b5f1f3ca49cd4cdd35ce537 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 20:28:27 -0500 Subject: [PATCH 144/185] next release will be 1.0.0, droping Python 2.5 support --- CHANGE_LOG | 6 +++++- bitarray/__init__.py | 2 +- bitarray/_bitarray.c | 16 ++-------------- setup.py | 1 - 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index d397f2d48..438e4d438 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,6 +1,10 @@ -2019-XX-XX 0.9.4: +2019-XX-XX 1.0.0: ------------------- + * fix bitarrays beings created from unicode in Python 2 + * use PyBytes_* in C code, treating the Py3k function names as default, + which also removes all redefinitions of PyString_ * add a few comments to the C code + * drop Python 2.5 support 2019-05-20 0.9.3: diff --git a/bitarray/__init__.py b/bitarray/__init__.py index dce44ee1f..04f9ce9ca 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '0.9.4' +__version__ = '1.0.0' class bitarray(_bitarray): diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 3f628fa52..5b6a85c09 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -23,16 +23,6 @@ #define Py_TPFLAGS_HAVE_WEAKREFS 0 #endif -#if PY_MAJOR_VERSION == 2 && 
PY_MINOR_VERSION == 5 -#define PyBytes_FromStringAndSize PyString_FromStringAndSize -#define PyBytes_FromString PyString_FromString -#define PyBytes_Check PyString_Check -#define PyBytes_Size PyString_Size -#define PyBytes_AsString PyString_AsString -#define PyBytes_ConcatAndDel PyString_ConcatAndDel -#define Py_TYPE(ob) (((PyObject *) (ob))->ob_type) -#define Py_SIZE(ob) (((PyVarObject *) (ob))->ob_size) -#endif #if PY_MAJOR_VERSION == 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7) /* (new) buffer protocol */ @@ -736,18 +726,16 @@ extend_dispatch(bitarrayobject *self, PyObject *obj) if (PyTuple_Check(obj)) /* tuple */ return extend_tuple(self, obj); - if (PyBytes_Check(obj)) /* str01 */ + if (PyBytes_Check(obj)) /* string 01 */ return extend_bytes(self, obj, STR_01); -#if PY_MAJOR_VERSION == 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION >= 6) - if (PyUnicode_Check(obj)) { /* str01 */ + if (PyUnicode_Check(obj)) { /* unicode 01 */ PyObject *bytes; bytes = PyUnicode_AsEncodedString(obj, NULL, NULL); ret = extend_bytes(self, bytes, STR_01); Py_DECREF(bytes); return ret; } -#endif if (PyIter_Check(obj)) /* iter */ return extend_iter(self, obj); diff --git a/setup.py b/setup.py index 0473b1f62..25d23b763 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,6 @@ "Operating System :: OS Independent", "Programming Language :: C", "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.5", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", From c99ffab5bd6db70cfd9dd93ca6f1f3b7d2521612 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 20:46:10 -0500 Subject: [PATCH 145/185] remove obsolete comment --- bitarray/_bitarray.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 5b6a85c09..23a2695c3 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -15,9 +15,6 @@ #define IS_PY3K #endif -/* For bytes, we 
use PyBytes_*, treating the Py3k function names as default. - For strings, we use PyString_*. - */ #ifdef IS_PY3K #include "bytesobject.h" #define Py_TPFLAGS_HAVE_WEAKREFS 0 From bd858e7e9f1e519138333c30667bf83bc84b79db Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 20:51:20 -0500 Subject: [PATCH 146/185] no reason to include bytesobject.h separately --- bitarray/_bitarray.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 23a2695c3..2c949a39a 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -16,11 +16,9 @@ #endif #ifdef IS_PY3K -#include "bytesobject.h" #define Py_TPFLAGS_HAVE_WEAKREFS 0 #endif - #if PY_MAJOR_VERSION == 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7) /* (new) buffer protocol */ #define WITH_BUFFER From c5695f508d034c54cf95252b50756a9d3b290133 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 21:00:12 -0500 Subject: [PATCH 147/185] remove special conditions for older Python versions from test suite --- bitarray/test_bitarray.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 18bd3ff2a..cb6bf45a5 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -23,21 +23,15 @@ tests = [] -if sys.version_info[:2] < (2, 6): - def next(x): - return x.next() - def to_bytes(s): if is_py3k: return bytes(s.encode('latin1')) - elif sys.version_info[:2] >= (2, 6): - return bytes(s) else: - return s + return bytes(s) # which is str for Python 2 -if is_py3k or sys.version_info[:2] < (2, 6): +if is_py3k: def unicode(*args): if len(args) == 0: return '' @@ -802,9 +796,6 @@ def test_all(self): a = bitarray() self.assertTrue(a.all()) - if sys.version_info[:2] < (2, 5): - return - for a in self.randombitarrays(): self.assertEqual(all(a), a.all()) self.assertEqual(all(a.tolist()), a.all()) @@ -814,9 +805,6 @@ def test_any(self): a = bitarray() 
self.assertFalse(a.any()) - if sys.version_info[:2] < (2, 5): - return - for a in self.randombitarrays(): self.assertEqual(any(a), a.any()) self.assertEqual(any(a.tolist()), a.any()) @@ -1802,8 +1790,6 @@ def test_cPickle(self): self.assertEQUAL(a, b) def test_shelve(self): - if sys.version_info[:2] < (2, 5): - return import shelve, hashlib d = shelve.open(self.tmpfname) From 70b06f80ca2ab216cbb76d9ee4f4301116c9e37e Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 22 May 2019 23:53:26 -0500 Subject: [PATCH 148/185] avoid import inside tests, as it may change the reference count --- bitarray/test_bitarray.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index cb6bf45a5..749da14b7 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -5,11 +5,17 @@ """ import os import sys +import copy import unittest import tempfile import shutil from random import randint +try: + import shelve, hashlib +except ImportError: + shelve = hashlib = None + is_py3k = bool(sys.version_info[0] == 3) if is_py3k: @@ -750,9 +756,7 @@ def test_cPickle(self): self.assertEQUAL(a, b) def test_overflow(self): - from platform import architecture - - if architecture()[0] == '64bit': + if tuple.__itemsize__ == 8: return self.assertRaises(OverflowError, bitarray.__new__, @@ -827,7 +831,6 @@ def test_repr(self): def test_copy(self): - import copy for a in self.randombitarrays(): b = a.copy() self.assert_(b is not a) @@ -1790,7 +1793,8 @@ def test_cPickle(self): self.assertEQUAL(a, b) def test_shelve(self): - import shelve, hashlib + if not shelve or hasattr(sys, 'gettotalrefcount'): + return d = shelve.open(self.tmpfname) stored = [] From 4c21bd421a65d3823d09cf705d6faccd1f32f3df Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 23 May 2019 00:04:09 -0500 Subject: [PATCH 149/185] move remaining imports outside tests --- bitarray/test_bitarray.py | 33 ++++++++++++++------------------- 1 
file changed, 14 insertions(+), 19 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 749da14b7..f3daea17e 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -5,23 +5,28 @@ """ import os import sys -import copy import unittest import tempfile import shutil from random import randint +is_py3k = bool(sys.version_info[0] == 3) + +# imports needed inside tests +import copy +import pickle +import itertools + try: import shelve, hashlib except ImportError: shelve = hashlib = None -is_py3k = bool(sys.version_info[0] == 3) - if is_py3k: from io import StringIO else: from cStringIO import StringIO + import cPickle from bitarray import bitarray, bitdiff, bits2bytes, __version__ @@ -738,10 +743,9 @@ def test_endianness5(self): self.assertEqual(a.tobytes(), b.tobytes()) def test_pickle(self): - from pickle import loads, dumps for v in range(3): for a in self.randombitarrays(): - b = loads(dumps(a, v)) + b = pickle.loads(pickle.dumps(a, v)) self.assert_(b is not a) self.assertEQUAL(a, b) @@ -749,9 +753,8 @@ def test_cPickle(self): if is_py3k: return for v in range(3): - from cPickle import loads, dumps for a in self.randombitarrays(): - b = loads(dumps(a, v)) + b = cPickle.loads(cPickle.dumps(a, v)) self.assert_(b is not a) self.assertEQUAL(a, b) @@ -1224,10 +1227,6 @@ def test_iterator1(self): self.check_obj(c) def test_iterator2(self): - try: - import itertools - except ImportError: - return a = bitarray() a.extend(itertools.repeat(True, 23)) self.assertEqual(a, bitarray(23 * '1')) @@ -1767,28 +1766,24 @@ def tearDown(self): def test_pickle(self): - from pickle import load, dump - for v in range(3): for a in self.randombitarrays(): fo = open(self.tmpfname, 'wb') - dump(a, fo, v) + pickle.dump(a, fo, v) fo.close() - b = load(open(self.tmpfname, 'rb')) + b = pickle.load(open(self.tmpfname, 'rb')) self.assert_(b is not a) self.assertEQUAL(a, b) def test_cPickle(self): if is_py3k: return - from cPickle import load, dump - 
for v in range(3): for a in self.randombitarrays(): fo = open(self.tmpfname, 'wb') - dump(a, fo, v) + cPickle.dump(a, fo, v) fo.close() - b = load(open(self.tmpfname, 'rb')) + b = cPickle.load(open(self.tmpfname, 'rb')) self.assert_(b is not a) self.assertEQUAL(a, b) From cbf6cf2d81e365bd6188f2d183f3d4b0e14062a9 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 23 May 2019 02:53:09 -0500 Subject: [PATCH 150/185] update changelog --- CHANGE_LOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 438e4d438..5ba465fc0 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -3,7 +3,8 @@ * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, which also removes all redefinitions of PyString_ - * add a few comments to the C code + * add a more comments to the C code + * move imports outside tests: pickle, io, etc. * drop Python 2.5 support From 9b6103e034c393557ae14003bb1bfd42fccc1692 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 23 May 2019 13:15:23 -0500 Subject: [PATCH 151/185] fix comment --- bitarray/_bitarray.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 2c949a39a..3fb9598a0 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -636,8 +636,8 @@ extend_tuple(bitarrayobject *self, PyObject *tuple) return 0; } -/* extend_bytes(): extend the bitarray from a PyBytes object (PyString in - Python 2), where each whole characters is converted to a single bit +/* extend_bytes(): extend the bitarray from a PyBytes object, where each + whole character is converted to a single bit */ enum conv_t { STR_01, /* '0' -> 0 '1' -> 1 no other characters allowed */ From 1585b469a05e304d9db0e5c45e30514afe2c242b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 23 May 2019 16:39:42 -0500 Subject: [PATCH 152/185] consistently use str in doc-strings --- bitarray/__init__.py | 4 ++-- 
bitarray/_bitarray.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 04f9ce9ca..8edb5a81f 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -45,14 +45,14 @@ class bitarray(_bitarray): fromfile, tobytes, frombytes.""" def fromstring(self, string): - """fromstring(string) + """fromstring(str) Append from a string, interpreting the string as machine values. Deprecated since version 0.4.0, use ``frombytes()`` instead.""" return self.frombytes(string.encode()) def tostring(self): - """tostring() -> string + """tostring() -> str Return the string representing (machine values) of the bitarray. When the length of the bitarray is not a multiple of 8, the few remaining diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 3fb9598a0..315775865 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1113,7 +1113,7 @@ bitarray_endian(bitarrayobject *self) } PyDoc_STRVAR(endian_doc, -"endian() -> string\n\ +"endian() -> str\n\ \n\ Return the bit endianness as a string (either 'little' or 'big')."); From 1a8bcaa038dc2b89a25e79d2be3aeafc64b14798 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 29 May 2019 22:43:04 -0500 Subject: [PATCH 153/185] fix wording for search() docstring --- bitarray/_bitarray.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 315775865..8f4c79eed 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1070,8 +1070,8 @@ bitarray_search(bitarrayobject *self, PyObject *args) PyDoc_STRVAR(search_doc, "search(bitarray, [limit]) -> list\n\ \n\ -Searches for the given a bitarray in self, and returns the start positions\n\ -where bitarray matches self as a list.\n\ +Searches for the given bitarray in self, and return the list of start\n\ +positions.\n\ The optional argument limits the number of search results to the integer\n\ specified. 
By default, all search results are returned."); From b0e95b405dafc2a1b4807b87e96129de1f298408 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 29 May 2019 22:48:04 -0500 Subject: [PATCH 154/185] improve wording in comment --- bitarray/_bitarray.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 8f4c79eed..8866ae858 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -482,8 +482,8 @@ findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop) return -1; } -/* search for the first occurrence bitarray xa (in self), starting at p, - and return its position (-1 when not found) +/* search for the first occurrence of bitarray xa (in self), starting at p, + and return its position (or -1 when not found) */ static idx_t search(bitarrayobject *self, bitarrayobject *xa, idx_t p) From ac9620f1df22f47d415b874dce8221321cf6eab4 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 29 May 2019 23:06:03 -0500 Subject: [PATCH 155/185] add tests for .index() method --- bitarray/test_bitarray.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index f3daea17e..736742e95 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1317,6 +1317,11 @@ def test_index(self): self.assertRaises(ValueError, a.index, True) a[20] = a[27] = 1 self.assertEqual(a.index(42), 20) + self.assertEqual(a.index(1, 21), 27) + self.assertEqual(a.index(1, 27), 27) + self.assertRaises(ValueError, a.index, 1, 5, 17) + self.assertRaises(ValueError, a.index, 1, 21, 27) + self.assertRaises(ValueError, a.index, 1, 28) self.assertEqual(a.index(0), 0) a = bitarray(200 * [True]) From 8081e2f537ab7c42fa75f17a92c66fd41222e550 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Thu, 30 May 2019 16:57:32 -0500 Subject: [PATCH 156/185] add tests for .index() method --- bitarray/test_bitarray.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 
insertions(+), 1 deletion(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 736742e95..21e51f867 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1343,7 +1343,7 @@ def test_index(self): a[m] = 0 self.assertEqual(a.index(0), m) - def test_index2(self): + def test_index2a(self): a = bitarray('00001000' '00000000' '0010000') self.assertEqual(a.index(1), 4) self.assertEqual(a.index(1, 1), 4) @@ -1352,6 +1352,15 @@ def test_index2(self): self.assertRaises(ValueError, a.index, 1, 5, 18) self.assertRaises(ValueError, a.index, 1, 19) + def test_index2b(self): + a = bitarray('11110111' '11111111' '1101111') + self.assertEqual(a.index(0), 4) + self.assertEqual(a.index(0, 1), 4) + self.assertEqual(a.index(1, 4), 5) + self.assertEqual(a.index(0, 5), 18) + self.assertRaises(ValueError, a.index, 0, 5, 18) + self.assertRaises(ValueError, a.index, 0, 19) + def test_index3(self): a = bitarray(2000) a.setall(0) @@ -1371,6 +1380,26 @@ def test_index3(self): res2 = None self.assertEqual(res1, res2) + def test_index4(self): + for n in range(1, 50): + a = bitarray(n) + a.setall(0) + for _ in range(3): + a[randint(0, n-1)] = 1 + aa = a.tolist() + for unused in range(100): + start = randint(-50, n+50) + stop = randint(-50, n+50) + try: + res1 = a.index(1, start, stop) + except ValueError: + res1 = None + try: + res2 = aa.index(1, start, stop) + except ValueError: + res2 = None + self.assertEqual(res1, res2) + def test_count(self): a = bitarray('10011') From 34cf7dd2d9887bb541f30fca87220fbe53854d13 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 31 May 2019 00:51:21 -0500 Subject: [PATCH 157/185] also search for 0 in arrys with mostly 1s --- bitarray/test_bitarray.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 21e51f867..7c8603129 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1383,19 +1383,20 @@ def 
test_index3(self): def test_index4(self): for n in range(1, 50): a = bitarray(n) - a.setall(0) - for _ in range(3): - a[randint(0, n-1)] = 1 + i = randint(0, 1) + a.setall(i) + for unused in range(randint(1, 4)): + a[randint(0, n-1)] = 1-i aa = a.tolist() for unused in range(100): start = randint(-50, n+50) stop = randint(-50, n+50) try: - res1 = a.index(1, start, stop) + res1 = a.index(1-i, start, stop) except ValueError: res1 = None try: - res2 = aa.index(1, start, stop) + res2 = aa.index(1-i, start, stop) except ValueError: res2 = None self.assertEqual(res1, res2) From eff36427f6003cc741a7407ca0932551ef9915bd Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 31 May 2019 12:24:24 -0500 Subject: [PATCH 158/185] handle negative arguments of .index() method consistentlt with how they are treated for lists --- bitarray/_bitarray.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 8866ae858..479e29ca1 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -447,16 +447,14 @@ findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop) idx_t i; char c; - if (Py_SIZE(self) == 0) - return -1; - if (start < 0 || start > self->nbits) - start = 0; - if (stop < 0 || stop > self->nbits) - stop = self->nbits; - if (start >= stop) + assert(0 <= start && start <= self->nbits); + assert(0 <= stop && stop <= self->nbits); + assert(BYTES(stop) <= Py_SIZE(self)); + + if (self->nbits == 0 || start >= stop) return -1; - if (stop > start + 8) { + if (stop >= start + 8) { /* seraching for 1 means: break when byte is not 0x00 searching for 0 means: break when byte is not 0xff */ c = vi ? 
0x00 : 0xff; @@ -466,10 +464,6 @@ findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop) if (c ^ self->ob_item[j]) break; - if (j == Py_SIZE(self)) - j--; - assert(0 <= j && j < Py_SIZE(self)); - if (start < BITS(j)) start = BITS(j); } @@ -950,7 +944,7 @@ static PyObject * bitarray_index(bitarrayobject *self, PyObject *args) { PyObject *x; - idx_t i, start = 0, stop = -1; + idx_t i, start = 0, stop = self->nbits; long vi; if (!PyArg_ParseTuple(args, "O|LL:index", &x, &start, &stop)) @@ -960,6 +954,22 @@ bitarray_index(bitarrayobject *self, PyObject *args) if (vi < 0) return NULL; + if (start < 0) { + start += self->nbits; + if (start < 0) + start = 0; + } + if (start > self->nbits) + start = self->nbits; + + if (stop < 0) { + stop += self->nbits; + if (stop < 0) + stop = 0; + } + if (stop > self->nbits) + stop = self->nbits; + i = findfirst(self, vi, start, stop); if (i < 0) { PyErr_SetString(PyExc_ValueError, "index(x): x not in bitarray"); @@ -1002,7 +1012,7 @@ bitarray_contains(bitarrayobject *self, PyObject *x) vi = IntBool_AsInt(x); if (vi < 0) return NULL; - res = findfirst(self, vi, 0, -1) >= 0; + res = findfirst(self, vi, 0, self->nbits) >= 0; } else if (bitarray_Check(x)) { res = search(self, (bitarrayobject *) x, 0) >= 0; @@ -1136,7 +1146,7 @@ Append the value bool(item) to the end of the bitarray."); static PyObject * bitarray_all(bitarrayobject *self) { - if (findfirst(self, 0, 0, -1) >= 0) + if (findfirst(self, 0, 0, self->nbits) >= 0) Py_RETURN_FALSE; else Py_RETURN_TRUE; @@ -1151,7 +1161,7 @@ Returns True when all bits in the array are True."); static PyObject * bitarray_any(bitarrayobject *self) { - if (findfirst(self, 1, 0, -1) >= 0) + if (findfirst(self, 1, 0, self->nbits) >= 0) Py_RETURN_TRUE; else Py_RETURN_FALSE; @@ -1804,7 +1814,7 @@ bitarray_remove(bitarrayobject *self, PyObject *v) if (vi < 0) return NULL; - i = findfirst(self, vi, 0, -1); + i = findfirst(self, vi, 0, self->nbits); if (i < 0) { PyErr_SetString(PyExc_ValueError, 
"remove(x): x not in bitarray"); return NULL; From 3f0046fe0a36061c06260b0a321dee1f9f84e9f9 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 31 May 2019 12:46:39 -0500 Subject: [PATCH 159/185] update changelog --- CHANGE_LOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 5ba465fc0..b0ae40a08 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -3,6 +3,8 @@ * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, which also removes all redefinitions of PyString_ + * handle negative arguments of .index() method consistentlt with how + they are treated for lists * add a more comments to the C code * move imports outside tests: pickle, io, etc. * drop Python 2.5 support From 6340d0cd08ceb45b15f2e00d2a6567ffe5c71749 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 31 May 2019 14:12:01 -0500 Subject: [PATCH 160/185] add another search test --- bitarray/test_bitarray.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 7c8603129..3bee62913 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1475,6 +1475,17 @@ def test_search3(self): self.assertEqual(list(a.itersearch(b)), res) self.assertEqual([p for p in a.itersearch(b)], res) + def test_search4(self): + for a in self.randombitarrays(): + aa = a.to01() + for sub in '0', '1', '01', '01', '11', '101', '1111111': + sr = a.search(bitarray(sub), 1) + try: + p = sr[0] + except IndexError: + p = -1 + self.assertEqual(p, aa.find(sub)) + def test_search_type(self): a = bitarray('10011') it = a.itersearch(bitarray('1')) From 47f843a998ed9357afc102ca274141bdfe4c0cd7 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 31 May 2019 19:44:08 -0500 Subject: [PATCH 161/185] update readme --- README.rst | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 73471495e..03328ddf6 
100644 --- a/README.rst +++ b/README.rst @@ -24,25 +24,22 @@ Key features * The bit endianness can be specified for each bitarray object, see below. - * On 32bit systems, a bitarray object can contain up to 2^34 elements, - that is 16 Gbits (on 64bit machines up to 2^63 elements in theory -- - on Python 2.4 only 2^31 elements, - see `PEP 353 `_ - (added in Python 2.5)). - * Packing and unpacking to other binary data formats, e.g. `numpy.ndarray `_, is possible. * Fast methods for encoding and decoding variable bit length prefix codes - * Sequential search (as list or iterator) - * Bitwise operations: ``&, |, ^, &=, |=, ^=, ~`` + * Sequential search + * Pickling and unpickling of bitarray objects possible. - * Bitarray objects support the buffer protocol (Python 2.7 only) + * Bitarray objects support the buffer protocol (Python 2.7 and above) + + * On 32-bit systems, a bitarray object can contain up to 2^34 elements, + that is 16 Gbits (on 64-bit machines up to 2^63 elements in theory. Installation From 7ff815f7e53873a79d67f99321e9daac10fcb14d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 1 Jun 2019 05:07:55 -0500 Subject: [PATCH 162/185] update readme --- README.rst | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 03328ddf6..02fae47a8 100644 --- a/README.rst +++ b/README.rst @@ -47,8 +47,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-0.9.3.tar.gz - $ cd bitarray-0.9.3 + $ tar xzf bitarray-1.0.0.tar.gz + $ cd bitarray-1.0.0 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -59,7 +59,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.9.3 + bitarray version: 1.0.0 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] 
......................................................................... ................................................. @@ -374,7 +374,7 @@ Reference with the corresponding bitarray for each symbols. -``endian()`` -> string +``endian()`` -> str Return the bit endianness as a string (either 'little' or 'big'). @@ -399,7 +399,7 @@ Reference read until EOF is reached. -``fromstring(string)`` +``fromstring(str)`` Append from a string, interpreting the string as machine values. Deprecated since version 0.4.0, use ``frombytes()`` instead. @@ -461,8 +461,8 @@ Reference ``search(bitarray, [limit])`` -> list - Searches for the given a bitarray in self, and returns the start positions - where bitarray matches self as a list. + Searches for the given bitarray in self, and return the list of start + positions. The optional argument limits the number of search results to the integer specified. By default, all search results are returned. @@ -475,7 +475,7 @@ Reference Sort the bits in the array (in-place). -``to01()`` -> string +``to01()`` -> str Return a string containing '0's and '1's, representing the bits in the bitarray object. Note: To extend a bitarray from a string containing '0's and '1's, @@ -503,7 +503,7 @@ Reference use the extend method. -``tostring()`` -> string +``tostring()`` -> str Return the string representing (machine values) of the bitarray. When the length of the bitarray is not a multiple of 8, the few remaining bits (1..7) are set to 0. @@ -535,6 +535,18 @@ Reference Change log ---------- +2019-06-XX 1.0.0: + + * fix bitarrays beings created from unicode in Python 2 + * use PyBytes_* in C code, treating the Py3k function names as default, + which also removes all redefinitions of PyString_ + * handle negative arguments of .index() method consistentlt with how + they are treated for lists + * add a more comments to the C code + * move imports outside tests: pickle, io, etc. 
+ * drop Python 2.5 support + + **0.9.3** (2019-05-20): * refactor resize() - only shrink allocated memory if new size falls From fd031bf82424ece8dee071e8a9d2ebf385b22245 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 1 Jun 2019 05:21:27 -0500 Subject: [PATCH 163/185] wording and documentation updates --- CHANGE_LOG | 2 +- README.rst | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index b0ae40a08..ba0623fd4 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,4 +1,4 @@ -2019-XX-XX 1.0.0: +2019-06-XX 1.0.0: ------------------- * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, diff --git a/README.rst b/README.rst index 02fae47a8..bdfb0e53a 100644 --- a/README.rst +++ b/README.rst @@ -52,9 +52,7 @@ bitarray can be installed from source:: $ python setup.py install On Unix systems, the latter command may have to be executed with root -privileges. -If you have `distribute `_ -installed, you can easy_install bitarray. +privileges. You can also pip install bitarray. Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' @@ -64,12 +62,12 @@ Once you have installed the package, you may want to test it:: ......................................................................... ................................................. ---------------------------------------------------------------------- - Ran 140 tests in 2.164s + Ran 146 tests in 2.164s OK You can always import the function test, -and ``test().wasSuccessful()`` will return True when the test went well. +and ``test().wasSuccessful()`` will return ``True`` when the test went well. 
From 53fa0025903329cff94dc967c2945fb2fa114b8b Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 3 Jun 2019 15:15:40 -0500 Subject: [PATCH 164/185] fix wording --- CHANGE_LOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index ba0623fd4..3b94849e6 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -5,7 +5,7 @@ which also removes all redefinitions of PyString_ * handle negative arguments of .index() method consistentlt with how they are treated for lists - * add a more comments to the C code + * add a few more comments to the C code * move imports outside tests: pickle, io, etc. * drop Python 2.5 support From f9ccdae450539a6fda3b23c2c28202e9de901ec6 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 26 Jun 2019 21:11:17 -0500 Subject: [PATCH 165/185] typo --- CHANGE_LOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 3b94849e6..0aa5e598b 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -3,7 +3,7 @@ * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, which also removes all redefinitions of PyString_ - * handle negative arguments of .index() method consistentlt with how + * handle negative arguments of .index() method consistently with how they are treated for lists * add a few more comments to the C code * move imports outside tests: pickle, io, etc. 
From 84fd4a77b30dd67e8d5d47a36ef4e1e39daac4cf Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 28 Jun 2019 21:18:04 -0500 Subject: [PATCH 166/185] update readme --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index bdfb0e53a..fdc951e57 100644 --- a/README.rst +++ b/README.rst @@ -538,9 +538,9 @@ Change log * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, which also removes all redefinitions of PyString_ - * handle negative arguments of .index() method consistentlt with how + * handle negative arguments of .index() method consistently with how they are treated for lists - * add a more comments to the C code + * add a few more comments to the C code * move imports outside tests: pickle, io, etc. * drop Python 2.5 support From 6a5da99a70567b9862a0bcf6ef2e45f338bc6f7a Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 15 Jul 2019 19:10:56 -0500 Subject: [PATCH 167/185] fix wording --- bitarray/_bitarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 479e29ca1..47d7929f6 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1601,7 +1601,7 @@ bitarray_frombytes(bitarrayobject *self, PyObject *bytes) return NULL; } - /* Before we extend the raw bytes with the new data, we need store + /* Before we extend the raw bytes with the new data, we need to store the current size and pad the last byte, as our bitarray size might not be a multiple of 8. After extending, we remove the padding bits again. The same is done in bitarray_fromfile(). 
From ce61d17c755346364fbbe89910e588983d6875f3 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 15 Jul 2019 19:11:41 -0500 Subject: [PATCH 168/185] add release date, update readme --- CHANGE_LOG | 2 +- README.rst | 13 +------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 0aa5e598b..10ecfba1d 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,4 +1,4 @@ -2019-06-XX 1.0.0: +2019-07-15 1.0.0: ------------------- * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, diff --git a/README.rst b/README.rst index fdc951e57..bf78675f3 100644 --- a/README.rst +++ b/README.rst @@ -533,7 +533,7 @@ Reference Change log ---------- -2019-06-XX 1.0.0: +**1.0.0** (2019-07-15): * fix bitarrays beings created from unicode in Python 2 * use PyBytes_* in C code, treating the Py3k function names as default, @@ -557,16 +557,5 @@ Change log * fix fail to compile on Windows with VS 2015, issue #72 -**0.9.1** (2019-04-28): - - * fix types to actually be types, #29 - * check for ambiguous prefix codes when building binary tree for decoding - * remove Python level methods: encode, decode, iterdecode (in favor of - having these implemented on the C-level along with check_codedict) - * fix self tests for Python 2.5 and 2.6 - * move all Huffman code related example code into examples/huffman - * add code to generate graphviz .dot file of Huffman tree to examples - - Please find the complete change log `here `_. 
From 7e5fc924c9da79263858e37aa7bc172c4381930d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 15 Jul 2019 20:52:04 -0500 Subject: [PATCH 169/185] typo --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index bf78675f3..024d0f3a1 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ Key features * Bitarray objects support the buffer protocol (Python 2.7 and above) * On 32-bit systems, a bitarray object can contain up to 2^34 elements, - that is 16 Gbits (on 64-bit machines up to 2^63 elements in theory. + that is 16 Gbits (on 64-bit machines up to 2^63 elements in theory). Installation From c580383000437822b69b522b5b4708b1946e39f3 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 19 Jul 2019 01:01:47 -0500 Subject: [PATCH 170/185] fix readme to pass twine check --- CHANGE_LOG | 9 +++++++-- Makefile | 2 ++ README.rst | 20 ++++++++++---------- bitarray/__init__.py | 2 +- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 10ecfba1d..3812536cf 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,8 +1,13 @@ +2019-07-19 1.0.1: +------------------- + * fix readme to pass ``twine check`` + + 2019-07-15 1.0.0: ------------------- * fix bitarrays beings created from unicode in Python 2 - * use PyBytes_* in C code, treating the Py3k function names as default, - which also removes all redefinitions of PyString_ + * use ``PyBytes_*`` in C code, treating the Py3k function names as default, + which also removes all redefinitions of ``PyString_*`` * handle negative arguments of .index() method consistently with how they are treated for lists * add a few more comments to the C code diff --git a/Makefile b/Makefile index 7144a83d2..9a71f2c24 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ test: bitarray/_bitarray.so doc: bitarray/_bitarray.so $(PYTHON) update_readme.py + $(PYTHON) setup.py sdist + twine check dist/* clean: diff --git a/README.rst b/README.rst index 
024d0f3a1..9aa4b890a 100644 --- a/README.rst +++ b/README.rst @@ -47,8 +47,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-1.0.0.tar.gz - $ cd bitarray-1.0.0 + $ tar xzf bitarray-1.0.1.tar.gz + $ cd bitarray-1.0.1 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -57,7 +57,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 1.0.0 + bitarray version: 1.0.1 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ................................................. @@ -533,11 +533,16 @@ Reference Change log ---------- +**1.0.1** (2019-07-19): + + * fix readme to pass ``twine check`` + + **1.0.0** (2019-07-15): * fix bitarrays beings created from unicode in Python 2 - * use PyBytes_* in C code, treating the Py3k function names as default, - which also removes all redefinitions of PyString_ + * use ``PyBytes_*`` in C code, treating the Py3k function names as default, + which also removes all redefinitions of ``PyString_*`` * handle negative arguments of .index() method consistently with how they are treated for lists * add a few more comments to the C code @@ -552,10 +557,5 @@ Change log * improve error message when trying to initialize from float or complex -**0.9.2** (2019-04-29): - - * fix fail to compile on Windows with VS 2015, issue #72 - - Please find the complete change log `here `_. 
diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 8edb5a81f..5018d1161 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '1.0.0' +__version__ = '1.0.1' class bitarray(_bitarray): From 10843437ed5e60ba216b85f92e550053c331a066 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 28 Oct 2019 19:03:09 -0500 Subject: [PATCH 171/185] add helper functions to examples --- examples/README | 5 +++++ examples/helpers.py | 47 +++++++++++++++++++++++++++++++++++++++++++++ examples/runall | 14 -------------- 3 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 examples/helpers.py delete mode 100755 examples/runall diff --git a/examples/README b/examples/README index 1befe4f6c..94527c74f 100644 --- a/examples/README +++ b/examples/README @@ -13,6 +13,11 @@ gene.py: represented by bitarrays. +helpers.py: + some useful helper function which people have requested, but which are + not common enough to add to the library itself. + + huffman Directory containing a library and examples for working with Huffman trees and codes. diff --git a/examples/helpers.py b/examples/helpers.py new file mode 100644 index 000000000..7180c2874 --- /dev/null +++ b/examples/helpers.py @@ -0,0 +1,47 @@ +from bitarray import bitarray + + +def trim(a): + "return a bitarray, with zero bits removed from beginning" + try: + first = a.index(1) + except ValueError: + return bitarray() + last = len(a) - 1 + while not a[last]: + last -= 1 + return a[first:last+1] + +def find_last(a, value=True): + "find the last occurrence of value, in bitarray."
+ i = len(a) - 1 + while not a[i] == bool(value): + i -= 1 + return i + +def count_n(a, n): + "return the index i for which a[:i].count() == n" + i, j = n, a[:n].count() + while j < n: + if a[i]: + j += 1 + i += 1 + return i + +if __name__ == '__main__': + # trim + assert trim(bitarray()) == bitarray() + assert trim(bitarray('000')) == bitarray() + assert trim(bitarray('111')) == bitarray('111') + assert trim(bitarray('00010100')) == bitarray('101') + + # find_last + assert find_last(bitarray('00010100')) == 5 + assert find_last(bitarray('00010111'), 0) == 4 + assert find_last(bitarray('0000'), 0) == 3 + + # count_n + a = bitarray('11111011111011111011111001111011111011111011111010111010111') + for n in range(0, 48): + i = count_n(a, n) + assert a[:i].count() == n diff --git a/examples/runall b/examples/runall deleted file mode 100755 index 72a54e1d4..000000000 --- a/examples/runall +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -x - -python huffman.py --test README || exit 1 - -for x in *.py -do - if [ $x == 'huffman.py' ]; then - continue - fi - echo Running: $x - python $x || exit 1 -done - -rm *.ppm *.pyc From 321fd269456d25363912d44b9e4ad9921d7b97fb Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 00:44:35 -0500 Subject: [PATCH 172/185] update documentation to use positional-only syntax in docstrings, and update README to pass Python 3 doctest --- CHANGE_LOG | 6 +++++ README.rst | 58 ++++++++++++++++++++++++-------------------- bitarray/_bitarray.c | 44 ++++++++++++++++----------------- update_readme.py | 6 ++--- 4 files changed, 62 insertions(+), 52 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index 3812536cf..f6ab3d3ce 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,3 +1,9 @@ +2019-XX-XX 1.0.2: +------------------- + * update documentation to use positional-only syntax in docstrings + * update README to pass Python 3 doctest + + 2019-07-19 1.0.1: ------------------- * fix readme to pass ``twine check`` diff --git a/README.rst b/README.rst 
index 9aa4b890a..3b5edc2ed 100644 --- a/README.rst +++ b/README.rst @@ -58,7 +58,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray bitarray version: 1.0.1 - 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] + 3.7.4 (r271:86832, Dec 29 2019) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ................................................. ---------------------------------------------------------------------- @@ -106,7 +106,7 @@ being applied, whenever casting an object: bitarray('101010') >>> a.append(a) # note that bool(a) is True >>> a.count(42) # counts occurrences of True (not 42) - 4L + 4 >>> a.remove('') # removes first occurrence of False >>> a bitarray('110101') @@ -162,7 +162,7 @@ specified explicitly: bitarray('10000010') >>> b = bitarray('11000010', endian='little') >>> b.tobytes() - 'C' + b'C' Here, the low-bit comes first because little-endian means that increasing numeric significance corresponds to an increasing address (index). @@ -175,7 +175,7 @@ and most significant bit. bitarray('01000001') >>> a[6] = 1 >>> a.tobytes() - 'C' + b'C' Here, the high-bit comes first because big-endian means "most-significant first". @@ -251,12 +251,12 @@ interpreted as simple bytes. >>> len(v) 3 >>> v[-1] - 'C' + 67 >>> v[:2].tobytes() - 'AB' + b'AB' >>> v.readonly # changing a bitarray's memory is also possible False - >>> v[1] = 'o' + >>> v[1] = 111 >>> a bitarray('010000010110111101000011') @@ -335,7 +335,7 @@ Reference Returns True when any bit in the array is True. -``append(item)`` +``append(item, /)`` Append the value bool(item) to the end of the bitarray. @@ -357,16 +357,16 @@ Reference Return a copy of the bitarray. -``count([value])`` -> int +``count(value=True, /)`` -> int Return number of occurrences of value (defaults to True) in the bitarray. 
-``decode(code)`` -> list +``decode(code, /)`` -> list Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and return it as a list of symbols. -``encode(code, iterable)`` +``encode(code, iterable, /)`` Given a prefix code (a dict mapping symbols to bitarrays), iterate over the iterable object with symbols, and extend the bitarray with the corresponding bitarray for each symbols. @@ -376,7 +376,7 @@ Reference Return the bit endianness as a string (either 'little' or 'big'). -``extend(object)`` +``extend(iterable, /)`` Append bits to the end of the bitarray. The objects which can be passed to this method are the same iterable objects which can given to a bitarray object upon initialization. @@ -387,11 +387,11 @@ Reference will be a multiple of 8. Returns the number of bits added (0..7). -``frombytes(bytes)`` +``frombytes(bytes, /)`` Append from a byte string, interpreted as machine values. -``fromfile(f, [n])`` +``fromfile(f, n=, /)`` Read n bytes from the file object f and append them to the bitarray interpreted as machine values. When n is omitted, as many bytes are read until EOF is reached. @@ -402,12 +402,12 @@ Reference Deprecated since version 0.4.0, use ``frombytes()`` instead. -``index(value, [start, [stop]])`` -> int +``index(value, start=0, stop=, /)`` -> int Return index of the first occurrence of bool(value) in the bitarray. Raises ValueError if the value is not present. -``insert(i, item)`` +``insert(i, item, /)`` Insert bool(item) into the bitarray before position i. @@ -416,13 +416,13 @@ Reference i.e. convert each 1-bit into a 0-bit and vice versa. -``iterdecode(code)`` -> iterator +``iterdecode(code, /)`` -> iterator Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and return an iterator over the symbols. 
-``itersearch(bitarray)`` -> iterator +``itersearch(bitarray, /)`` -> iterator Searches for the given a bitarray in self, and return an iterator over the start positions where bitarray matches self. @@ -435,7 +435,7 @@ Reference on 32bit and 64bit machines. -``pack(bytes)`` +``pack(bytes, /)`` Extend the bitarray from a byte string, where each characters corresponds to a single bit. The character b'\x00' maps to bit 0 and all other characters map to bit 1. @@ -444,12 +444,12 @@ Reference (for example NumPy's ndarray object) which have a different view of memory. -``pop([i])`` -> item +``pop(index=-1, /)`` -> item Return the i-th (default last) element and delete it from the bitarray. Raises IndexError if bitarray is empty or index is out of range. -``remove(item)`` +``remove(item, /)`` Remove the first occurrence of bool(item) in the bitarray. Raises ValueError if item is not present. @@ -458,14 +458,14 @@ Reference Reverse the order of bits in the array (in-place). -``search(bitarray, [limit])`` -> list +``search(bitarray, limit=, /)`` -> list Searches for the given bitarray in self, and return the list of start positions. The optional argument limits the number of search results to the integer specified. By default, all search results are returned. -``setall(value)`` +``setall(value, /)`` Set all bits in the bitarray to bool(value). @@ -486,7 +486,7 @@ Reference bits (1..7) are set to 0. -``tofile(f)`` +``tofile(f, /)`` Write all bits (as machine values) to the file object f. When the length of the bitarray is not a multiple of 8, the remaining bits (1..7) are set to 0. @@ -520,19 +520,25 @@ Reference Run self-test, and return unittest.runner.TextTestResult object. -``bitdiff(a, b)`` -> int +``bitdiff(a, b, /)`` -> int Return the difference between two bitarrays a and b. 
This is function does the same as (a ^ b).count(), but is more memory efficient, as no intermediate bitarray object gets created -``bits2bytes(n)`` -> int +``bits2bytes(n, /)`` -> int Return the number of bytes necessary to store n bits. Change log ---------- +2019-XX-XX 1.0.2: + + * update documentation to use positional-only syntax in docstrings + * update README to pass Python 3 doctest + + **1.0.1** (2019-07-19): * fix readme to pass ``twine check`` diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 47d7929f6..a7e9b3052 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -935,7 +935,7 @@ bitarray_count(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(count_doc, -"count([value]) -> int\n\ +"count(value=True, /) -> int\n\ \n\ Return number of occurrences of value (defaults to True) in the bitarray."); @@ -979,7 +979,7 @@ bitarray_index(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(index_doc, -"index(value, [start, [stop]]) -> int\n\ +"index(value, start=0, stop=, /) -> int\n\ \n\ Return index of the first occurrence of bool(value) in the bitarray.\n\ Raises ValueError if the value is not present."); @@ -994,7 +994,7 @@ bitarray_extend(bitarrayobject *self, PyObject *obj) } PyDoc_STRVAR(extend_doc, -"extend(object)\n\ +"extend(iterable, /)\n\ \n\ Append bits to the end of the bitarray. 
The objects which can be passed\n\ to this method are the same iterable objects which can given to a bitarray\n\ @@ -1025,10 +1025,10 @@ bitarray_contains(bitarrayobject *self, PyObject *x) } PyDoc_STRVAR(contains_doc, -"__contains__(x) -> bool\n\ +"__contains__(value, /) -> bool\n\ \n\ -Return True if bitarray contains x, False otherwise.\n\ -The value x may be a boolean (or integer between 0 and 1), or a bitarray."); +Return True if bitarray contains value, False otherwise.\n\ +The value may be a boolean (or integer between 0 and 1), or a bitarray."); static PyObject * @@ -1078,7 +1078,7 @@ bitarray_search(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(search_doc, -"search(bitarray, [limit]) -> list\n\ +"search(bitarray, limit=, /) -> list\n\ \n\ Searches for the given bitarray in self, and return the list of start\n\ positions.\n\ @@ -1138,7 +1138,7 @@ bitarray_append(bitarrayobject *self, PyObject *v) } PyDoc_STRVAR(append_doc, -"append(item)\n\ +"append(item, /)\n\ \n\ Append the value bool(item) to the end of the bitarray."); @@ -1310,7 +1310,7 @@ bitarray_setall(bitarrayobject *self, PyObject *v) } PyDoc_STRVAR(setall_doc, -"setall(value)\n\ +"setall(value, /)\n\ \n\ Set all bits in the bitarray to bool(value)."); @@ -1482,7 +1482,7 @@ bitarray_fromfile(bitarrayobject *self, PyObject *args) #endif PyDoc_STRVAR(fromfile_doc, -"fromfile(f, [n])\n\ +"fromfile(f, n=, /)\n\ \n\ Read n bytes from the file object f and append them to the bitarray\n\ interpreted as machine values. 
When n is omitted, as many bytes are\n\ @@ -1556,7 +1556,7 @@ bitarray_tofile(bitarrayobject *self, PyObject *f) #endif PyDoc_STRVAR(tofile_doc, -"tofile(f)\n\ +"tofile(f, /)\n\ \n\ Write all bits (as machine values) to the file object f.\n\ When the length of the bitarray is not a multiple of 8,\n\ @@ -1618,7 +1618,7 @@ bitarray_frombytes(bitarrayobject *self, PyObject *bytes) } PyDoc_STRVAR(frombytes_doc, -"frombytes(bytes)\n\ +"frombytes(bytes, /)\n\ \n\ Append from a byte string, interpreted as machine values."); @@ -1700,7 +1700,7 @@ bitarray_pack(bitarrayobject *self, PyObject *bytes) } PyDoc_STRVAR(pack_doc, -"pack(bytes)\n\ +"pack(bytes, /)\n\ \n\ Extend the bitarray from a byte string, where each characters corresponds to\n\ a single bit. The character b'\\x00' maps to bit 0 and all other characters\n\ @@ -1765,7 +1765,7 @@ bitarray_insert(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(insert_doc, -"insert(i, item)\n\ +"insert(i, item, /)\n\ \n\ Insert bool(item) into the bitarray before position i."); @@ -1798,7 +1798,7 @@ bitarray_pop(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(pop_doc, -"pop([i]) -> item\n\ +"pop(index=-1, /) -> item\n\ \n\ Return the i-th (default last) element and delete it from the bitarray.\n\ Raises IndexError if bitarray is empty or index is out of range."); @@ -1825,7 +1825,7 @@ bitarray_remove(bitarrayobject *self, PyObject *v) } PyDoc_STRVAR(remove_doc, -"remove(item)\n\ +"remove(item, /)\n\ \n\ Remove the first occurrence of bool(item) in the bitarray.\n\ Raises ValueError if item is not present."); @@ -2180,7 +2180,7 @@ bitarray_encode(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(encode_doc, -"encode(code, iterable)\n\ +"encode(code, iterable, /)\n\ \n\ Given a prefix code (a dict mapping symbols to bitarrays),\n\ iterate over the iterable object with symbols, and extend the bitarray\n\ @@ -2356,7 +2356,7 @@ bitarray_decode(bitarrayobject *self, PyObject *codedict) } PyDoc_STRVAR(decode_doc, 
-"decode(code) -> list\n\ +"decode(code, /) -> list\n\ \n\ Given a prefix code (a dict mapping symbols to bitarrays),\n\ decode the content of the bitarray and return it as a list of symbols."); @@ -2405,7 +2405,7 @@ bitarray_iterdecode(bitarrayobject *self, PyObject *codedict) } PyDoc_STRVAR(iterdecode_doc, -"iterdecode(code) -> iterator\n\ +"iterdecode(code, /) -> iterator\n\ \n\ Given a prefix code (a dict mapping symbols to bitarrays),\n\ decode the content of the bitarray and return an iterator over\n\ @@ -2521,7 +2521,7 @@ bitarray_itersearch(bitarrayobject *self, PyObject *x) } PyDoc_STRVAR(itersearch_doc, -"itersearch(bitarray) -> iterator\n\ +"itersearch(bitarray, /) -> iterator\n\ \n\ Searches for the given a bitarray in self, and return an iterator over\n\ the start positions where bitarray matches self."); @@ -3148,7 +3148,7 @@ bitdiff(PyObject *self, PyObject *args) } PyDoc_STRVAR(bitdiff_doc, -"bitdiff(a, b) -> int\n\ +"bitdiff(a, b, /) -> int\n\ \n\ Return the difference between two bitarrays a and b.\n\ This is function does the same as (a ^ b).count(), but is more memory\n\ @@ -3174,7 +3174,7 @@ bits2bytes(PyObject *self, PyObject *v) } PyDoc_STRVAR(bits2bytes_doc, -"bits2bytes(n) -> int\n\ +"bits2bytes(n, /) -> int\n\ \n\ Return the number of bytes necessary to store n bits."); diff --git a/update_readme.py b/update_readme.py index 93f523cab..0a80e4662 100644 --- a/update_readme.py +++ b/update_readme.py @@ -1,7 +1,6 @@ -import os import re import doctest -from cStringIO import StringIO +from io import StringIO import bitarray @@ -85,13 +84,12 @@ def main(): fo.close() if new_data == data: - print "already up-to-date" + print("already up-to-date") else: with open('README.rst', 'w') as f: f.write(new_data) doctest.testfile('README.rst') - #os.system('rst2html.py README.rst >README.html') if __name__ == '__main__': From 103683c7bc5aa0129148a4a80dc62a2bb02fbde5 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 01:06:16 -0500 Subject: 
[PATCH 173/185] add tests for negative start and stop values in .test_index() --- bitarray/test_bitarray.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 3bee62913..486d4d702 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1319,7 +1319,9 @@ def test_index(self): self.assertEqual(a.index(42), 20) self.assertEqual(a.index(1, 21), 27) self.assertEqual(a.index(1, 27), 27) + self.assertEqual(a.index(1, -73), 27) self.assertRaises(ValueError, a.index, 1, 5, 17) + self.assertRaises(ValueError, a.index, 1, 5, -83) self.assertRaises(ValueError, a.index, 1, 21, 27) self.assertRaises(ValueError, a.index, 1, 28) self.assertEqual(a.index(0), 0) From e2cc3224d7c21e9f8c3452bd7b2d47dfbfef52b5 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 17:33:49 -0500 Subject: [PATCH 174/185] add normalize_index() to avoid duplicate code --- bitarray/_bitarray.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a7e9b3052..1697110ec 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -785,6 +785,19 @@ IntBool_AsInt(PyObject *v) return (int) x; } +/* Normalize index (which may be negative), such that 0 <= i <= n */ +static void +normalize_index(idx_t n, idx_t *i) +{ + if (*i < 0) { + *i += n; + if (*i < 0) + *i = 0; + } + if (*i > n) + *i = n; +} + /* Extract a slice index from a PyInt or PyLong or an object with the nb_index slot defined, and store in *i. However, this function returns -1 on error and 0 on success. 
@@ -954,21 +967,8 @@ bitarray_index(bitarrayobject *self, PyObject *args) if (vi < 0) return NULL; - if (start < 0) { - start += self->nbits; - if (start < 0) - start = 0; - } - if (start > self->nbits) - start = self->nbits; - - if (stop < 0) { - stop += self->nbits; - if (stop < 0) - stop = 0; - } - if (stop > self->nbits) - stop = self->nbits; + normalize_index(self->nbits, &start); + normalize_index(self->nbits, &stop); i = findfirst(self, vi, start, stop); if (i < 0) { From af23a07d513c17a5c689b6e1bfb32084695159db Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 17:55:30 -0500 Subject: [PATCH 175/185] add start and stop parameter to count(), and avoid calling setunused() --- bitarray/_bitarray.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 1697110ec..f77e9d3de 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -425,16 +425,37 @@ static int bitcount_lookup[256] = { /* returns number of 1 bits */ static idx_t -count(bitarrayobject *self) +count(bitarrayobject *self, idx_t start, idx_t stop) { - Py_ssize_t i; - idx_t res = 0; + Py_ssize_t byte_start, byte_stop, j; + idx_t i, res = 0; unsigned char c; - setunused(self); - for (i = 0; i < Py_SIZE(self); i++) { - c = self->ob_item[i]; - res += bitcount_lookup[c]; + assert(0 <= start && start <= self->nbits); + assert(0 <= stop && stop <= self->nbits); + assert(BYTES(stop) <= Py_SIZE(self)); + + if (self->nbits == 0 || start >= stop) + return 0; + + if (stop >= start + 8) { + byte_start = BYTES(start); + byte_stop = stop / 8; + for (i = start; i < byte_start * 8; i++) + if (GETBIT(self, i)) + res++; + for (j = byte_start; j < byte_stop; j++) { + c = self->ob_item[j]; + res += bitcount_lookup[c]; + } + for (i = byte_stop * 8; i < stop; i++) + if (GETBIT(self, i)) + res++; + } + else { + for (i = start; i < stop; i++) + if (GETBIT(self, i)) + res++; } return res; } @@ -943,7 
+964,7 @@ bitarray_count(bitarrayobject *self, PyObject *args) if (!PyArg_ParseTuple(args, "|i:count", &x)) return NULL; - n1 = count(self); + n1 = count(self, 0, self->nbits); return PyLong_FromLongLong(x ? n1 : (self->nbits - n1)); } @@ -1326,7 +1347,7 @@ bitarray_sort(bitarrayobject *self, PyObject *args, PyObject *kwds) return NULL; n = self->nbits; - n1 = count(self); + n1 = count(self, 0, self->nbits); if (reverse) { setrange(self, 0, n1, 1); From df67ae2bf582cbdea52eeb674714a5d8dfc07f9f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 17:57:01 -0500 Subject: [PATCH 176/185] simplify test --- bitarray/test_bitarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 486d4d702..ebfb8a375 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1419,9 +1419,9 @@ def test_count(self): self.assertEqual(a.count(), a.to01().count('1')) for a in self.randombitarrays(): - self.assertEqual(a.count(), a.count(1)) - self.assertEqual(a.count(1), a.to01().count('1')) - self.assertEqual(a.count(0), a.to01().count('0')) + s = a.to01() + self.assertEqual(a.count(1), s.count('1')) + self.assertEqual(a.count(0), s.count('0')) def test_search(self): From 41f8d294ff3ea95b09921d85a0741033c6ddf8cb Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:05:45 -0500 Subject: [PATCH 177/185] DOC: improve formatting for bitdiff --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3b5edc2ed..8c2186a17 100644 --- a/README.rst +++ b/README.rst @@ -522,8 +522,8 @@ Reference ``bitdiff(a, b, /)`` -> int Return the difference between two bitarrays a and b.
- This is function does the same as (a ^ b).count(), but is more memory - efficient, as no intermediate bitarray object gets created + This is function does the same as ``(a ^ b).count()``, but is more memory + efficient, as no intermediate bitarray object gets created. ``bits2bytes(n, /)`` -> int From fa5be67f4baedd4f47bf7f457db38d3aa7e26a20 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:21:39 -0500 Subject: [PATCH 178/185] add optional start and stop parameters to .count() method --- README.rst | 4 ++-- bitarray/_bitarray.c | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 8c2186a17..0d04c54ad 100644 --- a/README.rst +++ b/README.rst @@ -357,7 +357,7 @@ Reference Return a copy of the bitarray. -``count(value=True, /)`` -> int +``count(value=True, start=0, stop=, /)`` -> int Return number of occurrences of value (defaults to True) in the bitarray. @@ -522,7 +522,7 @@ Reference ``bitdiff(a, b, /)`` -> int Return the difference between two bitarrays a and b. - This is function does the same as ``(a ^ b).count()``, but is more memory + This is function does the same as (a ^ b).count(), but is more memory efficient, as no intermediate bitarray object gets created. diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index f77e9d3de..3250e4214 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -958,18 +958,24 @@ Return a copy of the bitarray."); static PyObject * bitarray_count(bitarrayobject *self, PyObject *args) { - idx_t n1; + idx_t n1, start = 0, stop = self->nbits; long x = 1; - if (!PyArg_ParseTuple(args, "|i:count", &x)) + if (!PyArg_ParseTuple(args, "|iLL:count", &x, &start, &stop)) return NULL; - n1 = count(self, 0, self->nbits); - return PyLong_FromLongLong(x ? 
n1 : (self->nbits - n1)); + normalize_index(self->nbits, &start); + normalize_index(self->nbits, &stop); + + if (self->nbits == 0 || start >= stop) + return PyLong_FromLongLong(0); + + n1 = count(self, start, stop); + return PyLong_FromLongLong(x ? n1 : (stop - start - n1)); } PyDoc_STRVAR(count_doc, -"count(value=True, /) -> int\n\ +"count(value=True, start=0, stop=, /) -> int\n\ \n\ Return number of occurrences of value (defaults to True) in the bitarray."); @@ -3173,7 +3179,7 @@ PyDoc_STRVAR(bitdiff_doc, \n\ Return the difference between two bitarrays a and b.\n\ This is function does the same as (a ^ b).count(), but is more memory\n\ -efficient, as no intermediate bitarray object gets created"); +efficient, as no intermediate bitarray object gets created."); static PyObject * From 83b0dc5559ecc03d3e699de9bc4632350c5d0fcf Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:25:56 -0500 Subject: [PATCH 179/185] add error message when trying to run update-readme with Python version 2 --- update_readme.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/update_readme.py b/update_readme.py index 0a80e4662..a08caccd0 100644 --- a/update_readme.py +++ b/update_readme.py @@ -1,3 +1,7 @@ +import sys +if not sys.version_info[0] == 3: + sys.exit("This program only runs with Python 3, sorry :-(") + import re import doctest from io import StringIO From 1ccb4aafa1782ac78662a87ccd54db67b2d8a6e2 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:32:37 -0500 Subject: [PATCH 180/185] add two new tests for new count() parameters --- README.rst | 4 ++-- bitarray/test_bitarray.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 0d04c54ad..9f6474911 100644 --- a/README.rst +++ b/README.rst @@ -60,9 +60,9 @@ Once you have installed the package, you may want to test it:: bitarray version: 1.0.1 3.7.4 (r271:86832, Dec 29 2019) [GCC 4.2.1 (SUSE Linux)] 
......................................................................... - ................................................. + ................................................................. ---------------------------------------------------------------------- - Ran 146 tests in 2.164s + Ran 148 tests in 2.229s OK diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index ebfb8a375..1919b98ce 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1423,6 +1423,32 @@ def test_count(self): self.assertEqual(a.count(1), s.count('1')) self.assertEqual(a.count(0), s.count('0')) + def test_count2(self): + N = 37 + a = bitarray(N) + a.setall(1) + for i in range(N): + for j in range(i, N): + self.assertEqual(a.count(1, i, j), j - i) + + def test_count3(self): + a = bitarray('01001100' '01110011' '01') + self.assertEqual(a.count(), 9) + self.assertEqual(a.count(0, 12), 3) + self.assertEqual(a.count(1, -5), 3) + self.assertEqual(a.count(1, 2, 17), 7) + self.assertEqual(a.count(1, 6, 11), 2) + self.assertEqual(a.count(0, 7, -3), 4) + self.assertEqual(a.count(1, 1, -1), 8) + self.assertEqual(a.count(1, 17, 14), 0) + + for a in self.randombitarrays(): + s = a.to01() + i = randint(-3, len(a)+1) + j = randint(-3, len(a)+1) + self.assertEqual(a.count(1, i, j), s[i:j].count('1')) + self.assertEqual(a.count(0, i, j), s[i:j].count('0')) + def test_search(self): a = bitarray('') From 068f3a21a7dfaee36020cc7137362cd101e4632e Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:36:32 -0500 Subject: [PATCH 181/185] simplify example by using new funcionality --- examples/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/helpers.py b/examples/helpers.py index 7180c2874..09cfd1abb 100644 --- a/examples/helpers.py +++ b/examples/helpers.py @@ -21,7 +21,7 @@ def find_last(a, value=True): def count_n(a, n): "return the index i for which a[:i].count() == n" - i, j = n, a[:n].count() + i, j = n, a.count(1, 
0, n) while j < n: if a[i]: j += 1 From 918eedf96146b980349a55f5a68636d898408f9f Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:41:04 -0500 Subject: [PATCH 182/185] update changelog, next version will be 1.1.0 --- CHANGE_LOG | 5 +++-- README.rst | 11 ++++++----- bitarray/__init__.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index f6ab3d3ce..5cf6c9f45 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,7 +1,8 @@ -2019-XX-XX 1.0.2: +2019-XX-XX 1.1.0: ------------------- + * add optional start and stop parameters to .count() method * update documentation to use positional-only syntax in docstrings - * update README to pass Python 3 doctest + * update readme to pass Python 3 doctest 2019-07-19 1.0.1: diff --git a/README.rst b/README.rst index 9f6474911..39b073353 100644 --- a/README.rst +++ b/README.rst @@ -47,8 +47,8 @@ Installation bitarray can be installed from source:: - $ tar xzf bitarray-1.0.1.tar.gz - $ cd bitarray-1.0.1 + $ tar xzf bitarray-1.1.0.tar.gz + $ cd bitarray-1.1.0 $ python setup.py install On Unix systems, the latter command may have to be executed with root @@ -57,7 +57,7 @@ Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 1.0.1 + bitarray version: 1.1.0 3.7.4 (r271:86832, Dec 29 2019) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ................................................................. 
@@ -533,10 +533,11 @@ Reference Change log ---------- -2019-XX-XX 1.0.2: +2019-XX-XX 1.1.0: + * add optional start and stop parameters to .count() method * update documentation to use positional-only syntax in docstrings - * update README to pass Python 3 doctest + * update readme to pass Python 3 doctest **1.0.1** (2019-07-19): diff --git a/bitarray/__init__.py b/bitarray/__init__.py index 5018d1161..708eb8957 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -10,7 +10,7 @@ """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo -__version__ = '1.0.1' +__version__ = '1.1.0' class bitarray(_bitarray): From 7df195db8d87174381dd552fae60149834e8567c Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 18:55:51 -0500 Subject: [PATCH 183/185] add official Python 3.8 support --- CHANGE_LOG | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGE_LOG b/CHANGE_LOG index 5cf6c9f45..58afbd8c5 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,6 +1,7 @@ 2019-XX-XX 1.1.0: ------------------- * add optional start and stop parameters to .count() method + * add official Python 3.8 support * update documentation to use positional-only syntax in docstrings * update readme to pass Python 3 doctest diff --git a/setup.py b/setup.py index 25d23b763..933793200 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", From aa851917a604a69d30fea4677b16d1da67682ca6 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 23:28:33 -0500 Subject: [PATCH 184/185] improve ValueError exception message for bits2bytes when input is negative --- bitarray/_bitarray.c | 4 ++-- bitarray/test_bitarray.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 3250e4214..8ad155a22 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -768,7 +768,7 @@ extend_dispatch(bitarrayobject *self, PyObject *obj) #ifdef IS_PY3K #define IS_INDEX(x) (PyLong_Check(x) || PyIndex_Check(x)) #define IS_INT_OR_BOOL(x) (PyBool_Check(x) || PyLong_Check(x)) -#else +#else /* Py 2 */ #define IS_INDEX(x) (PyInt_Check(x) || PyLong_Check(x) || PyIndex_Check(x)) #define IS_INT_OR_BOOL(x) (PyBool_Check(x) || PyInt_Check(x) || \ PyLong_Check(x)) @@ -3194,7 +3194,7 @@ bits2bytes(PyObject *self, PyObject *v) if (getIndex(v, &n) < 0) return NULL; if (n < 0) { - PyErr_SetString(PyExc_ValueError, "positive value expected"); + PyErr_SetString(PyExc_ValueError, "non-negative integer expected"); return NULL; } return PyLong_FromLongLong(BYTES(n)); diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 1919b98ce..0619f21d3 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -162,6 +162,8 @@ def test_bits2bytes(self): for arg in ['foo', [], None, {}]: self.assertRaises(TypeError, bits2bytes, arg) + self.assertRaises(TypeError, bits2bytes, 187.0) + self.assertRaises(TypeError, bits2bytes, -4.0) self.assertRaises(TypeError, bits2bytes) self.assertRaises(TypeError, bits2bytes, 1, 2) From f088b41fb8ef5dc1db9747697756d17efc7b8681 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Tue, 29 Oct 2019 23:41:49 -0500 Subject: [PATCH 185/185] simplify many tests by adding randombitarrays(start=1) --- bitarray/test_bitarray.py | 44 ++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 0619f21d3..c24cb1054 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -51,8 +51,8 @@ def unicode(*args): class Util(object): - def randombitarrays(self): - for n in list(range(25)) + [randint(1000, 2000)]: + def randombitarrays(self, start=0): + for n in 
list(range(start, 25)) + [randint(1000, 2000)]: a = bitarray(endian=['little', 'big'][randint(0, 1)]) a.frombytes(os.urandom(bits2bytes(n))) del a[n:] @@ -491,10 +491,8 @@ def test_setitem1(self): self.assertRaises(IndexError, a.__setitem__, -3, False) def test_setitem2(self): - for a in self.randombitarrays(): + for a in self.randombitarrays(start=1): la = len(a) - if la == 0: - continue i = randint(0, la - 1) aa = a.tolist() ida = id(a) @@ -528,9 +526,8 @@ def test_setitem3(self): self.assertRaises(IndexError, a.__setitem__, -6, 'bar') def test_setitem4(self): - for a in self.randombitarrays(): + for a in self.randombitarrays(start=1): la = len(a) - if la == 0: continue for dum in range(50): step = self.rndsliceidx(la) if step == 0: step = None @@ -587,9 +584,8 @@ def test_delitem1(self): self.assertRaises(IndexError, a.__delitem__, -4) def test_delitem2(self): - for a in self.randombitarrays(): + for a in self.randombitarrays(start=1): la = len(a) - if la == 0: continue for dum in range(50): step = self.rndsliceidx(la) if step == 0: step = None @@ -658,17 +654,17 @@ def test_assignment(self): self.assertEqual(a, b) def test_compare(self): - for a in self.randombitarrays(): + for a in self.randombitarrays(start=1): aa = a.tolist() - - for b in self.randombitarrays(): - bb = b.tolist() - self.assertEqual(a == b, aa == bb) - self.assertEqual(a != b, aa != bb) - self.assertEqual(a <= b, aa <= bb) - self.assertEqual(a < b, aa < bb) - self.assertEqual(a >= b, aa >= bb) - self.assertEqual(a > b, aa > bb) + b = a.copy() + b[randint(0, len(a)-1)] = randint(0, 1) + bb = b.tolist() + self.assertEqual(a == b, aa == bb) + self.assertEqual(a != b, aa != bb) + self.assertEqual(a <= b, aa <= bb) + self.assertEqual(a < b, aa < bb) + self.assertEqual(a >= b, aa >= bb) + self.assertEqual(a > b, aa > bb) def test_subclassing(self): class ExaggeratingBitarray(bitarray): @@ -682,12 +678,10 @@ def __init__(self, data, offset): def __getitem__(self, i): return 
bitarray.__getitem__(self, i - self.offset) - for a in self.randombitarrays(): - if len(a) == 0: - continue + for a in self.randombitarrays(start=1): b = ExaggeratingBitarray(a, 1234) for i in range(len(a)): - self.assertEqual(a[i], b[i+1234]) + self.assertEqual(a[i], b[i + 1234]) def test_endianness1(self): a = bitarray(endian='little') @@ -1661,9 +1655,7 @@ def test_pop(self): self.check_obj(a) self.assertEqual(a.endian(), enda) - for a in self.randombitarrays(): - if len(a) == 0: - continue + for a in self.randombitarrays(start=1): n = randint(-len(a), len(a)-1) aa = a.tolist() self.assertEqual(a.pop(n), aa[n])