From 2d8e977337782e46cd189b7211bb0965c089e905 Mon Sep 17 00:00:00 2001
From: Maarten Bosmans <mkbosmans@gmail.com>
Date: Mon, 28 Nov 2022 20:23:25 +0100
Subject: [PATCH 1/2] Some small cleanup

User-visible strings should be quoted with " and ' is for internal strings.
---
 ipfn/ipfn.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/ipfn/ipfn.py b/ipfn/ipfn.py
index 6b2181b..3d9e8b0 100755
--- a/ipfn/ipfn.py
+++ b/ipfn/ipfn.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 from __future__ import print_function
 import numpy as np
 import pandas as pd
@@ -41,7 +40,7 @@ def __init__(self, original, aggregates, dimensions, weight_col='total',
         self.conv_rate = convergence_rate
         self.max_itr = max_iteration
         if verbose not in [0, 1, 2]:
-            raise(ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}"))
+            raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}")
         self.verbose = verbose
         self.rate_tolerance = rate_tolerance
 
@@ -74,7 +73,7 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
         m = IPF.iteration()
         """
 
-        # Check that the inputs are numpay arrays of floats
+        # Check that the inputs are numpy arrays of floats
         inc = 0
         for aggregate in aggregates:
             if not isinstance(aggregate, np.ndarray):
@@ -142,7 +141,7 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
                 ori_ijk = aggregates[inc][item]
                 m_slice = m[idx]
                 m_ijk = m_slice.sum()
-                # print('Current vs original', abs(m_ijk/ori_ijk - 1))
+                # print("Current vs original", abs(m_ijk/ori_ijk - 1))
                 if abs(m_ijk / ori_ijk - 1) > max_conv:
                     max_conv = abs(m_ijk / ori_ijk - 1)
 
@@ -268,7 +267,7 @@ def iteration(self):
             ipfn_method = self.ipfn_np
             self.original = self.original.astype('float64')
         else:
-            raise(ValueError(f'Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame'))
+            raise ValueError(f"Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame")
         while ((i <= self.max_itr and conv > self.conv_rate) and (i <= self.max_itr and abs(conv - old_conv) > self.rate_tolerance)):
             old_conv = conv
             m, conv = ipfn_method(m, self.aggregates, self.dimensions, self.weight_col)
@@ -277,11 +276,11 @@ def iteration(self):
         converged = 1
         if i <= self.max_itr:
             if (not conv > self.conv_rate) & (self.verbose > 1):
-                print('ipfn converged: convergence_rate below threshold')
+                print("ipfn converged: convergence_rate below threshold")
             elif not abs(conv - old_conv) > self.rate_tolerance:
-                print('ipfn converged: convergence_rate not updating or below rate_tolerance')
+                print("ipfn converged: convergence_rate not updating or below rate_tolerance")
         else:
-            print('Maximum iterations reached')
+            print("Maximum iterations reached")
             converged = 0
 
         # Handle the verbose
@@ -292,4 +291,4 @@ def iteration(self):
         elif self.verbose == 2:
             return m, converged, pd.DataFrame({'iteration': range(i), 'conv': conv_list}).set_index('iteration')
         else:
-            raise(ValueError(f'wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}'))
+            raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}")

From ae61437248b4fa7665589e8da09212943b32b251 Mon Sep 17 00:00:00 2001
From: Maarten Bosmans <mkbosmans@gmail.com>
Date: Mon, 28 Nov 2022 20:29:28 +0100
Subject: [PATCH 2/2] Rework iteration loop

The max_itr is now more explicit and the convergence criteria are only
checked in one place instead of in the while condition and after the loop.

The loop now actually stops after doing max_itr iterations, where previously
it would do iterations numbered 0 to max_itr, i.e. one iteration too many.
---
 ipfn/ipfn.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/ipfn/ipfn.py b/ipfn/ipfn.py
index 3d9e8b0..db4a19a 100755
--- a/ipfn/ipfn.py
+++ b/ipfn/ipfn.py
@@ -253,14 +253,12 @@ def iteration(self):
         """
         Runs the ipfn algorithm. Automatically detects of working with numpy ndarray or pandas dataframes.
         """
-
-        i = 0
-        conv = np.inf
-        old_conv = -np.inf
+        old_conv = np.inf
         conv_list = []
+        converged = 1
         m = self.original
 
-        # If the original data input is in pandas DataFrame format
+        # Prepare input data
         if isinstance(self.original, pd.DataFrame):
             ipfn_method = self.ipfn_df
         elif isinstance(self.original, np.ndarray):
@@ -268,17 +266,19 @@ def iteration(self):
             self.original = self.original.astype('float64')
         else:
             raise ValueError(f"Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame")
-        while ((i <= self.max_itr and conv > self.conv_rate) and (i <= self.max_itr and abs(conv - old_conv) > self.rate_tolerance)):
-            old_conv = conv
+
+        # Run iterations
+        for i in range(self.max_itr):
             m, conv = ipfn_method(m, self.aggregates, self.dimensions, self.weight_col)
             conv_list.append(conv)
-            i += 1
-        converged = 1
-        if i <= self.max_itr:
-            if (not conv > self.conv_rate) & (self.verbose > 1):
-                print("ipfn converged: convergence_rate below threshold")
-            elif not abs(conv - old_conv) > self.rate_tolerance:
+            if conv <= self.conv_rate:
+                if self.verbose > 1:
+                    print("ipfn converged: convergence_rate below threshold")
+                break
+            if abs(conv - old_conv) <= self.rate_tolerance:
                 print("ipfn converged: convergence_rate not updating or below rate_tolerance")
+                break
+            old_conv = conv
         else:
             print("Maximum iterations reached")
             converged = 0
@@ -289,6 +289,6 @@ def iteration(self):
         elif self.verbose == 1:
             return m, converged
         elif self.verbose == 2:
-            return m, converged, pd.DataFrame({'iteration': range(i), 'conv': conv_list}).set_index('iteration')
+            return m, converged, pd.DataFrame({'iteration': range(1, i+2), 'conv': conv_list}).set_index('iteration')
         else:
             raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}")