diff --git a/ipfn/ipfn.py b/ipfn/ipfn.py index 6b2181b..db4a19a 100755 --- a/ipfn/ipfn.py +++ b/ipfn/ipfn.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python from __future__ import print_function import numpy as np import pandas as pd @@ -41,7 +40,7 @@ def __init__(self, original, aggregates, dimensions, weight_col='total', self.conv_rate = convergence_rate self.max_itr = max_iteration if verbose not in [0, 1, 2]: - raise(ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}")) + raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}") self.verbose = verbose self.rate_tolerance = rate_tolerance @@ -74,7 +73,7 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'): m = IPF.iteration() """ - # Check that the inputs are numpay arrays of floats + # Check that the inputs are numpy arrays of floats inc = 0 for aggregate in aggregates: if not isinstance(aggregate, np.ndarray): @@ -142,7 +141,7 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'): ori_ijk = aggregates[inc][item] m_slice = m[idx] m_ijk = m_slice.sum() - # print('Current vs original', abs(m_ijk/ori_ijk - 1)) + # print("Current vs original", abs(m_ijk/ori_ijk - 1)) if abs(m_ijk / ori_ijk - 1) > max_conv: max_conv = abs(m_ijk / ori_ijk - 1) @@ -254,34 +253,34 @@ def iteration(self): """ Runs the ipfn algorithm. Automatically detects of working with numpy ndarray or pandas dataframes. """ - - i = 0 - conv = np.inf - old_conv = -np.inf + old_conv = np.inf conv_list = [] + converged = 1 m = self.original - # If the original data input is in pandas DataFrame format + # Prepare input data if isinstance(self.original, pd.DataFrame): ipfn_method = self.ipfn_df elif isinstance(self.original, np.ndarray): ipfn_method = self.ipfn_np self.original = self.original.astype('float64') else: - raise(ValueError(f'Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame')) - while ((i <= self.max_itr and conv > self.conv_rate) and (i <= self.max_itr and abs(conv - old_conv) > self.rate_tolerance)): - old_conv = conv + raise ValueError(f"Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame") + + # Run iterations + for i in range(self.max_itr): m, conv = ipfn_method(m, self.aggregates, self.dimensions, self.weight_col) conv_list.append(conv) - i += 1 - converged = 1 - if i <= self.max_itr: - if (not conv > self.conv_rate) & (self.verbose > 1): - print('ipfn converged: convergence_rate below threshold') - elif not abs(conv - old_conv) > self.rate_tolerance: - print('ipfn converged: convergence_rate not updating or below rate_tolerance') + if conv <= self.conv_rate: + if self.verbose > 1: + print("ipfn converged: convergence_rate below threshold") + break + if abs(conv - old_conv) <= self.rate_tolerance: + print("ipfn converged: convergence_rate not updating or below rate_tolerance") + break + old_conv = conv else: - print('Maximum iterations reached') + print("Maximum iterations reached") converged = 0 # Handle the verbose @@ -290,6 +289,6 @@ def iteration(self): elif self.verbose == 1: return m, converged elif self.verbose == 2: - return m, converged, pd.DataFrame({'iteration': range(i), 'conv': conv_list}).set_index('iteration') + return m, converged, pd.DataFrame({'iteration': range(1, i+2), 'conv': conv_list}).set_index('iteration') else: - raise(ValueError(f'wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}')) + raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}")