Skip to content

Commit 2e35b38

Browse files
upgrade libs versions
1 parent ea46938 commit 2e35b38

22 files changed

Lines changed: 88 additions & 81 deletions

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. image:: https://img.shields.io/pepy/dt/verstack
22
:target: https://pypi.org/project/verstack/
33

4-
.. image:: https://img.shields.io/badge/version-4.3.3-success.svg?color=blue
4+
.. image:: https://img.shields.io/badge/version-4.3.4-success.svg?color=blue
55
:target: https://pypi.org/project/verstack/
66

77
.. image:: logo.png

docs/source/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
############################
2-
verstack 4.3.3 Documentation
2+
verstack 4.3.4 Documentation
33
############################
44
Machine learning tools to make a Data Scientist's work efficient
55

@@ -1115,7 +1115,7 @@ Impute all missing values in a pandas dataframe by xgboost models in multiproces
11151115

11161116
.. note::
11171117
This is the second major version of `NaNImputer`. The original class (last version 1.4.0)
1118-
had been very popular. The legacy version is `NaNImputerLegacy` has been depresciated after verstack version 4.3.3
1118+
had been very popular. The legacy version, `NaNImputerLegacy`, has been deprecated after verstack version 4.3.4
11191119
Differences between the NaNImputer and NaNImputerLegacy:
11201120
- The new NaNImputer is based on LightGBM instead of XGBoost in the legacy version
11211121
- The new NaNImputer uses single-core multithreading instead of the multicore approach of the legacy version; it is significantly faster

requirements.txt

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1-
numpy>=1.26.4,<=2.1.1
2-
pandas==2.2.2
3-
scikit-learn>=1.3.2,<=1.5.1
4-
lightgbm>=4.4.0,<=4.5.0
5-
optuna>=3.5.0,<=4.0.0
1+
numpy~=2.3.0
2+
pandas~=2.3.0
3+
scikit-learn~=1.7.1
4+
lightgbm~=4.5.0
5+
optuna~=4.0.0
66
optuna-integration>=3.2.0,<=4.0.0
7-
plotly>=5.11.0,<=5.24.0
8-
matplotlib==3.9.2
9-
seaborn==0.13.2
10-
python-dateutil==2.9.0
11-
holidays==0.56
12-
mlxtend==0.23.1
13-
category_encoders>=2.5.1,<=2.6.3
14-
tabulate==0.9.0
7+
plotly~=5.24.0
8+
matplotlib~=3.9.2
9+
seaborn~=0.13.2
10+
python-dateutil~=2.9.0
11+
holidays~=0.56
12+
mlxtend~=0.23.1
13+
category_encoders~=2.6.3
14+
tabulate~=0.9.0
1515
# following packages for testing purposes only
16-
# fastparquet==2024.5.0
17-
# pytest==8.3.2
16+
# fastparquet~=2024.5.0
17+
# pytest~=8.3.2

verstack/categoric_encoders/WeightOfEvidenceEncoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pandas as pd
22
import numpy as np
3+
from category_encoders import WOEEncoder
34
from verstack.categoric_encoders.args_validators import is_bool_na_sentinel, assert_fit_transform_args, assert_transform_args, assert_binary_target
45
from verstack.tools import Printer
56

@@ -81,7 +82,6 @@ def fit_transform(self, df, colname, targetname):
8182
assert_binary_target(df, targetname)
8283
encoded_df = df.copy()
8384
self._colname = colname
84-
from category_encoders import WOEEncoder
8585
generic_encoder = WOEEncoder(**self._params)
8686
encoded_column = generic_encoder.fit_transform(df[colname], df[targetname])
8787
self.__generic_encoder = generic_encoder

verstack/lgbm_optuna_tuning/lgb_metrics.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
import numpy as np
1010
from sklearn.metrics import mean_absolute_error as mae
11+
from sklearn.metrics import root_mean_squared_error as rmse
1112
from sklearn.metrics import mean_squared_error as mse
13+
from sklearn.metrics import root_mean_squared_log_error as rmsle
1214
from sklearn.metrics import mean_squared_log_error as msle
1315
from sklearn.metrics import r2_score as r2
1416
from sklearn.metrics import roc_auc_score as auc
@@ -86,17 +88,24 @@ def _percentage_error(real, pred):
8688
# mae imported from sklearn
8789
# ------------------------------------------------------------------------------
8890

89-
def rmse(real, pred):
90-
return mse(real, pred, squared = False)
91+
# rmse imported from sklearn
9192
# ------------------------------------------------------------------------------
9293

93-
def rmsle(real, pred):
94-
'''Changes negative predictions to 0 for correct calculation'''
95-
try:
96-
return msle(real, pred, squared = False)
97-
except ValueError:
98-
pred_non_negative, real_non_negative = remove_negatives(pred, real)
99-
return msle(real_non_negative, pred_non_negative, squared = False)
94+
# rmsle imported from sklearn
95+
# ------------------------------------------------------------------------------
96+
97+
# def rmse(real, pred):
98+
# return mse(real, pred, squared = False)
99+
# ------------------------------------------------------------------------------
100+
101+
# def rmsle(real, pred):
102+
# '''Changes negative predictions to 0 for correct calculation'''
103+
# try:
104+
# return msle(real, pred, squared = False)
105+
# except ValueError:
106+
# pred_non_negative, real_non_negative = remove_negatives(pred, real)
107+
# return msle(real_non_negative, pred_non_negative, squared = False)
108+
100109

101110
# ------------------------------------------------------------------------------
102111

verstack/tests/conftest.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import sys
2+
import os
3+
4+
# Get the verstack directory (parent of tests)
5+
verstack_dir = os.path.dirname(os.path.dirname(__file__))
6+
tests_dir = os.path.dirname(__file__)
7+
8+
# Remove installed verstack entries (paths containing 'verstack' under site-packages or dist-packages) from sys.path
9+
sys.path = [p for p in sys.path if not (
10+
'verstack' in p and ('site-packages' in p or 'dist-packages' in p)
11+
)]
12+
13+
# Insert local paths at the very beginning
14+
sys.path.insert(0, verstack_dir)
15+
sys.path.insert(0, tests_dir)
16+
17+
print(f"Verstack dir: {verstack_dir}")
18+
print(f"Tests dir: {tests_dir}")
19+
print(f"Cleaned sys.path first 5: {sys.path[:5]}")
20+
21+
# Debug: Check what's in the directories
22+
print(f"Contents of verstack_dir: {os.listdir(verstack_dir)}")
23+
if os.path.exists(os.path.join(verstack_dir, 'lgbm_optuna_tuning')):
24+
print(f"lgbm_optuna_tuning exists: {os.path.exists(os.path.join(verstack_dir, 'lgbm_optuna_tuning', 'LGBMTuner.py'))}")
25+
if os.path.exists(os.path.join(verstack_dir, 'categoric_encoders')):
26+
print(f"categoric_encoders exists: {os.path.exists(os.path.join(verstack_dir, 'categoric_encoders', 'WeightOfEvidenceEncoder.py'))}")

verstack/tests/test_DateParser.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,31 @@
1-
import sys
21
import numpy as np
32
import pandas as pd
3+
import os
44

5-
sys.path.append("../")
65
from DateParser import DateParser
76

7+
# Get the directory where this test file is located
8+
test_dir = os.path.dirname(__file__)
9+
810
datasets = {
911
1: {
10-
"train": "dateparser_train_1.parquet",
11-
"test": "dateparser_test_1.parquet",
12+
"train": os.path.join(test_dir, "dateparser_train_1.parquet"),
13+
"test": os.path.join(test_dir, "dateparser_test_1.parquet"),
1214
},
1315
2: {
14-
"train": "dateparser_train_2.parquet",
15-
"test": "dateparser_test_2.parquet",
16+
"train": os.path.join(test_dir, "dateparser_train_2.parquet"),
17+
"test": os.path.join(test_dir, "dateparser_test_2.parquet"),
1618
},
1719
3: {
18-
"train": "dateparser_train_3.parquet",
19-
"test": "dateparser_test_3.parquet",
20+
"train": os.path.join(test_dir, "dateparser_train_3.parquet"),
21+
"test": os.path.join(test_dir, "dateparser_test_3.parquet"),
2022
},
2123
4: {
22-
"train": "dateparser_train_4.parquet",
23-
"test": "dateparser_test_4.parquet",
24+
"train": os.path.join(test_dir, "dateparser_train_4.parquet"),
25+
"test": os.path.join(test_dir, "dateparser_test_4.parquet"),
2426
},
2527
}
2628

27-
2829
# test overall DateParser not being broken
2930
def test_DateParser():
3031
result = []
@@ -37,4 +38,4 @@ def test_DateParser():
3738
result.append(
3839
np.all(transformed_train.columns == transformed_test.columns)
3940
)
40-
assert result
41+
assert result

verstack/tests/test_DateParser_new.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
import pandas as pd
2-
import numpy as np
3-
import sys
4-
5-
sys.path.append("../")
62
from DateParser import DateParser
73

84
def test_all_date_formats_and_transform():

verstack/tests/test_Factorizer.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import sys
2-
sys.path.append("../")
31
import numpy as np
42
from common import generate_data
53
from categoric_encoders.Factorizer import Factorizer

verstack/tests/test_FrequencyEncoder.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import sys
2-
sys.path.append("../")
31
import numpy as np
42
from common import generate_data
53
from categoric_encoders.FrequencyEncoder import FrequencyEncoder

0 commit comments

Comments
 (0)