Example of the error I get for "otsu" (I get the same error for "3k" unless I set plot to False):
region_bin_topics_otsu = binarize_topics(
cistopic_obj, method='otsu',
plot=False, num_columns=5)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[14], line 1
----> 1 region_bin_topics_otsu = binarize_topics(
2 cistopic_obj, method='otsu',
3 plot=False, num_columns=5
4 )
File ~/miniconda3/envs/pycistopic_alone/lib/python3.11/site-packages/pycisTopic/topic_binarization.py:108, in binarize_topics(cistopic_obj, target, method, smooth_topics, ntop, predefined_thr, nbins, plot, figsize, num_columns, save)
106 thr = predefined_thr["Topic" + str(i + 1)]
107 elif method == "otsu":
--> 108 thr = threshold_otsu(l_norm, nbins=nbins)
109 elif method == "yen":
110 thr = threshold_yen(l_norm, nbins=nbins)
File ~/miniconda3/envs/pycistopic_alone/lib/python3.11/site-packages/pycisTopic/topic_binarization.py:268, in threshold_otsu(array, nbins)
247 def threshold_otsu(array, nbins=100):
248 """
249 Apply Otsu threshold on topic-region distributions [Otsu, 1979].
250
(...)
266 cybernetics, 9(1), pp.62-66.
267 """
--> 268 hist, bin_centers = histogram(array, nbins)
269 hist = hist.astype(float)
270 # Class probabilities for all possible thresholds
File ~/miniconda3/envs/pycistopic_alone/lib/python3.11/site-packages/pycisTopic/topic_binarization.py:336, in histogram(array, nbins)
320 """
321 Draw histogram from distribution and identify centers.
322
(...)
333 Histogram values and bin centers.
334 """
335 array = array.ravel().flatten()
--> 336 hist, bin_edges = np.histogram(array, bins=nbins, range=None)
337 bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0
338 return hist, bin_centers
File ~/miniconda3/envs/pycistopic_alone/lib/python3.11/site-packages/numpy/lib/histograms.py:780, in histogram(a, bins, range, density, weights)
680 r"""
681 Compute the histogram of a dataset.
682
(...)
776
777 """
778 a, weights = _ravel_and_check_weights(a, weights)
--> 780 bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
782 # Histogram is an integer or a float array depending on the weights.
783 if weights is None:
File ~/miniconda3/envs/pycistopic_alone/lib/python3.11/site-packages/numpy/lib/histograms.py:426, in _get_bin_edges(a, bins, range, weights)
423 if n_equal_bins < 1:
424 raise ValueError('`bins` must be positive, when an integer')
--> 426 first_edge, last_edge = _get_outer_edges(a, range)
428 elif np.ndim(bins) == 1:
429 bin_edges = np.asarray(bins)
File ~/miniconda3/envs/pycistopic_alone/lib/python3.11/site-packages/numpy/lib/histograms.py:323, in _get_outer_edges(a, range)
321 first_edge, last_edge = a.min(), a.max()
322 if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
--> 323 raise ValueError(
324 "autodetected range of [{}, {}] is not finite".format(first_edge, last_edge))
326 # expand empty range to avoid divide by zero
327 if first_edge == last_edge:
ValueError: autodetected range of [nan, nan] is not finite
Hi,
I used the polars_1xx branch to run the models, which worked well, but I then get the following error when I attempt to binarize the topics with the "3k" and "otsu" methods:
Originally posted by @Sebdumas in #195