Hi, I'm getting the error below when running the postprocess step (it appears right after factorize completes — see the arrow at `cnmf_run.postprocess(cpus=1)` in the traceback). I presume there's some confusion about when to supply all features versus the 2000 over-dispersed features: the re-fit expects spectra over all 10672 features, but the consensus spectra only cover the 2000 over-dispersed ones. Can you advise?
Cell In[9], line 12
9 # these steps take long
11 cnmf_run.factorize(verbose=True, total_workers=threads)
---> 12 cnmf_run.postprocess(cpus=1)
14 # Merges cNMF results into the `Dataset` object
15 rna.add_cnmf_results(cnmf_output_dir=cnmf_results_dir, cnmf_name=run_name)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:736, in cNMF.postprocess(self, cpus, local_density_threshold, local_neighborhood_size, skip_missing_iterations)
734 elif cpus == 1:
735 for k in sorted(set(run_params.n_components)):
--> 736 call_consensus(k)
737 else:
738 logging.error(f"{cpus} is an invalid number of cpus. Please specify a positive integer.")
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:627, in cNMF.get_and_check_consensus(self, k, local_density_threshold, local_neighborhood_size)
625 def get_and_check_consensus(self, k, local_density_threshold, local_neighborhood_size):
626 logging.info(f"Creating consensus GEPs and usages for k={k}")
--> 627 self.consensus(k, density_threshold=local_density_threshold,
628 local_neighborhood_size=local_neighborhood_size,
629 show_clustering=True,
630 close_clustergram_fig=True)
631 density_threshold_repl = str(local_density_threshold).replace(".", "_")
632 filenames = [
633 self.paths['consensus_spectra']%(k, density_threshold_repl),
634 self.paths['consensus_spectra']%(k, density_threshold_repl),
(...)
642 self.paths['gene_spectra_score__txt']%(k, density_threshold_repl)
643 ]
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:458, in cNMF.consensus(self, k, density_threshold, local_neighborhood_size, show_clustering, skip_density_and_return_after_stats, close_clustergram_fig, refit_usage)
455 stability = silhouette_score(l2_spectra.values, kmeans_cluster_labels, metric='euclidean')
457 # Obtain reconstructed count matrix by re-fitting usage and computing dot product: usage.dot(spectra)
--> 458 rf_usages = self.refit_usage(norm_counts.X, median_spectra)
459 rf_usages = pd.DataFrame(rf_usages, index=norm_counts.obs.index, columns=median_spectra.index)
460 rf_pred_norm_counts = rf_usages.dot(median_spectra)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:383, in cNMF.refit_usage(self, X, spectra)
380 else:
381 refit_nmf_kwargs.update(dict(n_components = spectra.shape[0], H = spectra, update_H = False))
--> 383 _, rf_usages = self._nmf(X, nmf_kwargs=refit_nmf_kwargs)
384 if (type(X) is pd.DataFrame) and (type(spectra) is pd.DataFrame):
385 rf_usages = pd.DataFrame(rf_usages, index=X.index, columns=spectra.index)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:286, in cNMF._nmf(self, X, nmf_kwargs)
278 def _nmf(self, X, nmf_kwargs):
279 """
280
281 :param X: Normalized counts dataFrame to be factorized.
(...)
284 :type nmf_kwargs: dict
285 """
--> 286 (usages, spectra, niter) = non_negative_factorization(X, **nmf_kwargs)
288 return(spectra, usages)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
207 try:
208 with config_context(
209 skip_parameter_validation=(
210 prefer_skip_nested_validation or global_skip_validation
211 )
212 ):
--> 213 return func(*args, **kwargs)
214 except InvalidParameterError as e:
215 # When the function is just a wrapper around an estimator, we allow
216 # the function to delegate validation to the estimator, but we replace
217 # the name of the estimator by the name of the function in the error
218 # message to avoid confusion.
219 msg = re.sub(
220 r"parameter of \w+ must be",
221 f"parameter of {func.__qualname__} must be",
222 str(e),
223 )
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1133, in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha_W, alpha_H, l1_ratio, random_state, verbose, shuffle)
1130 X = check_array(X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32])
1132 with config_context(assume_finite=True):
-> 1133 W, H, n_iter = est._fit_transform(X, W=W, H=H, update_H=update_H)
1135 return W, H, n_iter
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1730, in NMF._fit_transform(self, X, y, W, H, update_H)
1723 raise ValueError(
1724 "When beta_loss <= 0 and X contains zeros, "
1725 "the solver may diverge. Please add small values "
1726 "to X, or use a positive beta_loss."
1727 )
1729 # initialize or check W and H
-> 1730 W, H = self._check_w_h(X, W, H, update_H)
1732 # scale the regularization terms
1733 l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1237, in _BaseNMF._check_w_h(self, X, W, H, update_H)
1231 if W is not None:
1232 warnings.warn(
1233 "When update_H=False, the provided initial W is not used.",
1234 RuntimeWarning,
1235 )
-> 1237 _check_init(H, (self._n_components, n_features), "NMF (input H)")
1238 if self._n_components == "auto":
1239 self._n_components = H.shape[0]
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:79, in _check_init(A, shape, whom)
74 raise ValueError(
75 f"Array with wrong first dimension passed to {whom}. Expected {shape[0]}, "
76 f"but got {A.shape[0]}."
77 )
78 if shape[1] != "auto" and A.shape[1] != shape[1]:
---> 79 raise ValueError(
80 f"Array with wrong second dimension passed to {whom}. Expected {shape[1]}, "
81 f"but got {A.shape[1]}."
82 )
83 check_non_negative(A, whom)
84 if np.max(A) == 0:
ValueError: Array with wrong second dimension passed to NMF (input H). Expected 10672, but got 2000.
Hi, I'm getting the error above when running the postprocess step (it appears right after factorize completes — see the arrow at `cnmf_run.postprocess(cpus=1)` in the traceback). I presume there's some confusion about when to supply all features versus the 2000 over-dispersed features: the re-fit expects spectra over all 10672 features, but the consensus spectra only cover the 2000 over-dispersed ones. Can you advise?