Hi, I'm getting the error below when running the postprocess step (it appears right after factorize completes — see the arrow at `cnmf_run.postprocess(cpus=1)` in the traceback). I presume there's some confusion about when to supply all features versus the 2000 over-dispersed features: the re-fit expects spectra over all 10672 features, but the consensus spectra only cover the 2000 over-dispersed ones. Can you advise?
Cell In[9], line 12
9 # these steps take long
11 cnmf_run.factorize(verbose=True, total_workers=threads)
---> 12 cnmf_run.postprocess(cpus=1)
14 # Merges cNMF results into the `Dataset` object
15 rna.add_cnmf_results(cnmf_output_dir=cnmf_results_dir, cnmf_name=run_name)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:736, in cNMF.postprocess(self, cpus, local_density_threshold, local_neighborhood_size, skip_missing_iterations)
734 elif cpus == 1:
735 for k in sorted(set(run_params.n_components)):
--> 736 call_consensus(k)
737 else:
738 logging.error(f"{cpus} is an invalid number of cpus. Please specify a positive integer.")
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:627, in cNMF.get_and_check_consensus(self, k, local_density_threshold, local_neighborhood_size)
625 def get_and_check_consensus(self, k, local_density_threshold, local_neighborhood_size):
626 logging.info(f"Creating consensus GEPs and usages for k={k}")
--> 627 self.consensus(k, density_threshold=local_density_threshold,
628 local_neighborhood_size=local_neighborhood_size,
629 show_clustering=True,
630 close_clustergram_fig=True)
631 density_threshold_repl = str(local_density_threshold).replace(".", "_")
632 filenames = [
633 self.paths['consensus_spectra']%(k, density_threshold_repl),
634 self.paths['consensus_spectra']%(k, density_threshold_repl),
(...)
642 self.paths['gene_spectra_score__txt']%(k, density_threshold_repl)
643 ]
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:458, in cNMF.consensus(self, k, density_threshold, local_neighborhood_size, show_clustering, skip_density_and_return_after_stats, close_clustergram_fig, refit_usage)
455 stability = silhouette_score(l2_spectra.values, kmeans_cluster_labels, metric='euclidean')
457 # Obtain reconstructed count matrix by re-fitting usage and computing dot product: usage.dot(spectra)
--> 458 rf_usages = self.refit_usage(norm_counts.X, median_spectra)
459 rf_usages = pd.DataFrame(rf_usages, index=norm_counts.obs.index, columns=median_spectra.index)
460 rf_pred_norm_counts = rf_usages.dot(median_spectra)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:383, in cNMF.refit_usage(self, X, spectra)
380 else:
381 refit_nmf_kwargs.update(dict(n_components = spectra.shape[0], H = spectra, update_H = False))
--> 383 _, rf_usages = self._nmf(X, nmf_kwargs=refit_nmf_kwargs)
384 if (type(X) is pd.DataFrame) and (type(spectra) is pd.DataFrame):
385 rf_usages = pd.DataFrame(rf_usages, index=X.index, columns=spectra.index)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:286, in cNMF._nmf(self, X, nmf_kwargs)
278 def _nmf(self, X, nmf_kwargs):
279 """
280
281 :param X: Normalized counts dataFrame to be factorized.
(...)
284 :type nmf_kwargs: dict
285 """
--> 286 (usages, spectra, niter) = non_negative_factorization(X, **nmf_kwargs)
288 return(spectra, usages)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
207 try:
208 with config_context(
209 skip_parameter_validation=(
210 prefer_skip_nested_validation or global_skip_validation
211 )
212 ):
--> 213 return func(*args, **kwargs)
214 except InvalidParameterError as e:
215 # When the function is just a wrapper around an estimator, we allow
216 # the function to delegate validation to the estimator, but we replace
217 # the name of the estimator by the name of the function in the error
218 # message to avoid confusion.
219 msg = re.sub(
220 r"parameter of \w+ must be",
221 f"parameter of {func.__qualname__} must be",
222 str(e),
223 )
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1133, in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha_W, alpha_H, l1_ratio, random_state, verbose, shuffle)
1130 X = check_array(X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32])
1132 with config_context(assume_finite=True):
-> 1133 W, H, n_iter = est._fit_transform(X, W=W, H=H, update_H=update_H)
1135 return W, H, n_iter
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1730, in NMF._fit_transform(self, X, y, W, H, update_H)
1723 raise ValueError(
1724 "When beta_loss <= 0 and X contains zeros, "
1725 "the solver may diverge. Please add small values "
1726 "to X, or use a positive beta_loss."
1727 )
1729 # initialize or check W and H
-> 1730 W, H = self._check_w_h(X, W, H, update_H)
1732 # scale the regularization terms
1733 l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1237, in _BaseNMF._check_w_h(self, X, W, H, update_H)
1231 if W is not None:
1232 warnings.warn(
1233 "When update_H=False, the provided initial W is not used.",
1234 RuntimeWarning,
1235 )
-> 1237 _check_init(H, (self._n_components, n_features), "NMF (input H)")
1238 if self._n_components == "auto":
1239 self._n_components = H.shape[0]
File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:79, in _check_init(A, shape, whom)
74 raise ValueError(
75 f"Array with wrong first dimension passed to {whom}. Expected {shape[0]}, "
76 f"but got {A.shape[0]}."
77 )
78 if shape[1] != "auto" and A.shape[1] != shape[1]:
---> 79 raise ValueError(
80 f"Array with wrong second dimension passed to {whom}. Expected {shape[1]}, "
81 f"but got {A.shape[1]}."
82 )
83 check_non_negative(A, whom)
84 if np.max(A) == 0:
ValueError: Array with wrong second dimension passed to NMF (input H). Expected 10672, but got 2000.
Hi, I'm getting the error above when running the postprocess step (it appears right after factorize completes — see the arrow at `cnmf_run.postprocess(cpus=1)` in the traceback). I presume there's some confusion about when to supply all features versus the 2000 over-dispersed features: the re-fit expects spectra over all 10672 features, but the consensus spectra only cover the 2000 over-dispersed ones. Can you advise?