Skip to content

Error when running factorize in python notebook #23

@whtns

Description

@whtns

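Hi, I'm getting the error below when running the factorize step in the Python notebook; according to the traceback, `factorize` itself completes and the exception is raised by the `cnmf_run.postprocess(cpus=1)` call that follows it. The normalized counts matrix passed to the usage refit has 10672 features, while the consensus spectra have only 2000, so I presume there's some confusion about when to input all features versus only the 2000 over-dispersed features. Can you advise?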

Cell In[9], line 12
      9 # these steps take long
     11 cnmf_run.factorize(verbose=True, total_workers=threads)
---> 12 cnmf_run.postprocess(cpus=1)
     14 # Merges cNMF results into the `Dataset` object
     15 rna.add_cnmf_results(cnmf_output_dir=cnmf_results_dir, cnmf_name=run_name)

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:736, in cNMF.postprocess(self, cpus, local_density_threshold, local_neighborhood_size, skip_missing_iterations)
    734 elif cpus == 1:
    735     for k in sorted(set(run_params.n_components)):
--> 736         call_consensus(k)
    737 else:
    738     logging.error(f"{cpus} is an invalid number of cpus. Please specify a positive integer.")

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:627, in cNMF.get_and_check_consensus(self, k, local_density_threshold, local_neighborhood_size)
    625 def get_and_check_consensus(self, k, local_density_threshold, local_neighborhood_size):
    626     logging.info(f"Creating consensus GEPs and usages for k={k}")
--> 627     self.consensus(k, density_threshold=local_density_threshold,
    628         local_neighborhood_size=local_neighborhood_size,
    629         show_clustering=True,
    630         close_clustergram_fig=True)
    631     density_threshold_repl = str(local_density_threshold).replace(".", "_")
    632     filenames = [
    633         self.paths['consensus_spectra']%(k, density_threshold_repl),
    634         self.paths['consensus_spectra']%(k, density_threshold_repl),
   (...)
    642         self.paths['gene_spectra_score__txt']%(k, density_threshold_repl)
    643         ]

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:458, in cNMF.consensus(self, k, density_threshold, local_neighborhood_size, show_clustering, skip_density_and_return_after_stats, close_clustergram_fig, refit_usage)
    455 stability = silhouette_score(l2_spectra.values, kmeans_cluster_labels, metric='euclidean')
    457 # Obtain reconstructed count matrix by re-fitting usage and computing dot product: usage.dot(spectra)
--> 458 rf_usages = self.refit_usage(norm_counts.X, median_spectra)
    459 rf_usages = pd.DataFrame(rf_usages, index=norm_counts.obs.index, columns=median_spectra.index)        
    460 rf_pred_norm_counts = rf_usages.dot(median_spectra)

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:383, in cNMF.refit_usage(self, X, spectra)
    380 else:
    381     refit_nmf_kwargs.update(dict(n_components = spectra.shape[0], H = spectra, update_H = False))
--> 383 _, rf_usages = self._nmf(X, nmf_kwargs=refit_nmf_kwargs)
    384 if (type(X) is pd.DataFrame) and (type(spectra) is pd.DataFrame):
    385     rf_usages = pd.DataFrame(rf_usages, index=X.index, columns=spectra.index)

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/mosaicmpi/cnmf.py:286, in cNMF._nmf(self, X, nmf_kwargs)
    278 def _nmf(self, X, nmf_kwargs):
    279     """
    280 
    281     :param X: Normalized counts dataFrame to be factorized.
   (...)
    284     :type nmf_kwargs: dict
    285     """
--> 286     (usages, spectra, niter) = non_negative_factorization(X, **nmf_kwargs)
    288     return(spectra, usages)

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1133, in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha_W, alpha_H, l1_ratio, random_state, verbose, shuffle)
   1130 X = check_array(X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32])
   1132 with config_context(assume_finite=True):
-> 1133     W, H, n_iter = est._fit_transform(X, W=W, H=H, update_H=update_H)
   1135 return W, H, n_iter

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1730, in NMF._fit_transform(self, X, y, W, H, update_H)
   1723     raise ValueError(
   1724         "When beta_loss <= 0 and X contains zeros, "
   1725         "the solver may diverge. Please add small values "
   1726         "to X, or use a positive beta_loss."
   1727     )
   1729 # initialize or check W and H
-> 1730 W, H = self._check_w_h(X, W, H, update_H)
   1732 # scale the regularization terms
   1733 l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:1237, in _BaseNMF._check_w_h(self, X, W, H, update_H)
   1231 if W is not None:
   1232     warnings.warn(
   1233         "When update_H=False, the provided initial W is not used.",
   1234         RuntimeWarning,
   1235     )
-> 1237 _check_init(H, (self._n_components, n_features), "NMF (input H)")
   1238 if self._n_components == "auto":
   1239     self._n_components = H.shape[0]

File ~/.conda/envs/ARMOR/lib/python3.11/site-packages/sklearn/decomposition/_nmf.py:79, in _check_init(A, shape, whom)
     74     raise ValueError(
     75         f"Array with wrong first dimension passed to {whom}. Expected {shape[0]}, "
     76         f"but got {A.shape[0]}."
     77     )
     78 if shape[1] != "auto" and A.shape[1] != shape[1]:
---> 79     raise ValueError(
     80         f"Array with wrong second dimension passed to {whom}. Expected {shape[1]}, "
     81         f"but got {A.shape[1]}."
     82     )
     83 check_non_negative(A, whom)
     84 if np.max(A) == 0:

ValueError: Array with wrong second dimension passed to NMF (input H). Expected 10672, but got 2000.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions