diff --git a/applpy/algebra.py b/applpy/algebra.py index 8ca5cd2..2958121 100644 --- a/applpy/algebra.py +++ b/applpy/algebra.py @@ -2,7 +2,6 @@ Algebraic operations on one or two random variables. """ -import numpy as np from sympy import Symbol, exp, expand, integrate, ln, nan, oo, simplify from .conversion import pdf @@ -10,55 +9,6 @@ from .transform import Convert, transform -def _product_discrete(random_variable_1, random_variable_2): - """ - Procedure Name: _product_discrete - Purpose: Compute the product of two independent - discrete random variables - Arguments: 1. random_variable_1: A random variable - 2. random_variable_2: A random variable - Output: 1. The product of random_variable_1 and random_variable_2 - """ - # Ensure that both random variables are discrete - if not random_variable_1.is_discrete() or not random_variable_2.is_discrete(): - raise RVError("both random variables must be discrete") - # Convert both random variables to pdf form - left_pdf_rv = pdf(random_variable_1) - right_pdf_rv = pdf(random_variable_2) - # Convert the support and the value of each random variable - # into numpy arrays - support1 = np.asarray(left_pdf_rv.support, dtype=object) - support2 = np.asarray(right_pdf_rv.support, dtype=object) - pdf1 = np.asarray(left_pdf_rv.func, dtype=object) - pdf2 = np.asarray(right_pdf_rv.func, dtype=object) - # Find all possible values of support1*support2 and val1*val2 - # via the pairwise outer product, flatten into vectors - prodsupport = np.outer(support1, support2).flatten() - prodpdf = np.outer(pdf1, pdf2).flatten() - # Convert the resulting vectors into lists - supportlist = prodsupport.tolist() - pdflist = prodpdf.tolist() - # Sort the function and support lists for the product - sortlist = list(zip(supportlist, pdflist)) - sortlist.sort() - prodlist2 = [] - funclist2 = [] - for i in range(len(sortlist)): - prodlist2.append(sortlist[i][0]) - funclist2.append(sortlist[i][1]) - # Remove redundant elements in the support list - prodlist3 = [] - funclist3 = [] - for i in range(len(prodlist2)): - if prodlist2[i] not in prodlist3: - prodlist3.append(prodlist2[i]) - funclist3.append(funclist2[i]) - else: - funclist3[prodlist3.index(prodlist2[i])] += funclist2[i] - # Create and return the new random variable - return RV(funclist3, prodlist3, ["discrete", "pdf"]) - - def convolution_iid(random_variable, n): """ Procedure Name: convolution_iid @@ -239,8 +189,9 @@ def product(random_variable_1, random_variable_2): 2. random_variable_2: A random variable Output: 1. The product of random_variable_1 and random_variable_2 """ - # If the random variable is continuous, find and return the - # product of the two random variables + if random_variable_1.domain_type != random_variable_2.domain_type: + raise RVError("both random variables must have the same functional form") + if random_variable_1.is_continuous(): # left_pdf_rv.drop_assumptions() # right_pdf_rv.drop_assumptions() @@ -574,8 +525,6 @@ def product(random_variable_1, random_variable_2): product_functions_final.append(product_functions[i]) return RV(product_functions_final, product_support, ["continuous", "pdf"]) - # If the two random variables are discrete in functinonal form, - # find and return the product of the two random variables if random_variable_1.is_discrete_functional(): for num in random_variable_1.support: if not isinstance(num, (int, float)): @@ -591,10 +540,10 @@ def product(random_variable_1, random_variable_2): raise RVError(err_string) random_variable_2 = Convert(random_variable_2) - # If the distributions are discrete, find and return the product - # of the two random variables. if random_variable_1.is_discrete(): - return _product_discrete(random_variable_1, random_variable_2) + fast_rv_1 = random_variable_1.to_fast_rv() + fast_rv_2 = random_variable_2.to_fast_rv() + return RV.from_fast_rv(fast_rv_1 * fast_rv_2) # Backward-compatible aliases for legacy APPLPy function names. diff --git a/applpy/rv.py b/applpy/rv.py index b96b901..3b4549c 100644 --- a/applpy/rv.py +++ b/applpy/rv.py @@ -225,6 +225,29 @@ def is_discrete(self): def is_discrete_functional(self): return self._has_domain_type(DomainType.DISCRETE_FUNCTIONAL) + def to_fast_rv(self): + """ + Convert this APPLPy RV into the Rust-backed FastRV representation. + """ + return applpy_rust.FastRV( + function=self.func, + support=self.support, + functional_form=self.functional_form, + domain_type=self.domain_type, + ) + + @classmethod + def from_fast_rv(cls, fast_rv): + """ + Build an APPLPy RV from a Rust-backed FastRV instance. + """ + return cls( + func=fast_rv.function, + support=fast_rv.support, + functional_form=fast_rv.functional_form, + domain_type=fast_rv.domain_type, + ) + def __repr__(self): """ Procedure Name: __repr__ @@ -911,12 +934,7 @@ def bootstrap_rv(varlist: List[Number]) -> RV: given variate list is equally probable """ fast_rv = applpy_rust.bootstrap_rv(varlist) - return RV( - func=fast_rv.function, - support=fast_rv.support, - functional_form=fast_rv.functional_form, - domain_type=fast_rv.domain_type, - ) + return RV.from_fast_rv(fast_rv) def verify_pdf(random_variable): diff --git a/rust/src/algorithms/algebra.rs b/rust/src/algorithms/algebra.rs new file mode 100644 index 0000000..e5c4505 --- /dev/null +++ b/rust/src/algorithms/algebra.rs @@ -0,0 +1,238 @@ +#![allow(dead_code)] + +use crate::algorithms::number::Number; +use crate::algorithms::rv::{DomainType, FunctionalForm, RandomVariable}; + +/// Computes the product of two independent discrete random variables +/// +/// # Arguments +/// * `random_variable_1` - the first random variable +/// * `random_variable_2` - the second random variable +/// +/// # Returns +/// * `product_rv` - the product of the two random variables +/// +/// # Examples +/// ``` +/// use applpy_rust::algorithms::algebra::product_discrete; +/// use applpy_rust::algorithms::number::Number; +/// use applpy_rust::algorithms::rv::{DomainType, FunctionalForm, RandomVariable}; +/// use num_rational::Rational64; +/// +/// let rv1 = RandomVariable { +/// function: vec![ +/// Number::Rational(Rational64::new(1, 2)), +/// Number::Rational(Rational64::new(1, 2)), +/// ], +/// support: vec![Number::Integer(1), Number::Integer(2)], +/// functional_form: FunctionalForm::Pdf, +/// domain_type: DomainType::Discrete, +/// }; +/// +/// let rv2 = RandomVariable { +/// function: vec![ +/// Number::Rational(Rational64::new(1, 2)), +/// Number::Rational(Rational64::new(1, 2)), +/// ], +/// support: vec![Number::Integer(2), Number::Integer(3)], +/// functional_form: FunctionalForm::Pdf, +/// domain_type: DomainType::Discrete, +/// }; +/// +/// let product = product_discrete(&rv1, &rv2).unwrap(); +/// +/// assert_eq!( +/// product.support, +/// vec![Number::Integer(2), Number::Integer(3), Number::Integer(4), Number::Integer(6)] +/// ); +/// assert_eq!( +/// product.function, +/// vec![ +/// Number::Rational(Rational64::new(1, 4)), +/// Number::Rational(Rational64::new(1, 4)), +/// Number::Rational(Rational64::new(1, 4)), +/// Number::Rational(Rational64::new(1, 4)), +/// ] +/// ); +/// assert!(product.verify_pdf(None).unwrap()); +/// ``` +pub fn product_discrete( + random_variable_1: &RandomVariable, + random_variable_2: &RandomVariable, +) -> Result { + let pdf_random_variable_1 = random_variable_1.to_pdf()?; + let function_1 = pdf_random_variable_1.function; + let support_1 = pdf_random_variable_1.support; + + let pdf_random_variable_2 = random_variable_2.to_pdf()?; + let function_2 = pdf_random_variable_2.function; + let support_2 = pdf_random_variable_2.support; + + // Compute support1 x support2 and the associated probability + // for all combinations of support values + let mut raw_product_support = Vec::new(); + for &s1 in support_1.iter() { + for &s2 in support_2.iter() { + let support_value = s1 * s2; + raw_product_support.push(support_value); + } + } + + let mut raw_product_function = Vec::new(); + for &f1 in function_1.iter() { + for &f2 in function_2.iter() { + let probability = f1 * f2; + raw_product_function.push(probability); + } + } + + // Sort the multiplied support and function values + let mut raw_product_pairs: Vec<_> = raw_product_support + .into_iter() + .zip(raw_product_function) + .collect(); + raw_product_pairs.sort_by(|a, b| { + let first_value = a.0.to_f64(); + let second_value = b.0.to_f64(); + first_value.total_cmp(&second_value) + }); + + let (sorted_support, sorted_function): (Vec, Vec) = + raw_product_pairs.into_iter().unzip(); + + // De-duplicate the support. If a value appears multiple times in the + // support, combine the probabilities + let mut product_function = Vec::new(); + let mut product_support = Vec::new(); + for (&s, &probability) in sorted_support.iter().zip(sorted_function.iter()) { + let support_index = product_support + .iter() + .position(|&x: &Number| x.to_f64() == s.to_f64()); + + match support_index { + Some(index) => { + product_function[index] += probability; + } + None => { + product_function.push(probability); + product_support.push(s); + } + } + } + + let product_rv = RandomVariable { + function: product_function, + support: product_support, + functional_form: FunctionalForm::Pdf, + domain_type: DomainType::Discrete, + }; + Ok(product_rv) +} + +#[cfg(test)] +mod tests { + use super::*; + use num_rational::Rational64; + + fn two_point_pdf( + support: [i64; 2], + probabilities: [Rational64; 2], + form: FunctionalForm, + ) -> RandomVariable { + RandomVariable { + function: vec![ + Number::Rational(probabilities[0]), + Number::Rational(probabilities[1]), + ], + support: vec![Number::Integer(support[0]), Number::Integer(support[1])], + functional_form: form, + domain_type: DomainType::Discrete, + } + } + + #[test] + fn product_discrete_combines_duplicate_support_values() { + let rv1 = two_point_pdf( + [0, 1], + [Rational64::new(1, 2), Rational64::new(1, 2)], + FunctionalForm::Pdf, + ); + let rv2 = two_point_pdf( + [2, 3], + [Rational64::new(1, 5), Rational64::new(4, 5)], + FunctionalForm::Pdf, + ); + + let product = product_discrete(&rv1, &rv2).unwrap(); + + assert_eq!( + product.support, + vec![Number::Integer(0), Number::Integer(2), Number::Integer(3)] + ); + assert_eq!( + product.function, + vec![ + Number::Rational(Rational64::new(1, 2)), + Number::Rational(Rational64::new(1, 10)), + Number::Rational(Rational64::new(2, 5)) + ] + ); + assert!(product.verify_pdf(None).unwrap()); + } + + #[test] + fn product_discrete_accepts_cdf_inputs_by_converting_to_pdf() { + let rv1 = two_point_pdf( + [1, 2], + [Rational64::new(1, 4), Rational64::new(1, 1)], + FunctionalForm::Cdf, + ); + let rv2 = two_point_pdf( + [3, 5], + [Rational64::new(1, 2), Rational64::new(1, 1)], + FunctionalForm::Cdf, + ); + + let product = product_discrete(&rv1, &rv2).unwrap(); + + assert_eq!( + product.support, + vec![ + Number::Integer(3), + Number::Integer(5), + Number::Integer(6), + Number::Integer(10) + ] + ); + assert_eq!( + product.function, + vec![ + Number::Rational(Rational64::new(1, 8)), + Number::Rational(Rational64::new(1, 8)), + Number::Rational(Rational64::new(3, 8)), + Number::Rational(Rational64::new(3, 8)) + ] + ); + assert!(product.verify_pdf(None).unwrap()); + } + + #[test] + fn product_discrete_preserves_pdf_and_discrete_domain() { + let rv1 = two_point_pdf( + [2, 4], + [Rational64::new(1, 3), Rational64::new(2, 3)], + FunctionalForm::Pdf, + ); + let rv2 = two_point_pdf( + [1, 3], + [Rational64::new(3, 10), Rational64::new(7, 10)], + FunctionalForm::Pdf, + ); + + let product = product_discrete(&rv1, &rv2).unwrap(); + + assert_eq!(product.functional_form, FunctionalForm::Pdf); + assert_eq!(product.domain_type, DomainType::Discrete); + assert!(product.verify_pdf(None).unwrap()); + } +} diff --git a/rust/src/algorithms/mod.rs b/rust/src/algorithms/mod.rs index 30e0696..cffed21 100644 --- a/rust/src/algorithms/mod.rs +++ b/rust/src/algorithms/mod.rs @@ -1,3 +1,4 @@ +pub mod algebra; pub mod conversion; pub mod moments; pub mod number; diff --git a/rust/src/algorithms/rv.rs b/rust/src/algorithms/rv.rs index a049cdc..271a4f5 100644 --- a/rust/src/algorithms/rv.rs +++ b/rust/src/algorithms/rv.rs @@ -1,10 +1,12 @@ #![allow(dead_code)] use std::fmt; +use std::ops::Mul; use num_rational::Rational64; use num_traits::cast::ToPrimitive; +use crate::algorithms::algebra; use crate::algorithms::conversion; use crate::algorithms::moments; use crate::algorithms::number::Number; @@ -61,6 +63,15 @@ pub struct RandomVariable { pub domain_type: DomainType, } +impl Mul for RandomVariable { + type Output = Result; + + fn mul(self, rhs: Self) -> Self::Output { + let product_rv = algebra::product_discrete(&self, &rhs)?; + Ok(product_rv) + } +} + impl RandomVariable { pub fn verify_pdf(&self, tolerance: Option) -> Result { if self.functional_form != FunctionalForm::Pdf { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 9b51c78..d53ee91 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -35,6 +35,9 @@ fn applpy_rust(_py: Python<'_>, module: &Bound<'_, PyModule>) -> PyResult<()> { )?)?; module.add_function(wrap_pyfunction!(python::api::bootstrap_rv_py, module)?)?; + // random variable algebra functions + module.add_function(wrap_pyfunction!(python::api::product_discrete_py, module)?)?; + // transformation functions module.add_function(wrap_pyfunction!(python::api::truncate_discrete_py, module)?)?; module.add_function(wrap_pyfunction!(python::api::mixture_discrete_py, module)?)?; diff --git a/rust/src/python/api.rs b/rust/src/python/api.rs index 95e5908..7b1effa 100644 --- a/rust/src/python/api.rs +++ b/rust/src/python/api.rs @@ -1,10 +1,13 @@ #![allow(clippy::useless_conversion)] #![allow(dead_code)] +use std::ops::Mul; + use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::PyAny; +use crate::algorithms::algebra; use crate::algorithms::number::Number; use crate::algorithms::order_stat; use crate::algorithms::rv; @@ -112,6 +115,25 @@ pub fn discrete_minimum_py( )) } +#[pyfunction(name = "product_discrete", signature = (random_variable_1, random_variable_2))] +pub fn product_discrete_py( + random_variable_1: &Bound<'_, PyAny>, + random_variable_2: &Bound<'_, PyAny>, +) -> PyResult { + let random_variable_1: FastRV = random_variable_1.extract()?; + let random_variable_2: FastRV = random_variable_2.extract()?; + + let product_rv = algebra::product_discrete(&random_variable_1.inner, &random_variable_2.inner) + .map_err(PyValueError::new_err)?; + + Ok(FastRV::new( + product_rv.function, + product_rv.support, + product_rv.functional_form, + product_rv.domain_type, + )) +} + #[pyfunction(name = "next_combination", signature = (previous, n))] pub fn next_combination_py(previous: Vec, n: usize) -> PyResult>> { if previous.is_empty() { @@ -199,6 +221,21 @@ pub struct FastRV { inner: RandomVariable, } +impl Mul for FastRV { + type Output = Result; + + fn mul(self, rhs: FastRV) -> Self::Output { + let product_rv = algebra::product_discrete(&self.inner, &rhs.inner)?; + let fast_rv = Self::new( + product_rv.function, + product_rv.support, + product_rv.functional_form, + product_rv.domain_type, + ); + Ok(fast_rv) + } +} + fn format_number_list(values: &[Number]) -> String { values .iter() @@ -236,6 +273,22 @@ impl FastRV { ) } + pub fn __mul__(&self, rhs: FastRV) -> PyResult { + let self_rv = self.inner.clone(); + let rhs_rv = rhs.inner.clone(); + + let product_rv = self_rv * rhs_rv; + + match product_rv { + Ok(rv) => { + let fast_rv = + FastRV::new(rv.function, rv.support, rv.functional_form, rv.domain_type); + Ok(fast_rv) + } + Err(s) => Err(PyErr::new::(s)), + } + } + #[getter] pub fn function(&self) -> Vec { self.inner.function.clone() @@ -280,7 +333,7 @@ impl FastRV { } Err(PyErr::new::( - "converstion to pdf failed", + "conversion to pdf failed", )) } @@ -292,7 +345,7 @@ impl FastRV { } Err(PyErr::new::( - "converstion to cdf failed", + "conversion to cdf failed", )) } @@ -304,7 +357,7 @@ impl FastRV { } Err(PyErr::new::( - "converstion to sf failed", + "conversion to sf failed", )) } @@ -316,7 +369,7 @@ impl FastRV { } Err(PyErr::new::( - "converstion to idf failed", + "conversion to idf failed", )) } @@ -328,7 +381,7 @@ impl FastRV { } Err(PyErr::new::( - "converstion to chf failed", + "conversion to chf failed", )) } diff --git a/test_applpy/unit/test_rv.py b/test_applpy/unit/test_rv.py index c1c61ed..07fa139 100644 --- a/test_applpy/unit/test_rv.py +++ b/test_applpy/unit/test_rv.py @@ -1,7 +1,6 @@ import pytest from sympy import Integer, Rational -from applpy.algebra import _product_discrete as ProductDiscrete from applpy.appl_plot import plot_dist, plot_emp_cdf, pp_plot, qq_plot from applpy import rust_bindings from applpy.rv import ( @@ -106,6 +105,21 @@ def test_bootstrap_rv_creates_discrete_pdf_with_frequencies(): assert rv.func == [Rational(1, 4), Rational(1, 4), Rational(1, 2)] +def test_fast_rv_round_trip_conversion_methods(): + rv = _discrete_pdf_bernoulli() + + fast_rv = rv.to_fast_rv() + assert fast_rv.__class__.__name__ == "FastRV" + assert fast_rv.function == rv.func + assert fast_rv.support == rv.support + assert fast_rv.functional_form == rv.functional_form + assert fast_rv.domain_type == rv.domain_type + + converted = RV.from_fast_rv(fast_rv) + assert isinstance(converted, RV) + assert converted == rv + + def test_next_combination_advances_lexicographically(): assert rust_bindings.next_combination([1, 2, 4], 5) == [1, 2, 5] assert rust_bindings.next_combination([1, 4, 5], 5) == [2, 3, 4] @@ -171,15 +185,16 @@ def test_single_rv_transformative_operations(): def test_two_rv_operations_for_continuous_and_discrete(): discrete = _discrete_pdf() bernoulli = _discrete_pdf_bernoulli() + product = discrete * bernoulli - assert isinstance(ProductDiscrete(discrete, bernoulli), RV) + assert isinstance(product, RV) def test_two_rv_operations_error_paths(): discrete = _discrete_pdf() - with pytest.raises(RVError, match="both random variables must be discrete"): - ProductDiscrete(_uniform_continuous_pdf(), discrete) + with pytest.raises(RVError, match="both random variables must have the same functional form"): + _uniform_continuous_pdf() * discrete def test_plotting_and_misc_utility_paths():