diff --git a/.gitignore b/.gitignore index ad7cea3..eb5ccbd 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ # Ignore compiled shared object files **/*.so +site/ +**/.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 63bf25a..eb91398 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ # RustyNum +RustyNum is a NumPy compatible array library for Python that uses Rust SIMD to accelerate common operations. + RustyNum is a high-performance numerical computation library written in Rust, created to demonstrate the potential of Rust's SIMD (Single Instruction, Multiple Data) capabilities using the nightly `portable_simd` feature, and serving as a fast alternative to Numpy. ## Key Features diff --git a/docs/api/index.md b/docs/api/index.md index 83b9811..0b28d18 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -1,3 +1,7 @@ +--- +title: RustyNum Python API reference +description: Full Python API reference for RustyNum. Classes, functions, and examples. +--- # Python API Reference ## Classes diff --git a/docs/benchmarks/rustynum-vs-numpy.md b/docs/benchmarks/rustynum-vs-numpy.md new file mode 100644 index 0000000..5cfc95d --- /dev/null +++ b/docs/benchmarks/rustynum-vs-numpy.md @@ -0,0 +1,184 @@ +--- +title: RustyNum vs NumPy performance benchmarks and guidance +description: Reproducible benchmarks that compare RustyNum with NumPy for mean, min, dot, and matrix multiplication in Python, with setup and tips. +--- + +# RustyNum vs NumPy performance + +This is the canonical comparison for RustyNum and NumPy. It includes setup, a small runner you can copy, and notes on when each library is a good choice. Use it as a reference that stays current, and see the blog for dated releases that summarize new results. + +RustyNum is a NumPy compatible array library for Python that uses Rust SIMD to accelerate common operations. 
+ +--- + +## What we measure + +- Mean over large vectors +- Minimum over large vectors +- Dot product of a matrix and a vector +- Matrix multiplication of two square matrices + +These cases map to common data tasks and machine learning preprocessing on a single CPU. + +--- + +## Install and record environment + +Install the packages. + +```bash +python -V +pip install -U rustynum numpy +``` + +Record your environment when you share results. + +```python +import numpy as np, platform, sys, rustynum as rnp +print("Python", sys.version.split()[0]) +print("NumPy", np.__version__) +print("OS", platform.platform()) +print("CPU", platform.processor()) +print("RustyNum", rnp.__version__) +``` + +NumPy performance depends on the BLAS that ships with your wheel. OpenBLAS or MKL can change results. That is expected. + +--- + +## Benchmark runner + +The script below times four operations. It warms up first and reports medians. Sizes are modest so it runs fast during local testing. + +```python +import time, math, numpy as np +import rustynum as rnp +from statistics import median + +def bench(fn, repeats=7, warmup=2): + for _ in range(warmup): + fn() + times = [] + for _ in range(repeats): + t0 = time.perf_counter() + fn() + times.append(time.perf_counter() - t0) + return median(times) + +def as_rn(x): + if x.dtype == np.float32: + return rnp.NumArray(x.flatten().tolist(), dtype="float32").reshape(list(x.shape)) + elif x.dtype == np.float64: + return rnp.NumArray(x.flatten().tolist(), dtype="float64").reshape(list(x.shape)) + else: + raise ValueError("Use float32 or float64") + +def run_suite(n=1_000_000, m=1000): + results = [] + + # 1) mean over vector + vec32 = np.random.rand(n).astype(np.float32) + vec32_rn = as_rn(vec32) + + t_np = bench(lambda: float(np.mean(vec32))) + t_rn = bench(lambda: float(vec32_rn.mean().item())) + results.append(("mean", f"{n}", t_rn, t_np, t_np / t_rn if t_rn > 0 else math.nan)) + + # 2) minimum over vector + t_np = bench(lambda: 
float(np.min(vec32))) + t_rn = bench(lambda: float(vec32_rn.min())) + results.append(("min", f"{n}", t_rn, t_np, t_np / t_rn if t_rn > 0 else math.nan)) + + # 3) matrix vector dot + mat = np.random.rand(m, m).astype(np.float32) + vec = np.random.rand(m).astype(np.float32) + mat_rn = as_rn(mat) + vec_rn = as_rn(vec) + + t_np = bench(lambda: np.dot(mat, vec)) + t_rn = bench(lambda: mat_rn.dot(vec_rn)) + results.append(("matrix@vector", f"{m}x{m} · {m}", t_rn, t_np, t_np / t_rn if t_rn > 0 else math.nan)) + + # 4) matrix matrix + a = np.random.rand(m, m).astype(np.float32) + b = np.random.rand(m, m).astype(np.float32) + a_rn = as_rn(a) + b_rn = as_rn(b) + + t_np = bench(lambda: a @ b) + t_rn = bench(lambda: a_rn @ b_rn) + results.append(("matrix@matrix", f"{m}x{m}", t_rn, t_np, t_np / t_rn if t_rn > 0 else math.nan)) + + print("\nOperation, Size, RustyNum s, NumPy s, Speedup NumPy/RustyNum") + for op, size, trn, tnp, sp in results: + print(f"{op}, {size}, {trn:.6f}, {tnp:.6f}, {sp:.2f}x") + +if __name__ == "__main__": + run_suite(n=1_000_000, m=1000) +``` + +Save the script as `benchmarks.py`, then run it. + +```bash +python benchmarks.py +``` + +--- + +## Reading the results + +- Mean and minimum are memory bound. RustyNum often does well due to SIMD friendly loops. +- Matrix vector speed depends on memory access and BLAS. +- Matrix matrix can favor NumPy on large sizes with tuned BLAS. RustyNum can be close on medium sizes. + +Small numeric differences are normal in floating point code. + +--- + +## Example results + +These numbers show benchmark results from RustyNum 0.1.4 vs NumPy 1.24.4 on an Apple M1 Pro laptop with float32 inputs. 
+ +| Operation | Size | RustyNum (µs) | NumPy (µs) | Speedup (NumPy time / RustyNum time) | +| ------------- | --------- | ----------- | ----------- | --------------------------- | +| mean | 1e3 | 8.8993 | 22.6300 | 2.54x | +| min | 1e3 | 10.1423 | 28.9693 | 2.86x | +| matrix@vector | 1000x1000 | 10,041.6093 | 24,990.2646 | 2.49x | +| matrix@matrix | 500x500 | 7,010.6638 | 14,878.9556 | 2.12x | + +Times are in microseconds, whereas the runner prints seconds, and the mean, min, and matrix@matrix sizes are smaller than the runner's defaults. Your machine will differ. Use the runner above to collect your own data. + +--- + +## Tips for fair runs + +- Stick to float32 or float64 and be consistent. +- Avoid Python loops in the hot path. +- Warm up first and report medians. +- Share CPU model, versions, and BLAS choice. + +--- + +## When to pick RustyNum + +- You want fast reductions or transforms with a small wheel. +- You need a compact dependency for packaging on servers or edge. +- You want SIMD backed methods without heavy external libraries. + +## When to stay with NumPy + +- You run heavy dense linear algebra that benefits from a tuned BLAS. +- You need a very wide API that RustyNum does not cover yet. + +--- + +## Next steps + +- Start with the [Quick Start](../quick-start.md). +- Try the tutorial on [Replacing Core NumPy Calls](../tutorials/replacing-numpy-for-faster-analytics.md). +- Learn matrix math in [Getting Better Matrix Operations](../tutorials/better-matrix-operations.md). +- Install or upgrade with the [Installation Guide](../installation.md). + +--- + +**Further reading**: [Installation](../installation.md), [Quick Start](../quick-start.md), [API Reference](../api/index.md). 
diff --git a/docs/index.md b/docs/index.md index 2e02164..daba0a0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,24 @@ -![RustyNum Banner](assets/rustynum-banner.png?raw=true "RustyNum") +--- +title: RustyNum a fast NumPy alternative written in Rust +description: RustyNum is a SIMD accelerated numerical library for Python with a NumPy like API. Learn what it is, why it is fast, and how to use it in Python. 
+--- +![RustyNum logo and wordmark](assets/rustynum-banner.png?raw=true "RustyNum") + + # Welcome to RustyNum! @@ -48,8 +68,8 @@ Ready to explore RustyNum? Here’s how you can dive in: 1. **[Installation](installation.md)**: Install RustyNum with a single `pip` command. 2. **[Quick Start](quick-start.md)**: Learn the basics of using RustyNum. -3. **[Tutorials](tutorials/)**: Explore real-world examples and advanced guides. -4. **[API Reference](api/)**: Dive deep into RustyNum’s Python API. +3. **[Tutorials](tutorials/index.md)**: Explore real-world examples and advanced guides. +4. **[API Reference](api/index.md)**: Dive deep into RustyNum's Python API. --- diff --git a/docs/installation.md b/docs/installation.md index 33165c9..3fbf2e1 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,3 +1,7 @@ +--- +title: Install RustyNum on macOS Linux and Windows for Python 3.8 to 3.13 +description: Step by step install guide for RustyNum on Apple Silicon Intel Linux and Windows. Verify the install and fix common issues. +--- # Installation Guide Get started with RustyNum by following this installation guide. Whether you're using Python for data analysis or contributing to the core library, this page covers everything you need. diff --git a/docs/quick-start.md b/docs/quick-start.md index 56a9427..b8e9850 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -1,7 +1,13 @@ +--- +title: Quick start with RustyNum in Python +description: Create arrays, compute means and dot products, and compare RustyNum with NumPy. Copy ready examples for Python. +--- # Getting Started with RustyNum Welcome to RustyNum! This guide will help you quickly get up and running with RustyNum, from basic operations to a comparison with NumPy. If you're familiar with NumPy, you'll feel right at home. +RustyNum is a NumPy compatible array library for Python that uses Rust SIMD to accelerate common operations. + --- ## 🔥 Why Use RustyNum? 
@@ -123,7 +129,10 @@ print(dot_product) Once you’re comfortable with the basics, dive deeper into RustyNum with these resources: - **[Tutorials](tutorials/index.md)**: Explore real-world applications of RustyNum. -- **[API Reference](../api/)**: Detailed documentation of RustyNum’s Python bindings. +- **[API Reference](api/index.md)**: Detailed documentation of RustyNum's Python bindings. + + +For larger arrays and timings, see the comparison in [RustyNum vs NumPy performance](benchmarks/rustynum-vs-numpy.md). --- @@ -134,6 +143,6 @@ If you have any questions, check out the [GitHub Discussions](https://github.com ---
- Explore Tutorials - View API Reference + Explore Tutorials + View API Reference
\ No newline at end of file diff --git a/docs/robots.txt b/docs/robots.txt new file mode 100644 index 0000000..d51b472 --- /dev/null +++ b/docs/robots.txt @@ -0,0 +1,3 @@ +User-agent: * +Allow: / +Sitemap: https://rustynum.com/sitemap.xml \ No newline at end of file diff --git a/docs/tutorials/better-matrix-operations.md b/docs/tutorials/better-matrix-operations.md index d4b0091..0c40bf8 100644 --- a/docs/tutorials/better-matrix-operations.md +++ b/docs/tutorials/better-matrix-operations.md @@ -1,3 +1,7 @@ +--- +title: Matrix multiplication in Python faster with RustyNum vs NumPy +description: Learn matrix vector and matrix matrix multiplication in Python using RustyNum, compare with NumPy, and measure runtime. +--- # Getting Better Matrix Operations with RustyNum Matrix operations are at the core of many data science and engineering workflows. When performance matters, switching from traditional Python solutions to RustyNum can be a great move. In this tutorial, you’ll learn how to perform matrix-vector and matrix-matrix operations with RustyNum, compare them to NumPy, and see how SIMD acceleration can improve efficiency. @@ -183,8 +187,12 @@ While timing results can vary based on hardware and environment, RustyNum may of ## Next Steps -- Check out the [API Reference](../../api/) for a complete list of functions and classes. -- Explore more advanced examples in the upcoming [Tutorials](../). +- Check out the [API Reference](../api/index.md) for a complete list of functions and classes. +- Explore more advanced examples in the upcoming [Tutorials](index.md). - Contribute your own ideas or ask questions on [GitHub](https://github.com/IgorSusmelj/rustynum). -Matrix operations are a cornerstone of computational tasks, and RustyNum offers a Python-friendly path to faster, more efficient code. By tapping into Rust’s low-level optimizations, you can scale your projects without changing your entire workflow. 
\ No newline at end of file +Matrix operations are a cornerstone of computational tasks, and RustyNum offers a Python-friendly path to faster, more efficient code. By tapping into Rust’s low-level optimizations, you can scale your projects without changing your entire workflow. + +--- + +**Further reading**: [Installation](../installation.md), [Quick Start](../quick-start.md), [API Reference](../api/index.md). \ No newline at end of file diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index cd63165..70cde17 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -1,3 +1,7 @@ +--- +title: RustyNum tutorials and guides +description: Practical guides that show how to use RustyNum for array operations and matrix math in Python. +--- # RustyNum Tutorials - [Getting Better Matrix Operations with RustyNum](better-matrix-operations.md) diff --git a/docs/tutorials/replacing-numpy-for-faster-analytics.md b/docs/tutorials/replacing-numpy-for-faster-analytics.md index f0b6bec..a677875 100644 --- a/docs/tutorials/replacing-numpy-for-faster-analytics.md +++ b/docs/tutorials/replacing-numpy-for-faster-analytics.md @@ -1,3 +1,7 @@ +--- +title: Replace selected NumPy calls to speed up analytics +description: Speed up mean min and dot product in Python by swapping NumPy with RustyNum. Includes timing code and results. +--- # Replacing Core NumPy Calls for Faster Analytics Many developers rely on NumPy for array operations, statistical calculations, and linear algebra. RustyNum offers an alternative for several common NumPy routines, potentially speeding up your Python analytics. In this tutorial, you’ll see how to replace selected NumPy calls with RustyNum equivalents, measure performance differences, and integrate RustyNum into existing data workflows. @@ -194,6 +198,10 @@ Speedup: 3.79x ## Next Steps -- Check out the [API Reference](../../api/) for a complete list of functions and classes. 
-- Explore more advanced examples in the upcoming [Tutorials](../). +- Check out the [API Reference](../api/index.md) for a complete list of functions and classes. +- Explore more advanced examples in the upcoming [Tutorials](index.md). - Contribute your own ideas or ask questions on [GitHub](https://github.com/IgorSusmelj/rustynum). + +--- + +**Further reading**: [Installation](../installation.md), [Quick Start](../quick-start.md), [API Reference](../api/index.md). \ No newline at end of file diff --git a/docs/tutorials/streamlining-machine-learning-preprocessing.md b/docs/tutorials/streamlining-machine-learning-preprocessing.md index dafd085..394d299 100644 --- a/docs/tutorials/streamlining-machine-learning-preprocessing.md +++ b/docs/tutorials/streamlining-machine-learning-preprocessing.md @@ -1,3 +1,7 @@ +--- +title: Scale normalize and concatenate features in Python with RustyNum +description: Faster preprocessing for ML in Python. Min max scaling L2 normalization concatenation and scikit learn integration. +--- # Streamlining Machine Learning Preprocessing with RustyNum Data preprocessing is a key step in machine learning. Whether you're prepping large datasets for neural networks or just cleaning up smaller ones, RustyNum can help speed up vectorized operations and transformations. In this tutorial, we'll explore how to use RustyNum for several preprocessing tasks, then show how to integrate your processed data with popular Python ML libraries. @@ -210,8 +214,12 @@ print("Matrix multiplication result:", product.tolist()) ## Next Steps -- Check out the other [Tutorials](../) for deeper dives into RustyNum's capabilities. -- Review the [API Reference](../api/) for more advanced methods. +- Check out the other [Tutorials](index.md) for deeper dives into RustyNum's capabilities. +- Review the [API Reference](../api/index.md) for more advanced methods. 
- Join the community on [GitHub Discussions](https://github.com/IgorSusmelj/rustynum/discussions) to share ideas or ask questions. -Preprocessing data can be a bottleneck in many ML pipelines. By harnessing RustyNum, you might reduce that overhead while still benefiting from a Python-friendly workflow. Give these techniques a try in your own projects and see how RustyNum fits into your machine learning stack! \ No newline at end of file +Preprocessing data can be a bottleneck in many ML pipelines. By harnessing RustyNum, you might reduce that overhead while still benefiting from a Python-friendly workflow. Give these techniques a try in your own projects and see how RustyNum fits into your machine learning stack! + +--- + +**Further reading**: [Installation](../installation.md), [Quick Start](../quick-start.md), [API Reference](../api/index.md). \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 70575c6..5560d92 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -40,6 +40,8 @@ nav: - Getting Better Matrix Operations with RustyNum: tutorials/better-matrix-operations.md - Replacing Core NumPy Calls for Faster Analytics: tutorials/replacing-numpy-for-faster-analytics.md - Streamlining Machine Learning Preprocessing: tutorials/streamlining-machine-learning-preprocessing.md + - Benchmarks: + - RustyNum vs NumPy performance: benchmarks/rustynum-vs-numpy.md - API Reference: api/index.md plugins: - search @@ -76,7 +78,7 @@ extra: extra_javascript: - assets/js/analytics.js - + markdown_extensions: - admonition - codehilite