Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions config/checkstyle/checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -422,11 +422,11 @@
<module name="InvalidJavadocPosition"/>

<!-- Checks that every public method (excluding getters, setters and constructors) has a header comment. -->
<module name="MissingJavadocMethodCheck">
<!-- <module name="MissingJavadocMethodCheck">
<property name="minLineCount" value="1"/>
<property name="allowMissingPropertyJavadoc" value="true"/>
<property name="ignoreMethodNamesRegex" value="(set.*|get.*|main)"/>
</module>
</module> -->

<!-- Checks that every public class, enumeration and interface has a header comment. -->
<module name="MissingJavadocType"/>
Expand Down
59 changes: 42 additions & 17 deletions src/main/java/dataStructures/avlTree/AVLTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ public int height(T key) {
* @param n node whose height is to be updated
*/
private void updateHeight(Node<T> n) {
    // Height of a node is 1 + the height of its taller child.
    // The pasted diff region contained this statement twice (pre- and
    // post-change versions back to back); a single call is sufficient
    // and avoids recomputing both subtree heights a second time.
    n.setHeight(
        1 + Math.max(
            height(n.getLeft()),
            height(n.getRight())
        )
    );
}

/**
Expand All @@ -87,12 +92,13 @@ private int getBalance(Node<T> n) {
*/
private Node<T> rotateRight(Node<T> n) {
Node<T> newRoot = n.getLeft();
// this will become the left child of n after rotation
Node<T> newLeftSub = newRoot.getRight();

newRoot.setRight(n);
n.setLeft(newLeftSub);

newRoot.setParent(n.getParent());
n.setParent(newRoot);

updateHeight(n);
updateHeight(newRoot);
Expand All @@ -110,12 +116,13 @@ private Node<T> rotateRight(Node<T> n) {
*/
private Node<T> rotateLeft(Node<T> n) {
Node<T> newRoot = n.getRight();
// this will become the right child of n after rotation
Node<T> newRightSub = newRoot.getLeft();

newRoot.setLeft(n);
n.setRight(newRightSub);

newRoot.setParent(n.getParent());
n.setParent(newRoot);

updateHeight(n);
updateHeight(newRoot);
Expand All @@ -132,13 +139,19 @@ private Node<T> rebalance(Node<T> n) {
updateHeight(n);
int balance = getBalance(n);
if (balance < -1) { // right-heavy case
if (height(n.getRight().getLeft()) > height(n.getRight().getRight())) {
n.setRight(rotateRight(n.getRight()));
Node<T> rightChild = n.getRight();
Node<T> leftSubChild = rightChild.getLeft();
Node<T> rightSubChild = rightChild.getRight();
if (height(leftSubChild) > height(rightSubChild)) {
n.setRight(rotateRight(rightChild));
}
n = rotateLeft(n);
} else if (balance > 1) { // left-heavy case
if (height(n.getLeft().getRight()) > height(n.getLeft().getLeft())) {
n.setLeft(rotateLeft(n.getLeft()));
Node<T> leftChild = n.getLeft();
Node<T> leftSubChild = leftChild.getLeft();
Node<T> rightSubChild = leftChild.getRight();
if (height(rightSubChild) > height(leftSubChild)) {
n.setLeft(rotateLeft(leftChild));
}
n = rotateRight(n);
}
Expand All @@ -159,6 +172,12 @@ private Node<T> getMostLeft(Node<T> n) {
}
}

/**
* Find the right-most child of the (sub)tree rooted at a specified node
*
* @param n tree is rooted at this node
* @return right-most node
*/
private Node<T> getMostRight(Node<T> n) {
if (n.getRight() == null) {
return n;
Expand Down Expand Up @@ -189,11 +208,9 @@ private Node<T> insert(Node<T> node, T key) {
return new Node<>(key);
} else if (node.getKey().compareTo(key) < 0) {
node.setRight(insert(node.getRight(), key));
node.getRight().setParent(node);
// note that insufficient to update parent in rotateLeft & rotateRight if still considered balanced
} else if (node.getKey().compareTo(key) > 0) {
node.setLeft(insert(node.getLeft(), key));
node.getLeft().setParent(node);
} else {
throw new RuntimeException("Duplicate key not supported!");
}
Expand Down Expand Up @@ -226,20 +243,22 @@ private Node<T> delete(Node<T> node, T key) {
node.setLeft(delete(node.getLeft(), key));
} else {
if (node.getLeft() == null || node.getRight() == null) { // case of 1 or 0 child
if (node.getLeft() == null && node.getRight() == null) {
node = null; // 0-child case
if (node.getLeft() == null && node.getRight() == null) { // 0-child case; just delete
node = null;
} else if (node.getRight() == null) {
node.getLeft().setParent(node.getParent());
Node<T> parentNode = node.getParent();
node.getLeft().setParent(parentNode);
node = node.getLeft();
} else {
node.getRight().setParent(node.getParent());
Node<T> parentNode = node.getParent();
node.getRight().setParent(parentNode);
node = node.getRight();
}
} else { // 2-children case
} else { // 2-children case; successor replacement
Node<T> successor = getMostLeft(node.getRight());
node.setKey(successor.getKey());
// since this is a 2-children case, the successor of the deleted node has
// at most one child; right-child (else it would continue going left)
// at most one child; right-child (else, it would continue going left)
node.setRight(delete(node.getRight(), successor.getKey()));
}
}
Expand Down Expand Up @@ -274,10 +293,13 @@ public Node<T> search(T key) {
* Search for the predecessor of a given key.
*
* @param key find predecessor of this key
* @return generic type value; null if key has no predecessor
* @return generic type value; null if key has no predecessor or tree is empty
*/
public T predecessor(T key) {
Node<T> curr = root;
if (curr == null) {
return null;
}
while (curr != null) {
if (curr.getKey().compareTo(key) == 0) {
break;
Expand Down Expand Up @@ -325,10 +347,13 @@ private T predecessor(Node<T> node) {
* Search for the successor of a given key.
*
* @param key find successor of this key
* @return generic type value; null if key has no successor
* @return generic type value; null if key has no successor or tree is empty
*/
public T successor(T key) {
Node<T> curr = root;
if (curr == null) {
return null;
}
while (curr != null) {
if (curr.getKey().compareTo(key) == 0) {
break;
Expand Down
15 changes: 11 additions & 4 deletions src/main/java/dataStructures/avlTree/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public class Node<T extends Comparable<T>> {

/**
 * Creates a node holding the given key. A newly created node is a
 * leaf, so its height is initialised to 0.
 *
 * @param key key stored in this node
 */
public Node(T key) {
    this.key = key;
    this.height = 0; // height of a new node is 0 (leaf)
}

public boolean isLeaf() {
Expand All @@ -40,16 +41,22 @@ public Node<T> getLeft() {
return left;
}

public void setLeft(Node<T> left) {
this.left = left;
/**
 * Attaches the given node as the left child. When the new child is
 * non-null, this node is recorded as its parent so that the parent
 * pointer stays consistent with the child pointer.
 *
 * @param node new left child; may be null
 */
public void setLeft(Node<T> node) {
    left = node;
    if (node == null) {
        return;
    }
    node.parent = this;
}

/**
 * @return right child of this node; null if there is none
 */
public Node<T> getRight() {
    return right;
}

public void setRight(Node<T> right) {
this.right = right;
/**
 * Attaches the given node as the right child. When the new child is
 * non-null, this node is recorded as its parent so that the parent
 * pointer stays consistent with the child pointer.
 *
 * @param node new right child; may be null
 */
public void setRight(Node<T> node) {
    right = node;
    if (node == null) {
        return;
    }
    node.parent = this;
}

public Node<T> getParent() {
Expand Down
125 changes: 86 additions & 39 deletions src/main/java/dataStructures/avlTree/README.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
# AVL Trees

## Background
Say you want to search for a value in an array. If the array were sorted, lucky you! You can do it with binary search in
`O(logn)` time. But if the array weren't sorted, you can't avoid that `O(n)` linear search loop. Now, one idea is to
first sort the array, and incur a 1-time cost of `O(n)` and subsequent search operations can enjoy that `O(logn)` query
cost. This is all gucci, but it assumes that there will be no additional data streaming in. If incoming data is not
infrequent, you'll have to incur `O(n)` insertion cost each time to maintain sorted order, and this can undermine
Say you want to search for a value in an array. If the array were sorted, lucky you! You can do it with binary search in `O(logn)` time. But if the array wasn't sorted, you can't avoid that `O(n)` linear search loop. Now, one idea is to first sort the array, and incur a one-time cost of `O(n)` so that subsequent search operations can enjoy that `O(logn)` query cost. This is all gucci, but it assumes that there will be no additional data streaming in. If incoming data is not infrequent, you'll have to incur `O(n)` insertion cost each time to maintain sorted order, and this can undermine
performance as a whole. If only there were some structure that allows us to enjoy `O(logn)` operations across the board...

We have seen binary search trees (BSTs), which always maintains data in sorted order. This allows us to avoid the
overhead of sorting before we search. However, we also learnt that unbalanced BSTs can be incredibly inefficient for
insertion, deletion and search operations, which are O(height) in time complexity (in the case of degenerate trees,
think of a linked list, operations can go up to O(n)).
i.e. linked list, operations can go up to `O(n)`).

Here we discuss a type of self-balancing BST, known as the AVL tree, that avoids the worst case O(n) performance
Here we discuss a type of self-balancing BST, known as the AVL tree, that avoids the worst case `O(n)` performance
across the operations by ensuring careful updating of the tree's structure whenever there is a change
(e.g. insert or delete).

Expand All @@ -31,10 +27,9 @@ Height: The number of edges on the longest path from that node to a leaf. A leaf
</details>

### Definition of Balanced Trees
Balanced trees are a special subset of trees with **height in the order of log(n)**, where n is the number of nodes.
This choice is not an arbitrary one. It can be mathematically shown that a binary tree of n nodes has height of at least
log(n) (in the case of a complete binary tree). So, it makes intuitive sense to give trees whose heights are roughly
in the order of log(n) the desirable 'balanced' label.
Balanced trees are a special subset of trees with **height in the order of `log(n)`**, where `n` is the number of nodes.
<br>
This choice is not an arbitrary one. It can be mathematically shown that a binary tree of `n` nodes has height of at least `log(n)` (in the case of a complete binary tree). So, it makes intuitive sense to give trees whose heights are roughly in the order of `log(n)` the desirable 'balanced' label.

<div align="center">
<img src="../../../../../docs/assets/images/BalancedProof.png" width="40%">
Expand All @@ -51,8 +46,7 @@ What is important is that this **'good' property holds even after every change**
The 'good' property in AVL Trees is the **height-balanced** property. Height-balanced on a node is defined as
**difference in height between the left and right child node being not more than 1**. <br>
We say the tree is height-balanced if every node in the tree is height-balanced. Be careful not to conflate
the concept of "balanced tree" and "height-balanced" property. They are not the same; the latter is used to achieve the
former.
the concept of "balanced tree" and "height-balanced" property. They are not the same; the latter is used to achieve the former.

<details>
<summary> <b>Ponder..</b> </summary>
Expand All @@ -63,8 +57,8 @@ Yes! In fact, you can always construct a large enough AVL tree where their diffe
</details>
</details>

It can be mathematically shown that a **height-balanced tree with n nodes, has at most height <= 2log(n)** (
in fact, using the golden ratio, we can achieve a tighter bound of ~1.44log(n)).
It can be mathematically shown that a **height-balanced tree with `n` nodes has height at most `2log(n)`** (
in fact, using the golden ratio, we can achieve a tighter bound of ~`1.44log(n)`).
Therefore, following the definition of a balanced tree, AVL trees are balanced.

<div align="center">
Expand All @@ -73,39 +67,92 @@ Therefore, following the definition of a balanced tree, AVL trees are balanced.
Credits: CS2040s Lecture 9
</div>

### Balance Factor
To detect imbalance, each node tracks a **balance factor**:

```
balance factor = height(left subtree) - height(right subtree)
```

A node is height-balanced if its balance factor is in `{-1, 0, 1}`. When `|balance factor| > 1`, rebalancing is required.

- **Positive** balance factor → left-heavy
- **Negative** balance factor → right-heavy

## Complexity Analysis
**Search, Insertion, Deletion, Predecessor & Successor queries Time**: O(height) = O(logn)
**Time:**
| Operation | Complexity |
|-----------|------------|
| Search | `O(log n)` |
| Insert | `O(log n)` |
| Delete | `O(log n)` |
| Predecessor/Successor | `O(log n)` |
| Single Rotation | `O(1)` |

**Space**: O(n) <br>
where n is the number of elements (whatever the structure, it must store at least n nodes)
**Space**: `O(n)` where `n` is the number of elements

## Operations
Minimally, an implementation of AVL tree must support the standard **insert**, **delete**, and **search** operations.
**Update** can be simulated by searching for the old key, deleting it, and then inserting a node with the new key.
An AVL tree supports the standard **insert**, **delete**, and **search** operations.
**Update** can be simulated by deleting the old key and inserting the new one.

Naturally, with insertions and deletions, the structure of the tree will change, and it may not satisfy the
"height-balance" property of the AVL tree. Without this property, we may lose our O(log(n)) run-time guarantee.
Hence, we need some re-balancing operations. To do so, tree rotation operations are introduced. Below is one example.
Insertions and deletions can violate the height-balanced property. To restore it, we use **rotations**.

<div align="center">
<img src="../../../../../docs/assets/images/TreeRotation.png" width="40%">
<br>
Credits: CS2040s Lecture 10
</div>

Prof Seth explains it best! Go re-visit his slides (Lecture 10) for the operations :P <br>
Here is a [link](https://www.youtube.com/watch?v=dS02_IuZPes&list=PLgpwqdiEMkHA0pU_uspC6N88RwMpt9rC8&index=9)
to prof's lecture on trees. <br>
_We may add a summary in the near future._

## Application
While AVL trees offer excellent lookup, insertion, and deletion times due to their strict balancing,
the overhead of maintaining this balance can make them less preferred for applications
where insertions and deletions are significantly more frequent than lookups. As a result, AVL trees often find itself
over-shadowed in practical use by other counterparts like RB-trees,
which boast a relatively simple implementation and lower overhead, or B-trees which are ideal for optimizing disk
accesses in databases.

That said, AVL tree is conceptually simple and often used as the base template for further augmentation to tackle
niche problems. Orthogonal Range Searching and Interval Trees can be implemented with some minor augmentation to
an existing AVL tree.
### The 4 Rotation Cases
After an insert or delete, we walk back up to the root, checking balance factors. When a node has `|balance factor| > 1`, one of four cases applies:

| Case | Condition | Fix |
|------|-----------|-----|
| **Left-Left (LL)** | Left-heavy, left child is left-heavy or balanced | Single right rotation |
| **Right-Right (RR)** | Right-heavy, right child is right-heavy or balanced | Single left rotation |
| **Left-Right (LR)** | Left-heavy, left child is right-heavy | Left rotate left child, then right rotate node |
| **Right-Left (RL)** | Right-heavy, right child is left-heavy | Right rotate right child, then left rotate node |

<details>
<summary><b>How to identify the case</b></summary>

1. Node has balance factor `> 1` (left-heavy):
- If left child's balance factor `>= 0` → **LL case**
- If left child's balance factor `< 0` → **LR case**

2. Node has balance factor `< -1` (right-heavy):
- If right child's balance factor `<= 0` → **RR case**
- If right child's balance factor `> 0` → **RL case**

</details>

**Interview tip:** Rotations are `O(1)` - just pointer updates. The `O(log n)` cost of insert/delete comes from traversing the height of the tree, not from rotations.

Prof Seth explains it best! For visual demonstrations, see [Prof Seth's lecture 10](https://www.youtube.com/watch?v=dS02_IuZPes&list=PLgpwqdiEMkHA0pU_uspC6N88RwMpt9rC8&index=9) on trees.

## Notes
1. **Height guarantee**: AVL trees have height at most `~1.44 log(n)`, tighter than Red-Black trees' `2 log(n)`. This makes AVL faster for lookup-heavy workloads.

2. **Rebalancing frequency**: AVL may rotate more often than RB-trees on insert/delete since it enforces stricter balance. This is the trade-off for faster lookups.

3. **Duplicate keys**: The implementation here does not support duplicate keys. To handle duplicates, you could store a count in each node or use a list as the value.

4. **Augmentation**: AVL trees are a great base for augmented structures. Store additional info (e.g., subtree size for order statistics) and update it during rotations.

## Applications
AVL trees offer excellent lookup times due to strict balancing, but the overhead of maintaining balance
can make them less preferred when insertions/deletions vastly outnumber lookups.

| Use Case | Best Choice | Why |
|----------|-------------|-----|
| Lookup-heavy workloads | AVL | Stricter balance → faster search |
| Insert/delete-heavy | Red-Black | Fewer rotations on average |
| Disk-based storage | B-tree | Optimized for block I/O |
| In-memory databases | AVL or RB | Both work well |

**Interview tip:** "When would you choose AVL over Red-Black?" → When reads dominate writes, AVL's tighter height bound (`1.44 log n` vs `2 log n`) gives faster lookups.

AVL trees are also commonly used as a base for augmented structures:
- **Order Statistics Tree** - find k-th smallest element in `O(log n)`
- **Interval Tree** - find all intervals overlapping a point
- **Orthogonal Range Tree** - 2D range queries
Loading
Loading