Skip to content

Java Regular Expressions

Mattscreative edited this page Mar 9, 2026 · 1 revision

Java Regular Expressions Guide - Pattern Matching

Table of Contents


Introduction

Regular expressions (regex) are patterns used to match character combinations in strings. Java provides regex support through the java.util.regex package.


Pattern and Matcher

Basic Usage

import java.util.regex.*;

String text = "Hello, World!";
String pattern = "World";

// Create pattern
Pattern p = Pattern.compile(pattern);

// Create matcher
Matcher m = p.matcher(text);

// Check for match
if (m.find()) {
    System.out.println("Found: " + m.group());
}

// Using matches() for exact match
System.out.println(Pattern.matches("World", text));  // false
System.out.println(Pattern.matches("Hello.*", text));  // true

Matcher Methods

String text = "abc123def456";

Pattern p = Pattern.compile("\\d+");  // Match digits
Matcher m = p.matcher(text);

// Find next match
while (m.find()) {
    System.out.println("Found: " + m.group() + 
                       " at position: " + m.start() + 
                       " to " + m.end());
}
// Output:
// Found: 123 at position: 3 to 6
// Found: 456 at position: 9 to 12

// Check entire string matches
Pattern p2 = Pattern.compile("\\d+");
Matcher m2 = p2.matcher("123");
System.out.println(m2.matches());  // true

// Replace all
String result = m.replaceAll("#");
System.out.println(result);  // abc#def#

Basic Regex Patterns

Literal Characters

// Match exact string
Pattern.matches("hello", "hello");  // true

// Special characters need escaping
Pattern.matches("price\\$100", "price$100");  // true
Pattern.matches("file\\.txt", "file.txt");      // true

Metacharacters

.       // Any character (except newline)
\\d     // Digit [0-9]
\\D     // Non-digit [^0-9]
\\w     // Word character [a-zA-Z0-9_]
\\W     // Non-word character
\\s     // Whitespace [ \t\n\x0B\f\r]
\\S     // Non-whitespace
\\b     // Word boundary
\\B     // Non-word boundary
\\n     // Newline
\\t     // Tab

Examples

// Any character
Pattern.matches("...", "abc");  // true (3 any chars)

// Digit
Pattern.matches("\\d", "5");    // true
Pattern.matches("\\d", "a");   // false

// Word character
Pattern.matches("\\w+", "hello_123");  // true

// Whitespace
Pattern.matches("\\s\\s", "  ");  // true

Character Classes

Basic Classes

[abc]       // a, b, or c
[^abc]      // Not a, b, or c
[a-z]       // Range a to z
[A-Z]       // Range A to Z
[0-9]       // Range 0 to 9
[a-zA-Z]    // Any letter
[a-zA-Z0-9] // Any alphanumeric

Examples

// Any of these characters
Pattern.matches("[aeiou]", "a");    // true
Pattern.matches("[aeiou]", "b");   // false

// Not these characters
Pattern.matches("[^aeiou]", "b");  // true

// Range
Pattern.matches("[0-5]", "3");     // true
Pattern.matches("[0-5]", "7");     // false

// Combined
Pattern.matches("[a-zA-Z]", "X");  // true
Pattern.matches("[a-zA-Z0-9]", "9"); // true

Predefined Classes

.       // Any character
\\d     // [0-9]
\\D     // [^0-9]
\\s     // [ \t\n\x0B\f\r]
\\S     // [^ \t\n\x0B\f\r]
\\w     // [a-zA-Z0-9_]
\\W     // [^a-zA-Z0-9_]

Quantifiers

Quantifier Types

*       // 0 or more
+       // 1 or more
?       // 0 or 1
{n}     // Exactly n times
{n,}    // n or more times
{n,m}   // Between n and m times

Examples

// Zero or more
Pattern.matches("\\d*", "");       // true
Pattern.matches("\\d*", "123");    // true
Pattern.matches("\\d*", "abc");    // false (matches() needs the whole input to match; "abc" is not digits)

// One or more
Pattern.matches("\\d+", "123");    // true
Pattern.matches("\\d+", "");       // false (needs at least one)

// Zero or one
Pattern.matches("\\d?", "5");       // true
Pattern.matches("\\d?", "");       // true
Pattern.matches("\\d?", "abc");    // false (whole input must be at most one digit)

// Exact count
Pattern.matches("\\d{3}", "123");  // true
Pattern.matches("\\d{3}", "12");   // false
Pattern.matches("\\d{3}", "1234"); // false

// Range
Pattern.matches("\\d{2,4}", "123");  // true
Pattern.matches("\\d{2,4}", "1");    // false

Greedy vs Lazy

String text = "12345";

// Greedy - matches as much as possible
Pattern p1 = Pattern.compile("\\d+");
Matcher m1 = p1.matcher(text);
m1.find();
System.out.println(m1.group());  // "12345"

// Lazy - matches as little as possible
Pattern p2 = Pattern.compile("\\d+?");
Matcher m2 = p2.matcher(text);
m2.find();
System.out.println(m2.group());  // "1"

Anchors

Anchor Types

^       // Start of string (or line in multiline)
$       // End of string (or line in multiline)
\\b     // Word boundary
\\B     // Non-word boundary

Examples

// Pattern.matches() always requires the ENTIRE input to match, which makes
// ^ and $ redundant there. Use Matcher.find() to see what the anchors do.

// Start of string
Pattern.compile("^hello").matcher("hello world").find();  // true
Pattern.compile("^world").matcher("hello world").find();  // false

// End of string
Pattern.compile("world$").matcher("hello world").find();  // true
Pattern.compile("hello$").matcher("hello world").find();  // false

// Word boundary
Pattern.compile("\\bhello\\b").matcher("hello").find();      // true
Pattern.compile("\\bhello\\b").matcher("helloworld").find(); // false (no boundary after "hello")

// Combined - with both anchors, find() behaves like a whole-input match
Pattern.compile("^\\d+$").matcher("12345").find();    // true (only digits)
Pattern.compile("^[a-z]+$").matcher("hello").find();  // true (only letters)

Groups and Capturing

Creating Groups

// Groups are created with parentheses
String text = "John Smith, 30, New York";

// Group 0 is entire match, Group 1 is first parentheses
Pattern p = Pattern.compile("(\\w+)\\s+(\\w+).*");
Matcher m = p.matcher(text);

if (m.find()) {
    System.out.println("Full: " + m.group(0));    // John Smith, 30, New York
    System.out.println("Group 1: " + m.group(1)); // John
    System.out.println("Group 2: " + m.group(2)); // Smith
}

Named Groups

// Named capturing groups, (?<name>...), are supported since Java 7
String text = "john@example.com";

Pattern p = Pattern.compile("(?<username>\\w+)@(?<domain>\\w+\\.\\w+)");
Matcher m = p.matcher(text);

if (m.find()) {
    System.out.println(m.group("username")); // john
    System.out.println(m.group("domain"));  // example.com
}

Non-Capturing Groups

// (?:...) doesn't create a capturing group
Pattern p = Pattern.compile("(?:Mr|Mrs|Ms)\\.\\s+(\\w+)");
Matcher m = p.matcher("Mr. Smith");

if (m.find()) {
    System.out.println(m.group(1));  // Smith (only captures name)
}

Backreferences

// \\1 refers to first captured group
Pattern p = Pattern.compile("(\\w+)\\s+\\1");  // Match repeated word
// Match with the compiled pattern directly; Pattern.matches(regex, input)
// takes the regex FIRST, so passing the input first silently returns false.
System.out.println(p.matcher("hello hello").matches());  // true
System.out.println(p.matcher("hello world").matches());  // false

// HTML tag matching - \\1 forces the closing tag name to equal the opening one
Pattern p2 = Pattern.compile("<([a-z]+)>.*</\\1>");
System.out.println(p2.matcher("<div>text</div>").matches());  // true

Common Regex Patterns

Email Validation

String emailRegex = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$";
Pattern.matches(emailRegex, "test@example.com");    // true
Pattern.matches(emailRegex, "invalid@");            // false

Phone Number

// US phone: (123) 456-7890, 123-456-7890 or 1234567890
// The area code is mandatory: either "(123)" with an optional space,
// or "123" with an optional dash/space separator.
String phoneRegex = "^(\\(\\d{3}\\)\\s?|\\d{3}[- ]?)\\d{3}[- ]?\\d{4}$";
Pattern.matches(phoneRegex, "(123) 456-7890");  // true
Pattern.matches(phoneRegex, "123-456-7890");   // true
Pattern.matches(phoneRegex, "1234567890");     // true

URL

String urlRegex = "^(https?://)?(www\\.)?[a-zA-Z0-9-]+\\.[a-zA-Z]{2,}(/.*)?$";
Pattern.matches(urlRegex, "https://www.example.com");  // true
Pattern.matches(urlRegex, "example.com");              // true

Date (YYYY-MM-DD)

String dateRegex = "^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\\d|3[01])$";
Pattern.matches(dateRegex, "2024-01-15");   // true
Pattern.matches(dateRegex, "2024-13-01");   // false

Time (HH:MM:SS)

String timeRegex = "^([01]\\d|2[0-3]):([0-5]\\d):([0-5]\\d)$";
Pattern.matches(timeRegex, "14:30:45");  // true
Pattern.matches(timeRegex, "25:00:00"); // false

IP Address

String ipRegex = "^((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]?\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]?\\d)$";
Pattern.matches(ipRegex, "192.168.1.1");    // true
Pattern.matches(ipRegex, "256.1.1.1");      // false

Password Validation

// At least 8 chars, 1 uppercase, 1 lowercase, 1 digit, 1 special
String passwordRegex = "^(?=.*[a-z])(?=.*[A-Z])(?=.*\\d)(?=.*[@$!%*?&])[A-Za-z\\d@$!%*?&]{8,}$";
Pattern.matches(passwordRegex, "Pass@word1");  // true
Pattern.matches(passwordRegex, "password");   // false

HTML Tag

String tagRegex = "<([a-z]+)[^>]*>(.*)</\\1>";
Pattern.matches(tagRegex, "<div>content</div>");     // true
Pattern.matches(tagRegex, "<p>text</p>");           // true

String Methods with Regex

String text = "Hello123World456";

// matches() - entire string must match
System.out.println(text.matches("\\w+\\d+\\w+"));  // true

// split() - split by pattern (trailing empty strings are dropped)
String[] parts = text.split("\\d+");
System.out.println(Arrays.toString(parts));  // [Hello, World]

// replaceFirst() - replace first match
System.out.println(text.replaceFirst("\\d+", "#"));  // Hello#World456

// replaceAll() - replace all matches
System.out.println(text.replaceAll("\\d+", "#"));    // Hello#World#

Practical Example: Data Extraction

String data = """
    John Doe, john@email.com, 30
    Jane Smith, jane@email.com, 25
    Bob Wilson, bob@email.com, 35
    """;

Pattern p = Pattern.compile("(\\w+)\\s+(\\w+),\\s+([\\w.@]+),\\s+(\\d+)");
Matcher m = p.matcher(data);

while (m.find()) {
    System.out.println("Name: " + m.group(1) + " " + m.group(2));
    System.out.println("Email: " + m.group(3));
    System.out.println("Age: " + m.group(4));
    System.out.println("---");
}

Practice Examples

Example 1: Validate Input

import java.util.Scanner;
import java.util.regex.Pattern;

/**
 * Static helpers that validate user-supplied strings against regular expressions.
 *
 * <p>Each pattern is compiled once and reused, because {@code String.matches}
 * recompiles its regex on every call. All validators return {@code false} for
 * {@code null} input instead of throwing.
 */
public class InputValidator {
    /** Local part, "@", domain, and an alphabetic top-level domain of two or more letters. */
    private static final Pattern EMAIL =
            Pattern.compile("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$");

    /**
     * Ten-digit phone number with optional '-', '.' or whitespace separators.
     * The area code is either fully parenthesized or bare, so inputs with a
     * single unbalanced parenthesis such as "(123-456-7890" are rejected.
     */
    private static final Pattern PHONE =
            Pattern.compile("^(?:\\(\\d{3}\\)|\\d{3})[-.\\s]?\\d{3}[-.\\s]?\\d{4}$");

    /** At least 8 characters containing a lowercase letter, an uppercase letter and a digit. */
    private static final Pattern STRONG_PASSWORD =
            Pattern.compile("^(?=.*[a-z])(?=.*[A-Z])(?=.*\\d).{8,}$");

    /**
     * Checks whether the whole string looks like an email address.
     *
     * @param email candidate address; may be {@code null}
     * @return {@code true} if {@code email} is non-null and matches the email pattern
     */
    public static boolean isValidEmail(String email) {
        return email != null && EMAIL.matcher(email).matches();
    }

    /**
     * Checks whether the whole string is a ten-digit phone number such as
     * "(123) 456-7890", "123-456-7890", "123.456.7890" or "1234567890".
     *
     * @param phone candidate phone number; may be {@code null}
     * @return {@code true} if {@code phone} is non-null and matches the phone pattern
     */
    public static boolean isValidPhone(String phone) {
        return phone != null && PHONE.matcher(phone).matches();
    }

    /**
     * Checks whether a password is at least 8 characters long and contains
     * at least one lowercase letter, one uppercase letter and one digit.
     *
     * @param password candidate password; may be {@code null}
     * @return {@code true} if {@code password} is non-null and satisfies all rules
     */
    public static boolean isStrongPassword(String password) {
        return password != null && STRONG_PASSWORD.matcher(password).matches();
    }

    /**
     * Prompts for an email address and a phone number on standard input and
     * prints whether each one is valid.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources releases the Scanner once both prompts are done
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.print("Enter email: ");
            String email = scanner.nextLine();
            System.out.println("Valid: " + isValidEmail(email));

            System.out.print("Enter phone: ");
            String phone = scanner.nextLine();
            System.out.println("Valid: " + isValidPhone(phone));
        }
    }
}

Example 2: Parse Log File

String log = """
    [2024-01-15 10:30:45] INFO: Application started
    [2024-01-15 10:30:46] ERROR: Database connection failed
    [2024-01-15 10:30:47] WARN: Retrying connection...
    """;

Pattern p = Pattern.compile("\\[([^\\]]+)\\]\\s+(DEBUG|INFO|WARN|ERROR):\\s+(.+)");
Matcher m = p.matcher(log);

while (m.find()) {
    String timestamp = m.group(1);
    String level = m.group(2);
    String message = m.group(3);
    
    System.out.println("[" + level + "] " + message + " (" + timestamp + ")");
}

Example 3: Text Formatting

String text = "This_is_a_test_string";

// Convert underscores to spaces
String formatted = text.replace("_", " ");
System.out.println(formatted);  // "This is a test string"

// Convert camelCase to spaces
// Zero-width lookarounds mark each word boundary without consuming letters,
// so back-to-back capitals ("ATest") are split as well. A consuming pattern
// like ([a-z])([A-Z]) would give "this Is ATest".
String camelCase = "thisIsATest";
String spaced = camelCase.replaceAll("(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", " ");
System.out.println(spaced);  // "this Is A Test"

// Capitalize first letter of each word
// Split on runs of whitespace and skip empty tokens so that leading or
// repeated spaces cannot make substring(0, 1) throw.
String words = "hello world";
String capitalized = Arrays.stream(words.split("\\s+"))
    .filter(w -> !w.isEmpty())
    .map(w -> w.substring(0, 1).toUpperCase() + w.substring(1))
    .reduce((a, b) -> a + " " + b)
    .orElse("");
System.out.println(capitalized);  // "Hello World"

Summary

In this guide, you learned:

  • ✅ Pattern and Matcher classes
  • ✅ Basic regex patterns and metacharacters
  • ✅ Character classes
  • ✅ Quantifiers (greedy vs lazy)
  • ✅ Anchors for position matching
  • ✅ Groups and capturing
  • ✅ Named groups and backreferences
  • ✅ Common regex patterns for validation
  • ✅ String methods with regex
  • ✅ Practical examples

Related Guides


Master regex and become a text processing pro! 🔍

Clone this wiki locally