Skip to content

Re2j runs 5x slower than java.util.regex #162

@YuyuZha0

Description

@YuyuZha0

Here is my benchmark:

import org.apache.commons.lang3.RandomStringUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * JMH benchmark that runs {@code find()} with the same regular expression compiled by
 * {@link java.util.regex.Pattern} and by {@code com.google.re2j.Pattern}.
 *
 * <p>Every benchmark thread owns its state: a list of random numeric strings that is cycled
 * through endlessly, one string per benchmark invocation.
 *
 * <p>Results recorded with this benchmark:
 *
 * <pre>
 * Benchmark                Mode  Cnt     Score   Error  Units
 * Re2jBenchmark.javaMatch  avgt    2   175.926          ns/op
 * Re2jBenchmark.re2jMatch  avgt    2  1067.591          ns/op
 * </pre>
 */
@State(Scope.Thread)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 2, time = 3)
@Measurement(iterations = 2, time = 3)
@Threads(3)
@Fork(1)
public class Re2jBenchmark {

  /** Regular expression shared by both engines. */
  private static final String PATTERN = "[0-5][0-6]{1,3}[1-7]{1,2}[2580]{1,3}[127]{0,8}";

  /** {@link #PATTERN} compiled with the JDK regex engine. */
  private static final Pattern JDK_PATTERN = Pattern.compile(PATTERN);

  /** {@link #PATTERN} compiled with RE2/J. */
  private static final com.google.re2j.Pattern RE2J_PATTERN =
      com.google.re2j.Pattern.compile(PATTERN);

  /** Number of random input strings generated per benchmark state. */
  private static final int INPUT_COUNT = 1000;

  /** Length, in digits, of each generated input string. */
  private static final int INPUT_LENGTH = 32;

  /** Pool of random numeric strings fed to the matchers. */
  private List<String> samples;

  /** Current position in {@link #samples}; replaced with a fresh iterator once exhausted. */
  private Iterator<String> cursor;

  /** Generates the input pool and positions the cursor at its first element. */
  @Setup
  public void setup() {
    samples =
        IntStream.range(0, INPUT_COUNT)
            .mapToObj(ignored -> RandomStringUtils.randomNumeric(INPUT_LENGTH))
            .collect(Collectors.toList());
    cursor = samples.listIterator();
  }

  /**
   * Returns the next input string, wrapping around to the start of the pool when the end has
   * been reached.
   */
  private String nextInput() {
    if (cursor.hasNext()) {
      return cursor.next();
    }
    // Pool exhausted: start over from the first sample.
    cursor = samples.listIterator();
    return cursor.next();
  }

  /** Measures {@code find()} using {@code java.util.regex}. */
  @Benchmark
  public boolean javaMatch() {
    final String input = nextInput();
    return JDK_PATTERN.matcher(input).find();
  }

  /** Measures {@code find()} using RE2/J. */
  @Benchmark
  public boolean re2jMatch() {
    final String input = nextInput();
    return RE2J_PATTERN.matcher(input).find();
  }
}

Here is the result:

Benchmark                Mode  Cnt     Score   Error  Units
Re2jBenchmark.javaMatch  avgt    2   175.926          ns/op
Re2jBenchmark.re2jMatch  avgt    2  1067.591          ns/op

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions