-
-
Notifications
You must be signed in to change notification settings - Fork 107
Block-based immutable list implementation (GSoC proposal) #809
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
261b997
988d115
1113036
0d9ade1
440066d
c1fcf44
163dd9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,167 @@ | ||
| /* | ||
| * Copyright (c) 2015 Typelevel | ||
| * | ||
| * Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
| * this software and associated documentation files (the "Software"), to deal in | ||
| * the Software without restriction, including without limitation the rights to | ||
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||
| * the Software, and to permit persons to whom the Software is furnished to do so, | ||
| * subject to the following conditions: | ||
| * | ||
| * The above copyright notice and this permission notice shall be included in all | ||
| * copies or substantial portions of the Software. | ||
| * | ||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
| * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
| * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||
| * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| */ | ||
|
|
||
| package cats.collections.bench | ||
|
|
||
| import cats.collections.{BlockedList, FastBlockedList} | ||
| import org.openjdk.jmh.annotations.* | ||
|
|
||
| import java.util.concurrent.TimeUnit | ||
|
|
||
| /** | ||
|  * JMH benchmarks that run the same operations (prepend, uncons, tail, forEach, foldLeft, map) | ||
|  * against `BlockedList` and against `scala.List`, for several block sizes. | ||
|  * | ||
|  * State is per benchmark thread and timings are reported as average time in nanoseconds. | ||
|  */ | ||
| @State(Scope.Thread) | ||
| @BenchmarkMode(Array(Mode.AverageTime)) | ||
| @OutputTimeUnit(TimeUnit.NANOSECONDS) | ||
| class BlockedListBenchmark { | ||
|
|
||
|   /** | ||
|    * Block size under test. Stored per-node in both implementations. | ||
|    */ | ||
|   @Param(Array("4", "8", "16", "32", "64")) | ||
|   var blockSize: Int = _ | ||
|
|
||
|   // Fixtures populated once per trial by `setup` and only read by the benchmarks. | ||
|   var preparedBlockedList: BlockedList[Int] = _ | ||
|   // var preparedFastBlockedList: FastBlockedList[Int] = _ | ||
|   var preparedScalaList: List[Int] = _ | ||
|
|
||
|   /** | ||
|    * Number of elements used in all benchmarks. | ||
|    */ | ||
|   final val ListSize = 10000 | ||
|
|
||
|   /** | ||
|    * Builds the shared fixtures. Both lists hold the values 1 to `ListSize` inclusive, so the | ||
|    * traversal benchmarks process exactly as many elements as the prepend benchmarks create. | ||
|    */ | ||
|   @Setup(Level.Trial) | ||
|   def setup(): Unit = { | ||
|     // `List.range` excludes its upper bound, hence `ListSize + 1`. | ||
|     val elements = List.range(1, ListSize + 1) | ||
|     preparedBlockedList = BlockedList[Int](elements)(blockSize) | ||
|     // preparedFastBlockedList = FastBlockedList[Int](elements)(blockSize) | ||
|     preparedScalaList = elements | ||
|   } | ||
|
|
||
|   /** | ||
|    * `scala.List` counterpart of `BlockedList.uncons`: the head and the remaining list, or | ||
|    * `None` for an empty list. | ||
|    */ | ||
|   def listUncons[A](lst: List[A]): Option[(A, List[A])] = lst match { | ||
|     case ::(head, next) => Some((head, next)) | ||
|     case Nil => None | ||
|   } | ||
|
|
||
|   // ----------------------------------------------- Prepend ----------------------------- | ||
|   /** Builds a `BlockedList` of `ListSize` elements by repeated `prepend`, starting from empty. */ | ||
|   @Benchmark | ||
|   def copyOnWritePrepend(): BlockedList[Int] = { | ||
|     var list = BlockedList.empty[Int](blockSize) | ||
|     var i = 1 | ||
|     while (i <= ListSize) { | ||
|       list = list.prepend(i) | ||
|       i += 1 | ||
|     } | ||
|     list | ||
|   } | ||
|
|
||
|   /** Builds a `scala.List` of `ListSize` elements by repeated `::`, starting from empty. */ | ||
|   @Benchmark | ||
|   def scalaListPrepend(): List[Int] = { | ||
|     var list = List.empty[Int] | ||
|     var i = 1 | ||
|     while (i <= ListSize) { | ||
|       list = i :: list | ||
|       i += 1 | ||
|     } | ||
|     list | ||
|   } | ||
|
|
||
|   // ---------------------------------------------- unCons ------------------------------------------------ | ||
|   // NOTE(review): the uncons and tail benchmarks below return Unit, so no result is handed back | ||
|   // to JMH. Consider returning the final value (or sinking it into a Blackhole) to guard | ||
|   // against dead-code elimination - confirm against the measured numbers. | ||
|   /** Consumes the prepared `BlockedList` one element at a time through `uncons`. */ | ||
|   @Benchmark | ||
|   def copyOnWriteUncons(): Unit = { | ||
|     var result = preparedBlockedList.uncons | ||
|     while (result.isDefined) { | ||
|       result = result.get._2.uncons | ||
|     } | ||
|   } | ||
|
|
||
|   /** Consumes the prepared `scala.List` one element at a time through `listUncons`. */ | ||
|   @Benchmark | ||
|   def scalaListUncons(): Unit = { | ||
|     var result = listUncons(preparedScalaList) | ||
|     while (result.isDefined) { | ||
|       result = listUncons(result.get._2) | ||
|     } | ||
|   } | ||
|
|
||
|   // ---------------------------------------------- tail ------------------------------------------------ | ||
|
|
||
|   /** Walks the prepared `BlockedList` to its end through repeated `tailE`. */ | ||
|   @Benchmark | ||
|   def copyOnTail(): Unit = { | ||
|     var result = preparedBlockedList.tailE | ||
|     while (!result.isEmpty) { | ||
|       result = result.tailE | ||
|     } | ||
|   } | ||
|
|
||
|   /** Walks the prepared `scala.List` to its end through repeated `tail`. */ | ||
|   @Benchmark | ||
|   def scalaListTail(): Unit = { | ||
|     var result = preparedScalaList | ||
|     while (result.nonEmpty) { | ||
|       result = result.tail | ||
|     } | ||
|   } | ||
|
|
||
|   // -------------------------- ForEach ------------------------------- | ||
|
|
||
|   /** Sums the prepared `BlockedList` with a side-effecting `forEach`. */ | ||
|   @Benchmark | ||
|   def copyOnWriteForEach(): Long = { | ||
|     var sum = 0L | ||
|     preparedBlockedList.forEach((a: Int) => sum += a) | ||
|     sum | ||
|   } | ||
|
|
||
|   /** Sums the prepared `scala.List` with a side-effecting `foreach`. */ | ||
|   @Benchmark | ||
|   def scalaListForeach(): Long = { | ||
|     var sum = 0L | ||
|     preparedScalaList.foreach(a => sum += a) | ||
|     sum | ||
|   } | ||
|
|
||
|   // ----------------------------------- FoldLeft --------------------------------- | ||
|
|
||
|   /** Sums the prepared `BlockedList` with `foldLeft`. */ | ||
|   @Benchmark | ||
|   def copyOnWriteFoldLeft(): Long = { | ||
|     preparedBlockedList.foldLeft(0L)((acc, elem) => acc + elem) | ||
|   } | ||
|
|
||
|   /** Sums the prepared `scala.List` with `foldLeft`. */ | ||
|   @Benchmark | ||
|   def scalaListFoldLeft(): Long = { | ||
|     preparedScalaList.foldLeft(0L)((acc, a) => acc + a) | ||
|   } | ||
|
|
||
|   // ----------------------------------- Map --------------------------------- | ||
|
|
||
|   /** Maps `_ + 1` over the prepared `BlockedList` using `map`. */ | ||
|   @Benchmark | ||
|   def blockedListMap(): BlockedList[Int] = { | ||
|     preparedBlockedList.map(_ + 1) | ||
|   } | ||
|
|
||
|   /** Maps `_ + 1` over the prepared `BlockedList` using the alternative `map2expirement`. */ | ||
|   @Benchmark | ||
|   def blockedListMap2expirement(): BlockedList[Int] = { | ||
|     preparedBlockedList.map2expirement(_ + 1) | ||
|   } | ||
|
|
||
|   /** Maps `_ + 1` over the prepared `scala.List`. */ | ||
|   @Benchmark | ||
|   def scalaListMap(): List[Int] = { | ||
|     preparedScalaList.map(_ + 1) | ||
|   } | ||
| } | ||
|
|
||
| // ----------------------------------- ---------------------------- --------------------------------- |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,225 @@ | ||
| /* | ||
| * Copyright (c) 2015 Typelevel | ||
| * | ||
| * Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
| * this software and associated documentation files (the "Software"), to deal in | ||
| * the Software without restriction, including without limitation the rights to | ||
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||
| * the Software, and to permit persons to whom the Software is furnished to do so, | ||
| * subject to the following conditions: | ||
| * | ||
| * The above copyright notice and this permission notice shall be included in all | ||
| * copies or substantial portions of the Software. | ||
| * | ||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
| * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
| * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||
| * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| */ | ||
|
|
||
| package cats.collections | ||
|
|
||
| import cats.Eval | ||
|
|
||
| import java.util.NoSuchElementException | ||
| import scala.annotation.tailrec | ||
|
|
||
| /** | ||
| * An immutable list whose elements are stored in array blocks that are linked together. | ||
| * The two implementations, `Empty` and `Impl`, live in the companion object. | ||
| */ | ||
| sealed trait BlockedList[+T] { | ||
| /** The first element paired with the rest of the list, or `None` when the list is empty. */ | ||
| def uncons[A >: T]: Option[(A, BlockedList[A])] | ||
| /** A new list with `a` in front of the elements of this list. */ | ||
| def prepend[A >: T](a: A): BlockedList[A] | ||
| /** The list without its first element; the empty list throws `NoSuchElementException`. */ | ||
| def tailE: BlockedList[T] | ||
| /** Applies `f` to every element, front to back, discarding the results. */ | ||
| def forEach[U](f: T => U): Unit | ||
| /** Folds the elements front to back into a single value, starting from `start`. */ | ||
| def foldLeft[B](start: B)(f: (B, T) => B): B | ||
| /** A new list holding `f` applied to every element. */ | ||
| def map[B](f: T => B): BlockedList[B] | ||
| // NOTE(review): in `Impl` this relinks the nodes in reverse order but leaves each block | ||
| // untouched, so it is not an element-wise reverse when a block holds several elements. | ||
| def reverse: BlockedList[T] | ||
| /** Alternative implementation of `map` kept for benchmarking (the name's spelling is part of the API). */ | ||
| def map2expirement[B](f: T => B): BlockedList[B] | ||
| /** Whether the list has no elements. */ | ||
| def isEmpty: Boolean | ||
| } | ||
|
|
||
| object BlockedList { | ||
|
|
||
| /** | ||
|  * Builds a list from the given elements, keeping their order, with blocks of `BlockSize` slots. | ||
|  */ | ||
| def apply[A](elements: A*)(BlockSize: Int): BlockedList[A] = { | ||
|   // Prepend from the last element to the first so the first element ends up at the front. | ||
|   val seed = empty[A](BlockSize) | ||
|   elements.reverseIterator.foldLeft(seed)((built, elem) => built.prepend(elem)) | ||
| } | ||
| /** | ||
|  * Builds a list from a `scala.List`, keeping its order, with blocks of `BlockSize` slots. | ||
|  */ | ||
| def apply[A](elements: List[A])(BlockSize: Int): BlockedList[A] = { | ||
|   // Prepend from the last element to the first so the first element ends up at the front. | ||
|   val seed = empty[A](BlockSize) | ||
|   elements.reverseIterator.foldLeft(seed)((built, elem) => built.prepend(elem)) | ||
| } | ||
| /** | ||
|  * The empty list for the given block size. | ||
|  * | ||
|  * @param BlockSize number of slots per block; must be positive, because `prepend` allocates an | ||
|  *                  array of this size and writes to index `BlockSize - 1` | ||
|  * @throws IllegalArgumentException if `BlockSize` is not positive | ||
|  */ | ||
| def empty[A](BlockSize: Int): BlockedList[A] = { | ||
|   // Fail here rather than with an array exception on the first prepend. | ||
|   require(BlockSize > 0, s"BlockSize must be positive, got $BlockSize") | ||
|   Empty(BlockSize) | ||
| } | ||
|
|
||
| /** | ||
|  * The list with no elements. It carries the block size so that `prepend` knows how large a | ||
|  * block to allocate. | ||
|  */ | ||
| final case class Empty(BlockSize: Int) extends BlockedList[Nothing] { | ||
|
|
||
|   override def isEmpty: Boolean = true | ||
|
|
||
|   /** There is no first element, so the result is always `None`. */ | ||
|   override def uncons[A >: Nothing]: Option[(A, BlockedList[A])] = None | ||
|
|
||
|   /** Always throws, since an empty list has no tail. */ | ||
|   override def tailE: BlockedList[Nothing] = throw new NoSuchElementException() | ||
|
|
||
|   /** | ||
|    * Allocates a fresh block of `BlockSize` slots, stores `a` in its last slot and returns a | ||
|    * single node that points back at this empty list. | ||
|    */ | ||
|   override def prepend[A >: Nothing](a: A): BlockedList[A] = { | ||
|     val freshBlock = new Array[Any](BlockSize) | ||
|     val lastSlot = BlockSize - 1 | ||
|     freshBlock.update(lastSlot, a) | ||
|     Impl(lastSlot, freshBlock, this, BlockSize) | ||
|   } | ||
|
|
||
|   // With no elements, traversals do nothing and transformations return this same instance. | ||
|   override def forEach[U](f: Nothing => U): Unit = () | ||
|
|
||
|   override def foldLeft[B](start: B)(f: (B, Nothing) => B): B = start | ||
|
|
||
|   override def map[B](f: Nothing => B): BlockedList[B] = this | ||
|
|
||
|   override def map2expirement[B](f: Nothing => B): BlockedList[B] = this | ||
|
|
||
|   override def reverse: BlockedList[Nothing] = this | ||
| } | ||
|
|
||
| final case class Impl[+T](offset: Int, block: Array[Any], tail: BlockedList[T], BlockSize: Int) | ||
| extends BlockedList[T] { | ||
| /** | ||
|  * Splits off the element stored at `offset`. The rest shares this node's block with the | ||
|  * offset moved one slot forward, or is `tail` when this was the block's last slot. | ||
|  */ | ||
| @inline | ||
| override def uncons[A >: T]: Option[(A, BlockedList[A])] = { | ||
|   val head = block(offset).asInstanceOf[A] | ||
|   val following = offset + 1 | ||
|   val rest: BlockedList[A] = | ||
|     if (following >= BlockSize) tail | ||
|     else Impl(following, block, tail, BlockSize) | ||
|   Some((head, rest)) | ||
| } | ||
|
|
||
| /** | ||
|  * Returns a new list with `a` in front. This node's block is never written to: a new array | ||
|  * is allocated on every call. | ||
|  */ | ||
| override def prepend[A >: T](a: A): BlockedList[A] = | ||
|   if (offset <= 0) { | ||
|     // No free slot in front of the current block: start a new block whose tail is this node. | ||
|     val freshBlock = new Array[Any](BlockSize) | ||
|     val lastSlot = BlockSize - 1 | ||
|     freshBlock(lastSlot) = a | ||
|     Impl(lastSlot, freshBlock, this, BlockSize) | ||
|   } else { | ||
|     // Copy the occupied slots (offset until BlockSize) and put `a` in the slot before them. | ||
|     val copied = new Array[Any](BlockSize) | ||
|     System.arraycopy(block, offset, copied, offset, BlockSize - offset) | ||
|     val slot = offset - 1 | ||
|     copied(slot) = a | ||
|     Impl(slot, copied, tail, BlockSize) | ||
|   } | ||
|
|
||
| // An Impl node is never reported as empty. | ||
| override def isEmpty: Boolean = false | ||
|
|
||
| /** | ||
|  * Applies `f` to every element, node by node, visiting each block from the node's offset up | ||
|  * to its block size. | ||
|  */ | ||
| override def forEach[U](f: T => U): Unit = { | ||
Zayd-R marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|   // `cursor` is the node still to be visited; the loop ends at the terminating Empty. | ||
|   var cursor: BlockedList[T] = this | ||
|   var finished = false | ||
|   while (!finished) { | ||
|     cursor match { | ||
|       case Impl(first, slots, rest, size) => | ||
|         var idx = first | ||
|         while (idx < size) { | ||
|           f(slots(idx).asInstanceOf[T]) | ||
|           idx += 1 | ||
|         } | ||
|         cursor = rest | ||
|
|
||
|       case Empty(_) => | ||
|         finished = true | ||
|     } | ||
|   } | ||
| } | ||
|
|
||
| /** | ||
|  * Folds the elements front to back: each node's block is folded from the node's offset up to | ||
|  * its block size, and the running value is carried on to the next node. | ||
|  */ | ||
| override def foldLeft[B](start: B)(f: (B, T) => B): B = { | ||
Zayd-R marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|   @tailrec | ||
|   def go(node: BlockedList[T], soFar: B): B = node match { | ||
|     case Empty(_) => soFar | ||
|
|
||
|     case Impl(first, slots, rest, size) => | ||
|       var running = soFar | ||
|       var idx = first | ||
|       while (idx < size) { | ||
|         running = f(running, slots(idx).asInstanceOf[T]) | ||
|         idx += 1 | ||
|       } | ||
|       go(rest, running) | ||
|   } | ||
|   go(this, start) | ||
| } | ||
|
|
||
| /** | ||
|  * Returns a new list holding `f` applied to every element. | ||
|  * | ||
|  * Each node is mapped into a new block with the same offset. The mapped nodes are accumulated | ||
|  * in reverse node order, and the final `reverse` (which relinks nodes without touching the | ||
|  * blocks) restores the original order. | ||
|  */ | ||
| override def map[B](f: T => B): BlockedList[B] = { | ||
Zayd-R marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|   @tailrec | ||
|   def helper(current: BlockedList[T], acc: BlockedList[B]): BlockedList[B] = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think a faster approach would be to build a So the recursion would work on: this way we don't have to reverse the blocks themselves, just the order they came in.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So we would be doing two passes and one time building of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, but the current
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. while working on this I have a question to clarify things, did u mean we are collecting the blocks And the offset per
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I forgot about the offset. |
||
|     current match { | ||
|
|
||
|       case Impl(offset, block, tail, bs) => | ||
|         // Size the copy by this node's own block size: the loop below indexes up to `bs`, | ||
|         // so the array must not be sized by the enclosing node's BlockSize. | ||
|         val arrayCopy = new Array[Any](bs) | ||
|         var i = offset | ||
|         while (i < bs) { | ||
|           arrayCopy(i) = f(block(i).asInstanceOf[T]) | ||
|           i += 1 | ||
|         } | ||
|         helper(tail, Impl(offset, arrayCopy, acc, bs)) | ||
|
|
||
|       case Empty(_) => acc | ||
|     } | ||
|   } | ||
|
|
||
|   helper(this, Empty(BlockSize)).reverse | ||
| } | ||
|
|
||
| /** | ||
|  * Alternative `map`: maps every node into a new block, collecting `(offset, block)` pairs in | ||
|  * a `List`, then rebuilds the nodes from that list once the end of the input is reached. | ||
|  * Because the pairs are collected last-node-first, folding over them relinks the nodes in | ||
|  * their original order without a separate reverse pass. | ||
|  */ | ||
| override def map2expirement[B](f: T => B): BlockedList[B] = { | ||
|   // `blocks` holds the mapped nodes seen so far (most recent first); `remaining` is the input | ||
|   // still to be mapped. | ||
|   @tailrec | ||
|   def helper(blocks: List[(Int, Array[Any])], remaining: BlockedList[T]): BlockedList[B] = remaining match { | ||
|     case Impl(offset, block, tail, bs) => | ||
|       val arrayCopy = new Array[Any](bs) | ||
|       var i = offset | ||
|       while (i < bs) { | ||
|         arrayCopy(i) = f(block(i).asInstanceOf[T]) | ||
|         i += 1 | ||
|       } | ||
|       helper((offset, arrayCopy) :: blocks, tail) | ||
|
|
||
|     case Empty(bs) => | ||
|       blocks.foldLeft(BlockedList.empty[B](bs)) { case (blockListAcc, (perNodeOffset, arrayBlock)) => | ||
|         Impl(perNodeOffset, arrayBlock, blockListAcc, bs) | ||
|       } | ||
|   } | ||
|   helper(Nil, this) | ||
| } | ||
|
|
||
| // def map2expirement[B](f: T => B): BlockedList[B] = { | ||
| // def helper(curent: BlockedList[T], acc: BlockedList[B] => BlockedList[B]): BlockedList[B] = { | ||
| // curent match { | ||
| // case Impl(offset, block, tail, bs) => | ||
| // val arrayCopy = new Array[Any](BlockSize) | ||
| // var i = offset | ||
| // while(i < bs) { | ||
| // arrayCopy(i) = f( block(i).asInstanceOf[T] ) | ||
| // i += 1 | ||
| // } | ||
| // helper(tail, ( (rest: BlockedList[B]) => acc(Impl(offset, arrayCopy, rest, bs)) )) | ||
| // | ||
| // case Empty(bs) => acc(empty(bs)) | ||
| // } | ||
| // } | ||
| // helper(this, identity) | ||
| // } | ||
|
|
||
| /** | ||
|  * Relinks the nodes of this list in reverse order. Every node keeps its own offset and block, | ||
|  * so elements stored in the same block stay in their original relative order. | ||
|  * | ||
|  * NOTE(review): this is therefore not an element-wise reverse when a block holds more than | ||
|  * one element. `map` depends on this node-order behaviour to restore its result. | ||
|  */ | ||
| override def reverse: BlockedList[T] = { | ||
|   @tailrec | ||
|   def relink(remaining: BlockedList[T], reversed: BlockedList[T]): BlockedList[T] = | ||
|     remaining match { | ||
|       case Empty(_) => reversed | ||
|
|
||
|       case Impl(first, slots, rest, size) => | ||
|         relink(rest, Impl(first, slots, reversed, size)) | ||
|     } | ||
|   relink(this, Empty(BlockSize)) | ||
| } | ||
|
|
||
| /** | ||
|  * The list without its first element: the same block with the offset moved one slot forward, | ||
|  * or `tail` when the current element occupies the block's last slot. | ||
|  */ | ||
| override def tailE: BlockedList[T] = { | ||
|   val following = offset + 1 | ||
|   if (following >= BlockSize) tail | ||
|   else Impl(following, block, tail, BlockSize) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a hypothesis that always allocating the same size could be more efficient for the GC (since it will potentially have many identical arrays to reuse), but that is just a guess. An alternative would be to allocate only as big an Array as we need, up to a maximum size. That may improve performance in practice.