summaryrefslogtreecommitdiff
path: root/iter.go
diff options
context:
space:
mode:
Diffstat (limited to 'iter.go')
-rw-r--r--iter.go471
1 files changed, 471 insertions, 0 deletions
diff --git a/iter.go b/iter.go
new file mode 100644
index 0000000..e765378
--- /dev/null
+++ b/iter.go
@@ -0,0 +1,471 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package iter provides basic definitions and operations related to
+iterators over sequences.
+
+# Iterators
+
+An iterator is a function that passes successive elements of a
+sequence to a callback function, conventionally named yield.
+The function stops either when the sequence is finished or
+when yield returns false, indicating to stop the iteration early.
+This package defines [Seq] and [Seq2]
+(pronounced like seek—the first syllable of sequence)
+as shorthands for iterators that pass 1 or 2 values per sequence element
+to yield:
+
+ type (
+ Seq[V any] func(yield func(V) bool)
+ Seq2[K, V any] func(yield func(K, V) bool)
+ )
+
+Seq2 represents a sequence of paired values, conventionally key-value
+or index-value pairs.
+
+Yield returns true if the iterator should continue with the next
+element in the sequence, false if it should stop.
+
+For instance, [maps.Keys] returns an iterator that produces the sequence
+of keys of the map m, implemented as follows:
+
+ func Keys[Map ~map[K]V, K comparable, V any](m Map) iter.Seq[K] {
+ return func(yield func(K) bool) {
+ for k := range m {
+ if !yield(k) {
+ return
+ }
+ }
+ }
+ }
+
+Further examples can be found in [The Go Blog: Range Over Function Types].
+
+Iterator functions are most often called by a [range loop], as in:
+
+ func PrintAll[V any](seq iter.Seq[V]) {
+ for v := range seq {
+ fmt.Println(v)
+ }
+ }
+
+# Naming Conventions
+
+Iterator functions and methods are named for the sequence being walked:
+
+ // All returns an iterator over all elements in s.
+ func (s *Set[V]) All() iter.Seq[V]
+
+The iterator method on a collection type is conventionally named All,
+because it iterates a sequence of all the values in the collection.
+
+For a type containing multiple possible sequences, the iterator's name
+can indicate which sequence is being provided:
+
+ // Cities returns an iterator over the major cities in the country.
+ func (c *Country) Cities() iter.Seq[*City]
+
+ // Languages returns an iterator over the official spoken languages of the country.
+ func (c *Country) Languages() iter.Seq[string]
+
+If an iterator requires additional configuration, the constructor function
+can take additional configuration arguments:
+
+ // Scan returns an iterator over key-value pairs with min ≤ key ≤ max.
+ func (m *Map[K, V]) Scan(min, max K) iter.Seq2[K, V]
+
+ // Split returns an iterator over the (possibly-empty) substrings of s
+ // separated by sep.
+ func Split(s, sep string) iter.Seq[string]
+
+When there are multiple possible iteration orders, the method name may
+indicate that order:
+
+ // All returns an iterator over the list from head to tail.
+ func (l *List[V]) All() iter.Seq[V]
+
+ // Backward returns an iterator over the list from tail to head.
+ func (l *List[V]) Backward() iter.Seq[V]
+
+ // Preorder returns an iterator over all nodes of the syntax tree
+ // beneath (and including) the specified root, in depth-first preorder,
+ // visiting a parent node before its children.
+ func Preorder(root Node) iter.Seq[Node]
+
+# Single-Use Iterators
+
+Most iterators provide the ability to walk an entire sequence:
+when called, the iterator does any setup necessary to start the
+sequence, then calls yield on successive elements of the sequence,
+and then cleans up before returning. Calling the iterator again
+walks the sequence again.
+
+Some iterators break that convention, providing the ability to walk a
+sequence only once. These “single-use iterators” typically report values
+from a data stream that cannot be rewound to start over.
+Calling the iterator again after stopping early may continue the
+stream, but calling it again after the sequence is finished will yield
+no values at all. Doc comments for functions or methods that return
+single-use iterators should document this fact:
+
+ // Lines returns an iterator over lines read from r.
+ // It returns a single-use iterator.
+ func (r *Reader) Lines() iter.Seq[string]
+
+# Pulling Values
+
+Functions and methods that accept or return iterators
+should use the standard [Seq] or [Seq2] types, to ensure
+compatibility with range loops and other iterator adapters.
+The standard iterators can be thought of as “push iterators”, which
+push values to the yield function.
+
+Sometimes a range loop is not the most natural way to consume values
+of the sequence. In this case, [Pull] converts a standard push iterator
+to a “pull iterator”, which can be called to pull one value at a time
+from the sequence. [Pull] starts an iterator and returns a pair
+of functions—next and stop—which return the next value from the iterator
+and stop it, respectively.
+
+For example:
+
+ // Pairs returns an iterator over successive pairs of values from seq.
+ func Pairs[V any](seq iter.Seq[V]) iter.Seq2[V, V] {
+ return func(yield func(V, V) bool) {
+ next, stop := iter.Pull(seq)
+ defer stop()
+ for {
+ v1, ok1 := next()
+ if !ok1 {
+ return
+ }
+ v2, ok2 := next()
+ // If ok2 is false, v2 should be the
+ // zero value; yield one last pair.
+ if !yield(v1, v2) {
+ return
+ }
+ if !ok2 {
+ return
+ }
+ }
+ }
+ }
+
+If clients do not consume the sequence to completion, they must call stop,
+which allows the iterator function to finish and return. As shown in
+the example, the conventional way to ensure this is to use defer.
+
+# Standard Library Usage
+
+A few packages in the standard library provide iterator-based APIs,
+most notably the [maps] and [slices] packages.
+For example, [maps.Keys] returns an iterator over the keys of a map,
+while [slices.Sorted] collects the values of an iterator into a slice,
+sorts them, and returns the slice, so to iterate over the sorted keys of a map:
+
+ for _, key := range slices.Sorted(maps.Keys(m)) {
+ ...
+ }
+
+# Mutation
+
+Iterators provide only the values of the sequence, not any direct way
+to modify it. If an iterator wishes to provide a mechanism for modifying
+a sequence during iteration, the usual approach is to define a position type
+with the extra operations and then provide an iterator over positions.
+
+For example, a tree implementation might provide:
+
+ // Positions returns an iterator over positions in the sequence.
+ func (t *Tree[V]) Positions() iter.Seq[*Pos]
+
+ // A Pos represents a position in the sequence.
+ // It is only valid during the yield call it is passed to.
+ type Pos[V any] struct { ... }
+
+ // Pos returns the value at the cursor.
+ func (p *Pos[V]) Value() V
+
+ // Delete deletes the value at this point in the iteration.
+ func (p *Pos[V]) Delete()
+
+ // Set changes the value v at the cursor.
+ func (p *Pos[V]) Set(v V)
+
+And then a client could delete boring values from the tree using:
+
+ for p := range t.Positions() {
+ if boring(p.Value()) {
+ p.Delete()
+ }
+ }
+
+[The Go Blog: Range Over Function Types]: https://go.dev/blog/range-functions
+[range loop]: https://go.dev/ref/spec#For_range
+*/
+package iter
+
+import (
+ "internal/race"
+ "runtime"
+ "unsafe"
+)
+
+// Seq is an iterator over sequences of individual values.
+// When called as seq(yield), seq calls yield(v) for each value v in the sequence,
+// stopping early if yield returns false.
+// See the [iter] package documentation for more details.
+type Seq[V any] func(yield func(V) bool)
+
+// Seq2 is an iterator over sequences of pairs of values, most commonly key-value pairs.
+// When called as seq(yield), seq calls yield(k, v) for each pair (k, v) in the sequence,
+// stopping early if yield returns false.
+// See the [iter] package documentation for more details.
+type Seq2[K, V any] func(yield func(K, V) bool)
+
+type coro struct{}
+
+//go:linkname newcoro runtime.newcoro
+func newcoro(func(*coro)) *coro
+
+//go:linkname coroswitch runtime.coroswitch
+func coroswitch(*coro)
+
+// Pull converts the “push-style” iterator sequence seq
+// into a “pull-style” iterator accessed by the two functions
+// next and stop.
+//
+// Next returns the next value in the sequence
+// and a boolean indicating whether the value is valid.
+// When the sequence is over, next returns the zero V and false.
+// It is valid to call next after reaching the end of the sequence
+// or after calling stop. These calls will continue
+// to return the zero V and false.
+//
+// Stop ends the iteration. It must be called when the caller is
+// no longer interested in next values and next has not yet
+// signaled that the sequence is over (with a false boolean return).
+// It is valid to call stop multiple times and when next has
+// already returned false. Typically, callers should “defer stop()”.
+//
+// It is an error to call next or stop from multiple goroutines
+// simultaneously.
+//
+// If the iterator panics during a call to next (or stop),
+// then next (or stop) itself panics with the same value.
+func Pull[V any](seq Seq[V]) (next func() (V, bool), stop func()) {
+ var (
+ v V
+ ok bool
+ done bool
+ yieldNext bool
+ racer int
+ panicValue any
+ seqDone bool // to detect Goexit
+ )
+ c := newcoro(func(c *coro) {
+ race.Acquire(unsafe.Pointer(&racer))
+ if done {
+ race.Release(unsafe.Pointer(&racer))
+ return
+ }
+ yield := func(v1 V) bool {
+ if done {
+ return false
+ }
+ if !yieldNext {
+ panic("iter.Pull: yield called again before next")
+ }
+ yieldNext = false
+ v, ok = v1, true
+ race.Release(unsafe.Pointer(&racer))
+ coroswitch(c)
+ race.Acquire(unsafe.Pointer(&racer))
+ return !done
+ }
+ // Recover and propagate panics from seq.
+ defer func() {
+ if p := recover(); p != nil {
+ panicValue = p
+ } else if !seqDone {
+ panicValue = goexitPanicValue
+ }
+ done = true // Invalidate iterator
+ race.Release(unsafe.Pointer(&racer))
+ }()
+ seq(yield)
+ var v0 V
+ v, ok = v0, false
+ seqDone = true
+ })
+ next = func() (v1 V, ok1 bool) {
+ race.Write(unsafe.Pointer(&racer)) // detect races
+
+ if done {
+ return
+ }
+ if yieldNext {
+ panic("iter.Pull: next called again before yield")
+ }
+ yieldNext = true
+ race.Release(unsafe.Pointer(&racer))
+ coroswitch(c)
+ race.Acquire(unsafe.Pointer(&racer))
+
+ // Propagate panics and goexits from seq.
+ if panicValue != nil {
+ if panicValue == goexitPanicValue {
+ // Propagate runtime.Goexit from seq.
+ runtime.Goexit()
+ } else {
+ panic(panicValue)
+ }
+ }
+ return v, ok
+ }
+ stop = func() {
+ race.Write(unsafe.Pointer(&racer)) // detect races
+
+ if !done {
+ done = true
+ race.Release(unsafe.Pointer(&racer))
+ coroswitch(c)
+ race.Acquire(unsafe.Pointer(&racer))
+
+ // Propagate panics and goexits from seq.
+ if panicValue != nil {
+ if panicValue == goexitPanicValue {
+ // Propagate runtime.Goexit from seq.
+ runtime.Goexit()
+ } else {
+ panic(panicValue)
+ }
+ }
+ }
+ }
+ return next, stop
+}
+
+// Pull2 converts the “push-style” iterator sequence seq
+// into a “pull-style” iterator accessed by the two functions
+// next and stop.
+//
+// Next returns the next pair in the sequence
+// and a boolean indicating whether the pair is valid.
+// When the sequence is over, next returns a pair of zero values and false.
+// It is valid to call next after reaching the end of the sequence
+// or after calling stop. These calls will continue
+// to return a pair of zero values and false.
+//
+// Stop ends the iteration. It must be called when the caller is
+// no longer interested in next values and next has not yet
+// signaled that the sequence is over (with a false boolean return).
+// It is valid to call stop multiple times and when next has
+// already returned false. Typically, callers should “defer stop()”.
+//
+// It is an error to call next or stop from multiple goroutines
+// simultaneously.
+//
+// If the iterator panics during a call to next (or stop),
+// then next (or stop) itself panics with the same value.
+func Pull2[K, V any](seq Seq2[K, V]) (next func() (K, V, bool), stop func()) {
+ var (
+ k K
+ v V
+ ok bool
+ done bool
+ yieldNext bool
+ racer int
+ panicValue any
+ seqDone bool
+ )
+ c := newcoro(func(c *coro) {
+ race.Acquire(unsafe.Pointer(&racer))
+ if done {
+ race.Release(unsafe.Pointer(&racer))
+ return
+ }
+ yield := func(k1 K, v1 V) bool {
+ if done {
+ return false
+ }
+ if !yieldNext {
+ panic("iter.Pull2: yield called again before next")
+ }
+ yieldNext = false
+ k, v, ok = k1, v1, true
+ race.Release(unsafe.Pointer(&racer))
+ coroswitch(c)
+ race.Acquire(unsafe.Pointer(&racer))
+ return !done
+ }
+ // Recover and propagate panics from seq.
+ defer func() {
+ if p := recover(); p != nil {
+ panicValue = p
+ } else if !seqDone {
+ panicValue = goexitPanicValue
+ }
+ done = true // Invalidate iterator.
+ race.Release(unsafe.Pointer(&racer))
+ }()
+ seq(yield)
+ var k0 K
+ var v0 V
+ k, v, ok = k0, v0, false
+ seqDone = true
+ })
+ next = func() (k1 K, v1 V, ok1 bool) {
+ race.Write(unsafe.Pointer(&racer)) // detect races
+
+ if done {
+ return
+ }
+ if yieldNext {
+ panic("iter.Pull2: next called again before yield")
+ }
+ yieldNext = true
+ race.Release(unsafe.Pointer(&racer))
+ coroswitch(c)
+ race.Acquire(unsafe.Pointer(&racer))
+
+ // Propagate panics and goexits from seq.
+ if panicValue != nil {
+ if panicValue == goexitPanicValue {
+ // Propagate runtime.Goexit from seq.
+ runtime.Goexit()
+ } else {
+ panic(panicValue)
+ }
+ }
+ return k, v, ok
+ }
+ stop = func() {
+ race.Write(unsafe.Pointer(&racer)) // detect races
+
+ if !done {
+ done = true
+ race.Release(unsafe.Pointer(&racer))
+ coroswitch(c)
+ race.Acquire(unsafe.Pointer(&racer))
+
+ // Propagate panics and goexits from seq.
+ if panicValue != nil {
+ if panicValue == goexitPanicValue {
+ // Propagate runtime.Goexit from seq.
+ runtime.Goexit()
+ } else {
+ panic(panicValue)
+ }
+ }
+ }
+ }
+ return next, stop
+}
+
+// goexitPanicValue is a sentinel value indicating that an iterator
+// exited via runtime.Goexit.
+var goexitPanicValue any = new(int)