Generators / yield

BlitzMax Forums/BlitzMax Programming/Generators / yield

Yasha

(Posted 2014) [#1]

More language extensions... this time, a Generator class.

generator.bmx:

' Generator implementation for BlitzMax

Import "generator.o"
SuperStrict

' Base class for generators. A subclass supplies the generator body in Run()
' and hands values back to the consumer by calling Yield().
'
' Fields:
'   _frame - MemAlloc'd scratch buffer handed to the GEN_* assembly routines.
'            As used by generator.S: bytes 0-11 hold the generator's
'            ebx/esi/edi, bytes 12-23 the resumer's ebx/esi/edi, bytes 24-27 the
'            saved frame size, and the copied stack contents follow from byte 28.
'   _ins   - code address written by GEN_Yield2 (Null until the first Yield);
'            GEN_Resume jumps to it.
'   _st    - state flag: -1 = (re)start Run() from the top on the next Resume,
'            1 = has been started, 0 = Done() was called.
Type TGenerator Abstract
	Field _frame:Byte Ptr, _ins:Byte Ptr, _st:Int
	
	' Hands val back to whoever called Resume(). GEN_Yield2 does not return
	' here: it unwinds this method's frame and Run()'s frame and returns val
	' from Run()'s caller. A later Resume() continues after the Yield call.
	Method Yield(val:Object) Final
		GEN_Yield2 _frame, Varptr _ins, val
	End Method
	' Clears the state flag so that TGeneratorEnumerator.HasNext reports False.
	' This only sets the flag; it does not by itself leave Run().
	Method Done() Final
		_st = 0
	End Method
	' Starts the generator (fresh, after Reset, or when no Yield has been
	' recorded yet) or continues it from the last Yield. Returns the object
	' passed to Yield, or Run()'s own return value if Run() returns.
	' NOTE(review): there is no check for _st = 0 here, so calling Resume()
	' by hand after Done() still re-enters the saved frame - confirm intended.
	Method Resume:Object() Final
		If _st = -1 Or _ins = Byte Ptr(0)
			_st = 1
			GEN_CalleeSave _frame ; Return Run()	'It is IMPERATIVE that nothing come between these two commands - sensitive to register allocation
		Else
			Return GEN_Resume(_frame, _ins)
		EndIf
	End Method
	' Makes the next Resume() start Run() from the top again. Only the state
	' flag changes; _ins, _frame and any subclass fields are left as they are.
	Method Reset() Final
		_st = -1
	End Method
	' The generator body, implemented by subclasses. It is meant to be entered
	' through Resume(), which records the callee-save registers first.
	Method Run:Object() Abstract
	
	' Allocates the save buffer and puts the generator in the "not started" state.
	' NOTE(review): generator.S copies the live frame into this buffer without
	' a bounds check; with the 28-byte header only 228 bytes of frame fit.
	Method New()
		_frame = MemAlloc(256)	'Technically not safe but I think it's enough (better than BRL.Reflection can manage)
		_st = -1 ; _ins = Byte Ptr(0)
	End Method
	' Releases the save buffer allocated in New().
	Method Delete()
		MemFree _frame
	End Method
	' Lets a generator be used directly as the source of a For..EachIn loop
	' by wrapping it in a TGeneratorEnumerator.
	Method ObjectEnumerator:TGeneratorEnumerator() Final
		Local e:TGeneratorEnumerator = New TGeneratorEnumerator ; e.this = Self ; Return e
	End Method
End Type

' Enumerator adapter that lets For..EachIn pull values out of a TGenerator.
Type TGeneratorEnumerator Final
	' The generator being driven (assigned by TGenerator.ObjectEnumerator).
	Field this:TGenerator

	' Reports whether the loop should ask for another value: False once the
	' generator's state flag has been cleared to 0, True otherwise.
	Method HasNext:Int() Final
		If this._st = 0 Then Return False
		Return True
	End Method

	' Runs the generator up to its next Yield (or to the end of Run) and
	' passes along whatever it produced.
	Method NextObject:Object() Final
		Local produced:Object = this.Resume()
		Return produced
	End Method
End Type

Private
' Bindings to the routines implemented in generator.S (linked in via generator.o).
Extern
' Records the resume address through ins, saves the generator's registers and
' stack frame into fr, then returns val from the generator body's caller.
' ins must be the ADDRESS of the slot to fill (TGenerator.Yield passes Varptr _ins).
Function GEN_Yield2(fr:Byte Ptr, ins:Byte Ptr, val:Object) = "GENERATOR_Yield2"
' Restores the registers and stack frame saved in fr and jumps to ins.
' Here ins is the saved code address itself (TGenerator.Resume passes _ins by value).
Function GEN_Resume:Object(fr:Byte Ptr, ins:Byte Ptr) = "GENERATOR_Resume"
' Stores the caller's ebx/esi/edi into fr so that a later Yield can restore them.
Function GEN_CalleeSave(fr:Byte Ptr) = "GENERATOR_CalleeSave"
'Function bbRefMethodPtr:Byte Ptr(o:Object, i:Int)	'Reminder: may want to use this to bolster CalleeSave
End Extern

' _gen exists only so that _init() runs when the module is initialised.
Global _gen:Int = _init()
' Start-up guard: raises a runtime error on non-x86 builds, because the
' assembly routines are written for 32-bit x86 only. Always returns 0.
Function _init:Int()
?Not x86
	RuntimeError "TGenerator does not support non-x86 platforms"
?
	Return 0
End Function
Public

generator.S (compile with 'gcc -m32 -c generator.S' to get generator.o):

// FUNC(s) produces the linker-visible name for symbol s: it prepends an
// underscore when building for Windows or Apple targets and leaves the name
// unchanged everywhere else.
#if defined(__WIN32__) || defined(__APPLE__)
# define FUNC(s) _##s
#else
# define FUNC(s) s
#endif

// All three routines live in the code section and are exported so the
// BlitzMax Extern declarations can link against them.
.text
.globl FUNC(GENERATOR_Yield2)
.globl FUNC(GENERATOR_Resume)
.globl FUNC(GENERATOR_CalleeSave)

// GENERATOR_Yield2(void * frame, void ** ins, void * val)
//
// Suspends the generator body: records where to resume, snapshots its
// registers and stack frame into the buffer at `frame`, then returns `val`
// from the generator body's caller.
// Also unwinds the frame of the wrapping BM func
//
// Buffer layout used here:
//    0.. 8  generator's ebx / esi / edi (written here)
//   12..20  resumer's ebx / esi / edi (read here; written by
//           GENERATOR_CalleeSave / GENERATOR_Resume)
//   24      size in bytes of the saved stack region
//   28..    copy of the stack from the wrapper's return-address slot up to
//           (not including) the generator body's %ebp
//
// NOTE(review): assumes the wrapper and the generator body both use a standard
// %ebp frame (push %ebp; mov %esp, %ebp) - confirm against the compiler output.
// NOTE(review): the copy loop has no bounds check against the buffer size.
FUNC(GENERATOR_Yield2):  //(void * frame, void ** ins, void * val)
        // Save the return pointer: 4(%ebp) is the wrapper's return address,
        // i.e. the instruction after the Yield call in the generator body
	mov 4(%ebp), %eax
        mov 8(%esp), %edx
	mov %eax, (%edx)
        // Save the gen's callee-save registers
        mov 4(%esp), %edx
        mov %ebx, (%edx)
        mov %esi, 4(%edx)
        mov %edi, 8(%edx)
        // Restore the local callee-save registers
        mov 12(%edx), %ebx
        mov 16(%edx), %esi
        mov 20(%edx), %edi
        add $24, %edx
        // Save the yield value (the args become unreachable once %esp moves)
        mov 12(%esp), %eax
        mov %eax, (%edx)  // temporary: this slot is overwritten with the frame size below
        // Pop the call to the Yield wrapper
        mov %ebp, %esp
        pop %ebp
        // Park the yield value just below %esp, in the slot that held the
        // wrapper's saved %ebp (popped above)
        mov (%edx), %eax
        mov %eax, -4(%esp)
        // Save the stack frame
	mov %ebp, %ecx
        sub %esp, %ecx
        mov %ecx, (%edx)    // Save the frame size
        cmp $0, %ecx
        je _end_y
        add $4, %edx
        mov %esp, %ecx
    _top_y:
        // Copy one dword per iteration from the stack (%ecx) to the buffer (%edx)
        mov (%ecx), %eax
        mov %eax, (%edx)
        add $4, %ecx
        add $4, %edx
        cmp %ebp, %ecx
        jne _top_y
    _end_y:
        // Return the Yield value
        mov -4(%esp), %eax
        mov %ebp, %esp  // return *from caller*
        pop %ebp
	ret

// GENERATOR_CalleeSave(void * frame)
//
// Stores the caller's current ebx / esi / edi at offsets 12 / 16 / 20 of the
// buffer. GENERATOR_Yield2 reloads exactly these slots before it returns on
// the generator body's behalf. No stack frame is set up and only %edx is used
// as scratch.
FUNC(GENERATOR_CalleeSave): //(void * frame)
        mov 4(%esp), %edx
        // Save local callee-save registers
        mov %ebx, 12(%edx)
        mov %esi, 16(%edx)
        mov %edi, 20(%edx)
        ret

// GENERATOR_Resume(void * frame, void * ins)
//
// Continues a generator suspended by GENERATOR_Yield2.
//   frame - the save buffer: 0..8 generator's ebx/esi/edi, 12..20 resumer's
//           ebx/esi/edi, 24 saved frame size, 28.. saved stack contents
//   ins   - the code address to continue at. It is used directly as the jump
//           target, so it is the saved address itself, not a pointer to it.
//
// This routine never returns by itself. Its own %ebp frame takes the place of
// the generator body's frame, so when the body next yields or returns, control
// goes back to this routine's caller with the result in %eax.
FUNC(GENERATOR_Resume):
        mov 4(%esp), %edx
        push %ebp
        mov %esp, %ebp
        // Save local callee-save registers (reloaded by the next Yield)
        mov %ebx, 12(%edx)
        mov %esi, 16(%edx)
        mov %edi, 20(%edx)
        // Restore the gen's callee-save registers
        mov (%edx), %ebx
        mov 4(%edx), %esi
        mov 8(%edx), %edi
        add $24, %edx
        // Restore the stack frame pointers: rebuild the frame directly
        // below our own %ebp
        mov %ebp, %esp
        sub (%edx), %esp
        // Restore the stack frame content
        mov (%edx), %eax
        cmp $0, %eax
        je _end_r
        add $4, %edx
        mov %esp, %ecx
    _top_r:
        mov (%edx), %eax
        mov %eax, (%ecx)
        add $4, %ecx
        add $4, %edx
        cmp %ebp, %ecx
        jne _top_r
        // The lowest saved dword is the return-address slot of the Yield
        // call. We are about to JUMP to that address rather than `ret` to it,
        // so drop the slot here to leave %esp where the code after the call
        // expects it. Without this, %esp ends up 4 bytes too low and the saved
        // frame grows by 4 bytes on every yield.
        add $4, %esp
    _end_r:
        mov 12(%ebp), %eax  // Return to saved instruction
        jmp *%eax

And a simple (worthless) example similar to the Python one on the wiki:

Import "generator.bmx"
SuperStrict

' Example generator: produces the integers n, n+1, n+2, ... as Strings, forever.
Type Count Extends TGenerator
	' Convenience constructor: creates a Count that starts at n.
	Function From:TGenerator(n:Int)
		Local g:Count = New Count ; g.n = n ; Return g
	End Function
	
	' First value to produce; read once at the top of Run().
	Field n:Int
	' Generator body. Each Yield hands one value to the consumer; the loop
	' carries on from that point the next time the generator is resumed.
	Method Run:Object()
	'	Print "starting run"
		Local x:Int = n		'Notice how we're not updating the field within the loop
		Repeat
		'	Print "loop top"
			Yield String(x)
		'	Print "restored"
			x :+ 1
		Forever
	End Method
End Type


' Part 1: let For..EachIn drive the generator. The stream never ends on its
' own, so the consumer leaves the loop once a value above 15 has been printed.
Print "~nLooping:"
For Local item:String = EachIn Count.From(10)
'	Print "enum loop top"
	Print item
'	Print "enum loop bottom"
	If item.ToInt() > 15 Exit
Next

' Part 2: drive a second generator by hand, pulling three values from it.
Print "~nManual resumption:"
Local counter:TGenerator = Count.From(5)
For Local pass:Int = 1 To 3
	Print counter.Resume().ToString()
Next

Print "~ndone."

Uncomment the Print lines to see weird control flow in action.

It struck me after beginning this that for all practical purposes, an ObjectEnumerator is a generator anyway, it just doesn't make the Yield command explicit... well, this offers an explicit Yield command that can express slightly more unreadable logic.

The example should be pretty straightforward, but anyway - to implement your own generator function, subclass TGenerator and give it a Run() method; return objects via Yield. The Run() method acts as the body of the generator function. Do not invoke Run() directly - it will probably crash your program if you do.

Assuming you're using the generator in a For-EachIn loop, the generator will continue to return objects and resume execution after Yield until either the loop kills it (with Exit, as usual), or the generator calls Done (which will tell the loop that there are no more values). If you're not using a loop (?), you can resume the generator (or start it for the first time) with Resume. The generator can also have its position reset with Reset, causing it to forget where it Yielded from and roll from the top on the next run. (Reset does not affect anything else, e.g. object fields.)

The Count.From function and the parameter field .n in the example are not integral to generators, and you can use this sort of thing, or not, as you like.

Minimally tested on OSX only so far.

Who was John Galt?

(Posted 2014) [#2]

What's the point of this? Serious question not a troll. What sort of thing is it used for?

Yasha

(Posted 2014) [#3]

Two parts to answering that.

Firstly, the technical functionality that this adds is a "Yield" keyword (I've implemented it as a method, but you should think of it as a keyword). "Yield" (in the context of generators) is like "Return" except it remembers where you Yielded from, and the function can be resumed immediately afterwards. (Coroutine yield is more complicated... and not relevant here.) So if you yield in the middle of a loop, you can resume the function and continue the rest of the loop and iterate again. (If I got this right, it should record the state of all the local variables too, minimizing the need for instance fields as locals.)

Secondly, the concept: what generators are usually for is supplying objects to a consumer - such as a loop body, shown here. The conceptual difference between an enumerator and a generator is that an enumerator, as we usually see in BlitzMax, takes objects from an existing collection; a generator is normally lazy, that is, it creates the stream of objects on-demand (as shown in the example with an infinite list of numbers). A generator is thus more general than an enumerator: it can handle infinite sequences (obviously can't be created in advance), or it can specialize each instance in response to something that happens in between (cooperative tasks), such as perhaps the loop body providing feedback ("no! you're doin' it wrong!").

The Yield command makes it easier to express the production of objects in terms of a stream, instead of as a sequence: you can write your object production as a single loop, and it looks like a production loop is passing things "up" to a function (when in fact the loop is being paused and the objects are being passed down to the consumer). That way you can have separate but interleaved producer/consumer loops. (You could envisage this in terms of two threads, except each one has to pause the other while it does its thing then hand back control, so it only needs one OS thread.)

Mechanically speaking there's nothing to stop you using ObjectEnumerator to create lazy streams, as I mentioned in the OP; but it's a little less expressive as the enumerator is always locked into looking like a higher level that has to "return" complete objects. With generators you're able to write object production as a loop, which may come closer to what you actually want to express by making it look like a production stream.

If that didn't make sense (which it probably didn't), the best I can suggest is to look at the wiki articles for Generator and Coroutine, and maybe find some Python tutorials on its version of this.

Who was John Galt?

(Posted 2014) [#4]

Thanks Yasha for the detailed explanation. I'm leaving it to sink in for a bit, then I'll be taking a look at those links.

Yasha

(Posted 2014) [#5]

I'm ashamed to admit that there was a really stupid mistake in this, which I think has now been fixed - it didn't actually save the state of all local variables (which I didn't notice because the example used a field - now it uses a local to demonstrate the ...uh, point). Worse, it corrupted others in the rest of the program! (Basically I had forgotten entirely about the existence of callee-save registers.)

Please copy the code again if you grabbed the initial version.

Derron

(Posted 2014) [#6]

I had to wait for the explanation too - so thanks @ John for asking.

@do not run "run()" directly ... what about naming such functions "_Run" so people knowing this convention know what it means without BlitzMax having the private-functionality.

@functionality
I can think of thingies like the generator doing "BeforeRun" and then "Run" so things can be done hidden/automagically.
May save Code when extending objects, overriding functions/methods which need then "super.methodname()" within to call the original function.

Other ideas how this little pearl could be used?

bye
Ron

Yasha

(Posted 2014) [#7]

'BeforeRun'? I don't follow.... This doesn't provide any obvious means to handle "silent extension" (AKA "advice"), at least not that I can see. That would be a useful feature but I don't see the connection to generators.

I was going to try to make an Aspects module next, though, if that interests you.

Derron

(Posted 2014) [#8]

"BeforeRun" ...

Something in the likes of:

' Illustration only: generator.getEach() and TMyObjects are hypothetical names
' from this post. Saves every object the loop receives.
for local obj:TMyObjects = eachin generator.getEach()
obj.save()
next

Now getEach calls obj.BeforeSave() before it returns the object to the for loop, so "BeforeSave" does not need to be called in TMyObjects.Save() (which could be extended multiple times without calling "super.save()" in the overridden save()-function).

Now I feel kind of dumb :D

bye
Ron