Code archives/Miscellaneous/Simple Expression Compiler

This code has been declared by its author to be Public Domain code.

Download source code

Simple Expression Compiler by GW2014
I knocked this up yesterday and thought I would share.
The code parses basic style expressions and translates to x86 assembly.
I'm not sure if the generated asm is correct or not.



The Lexer
Rem
	Simple Expression compiler v0.1 by Aaron Woodard, Jan 2014 [admin@...]
	TODO: 
		Add 'else' clause to 'if'
		Check if the generated asm is even remotely correct..
		better error msgs
EndRem
Const TOK_NUMBER%=1
Const TOK_FLOAT%=2
Const TOK_PLUS%=3
Const TOK_MINUS%=4
Const TOK_MUL%=5
Const TOK_DIV%=6
Const TOK_IDENT%=7
Const TOK_EQUALS%=8
Const TOK_DBLEQUALS%=9
Const TOK_LPAREN%=10
Const TOK_RPAREN%=11
Const TOK_LBRAC%=12
Const TOK_RBRAC%=13
Const TOK_STRING%=14
Const TOK_VAR%=15
Const TOK_SEMICOL%=16
Const TOK_IF%=17
Const TOK_ELSE%=18
Const TOK_ENDIF%=19
Const TOK_LT%=20
Const TOK_GT%=21
Const TOK_LTE%=22
Const TOK_GTE%=23
Const TOK_NE%=24
Const TOK_THEN%=25 '!


Const TOK_EOF%=100


'------------------------------------------------------------------------------------------------------------------------------
Type tToken
	Field Typ%
	Field Value$
	
	Method dump()
		DebugLog("Typ:"+typ+ " > '" + Value + "'")
	End Method
	
	Function Create:tToken(_typ%,_value$)
		Local t:tToken = New tToken
		t.typ=_typ
		t.value=_value
		Return t
	End Function
End Type
'------------------------------------------------------------------------------------------------------------------------------

Global CurrIdx%=0
Global CurrToke:tToken
Global NextToke:ttoken

'------------------------------------------------------------------------------------------------------------------------------
Function Consume()
	currToke = NextToke
	NextToke= Toke()
	If currToke Then DebugLog("CT='"+currtoke.value+"'")
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function Toke:tToken()
	Local t:tToken=New tToken
	Local tmp$
	
	If CurrIdx+1 > codestring.length Then 
		t.typ = TOK_EOF 
		Return t
	EndIf
	
	tmp = codestring[CurrIdx..CurrIdx+1]
	While "~t~r~n ".contains(tmp)
		CurrIdx :+ 1		
		tmp = codestring[CurrIdx..CurrIdx+1]
		If CurrIdx+1 > codestring.length Then 
			t.typ = TOK_EOF
			Return t
		EndIf 
	Wend
	
	'  NUMBER
	If "1234567890.".contains(tmp) Then
		t = LexNumber()
		If Not t Then RuntimeError("null token!")
		Return t
	EndIf
	
	' +
	If tmp = "+" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_PLUS,tmp)
	EndIf
	
	' -
	If tmp = "-" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_MINUS,tmp)
	EndIf
	
	' *
	If tmp = "*" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_MUL,tmp)
	EndIf
	
	' /
	If tmp = "/" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_DIV,tmp)
	EndIf
	
	' WORD
	
	If "abcdefghijklmnopqrstuvwxyz_".contains(tmp) Then
		t = LexWord()
		Select t.Value
			Case "var"
				Return tToken.Create(TOK_VAR,"var")
			Case "if"
				Return tToken.Create(TOK_IF,"if")
			Case "then"
				Return tToken.Create(TOK_THEN,"then")
			Case "else"
				Return tToken.Create(TOK_ELSE,"else")
			Case "endif"
				Return tToken.Create(TOK_ENDIF,"endif")
			Default
				Return t
				'Error("Unkown token '" + t.value + "'" )
		End Select
	EndIf
	
	' == or =
	If tmp = "=" Then		
		If codestring[CurrIdx..CurrIdx+2] = "==" Then
			CurrIdx :+ 2
			Return tToken.Create(TOK_DBLEQUALS,"==")
		Else
			CurrIdx :+ 1
			Return tToken.Create(TOK_EQUALS,tmp)
		EndIf
	EndIf
	
	' (
	If tmp = "(" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_LPAREN,tmp)
	EndIf
	
	' )
	If tmp = ")" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_RPAREN,tmp)
	EndIf
	
	' ;
	If tmp = ";" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_SEMICOL,tmp)
	EndIf
	
	
	' [
	If tmp = "[" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_LBRAC,tmp)
	EndIf
	
	' ]
	If tmp = "]" Then
		CurrIdx :+ 1
		Return tToken.Create(TOK_RBRAC,tmp)
	EndIf
	
	If tmp = "<" Then
		If codestring[CurrIdx..CurrIdx+2] = "<=" Then
			CurrIdx :+ 2
			Return tToken.Create(TOK_LTE,"<=")
		ElseIf codestring[CurrIdx..CurrIdx+2] = "<>" Then
			CurrIdx :+ 2
			Return tToken.Create(TOK_NE,"<>")
		Else
			CurrIdx :+ 1
			Return tToken.Create(TOK_LT,"<")
		EndIf
	EndIf
	
	If tmp = ">" Then
		If codestring[CurrIdx..CurrIdx+2] = ">=" Then
			CurrIdx :+ 2
			Return tToken.Create(TOK_GTE,">=")
		Else
			CurrIdx :+ 1
			Return tToken.Create(TOK_GT,">")
		EndIf
	EndIf

	If tmp = "~q" Then
		t=lexString()
		CurrIdx :+ 1
		Return t
	EndIf

	RuntimeError("Unknown Token '" + tmp + "'")
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function Lexnumber:tToken()
	Local t:tToken=New tToken
	Local tmp$
	Local dotcount%=0
	
	While "1234567890.".contains(codestring[CurrIdx..CurrIdx+1])
		If codestring[CurrIdx..CurrIdx+1] = "." Then dotcount :+ 1
		If dotcount > 1 Then RuntimeError("Malformed number! '" + tmp + "'")
		tmp :+ codestring[CurrIdx..CurrIdx+1]
		CurrIdx :+ 1
	Wend
	t.typ = TOK_FLOAT
	t.value = tmp
	Return t
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function LexWord:tToken()
	Local t:tToken=New tToken
	Local tmp$
	While "abcdefghijklmnopqrstuvwxyz_".contains(codestring[CurrIdx..CurrIdx+1])
		tmp :+ codestring[CurrIdx..CurrIdx+1]
		CurrIdx :+ 1
	Wend
	t.typ = TOK_IDENT
	t.value = tmp
	Return t
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function LexString:tToken()
	Local t:tToken=New tToken
	Local tmp$
	Local endt$
	currIdx :+ 1

	Repeat
		tmp :+ codestring[CurrIdx..CurrIdx+1]
		currIdx :+ 1
	Until "~r~n~q".Contains(codestring[CurrIdx..CurrIdx+1])
	endt = codestring[CurrIdx..CurrIdx+1]	
	
	If endt = "~q" Then
		t.typ = TOK_STRING
		t.value = tmp
		Return t
	EndIf
	
	If "~r~n".contains(endt) Then
		RuntimeError("Malformed string literal '" + tmp + "'")
	EndIf
End Function
'------------------------------------------------------------------------------------------------------------------------------




Parser:


SuperStrict 
Framework brl.retro

Include "lexer.bmx"
	
Rem
	Simple Expression compiler v0.1 by Aaron Woodard, Jan 2014 [admin@...]
	TODO: 
		Add 'else' clause to 'if'
		Check if the generated asm is even remotely correct..
		better error msgs
EndRem



Global CodeString$	
Global num_numbers#[]
Global vars$[]
Global cLabels%
Global asmstring$


CodeString = 	"var x;~n"
CodeString :+ 	"var myvar;~n"
CodeString :+ 	"var anothervar;~n"
CodeString :+ 	"x=0.5*2;~n"
CodeString :+ 	"if(x>1) then~n"
CodeString :+ 	"	myvar=1+2-3*4/5;~n"
CodeString :+ 	"	if(myvar==1.1) then~n"
CodeString :+ 	"		anothervar=x*(-myvar+123.456)*-0.1;~n"
CodeString :+ 	"	endif~n"
CodeString :+ 	"endif~n"



Print "~n~n"+CodeString+"~n~n"
Consume()
Consume()
Parse()

Print prolog()
Print asmstring
Print epilog()
Print datasec()







'------------------------------------------------------------------------------------------------------------------------------
Function Parse()
	While  CurrToke.typ <> TOK_EOF And (CurrToke.typ = TOK_Var Or (CurrToke.typ = TOK_IDENT And NextToke.Typ = TOK_EQUALS) Or CurrToke.Typ = TOK_IF)
		If CurrToke.typ = TOK_Var Then
			DeclareVar()
		
		ElseIf CurrToke.typ = TOK_IDENT And NextToke.Typ = TOK_EQUALS Then
			Assignment()
				
		ElseIf CurrToke.Typ = TOK_IF Then
			IfExpression()		
		EndIf
	Wend
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function DeclareVar()
	Local idx%
	Local v$
	If CurrToke.typ = TOK_Var Then
		Consume()
		If CurrToke.typ = TOK_IDENT Then 
			If checkvars(Currtoke.Value) Then Error("Already Defined")
			v=Currtoke.Value
			vars :+ [v]
			Consume()
			If CurrToke.Typ <> TOK_SEMICOL Then Error("missing ';'")
			Consume()
		EndIf
	EndIf
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function Assignment()
	Local idx%
	Local v$
	While CurrToke.typ = TOK_IDENT And NextToke.Typ = TOK_EQUALS 
		v=Currtoke.Value
		Consume()
		If CurrToke.typ = TOK_EQUALS Then	
			Consume()
			Expression()
			idx = getvarindex(v)
			If idx<0 Then error("invalid for var  '"+v+"'")
			addasm("	fstp dword [ebp-" + (4+idx*4) + "]		;store in "+v )
			
			If CurrToke.Typ <> TOK_SEMICOL Then Error("missing ';'")
			Consume()
		EndIf
	Wend
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function Expression()
	AddExpression()
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function IfExpression()
	Local labElseif$ 
	Local labEndif$
	Local cond$=""
	 
	While CurrToke.Typ = TOK_IF 
		If CurrToke.Typ = TOK_IF Then

			labEndif= MakeLabel("_endif")
			
			Consume()
			If CurrToke.Typ <> TOK_LPAREN Then
				Error("Expected '('" )
			EndIf
			Consume()
						
			CondExpression(cond)
			


			If CurrToke.Typ <> TOK_RPAREN Then
				Error("Expected ')'" )
			EndIf
			Consume()
			'Then
			If CurrToke.Typ <> TOK_THEN Then
				Error("Expected 'then'" )
			EndIf
			Consume()
			
			addasm("	fxch")
			addasm("	fucompp")
			addasm("	fnstsw ax")
			addasm("	sahf")
		
			Select cond
				Case ">"
					addasm("	setbe al")
				Case "<"
					addasm("	setae al")
				Case ">="
					addasm("	setb al")
				Case "<="
					addasm("	seta al")
				Case "=="
					addasm("	setnz al")
				Case "<>"
					addasm("	setz al")
				Default
					DebugLog("What!?")
			End Select
			
			addasm("	movzx eax,al")
			addasm("	cmp eax,0")
			addasm("	jne "+labEndif)
			
			Parse()
			
			If CurrToke.Typ <> TOK_ENDIF Then
				Error("Expected 'endif'" )
			EndIf
			addasm(labEndif)
			Consume()
		EndIf
	Wend
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function CondExpression(cond$ Var)
	Expression()
	If CurrToke.Typ <> TOK_LT And CurrToke.Typ <> TOK_GT And CurrToke.Typ <> TOK_LTE And CurrToke.Typ <> TOK_GTE And CurrToke.Typ <> TOK_NE And CurrToke.Typ <> TOK_DBLEQUALS Then
		Error("Expected '<,>,<=,>=,<>,=='" )
	EndIf
	
	Select CurrToke.Value
		Case ">","<",">=","<=","==","<>"
			cond = CurrToke.Value
		Default 
			Error("Unkown token")
	End Select
	 
	Consume()
	Expression()
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function AddExpression()
	If CurrToke.Typ<>TOK_MINUS And CurrToke.Typ<> TOK_LPAREN And CurrToke.Typ<> TOK_FLOAT And CurrToke.Typ<>TOK_IDENT Then Error("Expected '-' or '(' or number or ident'")
	 
	MulExpression()
	While CurrToke.Typ = TOK_PLUS 'Then
		Consume()
		MulExpression()
		addasm("	faddp ")
	Wend
	While CurrToke.Typ = TOK_MINUS 'Then
		Consume()
		MulExpression()
		addasm("	fsubp ")
	Wend
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function MulExpression()
	If CurrToke.Typ<>TOK_MINUS And CurrToke.Typ<> TOK_LPAREN And CurrToke.Typ<> TOK_FLOAT And CurrToke.Typ<>TOK_IDENT Then Error("Expected '-' or '(' or number or ident'")
	Primary()
	While CurrToke.Typ = TOK_MUL 'Then
		Consume()
		Primary()
		addasm("	fmulp ")
	Wend
	While CurrToke.Typ = TOK_DIV 'Then
		Consume()
		Primary()
		addasm("	fdivp ")
	Wend
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function Primary()
	Local idx%,v$
	If CurrToke.Typ<>TOK_MINUS And CurrToke.Typ<> TOK_LPAREN And CurrToke.Typ<> TOK_FLOAT And CurrToke.Typ<>TOK_IDENT Then Error("Expected '-' or '(' or number or ident'")
	While CurrToke.Typ = TOK_MINUS
		Consume()
		If CurrToke.Typ = TOK_FLOAT Then
			CurrToke.Value= String(-Float(CurrToke.Value))
		Else 
			Expression()
			addasm("	fchs")	
		End If
	Wend
	
	While CurrToke.Typ = TOK_LPAREN 		
		Consume()
		Expression()
		If CurrToke.Typ <> TOK_RPAREN Then Error("!!")
		Consume() 
	Wend
	
	While CurrToke.Typ = TOK_FLOAT 
		addasm("	fld dword [_" + addorgetnum(CurrToke.Value)+"]")
		Consume()
	Wend
	
	While CurrToke.Typ = TOK_IDENT
		v=CurrToke.Value 
			idx = getvarindex(v)
			If idx<0 Then error("unkown var '"+v+"'")
			addasm("	fld dword [ebp-"+(4+idx*4)+"]			;load '"+v+"'" )
			Consume()
		Return 
	Wend
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function Error(s$)
	Print("ERR!  " + s)
	DebugStop
	End	
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function dbg(s$)
	'Print("dbg:  " + s)
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function FloatHex:String(num:Float) 'lendian
	Local p:Float Ptr = Varptr num
	Local bp:Byte Ptr = Int Ptr Int(p)
	Local out:String = ""
	
	out:+Right(Hex(bp[3]), 2)
	out :+ Right(Hex(bp[2]),2)
	out :+ Right(Hex(bp[1]),2)
	out :+ Right(Hex(bp[0]),2)
	Return "0x" + out
End Function 
'------------------------------------------------------------------------------------------------------------------------------
Function addorgetnum%(v$)
	For Local i% = 0 Until num_numbers.Length
		If num_numbers[i] = Float(v) Then
			Return i
		EndIf
	Next
	num_numbers :+ [Float(v)] '= AppendFArray( num_numbers,Float(v) )
	Return num_numbers.Length-1 'num_numbers[num_numbers.Length-1]
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function checkvars%(v$)
	If Not vars.length Then Return False
	For Local i% = 0 Until vars.length
		If v=vars[i] Then Return True
	Next
	Return False
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function getvarindex%(v$)
	For Local i% = 0 Until vars.length
		If v=vars[i] Then 
			Return i
		EndIf
	Next
	Return -1
End Function

'------------------------------------------------------------------------------------------------------------------------------
Function addasm(s$)
	asmstring :+ s + "~n"
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function prolog$()
	Local tmp$
	tmp = ";------Begin------;~n_func:~n"
	tmp :+ "	push ebp~n~tmov ebp,esp~n"
	If vars.length Then
		tmp :+ "	sub esp, " + (vars.Length*4)
	EndIf 
	Return tmp
End Function	
'------------------------------------------------------------------------------------------------------------------------------
Function epilog$()
	Local tmp$
	tmp = "	mov esp,ebp~n~tpop ebp~n~tret~n;------End------;"
	Return tmp
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function datasec$()
	Local tmp$
	tmp ="~nsection ~qdata~q~n"
	For Local i% = 0 Until num_numbers.length
		tmp :+ "_"+i+":~n"
		tmp :+"dd " + FloatHex(num_numbers[i]) + "		;-> "+num_numbers[i]+"~n"
		tmp :+"align 4~n"
	Next
	Return tmp
End Function
'------------------------------------------------------------------------------------------------------------------------------
Function MakeLabel$(pre$="_")
	cLabels :+ 1
	Return Pre + String(cLabels) +":"
End Function

'
'
'
'
'
---------------------------------
-=[ Here is an example input: ]=-
---------------------------------
var x;
var myvar;
var anothervar;
x=0.5*2;
if(x>1) then
	myvar=1+2-3*4/5;
	if(myvar==1.1) then
		anothervar=x*(-myvar+123.456)*-0.1;
	endif
endif

-------------------------------
-=[ and here is the output ]=-
-------------------------------

;------Begin------;
_func:
	push ebp
	mov ebp,esp
	sub esp, 12
	fld dword [_0]
	fld dword [_1]
	fmulp 
	fstp dword [ebp-4]		;store in x
	fld dword [ebp-4]			;load 'x'
	fld dword [_2]
	fxch
	fucompp
	fnstsw ax
	sahf
	setbe al
	movzx eax,al
	cmp eax,0
	jne _endif1:
	fld dword [_2]
	fld dword [_1]
	faddp 
	fld dword [_3]
	fld dword [_4]
	fmulp 
	fld dword [_5]
	fdivp 
	fsubp 
	fstp dword [ebp-8]		;store in myvar
	fld dword [ebp-8]			;load 'myvar'
	fld dword [_6]
	fxch
	fucompp
	fnstsw ax
	sahf
	setnz al
	movzx eax,al
	cmp eax,0
	jne _endif2:
	fld dword [ebp-4]			;load 'x'
	fld dword [ebp-8]			;load 'myvar'
	fld dword [_7]
	faddp 
	fchs
	fmulp 
	fld dword [_8]
	fmulp 
	fstp dword [ebp-12]		;store in anothervar
_endif2:
_endif1:

	mov esp,ebp
	pop ebp
	ret
;------End------;

section "data"
_0:
dd 0x3F000000		;-> 0.500000000
align 4
_1:
dd 0x40000000		;-> 2.00000000
align 4
_2:
dd 0x3F800000		;-> 1.00000000
align 4
_3:
dd 0x40400000		;-> 3.00000000
align 4
_4:
dd 0x40800000		;-> 4.00000000
align 4
_5:
dd 0x40A00000		;-> 5.00000000
align 4
_6:
dd 0x3F8CCCCD		;-> 1.10000002
align 4
_7:
dd 0x42F6E979		;-> 123.456001
align 4
_8:
dd 0xBDCCCCCD		;-> -0.100000001
align 4

Comments

None.

Code Archives Forum