Code archives/Miscellaneous/Simple Expression Compiler
This code has been declared by its author to be Public Domain code.
Download source code
| |||||
I knocked this up yesterday and thought I would share. The code parses basic style expressions and translates to x86 assembly. I'm not sure if the generated asm is correct or not. The Lexer Rem Simple Expression compiler v0.1 by Aaron Woodard, Jan 2014 [admin@...] TODO: Add 'else' clause to 'if' Check if the generated asm is even remotely correct.. better error msgs EndRem Const TOK_NUMBER%=1 Const TOK_FLOAT%=2 Const TOK_PLUS%=3 Const TOK_MINUS%=4 Const TOK_MUL%=5 Const TOK_DIV%=6 Const TOK_IDENT%=7 Const TOK_EQUALS%=8 Const TOK_DBLEQUALS%=9 Const TOK_LPAREN%=10 Const TOK_RPAREN%=11 Const TOK_LBRAC%=12 Const TOK_RBRAC%=13 Const TOK_STRING%=14 Const TOK_VAR%=15 Const TOK_SEMICOL%=16 Const TOK_IF%=17 Const TOK_ELSE%=18 Const TOK_ENDIF%=19 Const TOK_LT%=20 Const TOK_GT%=21 Const TOK_LTE%=22 Const TOK_GTE%=23 Const TOK_NE%=24 Const TOK_THEN%=25 '! Const TOK_EOF%=100 '------------------------------------------------------------------------------------------------------------------------------ Type tToken Field Typ% Field Value$ Method dump() DebugLog("Typ:"+typ+ " > '" + Value + "'") End Method Function Create:tToken(_typ%,_value$) Local t:tToken = New tToken t.typ=_typ t.value=_value Return t End Function End Type '------------------------------------------------------------------------------------------------------------------------------ Global CurrIdx%=0 Global CurrToke:tToken Global NextToke:ttoken '------------------------------------------------------------------------------------------------------------------------------ Function Consume() currToke = NextToke NextToke= Toke() If currToke Then DebugLog("CT='"+currtoke.value+"'") End Function '------------------------------------------------------------------------------------------------------------------------------ Function Toke:tToken() Local t:tToken=New tToken Local tmp$ If CurrIdx+1 > codestring.length Then t.typ = TOK_EOF Return t EndIf tmp = codestring[CurrIdx..CurrIdx+1] While "~t~r~n ".contains(tmp) CurrIdx :+ 1 tmp = codestring[CurrIdx..CurrIdx+1] If CurrIdx+1 > codestring.length Then t.typ = TOK_EOF Return t EndIf Wend ' NUMBER If "1234567890.".contains(tmp) Then t = LexNumber() If Not t Then RuntimeError("null token!") Return t EndIf ' + If tmp = "+" Then CurrIdx :+ 1 Return tToken.Create(TOK_PLUS,tmp) EndIf ' - If tmp = "-" Then CurrIdx :+ 1 Return tToken.Create(TOK_MINUS,tmp) EndIf ' * If tmp = "*" Then CurrIdx :+ 1 Return tToken.Create(TOK_MUL,tmp) EndIf ' / If tmp = "/" Then CurrIdx :+ 1 Return tToken.Create(TOK_DIV,tmp) EndIf ' WORD If "abcdefghijklmnopqrstuvwxyz_".contains(tmp) Then t = LexWord() Select t.Value Case "var" Return tToken.Create(TOK_VAR,"var") Case "if" Return tToken.Create(TOK_IF,"if") Case "then" Return tToken.Create(TOK_THEN,"then") Case "else" Return tToken.Create(TOK_ELSE,"else") Case "endif" Return tToken.Create(TOK_ENDIF,"endif") Default Return t 'Error("Unkown token '" + t.value + "'" ) End Select EndIf ' == or = If tmp = "=" Then If codestring[CurrIdx..CurrIdx+2] = "==" Then CurrIdx :+ 2 Return tToken.Create(TOK_DBLEQUALS,"==") Else CurrIdx :+ 1 Return tToken.Create(TOK_EQUALS,tmp) EndIf EndIf ' ( If tmp = "(" Then CurrIdx :+ 1 Return tToken.Create(TOK_LPAREN,tmp) EndIf ' ) If tmp = ")" Then CurrIdx :+ 1 Return tToken.Create(TOK_RPAREN,tmp) EndIf ' ; If tmp = ";" Then CurrIdx :+ 1 Return tToken.Create(TOK_SEMICOL,tmp) EndIf ' [ If tmp = "[" Then CurrIdx :+ 1 Return tToken.Create(TOK_LBRAC,tmp) EndIf ' ] If tmp = "]" Then CurrIdx :+ 1 Return tToken.Create(TOK_RBRAC,tmp) EndIf If tmp = "<" Then If codestring[CurrIdx..CurrIdx+2] = "<=" Then CurrIdx :+ 2 Return tToken.Create(TOK_LTE,"<=") ElseIf codestring[CurrIdx..CurrIdx+2] = "<>" Then CurrIdx :+ 2 Return tToken.Create(TOK_NE,"<>") Else CurrIdx :+ 1 Return tToken.Create(TOK_LT,"<") EndIf EndIf If tmp = ">" Then If codestring[CurrIdx..CurrIdx+2] = ">=" Then CurrIdx :+ 2 Return tToken.Create(TOK_GTE,">=") Else CurrIdx :+ 1 Return tToken.Create(TOK_GT,">") EndIf EndIf If tmp = "~q" Then t=lexString() CurrIdx :+ 1 Return t EndIf RuntimeError("Unknown Token '" + tmp + "'") End Function '------------------------------------------------------------------------------------------------------------------------------ Function Lexnumber:tToken() Local t:tToken=New tToken Local tmp$ Local dotcount%=0 While "1234567890.".contains(codestring[CurrIdx..CurrIdx+1]) If codestring[CurrIdx..CurrIdx+1] = "." Then dotcount :+ 1 If dotcount > 1 Then RuntimeError("Malformed number! '" + tmp + "'") tmp :+ codestring[CurrIdx..CurrIdx+1] CurrIdx :+ 1 Wend t.typ = TOK_FLOAT t.value = tmp Return t End Function '------------------------------------------------------------------------------------------------------------------------------ Function LexWord:tToken() Local t:tToken=New tToken Local tmp$ While "abcdefghijklmnopqrstuvwxyz_".contains(codestring[CurrIdx..CurrIdx+1]) tmp :+ codestring[CurrIdx..CurrIdx+1] CurrIdx :+ 1 Wend t.typ = TOK_IDENT t.value = tmp Return t End Function '------------------------------------------------------------------------------------------------------------------------------ Function LexString:tToken() Local t:tToken=New tToken Local tmp$ Local endt$ currIdx :+ 1 Repeat tmp :+ codestring[CurrIdx..CurrIdx+1] currIdx :+ 1 Until "~r~n~q".Contains(codestring[CurrIdx..CurrIdx+1]) endt = codestring[CurrIdx..CurrIdx+1] If endt = "~q" Then t.typ = TOK_STRING t.value = tmp Return t EndIf If "~r~n".contains(endt) Then RuntimeError("Malformed string literal '" + tmp + "'") EndIf End Function '------------------------------------------------------------------------------------------------------------------------------ Parser: SuperStrict Framework brl.retro Include "lexer.bmx" Rem Simple Expression compiler v0.1 by Aaron Woodard, Jan 2014 [admin@...] TODO: Add 'else' clause to 'if' Check if the generated asm is even remotely correct.. better error msgs EndRem Global CodeString$ Global num_numbers#[] Global vars$[] Global cLabels% Global asmstring$ CodeString = "var x;~n" CodeString :+ "var myvar;~n" CodeString :+ "var anothervar;~n" CodeString :+ "x=0.5*2;~n" CodeString :+ "if(x>1) then~n" CodeString :+ " myvar=1+2-3*4/5;~n" CodeString :+ " if(myvar==1.1) then~n" CodeString :+ " anothervar=x*(-myvar+123.456)*-0.1;~n" CodeString :+ " endif~n" CodeString :+ "endif~n" Print "~n~n"+CodeString+"~n~n" Consume() Consume() Parse() Print prolog() Print asmstring Print epilog() Print datasec() '------------------------------------------------------------------------------------------------------------------------------ Function Parse() While CurrToke.typ <> TOK_EOF And (CurrToke.typ = TOK_Var Or (CurrToke.typ = TOK_IDENT And NextToke.Typ = TOK_EQUALS) Or CurrToke.Typ = TOK_IF) If CurrToke.typ = TOK_Var Then DeclareVar() ElseIf CurrToke.typ = TOK_IDENT And NextToke.Typ = TOK_EQUALS Then Assignment() ElseIf CurrToke.Typ = TOK_IF Then IfExpression() EndIf Wend End Function '------------------------------------------------------------------------------------------------------------------------------ Function DeclareVar() Local idx% Local v$ If CurrToke.typ = TOK_Var Then Consume() If CurrToke.typ = TOK_IDENT Then If checkvars(Currtoke.Value) Then Error("Already Defined") v=Currtoke.Value vars :+ [v] Consume() If CurrToke.Typ <> TOK_SEMICOL Then Error("missing ';'") Consume() EndIf EndIf End Function '------------------------------------------------------------------------------------------------------------------------------ Function Assignment() Local idx% Local v$ While CurrToke.typ = TOK_IDENT And NextToke.Typ = TOK_EQUALS v=Currtoke.Value Consume() If CurrToke.typ = TOK_EQUALS Then Consume() Expression() idx = getvarindex(v) If idx<0 Then error("invalid for var '"+v+"'") addasm(" fstp dword [ebp-" + (4+idx*4) + "] ;store in "+v ) If CurrToke.Typ <> TOK_SEMICOL Then Error("missing ';'") Consume() EndIf Wend End Function '------------------------------------------------------------------------------------------------------------------------------ Function Expression() AddExpression() End Function '------------------------------------------------------------------------------------------------------------------------------ Function IfExpression() Local labElseif$ Local labEndif$ Local cond$="" While CurrToke.Typ = TOK_IF If CurrToke.Typ = TOK_IF Then labEndif= MakeLabel("_endif") Consume() If CurrToke.Typ <> TOK_LPAREN Then Error("Expected '('" ) EndIf Consume() CondExpression(cond) If CurrToke.Typ <> TOK_RPAREN Then Error("Expected ')'" ) EndIf Consume() 'Then If CurrToke.Typ <> TOK_THEN Then Error("Expected 'then'" ) EndIf Consume() addasm(" fxch") addasm(" fucompp") addasm(" fnstsw ax") addasm(" sahf") Select cond Case ">" addasm(" setbe al") Case "<" addasm(" setae al") Case ">=" addasm(" setb al") Case "<=" addasm(" seta al") Case "==" addasm(" setnz al") Case "<>" addasm(" setz al") Default DebugLog("What!?") End Select addasm(" movzx eax,al") addasm(" cmp eax,0") addasm(" jne "+labEndif) Parse() If CurrToke.Typ <> TOK_ENDIF Then Error("Expected 'endif'" ) EndIf addasm(labEndif) Consume() EndIf Wend End Function '------------------------------------------------------------------------------------------------------------------------------ Function CondExpression(cond$ Var) Expression() If CurrToke.Typ <> TOK_LT And CurrToke.Typ <> TOK_GT And CurrToke.Typ <> TOK_LTE And CurrToke.Typ <> TOK_GTE And CurrToke.Typ <> TOK_NE And CurrToke.Typ <> TOK_DBLEQUALS Then Error("Expected '<,>,<=,>=,<>,=='" ) EndIf Select CurrToke.Value Case ">","<",">=","<=","==","<>" cond = CurrToke.Value Default Error("Unkown token") End Select Consume() Expression() End Function '------------------------------------------------------------------------------------------------------------------------------ Function AddExpression() If CurrToke.Typ<>TOK_MINUS And CurrToke.Typ<> TOK_LPAREN And CurrToke.Typ<> TOK_FLOAT And CurrToke.Typ<>TOK_IDENT Then Error("Expected '-' or '(' or number or ident'") MulExpression() While CurrToke.Typ = TOK_PLUS 'Then Consume() MulExpression() addasm(" faddp ") Wend While CurrToke.Typ = TOK_MINUS 'Then Consume() MulExpression() addasm(" fsubp ") Wend End Function '------------------------------------------------------------------------------------------------------------------------------ Function MulExpression() If CurrToke.Typ<>TOK_MINUS And CurrToke.Typ<> TOK_LPAREN And CurrToke.Typ<> TOK_FLOAT And CurrToke.Typ<>TOK_IDENT Then Error("Expected '-' or '(' or number or ident'") Primary() While CurrToke.Typ = TOK_MUL 'Then Consume() Primary() addasm(" fmulp ") Wend While CurrToke.Typ = TOK_DIV 'Then Consume() Primary() addasm(" fdivp ") Wend End Function '------------------------------------------------------------------------------------------------------------------------------ Function Primary() Local idx%,v$ If CurrToke.Typ<>TOK_MINUS And CurrToke.Typ<> TOK_LPAREN And CurrToke.Typ<> TOK_FLOAT And CurrToke.Typ<>TOK_IDENT Then Error("Expected '-' or '(' or number or ident'") While CurrToke.Typ = TOK_MINUS Consume() If CurrToke.Typ = TOK_FLOAT Then CurrToke.Value= String(-Float(CurrToke.Value)) Else Expression() addasm(" fchs") End If Wend While CurrToke.Typ = TOK_LPAREN Consume() Expression() If CurrToke.Typ <> TOK_RPAREN Then Error("!!") Consume() Wend While CurrToke.Typ = TOK_FLOAT addasm(" fld dword [_" + addorgetnum(CurrToke.Value)+"]") Consume() Wend While CurrToke.Typ = TOK_IDENT v=CurrToke.Value idx = getvarindex(v) If idx<0 Then error("unkown var '"+v+"'") addasm(" fld dword [ebp-"+(4+idx*4)+"] ;load '"+v+"'" ) Consume() Return Wend End Function '------------------------------------------------------------------------------------------------------------------------------ Function Error(s$) Print("ERR! " + s) DebugStop End End Function '------------------------------------------------------------------------------------------------------------------------------ Function dbg(s$) 'Print("dbg: " + s) End Function '------------------------------------------------------------------------------------------------------------------------------ Function FloatHex:String(num:Float) 'lendian Local p:Float Ptr = Varptr num Local bp:Byte Ptr = Int Ptr Int(p) Local out:String = "" out:+Right(Hex(bp[3]), 2) out :+ Right(Hex(bp[2]),2) out :+ Right(Hex(bp[1]),2) out :+ Right(Hex(bp[0]),2) Return "0x" + out End Function '------------------------------------------------------------------------------------------------------------------------------ Function addorgetnum%(v$) For Local i% = 0 Until num_numbers.Length If num_numbers[i] = Float(v) Then Return i EndIf Next num_numbers :+ [Float(v)] '= AppendFArray( num_numbers,Float(v) ) Return num_numbers.Length-1 'num_numbers[num_numbers.Length-1] End Function '------------------------------------------------------------------------------------------------------------------------------ Function checkvars%(v$) If Not vars.length Then Return False For Local i% = 0 Until vars.length If v=vars[i] Then Return True Next Return False End Function '------------------------------------------------------------------------------------------------------------------------------ Function getvarindex%(v$) For Local i% = 0 Until vars.length If v=vars[i] Then Return i EndIf Next Return -1 End Function '------------------------------------------------------------------------------------------------------------------------------ Function addasm(s$) asmstring :+ s + "~n" End Function '------------------------------------------------------------------------------------------------------------------------------ Function prolog$() Local tmp$ tmp = ";------Begin------;~n_func:~n" tmp :+ " push ebp~n~tmov ebp,esp~n" If vars.length Then tmp :+ " sub esp, " + (vars.Length*4) EndIf Return tmp End Function '------------------------------------------------------------------------------------------------------------------------------ Function epilog$() Local tmp$ tmp = " mov esp,ebp~n~tpop ebp~n~tret~n;------End------;" Return tmp End Function '------------------------------------------------------------------------------------------------------------------------------ Function datasec$() Local tmp$ tmp ="~nsection ~qdata~q~n" For Local i% = 0 Until num_numbers.length tmp :+ "_"+i+":~n" tmp :+"dd " + FloatHex(num_numbers[i]) + " ;-> "+num_numbers[i]+"~n" tmp :+"align 4~n" Next Return tmp End Function '------------------------------------------------------------------------------------------------------------------------------ Function MakeLabel$(pre$="_") cLabels :+ 1 Return Pre + String(cLabels) +":" End Function | |||||
' ' ' ' ' --------------------------------- -=[ Here is an example input: ]=- --------------------------------- var x; var myvar; var anothervar; x=0.5*2; if(x>1) then myvar=1+2-3*4/5; if(myvar==1.1) then anothervar=x*(-myvar+123.456)*-0.1; endif endif ------------------------------- -=[ and here is the output ]=- ------------------------------- ;------Begin------; _func: push ebp mov ebp,esp sub esp, 12 fld dword [_0] fld dword [_1] fmulp fstp dword [ebp-4] ;store in x fld dword [ebp-4] ;load 'x' fld dword [_2] fxch fucompp fnstsw ax sahf setbe al movzx eax,al cmp eax,0 jne _endif1: fld dword [_2] fld dword [_1] faddp fld dword [_3] fld dword [_4] fmulp fld dword [_5] fdivp fsubp fstp dword [ebp-8] ;store in myvar fld dword [ebp-8] ;load 'myvar' fld dword [_6] fxch fucompp fnstsw ax sahf setnz al movzx eax,al cmp eax,0 jne _endif2: fld dword [ebp-4] ;load 'x' fld dword [ebp-8] ;load 'myvar' fld dword [_7] faddp fchs fmulp fld dword [_8] fmulp fstp dword [ebp-12] ;store in anothervar _endif2: _endif1: mov esp,ebp pop ebp ret ;------End------; section "data" _0: dd 0x3F000000 ;-> 0.500000000 align 4 _1: dd 0x40000000 ;-> 2.00000000 align 4 _2: dd 0x3F800000 ;-> 1.00000000 align 4 _3: dd 0x40400000 ;-> 3.00000000 align 4 _4: dd 0x40800000 ;-> 4.00000000 align 4 _5: dd 0x40A00000 ;-> 5.00000000 align 4 _6: dd 0x3F8CCCCD ;-> 1.10000002 align 4 _7: dd 0x42F6E979 ;-> 123.456001 align 4 _8: dd 0xBDCCCCCD ;-> -0.100000001 align 4 |
Comments
None.
Code Archives Forum