Code archives/Algorithms/Regular expressions
This code has been declared by its author to be Public Domain code.
Download source code
| |||||
I saw this a few days ago, and it looked very appealing. My master's project involved a lot of FSAs, so I thought this would be well within my grasp. Turns out the FSAs aren't the hard part, it's interpreting a pattern correctly to *make* an FSA that's hard. Anyway, after a good day or so of despairing at the computer, I think I've got it working. This should accept all the patterns that the reAnimator machine does. The code is not pretty, even by my standards, this is what I hacked together while trying to sort out how it all fits together. I'm sure in the future I'll actually need a regular expression matcher for something and I'll write a nice one. Anyway, enjoy! | |||||
Type charset Field pattern$ Field ranges:TList Method New() ranges=New TList End Method Function Create:charset(pattern$) If Not pattern Return emptyset cs:charset=New charset cs.pattern=pattern While Len(pattern) If Len(pattern)>2 And Chr(pattern[1])="-" low=pattern[0] high=pattern[2] cs.addrange(low,high) pattern=pattern[3..] Else c=pattern[0] cs.addrange(c,c) pattern=pattern[1..] EndIf Wend Return cs End Function Method addrange(low,high) Local range[]=[low,high] ranges.addlast range pattern:+Chr(low)+"-"+Chr(high) End Method Method match(cr$) c=Asc(cr) Local range[] For range=EachIn ranges If c>=range[0] And c<=range[1] Return True EndIf Next Return False End Method Method repr$() Local range[] txt$="" For range=EachIn ranges If range[0]=range[1] txt:+Chr(range[0]) Else txt:+Chr(range[0])+"-"+Chr(range[1]) EndIf Next Return txt End Method End Type Global emptyset:charset=New charset Global digits:charset=charset.Create("0-9") Global alphanum:charset=charset.Create("0-9A-Za-z") Global nonalphanum:charset=New charset Global nondigits:charset=New charset Global fullset:charset=New charset fullset.addrange 0,255 nondigits.addrange 0,47 nondigits.addrange 58,255 nonalphanum.addrange 0,47 nonalphanum.addrange 58,64 nonalphanum.addrange 91,96 nonalphanum.addrange 123,255 Function listsame(l1:TList,l2:TList) If l1.count()<>l2.count() Return False For o:Object=EachIn l1 If Not l2.contains(o) Return False Next Return True End Function Type fsa Field accepting Field transitions:TList Field name$ Method New() transitions=New TList End Method Method addtransition(symbol$,dest:fsa) transitions.addlast transition.Create(symbol,dest) End Method Method matches:TList(symbol$,l:TList=Null) If Not l l=New TList For t:transition=EachIn transitions If t.cs.match(symbol) l.addlast t.dest EndIf Next Return l End Method Method evaluate(pattern$,spaces$="") Print spaces+name+"?"+pattern If Not pattern Print "end at "+name Return accepting EndIf symbol$=Chr(pattern[0]) npattern$=pattern[1..] For f:fsa=EachIn matches(symbol) If f.evaluate(npattern,spaces+" ") Return True Next For t:transition=EachIn transitions If t.cs=emptyset Print spaces+"empty move to "+t.dest.name If t.dest.evaluate(pattern,spaces) Return True EndIf Next Print spaces+"fail at "+name Return False End Method Method emptymoves:TList(l:TList=Null,addself=0) If Not l l=New TList If addself l.addlast(Self) For t:transition=EachIn transitions If t.cs=emptyset If Not l.contains(t.dest) l.addlast t.dest t.dest.emptymoves(l) EndIf EndIf Next Return l End Method Method repr$() txt$=name+"~n" If accepting txt:+"accepting~n" For t:transition=EachIn transitions txt:+" "+t.repr()+"~n" Next Return txt End Method Function collapse:tmap(f:fsa,checked:tmap=Null) If Not checked checked=New tmap If checked.contains(f) Return checked ntransitions:tmap=New tmap destinations:TList=New TList For t:transition=EachIn f.transitions If Not ntransitions.contains(t.cs.pattern) ntransitions.insert t.cs.pattern,New TList EndIf tl:TList=TList(ntransitions.valueforkey(t.cs.pattern)) If Not tl.contains(t.dest) tl.addlast t.dest EndIf If Not destinations.contains(t.dest) destinations.addlast t.dest EndIf Next 'Rem of:fsa=New fsa 'Print of.name For key$=EachIn ntransitions.keys() If key tl:TList=TList(ntransitions.valueforkey(key)) tname$="" For fp:fsa=EachIn tl For f2:fsa=EachIn fp.emptymoves() If Not tl.contains(f2) tl.addlast f2 EndIf Next Next For f2:fsa=EachIn tl If tname tname:+"," tname:+f2.name Next tname="{"+tname+"}" 'Print " "+key+" -> "+tname EndIf Next 'EndRem checked.insert f,ntransitions For f2:fsa=EachIn destinations fsa.collapse f2,checked Next Return checked End Function Function powerset:fsa(startnode:fsa) Global allnodes:tmap,newnodes:TList maps:tmap=collapse(startnode) allnodes:tmap=New tmap Function findnode:fsa(l:TList,adding=1) For l2:TList=EachIn allnodes.keys() If listsame(l,l2) Return fsa(allnodes.valueforkey(l2)) Next nf:fsa=New fsa nf.name=ziplist(l) 'Print "new node "+nf.name allnodes.insert l,nf If adding newnodes.addlast l EndIf Return nf End Function Function ziplist$(l:TList) l=l.copy() l.sort txt$="" For f:fsa=EachIn l If txt txt:+"," txt:+f.name Next Return "{"+txt+"}" End Function outstart:fsa=Null newnodes:TList=New TList newnodes.addlast startnode.emptymoves(Null,1) While newnodes.count() 'get the new state we're making. It's a list of the old nodes l:TList=TList(newnodes.removefirst()) nf:fsa=findnode(l,False) If Not outstart outstart=nf nf.name=ziplist(l) 'Print "constructing "+nf.name ntransitions:tmap=New tmap 'for each old node in the list, work out the transitions For f:fsa=EachIn l If f.accepting nf.accepting=1 'get transition map for this nfsa node fnt:tmap=tmap(maps.valueforkey(f)) 'for each symbol, add all the destinations For key$=EachIn fnt.keys() If key If Not ntransitions.contains(key) ntransitions.insert key,New TList EndIf otl:TList=TList(fnt.valueforkey(key)) ntl:TList=TList(ntransitions.valueforkey(key)) For f2:fsa=EachIn otl If Not ntl.contains(f2) ntl.addlast f2 Next EndIf Next Next 'we now have a full set of transitions, make the node For key$=EachIn ntransitions.keys() df:fsa=findnode(TList(ntransitions.valueforkey(key))) nf.addtransition key,df Next 'Print nf.repr() Wend Print "ALL NODES" For f:fsa=EachIn allnodes.values() Print f.repr() Next Return outstart End Function End Type Type transition Field cs:charset Field dest:fsa Function Create:transition(pattern$,dest:fsa) t:transition=New transition t.cs=charset.Create(pattern) t.dest=dest Return t End Function Method repr$() Return cs.repr()+" -> "+dest.name End Method End Type Global numnodes=0 Function compile:fsa[](pattern$,starts:fsa[],spaces$="") Print spaces+"compile: "+pattern Local bits$[] Local ends:fsa[],ostarts:fsa[],nends:fsa[] bits=splitpipes(pattern) 'note that starts is not really a set of starting nodes for the whole machine, 'but in fact the previous set of finals. If Len(bits)=1 While Len(pattern) ostarts=starts symbol$=Chr(pattern[0]) 'Print spaces+">"+symbol nends=Null Select symbol Case "(" 'start brackets Print spaces+"brackets" inparens=1 i=1 While inparens Select Chr(pattern[i]) Case "(" inparens:+1 Case ")" inparens:-1 End Select i:+1 Wend bit$=pattern[1..i-1] ends=compile(bit,starts,spaces+" ") pattern=pattern[i..] Case "[" Print spaces+"squares" i=1 While Chr(pattern[i])<>"]" i:+1 Wend bit$=pattern[0..i+1] 'Print spaces+bit pattern=pattern[i+1..] nf:fsa=New fsa For f:fsa=EachIn starts f.addtransition bit,nf Next ends=[nf] Case "\" symbol=Chr(pattern[1]) Print "special character "+symbol Local cs:charset Select symbol Case "d" 'digit cs=digits Case "D" cs=nondigits Case "w" cs=alphanum Case "W" cs=nonalphanum Default cs=charset.Create(symbol) End Select Print cs.repr() mf:fsa=New fsa mf.name="m" nf:fsa=New fsa numnodes:+1 nf.name=String(numnodes) For f:fsa=EachIn starts tr:transition=New transition tr.cs=cs tr.dest=mf f.transitions.addlast tr Next mf.addtransition "",nf ends=[mf] nends=[nf] pattern=pattern[2..] Case "." Print "any character" mf:fsa=New fsa nf:fsa=New fsa mf.name="m" numnodes:+1 nf.name=String(numnodes) For f:fsa=EachIn starts tr:transition=New transition tr.cs=fullset tr.dest=mf f.transitions.addlast tr Next mf.addtransition "",nf ends=[mf] nends=[nf] pattern=pattern[1..] Default 'normal character Print spaces+"character: "+symbol mf:fsa=New fsa mf.name="m" nf:fsa=New fsa numnodes:+1 nf.name=String(numnodes) For f:fsa=EachIn starts f.addtransition symbol,mf Next mf.addtransition "",nf ends=[mf] nends=[nf] pattern=pattern[1..] End Select If Len(pattern) op$=Chr(pattern[0]) Else op$="" EndIf Select op Case "*" 'kleene star closure - none or more times fin:fsa=New fsa For f:fsa=EachIn ends f.addtransition "",fin Next For f:fsa=EachIn starts f.addtransition "",fin fin.addtransition "",f If nends For f2:fsa=EachIn nends f.addtransition "",f2 Next EndIf Next fin2:fsa=New fsa fin.addtransition "",fin2 ends=[fin2] pattern=pattern[1..] Case "?" 'zero or one times fin:fsa=New fsa For f:fsa=EachIn starts f.addtransition "",fin If nends For f2:fsa=EachIn nends f.addtransition "",f2 Next EndIf Next For f:fsa=EachIn ends f.addtransition "",fin Next ends=[fin] pattern=pattern[1..] Case "+" 'one or more times fin:fsa=New fsa For f:fsa=EachIn ends f.addtransition "",fin Next For f:fsa=EachIn starts fin.addtransition "",f Next ends=[fin] pattern=pattern[1..] End Select If nends ends=nends EndIf starts=ends Wend Else Print spaces+"pipes" For bit$=EachIn bits ends:+compile(bit,starts,spaces+" ") Next EndIf Return ends End Function Function splitpipes$[](pattern$) 'Print pattern inparens=0 i=0 Local bits$[0] starti=0 While i<Len(pattern) Select Chr(pattern[i]) Case "\" i:+1 Case "(" inparens:+1 Case ")" inparens:-1 Case "|" If Not inparens bits:+[pattern[starti..i]] starti=i+1 EndIf End Select i:+1 Wend If starti<Len(pattern) bits=bits+[pattern[starti..]] EndIf Return bits End Function start:fsa=New fsa start.name="s" 're$="(([1-9]+[0-9]*)|0)(.[0-9]+)?" 're$="[a-z]*(,? [a-z]*)*" re$=".*" 're$=Input("re> ") For f:fsa=EachIn compile(re,[start]) f.accepting=1 Next start=fsa.powerset(start) in$="" While in<>"quit" in$=Input(">") If start.evaluate(in) Print "Yes" Else Print "No" EndIf Wend |
Comments
| ||
Interesting! |
Code Archives Forum