Need help with learning AI

Blitz3D Forums/Blitz3D Beginners Area/Need help with learning AI

Sonic65(Posted 2008) [#1]
Okay, so for my science fair project, I'm programming a Tic-Tac-Toe AI opponent which starts out with no knowledge of the game and learns as it goes along. I tried to implement a simple reinforcement learning system, but it doesn't give good enough results: against a random opponent it peaks at about an 86% win rate early on (around 100 games in), then drops to 77-80% and stays there. I want the learning AI to win over 90% of the time, at least.

Here's the code I'm using (doesn't require any external files):



If anyone could please help me figure out what I'm doing wrong here, it would be greatly appreciated.


H. T. U.(Posted 2008) [#2]
There's a program in the code archives that does exactly what you want (if you want to look).


H. T. U.(Posted 2008) [#3]
There's a program in the code archives that does exactly what you want (if you want to look).

EDIT: Oops, lol!


Nate the Great(Posted 2008) [#4]
Hey, I just made something like that! Haha, wow, that's a coincidence... please give me credit if you use it, but you don't have to. Here's a link:

:)

http://www.blitzbasic.com/codearcs/codearcs.php?code=2370#comments

edit: when I run your code in debug mode I get an "array index out of bounds" error


Sonic65(Posted 2008) [#5]
That program in the code archives is pretty neat; however, it seems like it wouldn't do very well against an opponent that doesn't play well, i.e. a random opponent (also, it uses a LOT of external files). For this project, I'm going to have to test the AI against three different opponents, each playing a different way.

I tried to implement a system to also store the opponent's moves, and give feedback to those, but the results are still the same as before. Here's the new code:




I know I'm doing something wrong, but I have no idea what. (BTW, this should work in debug mode now.)


GIB3D(Posted 2008) [#6]
How can you play Tic-Tac-Toe more than two different ways, trying to win and trying to draw (no one wins)?


Sonic65(Posted 2008) [#7]
The three opponents are as follows:


Random - moves randomly

Expert - tries to win (uses pattern tables)

Control - moves pseudo-randomly; given the same board state, it will
always move in the same position (which is selected randomly when
that board state is first encountered).

Right now, it can't even get 90% against the random opponent. :(
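The "Control" opponent above is the easiest of the three to pin down precisely: it memoizes one random choice per board state. A minimal sketch in Python (the class and names are mine, not from the original Blitz3D code; cell values 0/1/2 for empty/X/O are an assumption):

```python
import random

class ControlOpponent:
    """Plays pseudo-randomly: the first time a board state is seen,
    a random empty cell is chosen; that same cell is replayed every
    time the same state comes up again."""

    def __init__(self, seed=None):
        self.memory = {}                 # board state -> chosen cell
        self.rng = random.Random(seed)

    def move(self, board):
        # board: sequence of 9 cells, 0 = empty, 1 = X, 2 = O
        state = tuple(board)
        if state not in self.memory:
            empties = [i for i, c in enumerate(state) if c == 0]
            self.memory[state] = self.rng.choice(empties)
        return self.memory[state]
```

Because the choice is cached per state, this opponent is deterministic once trained against, which is what makes it a useful control case next to the fully random opponent.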


GIB3D(Posted 2008) [#8]
v Another non-helpful post v

Oh, pattern tables, I would've never thought of using something like that ;)

Good thing you're not trying to make a Human Vs. Computer Sudoku game.

^ Another non-helpful post ^


Nate the Great(Posted 2008) [#9]
I will look over your code when I have time but for now can you explain exactly how it works?

BTW the reason I have my program make all of those external files is so i am able to save it at its state by putting them all in a folder and I can reset it by deleting them or I could even modify them to make it smarter :)


mtnhome3d(Posted 2008) [#10]
A strange game; the only winning move is not to play. You should name the app "Joshua". OK, enough jokes! In the code, which opponent does it simulate? Is there a switch to change the opponent?


Sonic65(Posted 2008) [#11]
mtnhome3d:
It uses the random opponent right now. It needs to be able to beat this first =P



Nate the Great:

Each board is stored in two states; one state for the player's positions (with the board stored as 0 for blank/opponent and 1 for the player), and one state for the opponent's positions (0 for blank/player, 1 for the opponent). This is better than having them in one state because it greatly decreases the number of truly unique states (according to my math, there are only 1024 possible states, instead of the 362,880 states needed if every board was stored as a whole). To store states, the program uses the type 'state'.
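The two-state encoding described above can be sketched as packing each side's marks into a 9-bit pattern, so each pattern is one of 2^9 = 512 values and the two tables together stay within the 1024 states mentioned (function and variable names here are mine, not from the original code):

```python
def side_pattern(board, side):
    """Pack one side's marks into a 9-bit integer.
    board: list of 9 cells, 0 = empty, 1 = player, 2 = opponent.
    side: 1 for the player's pattern, 2 for the opponent's."""
    bits = 0
    for i, cell in enumerate(board):
        if cell == side:
            bits |= 1 << i
    return bits

board = [1, 0, 2,
         0, 1, 0,
         2, 0, 0]
player_state = side_pattern(board, 1)    # bits 0 and 4 set -> 17
opponent_state = side_pattern(board, 2)  # bits 2 and 6 set -> 68
```

Note the two patterns together don't fully reconstruct the board's move history, which is exactly why this is so much more compact than storing every whole board as its own state.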

The AI cycles through each state (player or opponent), to see if it matches the current board. (Note that the AI doesn't take advantage of rotations.) If it does, then it finds the highest-ranking cell in that state, and marks it. It also updates the move and state arrays, which keep track of the moves made and states encountered in the current game. (These are cleared after every game).

If it cannot match a state to the current board, then it either moves randomly or moves into the first available position on the board. (Which of these it does is determined by a random number; there is a 10% chance it will move randomly, and a 90% chance it will move into the first available position). It also updates the move array for the player (this keeps track of the moves made in the current game, so that at the end of the game these moves can be evaluated and their value changed).

After each move, the player and opponent states are updated. If the current board represents a new state, then a new state will be created. Either way, the updating functions update the player and enemy state arrays (which keep track of the states encountered in the current game, in order).

At the end of the game, cell values are updated for all the states encountered during the game. For player states, the update function adds 3 to the value of the cell moved in if the AI won, and subtracts 5 if it lost. For opponent states, it ignores losses, and only updates if the opponent wins; it updates the value of each cell moved in during the last game so that the closer to the end of the game, the higher the reward for that cell. (This way, the opponent's winning move counts more than its first, and the AI theoretically should know that it should move in that space...but it doesn't.)

Sorry if I didn't explain that very well. If there is any specific part that you're unclear about and don't understand from the code, just ask me. =P


Nate the Great(Posted 2008) [#12]
Hmm... great explanation. It sounds like it does something almost exactly the same as my program, except mine takes advantage of flipped and rotated boards. The whole idea of giving each cell a value sounds like one of those things that works easily in theory but is much harder to apply. I think you should start out without giving each cell a value, and instead give each cell a 1 or a 0 for whether it moved there; that may simplify things, but you don't have to do it. In my tic-tac-toe program, it checks each board, and if it matches one where the computer side won, it will most likely move there. It then checks to see if it also matches a losing board, and if it does, it starts over again or moves randomly. Start over (not completely, just go back to the computer moving randomly), then try a simpler concept and build upon it like I did, and you may come up with some better ideas :) It is very hard to just jump into a great idea, but it is easier to look at the great idea and break it down. Then when you start you won't be instantly satisfied, but as you improve it over time it will work out :) Hope this helped.


Sonic65(Posted 2008) [#13]
Success! I decided to research the topic a bit more, and found a Perl program which did exactly what I wanted to do here. After looking over that program and replicating it in Blitz3D, as well as making it learn about 100x faster (in terms of games needed, not program speed), it reaches the goal of 90% in only ~2000 games! The process it uses is very similar to the process I described above, except it stores the board in a string, and uses a different feedback function.
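The Perl program itself isn't shown, so this is only a generic sketch of the approach described: key the value table by the whole board serialized as a string, and back each visited state up toward the game's final reward (the step size and default value here are illustrative assumptions):

```python
def board_key(board):
    """Serialize the whole board into a string key, e.g. '102010200'."""
    return "".join(str(c) for c in board)

def feedback(values, visited, reward, step=0.1):
    """Nudge the value of each visited state toward the final reward.
    visited: list of board-key strings encountered during the game."""
    for key in visited:
        old = values.get(key, 0.5)     # unseen states start neutral
        values[key] = old + step * (reward - old)
```

Compared to the earlier +3/-5 scheme, this kind of smoothed update also gives draws a usable signal (a reward between the win and loss values), which is one plausible reason a value-iteration-style feedback function learns so much faster here.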

Here's the code (again, doesn't require any external files):



If you want to play against it without having to train it first against the random opponent, then download the file below and put it in the same folder as the code (without changing the name of the file), and set the human flag to True.

http://www.fileden.com/files/2006/8/1/150488/trainData.txt


Nate the Great(Posted 2008) [#14]
Hmmm... very nice, but when I set the human flag to true, it only takes 3 turns for it to start beating me, and it always moves in the corner.