Commit 4c3bcfa7 authored by DUFAIX Clement
Browse files

it works!

parent 297c2780
......@@ -21,6 +21,8 @@ class Game:
self.joueur2 = joueur2
self.joueur1.setPlayerNumber(1)
self.joueur2.setPlayerNumber(2)
self.joueur1.setGame(self)
self.joueur2.setGame(self)
self.window = Window(self)
self.gameManager()
input() # fin du jeu
......@@ -38,11 +40,13 @@ class Game:
else:
self.joueur2.takeDecision(self)
self.actualPlayer = 1
input()
self.boardGame = [1, 1, 1, 1, 2, 2, 2, 2, 0]
self.winner = 0
self.actualPlayer = 1
self.window.drawTheScene()
#self.window.drawTheScene()
self.endGame()
def isEndGame(self, nextPlayer): # est ce la fin du jeu? est ce que le prochain joueur peut jouer?
......@@ -60,7 +64,7 @@ class Game:
print("Player " + str(self.winner) + " win!")
def movePawn(self, positionToMove):
positionToMove = positionToMove - 1 # pour la position dans le tableau
#positionToMove = positionToMove - 1 # pour la position dans le tableau
newPosition = self.emptyPlaceToMove(positionToMove)
if newPosition == -1: # si aucune place n'a été trouvé ou possible
return False
......@@ -92,6 +96,12 @@ class Game:
class Player():
position = 0
def __init__(self,lr=0.01, num_episode=10000, eps=0.5, gamma=0.95, eps_decay=0.001):
pass
def setGame(self, game):
self.game = game
@abstractmethod
def takeDecision(self, game):
pass
......@@ -122,6 +132,22 @@ class RealPlayer(Player):
class Agent(Player):
def __init__(self,lr=0.01, num_episode=10000, eps=0.5, gamma=0.95, eps_decay=0.001):
    """Create a learning agent and store its hyper-parameters.

    Args:
        lr: learning rate used in the Q-table update.
        num_episode: stored as ``self.num_episode``.
        eps: initial exploration rate passed to ``eps_greedy``.
        gamma: discount factor.
        eps_decay: amount subtracted from ``eps`` on each SARSA call
            while ``eps`` is above 0.01.
    """
    super().__init__(lr, num_episode, eps, gamma, eps_decay)
    # Action indices 0..8; the length sizes the second axis of the Q-table.
    self.n_a = list(range(9))
    self.total_reward = 0
    self.lr = lr
    self.num_episode = num_episode
    self.eps = eps
    self.gamma = gamma
    self.eps_decay = eps_decay
    self.tour = 0
def setGame(self, game):
    """Attach the game and allocate a zero-filled Q-table.

    The table has one row per element of the board returned by
    ``game.getBoardGame()`` and one column per entry of ``self.n_a``.

    Args:
        game: the game object; must provide ``getBoardGame()``.
    """
    self.game = game
    # NOTE(review): n_s holds whatever getBoardGame() returns; if that is
    # the game's live board rather than a copy, it changes as the game
    # progresses -- confirm against Game.getBoardGame.
    self.n_s = self.game.getBoardGame()
    self.Q = np.zeros((len(self.n_s), len(self.n_a)))
def greedy(self, Q, s):
    """Return the index of the largest value in ``Q[s]``.

    Args:
        Q: the Q-table (NumPy array).
        s: the index used to select from ``Q``.

    Returns:
        The result of ``np.argmax`` over ``Q[s]``. When ``Q[s]`` has more
        than one dimension, ``np.argmax`` returns an index into the
        flattened selection.
    """
    return np.argmax(Q[s])
......@@ -131,60 +157,51 @@ class Agent(Player):
else:
return self.greedy(Q, s)
def SARSA(self, lr=0.01, num_episode=10000, eps=0.5, gamma=0.95, eps_decay=0.001):
    """Play one move for this agent and apply SARSA-style Q-table updates.

    An action is drawn with ``eps_greedy`` and tried with
    ``self.game.movePawn`` until a move is accepted. Every rejected
    attempt is penalised (-10) and the accepted move is rewarded (+20).
    A win or loss recorded in ``self.game.winner`` then adds +200 or
    -100 to ``self.total_reward``.

    The keyword parameters are kept for signature compatibility only.
    All hyper-parameters are read from the instance (``self.lr``,
    ``self.eps``, ``self.gamma``, ``self.eps_decay``) so that the values
    given to the constructor are the ones actually used.
    """
    game = self.game
    q_table = self.Q

    def td_update(state, act, next_state, next_act, reward):
        # NOTE(review): if getBoardGame() returns a list (not visible here,
        # confirm), q_table[state] is NumPy advanced indexing, which yields
        # a copy, so this assignment would not modify the table.
        self.total_reward += reward
        q_table[state][act] = q_table[state][act] + self.lr * (
            reward + self.gamma * q_table[next_state][next_act] - q_table[state][act])

    actual_board = game.getBoardGame()

    # Decay exploration, but never below the 0.01 threshold check.
    if self.eps > 0.01:
        self.eps -= self.eps_decay

    action = self.eps_greedy(q_table, actual_board, self.eps)

    played = False
    while not played:
        played = game.movePawn(action)
        new_board_game = game.getBoardGame()
        new_action = self.eps_greedy(q_table, new_board_game, self.eps)
        print(str(action)+ " "+str(played))

        if not played:
            # Rejected move: penalise it before trying the next action.
            td_update(actual_board, action, new_board_game, new_action, -10)

        actual_board = new_board_game
        action = new_action

    # Reward for having played an accepted move.
    td_update(actual_board, action, new_board_game, new_action, 20)

    # End-of-game bonus/penalty, applied only when both the winner and
    # this agent's position are player 1 or player 2.
    winner = game.winner
    if winner in (1, 2) and self.position in (1, 2):
        reward = 200 if self.position == winner else -100
        self.total_reward += reward

    print("Eps:{:2.4f} Rew:{:2.4f}".format(self.eps, self.total_reward))
def takeDecision(self, game):
    """Announce the agent's turn and let it play one SARSA step.

    Args:
        game: the current game; only ``game.actualPlayer`` is read here,
            for the announcement. The move itself is played on
            ``self.game`` inside ``SARSA``.
    """
    print("\nAu tour du joueur " + str(game.actualPlayer) + ", l'IA joue")
    # SARSA no longer takes the game as an argument; calling it with
    # ``game`` would bind the game to ``lr`` and play a second move.
    self.SARSA()
class Window(QMainWindow):
......@@ -216,47 +233,47 @@ class Window(QMainWindow):
self.index = 0
self.painter = QPainter(self) # 1
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(250, 50, "1")
self.painter.drawText(250, 50, "0")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(250, 50, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(350, 100, "2")
self.painter.drawText(350, 100, "1")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(350, 100, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(400, 200, "3")
self.painter.drawText(400, 200, "2")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(400, 200, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(350, 300, "4")
self.painter.drawText(350, 300, "3")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(350, 300, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(250, 350, "5")
self.painter.drawText(250, 350, "4")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(250, 350, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(150, 300, "6")
self.painter.drawText(150, 300, "5")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(150, 300, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(100, 200, "7")
self.painter.drawText(100, 200, "6")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(100, 200, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(150, 100, "8")
self.painter.drawText(150, 100, "7")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(150, 100, 70, 70)
self.painter.setPen(QPen(Qt.black, 4, Qt.SolidLine))
self.painter.drawText(250, 200, "9")
self.painter.drawText(250, 200, "8")
self.painter.setBrush(QBrush(self.chooseColor(), Qt.SolidPattern))
self.painter.drawEllipse(250, 200, 70, 70)
self.painter.end()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment