Commit 07fd8e2e authored by DUFAIX Clement
Browse files

fix Q

parent 4c3bcfa7
......@@ -2,6 +2,7 @@ import sys
from abc import abstractmethod
import time
import numpy as np
from more_itertools import distinct_permutations
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
......@@ -16,7 +17,7 @@ class Game:
winner = 0
def __init__(self, joueur1, joueur2):
def __init__(self, joueur1, joueur2, nbPartie=100):
self.joueur1 = joueur1
self.joueur2 = joueur2
self.joueur1.setPlayerNumber(1)
......@@ -24,6 +25,7 @@ class Game:
self.joueur1.setGame(self)
self.joueur2.setGame(self)
self.window = Window(self)
self.nbPartie = nbPartie
self.gameManager()
input() # fin du jeu
......@@ -31,37 +33,37 @@ class Game:
return self.boardGame
def gameManager(self):
# Runs the turn loop: while isEndGame() says the player to move can still
# move, redraws the window and lets joueur1 / joueur2 take a decision in
# alternation, then restores the initial board, winner and starting player.
#
# NOTE(review): this span is taken from a diff view and appears to contain
# BOTH the pre-change body (a single game that ends with self.endGame())
# and the post-change body (the `for _ in range(self.nbPartie)` loop).
# Indentation was lost, so the nesting of input() and of the reset lines
# cannot be told from here -- confirm against the real file.
while (not self.isEndGame(self.actualPlayer)):
self.window.drawTheScene()
if self.actualPlayer == 1:
self.joueur1.takeDecision(self)
self.actualPlayer = 2
else:
self.joueur2.takeDecision(self)
self.actualPlayer = 1
# Waits for a line on standard input before continuing.
input()
# Reset to the starting position: four pawns per player and one empty slot.
self.boardGame = [1, 1, 1, 1, 2, 2, 2, 2, 0]
self.winner = 0
self.actualPlayer = 1
#self.window.drawTheScene()
# NOTE(review): actualPlayer was just reset to 1 above, so endGame() would
# always pick winner 2 at this point -- confirm the intended order.
self.endGame()
# Plays nbPartie games back to back.
for _ in range(self.nbPartie):
print("\nNouvelle Partie!")
while (not self.isEndGame(self.actualPlayer)):
self.window.drawTheScene()
if self.actualPlayer == 1:
self.joueur1.takeDecision(self)
self.actualPlayer = 2
else:
self.joueur2.takeDecision(self)
self.actualPlayer = 1
# Waits for a line on standard input before continuing.
input()
self.boardGame = [1, 1, 1, 1, 2, 2, 2, 2, 0]
self.winner = 0
self.actualPlayer = 1
# NOTE(review): self.winner was set to 0 two lines above and nothing visible
# here assigns it afterwards, so this prints "Player 0 win!" -- confirm
# where the winner is supposed to be computed.
print("Player " + str(self.winner) + " win!")
#self.window.drawTheScene()
def isEndGame(self, nextPlayer):
    """Tell whether the game is over for the player about to move.

    Scans the nine board slots; the game continues as soon as one slot
    holds a pawn of ``nextPlayer`` for which ``emptyPlaceToMove`` returns
    something other than -1.

    Args:
        nextPlayer: value identifying the player whose turn comes next,
            compared against the entries of ``self.boardGame``.

    Returns:
        False if at least one pawn of ``nextPlayer`` can move, True
        otherwise.
    """
    can_still_move = any(
        self.boardGame[slot] == nextPlayer
        and self.emptyPlaceToMove(slot) != -1
        for slot in range(9)
    )
    return not can_still_move
def endGame(self):
# Sets self.winner to 2 when self.actualPlayer is 1, otherwise to 1, and
# prints the result.
if self.actualPlayer == 1:
self.winner = 2
else:
self.winner = 1
print("Player " + str(self.winner) + " win!")
# NOTE(review): the two lines below look like the post-change tail of
# isEndGame as rendered by the diff view rather than part of endGame --
# confirm against the real file.
#self.endGame()
return True # if no move is possible, the game is over
def movePawn(self, positionToMove):
#positionToMove = positionToMove - 1 # pour la position dans le tableau
......@@ -146,21 +148,36 @@ class Agent(Player):
def setGame(self, game):
    """Attach the agent to ``game`` and create its zero-initialised Q-table.

    The Q-table is a dict keyed by ``(state, action)`` where ``state`` is a
    nine-character string, one entry for every distinct arrangement of the
    characters ``'111122220'``, and ``action`` ranges over ``self.n_a``.

    Args:
        game: the game object; its ``getBoardGame()`` result is stored in
            ``self.n_s``.
    """
    self.game = game
    self.n_s = self.game.getBoardGame()
    # The former NumPy allocation of Q was overwritten immediately by the
    # dict below, so only the dict-based table is built.
    self.Q = {
        (''.join(p), action): 0
        for p in distinct_permutations('111122220')
        for action in self.n_a
    }
def greedy(self, Q, s):
    """Return the position in ``self.n_a`` of the best-valued action for ``s``.

    Args:
        Q: mapping keyed by ``(state, action)`` tuples.
        s: state key to evaluate.

    Returns:
        The index (as returned by ``np.argmax``) of the highest Q-value
        among the actions in ``self.n_a``; ties resolve to the first one.

    Raises:
        KeyError: if ``(s, a)`` is missing from ``Q`` for some action.
    """
    # Q is keyed by (state, action), so Q[s] alone is not a valid lookup;
    # gather the value of each action explicitly.
    values = np.array([Q[s, a] for a in self.n_a])
    return np.argmax(values)
def eps_greedy(self, Q, s, eps):
    """Pick an action index with an epsilon-greedy rule.

    Args:
        Q: mapping keyed by ``(state, action)`` tuples, passed to
            ``self.greedy`` on the exploiting branch.
        s: current state key.
        eps: probability threshold; a uniform draw below it triggers a
            random choice.

    Returns:
        A uniformly random index in ``range(len(self.n_a))`` when
        exploring, otherwise the result of ``self.greedy(Q, s)``.
    """
    if random.random() < eps:
        # Q is a dict and has no ``shape``; size the draw from the action
        # list instead (0..8 when there are nine actions).
        return random.randrange(len(self.n_a))
    return self.greedy(Q, s)
def SARSA(self, lr=0.01, num_episode=10000, eps=0.5, gamma=0.95, eps_decay=0.001):
actual_board = self.game.getBoardGame()
tmp = self.game.getBoardGame()
actual_board = ''
for i in tmp:
actual_board = actual_board + str(i)
#state_actual_board = str(actual_board)
played = False
if self.eps > 0.01:
......@@ -170,23 +187,21 @@ class Agent(Player):
while not played:
played = self.game.movePawn(action)
new_board_game = self.game.getBoardGame()
tmp = self.game.getBoardGame()
new_board_game = ''
for i in tmp:
new_board_game = new_board_game + str(i)
state_new_board_game = str(new_board_game)
new_action = self.eps_greedy(self.Q, new_board_game, self.eps)
print(str(action)+ " "+str(played))
if not played:
reward = -10
self.total_reward += reward
self.Q[actual_board][action] = self.Q[actual_board][action] + self.lr * (
reward + gamma * self.Q[new_board_game][new_action] - self.Q[actual_board][action])
self.Q[actual_board,action] = self.Q[actual_board,action] + self.lr * (
reward + gamma * self.Q[new_board_game,new_action] - self.Q[actual_board,action])
actual_board = new_board_game
action = new_action
reward = 20
self.total_reward += reward
self.Q[actual_board][action] = self.Q[actual_board][action] + self.lr * (
reward + gamma * self.Q[new_board_game][new_action] - self.Q[actual_board][action])
if (self.game.winner == 1 and self.position == 1) or (self.game.winner == 2 and self.position == 2):
reward = 200
......@@ -196,8 +211,20 @@ class Agent(Player):
reward = -100
self.total_reward += reward
self.total_reward += reward
self.Q[actual_board,action] = self.Q[actual_board,action] + self.lr * (
reward + gamma * self.Q[new_board_game,new_action] - self.Q[actual_board,action])
print("Eps:{:2.4f} Rew:{:2.4f}".format(self.eps, self.total_reward))
def isWinner(isWinner):
# Adds an end-of-game reward to self.total_reward; a truthy argument is
# worth 200.
# NOTE(review): the body uses `self`, but `self` is not a parameter of this
# function -- confirm whether the signature should be (self, isWinner).
if(isWinner):
reward = 200
self.total_reward += reward
else:
# NOTE(review): `reward` is never assigned on this branch before it is
# read -- confirm the intended reward for a loss.
self.total_reward += reward
def takeDecision(self, game):
print("\nAu tour du joueur " + str(game.actualPlayer) + ", l'IA joue")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment