diff --git a/res/save/learn.ser b/res/save/learn.ser index 1e0c605..8a51ba3 100644 Binary files a/res/save/learn.ser and b/res/save/learn.ser differ diff --git a/src/IA/QLearning.java b/src/IA/QLearning.java index 290f283..8f0f224 100644 --- a/src/IA/QLearning.java +++ b/src/IA/QLearning.java @@ -95,6 +95,7 @@ public class QLearning { bestMouvement = mouvement; } } + return bestMouvement; } } diff --git a/src/Main.java b/src/Main.java index 0dbdfd2..d863528 100644 --- a/src/Main.java +++ b/src/Main.java @@ -4,43 +4,50 @@ import IA.QTable; import environnement.*; import game.Terminal; import personnage.*; +import tests.IATest; public class Main { public static void main(String[] args) { - Personnage.n = 4; + // Personnage.n = 4; - Map map = new Map(12, 22); + // Map map = new Map(12, 22); - // lancer en local - if (args.length < 2) { - Grid[][] grid = map.getGrid(); + // // lancer en local + // if (args.length < 2) { + // Grid[][] grid = map.getGrid(); - // QTable qTable = new QTable(); - // qTable.getValues("res" + File.separator + "save" + File.separator + "learn.ser"); + // QTable qTable1 = new QTable(); + // qTable1.getValues("path_to_save_qtable1.ser"); - // Avant de jouer contre l'ia, vous pouvez essayer de l'entrainer avec la fonction tests.IATest.learnIAvsIA() - // il jouera avec lui meme et mettra les sauvegardes dans le dossier learn.ser, + // QTable qTable2 = new QTable(); + // qTable2.getValues("path_to_save_qtable2.ser"); - // Attention lors de l'apprentissage, ne pas couper le processus sinon vous allez perdre toute vos donnees - Personnage[] personnages = new Personnage[] { - new Player(new int[] {2, 2}, "Philippe Etchebest"), - new Player(new int[] {grid[0].length - 3, grid.length - 3}, "Luke Skywalker"), - // new Robot("Robot", new int[] {grid[0].length - 3, grid.length - 3}), - // new IAQLearning(new int[] {grid[0].length - 3, grid.length - 3), - }; + // // Avant de jouer contre l'ia, vous pouvez essayer de l'entrainer avec la fonction tests.IATest.learnIAvsIA() + // // il jouera avec lui meme et mettra les sauvegardes dans le dossier learn.ser, - // map.addObjectsRandomize(new Item[] {Item.FRAISE, Item.WALL}, 2); - // map.addObjects(Item.FRAISE, 2, 2); + // // Attention lors de l'apprentissage, ne pas couper le processus sinon vous allez perdre toute vos donnees + // Personnage[] personnages = new Personnage[] { + // new IAQLearning(new int[] {2, 2}, qTable1), + // // new Player(new int[] {2, 2}, "Philippe Etchebest"), + // // new Player(new int[] {grid[0].length - 3, grid.length - 3}, "Luke Skywalker"), + // // new Robot("Robot", new int[] {grid[0].length - 3, grid.length - 3}), + // new IAQLearning(new int[] {grid[0].length - 3, grid.length - 3}, qTable2), + // }; - new Terminal(map, personnages).run(); - } - // lancer en ligne - else { - Personnage[] personnages = new Personnage[] { - new Player(new int[] {0, 0}, "Philippe Etchebest"), - }; + // // map.addObjectsRandomize(new Item[] {Item.FRAISE, Item.WALL}, 2); + // // map.addObjects(Item.FRAISE, 2, 2); - new Terminal(map, personnages).run(args[0], args[1]); - } + // new Terminal(map, personnages).run(); + // } + // // lancer en ligne + // else { + // Personnage[] personnages = new Personnage[] { + // new Player(new int[] {0, 0}, "Philippe Etchebest"), + // }; + + // new Terminal(map, personnages).run(args[0], args[1]); + // } + + IATest.learnIAvsIA(); } } diff --git a/src/tests/IATest.java b/src/tests/IATest.java index bcbbba7..dc3860c 100644 --- a/src/tests/IATest.java +++ b/src/tests/IATest.java @@ -1,9 +1,11 @@ package tests; import java.io.File; +import java.util.Arrays; import IA.QTable; import IA.State; +import display.Display; import environnement.Grid; import environnement.Map; import personnage.IAQLearning; @@ -22,14 +24,14 @@ public class IATest { double decay_rate = 0.995; double minEpsilon = 0.01; - int totalEpisodes = 1000; + int totalEpisodes = 200; - Personnage.n = 2; + Personnage.n = 4; for(int episode = 0; episode < totalEpisodes; episode++) { QTable qTable = new QTable(); - IAQLearning iaqLearning = new IAQLearning(new int[] {0, 0}, qTable, alpha, gamma, epsilon); - Map map = new Map(20, 20); + IAQLearning iaqLearning = new IAQLearning(new int[] {2, 2}, qTable, alpha, gamma, epsilon); + Map map = new Map(12, 22); qTable.getValues(path); @@ -72,37 +74,29 @@ public class IATest { public static void learnIAvsIA() { double alpha = 0.1; double gamma = 0.9; + double epsilon = 0.1; - double[] epsilon = new double[] {1.0,}; - - double decay_rate = 0.995; - double minEpsilon = 0.01; - - int totalEpisodes = 1000; + int maxEpisode = 1000; Personnage.n = 4; - for (int episode = 0; episode < totalEpisodes; episode++) { + for (int episode = 0; episode < maxEpisode; episode++) { QTable qTable = new QTable(); - - IAQLearning[] iaqLearnings = new IAQLearning[] { - new IAQLearning(new int[] {2, 2}, qTable, alpha, gamma, epsilon[0]), - new IAQLearning(new int[] {9, 19}, qTable, alpha, gamma, epsilon[1]) - }; - - Map map = new Map(12, 22); - - boolean isGameOver = false; - qTable.getValues(path); - while(true) { - for (int i = 0; i < iaqLearnings.length; i++) { - IAQLearning iaqLearning = iaqLearnings[i]; + Map map = new Map(12, 22); - Grid[][] gridMap = map.getGrid(); - Map mapIA = new Map(gridMap[0].length, gridMap.length); - mapIA.replaceGrid(gridMap); + IAQLearning[] iaqLearnings = new IAQLearning[] { + new IAQLearning(new int[] {2, 2}, qTable, alpha, gamma, epsilon), + new IAQLearning(new int[] {9, 19}, qTable, alpha, gamma, epsilon), + }; + + boolean isGameOver = false; + + while(true) { + for (int personnages = 0; personnages < iaqLearnings.length; personnages++) { + IAQLearning iaqLearning = iaqLearnings[personnages]; + Map mapIA = new Map(map.getGrid()[0].length, map.getGrid().length); for (IAQLearning value : iaqLearnings) { map.placePersonnages(value); @@ -110,15 +104,25 @@ public class IATest { State currentState = iaqLearning.getCurrentState(map.getGrid()); Mouvement mouvement = iaqLearning.bestMouvement(currentState); - + iaqLearning.moveSnake(mouvement); int[] coordinate = iaqLearning.getHeadCoordinate(); - for (int[] snakeCoordinate : iaqLearnings[(i + 1) % 2].getCoordinate()) { - if (coordinate[0] == snakeCoordinate[0] && coordinate[1] == snakeCoordinate[1]) { - iaqLearning.receiveReward(currentState, mouvement, -10.0, currentState); - iaqLearnings[(i + 1) % 2].receiveReward(currentState, mouvement, 10.0, currentState); + if (map.isGameOver(coordinate) || iaqLearning.applyEffects(map.getEffect(coordinate))) { + iaqLearning.receiveReward(currentState, mouvement, -1000, currentState); + isGameOver = true; + break; + } + + int value = (personnages + 1) % 2; + + for (int[] snakeCoordinate : iaqLearnings[value].getCoordinate()) { + if (Arrays.equals(coordinate, snakeCoordinate)) { + iaqLearnings[value].receiveReward(currentState, mouvement, 1000, currentState); + iaqLearning.receiveReward(currentState, mouvement, -500, currentState); + + isGameOver = true; break; } } @@ -126,24 +130,19 @@ public class IATest { mapIA.placePersonnages(iaqLearning); State nextState = iaqLearning.getCurrentState(mapIA.getGrid()); - iaqLearning.receiveReward(currentState, mouvement, -0.1, nextState); + iaqLearning.receiveReward(currentState, mouvement, -0.01, nextState); iaqLearning.increaseRound(); mapIA.clearMap(); map.clearMap(); } - + if(isGameOver) break; + qTable.save(path); + + System.out.println("Episode: " + episode + " States: " + qTable.getqValues().size()); } - - qTable.save(path); - - for (int i = 0; i < epsilon.length; i++) { - epsilon[i] = Math.max(minEpsilon, epsilon[i] * decay_rate); - } - - System.out.println("Episode: " + episode + " | Robot 1 States: " + qTable.getqValues().size()); } } }