Automatically assigned DDC number: 00631
Manually assigned DDC number: 00631
Number of references: 4
Title: Learning and Exploitation do not Conflict under Minimax Optimality
Subject: Learning and Exploitation do not Conflict under Minimax Optimality
Description: Abstract. We show that adaptive real time dynamic programming extended with the action selection strategy which chooses the best action according to the latest estimate of the cost function yields asymptotically optimal policies within finite time under the minimax optimality criterion. From this it follows that learning and exploitation do not conflict under this special optimality criterion. We relate this result to learning optimal strategies in repeated two-player zero-sum deterministic games. Keywords. reinforcement learning, self-optimizing systems, dynamic games. 1 Introduction. Reinforcement learning (RL) concerns practical problems related to learning of optimal behaviour in sequential decision tasks. The most popular theoretical framework adopted by RL researchers is that of Markovian Decision Problems (MDPs). One of the main questions in RL is what extent of exploration is needed for a learner so that the price of exploration does not become too demanding. Usually some exploration (e...
Contributor: The Pennsylvania State University CiteSeer Archives
Publisher: unknown
Date: 1998-10-12
Format: ps
Identifier: http://citeseer.ist.psu.edu/147329.html
Source: http://sneaker.mindmaker.kfkipark.hu/~szepes/papers/ecml97.ps.gz
Language: en
Relation:
Relation:
Relation:
Relation:
Rights: unrestricted
<?xml version="1.0" encoding="UTF-8"?>
<references_metadata>
<rec ID="/291270.html" Type="mastersthesis" CiteSeer_Book="" CiteSeer_Volume="" Title="Modular Neural Networks for Learning Context-Dependent Game Strategies">
<identifier Org="ISBN:0262100657" Paper_ID="/291270.html" Extracted="0262100657" />
<identifier Org="ISBN:0262201046" Paper_ID="/291270.html" Extracted="0262201046" />
<identifier Org="ISBN:0262621118" Paper_ID="/291270.html" Extracted="0262621118" DDC="570/.1/13" Normalized_DDC="570113" Normalized_Weight="0.14285714285714285" />
<identifier Org="ISBN:0780319028" Paper_ID="/291270.html" Extracted="0780319028" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.14285714285714285" />
<identifier Org="ISBN:0792397169" Paper_ID="/291270.html" Extracted="0792397169" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.14285714285714285" />
<identifier Org="ISBN:0805815902" Paper_ID="/291270.html" Extracted="0805815902" />
<identifier Org="ISBN:1558603352" Paper_ID="/291270.html" Extracted="1558603352" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.14285714285714285" />
<identifier Org="ISBN:1590330218" Paper_ID="/291270.html" Extracted="1590330218" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.14285714285714285" />
<identifier Org="ISBN:3540497196" Paper_ID="/291270.html" Extracted="3540497196" DDC="629.8932" Normalized_DDC="6298932" Normalized_Weight="0.14285714285714285" />
<identifier Org="ISBN:3540628584" Paper_ID="/291270.html" Extracted="3540628584" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.14285714285714285" />
</rec>
<rec ID="/68056.html" Type="inproceedings" CiteSeer_Book="Proceedings of the 13th International Conference on Machine Learning ICML96" CiteSeer_Volume="" Title="A Generalized Reinforcement-Learning Model: Convergence and Applications" />
<rec ID="/40386.html" Type="techreport" CiteSeer_Book="" CiteSeer_Volume="" Title="Generalized Markov Decision Processes: Dynamic-programming and Reinforcement-learning Algorithms">
<identifier Org="ISBN:1558604197" Paper_ID="/40386.html" Extracted="1558604197" />
<identifier Org="ISBN:1586038915" Paper_ID="/40386.html" Extracted="1586038915" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.25" />
<identifier Org="ISBN:3540628584" Paper_ID="/40386.html" Extracted="3540628584" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.25" />
<identifier Org="ISBN:3540688463" Paper_ID="/40386.html" Extracted="3540688463" />
<identifier Org="ISBN:3540770003" Paper_ID="/40386.html" Extracted="3540770003" />
<identifier Org="ISBN:3540878041" Paper_ID="/40386.html" Extracted="3540878041" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.25" />
<identifier Org="ISBN:427490525X" Paper_ID="/40386.html" Extracted="427490525X" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.25" />
</rec>
<rec ID="SELF" Type="SELF" CiteSeer_Book="SELF" CiteSeer_Volume="SELF" Title="Learning and Exploitation do not Conflict under Minimax Optimality">
<identifier Org="ISBN:3540628584" Paper_ID="SELF" Extracted="3540628584" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="1.0" />
</rec>
<rec ID="/75955.html" Type="incollection" CiteSeer_Book="Advances in Neural Information Processing Systems 7" CiteSeer_Volume="" Title="Learning To Play the Game of Chess">
<identifier Org="ISBN:0262201046" Paper_ID="/75955.html" Extracted="0262201046" />
<identifier Org="ISBN:0387741607" Paper_ID="/75955.html" Extracted="0387741607" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:0415111900" Paper_ID="/75955.html" Extracted="0415111900" DDC="192" Normalized_DDC="192" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:0780366573" Paper_ID="/75955.html" Extracted="0780366573" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:0791801764" Paper_ID="/75955.html" Extracted="0791801764" />
<identifier Org="ISBN:0806528001" Paper_ID="/75955.html" Extracted="0806528001" DDC="355.3434" Normalized_DDC="3553434" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:1552126501" Paper_ID="/75955.html" Extracted="1552126501" DDC="794./1" Normalized_DDC="7941" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:1590330218" Paper_ID="/75955.html" Extracted="1590330218" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:1841693367" Paper_ID="/75955.html" Extracted="1841693367" DDC="794" Normalized_DDC="794" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3540201211" Paper_ID="/75955.html" Extracted="3540201211" DDC="006.3/1" Normalized_DDC="00631" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3540430806" Paper_ID="/75955.html" Extracted="3540430806" DDC="794.81" Normalized_DDC="79481" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3540590528" Paper_ID="/75955.html" Extracted="3540590528" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3540679251" Paper_ID="/75955.html" Extracted="3540679251" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3540719830" Paper_ID="/75955.html" Extracted="3540719830" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3540745645" Paper_ID="/75955.html" Extracted="3540745645" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.07142857142857142" />
<identifier Org="ISBN:3790813486" Paper_ID="/75955.html" Extracted="3790813486" DDC="794.8/151" Normalized_DDC="7948151" Normalized_Weight="0.07142857142857142" />
</rec>
</references_metadata>