Regret minimization in Python

Since my PhD (game theory, but heavy in mathematical analysis and partial differential equations) I was interested in nice, simple algorithms that would beat fancy theoretical constructs. One of those happens to be regret minimization.

Suppose we want to play rock, paper, scissors for money. Each of us puts 100 EUR on the table, we play and the winner takes the money. Let’s further assume that you play rock and I play paper (so I win) and thus take the 100 EUR from the table. In this case, your regret for not playing paper is 100 (because we would have tied), but your regret for not playing scissors is 200, because you did not win.

Formally, if I, J denote the strategy sets of the players (here I=J and both are equal to {rock, paper, scissors}), then the regret for player 1’s action i, given the last-round moves i^*, j^* is:

 \max(u(i,j^*)-u(i^*,j^*),0).

How do we get a strategy out of this? We initialize a cumulative regret counter for each action at the beginning of the game. We play randomly in the first round, and from the second round on, we compute the regret for each action, add it to that action’s counter, and divide each positive cumulative regret by the sum of all positive cumulative regrets. We then choose our action randomly, playing each action with probability proportional to its normalized positive cumulative regret (and falling back to uniform random play when no action has positive cumulative regret).

Code in Python below:


import matplotlib.pyplot as plt
import random
import numpy as np

# Print numpy arrays (regret sums, mixed strategies) with 2 decimal places.
np.set_printoptions(precision=2)

# Base player that chooses a constant mixed strategy every time
class Player:
    """Base player: samples pure actions from a fixed mixed strategy.

    Keeps a history of its own moves and the opponent's moves so that
    subclasses (e.g. a regret-matching player) can learn from past rounds.
    """

    def __init__(self):
        # Histories of pure actions, one entry per round played.
        self.my_moves = []
        self.other_moves = []

    def move(self, strategy):
        """Sample one pure action from *strategy*.

        strategy -- sequence of action probabilities (assumed to sum to 1).
        Returns the sampled action index. Inverse-CDF sampling: draw a
        uniform threshold and walk the cumulative distribution until it
        is exceeded; the final action absorbs any remaining mass.
        """
        threshold = random.uniform(0, 1)
        last = len(strategy) - 1

        acc = 0.0
        for action in range(last):
            acc += strategy[action]
            if threshold < acc:
                return action
        # Threshold fell into the last action's slice of the distribution.
        return last

class RegretPlayer(Player):
    """Player that chooses its mixed strategy by regret matching.

    Keeps a cumulative regret vector over the three actions
    (Rock=0, Paper=1, Scissors=2) and plays each action with probability
    proportional to its positive cumulative regret.
    """

    def __init__(self):
        super(RegretPlayer, self).__init__()
        # Cumulative regret of each action over all rounds played so far.
        self.regret_sum = np.zeros(3)

    def regret(self):
        """Return the per-action regret vector for the most recent round.

        Regret of action a = payoff a would have earned against the
        opponent's last move, minus the payoff actually earned.
        Returns the zero vector if no round has been played yet.
        """
        if not self.my_moves:
            return np.zeros(3)

        my_action = self.my_moves[-1]
        his_action = self.other_moves[-1]

        # Payoffs from my perspective against his last action.
        my_payoff = np.zeros(3)
        my_payoff[his_action] = 0.0            # mirroring his move ties
        # The "next" action cyclically beats his (Rock < Paper < Scissors < Rock).
        my_payoff[(his_action + 1) % 3] = 1.0
        # The "previous" action cyclically loses to his.
        my_payoff[(his_action - 1) % 3] = -1.0

        return my_payoff - my_payoff[my_action]

    def get_regret_mixed_strategy(self):
        """Update cumulative regrets and return the regret-matching strategy.

        Returns a probability vector over the three actions.
        """
        # Fold the latest round's regret in BEFORE computing the strategy,
        # so the strategy uses all information available so far.  (The
        # original code added it afterwards, leaving the played strategy
        # one round stale.)
        self.regret_sum += self.regret()

        # Regret matching: probability proportional to positive cumulative regret.
        positive_regret = np.maximum(self.regret_sum, 0.0)
        normalize_sum = positive_regret.sum()

        if normalize_sum > 0:
            return positive_regret / normalize_sum
        # No action has positive cumulative regret: play uniformly at random.
        return np.ones(3) / 3

def payoff(my_action, his_action):
    """Zero-sum rock-paper-scissors payoff for the first player.

    Actions are encoded Rock=0, Paper=1, Scissors=2.
    Returns 0 on a tie, 1 if my_action beats his_action, -1 otherwise.
    """
    # The cyclic difference classifies the outcome in one step:
    # 0 -> same action (tie), 1 -> mine is the cyclic successor (win),
    # 2 -> mine is the cyclic predecessor (loss).
    outcome = (my_action - his_action) % 3
    return (0, 1, -1)[outcome]

def run(n_rounds = 10):
    """Play n_rounds of rock-paper-scissors between a regret-matching
    player (p1) and a fixed-mixed-strategy player (p2).

    Prints each round's regrets, strategy, moves and running payoffs,
    and live-plots p1's running win rate with matplotlib.
    """
    p1 = RegretPlayer()
    p2 = Player()

    total_p1 = 0.0
    total_p2 = 0.0

    rounds_won_p1 = 0.

    # Hoisted out of the loop: it never changes between rounds.
    moves_map = {0: "Rock", 1: "Paper", 2: "Scissors"}

    plt.ion()
    plt.axis([-0.1, n_rounds + 0.1, -0.1, 1.1])
    print("*"*100)
    print("The match begins")
    print("*"*100)
    # range(1, n_rounds + 1): the original range(1, n_rounds) played one
    # round too few.
    for n in range(1, n_rounds + 1):

        regret_strategy_p1 = p1.get_regret_mixed_strategy()

        m1 = p1.move(regret_strategy_p1)

        m2 = p2.move([0.4, 0.3, 0.3])

        # Players update the info of the moves
        p1.my_moves.append(m1)
        p1.other_moves.append(m2)

        # Compute the round payoff once and reuse it below.
        round_payoff = payoff(m1, m2)
        total_p1 += round_payoff
        # Zero-sum game: p2's payoff is the negative of p1's.  (The
        # original never updated total_p2, so it always printed 0.)
        total_p2 -= round_payoff

        #### SHOW RESULTS
        print('-'*50)
        print("Regret:")
        print(p1.regret_sum)
        print("Strategy:", regret_strategy_p1)
        print("My move: %s" % moves_map[m1])
        print("His move: %s" % moves_map[m2])
        print("Payoffs:")
        print(total_p1)
        print(total_p2)

        rounds_won_p1 += 1 if round_payoff > 0 else 0

        # Plot the running fraction of rounds won by p1.
        plt.title("Percentage of rounds won using a regret strategy")
        plt.scatter(n, rounds_won_p1 / n, color="red")

        plt.show()
        plt.pause(0.1)

# Script entry point: simulate a 100-round match when executed directly.
if __name__ == "__main__":
    run(100)