Source code for tomsup.ktom_functions

"""
This script contains all functions related to the implementation of the
k-ToM agent.
"""
# For reference the parameter order of the k-ToM parameters is:
# [1] Volatility
# [2] Behavioural temperature
# [3] Dilution
# [4] Bias

import copy
from typing import Tuple, Union

import numpy as np
from scipy.special import expit as inv_logit
from scipy.special import logit

from tomsup.payoffmatrix import PayoffMatrix

# Learning subfunctions


def p_op_var0_update(prev_p_op_mean0: float, prev_p_op_var0: float, volatility: float):
    """Variance update of the 0-ToM

    Examples:
        >>> p_op_var0_update(1, 0.2, 1)
        0.8348496471878395
        >>> # Higher volatility results in a higher variance
        >>> p_op_var0_update(1, 0.2, 1) < p_op_var0_update(1, 0.2, 2)
        True
        >>> # Mean close to 0.5 gives lower variance
        >>> p_op_var0_update(1, 0.45, 1) < p_op_var0_update(1, 0.2, 2)
        True
    """
    # Input variable transforms
    volatility = np.exp(volatility)
    prev_p_op_var0 = np.exp(prev_p_op_var0)
    prev_p_op_mean0 = inv_logit(prev_p_op_mean0)

    # Update
    new_p_op_var0 = 1 / (
        (1 / (volatility + prev_p_op_var0))
        + prev_p_op_mean0 * (1 - prev_p_op_mean0)
    )

    # Output variable transform
    new_p_op_var0 = np.log(new_p_op_var0)

    return new_p_op_var0

def p_op_mean0_update(prev_p_op_mean0: float, p_op_var0: float, op_choice: int):
    """0-ToM updates its mean choice probability estimate"""
    # Input variable transforms
    p_op_var0 = np.exp(p_op_var0)

    # Update
    new_p_op_mean0 = prev_p_op_mean0 + p_op_var0 * (
        op_choice - inv_logit(prev_p_op_mean0)
    )

    # For numerical purposes, according to the VBA package
    new_p_op_mean0 = logit(inv_logit(new_p_op_mean0))

    return new_p_op_mean0

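# A minimal usage sketch (illustrative values, not from the original module):
# with a prior mean of 0 (i.e. an estimated choice probability of 0.5) and a
# variance of 0 on the log scale (exp(0) = 1), observing the opponent choose 1
# shifts the mean estimate upwards by var * (choice - expected choice) = 1 * 0.5.
# >>> round(float(p_op_mean0_update(prev_p_op_mean0=0.0, p_op_var0=0.0, op_choice=1)), 6)
# 0.5
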
def p_opk_approx_fun(
    prev_p_op_mean: np.array,
    prev_param_var: np.array,
    prev_gradient: np.array,
    level: int,
):
    """
    Approximates the estimated choice probability of the opponent on the
    previous round, using the semi-analytical approximation derived in
    Daunizeau, J. (2017).

    Examples:
        >>> p_opk_approx_fun(prev_p_op_mean = np.array([0]), prev_param_var = np.array([[0, 0, 0]]), prev_gradient = np.array([[0, 0, 0]]), level = 1)
    """
    # Constants
    a = 0.205
    b = -0.319
    c = 0.781
    d = 0.870

    # Input variable transforms
    prev_param_var = np.exp(prev_param_var)

    # Prepare variance by weighing with gradient
    prev_var_prepped = np.zeros(level)
    for level_idx in range(level):
        prev_var_prepped[level_idx] = prev_param_var[level_idx, :].T.dot(
            prev_gradient[level_idx, :] ** 2
        )

    # Equation
    p_opk_approx = (prev_p_op_mean + b * prev_var_prepped**c) / np.sqrt(
        1 + a * prev_var_prepped**d
    )

    # Output variable transform
    p_opk_approx = np.log(inv_logit(p_opk_approx))

    return p_opk_approx

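# A worked check of the docstring example above (not part of the original
# module): with a zero gradient the variance correction vanishes, so the
# approximation reduces to log(inv_logit(prev_p_op_mean)) = log(0.5).
# >>> p_opk_approx_fun(np.array([0]), np.array([[0, 0, 0]]), np.array([[0, 0, 0]]), level=1)
# array([-0.69314718])
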
def p_k_udpate(
    prev_p_k: np.array, p_opk_approx: np.array, op_choice: int, dilution=None
):
    """
    k-ToM updates its estimate of the opponent's sophistication level.
    If k-ToM has a dilution parameter, it does a partial forgetting of
    learned estimates.

    Examples:
        >>> p_k_udpate(prev_p_k = np.array([1.]), p_opk_approx = np.array([-0.69314718]), op_choice = 1, dilution = None)
    """
    # Input variable transforms
    p_opk_approx = np.exp(p_opk_approx)
    if dilution:
        dilution = inv_logit(dilution)

    # Do partial forgetting
    if dilution:
        prev_p_k = (1 - dilution) * prev_p_k + dilution / len(prev_p_k)

    # Calculate
    new_p_k = op_choice * (prev_p_k * p_opk_approx / sum(prev_p_k * p_opk_approx)) + (
        1 - op_choice
    ) * (prev_p_k * (1 - p_opk_approx) / sum(prev_p_k * (1 - p_opk_approx)))

    # Force probability sum to 1
    if len(new_p_k) > 1:
        new_p_k[-1] = 1 - sum(new_p_k[:-1])

    return new_p_k

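# A worked check of the docstring example above (not part of the original
# module): with only a single possible opponent level, all probability mass
# stays on that level regardless of the observed choice.
# >>> p_k_udpate(np.array([1.]), np.array([-0.69314718]), op_choice=1)
# array([1.])
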
def param_var_update(
    prev_p_op_mean: np.array,
    prev_param_var: np.array,
    prev_gradient: np.array,
    p_k: np.array,
    volatility: float,
    volatility_dummy=None,
    **kwargs
):
    """
    k-ToM updates its uncertainty / variance on its estimates of the
    opponent's parameter values

    Examples:
        >>> param_var_update(prev_p_op_mean = np.array([0]), \
            prev_param_var = np.array([[0, 0, 0]]), \
            prev_gradient = np.array([0, 0, 0]), p_k = np.array([1.]), \
            volatility = -2, volatility_dummy = None)
        array([[0.12692801, 0.        , 0.        ]])
    """
    # Dummy constant: sets volatility to 0 for all except volatility opponent
    # parameter estimates
    if volatility_dummy is None:
        volatility_dummy = np.zeros(prev_param_var.shape[1] - 1)
        volatility_dummy = np.concatenate(([1], volatility_dummy), axis=None)

    # Input variable transforms
    prev_p_op_mean = inv_logit(prev_p_op_mean)
    prev_param_var = np.exp(prev_param_var)
    volatility = np.exp(volatility) * volatility_dummy

    # Calculate
    new_var = 1 / (
        1 / (prev_param_var + volatility)
        + p_k[:, np.newaxis]
        * prev_p_op_mean[:, np.newaxis]
        * (1 - prev_p_op_mean[:, np.newaxis])
        * prev_gradient**2
    )

    # Output variable transform
    new_var = np.log(new_var)

    return new_var

def param_mean_update(
    prev_p_op_mean: np.array,
    prev_param_mean: np.array,
    prev_gradient: np.array,
    p_k: np.array,
    param_var,
    op_choice: int,
):
    """
    k-ToM updates its estimates of the opponent's parameter values

    Examples:
        >>> param_mean_update(prev_p_op_mean = np.array([0]), prev_param_mean = np.array([[0, 0, 0]]), prev_gradient = np.array([0, 0, 0]), p_k = np.array([1.]), param_var = np.array([[0, 0, 0]]), op_choice = 1)
    """
    # Input variable transforms
    param_var = np.exp(param_var) * prev_gradient

    # Calculate
    new_param_mean = (
        prev_param_mean
        + p_k[:, np.newaxis]
        * param_var
        * (op_choice - inv_logit(prev_p_op_mean))[:, np.newaxis]
    )

    # Used for numerical purposes (similar to the VBA package)
    new_param_mean = logit(inv_logit(new_param_mean))

    return new_param_mean

def gradient_update(
    params,
    p_op_mean,
    param_mean,
    sim_prev_internal_states,
    sim_self_choice,
    sim_op_choice,
    sim_level,
    sim_agent,
    p_matrix,
    **kwargs
):
    """The gradient update of the k-ToM agent"""
    # Make empty array for filling in gradients
    gradient = np.zeros(len(param_mean))

    # The gradient is calculated for each parameter one at a time
    for param, _ in enumerate(param_mean):
        # Calculate increment
        increment = max(abs(1e-4 * param_mean[param]), 1e-4)
        # Use normal parameter estimates
        param_mean_incr = np.copy(param_mean)
        # But increment the current parameter
        param_mean_incr[param] = param_mean[param] + increment

        # Make parameter structure similar to own
        # sim_params_incr = copy.deepcopy(params)
        sim_params_incr = dict()
        # Populate it with estimated values, including the increment
        for param_idx, param_key in enumerate(params):
            sim_params_incr[param_key] = param_mean_incr[param_idx]

        # Simulate opponent learning using the incremented parameters
        sim_new_internal_states_incr = learning_function(
            sim_prev_internal_states,
            sim_params_incr,
            sim_self_choice,
            sim_op_choice,
            sim_level,
            sim_agent,
            p_matrix,
            **kwargs
        )

        # Simulate opponent decision using incremented parameters
        p_op_mean_incr = decision_function(
            sim_new_internal_states_incr,
            sim_params_incr,
            sim_agent,
            sim_level,
            p_matrix,
        )[0]  # only use the first part of the output

        # Calculate the gradient: a measure of the size of the influence of
        # the incremented parameter value
        gradient[param] = (p_op_mean_incr - p_op_mean) / increment

    return gradient

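# The loop above implements a one-sided finite-difference approximation: each
# estimated opponent parameter is perturbed by a small increment, the
# opponent's learning and decision are re-simulated, and the resulting change
# in choice probability per unit increment is stored as the gradient. A
# generic sketch of the same numerical idea (hypothetical function f, not tied
# to the k-ToM state structure):
# >>> f = lambda x: x ** 2
# >>> x = 3.0
# >>> increment = max(abs(1e-4 * x), 1e-4)
# >>> (f(x + increment) - f(x)) / increment  # roughly the true derivative, 6
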
# Decision subfunctions
def p_op0_fun(p_op_mean0: float, p_op_var0: float):
    """
    0-ToM combines the mean and variance of its parameter estimate into a
    final choice probability estimate.
    To avoid unidentifiability problems this function does not use 0-ToM's
    volatility parameter.

    Examples:
        >>> p_op0_fun(p_op_mean0 = 0.7, p_op_var0 = 0.3)
    """
    # Constants
    a = 0.36

    # Input variable transforms
    p_op_var0 = np.exp(p_op_var0)

    # Calculate
    p_op0 = p_op_mean0 / np.sqrt(1 + a * p_op_var0)

    # Output variable transforms
    p_op0 = inv_logit(p_op0)

    return p_op0

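# A minimal usage sketch (illustrative values, not from the original module):
# a mean estimate of 0 on the logit scale maps to a choice probability of 0.5,
# irrespective of the variance.
# >>> float(p_op0_fun(p_op_mean0=0, p_op_var0=0))
# 0.5
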
def p_opk_fun(p_op_mean: np.array, param_var: np.array, gradient: np.array):
    """
    k-ToM combines the mean choice probability estimate and the variances of
    its parameter estimates into a final choice probability estimate.
    To avoid unidentifiability problems this function does not use 0-ToM's
    volatility parameter.
    """
    # Constants
    a = 0.36

    # Input variable transforms
    param_var = np.exp(param_var)

    # Prepare variance by weighing with gradient
    var_prepped = np.sum((param_var * gradient**2), axis=1)

    # Calculate
    p_opk = p_op_mean / np.sqrt(1 + a * var_prepped)

    # Output variable transform
    p_opk = inv_logit(p_opk)

    return p_opk

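# A minimal sketch for a 1-ToM with three estimated opponent parameters
# (illustrative zero values, not from the original module): with a zero
# gradient the variance term vanishes and the result is simply
# inv_logit(p_op_mean).
# >>> p_opk_fun(p_op_mean=np.array([0.]), param_var=np.array([[0., 0., 0.]]), gradient=np.array([[0., 0., 0.]]))
# array([0.5])
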
def expected_payoff_fun(p_op: float, agent: int, p_matrix: PayoffMatrix):
    """Calculate expected payoff of choosing 1 over 0

    Args:
        p_op (float): The probability of the opponent choosing 1
        agent (int): The perspective of the agent
        p_matrix (PayoffMatrix): A payoff matrix

    Returns:
        The expected payoff

    Examples:
        >>> staghunt = PayoffMatrix(name = 'staghunt')
        >>> expected_payoff_fun(1, agent = 0, p_matrix = staghunt)
        2
    """
    # Calculate
    expected_payoff = p_op * (
        p_matrix.payoff(1, 1, agent) - p_matrix.payoff(0, 1, agent)
    ) + (1 - p_op) * (p_matrix.payoff(1, 0, agent) - p_matrix.payoff(0, 0, agent))

    return expected_payoff

def softmax(expected_payoff, params: dict) -> float:
    """
    Softmax function for calculating own probability of choosing 1
    """
    # Extract necessary parameters
    b_temp = params["b_temp"]
    if "bias" in params:
        bias = params["bias"]

    # Input variable transforms
    b_temp = np.exp(b_temp)

    # Divide by temperature parameter
    expected_payoff = expected_payoff / b_temp

    # Add bias, optional
    if "bias" in params:
        expected_payoff = expected_payoff + bias

    # The logit transform completes the softmax function
    p_self = inv_logit(expected_payoff)

    # Set output bounds
    if p_self > 0.999:
        p_self = 0.999
        # warn("Choice probability constrained at upper bound 0.999 to avoid
        # rounding errors", Warning)
    if p_self < 0.001:
        p_self = 0.001
        # warn("Choice probability constrained at lower bound 0.001 to avoid
        # rounding errors", Warning)

    return p_self

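# A minimal usage sketch for softmax() above (illustrative values, not from
# the original module): with an expected payoff advantage of 1 for choosing 1
# and a behavioural temperature of -1 on the log scale (exp(-1) is about 0.37),
# the agent chooses 1 with a probability of roughly 0.94.
# >>> round(float(softmax(1, params={"b_temp": -1})), 2)
# 0.94
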
# Full learning and decision functions


def learning_function(
    prev_internal_states: dict,
    params: dict,
    self_choice: int,
    op_choice: int,
    level: int,
    agent: int,
    p_matrix: PayoffMatrix,
    **kwargs
) -> dict:
    """The general learning function for the k-ToM agent

    Args:
        prev_internal_states (dict): Previous internal states
        params (dict): The parameters
        self_choice (int): Previous choice of the agent
        op_choice (int): The opponent's choice
        level (int): Sophistication level
        agent (int): The perspective of the agent in the payoff matrix (0 or 1)
        p_matrix (PayoffMatrix): A payoff matrix

    Returns:
        dict: The updated internal states
    """
    # Extract needed parameters
    volatility = params["volatility"]
    if "dilution" in params:
        dilution = params["dilution"]
    else:
        dilution = None

    # Make empty dictionaries for storing updated states
    new_internal_states = {}
    opponent_states = {}

    # If the (simulated) agent is a 0-ToM
    if level == 0:
        # Extract needed variables
        prev_p_op_mean0 = prev_internal_states["own_states"]["p_op_mean0"]
        prev_p_op_var0 = prev_internal_states["own_states"]["p_op_var0"]

        # Update 0-ToM's uncertainty of opponent choice probability
        p_op_var0 = p_op_var0_update(prev_p_op_mean0, prev_p_op_var0, volatility)

        # Update 0-ToM's mean estimate of opponent choice probability
        p_op_mean0 = p_op_mean0_update(prev_p_op_mean0, p_op_var0, op_choice)

        # Gather own internal states
        own_states = {"p_op_mean0": p_op_mean0, "p_op_var0": p_op_var0}

    # If the (simulated) agent is a k-ToM
    else:
        # Extract needed variables
        prev_p_k = prev_internal_states["own_states"]["p_k"]
        prev_p_op_mean = prev_internal_states["own_states"]["p_op_mean"]
        prev_param_mean = prev_internal_states["own_states"]["param_mean"]
        prev_param_var = prev_internal_states["own_states"]["param_var"]
        prev_gradient = prev_internal_states["own_states"]["gradient"]

        # Update opponent level probabilities
        p_opk_approx = p_opk_approx_fun(
            prev_p_op_mean, prev_param_var, prev_gradient, level
        )
        p_k = p_k_udpate(prev_p_k, p_opk_approx, op_choice, dilution)

        # Update parameter estimates
        param_var = param_var_update(
            prev_p_op_mean, prev_param_var, prev_gradient, p_k, volatility, **kwargs
        )
        param_mean = param_mean_update(
            prev_p_op_mean, prev_param_mean, prev_gradient, p_k, param_var, op_choice
        )

        # Do recursive simulation of the opponent
        # Make empty structures for new means and gradients
        p_op_mean = np.zeros(level)
        gradient = np.zeros([level, param_mean.shape[1]])

        # Prepare simulated opponent perspective
        # The simulated perspective switches own and opponent roles
        sim_agent = 1 - agent
        # And previous choices
        sim_self_choice, sim_op_choice = op_choice, self_choice

        # k-ToM simulates an opponent for each level below its own
        for sim_level in range(level):
            # Further preparation of simulated perspective
            sim_prev_internal_states = copy.deepcopy(
                prev_internal_states["opponent_states"][sim_level]
            )

            # Make parameter structure similar to own
            sim_params = dict()
            # Populate it with estimated values
            for param_idx, param_key in enumerate(params):
                sim_params[param_key] = param_mean[sim_level, param_idx]

            # Simulate opponent learning (recursive)
            sim_new_internal_states = learning_function(
                sim_prev_internal_states,
                sim_params,
                sim_self_choice,
                sim_op_choice,
                sim_level,
                sim_agent,
                p_matrix,
                **kwargs
            )

            # Simulate opponent deciding
            p_op_mean[sim_level] = decision_function(
                sim_new_internal_states, sim_params, sim_agent, sim_level, p_matrix
            )[0]  # only use the first part of the output

            # Update gradient (recursive)
            gradient[sim_level] = gradient_update(
                params,
                p_op_mean[sim_level],
                param_mean[sim_level],
                sim_prev_internal_states,
                sim_self_choice,
                sim_op_choice,
                sim_level,
                sim_agent,
                p_matrix,
                **kwargs
            )

            # Save opponent's states
            opponent_states[sim_level] = sim_new_internal_states

        # Gather own internal states
        own_states = {
            "p_k": p_k,
            "p_op_mean": p_op_mean,
            "param_mean": param_mean,
            "param_var": param_var,
            "gradient": gradient,
        }

    # Save the opponent states and own internal states
    new_internal_states["opponent_states"] = opponent_states
    new_internal_states["own_states"] = own_states

    return new_internal_states

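# A minimal usage sketch (illustrative parameter values, not from the original
# module): a 0-ToM agent updates its internal states after one round of
# competitive matching pennies.
# >>> penny = PayoffMatrix(name="penny_competitive")
# >>> params = {"volatility": -2, "b_temp": -1}
# >>> states = init_k_tom(params, level=0)
# >>> new_states = learning_function(states, params, self_choice=1, op_choice=0,
# ...                                level=0, agent=0, p_matrix=penny)
# >>> sorted(new_states["own_states"])
# ['p_op_mean0', 'p_op_var0']
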
def decision_function(
    new_internal_states: dict,
    params: dict,
    agent: int,
    level: int,
    p_matrix: PayoffMatrix,
) -> Tuple[float, float]:
    """The decision function of the k-ToM agent

    Args:
        new_internal_states (dict): Dict of updated internal states
        params (dict): The parameters
        agent (int): The perspective of the agent in the payoff matrix
        level (int): The sophistication level of the agent
        p_matrix (PayoffMatrix): A payoff matrix

    Returns:
        Tuple[float, float]: A tuple containing the probability of self
        choosing 1 and of the opponent choosing 1.

    Examples:
        >>> penny = PayoffMatrix(name = "penny_competitive")
        >>> new_internal_states = {'opponent_states': {}, \
            'own_states': {'p_op_mean0': 30, 'p_op_var0': 2}}
        >>> params = {'volatility': -2, 'b_temp': -1}
        >>> p_self, p_op = decision_function(new_internal_states, params, \
            agent = 0, level = 0, p_matrix = penny)
        >>> p_self
        -5.436561973742046
    """
    # If the (simulated) agent is a 0-ToM
    if level == 0:
        # Extract needed variables
        p_op_mean0 = new_internal_states["own_states"]["p_op_mean0"]
        p_op_var0 = new_internal_states["own_states"]["p_op_var0"]

        # Estimate probability of opponent choice
        p_op = p_op0_fun(p_op_mean0, p_op_var0)

    # If the (simulated) agent is a k-ToM
    else:
        # Extract needed variables
        p_op_mean = new_internal_states["own_states"]["p_op_mean"]
        param_var = new_internal_states["own_states"]["param_var"]
        gradient = new_internal_states["own_states"]["gradient"]
        p_k = new_internal_states["own_states"]["p_k"]

        # Estimate probability of opponent choice for each simulated level
        p_opk = p_opk_fun(p_op_mean, param_var, gradient)

        # Weigh choice probabilities by level probabilities for an aggregate
        # choice probability estimate
        p_op = np.sum(p_opk * p_k)

    # Calculate expected payoff
    expected_payoff = expected_payoff_fun(p_op, agent, p_matrix)

    # Softmax
    p_self = softmax(expected_payoff, params)

    # Output variable transform
    p_self = logit(p_self)

    return (p_self, p_op)

# Full k-ToM Function
def k_tom(
    prev_internal_states: dict,
    params: dict,
    self_choice: int,
    op_choice: int,
    level: int,
    agent: int,
    p_matrix: PayoffMatrix,
    **kwargs
) -> Tuple[int, dict]:
    """The full k-ToM implementation

    Args:
        prev_internal_states (dict): Dict of previous internal states
        params (dict): The parameters
        self_choice (int): The agent's choice the previous round
        op_choice (int): The opponent's choice the previous round
        level (int): The sophistication level of the agent
        agent (int): The perspective of the agent in the payoff matrix
        p_matrix (PayoffMatrix): A payoff matrix

    Returns:
        Tuple[int, dict]: A tuple containing the choice and the updated
        internal states
    """
    # Update estimates of opponent based on behaviour
    if self_choice is not None:
        new_internal_states = learning_function(
            prev_internal_states,
            params,
            self_choice,
            op_choice,
            level,
            agent,
            p_matrix,
            **kwargs
        )
    else:
        # If first round or missed round, make no update
        new_internal_states = prev_internal_states

    # Calculate own decision probability
    p_self, p_op = decision_function(
        new_internal_states, params, agent, level, p_matrix
    )

    # Probability transform
    p_self = inv_logit(p_self)

    # Save own choice probability
    new_internal_states["own_states"]["p_self"] = p_self
    new_internal_states["own_states"]["p_op"] = p_op

    # Make decision
    choice = np.random.binomial(1, p_self)

    return (choice, new_internal_states)

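# A minimal end-to-end sketch (illustrative parameter values, not from the
# original module): initialize a 1-ToM agent and let it act on the first
# round, where self_choice=None means no learning update is performed.
# >>> penny = PayoffMatrix(name="penny_competitive")
# >>> params = {"volatility": -2, "b_temp": -1}
# >>> states = init_k_tom(params, level=1)
# >>> choice, states = k_tom(states, params, self_choice=None, op_choice=None,
# ...                        level=1, agent=0, p_matrix=penny)
# >>> choice in (0, 1)
# True
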
# Initializing function
def init_k_tom(params: dict, level: int, priors: Union[dict, str] = "default"):
    """
    Initialization function of the k-ToM agent

    Args:
        params (dict): The starting parameters
        level (int): The sophistication level of the agent
        priors (Union[dict, str], optional): The priors of the k-ToM.
            Defaults to "default". See the tutorial on how to set the internal
            states of the k-ToM agent.

    Examples:
        >>> init_k_tom(params = {'volatility': -2, 'b_temp': -1, 'bias': 0}, level = 1, priors = 'default')
    """
    # If no priors are specified
    if priors == "default":
        # Set default priors
        priors = {"p_op_mean0": 0, "p_op_var0": 0}  # Agnostic

        if level > 0:  # The following is not used by 0-ToM
            priors["p_op_mean"] = 0
            priors["param_mean"] = np.repeat(0.0, len(params))
            priors["param_var"] = np.repeat(0.0, len(params))
            priors["gradient"] = np.repeat(0.0, len(params))
            if "bias" in params:
                # Following the original VBA implementation
                priors["gradient"][-1] = 0.999999997998081

    # Make empty dicts for prior internal states
    internal_states = {}
    opponent_states = {}

    # If the (simulated) agent is a 0-ToM
    if level == 0:
        # Set priors for choice probability estimate and its uncertainty
        p_op_var0 = priors["p_op_var0"]
        p_op_mean0 = priors["p_op_mean0"]

        # Gather own internal states
        own_states = {"p_op_mean0": p_op_mean0, "p_op_var0": p_op_var0}

    # If the (simulated) agent is a k-ToM
    else:
        # Set priors
        p_k = np.repeat((1 / level), level)
        p_op_mean = np.repeat(priors["p_op_mean"], level)
        param_var = np.tile(priors["param_var"], (level, 1))
        param_mean = np.tile(priors["param_mean"], (level, 1))
        gradient = np.tile(priors["gradient"], (level, 1))

        # k-ToM simulates an opponent for each level below its own
        for level_index in range(level):
            # Simulate opponents to create the recursive data structure
            sim_new_internal_states = init_k_tom(params, level_index, priors)

            # Save opponent's states
            opponent_states[level_index] = sim_new_internal_states

        # Gather own internal states
        own_states = {
            "p_k": p_k,
            "p_op_mean": p_op_mean,
            "param_mean": param_mean,
            "param_var": param_var,
            "gradient": gradient,
        }

    # Save own choice probability
    own_states["p_self"] = np.nan
    own_states["p_op"] = np.nan

    # Save the opponent states and own internal states
    internal_states["opponent_states"] = opponent_states
    internal_states["own_states"] = own_states

    return internal_states

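# A sketch of passing custom priors instead of "default" (the keys are assumed
# to mirror the structure built in the default branch above; values are
# illustrative, not from the original module):
# >>> custom_priors = {
# ...     "p_op_mean0": 0, "p_op_var0": 0, "p_op_mean": 0,
# ...     "param_mean": np.repeat(0.0, 2),
# ...     "param_var": np.repeat(0.0, 2),
# ...     "gradient": np.repeat(0.0, 2),
# ... }
# >>> states = init_k_tom({"volatility": -2, "b_temp": -1}, level=1, priors=custom_priors)
# >>> states["own_states"]["p_k"]
# array([1.])
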