# Source code for tasks.probSelect

# -*- coding: utf-8 -*-
"""
:Author: Dominic Hunt

:Reference: Genetic triple dissociation reveals multiple roles for dopamine in reinforcement learning.
            Frank, M. J., Moustafa, A. a, Haughey, H. M., Curran, T., & Hutchison, K. E. (2007).
            Proceedings of the National Academy of Sciences of the United States of America, 104(41), 16311–16316.
            doi:10.1073/pnas.0706111104

"""
from __future__ import division, print_function, unicode_literals, absolute_import

import numpy as np

import itertools

from tasks.taskTemplate import Task
from model.modelTemplate import Stimulus, Rewards


class ProbSelect(Task):
    """
    Probabilistic selection task based on Genetic triple dissociation reveals multiple roles for dopamine in reinforcement learning.
                                        Frank, M. J., Moustafa, A. A., Haughey, H. M., Curran, T., & Hutchison, K. E. (2007).
                                        Proceedings of the National Academy of Sciences of the United States of America, 104(41), 16311-16316.
                                        doi:10.1073/pnas.0706111104

    Many methods are inherited from the tasks.taskTemplate.Task class.
    Refer to its documentation for missing methods.

    Attributes
    ----------
    Name : string
        The name of the class used when recording what has been used.

    Parameters
    ----------
    reward_probability : float in range [0,1], optional
        The probability that a reward is given for choosing action A. Default
        is 0.7
    learning_action_pairs : list of tuples, optional
        The pairs of actions shown together in the learning phase.
        Default ``[(0, 1), (2, 3)]``
    action_reward_probabilities : dictionary, optional
        A dictionary of the potential actions that can be taken and the
        probability of a reward.
        Default ``{0: reward_probability, 1: 1 - reward_probability, 2: 0.5, 3: 0.5}``
    learning_length : int, optional
        The number of trials in the learning phase. Default is 240
    test_length : int, optional
        The number of trials in the test phase. Default is 60
    number_actions : int, optional
        The number of actions that can be chosen at any given time. Default is
        the number of entries in ``action_reward_probabilities``
    reward_size : float, optional
        The size of reward given if successful. Default 1

    Notes
    -----
    The task is broken up into two sections: a learning phase and a
    transfer (test) phase. Participants choose between pairs of four actions:
    A, B, M1 and M2. Each provides a reward with a different probability:
    A:P>0.5, B:1-P<0.5, M1=M2=0.5.

    In this implementation the first ``learning_length`` trials present one of
    the ``learning_action_pairs`` and give probabilistic feedback. The
    following ``test_length`` trials present two actions taken from two
    different learning pairs, and the feedback is always ``NaN``.

    """

    def __init__(self,
                 reward_probability=0.7,
                 learning_action_pairs=None,
                 action_reward_probabilities=None,
                 learning_length=240,
                 test_length=60,
                 number_actions=None,
                 reward_size=1):

        if learning_action_pairs is None:
            learning_action_pairs = [(0, 1), (2, 3)]

        if not action_reward_probabilities:
            action_reward_probabilities = {0: reward_probability,
                                           1: 1 - reward_probability,
                                           2: 0.5,
                                           3: 0.5}

        if not number_actions:
            number_actions = len(action_reward_probabilities)

        super(ProbSelect, self).__init__()

        self.parameters["reward_probability"] = reward_probability
        self.parameters["action_reward_probabilities"] = action_reward_probabilities
        self.parameters["learning_action_pairs"] = learning_action_pairs
        self.parameters["learning_length"] = learning_length
        self.parameters["test_length"] = test_length
        self.parameters["number_actions"] = number_actions
        self.parameters["reward_size"] = reward_size

        # Trial counter; -1 means that ``next`` has not been called yet
        self.t = -1
        self.reward_probability = reward_probability
        self.action_reward_probabilities = action_reward_probabilities
        self.learning_action_pairs = learning_action_pairs
        self.learning_length = learning_length
        self.reward_size = reward_size
        self.task_length = learning_length + test_length
        self.action = None
        self.reward_value = -1
        self.number_actions = number_actions
        # A list snapshot, so the value is the same kind of object on
        # Python 2 and Python 3 (where ``dict.keys()`` is a live view)
        self.choices = list(action_reward_probabilities.keys())

        self.action_sequence = self.__generate_action_sequence(action_reward_probabilities,
                                                               learning_action_pairs,
                                                               learning_length,
                                                               test_length)

        # Recording variables
        self.record_reward_values = [-1] * self.task_length
        self.record_actions = [-1] * self.task_length

    def next(self):
        """
        Produces the next stimulus for the iterator

        Returns
        -------
        stimulus : None
        next_valid_actions : sequence of length 2 of ints
            The valid actions that the model can respond with on this trial.

        Raises
        ------
        StopIteration
            Once all learning and test trials have been presented.
        """

        self.t += 1

        # ``>=`` rather than ``==`` so that further calls after the task has
        # finished keep raising StopIteration instead of an IndexError
        if self.t >= self.task_length:
            raise StopIteration

        next_stimulus = None
        next_valid_actions = self.action_sequence[self.t]

        return next_stimulus, next_valid_actions

    def receiveAction(self, action):
        """
        Receives the next action from the participant

        Parameters
        ----------
        action : int or string
            The action taken by the model
        """

        self.action = action

    def feedback(self):
        """
        Responds to the action from the participant

        Returns
        -------
        reward : float
            During the learning phase ``reward_size`` with the probability
            associated with the last received action and ``0`` otherwise.
            During the test phase always ``NaN``.
        """

        if self.t < self.learning_length:
            # The probability of success depends on the chosen action
            success_probability = self.action_reward_probabilities[self.action]

            # Scalar draw, so that the comparison yields a plain boolean
            if success_probability >= np.random.rand():
                reward = self.reward_size
            else:
                reward = 0
        else:
            # No feedback is given in the test phase
            reward = float('nan')

        self.reward_value = reward

        self.storeState()

        return reward

    def proceed(self):
        """
        Updates the task after feedback
        """

        pass

    def returnTaskState(self):
        """
        Returns all the relevant data for this task run

        Returns
        -------
        results : dictionary
            A dictionary containing the class parameters as well as the other useful data:
            the recorded rewards (``rewVals``), the recorded actions (``Actions``) and the
            valid actions of every trial (``validAct``)
        """

        results = self.standardResultOutput()

        results["rewVals"] = np.array(self.record_reward_values)
        results["Actions"] = np.array(self.record_actions)
        results["validAct"] = np.array(self.action_sequence)

        return results

    def storeState(self):
        """ Stores the state of all the important variables so that they can be
        output later """

        self.record_actions[self.t] = self.action
        self.record_reward_values[self.t] = self.reward_value

    @staticmethod
    def __generate_action_sequence(action_reward_probability,
                                   learning_action_pairs,
                                   learning_length,
                                   test_length):
        """
        Generates the pair of valid actions for every trial of the task

        Parameters
        ----------
        action_reward_probability : dictionary
            Currently unused. Kept so that the call signature is unchanged
        learning_action_pairs : list of tuples
            The pairs of actions shown together in the learning phase
        learning_length : int
            The number of trials in the learning phase
        test_length : int
            The number of trials in the test phase

        Returns
        -------
        action_sequence : list
            ``learning_length + test_length`` entries, each holding the two
            valid actions for that trial
        """

        pair_nums = np.arange(len(learning_action_pairs))
        action_pairs = np.array(learning_action_pairs)

        # Learning phase: each trial is one of the learning pairs, drawn with replacement
        pairs = np.random.choice(pair_nums, size=learning_length, replace=True)
        action_sequence = list(action_pairs[pairs])

        # Test phase: each trial combines one action from each of two different
        # learning pairs. Sampling without replacement needs at least two pairs.
        # ``range`` and ``zip`` are used as they exist on both Python 2 and 3,
        # unlike ``xrange`` and ``itertools.izip``
        for _ in range(test_length):
            pairs = np.random.choice(pair_nums, size=2, replace=False)
            elements = np.random.choice([0, 1], size=2, replace=True)

            pair = [action_pairs[p, e] for p, e in zip(pairs, elements)]
            action_sequence.append(pair)

        return action_sequence


class StimulusProbSelectDirect(Stimulus):
    """
    Processes the selection stimuli for models expecting just the event

    Examples
    --------
    >>> stim = StimulusProbSelectDirect()
    >>> stim.processStimulus(1)
    (1, 1)
    >>> stim.processStimulus(0)
    (1, 1)
    """

    def processStimulus(self, observation):
        """
        Processes the selection stimuli for models expecting just the event

        Parameters
        ----------
        observation : object
            The observation provided by the task. It is not used: the same
            values are returned whatever the observation is.

        Returns
        -------
        stimuliPresent :  int
            Always 1
        stimuliActivity : int
            Always 1

        """
        return 1, 1


class RewardProbSelectDirect(Rewards):
    """
    Processes the probabilistic selection reward for models expecting just the reward

    """

    def processFeedback(self, reward, action, stimuli):
        """
        Passes the reward through to the model unchanged

        Parameters
        ----------
        reward : float
            The feedback provided by the task
        action : int or string
            The action taken by the model. Not used here
        stimuli : object
            The stimuli presented to the model. Not used here

        Returns
        -------
        modelFeedback : float
            The ``reward`` exactly as it was received
        """
        return reward