
swarmrl.trainers.trainer Module API Reference

Module for the Trainer parent.

Trainer

Parent class for the RL Trainer.

Attributes

rl_protocols : list(protocol)
    A list of RL protocols to use in the simulation.
loss : Loss
    An optimization method to compute the loss and update the model.

Source code in swarmrl/trainers/trainer.py
class Trainer:
    """
    Parent class for the RL Trainer.

    Attributes
    ----------
    rl_protocols : list(protocol)
            A list of RL protocols to use in the simulation.
    loss : Loss
            An optimization method to compute the loss and update the model.
    """

    _engine = None

    @property
    def engine(self):
        """
        Runner engine property.
        """
        return self._engine

    @engine.setter
    def engine(self, value):
        """
        Set the engine value.
        """
        self._engine = value

    def __init__(
        self,
        agents: List[ActorCriticAgent],
    ):
        """
        Constructor for the Trainer.

        Parameters
        ----------
        agents : list
                A list of RL agents to train.
        """
        self.agents = {}

        # Add the protocols to an easily accessible internal dict.
        # TODO: Maybe turn into a dataclass? Not sure if it helps yet.
        for agent in agents:
            self.agents[str(agent.particle_type)] = agent

    def initialize_training(self) -> ForceFunction:
        """
        Return an initialized interaction model.

        Returns
        -------
        interaction_model : ForceFunction
                Interaction model to start the simulation with.
        """

        return ForceFunction(
            agents=self.agents,
        )

    def update_rl(self) -> Tuple[ForceFunction, np.ndarray, bool]:
        """
        Update the RL algorithm.

        Returns
        -------
        interaction_model : ForceFunction
                Interaction model to use in the next episode.
        reward : np.ndarray
                Current mean episode reward. This is returned for nice progress bars.
        killed : bool
                Whether or not the task has ended the training.
        """
        reward = 0.0  # TODO: Separate between species and optimize visualization.
        switches = []

        for agent in self.agents.values():
            if isinstance(agent, ActorCriticAgent):
                ag_reward, ag_killed = agent.update_agent()
                reward += np.mean(ag_reward)
                switches.append(ag_killed)

        # Create a new interaction model.
        interaction_model = ForceFunction(agents=self.agents)
        return interaction_model, np.array(reward), any(switches)

    def export_models(self, directory: str = "Models"):
        """
        Export the models to the specified directory.

        Parameters
        ----------
        directory : str (default='Models')
                Directory in which to save the models.

        Returns
        -------
        Saves the actor and the critic to the specified directory.
        """
        for agent in self.agents.values():
            agent.save_agent(directory)

    def restore_models(self, directory: str = "Models"):
        """
        Restore the models from the specified directory.

        Parameters
        ----------
        directory : str (default='Models')
                Directory from which to load the objects.

        Returns
        -------
        Loads the actor and critic from the specified directory.
        """
        for agent in self.agents.values():
            agent.restore_agent(directory)

    def initialize_models(self):
        """
        Initialize all of the models in the gym.
        """
        for agent in self.agents.values():
            agent.initialize_network()

    def perform_rl_training(self, **kwargs):
        """
        Perform the RL training.

        Parameters
        ----------
        **kwargs
            All arguments related to the specific trainer.
        """
        raise NotImplementedError("Implemented in child class")
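
The base class is normally driven by a concrete child trainer, but its public methods can also be called directly. Below is a minimal, hypothetical usage sketch; `my_agents` stands in for a list of already-configured ActorCriticAgent instances built elsewhere and is not defined in this module.

# Minimal usage sketch (not part of the SwarmRL sources). `my_agents` is an
# assumed, pre-built list of ActorCriticAgent instances.
from swarmrl.trainers.trainer import Trainer

trainer = Trainer(agents=my_agents)

# Build fresh network parameters, then an interaction model for the engine.
trainer.initialize_models()
force_function = trainer.initialize_training()

# After the engine has run an episode, update the agents and get a new model.
force_function, mean_reward, killed = trainer.update_rl()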

engine property writable

Runner engine property.

__init__(agents)

Constructor for the Trainer.

Parameters

agents : list
    A list of RL agents to train.

Source code in swarmrl/trainers/trainer.py
def __init__(
    self,
    agents: List[ActorCriticAgent],
):
    """
    Constructor for the Trainer.

    Parameters
    ----------
    agents : list
            A list of RL agents to train.
    """
    self.agents = {}

    # Add the protocols to an easily accessible internal dict.
    # TODO: Maybe turn into a dataclass? Not sure if it helps yet.
    for agent in agents:
        self.agents[str(agent.particle_type)] = agent
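
Because the constructor keys the internal dictionary by str(agent.particle_type), an individual agent can later be looked up by the particle type it controls. A small hypothetical illustration, where `agent_a` and `agent_b` stand in for ActorCriticAgent instances with particle types 0 and 1:

# Hypothetical illustration of the internal agent mapping.
trainer = Trainer(agents=[agent_a, agent_b])

trainer.agents["0"]  # agent controlling particle type 0
trainer.agents["1"]  # agent controlling particle type 1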

export_models(directory='Models')

Export the models to the specified directory.

Parameters

directory : str (default='Models')
    Directory in which to save the models.

Returns

Saves the actor and the critic to the specified directory.

Source code in swarmrl/trainers/trainer.py
def export_models(self, directory: str = "Models"):
    """
    Export the models to the specified directory.

    Parameters
    ----------
    directory : str (default='Models')
            Directory in which to save the models.

    Returns
    -------
    Saves the actor and the critic to the specified directory.
    """
    for agent in self.agents.values():
        agent.save_agent(directory)
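
A short checkpointing sketch; the directory name is arbitrary and the call simply forwards to each agent's save_agent method:

# Save every agent's networks into a common checkpoint directory (sketch).
trainer.export_models(directory="Models")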

initialize_models()

Initialize all of the models in the gym.

Source code in swarmrl/trainers/trainer.py
def initialize_models(self):
    """
    Initialize all of the models in the gym.
    """
    for agent in self.agents.values():
        agent.initialize_network()
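
Either initialize_models (fresh parameters) or restore_models (resume from a checkpoint) should be called before the first episode. A hedged sketch; the resume flag is hypothetical:

# Sketch: choose fresh parameters or a checkpoint before training starts.
if resume_run:  # hypothetical flag, not part of SwarmRL
    trainer.restore_models(directory="Models")
else:
    trainer.initialize_models()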

initialize_training()

Return an initialized interaction model.

Returns

interaction_model : ForceFunction
    Interaction model to start the simulation with.

Source code in swarmrl/trainers/trainer.py
def initialize_training(self) -> ForceFunction:
    """
    Return an initialized interaction model.

    Returns
    -------
    interaction_model : ForceFunction
            Interaction model to start the simulation with.
    """

    return ForceFunction(
        agents=self.agents,
    )
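
The returned ForceFunction wraps the agent dictionary and is the object the simulation engine queries for actions during an episode. A sketch of handing it to a runner engine; `some_engine` and its integrate call are assumptions standing in for whatever the concrete engine exposes:

# Sketch: pass the interaction model to a runner engine. The engine object and
# its integrate call are assumed, not part of this module.
force_function = trainer.initialize_training()
trainer.engine = some_engine
trainer.engine.integrate(1000, force_function)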

perform_rl_training(**kwargs)

Perform the RL training.

Parameters

**kwargs
    All arguments related to the specific trainer.

Source code in swarmrl/trainers/trainer.py
def perform_rl_training(self, **kwargs):
    """
    Perform the RL training.

    Parameters
    ----------
    **kwargs
        All arguments related to the specific trainer.
    """
    raise NotImplementedError("Implemented in child class")
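
Because the base implementation only raises NotImplementedError, concrete trainers override this method with their episode loop. A minimal, hypothetical override; the engine integrate call is an assumption:

# Hypothetical child trainer, shown only to illustrate the expected override.
class MyTrainer(Trainer):
    def perform_rl_training(self, n_episodes: int = 10, episode_length: int = 100):
        force_function = self.initialize_training()
        for _ in range(n_episodes):
            self.engine.integrate(episode_length, force_function)  # assumed engine API
            force_function, reward, killed = self.update_rl()
            if killed:
                break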

restore_models(directory='Models')

Restore the models from the specified directory.

Parameters

directory : str (default='Models')
    Directory from which to load the objects.

Returns

Loads the actor and critic from the specified directory.

Source code in swarmrl/trainers/trainer.py
def restore_models(self, directory: str = "Models"):
    """
    Restore the models from the specified directory.

    Parameters
    ----------
    directory : str (default='Models')
            Directory from which to load the objects.

    Returns
    -------
    Loads the actor and critic from the specified directory.
    """
    for agent in self.agents.values():
        agent.restore_agent(directory)
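
Restoring is the counterpart to export_models and is used to resume a run from saved networks; the directory must match the one passed to export_models. A sketch:

# Resume from a previous checkpoint (sketch).
trainer.restore_models(directory="Models")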

update_rl()

Update the RL algorithm.

Returns

interaction_model : ForceFunction
    Interaction model to use in the next episode.
reward : np.ndarray
    Current mean episode reward. This is returned for nice progress bars.
killed : bool
    Whether or not the task has ended the training.

Source code in swarmrl/trainers/trainer.py
def update_rl(self) -> Tuple[ForceFunction, np.ndarray, bool]:
    """
    Update the RL algorithm.

    Returns
    -------
    interaction_model : ForceFunction
            Interaction model to use in the next episode.
    reward : np.ndarray
            Current mean episode reward. This is returned for nice progress bars.
    killed : bool
            Whether or not the task has ended the training.
    """
    reward = 0.0  # TODO: Separate between species and optimize visualization.
    switches = []

    for agent in self.agents.values():
        if isinstance(agent, ActorCriticAgent):
            ag_reward, ag_killed = agent.update_agent()
            reward += np.mean(ag_reward)
            switches.append(ag_killed)

    # Create a new interaction model.
    interaction_model = ForceFunction(agents=self.agents)
    return interaction_model, np.array(reward), any(switches)
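
Note that the per-agent mean rewards are summed into a single scalar before being wrapped in np.array, and killed becomes True as soon as any agent reports that its task has ended. A sketch of how a training loop typically consumes these values; the reward log is hypothetical:

# Sketch: consuming the update_rl outputs inside an episode loop.
reward_log = []  # hypothetical logging container
force_function, mean_reward, killed = trainer.update_rl()
reward_log.append(float(mean_reward))
if killed:
    print("An agent's task flagged the end of training.")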