swarmrl.trainers.episodic_trainer Module API Reference

Module for the EpisodicTrainer

EpisodicTrainer

Bases: Trainer

Class for the simple MLP RL implementation.

Attributes

rl_protocols : list(protocol)
    A list of RL protocols to use in the simulation.

Source code in swarmrl/trainers/episodic_trainer.py
class EpisodicTrainer(Trainer):
    """
    Class for the simple MLP RL implementation.

    Attributes
    ----------
    rl_protocols : list(protocol)
            A list of RL protocols to use in the simulation.
    """

    def perform_rl_training(
        self,
        get_engine: callable,
        system: "System",
        n_episodes: int,
        episode_length: int,
        reset_frequency: int = 1,
        load_bar: bool = True,
    ):
        """
        Perform the RL training.

        Parameters
        ----------
        get_engine : callable
                Function to get the engine for the simulation.
        system : System
                System used by get_engine to (re)build the simulation engine.
        n_episodes : int
                Number of episodes to use in the training.
        episode_length : int
                Number of time steps in one episode.
        reset_frequency : int (default=1)
                Number of episodes after which the simulation is reset.
        load_bar : bool (default=True)
                If true, show a progress bar.

        Notes
        -----
        When using semi-episodic training, the system is still reset if a
        task kills the simulation.
        """
        killed = False
        rewards = [0.0]
        current_reward = 0.0
        force_fn = self.initialize_training()

        progress = Progress(
            "Episode: {task.fields[Episode]}",
            BarColumn(),
            "Episode reward: {task.fields[current_reward]} Running Reward:"
            " {task.fields[running_reward]}",
            TimeRemainingColumn(),
        )

        with progress:
            task = progress.add_task(
                "Episodic Training",
                total=n_episodes,
                Episode=0,
                current_reward=current_reward,
                running_reward=np.mean(rewards),
                visible=load_bar,
            )
            for episode in range(n_episodes):

                # Check if the system should be reset.
                if episode % reset_frequency == 0 or killed:
                    self.engine = None
                    self.engine = get_engine(system)

                    # Initialize the tasks and observables.
                    for agent in self.agents.values():
                        agent.reset_agent(self.engine.colloids)

                self.engine.integrate(episode_length, force_fn)

                force_fn, current_reward, killed = self.update_rl()

                rewards.append(current_reward)

                episode += 1
                progress.update(
                    task,
                    advance=1,
                    Episode=episode,
                    current_reward=np.round(current_reward, 2),
                    running_reward=np.round(np.mean(rewards[-10:]), 2),
                )
                self.engine.finalize()

        return np.array(rewards)

perform_rl_training(get_engine, system, n_episodes, episode_length, reset_frequency=1, load_bar=True)

Perform the RL training.

Parameters

get_engine : callable
    Function to get the engine for the simulation.
system : System
    System used by get_engine to (re)build the simulation engine.
n_episodes : int
    Number of episodes to use in the training.
episode_length : int
    Number of time steps in one episode.
reset_frequency : int (default=1)
    Number of episodes after which the simulation is reset.
load_bar : bool (default=True)
    If true, show a progress bar.

Notes

When using semi-episodic training, the system is still reset if a task kills the simulation.
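
Example

A minimal usage sketch. Only the perform_rl_training call and its return value follow from the source above; the trainer construction, the system object, and the make_engine helper inside get_engine are placeholders standing in for your own setup.

import numpy as np

def get_engine(system):
    # Hypothetical factory: builds a fresh simulation engine from the
    # system description. perform_rl_training calls it at the start of
    # training and again every `reset_frequency` episodes, or whenever
    # a task kills the simulation.
    return make_engine(system)  # placeholder for your engine setup

# `trainer` is an EpisodicTrainer built elsewhere with its agents and
# RL protocols; the constructor is not shown in this reference.
rewards = trainer.perform_rl_training(
    get_engine=get_engine,
    system=system,        # forwarded to get_engine on every reset
    n_episodes=500,       # number of training episodes
    episode_length=20,    # time steps integrated per episode
    reset_frequency=10,   # rebuild the engine every 10 episodes
    load_bar=True,        # show the progress bar
)

# rewards holds one entry per episode (plus the initial 0.0 placeholder).
print(np.round(np.mean(rewards[-10:]), 2))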
