rl_define_new_agent.Rd
Description

For users who wish to supply their own simulation code, this function streamlines the creation of RL agent objects, giving them a consistent API for RL simulations along with internal safeguards.

Usage
rl_define_new_agent(
  model_type,
  simulation_code_file,
  required_args,
  required_methods = c("reinforcements", "stimuli", "arms", "policy"),
  return_agent = TRUE
)
Arguments

model_type: What type of model is the agent? E.g., "kArmedBandit" or "tdrlConditioning".

simulation_code_file: A file path to templated simulation code. See the examples for details.

required_args: A character vector naming the required arguments of the RL agent, for example c("num_trials", "num_episodes", "gamma", "alpha").

required_methods: Which methods should the agent support? Setting reinforcements? Stimuli? Arms? See the examples for more details.

return_agent: Logical. If TRUE (default), the actual agent object is returned and can be instantiated; if FALSE, a character string containing the code to generate the R6 agent is returned.
Value

If return_agent = TRUE (default), an R6 object of class model_type for simulating RL tasks with the file supplied by the simulation_code_file argument. Otherwise, a character string with the code to define the R6 object.

Examples
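# Before committing to an agent object, the generated class definition can be
# inspected as text. A minimal sketch (added for illustration; it mirrors the
# arguments of the TD conditioning example below) using return_agent = FALSE:
td_cond_code <- rl_define_new_agent(
  model_type = "TD Conditioning",
  simulation_code_file = use_agent_template("td_conditioning", read = FALSE),
  required_args = c("num_stimuli", "num_trials", "num_episodes", "gamma", "alpha"),
  required_methods = c("reinforcements", "stimuli"),
  return_agent = FALSE
)
cat(td_cond_code) # print the R6 class definition that would be generated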
# Define a temporal-difference conditioning agent using the built-in algorithm
td_cond_agent <- rl_define_new_agent(
  model_type = "TD Conditioning",
  # Use the built-in model specification "td_conditioning", setting read = FALSE
  # to simply specify the path
  simulation_code_file = use_agent_template("td_conditioning", read = FALSE),
  # What values must be set for the agent/simulation to work?
  required_args = c("num_stimuli", "num_trials", "num_episodes", "gamma", "alpha"),
  # Only need to specify the reinforcement and stimulus structure
  required_methods = c("reinforcements", "stimuli"),
  return_agent = TRUE # return the actual RL Agent object
)

# Initialize a new conditioning agent as tdCond
tdCond <- td_cond_agent$new(
  model_id = "Classical Conditioning via TDRL",
  num_stimuli = 1,
  num_trials = 100,
  num_episodes = 10,
  gamma = 1,
  alpha = 0.3
)

# Set the stimulus and reinforcement structure, then simulate
tdCond$
  set_stimuli(
    list(
      one = data.frame(
        onset = 3,
        offset = 8,
        magnitude = 1,
        trial = 1:100
      )
    )
  )$
  set_reinforcements(
    list(
      data.frame(
        onset = 8,
        offset = 8,
        magnitude = 1,
        trial = 1:100
      )
    )
  )$
  simulate_agent()
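# For reference, a schematic of the TD(0) value update that temporal-difference
# models of conditioning typically perform, using the gamma (discount) and
# alpha (learning rate) arguments above. This is an illustration only, not the
# package's actual template code:
td_update <- function(V, s, s_next, reward, alpha, gamma) {
  # Move V[s] toward the TD target: reward + gamma * V[s_next]
  V[s] <- V[s] + alpha * (reward + gamma * V[s_next] - V[s])
  V
}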
# Define a k-armed bandit agent using the built-in algorithm
k_armed_agent <- rl_define_new_agent(
  model_type = "K-Armed Bandit",
  # Use the built-in model specification "k_armed_bandit", setting read = FALSE
  # to simply specify the path
  simulation_code_file = use_agent_template("k_armed_bandit", read = FALSE),
  # What values must be set for the agent/simulation to work?
  required_args = c(
    "num_arms", "num_trials", "num_episodes",
    "action_episode", "reinforcement_episode",
    "gamma", "alpha"
  ),
  required_methods = c("arms", "policy"), # must specify the arm and policy structure
  return_agent = TRUE # return the actual RL Agent object
)

# Initialize a k-armed bandit agent as 'twoArms'
twoArms <- k_armed_agent$new(
  model_id = "Two Armed Bandit Example",
  num_trials = 100,
  num_episodes = 4,
  num_arms = 2,
  action_episode = 2,
  reinforcement_episode = 3,
  gamma = 1,
  alpha = 0.3
)

# Set the arm structure, action-selection policy, and simulate
twoArms$
  set_arms(
    list(
      left = data.frame(
        probability = 0.1,
        magnitude = 1,
        alternative = 0,
        trial = 1:100
      ),
      right = data.frame(
        probability = 0.8,
        magnitude = 1,
        alternative = 0,
        trial = 1:100
      )
    )
  )$
  set_policy(
    policy = "softmax",
    tau = 0.5
  )$
  simulate_agent()
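# For reference, softmax action selection with temperature tau chooses arm a
# with probability proportional to exp(Q[a] / tau). A minimal sketch of that
# rule (illustrative only; the package's internal policy code may differ):
softmax_choice <- function(q_values, tau) {
  probs <- exp(q_values / tau - max(q_values / tau)) # subtract max for stability
  probs <- probs / sum(probs)
  sample(seq_along(q_values), size = 1, prob = probs)
}
softmax_choice(c(0.2, 0.8), tau = 0.5) # usually picks arm 2, sometimes arm 1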