dataset:
  data_path: '../data/json_2.1.1/train'
  eval_id_data_path: '../data/json_2.1.1/valid_seen'  # null/None to disable
  eval_ood_data_path: '../data/json_2.1.1/valid_unseen'  # null/None to disable
  num_train_games: -1  # max training games (<=0 indicates full dataset)
  num_eval_games: -1  # max evaluation games (<=0 indicates full dataset)

pddl:
  domain: '../data/alfred.pddl'  # PDDL domain file that defines world dynamics
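# A minimal sketch (commented out): to skip the out-of-distribution split entirely,
# use the null option the comments above already document, e.g.
#   eval_ood_data_path: null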
env:
  type: 'AlfredTWEnv'  # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid'
  regen_game_files: False  # check if game is solvable by expert and save to game.tw-pddl file
  domain_randomization: False  # shuffle TextWorld print order and object id numbers
  task_types: [1, 2, 3, 4, 5, 6]  # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place
  expert_timeout_steps: 150  # max steps before timeout for the expert to solve the task
  expert_type: "handcoded"  # 'handcoded' or 'downward'. Note: the downward planner is very slow for real-time use
  goal_desc_human_anns_prob: 0.0  # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED)

  hybrid:
    start_eps: 100000  # starting episode of hybrid training, TW-only training up to this point
    thor_prob: 0.5  # prob of AlfredThorEnv during hybrid training
    eval_mode: "tw"  # 'tw' or 'thor' - env used for evaluation during hybrid training

  thor:
    screen_width: 300  # width of THOR window
    screen_height: 300  # height of THOR window
    smooth_nav: False  # smooth rotations, looks, and translations during navigation (very slow)
    save_frames_to_disk: False  # save frame PNGs to disk (useful for making videos)
    save_frames_path: '../videos/'  # path to save frame PNGs
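# A sketch (commented out), not a tested recipe: to run in the embodied THOR simulator
# instead of TextWorld, set env.type above to one of the alternatives it documents, e.g.
#   type: 'AlfredThorEnv'
# and adjust the thor: block (screen size, frame saving) as needed.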
controller:
  type: 'oracle'  # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER)
  debug: False
  load_receps: False  # load receptacle locations from precomputed dict (if available)

mask_rcnn:
  pretrained_model_path: 'agents/detector/models/mrcnn.pth'
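# A sketch (commented out): the vision-based BUTLER-style setup pairs the 'mrcnn_astar'
# controller option above with the MaskRCNN checkpoint path; illustrative values only,
# assuming that checkpoint is actually available on your machine:
#   type: 'mrcnn_astar'
#   load_receps: True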
general:
  random_seed: 42
  use_cuda: True  # disable this when running on a machine without CUDA
  visdom: False  # plot training/eval curves, run with visdom server
  task: 'alfred'
  training_method: 'dagger'  # 'dqn' or 'dagger'
  save_path: './runs/'  # path to save pytorch models
  observation_pool_capacity: 3  # k-size queue, 0 indicates no observation
  hide_init_receptacles: False  # remove initial observation containing navigable receptacles

  training:
    batch_size: 10
    max_episode: 50000
    smoothing_eps: 0.1
    optimizer:
      learning_rate: 0.001
      clip_grad_norm: 5

  evaluate:
    run_eval: True
    batch_size: 10  # number of parallel eval threads
    repeats: 1  # number of times to loop over eval games (we used 3 in paper experiments)
    controllers:  # different controllers to evaluate with
      # - 'oracle'
      # - 'mrcnn_astar'
    envs:  # different environments to evaluate in
      - 'AlfredTWEnv'
      # - 'AlfredThorEnv'
    env:
      type: 'AlfredThorEnv'
    eval_paths:  # different splits to evaluate on
      - '../data/json_2.1.1/valid_seen'
      - '../data/json_2.1.1/valid_unseen'
    eval_experiment_tag: "eval_run_001"  # save results json with this prefix
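  # A sketch (commented out), not a tested recipe: to also evaluate in the embodied THOR
  # environment, enable the options already listed (commented) above, e.g.
  #   controllers:
  #     - 'mrcnn_astar'
  #   envs:
  #     - 'AlfredTWEnv'
  #     - 'AlfredThorEnv'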
  checkpoint:
    report_frequency: 10  # report every N episodes
    experiment_tag: 'test'  # name of experiment
    load_pretrained: True  # during test, enable this so that the agent loads your pretrained model
    load_from_tag: 'pretrained_checkpoint_id'  # name of the pre-trained model (under save_path) to load

  model:
    encoder_layers: 1
    decoder_layers: 1
    encoder_conv_num: 5
    block_hidden_dim: 64
    n_heads: 1
    dropout: 0.1
    block_dropout: 0.1
    recurrent: True
rl:
  action_space: "admissible"  # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working)
  max_target_length: 20  # max token length for seq2seq generation
  beam_width: 10  # 1 means greedy
  generate_top_k: 3

  training:
    max_nb_steps_per_episode: 50  # terminate after this many steps
    learn_start_from_this_episode: 0  # delay updates until this episode
    target_net_update_frequency: 500  # sync target net with online net every this many epochs

  replay:
    accumulate_reward_from_final: True
    count_reward_lambda: 0.0  # 0 to disable
    novel_object_reward_lambda: 0.0  # 0 to disable
    discount_gamma_game_reward: 0.9
    discount_gamma_count_reward: 0.5
    discount_gamma_novel_object_reward: 0.5
    replay_memory_capacity: 500000  # adjust this depending on your RAM size
    replay_memory_priority_fraction: 0.5
    update_per_k_game_steps: 5
    replay_batch_size: 64
    multi_step: 3
    replay_sample_history_length: 4
    replay_sample_update_from: 2

  epsilon_greedy:
    noisy_net: False  # if this is true, then epsilon greedy is disabled
    epsilon_anneal_episodes: 1000  # -1 if not annealing
    epsilon_anneal_from: 0.3
    epsilon_anneal_to: 0.1
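# A sketch (commented out), assuming the agent reads this rl block only when training
# with DQN: to try DQN instead of DAgger, switch the documented option above, e.g.
#   training_method: 'dqn'  # under general: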
dagger:
  action_space: "generation"  # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working)
  max_target_length: 20  # max token length for seq2seq generation
  beam_width: 10  # 1 means greedy
  generate_top_k: 5
  unstick_by_beam_search: True  # use beam-search for failed actions, set True during evaluation

  training:
    max_nb_steps_per_episode: 50  # terminate after this many steps

  fraction_assist:
    fraction_assist_anneal_episodes: 50000
    fraction_assist_anneal_from: 1.0
    fraction_assist_anneal_to: 0.01

  fraction_random:
    fraction_random_anneal_episodes: 0
    fraction_random_anneal_from: 0.0
    fraction_random_anneal_to: 0.0

  replay:
    replay_memory_capacity: 500000
    update_per_k_game_steps: 5
    replay_batch_size: 64
    replay_sample_history_length: 4
    replay_sample_update_from: 2
vision_dagger:
  model_type: "resnet"  # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input)
  resnet_fc_dim: 64
  maskrcnn_top_k_boxes: 10  # top k box features
  use_exploration_frame_feats: False  # append feats from initial exploration (memory intensive!)
  sequence_aggregation_method: "average"  # 'sum' or 'average' or 'rnn'
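# A sketch (commented out): for per-object detection features instead of whole-image
# ResNet features, the documented alternative above is
#   model_type: "maskrcnn"
# which keeps the top maskrcnn_top_k_boxes detections per frame.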