-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent.py
More file actions
executable file
·36 lines (30 loc) · 1.74 KB
/
agent.py
File metadata and controls
executable file
·36 lines (30 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from pathlib import Path
import torch
from torch.distributions.categorical import Categorical
import torch.nn as nn
from models.actor_critic import ActorCritic
from models.tokenizer import Tokenizer
from models.world_model import WorldModel
from utils import extract_state_dict
class Agent(nn.Module):
    """Container bundling the three IRIS components — tokenizer, world model,
    and actor-critic — with checkpoint loading and greedy/sampled action
    selection.
    """

    def __init__(self, tokenizer: Tokenizer, world_model: WorldModel, actor_critic: ActorCritic) -> None:
        super().__init__()
        self.tokenizer = tokenizer
        self.world_model = world_model
        self.actor_critic = actor_critic

    @property
    def device(self) -> torch.device:
        # Device is inferred from one actor-critic parameter; assumes all
        # three submodules live on the same device — TODO confirm with callers.
        return self.actor_critic.conv1.weight.device

    def load(self, path_to_checkpoint: Path, device: torch.device, load_tokenizer: bool = True, load_world_model: bool = True, load_actor_critic: bool = True) -> None:
        """Load component state dicts from a single combined agent checkpoint.

        Args:
            path_to_checkpoint: Path to a checkpoint whose keys are prefixed
                by component name ('tokenizer', 'world_model', 'actor_critic');
                `extract_state_dict` strips the prefix for each component.
            device: Device onto which the tensors are mapped while loading.
            load_tokenizer: Whether to restore the tokenizer weights.
            load_world_model: Whether to restore the world-model weights.
            load_actor_critic: Whether to restore the actor-critic weights.
        """
        # NOTE(review): torch.load unpickles arbitrary objects — only load
        # checkpoints from trusted sources.
        agent_state_dict = torch.load(path_to_checkpoint, map_location=device)
        # One pass over (flag, module, checkpoint-prefix) replaces three
        # copy-pasted if-blocks; behavior is identical.
        components = (
            (load_tokenizer, self.tokenizer, 'tokenizer'),
            (load_world_model, self.world_model, 'world_model'),
            (load_actor_critic, self.actor_critic, 'actor_critic'),
        )
        for should_load, module, prefix in components:
            if should_load:
                module.load_state_dict(extract_state_dict(agent_state_dict, prefix))

    def act(self, obs: torch.FloatTensor, should_sample: bool = True, temperature: float = 1.0) -> torch.LongTensor:
        """Select an action for the given observation batch.

        Args:
            obs: Observation tensor; presumably image-shaped with values in
                [0, 1] — TODO confirm against callers.
            should_sample: If True, sample from the action distribution;
                otherwise take the argmax action.
            temperature: Softmax temperature dividing the logits before
                sampling (ignored by the argmax path only in effect, since
                scaling does not change the argmax).

        Returns:
            A tensor of action token indices, one per batch element.
        """
        # Unless the actor-critic was trained on raw observations, feed it the
        # tokenizer's reconstruction, clamped back into the valid [0, 1] range.
        if self.actor_critic.use_original_obs:
            input_ac = obs
        else:
            reconstruction = self.tokenizer.encode_decode(obs, should_preprocess=True, should_postprocess=True)
            input_ac = torch.clamp(reconstruction, 0, 1)
        # Only the logits of the last timestep are used for acting.
        logits_actions = self.actor_critic(input_ac).logits_actions[:, -1] / temperature
        if should_sample:
            return Categorical(logits=logits_actions).sample()
        return logits_actions.argmax(dim=-1)