import gymimport torchimport torch.nn as nnimport torch.optim as optimimport torch.nn.functional as Ffrom torch.distributions import Categorical# 1. 하이퍼파라미터 설정learning_rate = 0.001gamma = 0.99n_episodes = 1000# 2. Actor-Critic 네트워크 정의# 핵심: Actor와 Critic이 앞단 레이어를 공유하거나, 별도 헤드를 가짐class ActorCritic(nn.Module): def __init__(self): super(ActorCritic, self).__init__() self.fc1 = nn.Li..