Skip to content

Commit 62157e6

Browse files
author
Jan Michelfeit
committed
#625 fix more pre-commit errors
1 parent c787877 commit 62157e6

6 files changed

Lines changed: 10 additions & 5 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""PEBBLE specific algorithms."""

src/imitation/algorithms/pebble/entropy_reward.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class PebbleRewardPhase(Enum):
2525
class PebbleStateEntropyReward(ReplayBufferAwareRewardFn):
2626
"""Reward function for implementation of the PEBBLE learning algorithm.
2727
28-
See https://arxiv.org/pdf/2106.05091.pdf .
28+
See https://arxiv.org/abs/2106.05091 .
2929
3030
The rewards returned by this function go through the three phases:
3131
1. Before enough samples are collected for entropy calculation, the

src/imitation/algorithms/preference_comparisons.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def __init__(
363363

364364
def unsupervised_pretrain(self, steps: int, **kwargs: Any) -> None:
365365
self.train(steps, **kwargs)
366-
self.reward_fn.unsupervised_exploration_finish() # type: ignore[attribute-error]
366+
self.reward_fn.unsupervised_exploration_finish() # type: ignore
367367

368368

369369
def _get_trajectories(

src/imitation/policies/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class SAC1024Policy(sac_policies.SACPolicy):
7676
"""Actor and value networks with two hidden layers of 1024 units respectively.
7777
7878
This matches the implementation of SAC policies in the PEBBLE paper. See:
79-
https://arxiv.org/pdf/2106.05091.pdf
79+
https://arxiv.org/abs/2106.05091
8080
https://github.com/denisyarats/pytorch_sac/blob/master/config/agent/sac.yaml
8181
8282
Note: This differs from stable_baselines3 SACPolicy by having 1024 hidden units

src/imitation/rewards/reward_function.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import numpy as np
77

8+
import imitation.policies.replay_buffer_wrapper
9+
810

911
class RewardFn(Protocol):
1012
"""Abstract class for reward function.
@@ -40,6 +42,8 @@ class ReplayBufferAwareRewardFn(RewardFn, abc.ABC):
4042
@abc.abstractmethod
4143
def on_replay_buffer_initialized(
4244
self,
43-
replay_buffer: "ReplayBufferRewardWrapper", # type: ignore[name-defined] # noqa
45+
replay_buffer: (
46+
"imitation.policies.replay_buffer_wrapper.ReplayBufferRewardWrapper"
47+
),
4448
):
4549
pass

src/imitation/scripts/config/train_preference_comparisons.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def train_defaults():
6161
checkpoint_interval = 0 # Num epochs between saving (<0 disables, =0 final only)
6262
query_schedule = "hyperbolic"
6363

64-
# Whether to use the PEBBLE algorithm (https://arxiv.org/pdf/2106.05091.pdf)
64+
# Whether to use the PEBBLE algorithm (https://arxiv.org/abs/2106.05091)
6565
pebble_enabled = False
6666
unsupervised_agent_pretrain_frac = 0.0
6767

0 commit comments

Comments (0)