Skip to content

Commit b0c9f69

Browse files
Merge pull request #4 from JulienT01/main
Improve tests and code coverage
2 parents 2d02501 + 0db00a8 commit b0c9f69

9 files changed

Lines changed: 141 additions & 31 deletions

File tree

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,10 @@ dmypy.json
127127

128128
# Pyre type checker
129129
.pyre/
130+
131+
#vsCode config
132+
.vscode/
133+
134+
#tests artefact
135+
test.pdf
136+
examples/.adastop_comparator.pkl

adastop/cli.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,9 @@ def compare(ctx, input_file, n_groups, size_group, n_permutations, alpha, beta,
6161
if i in comparator.current_comparisons.ravel():
6262
names.append(comparator.agent_names[i])
6363

64-
6564
Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names]
66-
if len(Z[0]) > comparator.K * n_fits_per_group:
67-
raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.')
65+
if len(names) == 0:
66+
raise ValueError('Error: you tried to use more group than necessary. Use adastop status to see current status for more info.')
6867
assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration."
6968

7069
else:

adastop/compare_agents.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def partial_compare(self, eval_values, verbose=True):
212212
if self.agent_names is None:
213213
self.agent_names = list(eval_values.keys())
214214

215-
Z = [eval_values[agent] for agent in self.agent_names]
215+
Z = [np.array(eval_values[agent]) for agent in self.agent_names]
216216
n_managers = len(Z)
217217
if isinstance(self.n,int):
218218
self.n = np.array([self.n]*n_managers)
@@ -256,13 +256,13 @@ def partial_compare(self, eval_values, verbose=True):
256256

257257
# Compute admissible values, i.e. values that would not be rejected nor accepted.
258258
admissible_values_sup = values[
259-
self.level_spent + icumulative_probas <= clevel
259+
self.level_spent + icumulative_probas < clevel
260260
]
261261

262262
if len(admissible_values_sup) > 0:
263263
bk_sup = admissible_values_sup[0] # the minimum admissible value
264264
level_to_add = icumulative_probas[
265-
self.level_spent + icumulative_probas <= clevel
265+
self.level_spent + icumulative_probas < clevel
266266
][0]
267267
else:
268268
# This case is possible if clevel-self.level_spent <= 1/ self.normalization (smallest proba possible),
@@ -272,7 +272,7 @@ def partial_compare(self, eval_values, verbose=True):
272272

273273
cumulative_probas = np.arange(len(values)) / self.normalization # corresponds to P(T < t)
274274
admissible_values_inf = values[
275-
self.power_spent + cumulative_probas < dlevel
275+
self.power_spent + cumulative_probas <= dlevel
276276
]
277277

278278
if len(admissible_values_inf) > 0:

docs/tutorials.md

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@ The command line interface takes csv files as input. Each csv file must contain
1515
Below, we give an example based on files containing the evaluations of PPO, DDPG, SAC and TRPO, four Deep Reinforcement Learning algorithms, given in the \`examples\` directory of the main repository.
1616

1717

18+
## Installation
19+
20+
To install adastop, use pip:
21+
```bash
22+
pip install adastop
23+
```
24+
25+
This will automatically install the command line interface as well as the python library.
26+
27+
28+
1829
## Help for cli tool
1930

2031
The AdaStop algorithm is initialized with the first test done through \`adastop compare\` and the current state of AdaStop is then saved in a pickle file. The help of \`adastop\` command line can be obtained with the following:
@@ -90,7 +101,7 @@ The input format of adastop is under the form of a csv file containing the score
90101

91102
Let us launch AdaStop on this first batch of data.
92103

93-
First, we clean up the corrent directory of any litter files that could have been spawned by a previous usage of \`adastop\` (if you never used \`adastop\` before, this command will not have any effect).
104+
First, we clean up the current directory of any litter files that could have been spawned by a previous usage of \`adastop\` (if you never used \`adastop\` before, this command will not have any effect).
94105

95106
```bash
96107
adastop reset . # reset the state of the comparator (remove hidden pickle file)
@@ -144,14 +155,14 @@ adastop compare --n-groups 5 --size-group 5 walker5.csv
144155

145156
Test is finished, decisions are
146157

147-
| | Agent1 vs Agent2 | mean Agent1 | mean Agent2 | mean diff | std Agent 1 | std Agent 2 | decisions |
148-
|--- |---------------- |----------- |----------- |--------- |----------- |----------- |--------- |
149-
| 0 | PPO vs DDPG | 2901.53 | 884.119 | 2017.41 | 1257.93 | 535.74 | larger |
150-
| 0 | PPO vs SAC | 2901.53 | 4543.4 | -1641.87 | 1257.93 | 432.13 | smaller |
151-
| 0 | PPO vs TRPO | 2901.53 | 1215.42 | 1686.11 | 1257.93 | 529.672 | larger |
152-
| 0 | DDPG vs SAC | 884.119 | 4543.4 | -3659.28 | 535.74 | 432.13 | smaller |
153-
| 0 | DDPG vs TRPO | 884.119 | 1215.42 | -331.297 | 535.74 | 529.672 | smaller |
154-
| 0 | SAC vs TRPO | 4543.4 | 1215.42 | 3327.98 | 432.13 | 529.672 | larger |
158+
| Agent1 vs Agent2 | mean Agent1 | mean Agent2 | mean diff | std Agent 1 | std Agent 2 | decisions |
159+
|----------------- |------------ |------------ |---------- |------------ |------------ |---------- |
160+
| PPO vs DDPG | 2901.53 | 884.119 | 2017.41 | 1257.93 | 535.74 | larger |
161+
| PPO vs SAC | 2901.53 | 4543.4 | -1641.87 | 1257.93 | 432.13 | smaller |
162+
| PPO vs TRPO | 2901.53 | 1215.42 | 1686.11 | 1257.93 | 529.672 | larger |
163+
| DDPG vs SAC | 884.119 | 4543.4 | -3659.28 | 535.74 | 432.13 | smaller |
164+
| DDPG vs TRPO | 884.119 | 1215.42 | -331.297 | 535.74 | 529.672 | smaller |
165+
| SAC vs TRPO | 4543.4 | 1215.42 | 3327.98 | 432.13 | 529.672 | larger |
155166

156167
Comparator Saved
157168

docs/tutorials.org

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,21 @@ Please note that if, in the process of the algorithm, all the comparisons for on
1515

1616
Below, we give an example based on files containing the evaluations of PPO, DDPG, SAC and TRPO, four Deep Reinforcement Learning algorithms, given in the =examples= directory of the main repository.
1717

18+
19+
20+
21+
** Installation
22+
23+
To install adastop, use pip:
24+
25+
#+begin_src bash :session *shell* :results verbatim :exports both
26+
pip install adastop
27+
#+end_src
28+
29+
This will automatically install the command line interface as well as the python library.
30+
31+
32+
1833
** Help for cli tool
1934

2035
The AdaStop algorithm is initialized with the first test done through =adastop compare= and the current state of AdaStop is then saved in a pickle file. The help of =adastop= command line can be obtained with the following:
@@ -47,7 +62,7 @@ The input format of adastop is under the form of a csv file containing the score
4762

4863
Let us launch AdaStop on this first batch of data.
4964

50-
First, we clean up the corrent directory of any litter files that could have been spawned by a previous usage of =adastop= (if you never used =adastop= before, this command will not have any effect).
65+
First, we clean up the current directory of any litter files that could have been spawned by a previous usage of =adastop= (if you never used =adastop= before, this command will not have any effect).
5166

5267
#+begin_src bash :session *shell* :results verbatim :exports both
5368
adastop reset . # reset the state of the comparator (remove hidden pickle file)

docs/user_guide.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Then, once you did the comparison on the first file, you can use iteratively `ad
6060

6161
#### Choice of comparisons
6262

63-
In adastopn, one can choose which comparisons are done. The default is to do all the pairwise comparisons between two algorithms. In practice, it is sometimes sufficient to compare to only one of them, a benchmark, for this the `--compare-to-first` argument can be used. For a more fine-grained control on which comparison to do, the python API can take the comparisons as input.
63+
In adastop, one can choose which comparisons are done. The default is to do all the pairwise comparisons between two algorithms. In practice, it is sometimes sufficient to compare to only one of them, a benchmark, for this the `--compare-to-first` argument can be used. For a more fine-grained control on which comparison to do, the python API can take the comparisons as input.
6464

6565
**Remark**: it is not statistically sound to execute adastop several times and interpret the results as though they came from a single test. If adastop is run several times, this is multiple testing and some calibration has to be done. Instead, it is better to do all the comparisons at the same time, running the adastop algorithm only once, and adastop will handle the multiplicity of hypotheses by itself.
6666

tests/test_cli.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,57 @@
11
import pytest
22
from click.testing import CliRunner
33
from adastop.cli import adastop
4+
import os
45

56
# we reuse a bit of pytest's own testing machinery, this should eventually come
67
import subprocess
78

89

910
def test_cli():
1011
runner = CliRunner()
12+
test_pdf_path = "test.pdf"
13+
14+
if os.path.exists(test_pdf_path):
15+
os.remove(test_pdf_path)
16+
1117
result = runner.invoke(adastop, ['reset', 'examples'])
1218
assert result.exit_code == 0
1319
for j in range(1,6):
14-
15-
result = runner.invoke(adastop, ['compare', 'examples/walker'+str(j)+'.csv'])
20+
result = runner.invoke(adastop, ['compare', "--seed", "1", 'examples/walker'+str(j)+'.csv'])
1621
assert result.exit_code == 0
1722

18-
result = runner.invoke(adastop, ['compare', 'examples/walker3.csv'])
23+
result = runner.invoke(adastop, ['compare',"--seed", "1", 'examples/walker3.csv'])
1924
assert result.exit_code == 1
25+
assert result.exception.args[0] == 'Error: you tried to use more group than necessary. Use adastop status to see current status for more info.'
2026

21-
result = runner.invoke(adastop, ['plot', 'examples', "test.pdf"])
27+
28+
result = runner.invoke(adastop, ['plot', 'examples', test_pdf_path])
2229
assert result.exit_code == 0
2330
result = runner.invoke(adastop, ['status', 'examples'])
2431
assert result.exit_code == 0
32+
assert os.path.exists(test_pdf_path) == True
33+
2534

2635
result = runner.invoke(adastop, ['reset', 'examples'])
2736
assert result.exit_code == 0
2837

29-
result = runner.invoke(adastop, ['compare', "--compare-to-first", 'examples/walker1.csv'])
38+
result = runner.invoke(adastop, ['compare', "--compare-to-first","--seed", "1", 'examples/walker1.csv'])
3039
assert result.exit_code == 0
40+
41+
42+
43+
def test_plot_no_comparator_save_file():
44+
runner = CliRunner()
45+
runner.invoke(adastop, ['reset', 'examples'])
46+
47+
result = runner.invoke(adastop, ['plot', 'examples', "test.pdf"])
48+
assert result.exit_code == 1
49+
assert result.exception.args[0] == 'Comparator save file not found.'
50+
51+
def test_status_no_comparator_save_file():
52+
runner = CliRunner()
53+
runner.invoke(adastop, ['reset', 'examples'])
54+
55+
result = runner.invoke(adastop, ['status', 'examples'])
56+
assert result.exit_code == 1
57+
assert result.exception.args[0] == 'Comparator save file not found.'
Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,27 @@
55
B = 5000
66
alpha = 0.05
77
n_runs = 10
8+
seed = 42
89

9-
def test_runtime():
10+
def test_partial_compare():
11+
rng = np.random.RandomState(seed)
1012
idxs = []
1113
comparator = MultipleAgentsComparator(n=3, K=3, B=B, alpha=alpha, seed=42, beta = 0.01)
12-
evals = {"Agent "+str(k):np.random.normal(size=3) for k in range(3)}
14+
evals = {"Agent "+str(k): rng.normal(size=3) for k in range(3)}
1315
comparator.partial_compare(evals)
16+
17+
18+
def test_partial_compare_not_enough_points():
19+
comparator = MultipleAgentsComparator(n=3, K=3, B=5000, alpha=-1e-5, seed=42, beta = 0.01)
20+
evals = {"Agent 1":np.array([0,0,0]),"Agent 2":np.array([0,0,0]),"Agent 3":np.array([0,0,0])}
21+
comparator.partial_compare(evals)
22+
1423

1524

1625
@pytest.mark.parametrize("K,n", [(10,2),(5,3), (3, 5), (1, 15)])
1726
def test_type1(K,n):
27+
rng = np.random.RandomState(seed)
28+
1829
idxs = []
1930
n_agents = 3
2031
for M in range(n_runs):
@@ -23,16 +34,18 @@ def test_type1(K,n):
2334
while not comparator.is_finished:
2435
if len(evals) >0:
2536
for k in range(n_agents):
26-
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)])
37+
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , rng.normal(size=n)])
2738
else:
28-
evals = {"Agent "+str(k): np.random.normal(size=n) for k in range(n_agents)}
39+
evals = {"Agent "+str(k): rng.normal(size=n) for k in range(n_agents)}
2940
comparator.partial_compare(evals)
3041
idxs.append(not("equal" in comparator.decisions.values()))
3142
print(comparator.get_results())
3243
assert np.mean(idxs) < 2*alpha + 1/4/(np.sqrt(n_runs)), "type 1 error seems to be too large."
3344

3445
@pytest.mark.parametrize("K,n", [(5,3), (3, 5), (1, 15)])
3546
def test_type1_large_beta(K,n):
47+
rng = np.random.RandomState(seed)
48+
3649
idxs = []
3750
n_agents = 3
3851
for M in range(n_runs):
@@ -41,16 +54,18 @@ def test_type1_large_beta(K,n):
4154
while not comparator.is_finished:
4255
if len(evals) >0:
4356
for k in range(n_agents):
44-
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)])
57+
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , rng.normal(size=n)])
4558
else:
46-
evals = {"Agent "+str(k): np.random.normal(size=n) for k in range(n_agents)}
59+
evals = {"Agent "+str(k): rng.normal(size=n) for k in range(n_agents)}
4760
comparator.partial_compare(evals)
4861
idxs.append(not("equal" in comparator.decisions.values()))
4962
print(comparator.get_results())
5063
assert np.mean(idxs) < 2*alpha + 1/4/(np.sqrt(n_runs)), "type 1 error seems to be too large."
5164

5265
@pytest.mark.parametrize("K,n", [(3, 5), (1, 15)])
5366
def test_type2(K,n):
67+
rng = np.random.RandomState(seed)
68+
5469
idxs = []
5570
n_agents = 2
5671
for M in range(n_runs):
@@ -59,9 +74,9 @@ def test_type2(K,n):
5974
while not comparator.is_finished:
6075
if len(evals) >0:
6176
for k in range(n_agents):
62-
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)+2*k])
77+
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , rng.normal(size=n)+2*k])
6378
else:
64-
evals = {"Agent "+str(k): np.random.normal(size=n)+2*k for k in range(n_agents)}
79+
evals = {"Agent "+str(k): rng.normal(size=n)+2*k for k in range(n_agents)}
6580
comparator.partial_compare(evals)
6681
idxs.append(not("equal" in comparator.decisions.values()))
6782
assert np.mean(idxs) > 0.3, "type 2 error seems to be too large."

tests/test_plot.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,39 @@ def test_plot_sota_noteq():
7676
# plt.savefig('fig2.pdf')
7777
fig, axes= plt.subplots(1,2)
7878
comparator.plot_results_sota(axes=axes)
79+
80+
81+
82+
def test_plot_noteq2():
83+
n_agents = 3
84+
comparator = MultipleAgentsComparator(n=10, K=K, B=B, alpha=alpha, seed=42, beta = 0.01)
85+
evals = {}
86+
while not comparator.is_finished:
87+
if len(evals) >0:
88+
for k in range(n_agents):
89+
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , np.abs(2*K-k)+np.random.normal(size=10)])
90+
else:
91+
evals = {"Agent "+str(k): np.random.normal(size=10)+np.abs(2*K-k) for k in range(n_agents)}
92+
comparator.partial_compare(evals)
93+
# plt.savefig('fig2.pdf')
94+
fig, axes= plt.subplots(1,2)
95+
comparator.plot_results(axes=axes)
96+
97+
def test_plot_sota_noteq2():
98+
n_agents = 3
99+
comparisons = np.array([(0,i) for i in [1,2]])
100+
comparator = MultipleAgentsComparator(n=10, K=K, B=B, alpha=alpha,
101+
comparisons=comparisons, seed=42, beta = 0.01)
102+
evals = {}
103+
while not comparator.is_finished:
104+
if len(evals) >0:
105+
for k in range(n_agents):
106+
evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=10)+np.abs(2*K-k)])
107+
else:
108+
evals = {"Agent "+str(k): np.random.normal(size=10)+np.abs(2*K-k) for k in range(n_agents)}
109+
comparator.partial_compare(evals)
110+
comparator.plot_results_sota()
111+
# plt.savefig('fig2.pdf')
112+
fig, axes= plt.subplots(1,2)
113+
comparator.plot_results_sota(axes=axes)
114+

0 commit comments

Comments
 (0)