ZhiyuanZeng committed on
Commit
c379861
·
1 Parent(s): e9144e0

update README

Browse files
Files changed (1) hide show
  1. README.md +22 -2
README.md CHANGED
@@ -36,13 +36,18 @@ try:
36
  outputs = [
37
  "Wrong Format",
38
  r"<answer>0</answer>", # Wrong Answer
39
- r"<answer>" + str(RLVE_Gymenv.problem.parameter["reference_answer"]) + r"</answer>", # Correct Answer
40
  ]
41
 
42
  for output in outputs:
43
  result = RLVE_Gymenv.step(RlveGymAction(output = output))
44
  print(f"Sent: '{output}'")
45
  print(f"Result: `{result}`")
 
 
 
 
 
46
 
47
  finally:
48
  # Always clean up
@@ -127,7 +132,22 @@ Please check [here](server/RLVE_Gym_environment.py) for detailed usage:
127
  - `environment_identifier` (str) - The environment's identifier. Check [here](server/Gym/environments/__init__.py) for detailed usage.
128
  - `difficulty` (int) - The difficulty of generated problems.
129
  - `answer_markers` (Tuple[str] of length 2) - How the environment extracts the final answer from a model output.
130
- - `seed` (int) - The initial seed to use when generating the first problem. Whenever `reset()` is called, the seed will be incremented by 1.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  ### Action
133
  **RlveGymAction**: Contains a single field
 
36
  outputs = [
37
  "Wrong Format",
38
  r"<answer>0</answer>", # Wrong Answer
39
+ r"<answer>4753</answer>", # Please replace "4753" with the correct Answer
40
  ]
41
 
42
  for output in outputs:
43
  result = RLVE_Gymenv.step(RlveGymAction(output = output))
44
  print(f"Sent: '{output}'")
45
  print(f"Result: `{result}`")
46
+ print(f"`verifier_result`: `{result.observation.verifier_result}`")
47
+ print(f"`reward`: `{result.reward}`")
48
+ print("`accuracy`: `{}`".format(result.observation.verifier_result["accuracy"]))
49
+ print("(so far) sum_accuracy/num_samples = {}/{}".format(RLVE_Gymenv.state().sum_accuracy, RLVE_Gymenv.state().num_samples))
50
+ print("\n")
51
 
52
  finally:
53
  # Always clean up
 
132
  - `environment_identifier` (str) - The environment's identifier. Check [here](server/Gym/environments/__init__.py) for detailed usage.
133
  - `difficulty` (int) - The difficulty of generated problems.
134
  - `answer_markers` (Tuple[str] of length 2) - How the environment extracts the final answer from a model output.
135
+ - `initial_seed` (int) - The initial seed to use when generating the first problem. Whenever `reset()` is called, the seed will be incremented by 1.
136
+
137
+ Right now, you can set these arguments by passing them through environment variables:
138
+
139
+ ```python
140
+ RLVE_Gymenv = RlveGymEnv.from_docker_image(
141
+ "RLVE_Gym-env:latest",
142
+ env_vars = {
143
+ "RLVEGYM_ENVIRONMENT_IDENTIFIER": "Sorting",
144
+ "RLVEGYM_DIFFICULTY": "2",
145
+ "RLVEGYM_ANSWER_MARKER_START": r"\boxed{",
146
+ "RLVEGYM_ANSWER_MARKER_END": r"}",
147
+ "RLVEGYM_INITIAL_SEED": "10",
148
+ },
149
+ )
150
+ ```
151
 
152
  ### Action
153
  **RlveGymAction**: Contains a single field