Spaces:
Sleeping
Sleeping
Commit
·
c379861
1
Parent(s):
e9144e0
update README
Browse files
README.md
CHANGED
|
@@ -36,13 +36,18 @@ try:
|
|
| 36 |
outputs = [
|
| 37 |
"Wrong Format",
|
| 38 |
r"<answer>0</answer>", # Wrong Answer
|
| 39 |
-
r"<answer>"
|
| 40 |
]
|
| 41 |
|
| 42 |
for output in outputs:
|
| 43 |
result = RLVE_Gymenv.step(RlveGymAction(output = output))
|
| 44 |
print(f"Sent: '{output}'")
|
| 45 |
print(f"Result: `{result}`")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
finally:
|
| 48 |
# Always clean up
|
|
@@ -127,7 +132,22 @@ Please check [here](server/RLVE_Gym_environment.py) for detailed usage:
|
|
| 127 |
- `environment_identifier` (str) - The environment's identifier. Check [here](server/Gym/environments/__init__.py) for detailed usage.
|
| 128 |
- `difficulty` (int) - The difficulty of generated problems.
|
| 129 |
- `answer_markers` (Tuple[str] of length 2) - How the environment extracts the final answer from a model output.
|
| 130 |
-
- `
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
### Action
|
| 133 |
**RlveGymAction**: Contains a single field
|
|
|
|
| 36 |
outputs = [
|
| 37 |
"Wrong Format",
|
| 38 |
r"<answer>0</answer>", # Wrong Answer
|
| 39 |
+
r"<answer>4753</answer>", # Please replace "4753" with the correct Answer
|
| 40 |
]
|
| 41 |
|
| 42 |
for output in outputs:
|
| 43 |
result = RLVE_Gymenv.step(RlveGymAction(output = output))
|
| 44 |
print(f"Sent: '{output}'")
|
| 45 |
print(f"Result: `{result}`")
|
| 46 |
+
print(f"`verifier_result`: `{result.observation.verifier_result}`")
|
| 47 |
+
print(f"`reward`: `{result.reward}`")
|
| 48 |
+
print("`accuracy`: `{}`".format(result.observation.verifier_result["accuracy"]))
|
| 49 |
+
print("(so far) sum_accuracy/num_samples = {}/{}".format(RLVE_Gymenv.state().sum_accuracy, RLVE_Gymenv.state().num_samples))
|
| 50 |
+
print("\n")
|
| 51 |
|
| 52 |
finally:
|
| 53 |
# Always clean up
|
|
|
|
| 132 |
- `environment_identifier` (str) - The environment's identifier. Check [here](server/Gym/environments/__init__.py) for detailed usage.
|
| 133 |
- `difficulty` (int) - The difficulty of generated problems.
|
| 134 |
- `answer_markers` (Tuple[str] of length 2) - How the environment extracts the final answer from a model output.
|
| 135 |
+
- `initial_seed` (int) - The initial seed to use when generating the first problem. Whenever `reset()` is called, the seed will be incremented by 1.
|
| 136 |
+
|
| 137 |
+
Right now, you can set these arguments by passing them through environment variables:
|
| 138 |
+
|
| 139 |
+
```python
|
| 140 |
+
RLVE_Gymenv = RlveGymEnv.from_docker_image(
|
| 141 |
+
"RLVE_Gym-env:latest",
|
| 142 |
+
env_vars = {
|
| 143 |
+
"RLVEGYM_ENVIRONMENT_IDENTIFIER": "Sorting",
|
| 144 |
+
"RLVEGYM_DIFFICULTY": "2",
|
| 145 |
+
"RLVEGYM_ANSWER_MARKER_START": r"\boxed{",
|
| 146 |
+
"RLVEGYM_ANSWER_MARKER_END": r"}",
|
| 147 |
+
"RLVEGYM_INITIAL_SEED": "10",
|
| 148 |
+
},
|
| 149 |
+
)
|
| 150 |
+
```
|
| 151 |
|
| 152 |
### Action
|
| 153 |
**RlveGymAction**: Contains a single field
|