diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b7bd5d47e9d674f78ba16a7d9af98655fbfad86e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,76 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Multi-stage build using openenv-base +# This Dockerfile is flexible and works for both: +# - In-repo environments (with local src/core) +# - Standalone environments (with openenv-core from pip) +# The build script (openenv build) handles context detection and sets appropriate build args. + +ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest +FROM ${BASE_IMAGE} AS builder + +WORKDIR /app + +# Build argument to control whether we're building standalone or in-repo +ARG BUILD_MODE=in-repo +ARG ENV_NAME=RLVE_Gym + +# Copy environment code (always at root of build context) +COPY . /app/env + +# For in-repo builds, openenv-core is already in the pyproject.toml dependencies +# For standalone builds, openenv-core will be installed from pip via pyproject.toml +WORKDIR /app/env + +# Ensure uv is available (for local builds where base image lacks it) +RUN if ! 
command -v uv >/dev/null 2>&1; then \ + curl -LsSf https://astral.sh/uv/install.sh | sh && \ + mv /root/.local/bin/uv /usr/local/bin/uv && \ + mv /root/.local/bin/uvx /usr/local/bin/uvx; \ + fi + +# Install dependencies using uv sync +# If uv.lock exists, use it; otherwise resolve on the fly +RUN --mount=type=cache,target=/root/.cache/uv \ + if [ -f uv.lock ]; then \ + uv sync --frozen --no-install-project --no-editable; \ + else \ + uv sync --no-install-project --no-editable; \ + fi + +RUN --mount=type=cache,target=/root/.cache/uv \ + if [ -f uv.lock ]; then \ + uv sync --frozen --no-editable; \ + else \ + uv sync --no-editable; \ + fi + +# Final runtime stage +FROM ${BASE_IMAGE} + +WORKDIR /app + +# Copy the virtual environment from builder +COPY --from=builder /app/env/.venv /app/.venv + +# Copy the environment code +COPY --from=builder /app/env /app/env + +# Set PATH to use the virtual environment +ENV PATH="/app/.venv/bin:$PATH" + +# Set PYTHONPATH so imports work correctly +ENV PYTHONPATH="/app/env:$PYTHONPATH" + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the FastAPI server +# The module path is constructed to work with the /app/env structure +ENV ENABLE_WEB_INTERFACE=true +CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"] diff --git a/README.md b/README.md index a14874900975e0cdcac63d93041a52291e41b9f6..9cd48c085919495add7565107a3de69f04282dc9 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,198 @@ --- -title: RLVE Gym -emoji: 🦀 -colorFrom: blue -colorTo: purple +title: Rlve Gym Environment Server +emoji: 📡 +colorFrom: purple +colorTo: blue sdk: docker pinned: false +app_port: 8000 +base_path: /web +tags: + - openenv --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# Rlve Gym Environment + +A simple test environment that echoes back messages. 
Perfect for testing the env APIs as well as demonstrating environment usage patterns. + +## Quick Start + +The simplest way to use the Rlve Gym environment is through the `RlveGymEnv` class: + +```python +from RLVE_Gym import RlveGymAction, RlveGymEnv + +try: + # Create environment from Docker image + RLVE_Gymenv = RlveGymEnv.from_docker_image("rlve_gym-env:latest") + + # Reset + result = RLVE_Gymenv.reset() + print(f"Problem: {result.observation.problem_input}") + + # Send multiple model outputs + messages = ["Hello, World!", "Testing echo", "Final message"] + + for msg in messages: + result = RLVE_Gymenv.step(RlveGymAction(output=msg)) + print(f"Sent: '{msg}'") + print(f" → Verifier result: {result.observation.verifier_result}") + print(f" → Success: {result.observation.success}") + print(f" → Reward: {result.reward}") + +finally: + # Always clean up + RLVE_Gymenv.close() +``` + +That's it! The `RlveGymEnv.from_docker_image()` method handles: +- Starting the Docker container +- Waiting for the server to be ready +- Connecting to the environment +- Container cleanup when you call `close()` + +## Building the Docker Image + +Before using the environment, you need to build the Docker image (Docker image names must be lowercase): + +```bash +# From project root (the Dockerfile lives at the repository root) +docker build -t rlve_gym-env:latest . +``` + +## Deploying to Hugging Face Spaces + +You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command: + +```bash +# From the environment directory (where openenv.yaml is located) +openenv push + +# Or specify options +openenv push --namespace my-org --private +``` + +The `openenv push` command will: +1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`) +2. Prepare a custom build for Hugging Face Docker space (enables web interface) +3. 
Upload to Hugging Face (ensuring you're logged in) + +### Prerequisites + +- Authenticate with Hugging Face: The command will prompt for login if not already authenticated + +### Options + +- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory) +- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml) +- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM) +- `--private`: Deploy the space as private (default: public) + +### Examples + +```bash +# Push to your personal namespace (defaults to username/env-name from openenv.yaml) +openenv push + +# Push to a specific repository +openenv push --repo-id my-org/my-env + +# Push with a custom base image +openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest + +# Push as a private space +openenv push --private + +# Combine options +openenv push --repo-id my-org/my-env --base-image custom-base:latest --private +``` + +After deployment, your space will be available at: +`https://huggingface.co/spaces/<repo-id>` + +The deployed space includes: +- **Web Interface** at `/web` - Interactive UI for exploring the environment +- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface +- **Health Check** at `/health` - Container health monitoring + +## Environment Details + +### Action +**RlveGymAction**: Contains a single field +- `output` (str) - The model output to be verified + +### Observation +**RlveGymObservation**: Contains the problem, the verifier result and status fields +- `problem_input` (str) - The problem input +- `verifier_result` (dict) - Verifier output, mapping string keys to float/int values +- `success` (bool) - Success flag +- `message` (str) - Accompanying message +- `reward` (float) - Reward based on message length (length × 0.1) +- `done` (bool) - Always False for echo environment +- `metadata` (dict) - Additional info like step count + +### Reward +The reward is calculated as: `message_length × 0.1` +- "Hi" → reward: 0.2 +- "Hello, World!" 
→ reward: 1.3 +- Empty message → reward: 0.0 + +## Advanced Usage + +### Connecting to an Existing Server + +If you already have a Rlve Gym environment server running, you can connect directly: + +```python +from RLVE_Gym import RlveGymEnv + +# Connect to existing server +RLVE_Gymenv = RlveGymEnv(base_url="") + +# Use as normal +result = RLVE_Gymenv.reset() +result = RLVE_Gymenv.step(RlveGymAction(message="Hello!")) +``` + +Note: When connecting to an existing server, `RLVE_Gymenv.close()` will NOT stop the server. + +## Development & Testing + +### Direct Environment Testing + +Test the environment logic directly without starting the HTTP server: + +```bash +# From the server directory +python3 server/RLVE_Gym_environment.py +``` + +This verifies that: +- Environment resets correctly +- Step executes actions properly +- State tracking works +- Rewards are calculated correctly + +### Running Locally + +Run the server locally for development: + +```bash +uvicorn server.app:app --reload +``` + +## Project Structure + +``` +RLVE_Gym/ +├── __init__.py # Module exports +├── README.md # This file +├── openenv.yaml # OpenEnv manifest +├── pyproject.toml # Project metadata and dependencies +├── uv.lock # Locked dependencies (generated) +├── client.py # RlveGymEnv client implementation +├── models.py # Action and Observation models +└── server/ + ├── __init__.py # Server module exports + ├── RLVE_Gym_environment.py # Core environment logic + ├── app.py # FastAPI application + └── Dockerfile # Container image definition +``` diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e0b3a8cbd3b4e5c48cadaf8c73bf361599423a90 --- /dev/null +++ b/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Rlve Gym Environment - A simple test environment for HTTP server.""" + +from .client import RlveGymEnv +from .models import RlveGymAction, RlveGymObservation + +__all__ = ["RlveGymAction", "RlveGymObservation", "RlveGymEnv"] + diff --git a/client.py b/client.py new file mode 100644 index 0000000000000000000000000000000000000000..1a05afbd3fa3fac1dc295e7362a1ba014ea4dc03 --- /dev/null +++ b/client.py @@ -0,0 +1,62 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Rlve Gym Environment HTTP Client. + +This module provides the client for connecting to a Rlve Gym Environment server +over HTTP. +""" + +from typing import Dict + +from openenv_core.client_types import StepResult +from openenv_core.http_env_client import HTTPEnvClient + +from .models import RlveGymState, RlveGymAction, RlveGymObservation + + +class RlveGymEnv(HTTPEnvClient[RlveGymAction, RlveGymObservation]): + """ + HTTP client for the Rlve Gym Environment. + + This client connects to a RlveGymEnvironment HTTP server and provides + methods to interact with it: reset(), step(), and state access. + """ + + def _step_payload(self, action: RlveGymAction) -> Dict: + """ + Convert RlveGymAction to JSON payload for step request. + + Args: + action: RlveGymAction instance + + Returns: + Dictionary representation suitable for JSON encoding + """ + return { + "output": action.output, + } + + def _parse_result(self, payload: Dict) -> StepResult[RlveGymObservation]: + """ + Parse server response into StepResult[RlveGymObservation]. 
@dataclass(kw_only=True)
class RlveGymState(State):
    """State of the RLVE_Gym containing the seed.

    All fields are keyword-only (``kw_only=True``); only ``seed`` is required.
    """

    # Seed stored with the state (required, no default).
    seed: int
    # Problem text; defaults to None, so the annotation must admit None
    # (the original ``str`` annotation contradicted the default).
    problem_input: Union[str, None] = None

    # Running counters, both starting at zero.
    # NOTE(review): presumably the number of scored outputs and the sum of
    # their `accuracy` flags -- confirm against the server implementation.
    num_samples: int = 0
    sum_accuracy: int = 0


@dataclass(kw_only=True)
class RlveGymAction(Action):
    """Action for the RLVE_Gym environment - just a model output."""

    # Raw text produced by the model; this is the only field the client
    # sends in the step payload.
    output: str


@dataclass(kw_only=True)
class RlveGymObservation(Observation):
    """Observation from the RLVE_Gym environment.

    All four fields are required keyword arguments.
    """

    # Problem text associated with this observation.
    problem_input: str
    # Mapping of string keys to float/int values.
    # NOTE(review): presumably the dict returned by the environment's
    # verifier (reward / accuracy / format_score) -- confirm with the server.
    verifier_result: Dict[str, Union[float, int]]

    # Status flag and accompanying text.
    success: bool
    message: str
100644 index 0000000000000000000000000000000000000000..253b97524ad310d0f1910bc186f1c456b89b07ea --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +[build-system] +requires = ["setuptools>=45", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "openenv-RLVE_Gym" +version = "0.1.0" +description = "Rlve Gym environment for OpenEnv" +requires-python = ">=3.10" +dependencies = [ + # Core OpenEnv dependencies (required for server functionality) + # "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@main#subdirectory=src/core", + "openenv-core>=0.1.0", + "fastapi>=0.115.0", + "pydantic>=2.0.0", + "uvicorn>=0.24.0", + "requests>=2.31.0", + # Environment-specific dependencies + # Add all dependencies needed for your environment here + # Examples: + # "numpy>=1.19.0", + # "torch>=2.0.0", + # "gymnasium>=0.29.0", + # "openspiel>=1.0.0", + # "smolagents>=1.22.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-cov>=4.0.0", +] + +[project.scripts] +# Server entry point - enables running via: uv run --project . 
class TimeoutException(Exception):
    """Raised by :func:`timeout` when the wrapped call exceeds its time budget."""


def timeout(seconds):
    """
    Build a decorator that limits how long the caller waits for a function.

    The decorated function runs in a single-worker thread pool and the caller
    waits at most `seconds` for its result.

    Note: a Python thread cannot be killed. When the wait times out the caller
    is released, but a call that has already started keeps running in the
    background (only work that has not started yet is cancelled).

    Args:
        seconds: Maximum time to wait for the result, in seconds.

    Returns:
        A decorator; the wrapped function returns the original result, re-raises
        whatever the original function raised, or raises TimeoutException.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            executor = ThreadPoolExecutor(max_workers=1)
            future = executor.submit(func, *args, **kwargs)
            try:
                return future.result(timeout=seconds)
            except FutureTimeoutError as err:
                # Chain explicitly so the traceback shows where the wait expired.
                raise TimeoutException("Function timed out after {} seconds".format(seconds)) from err
            finally:
                # wait=False: never block the caller on a still-running worker.
                executor.shutdown(wait=False, cancel_futures=True)
        return wrapper
    return decorator


import torch
import random
import numpy as np


def manual_seed(args_or_seed: int, fix_cudnn=False):
    """
    Seed the `random`, NumPy and PyTorch (CPU and CUDA) random number generators.

    Args:
        args_or_seed (int): The seed value.
        fix_cudnn (bool): If True, also force deterministic cuDNN behaviour and
            disable cuDNN benchmarking.
    """
    random.seed(args_or_seed)
    np.random.seed(args_or_seed)
    torch.manual_seed(args_or_seed)
    torch.cuda.manual_seed_all(args_or_seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this assignment does
    # not change hashing in the running process; it is only inherited by
    # child processes started afterwards.
    os.environ["PYTHONHASHSEED"] = str(args_or_seed)
    if fix_cudnn:
        torch.backends.cudnn.deterministic = True  # noqa
        torch.backends.cudnn.benchmark = False  # noqa


class VerifiableEnvironment(ABC):
    """
    Abstract base class for a verifiable environment.

    Life cycle: `generator()` creates a problem from a seed, `prompt_generator()`
    renders it, `processor()` extracts the answer from a model output, and
    `verifier()` turns the subclass' `scorer()` result into a result dict.
    Subclasses implement `_generate`, `_prompt_generate`, `_process` and `scorer`.
    """

    def __init__(self, answer_markers: Optional[Tuple[str, str]] = None):
        """
        Initializes the environment with default seed and parameter values.

        Args:
            answer_markers (Optional[Tuple[str, str]]): Opening and closing
                strings that delimit the answer inside a model output.
        """
        self.seed = None
        self.parameter = None

        if answer_markers is None:
            # NOTE(review): with two empty markers the pattern built in
            # processor() is just "(.*?)", which always matches and always
            # captures "". Presumably non-empty default markers were intended
            # here -- confirm against the upstream source.
            answer_markers = (r"", r"")
        assert hasattr(answer_markers, "__len__"), "answer_markers should have __len__"
        assert len(answer_markers) == 2 and isinstance(answer_markers[0], str) and isinstance(answer_markers[1], str), "answer_markers should be a tuple of two strings"
        self.answer_markers = answer_markers

        # A score at or above this value (within a small epsilon) counts as accurate.
        self.passing_reward_threshold = 1.0

    def generator(self, seed: int, parameter: Optional[Dict] = None, timeout_second: int = 10) -> bool:
        """
        Initializes the environment with the given seed and (initial) parameters, and samples environment-specific parameters to generate a problem.

        Args:
            seed (int): Random seed for reproducibility.
            parameter (Optional[Dict]): Dictionary of (initial) problem parameters; it is deep-copied, never mutated.
            timeout_second (int): Timeout in seconds for the generation process.

        Returns:
            bool: True if the generation was successful, False otherwise
            (generation raised an Exception or timed out).
        """
        @timeout(timeout_second)
        def self_generate():
            self.seed = seed
            self.parameter = copy.deepcopy(parameter) if parameter is not None else {}

            manual_seed(self.seed)
            self._generate()

        try:
            self_generate()
        except Exception:
            # Covers generation errors and TimeoutException. Deliberately not a
            # bare `except`: KeyboardInterrupt / SystemExit must propagate
            # instead of being reported as a failed generation.
            return False
        return self.parameter is not None

    @abstractmethod
    def _generate(self) -> None:
        """
        Subclasses must implement problem generation using self.seed and self.parameter.
        """
        pass

    def prompt_generator(self) -> str:
        """
        Generates the prompt string for the problem.

        Returns:
            str: The formatted prompt for the problem.
        """
        assert self.seed is not None and self.parameter is not None, "generator() should be called before prompt_generator()"

        return self._prompt_generate()

    @abstractmethod
    def _prompt_generate(self) -> str:
        """
        Subclasses must implement prompt generation using self.seed and self.parameter.

        Returns:
            str: The problem prompt.
        """
        pass

    def processor(self, output: str) -> Any:
        """
        Processes the model's output to extract useful information.

        Only the text after the first "Assistant:" (or, failing that, the first
        "<|im_start|>assistant") is searched, and the LAST marker-delimited
        span in it is taken as the answer.

        Args:
            output (str): The string output from a model.

        Returns:
            Any: Whatever `_process` returns for the extracted answer (or for
            None when no marker-delimited span was found).
        """
        # Remove everything before the first "Assistant:" (if possible)
        if "Assistant:" in output:
            output = output.split("Assistant:", 1)[1]
        elif "<|im_start|>assistant" in output:
            output = output.split("<|im_start|>assistant", 1)[1]

        answer_pattern = re.escape(self.answer_markers[0]) + r"(.*?)" + re.escape(self.answer_markers[1])
        # re.DOTALL lets an answer span several lines.
        matches = list(re.finditer(answer_pattern, output, re.DOTALL))
        answer = matches[-1].group(1) if matches else None
        return self._process(answer)

    @abstractmethod
    def _process(self, answer: Optional[str]) -> Any:
        """
        Subclasses must implement the processing of the answer.

        Args:
            answer (str): The model's answer. If it is None, it means the model did not provide an answer in the expected format.

        Returns:
            Any: The processed answer, which may be used for scoring.
        """
        pass

    @abstractmethod
    def scorer(self, output: str) -> float:
        """
        Computes a numeric score for the output, which should be in [-1.0, +1.0].

        Args:
            output (str): The model's output.

        Returns:
            float: The score for the given output, between -1.0 and +1.0.
        """
        pass

    def verifier(self, output: str) -> Dict[str, Union[float, int]]:
        """
        Verifies the model's output.

        Args:
            output (str): The model's output.

        Returns:
            Dict[str, Union[float, int]]: `reward` (the score in [-1.0, +1.0]),
            `accuracy` (1 if the score reaches `passing_reward_threshold`, else 0)
            and `format_score` (0 only when the score is the minimum -1.0, else 1).

        Raises:
            AssertionError: If the scorer returns a value outside [-1.0, +1.0].
        """
        try:
            score = self.scorer(output)
        except Exception:
            # A crashing scorer yields the minimum score. Deliberately not a
            # bare `except`: KeyboardInterrupt / SystemExit must propagate.
            score = -1.0
        assert -1.0 <= score <= +1.0, "Score out of bounds: score={}\n\nPrompt:\n{}".format(score, self.prompt_generator())

        eps = 1E-6  # tolerance for floating-point comparisons
        return dict(
            reward = score, # [-1.0, +1.0]
            accuracy = int(score >= self.passing_reward_threshold - eps), # 0 or 1
            format_score = int(score >= -1.0 + eps), # 0 or 1
        )

    def get_config(self) -> Dict:
        """
        Returns the configuration of the current problem.

        Returns:
            Dict: Dictionary with keys 'seed', 'parameter' and 'passing_reward_threshold'.
        """
        return dict(seed = self.seed, parameter = self.parameter, passing_reward_threshold = self.passing_reward_threshold)

    def set_config(self, config: Dict) -> None:
        """
        Sets the configuration for the current problem.

        Args:
            config (Dict): Dictionary with 'seed' and 'parameter' keys; an optional
                'passing_reward_threshold' key defaults to 1.0 when absent.
        """
        assert "seed" in config, "seed is required in config"
        assert "parameter" in config, "parameter is required in config"
        self.seed = config["seed"]
        self.parameter = config["parameter"]
        self.passing_reward_threshold = config.get("passing_reward_threshold", 1.0)
BAJBytecomputer_Environment +from .banned_point_superset_path_counting import BannedPointSupersetPathCounting_Environment +from .banyan_heart import BanyanHeart_Environment +from .bez_minimalist_security import BEZMinimalistSecurity_Environment +from .bezout_identity import BezoutIdentity_Environment +from .binario import Binario_Environment +from .binario_no_adjacency_requirement import Binario_NoAdjacencyRequirement_Environment +from .binary_alternation import BinaryAlternation_Environment +from .binary_linear_equation_solution_counting import BinaryLinearEquation_SolutionCounting_Environment +from .binary_tree_leaf_num_expectation import BinaryTreeLeafNumExpectation_Environment +from .bit_equation_counting import BitEquationCounting_Environment +from .bitand_zero_path_counting import BitAndZero_PathCounting_Environment +from .bitwise_operation_sequence_counting import BitwiseOperationSequenceCounting_Environment +from .block_image import BlockImage_Environment +from .bounded_adjacency_difference_permutation_counting import BoundedAdjacencyDifference_Permutation_Counting_Environment +from .bounded_interval_intersection import BoundedIntervalIntersection_Environment +from .bounded_mean_subarray_counting import BoundedMeanSubarrayCounting_Environment +from .bounded_subarray_counting import BoundedSubarrayCounting_Environment +from .box_scheduling import BoxScheduling_Environment +from .bridge import Bridge_Environment +from .bubble_swap_lower_bound_permutation_counting import BubbleSwapLowerBound_PermutationCounting_Environment +from .bucket_sorting import BucketSorting_Environment +from .campfire_party import CampfireParty_Environment +from .campsite_puzzle import CampsitePuzzle_Environment +from .canon import Canon_Environment +from .cantor_expansion import CantorExpansion_Environment +from .capital_city_effect import CapitalCityEffect_Environment +from .card_coloring_counting import CardColoringCounting_Environment +from .catalan_number_mod import 
CatalanNumberMod_Environment +from .check_all_cycle_xor_zero import CheckAllCycleXorZero_Environment +from .cho_hamsters import ChoHamsters_Environment +from .cinema import Cinema_Environment +from .circuit import Circuit_Environment +from .circulating_decimal_counting import CirculatingDecimalCounting_Environment +from .circulating_grid import CirculatingGrid_Environment +from .cleaning_up import CleaningUp_Environment +from .clear_symmetry import ClearSymmetry_Environment +from .clique_independent_set_partitioning_counting import Clique_IndependentSet_Partitioning_Counting_Environment +from .coin_square_game import CoinSquareGame_Environment +from .coloring_counting import ColoringCounting_Environment +from .combination_odd_subsequence_counting import CombinationOddSubsequenceCounting_Environment +from .concatenation_partition_counting_sum import ConcatenationPartitionCountingSum_Environment +from .congruent_equation import CongruentEquation_Environment +from .construct_hack_interval import ConstructHackInterval_Environment +from .convex_hull import ConvexHull_Environment +from .cornfield import Cornfield_Environment +from .countdown import CountdownEqual_Environment, CountdownClose_Environment +from .cow_dance_show import CowDanceShow_Environment +from .crt import CRT_Environment +from .cryptarithmetic import Cryptarithmetic_Environment +from .cube_fixed_local_maximum_counting import Cube_FixedLocalMaximumCounting_Environment +from .cycle_counting import CycleCounting_Environment +from .decreasing_digit_counting import DecreasingDigitCounting_Environment +from .degree_fixed_spanning_tree import DegreeFixed_SpanningTree_Environment +from .delta_min_popcount import DeltaMinPopcount_Environment +from .delta_nim_game import DeltaNimGame_Environment +from .derangement_extension import DerangementExtension_Environment +from .difference_constraint_system import DifferenceConstraintSystem_Environment +from .difference_constraint_system_dag import 
DifferenceConstraintSystemDAG_Environment +from .different_color_pairing import DifferentColorPairing_Environment +from .differentiate import Differentiate_Environment +from .digit_lis_counting import DigitLISCounting_Environment +from .discrete_logarithm import DiscreteLogarithm_Environment +from .disinfection import Disinfection_Environment +from .distinct_array_permutation import DistinctArrayPermutation_Environment +from .distinct_edge_colored_complete_graph_counting import DistinctEdgeColoredCompleteGraphCounting_Environment +from .division import Division_Environment +from .divisor_flip_expectation import DivisorFlipExpectation_Environment +from .double_cross_counting import DoubleCrossCounting_Environment +from .double_palindromic_string_counting import DoublePalindromicStringCounting_Environment +from .double_stack_sorting import DoubleStackSorting_Environment +from .dyn_dynamite import DynDynamite_Environment +from .eight_digit_puzzle import EightDigitPuzzle_Environment +from .emperor_worries import EmperorWorries_Environment +from .energy_storage_meter import EnergyStorageMeter_Environment +from .euclid_game import EuclidGame_Environment +from .even_degree_graph_partitioning import EvenDegreeGraphPartitioning_Environment +from .expression_adding_parenthese_counting import Expression_AddingParenthese_Counting_Environment +from .face_right_way import FaceRightWay_Environment +from .factorial_trailing_zero_count import FactorialTrailingZeroCount_Environment +from .fbi_binary_tree import FBI_BinaryTree_Environment +from .fibonacci import Fibonacci_Environment +from .fibonacci_containing_counting import FibonacciContainingCounting_Environment +from .fibtrain import Fibtrain_Environment +from .firework_show import FireworkShow_Environment +from .fixed_mod_k_selection_counting import FixedModK_Selection_Counting_Environment +from .fixed_one_edge_num_spanning_tree import FixedOneEdgeNum_SpanningTree_Environment +from .fractional_programming import 
FractionalProgramming_Environment +from .fractional_programming_bipartite_graph_matching import FractionalProgramming_BipartiteGraphMatching_Environment +from .futoshiki_puzzle import FutoshikiPuzzle_Environment +from .gas_fire_extinguishers import GasFireExtinguishers_Environment +from .gaussian_elimination import GaussianElimination_Environment +from .gcd_fibonacci_product import GCDFibonacciProduct_Environment +from .gcd_lcm_counting import GcdLcmCounting_Environment +from .gcd_one_counting import GCDOne_Counting_Environment +from .gcd_prime_counting import GCDPrime_Counting_Environment +from .gold_washing import GoldWashing_Environment +from .gra_minima_game import GraMinimaGame_Environment +from .grade_ranking_counting import GradeRankingCounting_Environment +from .graph_contain_tree_counting import GraphContainTreeCounting_Environment +from .graph_isomorphism import GraphIsomorphism_Environment +from .grid_bfs import GridBFS_Environment +from .grid_coloring_counting import GridColoringCounting_Environment +from .grid_component import GridComponent_Environment +from .grid_local_minimum_counting import GridLocalMinimumCounting_Environment +from .grid_parity_construction import GridParityConstruction_Environment +from .grid_triangle_counting import GridTriangleCounting_Environment +from .halving_chain_counting import HalvingChainCounting_Environment +from .hamiltonian_path import HamiltonianPath_Environment +from .hamiltonian_path_existence import HamiltonianPathExistence_Environment +from .heap_counting import HeapCounting_Environment +from .hitori_puzzle import HitoriPuzzle_Environment +from .hungry_rabbit import HungryRabbit_Environment +from .hur_warehouse_store import HURWarehouseStore_Environment +from .imp_party import ImpParty_Environment +from .individual_sum_bounded_sequence_counting import IndividualSumBounded_SequenceCounting_Environment +from .integer_factorization_counting import IntegerFactorizationCounting_Environment +from .integer_programming 
import IntegerProgramming_Environment +from .integral import Integral_Environment +from .inversion_pair import InversionPair_Environment +from .inversion_pair_k_counting import InversionPairK_Counting_Environment +from .josephus import Josephus_Environment +from .jug_puzzle import JugPuzzle_Environment +from .k_partition import KPartition_Environment +from .kakurasu import Kakurasu_Environment +from .kidding_me import KiddingMe_Environment +from .king_sorting import KingSorting_Environment +from .klo_blocks import KloBlocks_Environment +from .knapsack import Knapsack_Environment +from .knights_and_knaves import KnightsAndKnaves_Environment +from .kos_dicing import KosDicing_Environment +from .kth_binary_tree import Kth_BinaryTree_Environment +from .kth_semi_balanced_bracket_sequence import Kth_SemiBalancedBracketSequence_Environment +from .kth_subsequence import KthSubsequence_Environment +from .kur import KUR_Environment +from .lamp_changing import LampChanging_Environment +from .land_acquisition import LandAcquisition_Environment +from .landform_generation_counting import LandformGenerationCounting_Environment +from .largest_convex_polygon import LargestConvexPolygon_Environment +from .largest_rectangle_among_points import LargestRectangle_AmongPoints_Environment +from .las import LAS_Environment +from .las_laser import LASLaser_Environment +from .lcm import LCM_Environment +from .lds_two_counting import LDSTwo_Counting_Environment +from .light_up_puzzle import LightUpPuzzle_Environment +from .link_beads import LinkBeads_Environment +from .lis_lds_concatenation import LIS_LDS_Concatenation_Environment +from .liz_lollipop import LIZ_Lollipop_Environment +from .longest_double_palindrome import Longest_DoublePalindrome_Environment +from .longest_matching_subsequence import Longest_MatchingSubsequence_Environment +from .longest_maxdiff_bounded_interval import LongestMaxDiffBoundedInterval_Environment +from .longest_path import LongestPath_Environment +from 
.longest_repeated_palindrome import Longest_RepeatedPalindrome_Environment +from .maf_mafia import MafMafia_Environment +from .magic_square_puzzle import MagicSquarePuzzle_Environment +from .making_grade import MakingGrade_Environment +from .matrix_binary_exponentiation import Matrix_BinaryExponentiation_Environment +from .matrix_permutation_both_diagonal_one import MatrixPermutation_BothDiagonalOne_Environment +from .matrix_permutation_equivalence import MatrixPermutationEquivalence_Environment +from .matrix_permutation_main_diagonal_one import MatrixPermutation_MainDiagonalOne_Environment +from .matrix_pooling import MatrixPooling_Environment +from .matrix_rmq_counting import MatrixRMQCounting_Environment +from .max_different_group_pair_division import MaxDifferentGroupPairDivision_Environment +from .max_grid_path_intersection import MaxGridPathIntersection_Environment +from .max_minimum_after_interval_addition import MaxMinimum_AfterIntervalAddition_Environment +from .max_mult_split import MaxMultSplit_Environment +from .max_multiplication_fixed_sum import MaxMultiplicationFixedSum_Environment +from .max_no_conflicting_bombs import MaxNoConflictingBombs_Environment +from .max_nonadjacent_k_element_sum import Max_NonAdjacent_KElementSum_Environment +from .max_permutation import MaxPermutation_Environment +from .max_rmq_expectation import MaxRMQExpectation_Environment +from .max_segment_coverage_constraint import MaxSegmentCoverageConstraint_Environment +from .max_sum_lds import MaxSumLDS_Environment +from .max_three_square_sum import MaxThreeSquareSum_Environment +from .max_tree_constrained_permutation_weight import Max_TreeConstrainedPermutation_Weight_Environment +from .max_tree_k_path_coverage import MaxTree_KPathCoverahe_Environment +from .max_tree_xor_path import MaxTreeXorPath_Environment +from .max_weight_palindromic_substring import MaxWeightPalindromicSubstring_Environment +from .max_xor_path import MaxXorPath_Environment +from .max_xor_set import 
MaxXorSet_Environment +from .maximum_achromatic_number import MaximumAchromaticNumber_Environment +from .maximum_clique import MaximumClique_Environment +from .maximum_divisor import MaximumDivisor_Environment +from .maximum_independent_set_grid import MaximumIndependentSetGrid_Environment +from .maximum_independent_set_tree import Maximum_IndependentSet_Tree_Environment +from .maximum_lexicographical_order_subsequence import MaximumLexicographicalOrderSubsequence_Environment +from .maximum_point_segment_matching import MaximumPointSegmentMatching_Environment +from .maximum_subsequence_num import Maximum_SubsequenceNum_Environment +from .maximum_weight_matching import MaximumWeightMatching_Environment +from .maze import Maze_Environment +from .min_conversion_to_cycle_cost import MinConversionToCycleCost_Environment +from .min_cost_reducing_lnds import MinCostReducingLNDS_Environment +from .min_cost_tree_coverage import MinCostTreeCoverage_Environment +from .min_cube_assignment import MinCubeAssignment_Environment +from .min_division_sum_xor import MinDivisionSumXor_Environment +from .min_inorder_binary_tree import MinInorderBinaryTree_Environment +from .min_kdivisor_number import MinKDivisorNumber_Environment +from .min_no_solution_linear_diophantine_equation import MinNoSolutionLinearDiophantineEquation_Environment +from .min_nonsubstring import MinNonsubstring_Environment +from .min_pairsum_multiplication_permutation import MinPairSumMultiplicationPermutation_Environment +from .min_path_cover_dag import MinPathCover_DAG_Environment +from .min_sum_chebyshev_distance import MinSumChebyshevDistance_Environment +from .min_sum_distance_square import MinSumDistanceSquare_Environment +from .min_sum_pre_xor import MinSumPreXor_Environment +from .min_swap_two_permutations import MinSwapTwoPermutations_Environment +from .min_xor_pair import MinXorPair_Environment +from .minesweeping import Minesweeping_Environment +from .minimal_cyclic_shift import 
MinimalCyclicShift_Environment +from .minimum_chromatic_number import MinimumChromaticNumber_Environment +from .minimum_chromatic_number_segment_overlap import MinimumChromaticNumber_SegmentOverlap_Environment +from .minimum_cost_maximum_flow import MinimumCost_MaximumFlow_Environment +from .minimum_crossing_edges_graph_partition import Minimum_CrossingEdges_GraphPartition_Environment +from .minimum_directed_spanning_tree import MinimumDirectedSpanningTree_Environment +from .minimum_dominating_interval import Minimum_DominatingInterval_Environment +from .minimum_dominating_set import Minimum_DominatingSet_Environment +from .minimum_dominating_set_grid import Minimum_DominatingSet_Grid_Environment +from .minimum_fibonacci_representation import MinimumFibonacciRepresentation_Environment +from .minimum_harmonious_chromatic_number import MinimumHarmoniousChromaticNumber_Environment +from .minimum_interval_coverage import MinimumIntervalCoverage_Environment +from .minimum_max_abs_slicer import Minimum_MaxAbsSlicer_Environment +from .minimum_max_slicer import Minimum_MaxSlicer_Environment +from .minimum_ratio_path import MinimumRatioPath_Environment +from .minimum_spanning_tree import MinimumSpanningTree_Environment +from .minimum_spanning_tree_counting import MinimumSpanningTreeCounting_Environment +from .minimum_steiner_tree import MinimumSteinerTree_Environment +from .minimum_sum_difference_submatrix import MinimumSumDifferenceSubmatrix_Environment +from .minimum_tree_weighted_dominating_ancestor import MinimumTreeWeightedDominatingAncestor_Environment +from .minimum_unconflicted_grid_kmax import MinimumUnconflictedGridKMax_Environment +from .minimum_vertex_cover import Minimum_VertexCover_Environment +from .minimum_weighted_spanning_tree import MinimumWeightedSpanningTree_Environment +from .mitter_transportation import MitterTransportation_Environment +from .mixed_graph_eulerian_circuit import MixedGraphEulerianCircuit_Environment +from .money_charging_game import 
MoneyChargingGame_Environment +from .monochrome_block_counting import MonochromeBlockCounting_Environment +from .monotonic_stack import MonotonicStack_Environment +from .most_component_tree_removing_two_paths import MostComponentTreeRemovingTwoPaths_Environment +from .most_num_edge_non_self_isomorphism import MostNumEdge_NonSelfIsomorphism_Environment +from .multidrink import MultiDrink_Environment +from .multiple_flipping_game import MultipleFlippingGame_Environment +from .multiplication import Multiplication_Environment +from .myj import MYJ_Environment +from .nand_result_counting import NANDResultCounting_Environment +from .negative_base import NegativeBase_Environment +from .new_nim_game import NewNimGame_Environment +from .next_palindromic import NextPalindromic_Environment +from .nine_puzzle import NinePuzzle_Environment +from .no_adjacent_girl_counting import NoAdjacentGirlCounting_Environment +from .no_double_triple_counting import NoDoubleTripleCounting_Environment +from .not_containing_string_counting import NotContainingStringCounting_Environment +from .number_partition_counting import NumberPartitionCounting_Environment +from .numbrix import Numbrix_Environment +from .odd_visitation import OddVisitation_Environment +from .odl_distance import ODLDistance_Environment +from .pair_more_one_counting import PairMoreOneCounting_Environment +from .palembang_bridges import PalembangBridges_Environment +from .palindrome_partition_counting import PalindromePartitionCounting_Environment +from .palindromic_substring_number_counting import PalindromicSubstringNumberCounting_Environment +from .pan_solar_panels import PanSolarPanels_Environment +from .path_no_going_back_counting import Path_NoGoingBack_Counting_Environment +from .patrol import Patrol_Environment +from .pcp_permutation import PCPPermutation_Environment +from .pipeline_arrangement import PipelineArrangement_Environment +from .pol_polarization import POLPolarization_Environment +from .polya_model import 
PolyaModel_Environment +from .polynomial_factorization import PolynomialFactorization_Environment +from .polynomial_interpolation import PolynomialInterpolation_Environment +from .polynomial_minimum import PolynomialMinimum_Environment +from .polynomial_remainder import PolynomialRemainder_Environment +from .power_cycle import PowerCycle_Environment +from .power_shortcut import PowerShortcut_Environment +from .powernest import PowerNest_Environment +from .prefix_concatenation import PrefixConcatenation_Environment +from .prefix_product_mod_distinct_permutation import PrefixProductMODDistinctPermutation_Environment +from .prefix_sum_mod_distinct_permutation import PrefixSumMODDistinctPermutation_Environment +from .prefixuffix import Prefixuffix_Environment +from .preorder_traversal import PreorderTraversal_Environment +from .prime_graph_minimum_chromatic_number import PrimeGraph_MinimumChromaticNumber_Environment +from .protecting_flowers import ProtectingFlowers_Environment +from .pythagorean_graph_independent_set_counting import PythagoreanGraph_IndependentSetCounting_Environment +from .quad_magic_items import QuadMagicItems_Environment +from .quadratic_function_segmentation import QuadraticFunctionSegmentation_Environment +from .quantum_lock_puzzle import QuantumLockPuzzle_Environment +from .queen_placement import QueenPlacement_Environment +from .random_range_max_expectation import RandomRangeMaxExpectation_Environment +from .range_constrained_increasing_sequence_counting import RangeConstrained_IncreasingSequence_Counting_Environment +from .range_four_sequence_construction import RangeFourSequenceConstruction_Environment +from .range_shrinking_sequence_counting import RangeShrinkingSequenceCounting_Environment +from .recursive_function import RecursiveFunction_Environment +from .recursive_sequence_sum_construction import RecursiveSequenceSumConstruction_Environment +from .repeat_sequence_lnds import RepeatSequenceLNDS_Environment +from .root_extraction import 
RootExtraction_Environment +from .round_robin import RoundRobin_Environment +from .roundtable_assignment import RoundTableAssignment_Environment +from .royal_lock_counting import RoyalLockCounting_Environment +from .salad_bar import SaladBar_Environment +from .salesman_fatigue import SalesmanFatigue_Environment +from .same_adjacency_counting import SameAdjacencyCounting_Environment +from .sat import SAT_Environment +from .scc_sequence_counting import SCC_Sequence_Counting_Environment +from .secret_cow_code import SecretCowCode_Environment +from .segment_min_length_equal_counting import SegmentMinLengthEqual_Counting_Environment +from .segment_tree_sorting_counting import SegmentTreeSortingCounting_Environment +from .self_power_sequence_mod import SelfPowerSequenceMOD_Environment +from .set_cover import SetCover_Environment +from .set_splitting import SetSplitting_Environment +from .shared_substring_counting import SharedSubstringCounting_Environment +from .shortest_path import ShortestPath_Environment +from .shortest_path_count_construction import ShortestPathCountConstruction_Environment +from .shortest_unicolor_substring import ShortestUnicolorSubstring_Environment +from .singing_girl_story import SingingGirlStory_Environment +from .single_stack_sorting import SingleStackSorting_Environment +from .ska_rock_garden import SkaRockGarden_Environment +from .skyscraper_puzzle import SkyscraperPuzzle_Environment +from .skyscraper_sum_puzzle import SkyscraperSumPuzzle_Environment +from .sliding_window import SlidingWindow_Environment +from .slo_elephants import SLOElephants_Environment +from .smallest_binary_multiple import SmallestBinaryMultiple_Environment +from .smallest_circle import SmallestCircle_Environment +from .sorting import Sorting_Environment +from .spiral_matrix import SpiralMatrix_Environment +from .splitting_game import SplittingGame_Environment +from .spy_network import SpyNetwork_Environment +from .squ_squarks import SquSquarks_Environment +from 
.square_undamaged_point_counting import SquareUndamagedPointCounting_Environment +from .star_battle import StarBattle_Environment +from .stirling_second import StirlingSecond_Environment +from .stone_game import StoneGame_Environment +from .stone_intervals_game import StoneIntervalsGame_Environment +from .string_partition_shuffle import StringPartitionShuffle_Environment +from .string_reversal_construction import StringReversalConstruction_Environment +from .stu_well import STUWell_Environment +from .stunt_flying import StuntFlying_Environment +from .subarray_sum_xor import SubarraySumXor_Environment +from .subarray_xor_sum import SubarrayXorSum_Environment +from .subgraph_isomorphism import SubgraphIsomorphism_Environment +from .submatrix_sum_divisible_counting import SubmatrixSumDivisibleCounting_Environment +from .subsequence_reversal_lnds import SubsequenceReversalLNDS_Environment +from .subset_sum import SubsetSum_Environment +from .subset_sum_sequence import SubsetSumSequence_Environment +from .sudoku import Sudoku_Environment +from .sum_divisor_num import Sum_DivisorNum_Environment +from .sum_gcd import SumGCD_Environment +from .sum_gcd_with_individual import SumGCDWithIndividual_Environment +from .sum_lcm import SumLCM_Environment +from .sum_manhattan_curved_surface import SumManhattan_CurvedSurface_Environment +from .sum_mod import SumMOD_Environment +from .sum_phi_interval import SumPHIInterval_Environment +from .sum_product_divisor_num import SumProductDivisorNum_Environment +from .sum_pseudo_euclidean import SumPseudoEuclidean_Environment +from .sum_set_multiplication import SumSetMultiplication_Environment +from .sum_spanning_tree_gcd import SumSpanningTreeGCD_Environment +from .sum_triangle_area import SumTriangleArea_Environment +from .sum_xor_divisor_num import SumXorDivisorNum_Environment +from .survo_puzzle import SurvoPuzzle_Environment +from .taking_prime_game import TakingPrimeGame_Environment +from .task_arrangement import 
TaskArrangement_Environment +from .tetris_attack import TetrisAttack_Environment +from .three_string_common_subsequence_counting import ThreeStringCommonSubsequenceCounting_Environment +from .three_vertex_cycle_counting import ThreeVertexCycleCounting_Environment +from .topological_sort import TopologicalSort_Environment +from .topological_sort_minimal_lexicographical_order import TopologicalSort_MinimalLexicographicalOrder_Environment +from .tournament_longest_path import Tournament_LongestPath_Environment +from .transmission_delay import TransmissionDelay_Environment +from .tree_add_one_edge_diameter import TreeAddOneEdgeDiameter_Environment +from .tree_center import TreeCenter_Environment +from .tree_change_one_edge_diameter import TreeChangeOneEdgeDiameter_Environment +from .tree_coloring import TreeColoring_Environment +from .tree_distance_equal_triad_counting import Tree_DistanceEqualTriad_Counting_Environment +from .tree_dynamic_xor_zero_path import TreeDynamic_XORZeroPath_Environment +from .tree_elimination_expectation import TreeElimination_Expectation_Environment +from .tree_even_partitioning import TreeEvenPartitioning_Environment +from .tree_maximum_visited_vertex import TreeMaximumVisitedVertex_Environment +from .tree_random_walk_expectation import TreeRandomWalkExpectation_Environment +from .tree_topological_sequence_counting import TreeTopologicalSequenceCounting_Environment +from .triumphal_arch import TriumphalArch_Environment +from .twiddle_puzzle import TwiddlePuzzle_Environment +from .two_sat import TwoSAT_Environment +from .two_set_all_coprime_counting import TwoSet_AllCoprime_Counting_Environment +from .undamaged_submatrix_counting import UndamagedSubmatrixCounting_Environment +from .value_diminishing_selection import ValueDiminishingSelection_Environment +from .vertex_k_center import Vertex_KCenter_Environment +from .virus_synthesis import VirusSynthesis_Environment +from .visible_line import VisibleLine_Environment +from 
.warehouse_construction import WarehouseConstruction_Environment +from .weighted_binarytree import WeightedBinaryTree_Environment +from .weighted_lis import WeightedLIS_Environment +from .whack_a_mole import WhackAMole_Environment +from .wil import WIL_Environment +from .wyc import WYC_Environment +from .wyr_leveling_ground import WYRLevelingGround_Environment +from .xor_equation_counting import XorEquationCounting_Environment +from .zero_prefix_subset_counting import ZeroPrefixSubsetCounting_Environment + + +identifier2environment = { + "ABProgramSimulation" : ABProgramSimulation_Environment, + "AddMultiple_Divisible_Counting" : AddMultiple_Divisible_Counting_Environment, + "AdditionTable" : AdditionTable_Environment, + "AlmostCompleteGraphCycleCounting" : AlmostCompleteGraphCycleCounting_Environment, + "AndOr_Sequence_Counting" : AndOr_Sequence_Counting_Environment, + "AntiPalindromicSubstringCounting" : AntiPalindromicSubstringCounting_Environment, + "Axis_KCenter" : Axis_KCenter_Environment, + "BAJBytecomputer" : BAJBytecomputer_Environment, + "BannedPointSupersetPathCounting" : BannedPointSupersetPathCounting_Environment, + "BanyanHeart" : BanyanHeart_Environment, + "BEZMinimalistSecurity" : BEZMinimalistSecurity_Environment, + "BezoutIdentity" : BezoutIdentity_Environment, + "Binario" : Binario_Environment, + "Binario_NoAdjacencyRequirement" : Binario_NoAdjacencyRequirement_Environment, + "BinaryAlternation" : BinaryAlternation_Environment, + "BinaryLinearEquation_SolutionCounting" : BinaryLinearEquation_SolutionCounting_Environment, + "BinaryTreeLeafNumExpectation" : BinaryTreeLeafNumExpectation_Environment, + "BitEquationCounting" : BitEquationCounting_Environment, + "BitAndZero_PathCounting" : BitAndZero_PathCounting_Environment, + "BitwiseOperationSequenceCounting" : BitwiseOperationSequenceCounting_Environment, + "BlockImage" : BlockImage_Environment, + "BoundedAdjacencyDifference_Permutation_Counting" : 
BoundedAdjacencyDifference_Permutation_Counting_Environment, + "BoundedIntervalIntersection" : BoundedIntervalIntersection_Environment, + "BoundedMeanSubarrayCounting" : BoundedMeanSubarrayCounting_Environment, + "BoundedSubarrayCounting" : BoundedSubarrayCounting_Environment, + "BoxScheduling" : BoxScheduling_Environment, + "Bridge" : Bridge_Environment, + "BubbleSwapLowerBound_PermutationCounting" : BubbleSwapLowerBound_PermutationCounting_Environment, + "BucketSorting" : BucketSorting_Environment, + "CampfireParty" : CampfireParty_Environment, + "CampsitePuzzle" : CampsitePuzzle_Environment, + "Canon" : Canon_Environment, + "CantorExpansion" : CantorExpansion_Environment, + "CapitalCityEffect" : CapitalCityEffect_Environment, + "CardColoringCounting" : CardColoringCounting_Environment, + "CatalanNumberMod" : CatalanNumberMod_Environment, + "CheckAllCycleXorZero" : CheckAllCycleXorZero_Environment, + "ChoHamsters" : ChoHamsters_Environment, + "Cinema" : Cinema_Environment, + "Circuit" : Circuit_Environment, + "CirculatingDecimalCounting" : CirculatingDecimalCounting_Environment, + "CirculatingGrid" : CirculatingGrid_Environment, + "CleaningUp" : CleaningUp_Environment, + "ClearSymmetry" : ClearSymmetry_Environment, + "Clique_IndependentSet_Partitioning_Counting" : Clique_IndependentSet_Partitioning_Counting_Environment, + "CoinSquareGame" : CoinSquareGame_Environment, + "ColoringCounting" : ColoringCounting_Environment, + "CombinationOddSubsequenceCounting" : CombinationOddSubsequenceCounting_Environment, + "ConcatenationPartitionCountingSum" : ConcatenationPartitionCountingSum_Environment, + "CongruentEquation" : CongruentEquation_Environment, + "ConstructHackInterval" : ConstructHackInterval_Environment, + "ConvexHull" : ConvexHull_Environment, + "Cornfield" : Cornfield_Environment, + "CountdownEqual" : CountdownEqual_Environment, "CountdownClose" : CountdownClose_Environment, + "CowDanceShow" : CowDanceShow_Environment, + "CRT" : CRT_Environment, + 
"Cryptarithmetic" : Cryptarithmetic_Environment, + "Cube_FixedLocalMaximumCounting" : Cube_FixedLocalMaximumCounting_Environment, + "CycleCounting" : CycleCounting_Environment, + "DecreasingDigitCounting" : DecreasingDigitCounting_Environment, + "DegreeFixed_SpanningTree" : DegreeFixed_SpanningTree_Environment, + "DeltaMinPopcount" : DeltaMinPopcount_Environment, + "DeltaNimGame" : DeltaNimGame_Environment, + "DerangementExtension" : DerangementExtension_Environment, + "DifferenceConstraintSystem" : DifferenceConstraintSystem_Environment, + "DifferenceConstraintSystemDAG" : DifferenceConstraintSystemDAG_Environment, + "DifferentColorPairing" : DifferentColorPairing_Environment, + "Differentiate" : Differentiate_Environment, + "DigitLISCounting" : DigitLISCounting_Environment, + "DiscreteLogarithm" : DiscreteLogarithm_Environment, + "Disinfection" : Disinfection_Environment, + "DistinctArrayPermutation" : DistinctArrayPermutation_Environment, + "DistinctEdgeColoredCompleteGraphCounting" : DistinctEdgeColoredCompleteGraphCounting_Environment, + "Division" : Division_Environment, + "DivisorFlipExpectation" : DivisorFlipExpectation_Environment, + "DoubleCrossCounting" : DoubleCrossCounting_Environment, + "DoublePalindromicStringCounting" : DoublePalindromicStringCounting_Environment, + "DoubleStackSorting" : DoubleStackSorting_Environment, + "DynDynamite" : DynDynamite_Environment, + "EightDigitPuzzle" : EightDigitPuzzle_Environment, + "EmperorWorries" : EmperorWorries_Environment, + "EnergyStorageMeter" : EnergyStorageMeter_Environment, + "EuclidGame" : EuclidGame_Environment, + "EvenDegreeGraphPartitioning" : EvenDegreeGraphPartitioning_Environment, + "Expression_AddingParenthese_Counting" : Expression_AddingParenthese_Counting_Environment, + "FaceRightWay" : FaceRightWay_Environment, + "FactorialTrailingZeroCount" : FactorialTrailingZeroCount_Environment, + "FBI_BinaryTree" : FBI_BinaryTree_Environment, + "Fibonacci" : Fibonacci_Environment, + 
"FibonacciContainingCounting" : FibonacciContainingCounting_Environment, + "Fibtrain" : Fibtrain_Environment, + "FireworkShow" : FireworkShow_Environment, + "FixedModK_Selection_Counting" : FixedModK_Selection_Counting_Environment, + "FixedOneEdgeNum_SpanningTree" : FixedOneEdgeNum_SpanningTree_Environment, + "FractionalProgramming" : FractionalProgramming_Environment, + "FractionalProgramming_BipartiteGraphMatching" : FractionalProgramming_BipartiteGraphMatching_Environment, + "FutoshikiPuzzle" : FutoshikiPuzzle_Environment, + "GasFireExtinguishers" : GasFireExtinguishers_Environment, + "GaussianElimination" : GaussianElimination_Environment, + "GCDFibonacciProduct" : GCDFibonacciProduct_Environment, + "GcdLcmCounting" : GcdLcmCounting_Environment, + "GCDOne_Counting" : GCDOne_Counting_Environment, + "GCDPrime_Counting" : GCDPrime_Counting_Environment, + "GoldWashing" : GoldWashing_Environment, + "GraMinimaGame" : GraMinimaGame_Environment, + "GradeRankingCounting" : GradeRankingCounting_Environment, + "GraphContainTreeCounting" : GraphContainTreeCounting_Environment, + "GraphIsomorphism" : GraphIsomorphism_Environment, + "GridBFS" : GridBFS_Environment, + "GridColoringCounting" : GridColoringCounting_Environment, + "GridComponent" : GridComponent_Environment, + "GridLocalMinimumCounting" : GridLocalMinimumCounting_Environment, + "GridParityConstruction" : GridParityConstruction_Environment, + "GridTriangleCounting" : GridTriangleCounting_Environment, + "HalvingChainCounting" : HalvingChainCounting_Environment, + "HamiltonianPath" : HamiltonianPath_Environment, + "HamiltonianPathExistence" : HamiltonianPathExistence_Environment, + "HeapCounting" : HeapCounting_Environment, + "HitoriPuzzle" : HitoriPuzzle_Environment, + "HungryRabbit" : HungryRabbit_Environment, + "HURWarehouseStore" : HURWarehouseStore_Environment, + "ImpParty" : ImpParty_Environment, + "IndividualSumBounded_SequenceCounting" : IndividualSumBounded_SequenceCounting_Environment, + 
"IntegerFactorizationCounting" : IntegerFactorizationCounting_Environment, + "IntegerProgramming" : IntegerProgramming_Environment, + "Integral" : Integral_Environment, + "InversionPair" : InversionPair_Environment, + "InversionPairK_Counting" : InversionPairK_Counting_Environment, + "Josephus" : Josephus_Environment, + "JugPuzzle" : JugPuzzle_Environment, + "KPartition" : KPartition_Environment, + "Kakurasu" : Kakurasu_Environment, + "KiddingMe" : KiddingMe_Environment, + "KingSorting" : KingSorting_Environment, + "KloBlocks" : KloBlocks_Environment, + "Knapsack" : Knapsack_Environment, + "KnightsAndKnaves" : KnightsAndKnaves_Environment, + "KosDicing" : KosDicing_Environment, + "Kth_BinaryTree" : Kth_BinaryTree_Environment, + "Kth_SemiBalancedBracketSequence" : Kth_SemiBalancedBracketSequence_Environment, + "KthSubsequence" : KthSubsequence_Environment, + "KUR" : KUR_Environment, + "LampChanging" : LampChanging_Environment, + "LandAcquisition" : LandAcquisition_Environment, + "LandformGenerationCounting" : LandformGenerationCounting_Environment, + "LargestConvexPolygon" : LargestConvexPolygon_Environment, + "LargestRectangle_AmongPoints" : LargestRectangle_AmongPoints_Environment, + "LAS" : LAS_Environment, + "LASLaser" : LASLaser_Environment, + "LCM" : LCM_Environment, + "LDSTwo_Counting" : LDSTwo_Counting_Environment, + "LightUpPuzzle" : LightUpPuzzle_Environment, + "LinkBeads" : LinkBeads_Environment, + "LIS_LDS_Concatenation" : LIS_LDS_Concatenation_Environment, + "LIZ_Lollipop" : LIZ_Lollipop_Environment, + "Longest_DoublePalindrome" : Longest_DoublePalindrome_Environment, + "Longest_MatchingSubsequence" : Longest_MatchingSubsequence_Environment, + "LongestMaxDiffBoundedInterval" : LongestMaxDiffBoundedInterval_Environment, + "LongestPath" : LongestPath_Environment, + "Longest_RepeatedPalindrome" : Longest_RepeatedPalindrome_Environment, + "MafMafia" : MafMafia_Environment, + "MagicSquarePuzzle" : MagicSquarePuzzle_Environment, + "MakingGrade" : 
MakingGrade_Environment, + "Matrix_BinaryExponentiation" : Matrix_BinaryExponentiation_Environment, + "MatrixPermutation_BothDiagonalOne" : MatrixPermutation_BothDiagonalOne_Environment, + "MatrixPermutationEquivalence" : MatrixPermutationEquivalence_Environment, + "MatrixPermutation_MainDiagonalOne" : MatrixPermutation_MainDiagonalOne_Environment, + "MatrixPooling" : MatrixPooling_Environment, + "MatrixRMQCounting" : MatrixRMQCounting_Environment, + "MaxDifferentGroupPairDivision" : MaxDifferentGroupPairDivision_Environment, + "MaxGridPathIntersection" : MaxGridPathIntersection_Environment, + "MaxMinimum_AfterIntervalAddition" : MaxMinimum_AfterIntervalAddition_Environment, + "MaxMultSplit" : MaxMultSplit_Environment, + "MaxMultiplicationFixedSum" : MaxMultiplicationFixedSum_Environment, + "MaxNoConflictingBombs" : MaxNoConflictingBombs_Environment, + "Max_NonAdjacent_KElementSum" : Max_NonAdjacent_KElementSum_Environment, + "MaxPermutation" : MaxPermutation_Environment, + "MaxRMQExpectation" : MaxRMQExpectation_Environment, + "MaxSegmentCoverageConstraint" : MaxSegmentCoverageConstraint_Environment, + "MaxSumLDS" : MaxSumLDS_Environment, + "MaxThreeSquareSum" : MaxThreeSquareSum_Environment, + "Max_TreeConstrainedPermutation_Weight" : Max_TreeConstrainedPermutation_Weight_Environment, + "MaxTree_KPathCoverage" : MaxTree_KPathCoverahe_Environment, + "MaxTreeXorPath" : MaxTreeXorPath_Environment, + "MaxWeightPalindromicSubstring" : MaxWeightPalindromicSubstring_Environment, + "MaxXorPath" : MaxXorPath_Environment, + "MaxXorSet" : MaxXorSet_Environment, + "MaximumAchromaticNumber" : MaximumAchromaticNumber_Environment, + "MaximumClique" : MaximumClique_Environment, + "MaximumDivisor" : MaximumDivisor_Environment, + "MaximumIndependentSetGrid" : MaximumIndependentSetGrid_Environment, + "Maximum_IndependentSet_Tree" : Maximum_IndependentSet_Tree_Environment, + "MaximumLexicographicalOrderSubsequence" : MaximumLexicographicalOrderSubsequence_Environment, + 
"MaximumPointSegmentMatching" : MaximumPointSegmentMatching_Environment, + "Maximum_SubsequenceNum" : Maximum_SubsequenceNum_Environment, + "MaximumWeightMatching" : MaximumWeightMatching_Environment, + "Maze" : Maze_Environment, + "MinConversionToCycleCost" : MinConversionToCycleCost_Environment, + "MinCostReducingLNDS" : MinCostReducingLNDS_Environment, + "MinCostTreeCoverage" : MinCostTreeCoverage_Environment, + "MinCubeAssignment" : MinCubeAssignment_Environment, + "MinDivisionSumXor" : MinDivisionSumXor_Environment, + "MinInorderBinaryTree" : MinInorderBinaryTree_Environment, + "MinKDivisorNumber" : MinKDivisorNumber_Environment, + "MinNoSolutionLinearDiophantineEquation" : MinNoSolutionLinearDiophantineEquation_Environment, + "MinNonsubstring" : MinNonsubstring_Environment, + "MinPairSumMultiplicationPermutation" : MinPairSumMultiplicationPermutation_Environment, + "MinPathCover_DAG" : MinPathCover_DAG_Environment, + "MinSumChebyshevDistance" : MinSumChebyshevDistance_Environment, + "MinSumDistanceSquare" : MinSumDistanceSquare_Environment, + "MinSumPreXor" : MinSumPreXor_Environment, + "MinSwapTwoPermutations" : MinSwapTwoPermutations_Environment, + "MinXorPair" : MinXorPair_Environment, + "Minesweeping" : Minesweeping_Environment, + "MinimalCyclicShift" : MinimalCyclicShift_Environment, + "MinimumChromaticNumber" : MinimumChromaticNumber_Environment, + "MinimumChromaticNumber_SegmentOverlap" : MinimumChromaticNumber_SegmentOverlap_Environment, + "MinimumCost_MaximumFlow" : MinimumCost_MaximumFlow_Environment, + "Minimum_CrossingEdges_GraphPartition" : Minimum_CrossingEdges_GraphPartition_Environment, + "MinimumDirectedSpanningTree" : MinimumDirectedSpanningTree_Environment, + "Minimum_DominatingInterval" : Minimum_DominatingInterval_Environment, + "Minimum_DominatingSet" : Minimum_DominatingSet_Environment, + "Minimum_DominatingSet_Grid" : Minimum_DominatingSet_Grid_Environment, + "MinimumFibonacciRepresentation" : 
MinimumFibonacciRepresentation_Environment, + "MinimumHarmoniousChromaticNumber" : MinimumHarmoniousChromaticNumber_Environment, + "MinimumIntervalCoverage" : MinimumIntervalCoverage_Environment, + "Minimum_MaxAbsSlicer" : Minimum_MaxAbsSlicer_Environment, + "Minimum_MaxSlicer" : Minimum_MaxSlicer_Environment, + "MinimumRatioPath" : MinimumRatioPath_Environment, + "MinimumSpanningTree" : MinimumSpanningTree_Environment, + "MinimumSpanningTreeCounting" : MinimumSpanningTreeCounting_Environment, + "MinimumSteinerTree" : MinimumSteinerTree_Environment, + "MinimumSumDifferenceSubmatrix" : MinimumSumDifferenceSubmatrix_Environment, + "MinimumTreeWeightedDominatingAncestor" : MinimumTreeWeightedDominatingAncestor_Environment, + "MinimumUnconflictedGridKMax" : MinimumUnconflictedGridKMax_Environment, + "Minimum_VertexCover" : Minimum_VertexCover_Environment, + "MinimumWeightedSpanningTree" : MinimumWeightedSpanningTree_Environment, + "MitterTransportation" : MitterTransportation_Environment, + "MixedGraphEulerianCircuit" : MixedGraphEulerianCircuit_Environment, + "MoneyChargingGame" : MoneyChargingGame_Environment, + "MonochromeBlockCounting" : MonochromeBlockCounting_Environment, + "MonotonicStack" : MonotonicStack_Environment, + "MostComponentTreeRemovingTwoPaths" : MostComponentTreeRemovingTwoPaths_Environment, + "MostNumEdge_NonSelfIsomorphism" : MostNumEdge_NonSelfIsomorphism_Environment, + "MultiDrink" : MultiDrink_Environment, + "MultipleFlippingGame" : MultipleFlippingGame_Environment, + "Multiplication" : Multiplication_Environment, + "MYJ" : MYJ_Environment, + "NANDResultCounting" : NANDResultCounting_Environment, + "NegativeBase" : NegativeBase_Environment, + "NewNimGame" : NewNimGame_Environment, + "NextPalindromic" : NextPalindromic_Environment, + "NinePuzzle" : NinePuzzle_Environment, + "NoAdjacentGirlCounting" : NoAdjacentGirlCounting_Environment, + "NoDoubleTripleCounting" : NoDoubleTripleCounting_Environment, + "NotContainingStringCounting" : 
NotContainingStringCounting_Environment, + "NumberPartitionCounting" : NumberPartitionCounting_Environment, + "Numbrix" : Numbrix_Environment, + "OddVisitation" : OddVisitation_Environment, + "ODLDistance" : ODLDistance_Environment, + "PairMoreOneCounting" : PairMoreOneCounting_Environment, + "PalembangBridges" : PalembangBridges_Environment, + "PalindromePartitionCounting" : PalindromePartitionCounting_Environment, + "PalindromicSubstringNumberCounting" : PalindromicSubstringNumberCounting_Environment, + "PanSolarPanels" : PanSolarPanels_Environment, + "Path_NoGoingBack_Counting" : Path_NoGoingBack_Counting_Environment, + "Patrol" : Patrol_Environment, + "PCPPermutation" : PCPPermutation_Environment, + "PipelineArrangement" : PipelineArrangement_Environment, + "POLPolarization" : POLPolarization_Environment, + "PolyaModel" : PolyaModel_Environment, + "PolynomialFactorization" : PolynomialFactorization_Environment, + "PolynomialInterpolation" : PolynomialInterpolation_Environment, + "PolynomialMinimum" : PolynomialMinimum_Environment, + "PolynomialRemainder" : PolynomialRemainder_Environment, + "PowerCycle" : PowerCycle_Environment, + "PowerShortcut" : PowerShortcut_Environment, + "PowerNest" : PowerNest_Environment, + "PrefixConcatenation" : PrefixConcatenation_Environment, + "PrefixProductMODDistinctPermutation" : PrefixProductMODDistinctPermutation_Environment, + "PrefixSumMODDistinctPermutation" : PrefixSumMODDistinctPermutation_Environment, + "Prefixuffix" : Prefixuffix_Environment, + "PreorderTraversal" : PreorderTraversal_Environment, + "PrimeGraph_MinimumChromaticNumber" : PrimeGraph_MinimumChromaticNumber_Environment, + "ProtectingFlowers" : ProtectingFlowers_Environment, + "PythagoreanGraph_IndependentSetCounting" : PythagoreanGraph_IndependentSetCounting_Environment, + "QuadMagicItems" : QuadMagicItems_Environment, + "QuadraticFunctionSegmentation" : QuadraticFunctionSegmentation_Environment, + "QuantumLockPuzzle" : QuantumLockPuzzle_Environment, + 
"QueenPlacement" : QueenPlacement_Environment, + "RandomRangeMaxExpectation" : RandomRangeMaxExpectation_Environment, + "RangeConstrained_IncreasingSequence_Counting" : RangeConstrained_IncreasingSequence_Counting_Environment, + "RangeFourSequenceConstruction" : RangeFourSequenceConstruction_Environment, + "RangeShrinkingSequenceCounting" : RangeShrinkingSequenceCounting_Environment, + "RecursiveFunction" : RecursiveFunction_Environment, + "RecursiveSequenceSumConstruction" : RecursiveSequenceSumConstruction_Environment, + "RepeatSequenceLNDS" : RepeatSequenceLNDS_Environment, + "RootExtraction" : RootExtraction_Environment, + "RoundRobin" : RoundRobin_Environment, + "RoundTableAssignment" : RoundTableAssignment_Environment, + "RoyalLockCounting" : RoyalLockCounting_Environment, + "SaladBar" : SaladBar_Environment, + "SalesmanFatigue" : SalesmanFatigue_Environment, + "SameAdjacencyCounting" : SameAdjacencyCounting_Environment, + "SAT" : SAT_Environment, + "SCC_Sequence_Counting" : SCC_Sequence_Counting_Environment, + "SecretCowCode" : SecretCowCode_Environment, + "SegmentMinLengthEqual_Counting" : SegmentMinLengthEqual_Counting_Environment, + "SegmentTreeSortingCounting" : SegmentTreeSortingCounting_Environment, + "SelfPowerSequenceMOD" : SelfPowerSequenceMOD_Environment, + "SetCover" : SetCover_Environment, + "SetSplitting" : SetSplitting_Environment, + "SharedSubstringCounting" : SharedSubstringCounting_Environment, + "ShortestPath" : ShortestPath_Environment, + "ShortestPathCountConstruction" : ShortestPathCountConstruction_Environment, + "ShortestUnicolorSubstring" : ShortestUnicolorSubstring_Environment, + "SingingGirlStory" : SingingGirlStory_Environment, + "SingleStackSorting" : SingleStackSorting_Environment, + "SkaRockGarden" : SkaRockGarden_Environment, + "SkyscraperPuzzle" : SkyscraperPuzzle_Environment, + "SkyscraperSumPuzzle" : SkyscraperSumPuzzle_Environment, + "SlidingWindow" : SlidingWindow_Environment, + "SLOElephants" : SLOElephants_Environment, + 
"SmallestBinaryMultiple" : SmallestBinaryMultiple_Environment, + "SmallestCircle" : SmallestCircle_Environment, + "Sorting" : Sorting_Environment, + "SpiralMatrix" : SpiralMatrix_Environment, + "SplittingGame" : SplittingGame_Environment, + "SpyNetwork" : SpyNetwork_Environment, + "SquSquarks" : SquSquarks_Environment, + "SquareUndamagedPointCounting" : SquareUndamagedPointCounting_Environment, + "StarBattle" : StarBattle_Environment, + "StirlingSecond" : StirlingSecond_Environment, + "StoneGame" : StoneGame_Environment, + "StoneIntervalsGame" : StoneIntervalsGame_Environment, + "StringPartitionShuffle" : StringPartitionShuffle_Environment, + "StringReversalConstruction" : StringReversalConstruction_Environment, + "STUWell" : STUWell_Environment, + "StuntFlying" : StuntFlying_Environment, + "SubarraySumXor" : SubarraySumXor_Environment, + "SubarrayXorSum" : SubarrayXorSum_Environment, + "SubgraphIsomorphism" : SubgraphIsomorphism_Environment, + "SubmatrixSumDivisibleCounting" : SubmatrixSumDivisibleCounting_Environment, + "SubsequenceReversalLNDS" : SubsequenceReversalLNDS_Environment, + "SubsetSum" : SubsetSum_Environment, + "SubsetSumSequence" : SubsetSumSequence_Environment, + "Sudoku" : Sudoku_Environment, + "Sum_DivisorNum" : Sum_DivisorNum_Environment, + "SumGCD" : SumGCD_Environment, + "SumGCDWithIndividual" : SumGCDWithIndividual_Environment, + "SumLCM" : SumLCM_Environment, + "SumManhattan_CurvedSurface" : SumManhattan_CurvedSurface_Environment, + "SumMOD" : SumMOD_Environment, + "SumPHIInterval" : SumPHIInterval_Environment, + "SumProductDivisorNum" : SumProductDivisorNum_Environment, + "SumPseudoEuclidean" : SumPseudoEuclidean_Environment, + "SumSetMultiplication" : SumSetMultiplication_Environment, + "SumSpanningTreeGCD" : SumSpanningTreeGCD_Environment, + "SumTriangleArea" : SumTriangleArea_Environment, + "SumXorDivisorNum" : SumXorDivisorNum_Environment, + "SurvoPuzzle" : SurvoPuzzle_Environment, + "TakingPrimeGame" : TakingPrimeGame_Environment, + 
"TaskArrangement" : TaskArrangement_Environment, + "TetrisAttack" : TetrisAttack_Environment, + "ThreeStringCommonSubsequenceCounting" : ThreeStringCommonSubsequenceCounting_Environment, + "ThreeVertexCycleCounting" : ThreeVertexCycleCounting_Environment, + "TopologicalSort" : TopologicalSort_Environment, + "TopologicalSort_MinimalLexicographicalOrder" : TopologicalSort_MinimalLexicographicalOrder_Environment, + "Tournament_LongestPath" : Tournament_LongestPath_Environment, + "TransmissionDelay" : TransmissionDelay_Environment, + "TreeAddOneEdgeDiameter" : TreeAddOneEdgeDiameter_Environment, + "TreeCenter" : TreeCenter_Environment, + "TreeChangeOneEdgeDiameter" : TreeChangeOneEdgeDiameter_Environment, + "TreeColoring" : TreeColoring_Environment, + "Tree_DistanceEqualTriad_Counting" : Tree_DistanceEqualTriad_Counting_Environment, + "TreeDynamic_XORZeroPath" : TreeDynamic_XORZeroPath_Environment, + "TreeElimination_Expectation" : TreeElimination_Expectation_Environment, + "TreeEvenPartitioning" : TreeEvenPartitioning_Environment, + "TreeMaximumVisitedVertex" : TreeMaximumVisitedVertex_Environment, + "TreeRandomWalkExpectation" : TreeRandomWalkExpectation_Environment, + "TreeTopologicalSequenceCounting" : TreeTopologicalSequenceCounting_Environment, + "TriumphalArch" : TriumphalArch_Environment, + "TwiddlePuzzle" : TwiddlePuzzle_Environment, + "TwoSAT" : TwoSAT_Environment, + "TwoSet_AllCoprime_Counting" : TwoSet_AllCoprime_Counting_Environment, + "UndamagedSubmatrixCounting" : UndamagedSubmatrixCounting_Environment, + "ValueDiminishingSelection" : ValueDiminishingSelection_Environment, + "Vertex_KCenter" : Vertex_KCenter_Environment, + "VirusSynthesis" : VirusSynthesis_Environment, + "VisibleLine" : VisibleLine_Environment, + "WarehouseConstruction" : WarehouseConstruction_Environment, + "WeightedBinaryTree" : WeightedBinaryTree_Environment, + "WeightedLIS" : WeightedLIS_Environment, + "WhackAMole" : WhackAMole_Environment, + "WIL" : WIL_Environment, + "WYC" : 
class ABProgramSimulation_Environment(VerifiableEnvironment):  # Source : https://x.com/VictorTaelin/status/1776096481704804789
    """Task: reduce a random A::B token program to its final (irreducible) state.

    Required entries in ``self.parameter`` before generation:
        N          -- number of tokens in the generated program (>= 1)
        max_steps  -- maximum number of simulation iterations allowed (>= 1);
                      programs that are not confirmed irreducible within this
                      budget are discarded and re-sampled.
    """

    prompt_template = \
r"""A::B is a system with 4 tokens: `A#`, `#A`, `B#` and `#B`.

An A::B program is a sequence of tokens, e.g., `B# A# #B #A B#`.

To *compute* a program, we must rewrite neighbor tokens, using the rules (whenever two neighbor tokens have their `#` facing each-other, they must be rewritten according to the corresponding rule) :
+ `A# #A` ... becomes ... `` (nothing)
+ `A# #B` ... becomes ... `#B A#`
+ `B# #A` ... becomes ... `#A B#`
+ `B# #B` ... becomes ... `` (nothing)

Please give the final state of the program: {program}
An example for output format: `B# A# A#`
"""

    # The four tokens, in the order used when sampling a random program.
    _TOKENS = ("A#", "#A", "B#", "#B")

    # (left, right) neighbour pair -> tokens that replace the pair.
    _REWRITE_RULES = {
        ("A#", "#A"): (),
        ("A#", "#B"): ("#B", "A#"),
        ("B#", "#A"): ("#A", "B#"),
        ("B#", "#B"): (),
    }

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the AB_Program_Simulation_Environment instance.

        Args:
            wrong_format: reward when the output cannot be parsed or contains
                a string that is not one of the four tokens.
            correct_answer: reward when the token list equals the gold answer.
            wrong_answer: reward for a well-formed but incorrect token list.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    def _generate(self) -> None:
        """Sample a program and store it together with its final state.

        Writes ``program``, ``reference_answer`` (space-joined string) and
        ``gold_answer`` (token list) into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        assert "max_steps" in self.parameter, "max_steps is required in parameter"
        max_steps = self.parameter["max_steps"]
        assert max_steps >= 1, "max_steps should be greater than or equal to 1"

        while True:
            # Random per-token weights so programs are not always uniform mixes.
            raw_weights = [random.randint(1, N) for _ in range(4)]
            weights = [w / sum(raw_weights) for w in raw_weights]
            program = [self._TOKENS[i] for i in random.choices(range(4), weights, k=N)]
            self.parameter["program"] = program

            state, final = list(program), None
            for _ in range(max_steps):
                # Apply the left-most applicable rule, one rewrite per iteration.
                for i in range(len(state) - 1):
                    replacement = self._REWRITE_RULES.get((state[i], state[i + 1]))
                    if replacement is not None:
                        state = state[:i] + list(replacement) + state[i + 2:]
                        break
                else:
                    # No neighbouring pair matched: the program is irreducible.
                    final = state
                    break

            if final is not None:
                self.parameter["reference_answer"] = " ".join(final)
                self.parameter["gold_answer"] = final
                break
            # Otherwise the step budget ran out; draw a fresh program.

    def _prompt_generate(self) -> str:
        """Render the prompt with the generated program as space-separated tokens."""
        return self.prompt_template.format(program=" ".join(self.parameter["program"]))

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Split the extracted answer into whitespace-separated tokens.

        Returns ``None`` when no answer was extracted. ``str.split()`` with no
        arguments already ignores leading/trailing whitespace and cannot raise,
        so no stripping or exception handling is needed.
        """
        if answer is None:
            return None  # Invalid answer format
        return answer.split()

    def scorer(self, output: str) -> float:
        """Score a raw model output against the stored gold token list."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]

        assert isinstance(processed_result, list), "processed_result should be a list"

        if not all(token in self._TOKENS for token in processed_result):
            return self.rewards["wrong_format"]

        if processed_result == self.parameter["gold_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
+- 1 ≤ a < b ≤ {N} +- a × b is divisible by a + b + +**Output Format:** Your final answer should be a single integer — the number of such pairs (a, b).""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the AddMultiple_Divisible_Counting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 6, "MAX_N should be greater than or equal to 6" + + N = self.parameter["N"] = random.randint(6, MAX_N) + + + def calc(x : int, y : int) -> int : + """ + Compute + sum_{k = x+1..2*x-1} floor(y / k) + by grouping k’s with the same quotient. 
+ """ + if y == 0 : + return 0 + a = 0 + z = x << 1 + i = x + 1 + while i < z : + q = y // i + if q == 0 : + break + j = min(y // q, z - 1) + a += (j - i + 1) * q + i = j + 1 + return a + + m = math.isqrt(N) + + mu = [0] * (m + 1) + mu[1] = 1 + is_comp = [False] * (m + 1) + primes = [] + + for i in range(2, m + 1) : + if not is_comp[i] : + primes.append(i) + mu[i] = -1 + for p in primes : + ip = i * p + if ip > m : + break + is_comp[ip] = True + if i % p == 0 : + mu[ip] = 0 + break + else : + mu[ip] = -mu[i] + + ans = 0 + for i in range(1, m + 1) : + if mu[i] == 0 : + continue + ii = i * i + top = m // i + for j in range(1, top + 1) : + y = N // (ii * j) + ans += mu[i] * calc(j, y) + assert ans > 0, "Answer should be greater than 0" + self.parameter["reference_answer"] = ans + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/addition_table/__init__.py 
class AdditionTable_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1013
    """Task: recover the base and the digit value of each letter from an addition table.

    ``self.parameter["N"]`` (3..26) is the base; the letters a, b, ... are
    assigned the digits 0..N-1 through a random permutation stored in
    ``self.parameter["digit2letter"]`` (index = digit, value = letter).
    """

    prompt_template = \
r"""You are given an unknown base-N number system (N is an integer ≥ 3), and {N} distinct digits {ALL_LETTERS} in that system. The digits satisfy the following equations in base-N:

{EQUATIONS}

Note:
- {ALL_LETTERS} are distinct digits in the range [0, N−1].
- Expressions like ba represent base-N numbers formed by **concatenation**. For example, if a=1 and b=2, then ba = "21" in base-N.

Your task is to find the correct base N (in decimal), and the values of {ALL_LETTERS} (also in decimal) that satisfy all the equations.

Output Format:
Your final answer should be a single line containing N, {ALL_LETTERS} (all in decimal), separated by **spaces**.
Example: `{N_plus_1} {EXAMPLE_1}` (do **NOT** include the backticks or quotes); this means N={N_plus_1}, {EXAMPLE_2}.
"""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_answer: float = -0.5, wrong_N: float = 0.0,
                 rewarding_strategy: str = "mean([gold=answer])^beta",
                 rewarding_weight: float = 1.0, rewarding_beta: float = 3.0,
                 **kwargs):
        """
        Initialize the AdditionTable_Environment instance.

        The keyword arguments other than ``**kwargs`` are stored verbatim in
        ``self.rewards``; ``**kwargs`` is forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_answer=invalid_answer,
            wrong_N=wrong_N,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Shuffle the first N lowercase letters into digit order and record the answer string."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N in range(3, 26 + 1), "N should be in the range [3, 26]"

        alphabet = [chr(97 + offset) for offset in range(N)]
        shuffled = list(alphabet)
        random.shuffle(shuffled)
        self.parameter["digit2letter"] = shuffled

        value_of = {letter: digit for digit, letter in enumerate(shuffled)}
        digits_text = " ".join(str(value_of[letter]) for letter in alphabet)
        self.parameter["reference_answer"] = "{} {}".format(N, digits_text)

    def convert_to_expression(self, n: int) -> str:
        """Write the integer ``n`` in base N using the letter assigned to each digit."""
        base = self.parameter["N"]
        letters = self.parameter["digit2letter"]

        if n == 0:
            return letters[0]

        # Collect least-significant digit first, then reverse.
        symbols = []
        while n > 0:
            n, digit = divmod(n, base)
            symbols.append(letters[digit])
        return "".join(reversed(symbols))

    def _prompt_generate(self) -> str:
        """Build the prompt listing x + y for every unordered letter pair (x <= y)."""
        N = self.parameter["N"]
        alphabet = [chr(97 + offset) for offset in range(N)]
        value_of = {letter: digit for digit, letter in enumerate(self.parameter["digit2letter"])}

        equation_lines = [
            "{} + {} = {}".format(x, y, self.convert_to_expression(value_of[x] + value_of[y]))
            for position, x in enumerate(alphabet)
            for y in alphabet[position:]
        ]

        return self.prompt_template.format(
            ALL_LETTERS=", ".join(alphabet),
            EQUATIONS="\n".join(equation_lines),
            N=N,
            N_plus_1=N + 1,
            EXAMPLE_1=" ".join(str(value) for value in range(N)),
            EXAMPLE_2=", ".join("{}={}".format(letter, index) for index, letter in enumerate(alphabet)),
        )

    def _process(self, answer: Optional[str]) -> Optional[Dict]:
        """Parse "N d_a d_b ..." into ``{"N": ..., "digits": [...]}``.

        Returns ``None`` when there is no answer at all, and an empty dict when
        the answer is present but is not exactly N + 1 integers.
        """
        if answer is None:
            return None

        try:
            numbers = [int(token) for token in answer.strip().split()]
        except ValueError:
            return {}

        if len(numbers) != self.parameter["N"] + 1:
            return {}
        return {"N": numbers[0], "digits": numbers[1:]}

    def scorer(self, output: str) -> float:
        """Score a raw output according to the configured rewarding strategy."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if not processed_result:
            return self.rewards["invalid_answer"]

        N = processed_result["N"]
        if N != self.parameter["N"]:
            return self.rewards["wrong_N"]

        predict_digits = processed_result["digits"]
        assert len(predict_digits) == N, "digits should have the same length as N"

        value_of = {letter: digit for digit, letter in enumerate(self.parameter["digit2letter"])}
        assert len(value_of) == N, "letter2digit should have the same length as N"
        gold_digits = [value_of[chr(97 + offset)] for offset in range(N)]

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "mean([gold=answer])^beta":
            # Fraction of letters whose digit is right, sharpened by beta.
            hits = sum(1 for gold, guess in zip(gold_digits, predict_digits) if gold == guess)
            return weight * ((hits / N) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * all(gold == guess for gold, guess in zip(gold_digits, predict_digits))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class AlmostCompleteGraphCycleCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3862
    """Task: count simple cycles of K_N with the edge (1, N) removed, modulo an odd MOD."""

    prompt_template = \
r"""Consider a graph with {N} vertices labeled from 1 to {N}. Every pair of vertices is connected by an undirected edge, except for the edge between vertices 1 and {N} (so the graph has {N} × ({N} - 1) / 2 - 1 edges).

What's the number of **simple cycles** in this graph? A simple cycle must:
- Have at least 3 vertices,
- Contain no repeated vertices or edges,
- Be considered the same as any cycle with the same set of edges (regardless of order or starting point); for example, `(1, 2, 3, 4)` and `(2, 1, 4, 3)` are the same, but `(1, 2, 3, 4)` and `(2, 1, 3, 4)` are different.
Output the answer modulo {MOD}."""

    def __init__(self,
                 max_MOD: int = 1000000,
                 wrong_format: float = -1.0, wrong_range: float = -0.5,
                 correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the AlmostCompleteGraphCycleCounting_Environment instance.

        ``max_MOD`` bounds the randomly drawn modulus; the remaining named
        arguments are the reward values stored in ``self.rewards``.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """Draw N and an odd MOD, then store the cycle count modulo MOD."""
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 4, "MAX_N should be greater than or equal to 4"

        N = self.parameter["N"] = random.randint(4, MAX_N)

        # 2 * k + 1 is always odd, so (MOD + 1) // 2 is the inverse of 2 mod MOD.
        MOD = self.parameter["MOD"] = 2 * random.randint(1, self.max_MOD // 2) + 1
        inverse_of_two = (MOD + 1) // 2

        def half_product(p: int, q: int) -> int:
            # (p * q / 2) mod MOD, for p * q even.
            return (p % MOD) * (q % MOD) % MOD * inverse_of_two % MOD

        def count_cycles(cycles: int, paths: int, start: int, target: int) -> int:
            # Grows a complete graph one vertex at a time. When vertex number
            # `size` joins, every new cycle is a pair of its neighbours plus a
            # simple path between them, so `cycles` gains paths * C(size-1, 2);
            # `paths` is then advanced by paths * (size - 2) + 1.
            for size in range(start, target):
                cycles = (cycles + paths * half_product(size - 1, size - 2)) % MOD
                paths = (paths * ((size - 2) % MOD) + 1) % MOD
            # The last vertex lacks one edge, so it has only target - 2 neighbours.
            return (cycles + paths * half_product(target - 2, target - 3)) % MOD

        self.parameter["reference_answer"] = 0 if N <= 3 else count_cycles(1, 2, 4, N)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the generated N and MOD."""
        return self.prompt_template.format(N=self.parameter["N"], MOD=self.parameter["MOD"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the extracted answer as an integer; ``None`` if absent or not an integer."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a raw output: format check, range check, then exact match."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result < 0 or processed_result >= self.parameter["MOD"]:
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "M" in self.parameter, "M is required in parameter" + M = self.parameter["M"] + assert M >= 1, "M should be greater than or equal to 1" + + A = self.parameter["A"] = [random.randint(0, 2 ** M - 1) for i in range(N)] + + + def dp1(N, M, A) : + F = [[[0] * N for _ in range(N)] for _ in range(2)] + for l in range(N) : + for r in range(l, N) : + F[1][l][r] = 1 + + for b in range(M + 1) : + now = b % 2 + lst = now ^ 1 + + for i in range(N) : + for j in range(N) : + F[now][i][j] = 0 + + Pre = [0] * (N + 1) + for i in range(1, N + 1) : + Pre[i] = Pre[i - 1] + ((A[i - 1] >> b) & 1) + + for l in range(N) : + for r in range(l, N) : + for x in range(l - 1, r + 1) : + if Pre[r + 1] - Pre[x + 1] != (r - x) : + continue + + left_count = F[lst][l][x] if x >= l else 1 + right_count = F[lst][x + 1][r] if x+1 <= r else 1 + F[now][l][r] += left_count * right_count + + return F[M % 2][0][N - 1] + + def dp2(N, M, A) : + F = [[[0] * N for _ in range(N)] for _ in range(2)] + for l in range(N) : + for r in range(l, N) : + F[1][l][r] = 1 + + for b in range(M + 1) : + now = b % 2 + lst = now ^ 1 + for i in range(N) : + for j in range(N) : + F[now][i][j] = 0 + + Pre = [0] * (N + 1) + for i in range(1, N + 1) : + Pre[i] = Pre[i - 1] + ((A[i - 1] >> b) & 1) + + for l in range(N) : + for r in range(l, N) : + for x in range(l - 1, r + 1) : + if Pre[r + 1] - Pre[x + 1] != 0: + continue + + left_count = F[lst][l][x] if x >= l else 1 + right_count = F[lst][x + 1][r] if x + 1 <= r else 1 + F[now][l][r] += left_count * right_count + + return F[M % 2][0][N - 1] + + self.parameter["reference_answer"] = 
dp1(N, M - 1, A) * dp2(N, M - 1, A) + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = self.parameter["N"], + N_minus_1 = self.parameter["N"] - 1, + M = self.parameter["M"], + power_2_M = 2 ** self.parameter["M"], + A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/anti_palindromic_substring_counting/__init__.py b/server/Gym/environments/anti_palindromic_substring_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..801e71c20e9fa6be542741296fd75b8a625e4ba8 --- /dev/null +++ b/server/Gym/environments/anti_palindromic_substring_counting/__init__.py @@ -0,0 +1 @@ +from .environment import AntiPalindromicSubstringCounting_Environment diff --git 
class AntiPalindromicSubstringCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3501
    """Environment asking for the number of anti-palindromic substrings of a binary string."""

    prompt_template = \
r"""We define an **anti-palindromic binary string** as a binary string such that its reverse is equal to the bitwise complement of the original string (i.e., '0' becomes '1' and '1' becomes '0'). For example, `000111` is anti-palindromic because its reverse is `111000`, which is the bitwise complement of `000111`. But `1001` is not, because its reverse is `1001`, while its flipped version is `0110`.

You are given a binary string: {S}
Please count the number of **contiguous substrings** of `S` that are anti-palindromic. Two substrings are considered different if they appear at different positions in `S`. Output a single integer — the number of anti-palindromic substrings."""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta", rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Store the reward configuration; remaining keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """
        Build a random binary string S of length N (>= 3) and count its
        anti-palindromic substrings.

        S is assembled from random segments; each even-length segment is a
        random half followed by the complemented reverse of that half, each
        odd-length segment is plain random bits. The count is then obtained
        with a Manacher-style scan and stored as ``"reference_answer"``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Random interior cut positions; segment boundaries are 0, cuts..., N.
        cut_count = random.randint(0, N - 1)
        boundaries = [0] + sorted(random.sample(range(1, N), cut_count)) + [N]

        one_probability = random.random()
        complement = str.maketrans("01", "10")

        def random_bits(count):
            return "".join("1" if random.random() < one_probability else "0" for _ in range(count))

        pieces = []
        for begin, end in zip(boundaries, boundaries[1:]):
            span = end - begin
            if span % 2 == 0:
                half = random_bits(span // 2)
                pieces.append(half + half[::-1].translate(complement))
            else:
                pieces.append(random_bits(span))
        S = self.parameter["S"] = "".join(pieces)
        assert len(S) == N, f"Generated string length {len(S)} does not match N {N}"

        # Interleave '#' between characters and fence both ends with '$':
        # "$#c0#c1#...#c(N-1)#$".
        T = "$#" + "#".join(S) + "#$"
        size = len(T)
        last_center = size - 2

        # Characters that may take part in a match, and what they must pair with.
        partner = {"0": "1", "1": "0", "#": "#"}

        radius = [0] * size
        best_center = 1  # center whose match reaches furthest to the right
        reach = 1        # that center's right boundary
        total = 0

        # Only the '#' positions (odd indices) are centers of even-length substrings.
        for center in range(1, last_center + 1, 2):
            if center < reach:
                current = min(reach - center, radius[2 * best_center - center])
            else:
                current = 1

            # Grow while T[center + r] is the partner of T[center - r].
            while True:
                lo, hi = center - current, center + current
                if lo < 0 or hi >= size:
                    break
                left_char, right_char = T[lo], T[hi]
                if left_char not in partner or right_char not in partner:
                    break
                if right_char != partner[left_char]:
                    break
                current += 1
            radius[center] = current

            if center + current > reach:
                reach, best_center = center + current, center

            # Every two steps of radius account for one matching substring.
            total += current >> 1

        self.parameter["reference_answer"] = total

    def _prompt_generate(self) -> str:
        """Insert the generated string into the prompt template."""
        return self.prompt_template.format(S=self.parameter["S"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the stripped answer as an int; return None if absent or not an int."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Score an output: ``wrong_format`` for unparsable or negative answers,
        otherwise according to the configured rewarding strategy.

        Raises:
            NotImplementedError: if the rewarding strategy is not recognised.
        """
        processed_result = self.processor(output)
        if processed_result is None or processed_result < 0:
            return self.rewards["wrong_format"]

        reference = self.parameter["reference_answer"]
        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            if reference == 0:
                # The ratio is undefined for a zero reference; fall back to exact match.
                return self.rewards["rewarding_weight"] * int(processed_result == 0)
            smaller = min(reference, processed_result)
            larger = max(reference, processed_result)
            return self.rewards["rewarding_weight"] * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (processed_result == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
# NOTE(review): "K4767" in the source URL below looks like a typo for "P4767" — confirm.
class Axis_KCenter_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/K4767
    """Environment asking for K of N collinear points that minimise the total distance to the nearest selected point."""

    prompt_template = \
r"""You are given {N} points on a line, labeled from 0 to {N_minus_1}. Their positions (from left to right) are: {X}

Please select a set of {K} distinct points. Try your best to minimize the total distance from all points to their nearest selected point (the distance is the absolute difference between positions).

**Output Format:** Your final answer should be a single line containing the indices of the selected {K} points in any order, separated by spaces."""

    def __init__(self,
                 position_multiple: int = 5,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Store the position range multiplier and the reward configuration.

        Positions are sampled from ``range(N * position_multiple + 1)``.
        """
        super().__init__(**kwargs)

        self.position_multiple = position_multiple

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """
        Sample K and N distinct sorted positions X, then compute the optimal
        total distance with a DP and store it as ``"gold_answer"``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = self.parameter["K"] = random.randint(1, N - 1)
        X = self.parameter["X"] = sorted(random.sample(range(N * self.position_multiple + 1), N))

        # Larger than any achievable total distance.
        INF = N * (X[-1] - X[0] + 1)

        # Precompute single[l][r]: cost of serving points l..r (inclusive) from
        # one selected point, built incrementally around the middle index.
        single = [[0] * N for _ in range(N)]
        for lo in range(N):
            for hi in range(lo + 1, N):
                single[lo][hi] = single[lo][hi - 1] + (X[hi] - X[(lo + hi) // 2])

        # best[i][j]: minimum total distance for the first i points using j selected points.
        best = [[INF] * (K + 1) for _ in range(N + 1)]
        # split[i][j]: the k that achieved best[i][j]; neighbouring entries bound
        # the search window for k (the Knuth-style optimisation).
        split = [[0] * (K + 1) for _ in range(N + 2)]
        best[0][0] = 0

        for used in range(1, K + 1):
            split[N + 1][used] = N
            for covered in range(N, 0, -1):
                lowest, where = INF, 0
                first = split[covered][used - 1]
                last = min(split[covered + 1][used], covered - 1)
                for k in range(first, last + 1):
                    candidate = best[k][used - 1] + single[k][covered - 1]
                    if candidate < lowest:
                        lowest, where = candidate, k
                best[covered][used] = lowest
                split[covered][used] = where

        self.parameter["gold_answer"] = best[N][K]

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, N-1, K and the space-separated positions."""
        N = self.parameter["N"]
        positions = " ".join(str(x) for x in self.parameter["X"])
        return self.prompt_template.format(N=N, N_minus_1=N - 1, K=self.parameter["K"], X=positions)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated list of ints; return None if absent or malformed."""
        if answer is None:
            return None
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Score a selection of point indices.

        Returns ``wrong_format`` when the output does not parse and
        ``invalid_solution`` when the indices are duplicated, not exactly K
        in number, or out of range. Otherwise the achieved total distance is
        compared with the gold value using the configured strategy.

        Raises:
            NotImplementedError: if the rewarding strategy is not recognised.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        chosen = processed_result
        is_valid = (
            len(chosen) == len(set(chosen))
            and len(chosen) == self.parameter["K"]
            and all(0 <= u < self.parameter["N"] for u in chosen)
        )
        if not is_valid:
            return self.rewards["invalid_solution"]

        X = self.parameter["X"]
        answer = sum(min(abs(X[u] - X[v]) for v in chosen) for u in range(self.parameter["N"]))
        gold = self.parameter["gold_answer"]
        assert gold <= answer, "gold should be less than or equal to answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class BAJBytecomputer_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3558
    """Environment asking for the minimum number of prefix-add operations that make a {-1, 0, +1} array non-decreasing."""

    prompt_template = \
r"""You are given an array X of length {N}, where each element is initially -1, 0, or +1: {X}
You may perform the following operation any number of times: choose an index i (1 ≤ i < {N}), and update X[i + 1] := X[i + 1] + X[i]. Your goal is to make the array non-decreasing, i.e., X[1] ≤ X[2] ≤ ... ≤ X[{N}]; please output the **minimum number of operations** required to achieve this."""

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = 1.0, incorrect_answer: float = 0.0,
                 **kwargs):
        """
        Store the three reward values; remaining keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            correct_answer=correct_answer,
            incorrect_answer=incorrect_answer,
        )

    def _generate(self) -> None:
        """
        Sample arrays X of length N (>= 3) until one can be made
        non-decreasing, then store its minimum operation count as
        ``"reference_answer"``.

        ``self.parameter["X"]`` is overwritten on every attempt, so after
        the loop it holds the accepted array.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        while True:
            # Random weights for drawing -1, 0 and +1.
            distribution = [random.randint(1, N) for _ in range(3)]
            X = self.parameter["X"] = [random.choices([-1, 0, 1], weights=distribution)[0] for _ in range(N)]

            # Sentinel above the at-most-two-operations-per-element bound.
            INF = 2 * N + 5
            values = (-1, 0, 1)

            # cost[s]: fewest operations so far with the previous element equal to values[s].
            cost = [INF] * 3
            cost[X[0] + 1] = 0

            for x in X[1:]:
                following = [INF] * 3
                for slot, spent in enumerate(cost):
                    if spent >= INF:
                        continue
                    before = values[slot]
                    # Add the previous element 0, 1 or 2 times to x.
                    for extra in range(3):
                        after = x + extra * before
                        if -1 <= after <= 1 and after >= before:
                            following[after + 1] = min(following[after + 1], spent + extra)
                cost = following

            fewest = min(cost)
            if fewest < INF:
                self.parameter["reference_answer"] = fewest
                break

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the 1-indexed listing of X."""
        listing = ", ".join(f"X[{index}]={value}" for index, value in enumerate(self.parameter["X"], start=1))
        return self.prompt_template.format(N=self.parameter["N"], X=listing)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the stripped answer as an int; return None if absent or not an int."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for an exact match, a mismatch, or an unparsable output."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["incorrect_answer"]
class BannedPointSupersetPathCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3734
    """Environment asking for the number of bit-superset paths in 3-D that avoid a set of forbidden points, modulo MOD."""

    prompt_template = \
r"""In a three-dimensional space, you start at point (0, 0, 0) and want to reach the point ({N}, {M}, {R}). At each step, if you are currently at (x, y, z), you may move to a new (different from the current one) point of one of the following types:
1. (x', y, z) such that x AND x' = x
2. (x, y', z) such that y AND y' = y
3. (x, y, z') such that z AND z' = z
(AND refers to the bitwise AND operation.)

You are **not allowed** to visit any of the following points:
{obstacles}

Please count the number of distinct valid paths from (0, 0, 0) to ({N}, {M}, {R}) that avoid all forbidden points. Output the result modulo {MOD}."""

    def __init__(self,
                 max_MOD: int = 10000,
                 wrong_format: float = -1.0, wrong_range: float = -0.5, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs) -> None:
        """
        Store the upper bound for the sampled modulus and the reward values.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """
        Sample the target (N, M, R), a set of forbidden sub-mask points and
        a modulus, then count the valid paths by inclusion-exclusion.

        Reads ``MAX_N_M_R`` and ``MAX_O`` from ``self.parameter``; writes
        ``N``, ``M``, ``R``, ``O``, ``obstacles``, ``MOD`` and
        ``reference_answer``.
        """
        assert "MAX_N_M_R" in self.parameter, "MAX_N_M_R is required in parameter"
        MAX_N_M_R = self.parameter["MAX_N_M_R"]
        assert MAX_N_M_R >= 1, "MAX_N_M_R should be greater than or equal to 1"

        # Resample until there is at least one point besides the two endpoints.
        while True:
            N = random.randint(0, MAX_N_M_R)
            M = random.randint(0, MAX_N_M_R)
            R = random.randint(0, MAX_N_M_R)
            self.parameter["N"], self.parameter["M"], self.parameter["R"] = N, M, R
            # Number of (x, y, z) whose coordinates are sub-masks of N, M, R.
            lattice_size = 1 << (N.bit_count() + M.bit_count() + R.bit_count())
            if lattice_size - 2 >= 1:
                break

        assert "MAX_O" in self.parameter, "MAX_O is required in parameter"
        MAX_O = self.parameter["MAX_O"]
        assert MAX_O >= 1, "MAX_O should be greater than or equal to 1"
        MAX_O = min(MAX_O, lattice_size - 2)
        O = self.parameter["O"] = random.randint(1, MAX_O)

        def set_bits(x) -> List[int]:
            """Return the powers of two present in x, in increasing order."""
            return [1 << k for k in range(x.bit_length()) if (x >> k) & 1]

        def draw_submask(bits: List[int]) -> int:
            """Return the sum of a random subset of ``bits``."""
            return sum(random.sample(bits, random.randint(0, len(bits))))

        N_bits, M_bits, R_bits = set_bits(N), set_bits(M), set_bits(R)

        endpoints = ((0, 0, 0), (N, M, R))
        obstacle_set = set()
        while len(obstacle_set) < O:
            point = (draw_submask(N_bits), draw_submask(M_bits), draw_submask(R_bits))
            if point not in endpoints:
                obstacle_set.add(point)
        obstacles = list(obstacle_set)
        random.shuffle(obstacles)
        self.parameter["obstacles"] = obstacles.copy()

        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

        # Origin and obstacles in lexicographic order, target last, so that
        # every coordinate-wise sub-mask of a point precedes it.
        points = sorted([(0, 0, 0)] + obstacles)
        points.append((N, M, R))
        count = len(points)

        dx, dy, dz = N.bit_count(), M.bit_count(), R.bit_count()
        max_d = max(dx, dy, dz)

        # Binomial coefficients modulo MOD up to max_d.
        binom = [[0] * (max_d + 1) for _ in range(max_d + 1)]
        for i in range(max_d + 1):
            binom[i][0] = 1
            for j in range(1, i + 1):
                binom[i][j] = (binom[i - 1][j - 1] + binom[i - 1][j]) % MOD

        # ways[x][y][z]: number of obstacle-free paths that set x bits in the
        # first coordinate, y in the second and z in the third.
        ways = [[[0] * (dz + 1) for _ in range(dy + 1)] for _ in range(dx + 1)]
        ways[0][0][0] = 1
        for x in range(dx + 1):
            for y in range(dy + 1):
                for z in range(dz + 1):
                    if (x, y, z) == (0, 0, 0):
                        continue
                    via_x = sum(ways[i][y][z] * binom[x][i] for i in range(x))
                    via_y = sum(ways[x][j][z] * binom[y][j] for j in range(y))
                    via_z = sum(ways[x][y][k] * binom[z][k] for k in range(z))
                    ways[x][y][z] = (via_x + via_y + via_z) % MOD

        # signed[i] = -(sum over earlier sub-mask points j of signed[j] * ways[bit differences]).
        signed = [0] * count
        signed[0] = 1  # one way to be at the origin
        for i in range(1, count):
            xi, yi, zi = points[i]
            acc = 0
            for j in range(i):
                xj, yj, zj = points[j]
                if (xj & xi) == xj and (yj & yi) == yj and (zj & zi) == zj:
                    acc += signed[j] * ways[(xi ^ xj).bit_count()][(yi ^ yj).bit_count()][(zi ^ zj).bit_count()]
            signed[i] = (-acc) % MOD

        # Negating the last entry recovers the positive path count.
        self.parameter["reference_answer"] = (-signed[-1]) % MOD

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the target, the obstacle list (one per line) and MOD."""
        obstacle_lines = "\n".join(f"({x}, {y}, {z})" for x, y, z in self.parameter["obstacles"])
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            R=self.parameter["R"],
            obstacles=obstacle_lines,
            MOD=self.parameter["MOD"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the stripped answer as an int; return None if absent or not an int."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Return ``wrong_format`` for unparsable output, ``wrong_range`` for a
        value outside [0, MOD), and otherwise the correct/wrong reward.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]):
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
# NOTE: the functions below are methods of BanyanHeart_Environment; its
# `class` line and the start of `prompt_template` lie before this span.

def __init__(self,
             wrong_format: float = -1.0, rewarding_strategy: str = "(intersection/union)^beta", rewarding_beta: float = 5.0, rewarding_weight: float = +1.0,
             **kwargs):
    """
    Store the reward configuration; remaining keyword arguments go to the base class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        rewarding_strategy=rewarding_strategy,
        rewarding_beta=rewarding_beta,
        rewarding_weight=rewarding_weight,
    )


def _generate(self) -> None:
    """
    Build a random labelled tree on N (>= 4) vertices and compute, for each
    vertex, whether it is marked as a possible final heart vertex.

    Writes ``"edges"`` (shuffled list of (u, v) with u < v) and
    ``"reference_answer"`` (a string of N characters, ``T`` or ``F``).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 4, "N should be greater than or equal to 4"

    # Attach each vertex of a random ordering to a random earlier vertex.
    edges = self.parameter["edges"] = []
    labels = list(range(1, N + 1))
    random.shuffle(labels)
    for index in range(1, N):
        a, b = labels[index], random.choice(labels[: index])
        edges.append((min(a, b), max(a, b)))
    random.shuffle(edges)

    assert all(1 <= u < v <= N for u, v in edges)
    assert len(edges) == len(set(edges)) == N - 1

    tree = networkx.Graph()
    tree.add_edges_from(edges)
    assert networkx.is_tree(tree)

    adjacency = [[] for _ in range(N + 1)]
    for u, v in edges:
        adjacency[u].append(v)
        adjacency[v].append(u)

    # All arrays are indexed 1..N; index 0 is a dummy vertex of size 0.
    depth = [0] * (N + 1)
    size = [0] * (N + 1)
    heavy = [0] * (N + 1)    # child with the largest subtree
    second = [0] * (N + 1)   # child with the second largest subtree
    need = [0] * (N + 1)
    parent = [0] * (N + 1)
    feasible = [False] * (N + 1)

    def bigger(x, y):
        """Return whichever index has the strictly larger subtree size (y on ties)."""
        return x if size[x] > size[y] else y

    # Pass 1a: preorder from vertex 1 fixes parents and depths (root depth is 1).
    preorder = []
    pending = [1]
    while pending:
        u = pending.pop()
        preorder.append(u)
        depth[u] = depth[parent[u]] + 1
        for v in adjacency[u]:
            if v != parent[u]:
                parent[v] = u
                pending.append(v)

    # Pass 1b: reversed preorder visits children before parents.
    for u in reversed(preorder):
        total, h1, h2 = 1, 0, 0
        for v in adjacency[u]:
            if v == parent[u]:
                continue
            total += size[v]
            if size[v] > size[h1]:
                h1, h2 = v, h1
            elif size[v] > size[h2]:
                h2 = v
        size[u], heavy[u], second[u] = total, h1, h2

        rest = total - 1 - size[h1]
        if need[h1] <= rest:
            leftover = (total - 1) % 2
        else:
            leftover = need[h1] - rest
        need[u] = leftover + 1

    # Pass 2: push down the largest subtree seen outside the current path.
    carried = [0] * (N + 1)
    for u in preorder:
        outside = carried[u]
        top = bigger(heavy[u], outside)
        if need[top] <= N - depth[u] - size[top]:
            feasible[u] = ((N & 1) == (depth[u] & 1))
        for v in adjacency[u]:
            if v == parent[u]:
                continue
            sibling_best = second[u] if v == heavy[u] else heavy[u]
            carried[v] = bigger(sibling_best, outside)

    self.parameter["reference_answer"] = "".join("T" if feasible[i] else "F" for i in range(1, N + 1))
    assert "T" in self.parameter["reference_answer"], "At least one vertex should be able to be the heart vertex"


def _prompt_generate(self) -> str:
    """Fill the prompt template with N and the edge list, one edge per line."""
    edge_lines = "\n".join(f"({u}, {v})" for u, v in self.parameter["edges"])
    return self.prompt_template.format(N=self.parameter["N"], edges=edge_lines)


def _process(self, answer: Optional[str]) -> Optional[str]:
    """Return the stripped answer if it is exactly N characters from {T, F}, else None."""
    if answer is None:
        return None
    candidate = answer.strip()
    if len(candidate) == self.parameter["N"] and all(c in "TF" for c in candidate):
        return candidate
    return None


def scorer(self, output: str) -> float:
    """
    Score an output by the overlap of its ``T`` positions with the reference.

    Returns ``wrong_format`` when the output does not parse; otherwise uses
    intersection and union counts of ``T`` positions with the configured
    strategy.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    processed_result = self.processor(output)
    if processed_result is None:
        return self.rewards["wrong_format"]

    pairs = list(zip(processed_result, self.parameter["reference_answer"]))
    intersection = sum((mine == "T" and gold == "T") for mine, gold in pairs)
    union = sum((mine == "T" or gold == "T") for mine, gold in pairs)
    assert intersection <= union, "intersection should not exceed union"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(intersection/union)^beta":
        return ((intersection / union) ** self.rewards["rewarding_beta"]) * self.rewards["rewarding_weight"]
    if strategy == "intersection=union":
        return self.rewards["rewarding_weight"] * (intersection == union)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
# NOTE: the functions below are methods of BEZMinimalistSecurity_Environment;
# its `class` line and the start of `prompt_template` lie before this span.

def __init__(self,
             wrong_format: float = -1.0, invalid_solution: float = -0.5,
             rewarding_strategy_min: str = "(gold/answer)^beta", rewarding_weight_min: float = +1.0, rewarding_beta_min: float = 5.0,
             rewarding_strategy_max: str = "(answer/gold)^beta", rewarding_weight_max: float = +1.0, rewarding_beta_max: float = 5.0,
             **kwargs):
    """
    Store separate reward settings for the minimisation and maximisation variants.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy_max=rewarding_strategy_max,
        rewarding_weight_max=rewarding_weight_max,
        rewarding_beta_max=rewarding_beta_max,
        rewarding_strategy_min=rewarding_strategy_min,
        rewarding_weight_min=rewarding_weight_min,
        rewarding_beta_min=rewarding_beta_min,
    )


def _generate(self) -> None:
    """
    Build a feasible instance from a hidden solution and compute the gold sum.

    Reads ``N`` (>= 3) and ``edge_ratio``; writes ``edges`` (triples
    (u, v, w) with u < v), ``P``, ``minimized_or_maximized`` and
    ``gold_answer``. Each connected component is written as
    ``value[u] = sign[u] * t + offset[u]`` for one free parameter t, which
    is either pinned by a same-sign edge or ranges over an interval.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be at least 3"

    # Hidden solution; edge sums are taken from it, so the instance is feasible.
    hidden = [random.randint(0, N) for _ in range(N)]

    assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter"
    edge_ratio = self.parameter["edge_ratio"]
    candidates = [(u, v, hidden[u] + hidden[v]) for u in range(N) for v in range(u + 1, N)]
    edge_count = max(1, min(N * (N - 1) // 2, int(edge_ratio * N)))
    edges = self.parameter["edges"] = random.sample(candidates, edge_count)
    random.shuffle(edges)
    assert all(0 <= u < v < N for u, v, _ in edges)
    assert len(edges) == len({(u, v) for u, v, _ in edges}), "edges should be unique"

    P = self.parameter["P"] = [base + random.randint(0, N) for base in hidden]

    adjacency = [[] for _ in range(N)]
    for u, v, w in edges:
        adjacency[u].append((v, w))
        adjacency[v].append((u, w))

    visited = [False] * N
    sign = [0] * N
    offset = [0] * N
    value = [0] * N
    least_reduction = 0
    most_reduction = 0

    # Per-component state, rebound for every new component below.
    component = []
    anchor = 0
    pinned = None

    def fail():
        assert False, "Invalid solution"

    def explore(u):
        """Depth-first walk assigning sign/offset and pinning t where an edge forces it."""
        nonlocal pinned
        visited[u] = True
        component.append(u)
        # Bail out early if an offset grows too large.
        if offset[u] > 10**6:
            fail()
        for v, w in adjacency[u]:
            if not visited[v]:
                sign[v] = -sign[u]
                offset[v] = w - offset[u]
                explore(v)
            elif sign[u] == sign[v]:
                # Same sign on both ends: the edge determines t.
                twice = w - offset[u] - offset[v]
                if twice & 1:
                    fail()
                candidate = twice // (2 * sign[u])
                if candidate < 0 or candidate > P[anchor] or (pinned is not None and pinned != candidate):
                    fail()
                pinned = candidate
            elif offset[u] + offset[v] != w:
                # Opposite signs: t cancels, so the offsets alone must match.
                fail()

    for start in range(N):
        if visited[start]:
            continue

        component = []
        anchor = start
        pinned = None
        sign[start] = 1
        offset[start] = 0
        explore(start)

        if pinned is not None:
            # t is forced: evaluate and verify the unique assignment.
            for u in component:
                value[u] = sign[u] * pinned + offset[u]
                reduction = P[u] - value[u]
                least_reduction += reduction
                most_reduction += reduction
                if value[u] < 0 or value[u] > P[u]:
                    fail()
            for u in component:
                for v, w in adjacency[u]:
                    if value[u] + value[v] != w:
                        fail()
        else:
            # t may range over [low, high]; intersect the bounds of every vertex.
            low, high = 0, P[anchor]
            for u in component:
                if sign[u] == 1:
                    low = max(low, -offset[u])
                    high = min(high, P[u] - offset[u])
                else:
                    low = max(low, offset[u] - P[u])
                    high = min(high, offset[u])
            if low > high:
                fail()

            # Total reduction at t = low, and its change per unit increase of t.
            base_sum = 0
            slope = 0
            for u in component:
                base_sum += P[u] - (low * sign[u] + offset[u])
                slope -= sign[u]

            if slope > 0:
                most_reduction += base_sum + slope * (high - low)
                least_reduction += base_sum
            else:
                most_reduction += base_sum
                least_reduction += base_sum + slope * (high - low)

    goal = random.choice(["minimized", "maximized"])
    self.parameter["minimized_or_maximized"] = goal
    if goal == "minimized":
        self.parameter["gold_answer"] = sum(P) - most_reduction
    elif goal == "maximized":
        self.parameter["gold_answer"] = sum(P) - least_reduction
    else:
        raise ValueError("minimized_or_maximized should be either 'minimized' or 'maximized'")


def _prompt_generate(self) -> str:
    """Fill the prompt template with N, the listing of P, the constraints and the objective."""
    N = self.parameter["N"]
    listing = " ".join(f"P[{index}]={cap}" for index, cap in enumerate(self.parameter["P"]))
    constraint_lines = "\n".join(f"P'[{u}] + P'[{v}] = {w}" for u, v, w in self.parameter["edges"])
    return self.prompt_template.format(
        N=N,
        N_minus_1=N - 1,
        P=listing,
        constraints=constraint_lines,
        minimized_or_maximized=self.parameter["minimized_or_maximized"],
    )


def _process(self, answer: Optional[str]) -> Optional[List]:
    """Parse a whitespace-separated list of ints; return None if absent or malformed."""
    if answer is None:
        return None
    try:
        return [int(token) for token in answer.strip().split()]
    except ValueError:
        return None


def scorer(self, output: str) -> float:
    """
    Score a proposed array P'.

    Returns ``wrong_format`` when the output does not parse and
    ``invalid_solution`` when the length, the bounds 0 <= P'[i] <= P[i] or
    any sum constraint is violated. Otherwise the sum of P' is compared
    with the gold sum using the strategy configured for the objective.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
        AssertionError: if the gold value is beaten or the stored objective is unknown.
    """
    processed_result = self.processor(output)
    if processed_result is None:
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    P_prime = processed_result
    within_bounds = (
        len(P_prime) == self.parameter["N"]
        and all(0 <= mine <= cap for mine, cap in zip(P_prime, self.parameter["P"]))
    )
    if not within_bounds or not all(P_prime[u] + P_prime[v] == w for u, v, w in self.parameter["edges"]):
        return self.rewards["invalid_solution"]

    gold, answer = self.parameter["gold_answer"], sum(P_prime)
    goal = self.parameter["minimized_or_maximized"]

    if goal == "minimized":
        assert 0 <= gold <= answer, "For minimization, answer should be greater than 0 and at least as large as the gold answer"
        strategy = self.rewards["rewarding_strategy_min"]
        if strategy == "(gold/answer)^beta":
            if answer == 0:
                assert gold == 0, "If answer is 0, gold should also be 0"
                return self.rewards["rewarding_weight_min"] * 1.0
            return self.rewards["rewarding_weight_min"] * ((gold / answer) ** self.rewards["rewarding_beta_min"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight_min"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))

    if goal == "maximized":
        assert 0 <= answer <= gold, "For maximization, answer should be greater than 0 and at most as large as the gold answer"
        strategy = self.rewards["rewarding_strategy_max"]
        if strategy == "(answer/gold)^beta":
            if gold == 0:
                assert answer == 0, "If gold is 0, answer should also be 0"
                return self.rewards["rewarding_weight_max"] * 1.0
            return self.rewards["rewarding_weight_max"] * ((answer / gold) ** self.rewards["rewarding_beta_max"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight_max"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))

    assert False, "minimize_or_maximize should be either 'minimize' or 'maximize'"
import math
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


# ---------------------------------------------------------------------------
# server/Gym/environments/bezout_identity/environment.py
# ---------------------------------------------------------------------------
class BezoutIdentity_Environment(VerifiableEnvironment):
    """Find integer coefficients X making S = sum(A[i] * X[i]) positive and minimal.

    The reference solution is built by chaining extended-gcd steps over A, so
    the gold value is the gcd of the generated numbers.
    """

    prompt_template = (
        r"""You are given an array of length {N}, denoted as A[1], ..., A[{N}]. Please find **integers** X[1], ..., X[{N}] such that the value of S = A[1] * X[1] + ... + A[{N}] * X[{N}] satisfies the condition: **S > 0**. Try your best to **minimize the value of S** while meeting this condition.

A: {A}

**Output Format:** Output a single line containing X[1], ..., X[{N}], separated by spaces."""
    )

    def __init__(
        self,
        wrong_format: float = -1.0,
        invalid_solution: float = -0.5,
        rewarding_strategy: str = "(gold/answer)^beta",
        rewarding_weight: float = +1.0,
        rewarding_beta: float = 5.0,
        **kwargs,
    ):
        """Store the reward configuration; remaining kwargs go to the base class."""
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample the array A and compute a reference coefficient vector.

        Reads ``N`` and ``MAX_A`` from ``self.parameter`` and writes ``A``,
        ``reference_answer`` (space-separated coefficients) and ``gold_answer``
        (the resulting positive sum).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "MAX_A" in self.parameter, "MAX_A is required in parameter"
        MAX_A = self.parameter["MAX_A"]
        assert MAX_A >= 2, "MAX_A should be greater than or equal to 2"

        values = self.parameter["A"] = []
        for _ in range(N):
            # Prefer a value sharing a non-trivial factor with as many of the
            # already chosen values as possible (up to 1024 random attempts).
            chosen, chosen_score = None, -1
            for _attempt in range(1024):
                candidate = random.randint(2, MAX_A)
                score = sum(1 for earlier in values if math.gcd(candidate, earlier) > 1)
                if score > chosen_score:
                    chosen, chosen_score = candidate, score
                    if chosen_score == len(values):
                        break  # shares a factor with every earlier value
            if random.random() < 0.5:
                chosen = -chosen
            values.append(chosen)
        random.shuffle(values)
        assert len(values) == N, "The length of A should be equal to N"

        def bezout(a, b):
            """Return (g, x, y) with a*x + b*y == g and g >= 0."""
            if b == 0:
                return (abs(a), 1 if a >= 0 else -1, 0)
            quotient, remainder = divmod(a, b)
            g, x_inner, y_inner = bezout(b, remainder)
            # b*x_inner + remainder*y_inner == g and remainder == a - quotient*b
            return (g, y_inner, x_inner - quotient * y_inner)

        # Fold the values in one at a time, keeping sum(coeffs[j] * values[j])
        # equal to the running gcd.
        running_gcd = abs(values[0])
        coeffs = [0] * N
        coeffs[0] = 1 if values[0] >= 0 else -1
        for i in range(1, N):
            combined_gcd, u, v = bezout(running_gcd, values[i])
            coeffs[:i] = [c * u for c in coeffs[:i]]
            coeffs[i] = v
            running_gcd = combined_gcd

        S = sum(x * a for x, a in zip(coeffs, values))
        assert S == running_gcd
        assert S > 0, "The sum S must be greater than 0"
        self.parameter["reference_answer"] = " ".join(map(str, coeffs))
        self.parameter["gold_answer"] = S

    def _prompt_generate(self) -> str:
        """Render the prompt with N and the comma-separated array A."""
        rendered_values = ", ".join(str(a) for a in self.parameter["A"])
        return self.prompt_template.format(N=self.parameter["N"], A=rendered_values)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None when that fails."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score an answer: format penalty, validity penalty, or graded reward."""
        # `self.processor` is not defined in this module; presumably the
        # base-class wrapper around `_process` -- confirm in VerifiableEnvironment.
        coefficients = self.processor(output)
        if coefficients is None:
            return self.rewards["wrong_format"]
        assert isinstance(coefficients, list), "processed_result should be a list"

        if len(coefficients) != self.parameter["N"]:
            return self.rewards["invalid_solution"]
        S = sum(x * a for x, a in zip(coefficients, self.parameter["A"]))
        if S <= 0:
            return self.rewards["invalid_solution"]
        gold = self.parameter["gold_answer"]
        assert gold <= S, "The computed sum S must be greater than or equal to the gold answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / S) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == S)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))


# ---------------------------------------------------------------------------
# server/Gym/environments/binario/environment.py
# ---------------------------------------------------------------------------
class Binario_Environment(VerifiableEnvironment):
    """Fill a partially blanked 0/1 grid given per-row and per-column counts of
    1s, never placing three equal cells consecutively in a row or column."""

    prompt_template = (
        r"""You are given a {N} × {M} matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
1. The number of `1`s in each row (from top to bottom) is: {row_counts}.
2. The number of `1`s in each column (from left to right) is: {col_counts}.
3. No more than two consecutive cells in a row or column can contain the same number.

The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
{matrix}

**Output Format:** Output {N} lines, each containing {M} characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
    )

    def __init__(
        self,
        wrong_format: float = -1.0,
        invalid_solution: float = -0.5,
        rewarding_strategy: str = "(satisfied/all)^beta",
        rewarding_weight: float = +1.0,
        rewarding_beta: float = 10.0,
        **kwargs,
    ):
        """Store the reward configuration; remaining kwargs go to the base class."""
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Build a random valid grid, record its row/column counts, blank cells.

        Reads ``MAX_N_M`` and ``sparsity``; writes ``N``, ``M``,
        ``reference_answer``, ``row_counts``, ``col_counts`` and ``matrix``.
        If the backtracking search gives up, ``self.parameter`` is set to None.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = random.randint(2, MAX_N_M)
        M = random.randint(2, MAX_N_M)
        self.parameter["N"] = N
        self.parameter["M"] = M

        def generate_matrix(rows, cols):
            """Randomised backtracking fill; returns the grid or None on give-up."""
            grid = [[None] * cols for _ in range(rows)]
            visit_order = [(r, c) for r in range(rows) for c in range(cols)]
            random.shuffle(visit_order)  # cells are filled in random order
            calls = 0

            def cell(r, c):
                # Out-of-range cells read as None, which never equals "0"/"1".
                if 0 <= r < rows and 0 <= c < cols:
                    return grid[r][c]
                return None

            def makes_triple(r, c, v):
                # Would writing v at (r, c) complete three equal cells in a line?
                for dr, dc in ((0, 1), (1, 0)):
                    prev2 = cell(r - 2 * dr, c - 2 * dc) == v
                    prev1 = cell(r - dr, c - dc) == v
                    next1 = cell(r + dr, c + dc) == v
                    next2 = cell(r + 2 * dr, c + 2 * dc) == v
                    if (prev1 and prev2) or (prev1 and next1) or (next1 and next2):
                        return True
                return False

            def place(idx):
                nonlocal calls
                if idx == len(visit_order):
                    return True
                r, c = visit_order[idx]

                calls += 1
                if calls > 10000000:
                    return False  # search budget exhausted

                for v in random.sample(["0", "1"], 2):
                    if makes_triple(r, c, v):
                        continue
                    grid[r][c] = v
                    if place(idx + 1):
                        return True
                    grid[r][c] = None
                return False

            return grid if place(0) else None

        matrix = generate_matrix(N, M)
        if matrix is None:
            self.parameter = None
            return
        self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)

        self.parameter["row_counts"] = [row.count("1") for row in matrix]
        self.parameter["col_counts"] = [sum(1 for row in matrix if row[j] == "1") for j in range(M)]

        assert "sparsity" in self.parameter, "sparsity is required in parameter"
        sparsity = self.parameter["sparsity"]
        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
        blanked = random.sample(range(N * M), max(1, int(N * M * sparsity)))
        for flat_index in blanked:
            r, c = divmod(flat_index, M)
            matrix[r][c] = '*'
        self.parameter["matrix"] = ["".join(row) for row in matrix]

    def _prompt_generate(self) -> str:
        """Render the prompt with the grid and the required counts."""
        grid_text = "\n".join("".join(str(cell) for cell in row) for row in self.parameter["matrix"])
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            matrix=grid_text,
            row_counts=", ".join(str(count) for count in self.parameter["row_counts"]),
            col_counts=", ".join(str(count) for count in self.parameter["col_counts"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Return the non-blank lines of the answer, stripped; None if no answer."""
        if answer is None:
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]

    def scorer(self, output: str) -> float:
        """Score a grid: shape/alphabet, given cells, no triples, then counts."""
        # `self.processor` is not defined in this module; presumably the
        # base-class wrapper around `_process` -- confirm in VerifiableEnvironment.
        solution = self.processor(output)
        if solution is None:
            return self.rewards["wrong_format"]
        assert isinstance(solution, list), "processed_result should be a list"

        N, M = self.parameter["N"], self.parameter["M"]

        if len(solution) != N or any(len(row) != M for row in solution):
            return self.rewards["wrong_format"]
        if any(c not in "01" for row in solution for c in row):
            return self.rewards["wrong_format"]

        # Pre-filled cells must be kept as given.
        for row, original_row in zip(solution, self.parameter["matrix"]):
            for cell, original_cell in zip(row, original_row):
                if original_cell != '*' and cell != original_cell:
                    assert {original_cell, cell} == {'0', '1'}
                    return self.rewards["invalid_solution"]

        # No three equal cells in a row, horizontally or vertically.
        for i in range(N):
            for j in range(M):
                if j + 2 < M and solution[i][j] == solution[i][j + 1] == solution[i][j + 2]:
                    return self.rewards["invalid_solution"]
                if i + 2 < N and solution[i][j] == solution[i + 1][j] == solution[i + 2][j]:
                    return self.rewards["invalid_solution"]

        row_counts = [row.count("1") for row in solution]
        col_counts = [sum(1 for row in solution if row[j] == "1") for j in range(M)]

        satisfied = sum(1 for got, want in zip(row_counts, self.parameter["row_counts"]) if got == want)
        satisfied += sum(1 for got, want in zip(col_counts, self.parameter["col_counts"]) if got == want)
        assert satisfied <= N + M, "satisfied should not exceed N + M"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta":
            return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all":
            return self.rewards["rewarding_weight"] * (satisfied == (N + M))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# server/Gym/environments/binario_no_adjacency_requirement/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class Binario_NoAdjacencyRequirement_Environment(VerifiableEnvironment):
    """Fill a partially blanked (2N) x (2M) 0/1 grid so that every row and
    every column holds equally many 0s and 1s (no adjacency rule)."""

    prompt_template = (
        r"""You are given a (2 × {N}) × (2 × {M}) matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
1. Each **row** contains exactly {M} '0's and {M} '1's.
2. Each **column** contains exactly {N} '0's and {N} '1's.

The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
{matrix}

**Output Format:** Output (2 × {N}) lines, each containing (2 × {M}) characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
    )

    def __init__(
        self,
        wrong_format: float = -1.0,
        invalid_solution: float = -0.5,
        wrong_solution: float = 0.0,
        correct_solution: float = 1.0,
        **kwargs,
    ):
        """
        Initialize the Binario_NoAdjacencyRequirement_Environment instance.

        Args:
            wrong_format: reward when the answer is not a (2N) x (2M) grid of 0/1.
            invalid_solution: reward when a pre-filled cell was changed.
            wrong_solution: reward when a row or column is unbalanced.
            correct_solution: reward when every constraint holds.
            **kwargs: forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "wrong_solution": wrong_solution,
            "correct_solution": correct_solution,
        }

    def _generate(self) -> None:
        """Create a balanced grid and blank out a ``sparsity`` share of cells.

        Reads ``MAX_N_M`` and ``sparsity``; writes ``N``, ``M``,
        ``reference_answer`` and ``matrix``.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)

        row_permutation, col_permutation = list(range(2 * N)), list(range(2 * M))
        random.shuffle(row_permutation)
        random.shuffle(col_permutation)

        # Cell value is the parity of (row label + column label). Each
        # permutation has as many even as odd labels, so every row and column
        # is balanced by construction.
        matrix = [[str((row_permutation[i] + col_permutation[j]) % 2) for j in range(2 * M)] for i in range(2 * N)]
        self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)

        assert "sparsity" in self.parameter, "sparsity is required in parameter"
        sparsity = self.parameter["sparsity"]
        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
        # At least one cell is always blanked.
        empty_cells = random.sample(range((2 * N) * (2 * M)), max(1, int((2 * N) * (2 * M) * sparsity)))
        for cell in empty_cells:
            row, column = divmod(cell, 2 * M)
            matrix[row][column] = '*'
        self.parameter["matrix"] = ["".join(row) for row in matrix]

    def _prompt_generate(self) -> str:
        """Render the prompt with N, M and the blanked grid."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            matrix="\n".join("".join(map(str, row)) for row in self.parameter["matrix"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Return the non-blank lines of the answer, stripped; None if no answer.

        Nothing here can raise ``ValueError``, so the original's try/except
        around this logic was unreachable and has been removed.
        """
        if answer is None:
            return None
        return [line.strip() for line in answer.strip().splitlines() if line.strip()]

    def scorer(self, output: str) -> float:
        """Score a submitted grid.

        Returns ``wrong_format`` for a bad shape or alphabet,
        ``invalid_solution`` when a given cell was altered, ``wrong_solution``
        when a row or column is unbalanced, else ``correct_solution``.
        """
        # `self.processor` is not defined in this module; presumably the
        # base-class wrapper around `_process` -- confirm in VerifiableEnvironment.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N, M = self.parameter["N"], self.parameter["M"]
        solution = processed_result

        if len(solution) != 2 * N or any(len(row) != 2 * M for row in solution):
            return self.rewards["wrong_format"]
        if any(c not in "01" for row in solution for c in row):
            return self.rewards["wrong_format"]

        for row, original_row in zip(solution, self.parameter["matrix"]):
            for cell, original_cell in zip(row, original_row):
                if original_cell != '*' and cell != original_cell:
                    return self.rewards["invalid_solution"]

        # Rows have length 2M over {0, 1}, so "as many 1s as 0s" is the same
        # as "exactly M ones"; one count per line is enough.
        if any(row.count('1') != M for row in solution):
            return self.rewards["wrong_solution"]
        # Same argument for columns of height 2N.
        for j in range(2 * M):
            if sum(1 for row in solution if row[j] == '1') != N:
                return self.rewards["wrong_solution"]

        return self.rewards["correct_solution"]
# server/Gym/environments/binary_alternation/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class BinaryAlternation_Environment(VerifiableEnvironment):
    """Turn a binary string into an alternating one using as few swaps as possible."""

    prompt_template = (
        r"""You are given a binary string of length {N}, consisting of `0`s and `1`s. It is 0-indexed: {string}

In one operation, you may **swap** the characters at indices `i` and `j` (0 ≤ i, j < {N}). Please transform the string into an **alternating binary string** (no two adjacent characters are the same) using the **minimum number of operations**.

**Output Format:** Each operation should be written on a single line in the format: `i j`, where `i` and `j` are the indices being swapped. Do **NOT** include backticks or quotes. Output one operation per line in the order they should be performed."""
    )

    def __init__(
        self,
        wrong_format: float = -1.0,
        invalid_solution: float = -0.5,
        rewarding_strategy: str = "(gold/answer)^beta",
        rewarding_weight: float = +1.0,
        rewarding_beta: float = 5.0,
        **kwargs,
    ):
        """Store the reward configuration; remaining kwargs go to the base class."""
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample a shuffled string and compute an optimal swap list.

        Reads ``zero_count``; the number of ones differs from it by at most
        one. Writes ``string``, ``reference_answer`` (one ``i j`` swap per
        line) and ``gold_answer`` (the number of swaps).
        """
        assert "zero_count" in self.parameter, "zero_count is required in parameter"
        zero_count = self.parameter["zero_count"]
        assert zero_count >= 2, "zero_count should be greater than or equal to 2"

        one_count = random.randint(zero_count - 1, zero_count + 1)

        chars = ["0"] * zero_count + ["1"] * one_count
        random.shuffle(chars)
        string = "".join(chars)
        self.parameter["string"] = string

        self.parameter["reference_answer"] = None

        def swaps_for(first_char: str) -> List[str]:
            """Swaps that turn `string` into the alternation starting with first_char."""
            misplaced_zeros, misplaced_ones = [], []
            expected = first_char
            for index, actual in enumerate(string):
                if actual != expected:
                    (misplaced_zeros if actual == "0" else misplaced_ones).append(index)
                expected = "1" if expected == "0" else "0"
            assert len(misplaced_zeros) == len(misplaced_ones), "zero_to_one and one_to_zero should have the same length"
            # Each swap pairs one misplaced 0 with one misplaced 1.
            return ["{} {}".format(i, j) for i, j in zip(misplaced_zeros, misplaced_ones)]

        # A target pattern is feasible only if its leading character is at
        # least as frequent as the other one.
        candidates = []
        if zero_count >= one_count:
            candidates.append(swaps_for("0"))
        if one_count >= zero_count:
            candidates.append(swaps_for("1"))
        best = min(candidates, key=len)  # ties keep the "0"-first plan

        self.parameter["gold_answer"] = len(best)
        self.parameter["reference_answer"] = "\n".join(best)

    def _prompt_generate(self) -> str:
        """Render the prompt with the string and its length."""
        text = self.parameter["string"]
        return self.prompt_template.format(N=len(text), string=text)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse one ``i j`` swap per non-blank line into ``[i, j]`` pairs.

        Returns None if there is no answer, or if any non-blank line is not
        exactly two integers.
        """
        if answer is None:
            return None
        swaps = []
        for raw_line in answer.strip().splitlines():
            tokens = raw_line.strip().split()
            if not tokens:
                continue  # blank line
            if len(tokens) != 2:
                return None
            try:
                swaps.append([int(tokens[0]), int(tokens[1])])
            except ValueError:
                return None
        return swaps

    def scorer(self, output: str) -> float:
        """Replay the swaps, verify the alternation, then grade the swap count."""
        # `self.processor` is not defined in this module; presumably the
        # base-class wrapper around `_process` -- confirm in VerifiableEnvironment.
        swaps = self.processor(output)
        if swaps is None:
            return self.rewards["wrong_format"]

        chars = list(self.parameter["string"])
        length = len(chars)
        for i, j in swaps:
            if not (0 <= i < length) or not (0 <= j < length):
                return self.rewards["invalid_solution"]
            chars[i], chars[j] = chars[j], chars[i]
        final = "".join(chars)
        for left, right in zip(final, final[1:]):
            if left == right:
                return self.rewards["invalid_solution"]

        gold, answer = self.parameter["gold_answer"], len(swaps)
        assert gold <= answer, "gold should be less than or equal to answer"

        if answer == 0:
            # Also avoids a 0/0 division below.
            return self.rewards["rewarding_weight"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# server/Gym/environments/binary_linear_equation_solution_counting/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class BinaryLinearEquation_SolutionCounting_Environment(VerifiableEnvironment):
    """Count integer pairs (x, y) inside a box that satisfy A*x + B*y + C = 0."""

    prompt_template = r"""What is the number of integer solution pairs (x, y) such that ({A}) * x + ({B}) * y + ({C}) = 0, with {X1} <= x <= {X2} and {Y1} <= y <= {Y2}?"""

    def __init__(
        self,
        wrong_format: float = -1.0,
        rewarding_strategy: str = "(min/max)^beta",
        rewarding_weight: float = 1.0,
        rewarding_beta: float = 10.0,
        not_guaranteed_probability: float = 0.05,
        **kwargs,
    ):
        """Store the reward configuration and the chance of generating an
        instance without a planted solution; other kwargs go to the base class."""
        super().__init__(**kwargs)

        self.not_guaranteed_probability = not_guaranteed_probability
        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample an equation and a box, then count the solutions exactly.

        Reads ``MAX_RANGE``; writes ``A``, ``B``, ``C``, ``X1``, ``X2``,
        ``Y1``, ``Y2`` and ``reference_answer``. Unless the "not guaranteed"
        branch is drawn, a solution (x, y) is planted inside the box, so the
        count is at least 1.
        """
        assert "MAX_RANGE" in self.parameter, "MAX_RANGE is required in parameter"
        MAX_RANGE = self.parameter["MAX_RANGE"]
        assert MAX_RANGE >= 8, "MAX_RANGE must be at least 8"

        A = self.parameter["A"] = random.randint(-MAX_RANGE, +MAX_RANGE)
        B = self.parameter["B"] = random.randint(-MAX_RANGE, +MAX_RANGE)
        not_guaranteed = random.random() < self.not_guaranteed_probability
        if not_guaranteed:
            # Independent box and constant; the count may be zero.
            X1 = random.randint(-MAX_RANGE, +MAX_RANGE)
            X2 = random.randint(X1, +MAX_RANGE)
            Y1 = random.randint(-MAX_RANGE, +MAX_RANGE)
            Y2 = random.randint(Y1, +MAX_RANGE)
            C = random.randint(-2 * (MAX_RANGE ** 2), +2 * (MAX_RANGE ** 2))
            self.parameter.update({"X1": X1, "X2": X2, "Y1": Y1, "Y2": Y2, "C": C})
        else:
            # Plant (x, y), derive C from it, and grow the box around it.
            x = random.randint(-MAX_RANGE, +MAX_RANGE)
            y = random.randint(-MAX_RANGE, +MAX_RANGE)
            C = -(A * x + B * y)
            X1 = random.randint(-MAX_RANGE, x)
            X2 = random.randint(x, +MAX_RANGE)
            Y1 = random.randint(-MAX_RANGE, y)
            Y2 = random.randint(y, +MAX_RANGE)
            self.parameter.update({"C": C, "X1": X1, "X2": X2, "Y1": Y1, "Y2": Y2})

        def extended_gcd(a, b):
            """Return (g, x, y) with a*x + b*y == g, for a, b >= 0."""
            if b == 0:
                return (a, 1, 0)
            quotient, remainder = divmod(a, b)
            g, inner_x, inner_y = extended_gcd(b, remainder)
            return (g, inner_y, inner_x - quotient * inner_y)

        def parameter_interval(start, step, low, high):
            """Bounds (lo, hi) of the integers k with low <= start + step*k <= high.

            The interval is empty exactly when lo > hi.
            """
            near, far = low - start, high - start
            if not step > 0:
                # Dividing by a negative step flips the inequalities.
                near, far = far, near
            lo = -((-near) // step)  # ceil(near / step)
            hi = far // step         # floor(far / step)
            return lo, hi

        def count_solutions(a, b, c, x_low, x_high, y_low, y_high):
            """Number of integer (x, y) in the box with a*x + b*y + c == 0."""
            if x_low > x_high:
                x_low, x_high = x_high, x_low
            if y_low > y_high:
                y_low, y_high = y_high, y_low
            x_span = x_high - x_low + 1
            y_span = y_high - y_low + 1

            # Degenerate equations.
            if a == 0 and b == 0:
                return x_span * y_span if c == 0 else 0
            if a == 0:
                # b*y + c = 0: y is fixed, x is free.
                if c % b != 0:
                    return 0
                fixed_y = -c // b
                return x_span if y_low <= fixed_y <= y_high else 0
            if b == 0:
                # a*x + c = 0: x is fixed, y is free.
                if c % a != 0:
                    return 0
                fixed_x = -c // a
                return y_span if x_low <= fixed_x <= x_high else 0

            # General case: particular solution plus the one-parameter family
            # x = x0 + (b/d)*k, y = y0 - (a/d)*k.
            d, coeff_x, coeff_y = extended_gcd(abs(a), abs(b))
            if c % d != 0:
                return 0
            if a < 0:
                coeff_x = -coeff_x
            if b < 0:
                coeff_y = -coeff_y

            scale = (-c) // d
            x0, y0 = coeff_x * scale, coeff_y * scale

            kx_lo, kx_hi = parameter_interval(x0, b // d, x_low, x_high)
            ky_lo, ky_hi = parameter_interval(y0, -a // d, y_low, y_high)

            lo, hi = max(kx_lo, ky_lo), min(kx_hi, ky_hi)
            return 0 if lo > hi else hi - lo + 1

        self.parameter["reference_answer"] = count_solutions(A, B, C, X1, X2, Y1, Y2)
        if not_guaranteed:
            assert self.parameter["reference_answer"] >= 0
        else:
            assert self.parameter["reference_answer"] >= 1

    def _prompt_generate(self) -> str:
        """Render the prompt with the coefficients and the box bounds."""
        fields = ("A", "B", "C", "X1", "X2", "Y1", "Y2")
        return self.prompt_template.format(**{name: self.parameter[name] for name in fields})

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as a single integer; None when that fails."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a count; negative or unparsable answers get ``wrong_format``."""
        # `self.processor` is not defined in this module; presumably the
        # base-class wrapper around `_process` -- confirm in VerifiableEnvironment.
        guess = self.processor(output)
        if guess is None:
            return self.rewards["wrong_format"]
        if guess < 0:
            return self.rewards["wrong_format"]

        reference = self.parameter["reference_answer"]
        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            if reference == 0:
                # The ratio is undefined for a zero reference; require an exact match.
                return self.rewards["rewarding_weight"] * (guess == 0)
            smaller, larger = min(reference, guess), max(reference, guess)
            return self.rewards["rewarding_weight"] * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (guess == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
import math
import random
from typing import Optional, Tuple
from ...environment import VerifiableEnvironment


class BinaryTreeLeafNumExpectation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3978
    """
    Task: expected number of leaves of a uniformly random binary tree with N nodes.

    The reference value is the fraction N * (N + 1) / (2 * (2 * N - 1)), stored in lowest terms.
    """
    prompt_template = \
r"""We uniformly at random generate a **binary tree** with exactly {N} nodes (all distinct binary trees with {N} nodes are equally likely). Two binary trees are considered identical if and only if:
- both are empty, **OR**
- both are non-empty, and their left subtrees are identical and their right subtrees are identical.

What is the expected number of **leaf** nodes (nodes whose left and right children are both empty) in the generated binary tree? Output the result as `A/B` (do NOT include quotes), where A and B are positive integers separated by a slash `/`."""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the BinaryTreeLeafNumExpectation_Environment instance.

        Args:
            wrong_format: reward returned when the answer cannot be parsed or is not positive.
            correct_answer: reward returned when the answer equals the reference fraction.
            wrong_answer: reward returned for a well-formed but different fraction.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Draw N uniformly from [1, MAX_N] and store the reduced reference fraction.

        Reads `self.parameter["MAX_N"]`; writes "N", "gold_answer" (a dict with keys
        "A" and "B") and "reference_answer" (the string "A/B").
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 5, "MAX_N should be greater than or equal to 5"

        N = self.parameter["N"] = random.randint(1, MAX_N)

        # Numerator and denominator of N(N+1) / (2(2N-1)), reduced to lowest terms
        A, B = N * (N + 1), 2 * (2 * N - 1)
        gcd_AB = math.gcd(A, B)
        A //= gcd_AB
        B //= gcd_AB
        self.parameter["gold_answer"] = dict(A = A, B = B)
        self.parameter["reference_answer"] = "{}/{}".format(A, B)


    def _prompt_generate(self) -> str :
        """Return the prompt with the generated N substituted in."""
        return self.prompt_template.format(N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] :
        """
        Parse an answer of the form `A/B` into a pair of integers.

        Returns None when `answer` is None, when it does not contain exactly one `/`,
        or when either side is not an integer literal.
        """
        if answer is None :
            return None
        answer = answer.strip()
        try :
            A, B = map(int, map(str.strip, answer.split('/')))
        except ValueError :
            # Raised both by int() on a non-integer and by unpacking a wrong number of parts.
            # Catching only ValueError keeps KeyboardInterrupt / SystemExit propagating.
            return None
        return (A, B)


    def scorer(self, output : str) -> float :
        """
        Score a model output against the stored reference fraction.

        The submitted fraction is reduced before comparison, so unreduced but equal
        fractions are accepted. Non-positive A or B counts as a format error.
        """
        # NOTE(review): self.processor is defined outside this file chunk; presumably it
        # extracts the answer text and delegates to _process -- confirm against the base class.
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        A, B = processed_result
        if not (A > 0 and B > 0) :
            return self.rewards["wrong_format"]

        gold_A, gold_B = self.parameter["gold_answer"]["A"], self.parameter["gold_answer"]["B"]
        gcd_AB = math.gcd(A, B)
        A //= gcd_AB
        B //= gcd_AB
        if (A, B) == (gold_A, gold_B) :
            return self.rewards["correct_answer"]
        else :
            return self.rewards["wrong_answer"]
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class BitEquationCounting_Environment(VerifiableEnvironment) :
    """
    Task: count the variable assignments that make a random read-once Boolean expression true.

    Every `_` in the expression is a distinct variable, so the number of satisfying
    assignments is computed bottom-up while the expression is being built.
    """
    prompt_template = \
r"""Given a Boolean expression (where `_` represents a variable that can be 0 or 1, `&` is bitwise AND, `|` is bitwise OR, and `^` is bitwise XOR): {expression}

There are 2^{N} possible combinations of values for the variables. Your task is to find how many of these combinations make the expression evaluate to true.

**Output Format:** Your final answer should be a single integer — the number of combinations that make the expression true. Example: `15` (do **NOT** include quotes or backticks)."""

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the BitEquationCounting_Environment instance.

        Args:
            wrong_format: reward for an answer that is not an integer.
            wrong_range: reward for an integer outside [0, 2^N].
            rewarding_strategy: either "(min/max)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by the "(min/max)^beta" strategy.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Build a random expression over N variables and record how many assignments satisfy it.

        Reads `self.parameter["N"]`; writes "expression" (without the outermost
        parentheses) and "reference_answer".
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        def build_expression(n) :
            # Returns (text, number of satisfying assignments, number of falsifying assignments)
            # for a random expression over n distinct variables.
            if n == 1 :
                return "_", 1, 1
            left_n = random.randint(1, n - 1)
            right_n = n - left_n
            left_expr, left_true, left_false = build_expression(left_n)
            right_expr, right_true, right_false = build_expression(right_n)
            op = random.choice(("&", "|", "^"))
            if op == "&" :
                true_count = left_true * right_true
                false_count = (2 ** n) - true_count
            elif op == "|" :
                false_count = left_false * right_false
                true_count = (2 ** n) - false_count
            elif op == "^" :
                true_count = left_true * right_false + left_false * right_true
                false_count = left_true * right_true + left_false * right_false
                assert true_count + false_count == 2 ** n, "XOR operation should cover all cases"
            else :
                raise ValueError("Invalid operator")
            return "({} {} {})".format(left_expr, op, right_expr), true_count, false_count
        expression, true_count, false_count = build_expression(N)

        # N >= 2 guarantees the top level is parenthesized; drop that outer pair
        self.parameter["expression"] = expression[1 : -1]
        self.parameter["reference_answer"] = true_count

    def _prompt_generate(self) -> str :
        """Return the prompt with the expression and N substituted in."""
        return self.prompt_template.format(expression = self.parameter["expression"], N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns "wrong_format" for unparsable output, "wrong_range" for a value outside
        [0, 2^N], otherwise the score of the configured rewarding strategy.

        Raises:
            NotImplementedError: if the rewarding strategy is not one of the two known names.
        """
        # NOTE(review): self.processor is defined outside this file chunk; presumably it
        # extracts the answer text and delegates to _process -- confirm against the base class.
        processed_result = self.processor(output)
        if processed_result is not None :
            if not (0 <= processed_result <= 2 ** self.parameter["N"]) :
                return self.rewards["wrong_range"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


class BitAndZero_PathCounting_Environment(VerifiableEnvironment) :
    """
    Task: count increasing paths between two vertices where consecutive labels have bitwise AND 0.

    Both endpoints are generated and shown as binary strings; the count is reported modulo `MOD`.
    """
    prompt_template = \
r"""You are given a **directed graph** with an **infinite number of vertices**, where each vertex is labeled with a non-negative integer: `0`, `1`, `2`, ...

There is a directed edge from vertex `s` to vertex `t` if and only if:
- `s < t`, and
- `s & t = 0` (where `&` denotes the bitwise AND operation)

Please compute the number of **distinct paths** from vertex `{S}` to vertex `{T}`. Give the result **modulo {MOD}**.
Note that the two vertices labels are provided in **binary (base-2)** representation.

**Output Format:** Your final answer should be a single integer — the number of distinct paths modulo `{MOD}`."""
    MOD = 10000

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the BitAndZero_PathCounting_Environment instance.

        Args:
            wrong_format: reward for an answer that is not an integer.
            wrong_range: reward for an integer outside [0, MOD).
            correct_answer: reward for the exact reference value.
            wrong_answer: reward for any other in-range integer.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }

    def _generate_helper(self) -> None :
        """
        Draw one (S, T) pair of binary labels and compute the reference path count.

        Reads `self.parameter["max_length"]`; writes "S", "T" (binary strings with
        S <= T as numbers) and "reference_answer" (already reduced modulo MOD).
        """
        assert "max_length" in self.parameter, "max_length is required in parameter"
        max_length = self.parameter["max_length"]
        assert max_length >= 1, "max_length should be greater than or equal to 1"

        S = "1" + "".join(str(random.randint(0, 1)) for _ in range(random.randint(1, max_length) - 1))
        T = "1" + "".join(str(random.randint(0, 1)) for _ in range(random.randint(1, max_length) - 1))

        # Both strings start with '1', so (length, lexicographic) order equals numeric order
        if len(S) > len(T) or (len(S) == len(T) and S > T) :
            S, T = T, S
        # Ensure S <= T
        self.parameter["S"], self.parameter["T"] = S, T


        MOD = self.MOD

        def Mult(a: int, b: int) -> int:
            return (a * b) % MOD

        def Add(a: int, b: int) -> int:
            # Operands are already reduced, so one conditional subtraction is enough
            s = a + b
            return s - MOD if s >= MOD else s

        S = list(map(int, S))
        T = list(map(int, T))
        N, M = len(S), len(T)

        # Left-pad S with zeros so both bit lists have length M
        if M > N:
            S = [0] * (M - N) + S
        else:
            assert M == N

        G = [[[0, 0] for _ in range(M)] for __ in range(2)]
        for st in (0, 1):
            G[st][0][st] = 1
            for i in range(1, M):
                G[st][i][0] = Add(G[st][i-1][0], G[st][i-1][1])
                G[st][i][1] = G[st][i-1][0]

        # H is the 1-based position of the first set bit of the padded S (M + 1 if there is none)
        H = 1
        while H <= M and S[H-1] == 0:
            H += 1

        F = [[0] * M for _ in range(M + 1)]
        F[1][0] = 1

        for i in range(2, M + 1):
            for x in range(0, i - 1):
                bit = T[i-1]
                if i <= H:
                    F[i][x+1] = Add(F[i][x+1], Mult(F[i-1][x], G[1][x+1][bit]))
                if i < H:
                    total = Add(G[0][x][bit], G[1][x][bit])
                    F[i][x] = Add(F[i][x], Mult(F[i-1][x], total))
                if i > H:
                    F[i][x] = Add(F[i][x], Mult(F[i-1][x], G[S[i-1]][x][bit]))

        ans = 0
        for x in range(0, M):
            ans = Add(ans, F[M][x])
        self.parameter["reference_answer"] = ans


    def _generate(self) -> None :
        """
        Generate instances until the reference answer is neither 0 nor 1.

        Requires max_length >= 3. With labels of at most 2 bits (values up to 3) every
        pair has 0 or 1 paths, so the rejection loop would otherwise never terminate.
        """
        assert "max_length" in self.parameter, "max_length is required in parameter"
        assert self.parameter["max_length"] >= 3, "max_length should be greater than or equal to 3"

        while True :
            self._generate_helper()
            if self.parameter["reference_answer"] not in (0, 1) :
                break


    def _prompt_generate(self) -> str :
        """Return the prompt with S, T and the modulus substituted in."""
        return self.prompt_template.format(
            S = self.parameter["S"],
            T = self.parameter["T"],
            MOD = self.MOD,
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None

    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns "wrong_format" for unparsable output, "wrong_range" for a value outside
        [0, MOD), and "correct_answer" / "wrong_answer" by exact comparison otherwise.
        """
        # NOTE(review): self.processor is defined outside this file chunk; presumably it
        # extracts the answer text and delegates to _process -- confirm against the base class.
        processed_result = self.processor(output)
        if processed_result is not None :
            if not (0 <= processed_result < self.MOD) :
                return self.rewards["wrong_range"]

            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class BitwiseOperationSequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4424
    """
    Task: count the AND/OR operator sequences that turn a list of binary strings into a target.

    The target is produced by applying a random operator sequence, so at least one
    valid sequence always exists.
    """
    prompt_template = \
r"""You are given an array A of {N} + 1 binary strings, each of length {M}. The strings are:
{A}

You will insert an operation (`AND` or `OR`) between every pair of adjacent elements in A, resulting in {N} operations total, to form an expression. You can evaluate the expression from left to right (without operator precedence) to get the final result of the expression.
Count the number of different ways to insert these operations such that the final result equals this binary string: {R}"""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the BitwiseOperationSequenceCounting_Environment instance.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    def _generate(self) -> None :
        """
        Sample N, M, the strings A[0..N] and a reachable target R, then count the valid sequences.

        Reads `self.parameter["MAX_N_M"]`; writes "N", "M", "A", "R" and "reference_answer".
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        limit = self.parameter["MAX_N_M"]
        assert limit >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, limit)
        M = self.parameter["M"] = random.randint(2, limit)

        strings = [None] * (N + 1)
        self.parameter["A"] = strings
        strings[0] = "0" * M
        target = "0" * M
        and_bias = random.random()
        for i in range(1, N + 1) :
            ones_bias = random.random()
            strings[i] = "".join(str(int(random.random() < ones_bias)) for _ in range(M))
            # One draw per step decides the hidden operator, exactly as many draws as operators
            if random.random() < and_bias :
                target = "".join(str(int(x) & int(y)) for x, y in zip(strings[i], target))
            else :
                target = "".join(str(int(x) | int(y)) for x, y in zip(strings[i], target))
        self.parameter["R"] = target


        rows = strings[1 :]

        # Column order, refined row by row with a stable "zeros first, then ones" partition
        order = list(range(M))
        for row in rows :
            zeros = [col for col in order if row[col] == "0"]
            ones = [col for col in order if row[col] == "1"]
            order = zeros + ones

        # Each column read as a binary number whose bit i comes from rows[i]
        column_value = [sum(int(rows[i][col]) << i for i in range(N)) for col in range(M)]

        # First position (in sorted order) whose target bit is '1'; M if there is none
        first_one = next((pos for pos in range(M) if target[order[pos]] == '1'), M)
        # Last position (in sorted order) whose target bit is '0'; -1 if there is none
        last_zero = next((pos for pos in range(M - 1, -1, -1) if target[order[pos]] == '0'), -1)

        if first_one < last_zero :
            # A '1' column sorted before a '0' column leaves no valid interval
            answer = 0
        else :
            low = 0 if last_zero == -1 else column_value[order[last_zero]]
            high = (2 ** N) if first_one == M else column_value[order[first_one]]
            answer = high - low

        self.parameter["reference_answer"] = answer
        assert self.parameter["reference_answer"] > 0


    def _prompt_generate(self) -> str :
        """Return the prompt listing every string as `A[i]=...` together with the target R."""
        listing = "\n".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"]))
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            A = listing,
            R = self.parameter["R"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if it is missing or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output with the configured rewarding strategy.

        Unparsable or negative answers receive the "wrong_format" reward.
        """
        # NOTE(review): self.processor is defined outside this file chunk; presumably it
        # extracts the answer text and delegates to _process -- confirm against the base class.
        guess = self.processor(output)
        if guess is None :
            return self.rewards["wrong_format"]
        if guess < 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            a, b = self.parameter["reference_answer"], guess
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (guess == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
a/server/Gym/environments/block_image/__init__.py b/server/Gym/environments/block_image/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..da615ff18ba48fc47de3879c772818377932c035 --- /dev/null +++ b/server/Gym/environments/block_image/__init__.py @@ -0,0 +1 @@ +from .environment import BlockImage_Environment diff --git a/server/Gym/environments/block_image/environment.py b/server/Gym/environments/block_image/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9143f09d5a0b8c52bf64fe0ef618f7e6bc28e80c --- /dev/null +++ b/server/Gym/environments/block_image/environment.py @@ -0,0 +1,259 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class BlockImage_Environment(VerifiableEnvironment) : # Source: https://www.luogu.com.cn/problem/P1058 + prompt_template = \ +r"""You are given a {M} × {N} rectangular grid, where each cell represents a stack of identical cube blocks. Each cube has size 1 × 1 × 1, and no rotation or flipping is allowed — all cubes are placed in the same orientation. +You are given a matrix representing the number of cubes stacked on each cell in the grid (the integer at row i and column j indicates how many cube blocks are stacked on the cell located at row i, column j): +{matrix} + +The visual representation of a **single cube** follows this fixed format: + +$$ +\def\arraystretch{1e-10} +\begin{aligned} +&\verb! +---+!\\ +&\verb! / /|!\\ +&\verb!+---+ |!\quad\textsf{height}\\ +&\verb!| | +!\\ +&\verb!| |/ !\quad\textsf{width}\\ +&\verb!+---+ !\\ +& \quad\textsf{length} +\end{aligned} +$$ + +Each `+` represents a corner, `-` spans the cube’s length, `/` shows depth (width), and `|` shows height. Empty space in the final drawing should be represented using `.`. 
+ +The 3D isometric projection follows specific stacking rules: + +- **Two cubes side by side (left/right):** +$$ +\def\arraystretch{1e-10} +\begin{aligned} +\verb!..+---+---+!\\ +\verb!./ / /|!\\ +\verb!+---+---+ |!\\ +\verb!| | | +!\\ +\verb!| | |/.!\\ +\verb!+---+---+..!\\ +\end{aligned} +$$ + +- **Two cubes stacked vertically (top/bottom):** +$$ +\def\arraystretch{1e-10} +\begin{aligned} +\verb!..+---+!\\ +\verb!./ /|!\\ +\verb!+---+ |!\\ +\verb!| | +!\\ +\verb!| |/|!\\ +\verb!+---+ |!\\ +\verb!| | +!\\ +\verb!| |/.!\\ +\verb!+---+..!\\ +\end{aligned} +$$ + +- **Two cubes front/back (depth):** +$$ +\def\arraystretch{1e-10} +\begin{aligned} +\verb!....+---+!\\ +\verb!.../ /|!\\ +\verb!..+---+ |!\\ +\verb!./ /| +!\\ +\verb!+---+ |/.!\\ +\verb!| | +..!\\ +\verb!| |/...!\\ +\verb!+---+....!\\ +\end{aligned} +$$ + +The bottom-left corner of the lowest cube in cell ({M}, 1) (bottom row, first column) should align with the bottom-left of the entire drawing. + +**Output Format:** +Your final output should be a string matrix of dimensions K × L (i.e., it has K lines separated by line breaks, with each line containing exactly L characters), where K is the number of rows and L is the number of columns **required to draw the 3D structure correctly** according to the rules above. + +--- + +**Example 1** + +When the rectangular grid is 1 × 2, and the number of cubes in each cell is as follows: +1 3 + +The output is (do **NOT** include the backticks or quotes — use the format below exactly): +``` +......+---+ +...../ /| +....+---+ | +....| | + +....| |/| +....+---+ | +..+-| | + +./ | |/| ++---+---+ | +| | | + +| | |/. ++---+---+.. 
+``` + +--- + +**Example 2** + +When the rectangular grid is 3 × 4, and the number of cubes in each cell is as follows: +2 2 1 2 +2 2 1 1 +3 2 1 2 + +The output is (do **NOT** include the backticks or quotes — use the format below exactly): +``` +......+---+---+...+---+ +..+---+ / /|../ /| +./ /|-+---+ |.+---+ | ++---+ |/ /| +-| | + +| | +---+ |/+---+ |/| +| |/ /| +/ /|-+ | ++---+---+ |/+---+ |/| + +| | | +-| | + |/. +| | |/ | |/| +.. ++---+---+---+---+ |/... +| | | | | +.... +| | | | |/..... ++---+---+---+---+...... +``` +""" + + def __init__(self, + max_height : int = 5, + wrong_format : float = -1.0, invalid_answer : int = -0.5, wrong_size : int = 0.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 2.0, + **kwargs) : + """ + Initialize the BlockImage_Environment instance. + """ + super().__init__(**kwargs) + + self.max_height = max_height + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "wrong_size" : wrong_size, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "MAX_M_N" in self.parameter, "MAX_M_N is required in parameter" + MAX_M_N = self.parameter["MAX_M_N"] + assert MAX_M_N >= 1, "MAX_M_N should be greater than or equal to 1" + + M = self.parameter["M"] = random.randint(1, MAX_M_N) + N = self.parameter["N"] = random.randint(1, MAX_M_N) + grid = self.parameter["grid"] = [[random.randint(1, self.max_height) for j in range(N)] for i in range(M)] + + + max_row = 0 + max_col = 0 + for i in range(M) : + for j in range(N) : + a = grid[i][j] + t = M - i - 1 + cand_col = 2 * t + 4 * j + 6 + if cand_col > max_col : + max_col = cand_col + cand_row = 2 * t + 3 * (a - 1) + 5 + if cand_row > max_row : + max_row = cand_row + + + height = max_row + 1 + width = max_col + 1 + canvas = [['.' 
for _ in range(width)] for _ in range(height)] + template = [ + "..+---+", + "./ /|", + "+---+ |", + "| | +", + "| |/.", + "+---+.." + ] + + + for i in range(M) : + for j in range(N) : + a = grid[i][j] + t = M - i - 1 + for k in range(a) : + x_offset = 2 * t + 4 * j + y_offset = 2 * t + 3 * k + for r in range(6) : + for c in range(7) : + ch = template[r][c] + if ch != '.' : + row_index = y_offset + (5 - r) + col_index = x_offset + c + canvas[row_index][col_index] = ch + + output_lines = [] + for row in range(height - 1, -1, -1) : + output_lines.append("".join(canvas[row])) + self.parameter["reference_answer"] = "\n".join(output_lines) + + def _prompt_generate(self) -> str : + prompt = self.prompt_template + prompt = prompt.replace("{M}", str(self.parameter["M"])) + prompt = prompt.replace("{N}", str(self.parameter["N"])) + prompt = prompt.replace("{matrix}", "\n".join(" ".join(map(str, row)) for row in self.parameter["grid"])) + return prompt + + + def _process(self, answer : Optional[str]) -> Optional[List[str]] : + if answer is not None : + answer = answer.strip() + image = [] + for line in answer.splitlines() : + line = line.strip() + if line : + image.append(line) + return image + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + image = processed_result + + if not image : + return self.rewards["wrong_format"] + for row in image : + if len(row) != len(image[0]) : + return self.rewards["wrong_format"] + if not all(ch in ".+-/| " for ch in row) : + return self.rewards["invalid_answer"] + + gold_image = self.parameter["reference_answer"].split("\n") + if len(image) != len(gold_image) : + return self.rewards["wrong_size"] + if len(image[0]) != len(gold_image[0]) : + return self.rewards["wrong_size"] + + total_correct = 0 + for gold_row, row in zip(gold_image, image) : + assert len(gold_row) == len(row) + total_correct += sum(gold_row[i] == row[i] for i in 
range(len(gold_row))) + total_cells = len(gold_image) * len(gold_image[0]) + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * (((total_correct / total_cells)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (total_correct == total_cells) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/bounded_adjacency_difference_permutation_counting/__init__.py b/server/Gym/environments/bounded_adjacency_difference_permutation_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..438152b92fd5e14239baf7e84a7d398de96becaa --- /dev/null +++ b/server/Gym/environments/bounded_adjacency_difference_permutation_counting/__init__.py @@ -0,0 +1 @@ +from .environment import BoundedAdjacencyDifference_Permutation_Counting_Environment diff --git a/server/Gym/environments/bounded_adjacency_difference_permutation_counting/environment.py b/server/Gym/environments/bounded_adjacency_difference_permutation_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..87144ead0cb54927ed8f9c63522c241c14a8538f --- /dev/null +++ b/server/Gym/environments/bounded_adjacency_difference_permutation_counting/environment.py @@ -0,0 +1,135 @@ +import random +from typing import Optional +from itertools import permutations +from ...environment import VerifiableEnvironment + + +class BoundedAdjacencyDifference_Permutation_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3867 + prompt_template = r"""What is the number of permutations of 1, 2, ..., {N} such that for every two adjacent elements (i.e., the i-th and (i+1)-th elements for all 1 <= i < N), the absolute difference 
between them is at most {K}?""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the BoundedAdjacencyDifference_Permutation_Counting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + self.parameter["K"] = K = random.randint(2, min(4, N - 2)) + + + # Precompute factorials up to K (K <= 4) + FACT = [1] * (K + 1) + for i in range(1, K + 1): + FACT[i] = FACT[i - 1] * i + FK = FACT[K] # K! + + # All permutations of length K in lexicographic order + PERMS = [list(p) for p in permutations(range(K))] + TM = (1 << (K + 1)) - 1 # mask with (K+1) ones + + # DP over i (size), ip (permutation index), ic (mask) + # Use rolling arrays to keep memory tight and sizes appropriate + prev = [[0] * (TM + 1) for _ in range(FK)] + for ip in range(FK): + prev[ip][TM] = 1 # base: i = K + + for i in range(K + 1, N + 1): + cur = [[0] * (TM + 1) for _ in range(FK)] + for ip in range(FK): + tp = PERMS[ip] # current permutation of size K + for ic in range(TM + 1): + val = prev[ip][ic] + if not val: + continue + # Try to insert the new maximum at each available slot j + for j in range(K + 1): + if ((ic >> j) & 1) == 0: + continue + + # Insert into permutation representation + ttp_ins = tp[:j] + [K] + tp[j:] # length K+1, values in {0..K} + l0 = ttp_ins.index(0) # first position of '0' + ttp_trim = ttp_ins[:l0] + ttp_ins[l0 + 1:] # remove that '0' + ttp = [x - 1 for x in ttp_trim] # now a perm of {0..K-1} + + # Update slot mask + tc_bits = [ (ic >> l) & 1 for l in range(K + 1) ] + ttc2 = 
tc_bits[:j] + [1] + tc_bits[j:] # insert a '1' at j + # remove index l0+1 and then clear index l0 + ttc_removed = ttc2[:l0 + 1] + ttc2[l0 + 2:] + ttc_removed[l0] = 0 + icc = 0 + for l in range(K + 1): + if ttc_removed[l]: + icc |= (1 << l) + + # Lehmer code -> permutation index 'ipp' + ipp = 0 + seen = [0] * K + for pos in range(K): + v = ttp[pos] + ch = 0 + for z in range(v): + if seen[z] == 0: + ch += 1 + seen[v] = 1 + ipp += ch * FACT[K - 1 - pos] + + cur[ipp][icc] += val + prev = cur + + ans = 0 + for ip in range(FK): + for ic in range(TM + 1): + ans += prev[ip][ic] + assert ans > 0, "The answer should be positive" + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git 
a/server/Gym/environments/bounded_interval_intersection/__init__.py b/server/Gym/environments/bounded_interval_intersection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..14db661e9dfb9c33374082dd2dccb3a125291602 --- /dev/null +++ b/server/Gym/environments/bounded_interval_intersection/__init__.py @@ -0,0 +1 @@ +from .environment import BoundedIntervalIntersection_Environment diff --git a/server/Gym/environments/bounded_interval_intersection/environment.py b/server/Gym/environments/bounded_interval_intersection/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..40ce155cca4a7ddc5089eff19fdf50400ee2ddba --- /dev/null +++ b/server/Gym/environments/bounded_interval_intersection/environment.py @@ -0,0 +1,98 @@ +import heapq +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class BoundedIntervalIntersection_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""An interval [l, r]'s length is defined as r - l. The length of an **empty intersection** is considered to be 0. The **intersection** of a set of intervals is the range covered by all of them simultaneously. + +You are given {N} intervals: +{intervals} + +Please count how many **non-empty subsets** (i.e., from the total of 2^{N} - 1 non-empty subsets) have an intersection of length **greater than or equal to {K}**. + +**Output Format:** Your final answer should be a single integer — the number of non-empty subsets of intervals whose intersection has length at least {K}.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the BoundedIntervalIntersection_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + self.parameter["intervals"] = [] + for i in range(N) : + l = random.randint(0, N) + r = random.randint(l, N) + self.parameter["intervals"].append((l, r)) + + K = self.parameter["K"] = random.randint(1, max(min(r - l for l, r in self.parameter["intervals"]), 1)) + assert K > 0, "K should be greater than 0" + + + intervals = self.parameter["intervals"].copy() + intervals.sort(key = lambda x : x[0]) + + Q = [] + ans = 0 + + for l, r in intervals : + if r - l >= K : + while Q and Q[0] < l + K : + heapq.heappop(Q) + ans += pow(2, len(Q)) + heapq.heappush(Q, r) + + self.parameter["reference_answer"] = ans + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + K = self.parameter["K"], + intervals = "\n".join(["[{}, {}]".format(l, r) for l, r in self.parameter["intervals"]]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** 
self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/bounded_mean_subarray_counting/__init__.py b/server/Gym/environments/bounded_mean_subarray_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..153d792693758b4efa1f4d2bed6c4c2f0c3b634a --- /dev/null +++ b/server/Gym/environments/bounded_mean_subarray_counting/__init__.py @@ -0,0 +1 @@ +from .environment import BoundedMeanSubarrayCounting_Environment diff --git a/server/Gym/environments/bounded_mean_subarray_counting/environment.py b/server/Gym/environments/bounded_mean_subarray_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..3bd0c08b292b317640cea3c9447463680069876a --- /dev/null +++ b/server/Gym/environments/bounded_mean_subarray_counting/environment.py @@ -0,0 +1,108 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class BoundedMeanSubarrayCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Given an array A of length {N}: +{A} + +How many nonempty contiguous subarrays have a mean greater than or equal to {K}? + +**Output Format:** Your final answer should be a single integer — the total number of nonempty subarrays of A whose mean is greater than or equal to {K}.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the BoundedIntervalCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + A = self.parameter["A"] = [random.randint(0, N) for _ in range(N)] + K = self.parameter["K"] = random.randint(min(A), max(A)) + + + v = [0] * (N + 1) + for i in range(1, N + 1) : + v[i] = v[i - 1] + A[i - 1] - K + + tmp = [0] * (N + 1) + + res = 0 + def cdq(l, r) : + nonlocal res + if l >= r : + return + mid = (l + r) // 2 + cdq(l, mid) + cdq(mid + 1, r) + + i, j = l, mid + 1 + sum_left = 0 + for k in range(l, r + 1) : + if j > r or (i <= mid and v[i] <= v[j]) : + sum_left += 1 + tmp[k] = v[i] + i += 1 + else : + res += sum_left + tmp[k] = v[j] + j += 1 + + for k in range(l, r + 1) : + v[k] = tmp[k] + + cdq(0, N) + assert res > 0 + self.parameter["reference_answer"] = res + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = " ".join(map(str, self.parameter["A"])), + K = self.parameter["K"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return 
self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/bounded_subarray_counting/__init__.py b/server/Gym/environments/bounded_subarray_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..257087ada1b7294165711fbd889ffb64cd6b9452 --- /dev/null +++ b/server/Gym/environments/bounded_subarray_counting/__init__.py @@ -0,0 +1 @@ +from .environment import BoundedSubarrayCounting_Environment diff --git a/server/Gym/environments/bounded_subarray_counting/environment.py b/server/Gym/environments/bounded_subarray_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8f97b4abb57bdd3c587a6ac1eeebd111b47fcf7a --- /dev/null +++ b/server/Gym/environments/bounded_subarray_counting/environment.py @@ -0,0 +1,118 @@ +import random +import bisect +from typing import Optional +from ...environment import VerifiableEnvironment + + +class BoundedSubarrayCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Given an array A of length {N}: +{A} + +Repeat array A {M} times to form a new array B of length {N} * {M} = {NM}. In the new array B, how many (nonempty) contiguous subarrays have a total sum less than or equal to {K}? + +**Output Format:** Your final answer should be a single integer — the total number of (nonempty) subarrays in B whose sum is less than or equal to {K}.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the BoundedIntervalCounting_Environment instance. 
+ """ + + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N should be greater than or equal to 1" + + assert "M" in self.parameter, "M is required in parameter" + M = self.parameter["M"] + assert M >= 2, "M should be greater than or equal to 2" + + A = self.parameter["A"] = [random.randint(1, N) for _ in range(N)] + K = self.parameter["K"] = random.randint(max(A), sum(A) * M) + + + # build prefix sums s[0..n] + s = [0] * (N + 1) + for i in range(1, N + 1): + s[i] = s[i - 1] + A[i - 1] + total = s[N] + + ans = 0 + # precompute m*(m-1)/2 * n for the “full‐span” case + mmn = M * (M - 1) // 2 * N + + for i in range(1, N + 1) : + si = s[i] + if si < K : + # how many *full* repeats we can append after position i without exceeding k + d = (K - si) // total + if d < M - 1 : + # contributions from using 0,1,...,d full copies + ans += i * (d + 1) + d * (d + 1) // 2 * N + + # partial in the (d+1)-th copy + e = (K - si) % total + # find smallest j with s[j] >= total - e + j = bisect.bisect_left(s, total - e) + # for each of the remaining (m-1-d) copies, we can take up to (n-j) more elements + ans += (i + d * N + (N - j)) * (M - 1 - d) + else : + # we can take all m copies plus all possible “full-span” subarrays + ans += i * M + mmn + else : + # even the prefix [1..i] exceeds k, so only shorter endings count + # find j so that s[i] - s[j] <= k => s[j] >= s[i] - k + j = bisect.bisect_left(s, si - K) + ans += (i - j) * M + + self.parameter["reference_answer"] = ans + assert ans > 0 + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = N, + M = M, + NM = N * M, + A = " ".join(map(str, self.parameter["A"])), + 
K = self.parameter["K"], + ) + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/box_scheduling/__init__.py b/server/Gym/environments/box_scheduling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25d36be8526795116ca1abee76d070cf46655a07 --- /dev/null +++ b/server/Gym/environments/box_scheduling/__init__.py @@ -0,0 +1 @@ +from .environment import BoxScheduling_Environment diff --git a/server/Gym/environments/box_scheduling/environment.py b/server/Gym/environments/box_scheduling/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ff141ef2c440b86b2b2310698e11b3eb9ad9f4 --- /dev/null +++ b/server/Gym/environments/box_scheduling/environment.py @@ -0,0 +1,160 @@ +import math +import random +from typing import Optional, List +from bisect import bisect_left, insort +from ...environment import VerifiableEnvironment + + +class BoxScheduling_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3207 + 
prompt_template = \ +r"""You are given a sequence C: {C} + +Now, please determine two non-negative integer sequences X[1], ..., X[{N_minus_1}] and Y[1], ..., Y[{N_minus_1}] such that: +- For 1 ≤ i ≤ {N_minus_1}, define: Pos[i] = (C[i] + {D} × X[i] + Y[i]) mod {N} +- The values Pos[1], ..., Pos[{N_minus_1}] must be all distinct. +- No Pos[i] can be equal to {S}. +- Among all valid solutions: + + First, minimize the lexicographical order of sequence Y. + + If multiple solutions have the same Y, then choose the one with the smallest lexicographical order of X. + +**Output Format:** A single line containing Pos[1], ..., Pos[{N_minus_1}], separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the BoxScheduling_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is requid in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 3, "MAX_N should be greater than or equal to 3" + + N = self.parameter["N"] = random.randint(3, MAX_N) + C = self.parameter["C"] = [random.randint(0, N - 1) for _ in range(N - 1)] + for iter in range(int(N ** 0.5)) : + D = self.parameter["D"] = random.randint(1, N - 1) + if math.gcd(D, N) > 1 : + break + S = self.parameter["S"] = random.randint(0, N - 1) + + + c = [0] + C + + # 2) DSU for “next free” in a D‐cycle + parent = list(range(N)) + def find(x): + while parent[x] != x: + parent[x] = parent[parent[x]] + x = parent[x] + return x + + # 3) prepare the multiset st of (residue_mod_G, count) + G = math.gcd(D, N) + con = N // G + tar 
= S % G + + # st will be a sorted list of (residue, remaining_slots) + st = [] + # we'll fill p[] as we go + p = [0] * N + + # initialize + for r in range(G): + if r != tar: + # all con slots available + insort(st, (r, con)) + else: + # reserve one for the empty slot at i=0 + p[0] = S + # mark S as used by linking it to (S+D)%N + parent[S] = find((S + D) % N) + # if there are more in this class, keep (con-1) + if con > 1: + insort(st, (r, con - 1)) + + # 4) assign positions for boxes 1..N-1 + for i in range(1, N): + key = c[i] % G + + # find the first entry in st with residue >= key + idx = bisect_left(st, (key, -1)) + if idx == len(st): + # wrap around to the smallest residue + idx = 0 + + r, cnt = st.pop(idx) + # if more remain in this residue‐class, put it back + if cnt > 1: + insort(st, (r, cnt - 1)) + + # compute the base position before DSU‐skipping + if r >= key: + j = (c[i] + (r - key)) % N + else: + # jump up one multiple of G + j = (((c[i] // G) + 1) * G + r) % N + + # find the actual next free slot in its D‐cycle + pj = find(j) + p[i] = pj + # mark pj used + parent[pj] = find((pj + D) % N) + + self.parameter["gold_answer"] = p[1 :] + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"])) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + C = " ".join("C[{}]={}".format(i + 1, Ci) for i, Ci in enumerate(self.parameter["C"])), + D = self.parameter["D"], + S = self.parameter["S"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert 
isinstance(processed_result, list), "processed_result should be a list" + + Pos = processed_result + if len(Pos) != self.parameter["N"] - 1 : + return self.rewards["invalid_solution"] + if set(Pos) != set(range(self.parameter["N"])) - {self.parameter["S"]} : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], Pos)) / (self.parameter["N"] - 1)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == Pos) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/bridge/__init__.py b/server/Gym/environments/bridge/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d1faac814b548311eabf00e666ac17f4051367 --- /dev/null +++ b/server/Gym/environments/bridge/__init__.py @@ -0,0 +1 @@ +from .environment import Bridge_Environment diff --git a/server/Gym/environments/bridge/environment.py b/server/Gym/environments/bridge/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..148141c38b4b7118d63aaae8deb22f125ec611f7 --- /dev/null +++ b/server/Gym/environments/bridge/environment.py @@ -0,0 +1,161 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class Bridge_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices labeled from 0 to {N_minus_1}. 
The graph contains the following undirected edges: +{edges} + +Your task is to find all edges (u, v) such that removing the edge (u, v) from the graph would disconnect vertices u and v (which are initially connected). + +**Output Format:** Assuming the edges are (u_1, v_1), (u_2, v_2), ..., (u_k, v_k), your final answer should be a single line containing `u_1 v_1 u_2 v_2 ... u_k v_k`, where the vertices are separated by spaces. Example: {two_edges} (do **NOT** include quotes or backticks).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(found/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the CutEdge_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 1" + + assert "component_num" in self.parameter, "component_num is required in parameter" + component_num = self.parameter["component_num"] + assert 2 <= component_num <= N, "component_num should be between 2 and N" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + while True : + components = [random.randint(0, component_num - 1) for vertex in range(N)] + if len(set(components)) >= 2 : + break + + component2vertices = [[] for _ in range(component_num)] + for vertex, component in enumerate(components) : + component2vertices[component].append(vertex) + + edges = self.parameter["edges"] = [] + remaining_edges = [] + + 
previous_vertices = [] + for component in range(component_num) : + vertices = component2vertices[component] + if len(vertices) == 0 : + continue + if previous_vertices : + u = random.choice(previous_vertices) + v = random.choice(vertices) + edges.append((min(u, v), max(u, v))) + for u in vertices : + for v in vertices : + if u < v : + remaining_edges.append((u, v)) + previous_vertices += vertices + + num_edges = int(edge_density * N * (N - 1) / 2) + if len(edges) < num_edges : + edges += random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + + adj = [[] for _ in range(N)] + for u, v in edges : + adj[u].append(v) + adj[v].append(u) + + disc = [-1] * N + low = [0] * N + timer = 0 + bridges = set() + + def dfs(u : int, parent : int) : + nonlocal timer + disc[u] = low[u] = timer + timer += 1 + for v in adj[u] : + if v == parent : + continue + if disc[v] == -1 : + dfs(v, u) + low[u] = min(low[u], low[v]) + if low[v] > disc[u] : + bridges.add((min(u, v), max(u, v))) + else : + low[u] = min(low[u], disc[v]) + + for u in range(N) : + if disc[u] == -1 : + dfs(u, -1) + + self.parameter["bridges"] = bridges = list(bridges) + assert len(bridges) > 0, "There should be at least one bridge" + self.parameter["reference_answer"] = " ".join("{} {}".format(u, v) for u, v in bridges) + + def _prompt_generate(self) -> str : + edges = self.parameter["edges"] + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in edges), + two_edges = " ".join("{} {}".format(u, v) for u, v in edges[: 2]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # 
Invalid answer format + else : + return None # Invalid answer format + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + bridges = processed_result + if len(bridges) % 2 != 0 : + return self.rewards["wrong_format"] + bridges = [(min(bridges[i], bridges[i + 1]), max(bridges[i], bridges[i + 1])) for i in range(0, len(bridges), 2)] + + if len(bridges) != len(set(bridges)) : + return self.rewards["invalid_solution"] + bridges = set(bridges) + + gold_bridges = set(map(tuple, self.parameter["bridges"])) + if not (bridges <= gold_bridges) : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "(found/all)^beta" : + return self.rewards["rewarding_weight"] * ((len(bridges) / len(gold_bridges)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "found=all" : + return self.rewards["rewarding_weight"] * (len(bridges) == len(gold_bridges)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/bubble_swap_lower_bound_permutation_counting/__init__.py b/server/Gym/environments/bubble_swap_lower_bound_permutation_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d8a5f6e2ac8182bc7c7071962c75b7cb7ac13fa9 --- /dev/null +++ b/server/Gym/environments/bubble_swap_lower_bound_permutation_counting/__init__.py @@ -0,0 +1 @@ +from .environment import BubbleSwapLowerBound_PermutationCounting_Environment diff --git a/server/Gym/environments/bubble_swap_lower_bound_permutation_counting/environment.py b/server/Gym/environments/bubble_swap_lower_bound_permutation_counting/environment.py new file mode 100644 index 
0000000000000000000000000000000000000000..d7c3b59586382d7f4294147e00da1e91eb05160f --- /dev/null +++ b/server/Gym/environments/bubble_swap_lower_bound_permutation_counting/environment.py @@ -0,0 +1,126 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class BubbleSwapLowerBound_PermutationCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4769 + prompt_template = \ +r"""Consider bubble sort on a permutation p[1..{N}] using the standard double loop: +``` +for i = 1 to N: + for j = 1 to N-1: + if p[j] > p[j+1]: swap p[j], p[j+1] +``` +It is known that the number of swaps performed by this algorithm is at least LB(p) = (abs(1 - p[1]) + abs(2 - p[2]) + ... + abs(N - p[N])) / 2. Tell me the number of permutations p of 1, 2, ..., {N} that satisfy BOTH: +1) The bubble sort swap count equals the lower bound: swaps(p) = LB(p). +2) p is lexicographically strictly greater than the given permutation P: {P}""" + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the BubbleSwapLowerBound_PermutationCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + self.parameter["P"] = P = list(range(1, N + 1)) + random.shuffle(P) + + + q = P.copy() + + # Build Pascal triangle up to 2*N (inclusive), no modulo during building + max_row = 2 * N + C = [] + for i in range(max_row + 1): + row = [0] * (i + 1) + row[0] = 1 + row[-1] = 1 + for j in range(1, i): + row[j] = C[i - 1][j - 1] + C[i - 1][j] + C.append(row) + + def comb(n, m): + if n < 0 or m < 0 or m > n: + return 0 + # n <= max_row should always hold given how F is used + return C[n][m] + + def F(i, j): + # i, j are 0/1-based consistent with the original usage: + # F(i-1, max(mx, v) + 1) in the loop with i from 1..N + if not (i <= j <= N): + return 0 + x = 2 * N - i - j - 1 + a = N - i - 1 + b = N - j - 2 + return comb(x, a) - comb(x, b) + + vis = [False] * (N + 2) # 1..N used; N+1 safe guard + ans = 0 + mx = 0 + mn = 1 + flag = False + + for i in range(1, N + 1): + v = q[i - 1] + if flag: + continue + ans += F(i - 1, max(mx, v) + 1) + if mx > v and v > mn: + flag = True + mx = max(mx, v) + vis[v] = True + while mn <= N and vis[mn]: + mn += 1 + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + P = ", ".join("P[{}]={}".format(i, Pi) for i, Pi in enumerate(self.parameter["P"], start = 1)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + 
processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + if processed_result == 0 : + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == 0) + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/bucket_sorting/__init__.py b/server/Gym/environments/bucket_sorting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..479cbfbd53a421b4f0e7b89293ff93320a64556a --- /dev/null +++ b/server/Gym/environments/bucket_sorting/__init__.py @@ -0,0 +1 @@ +from .environment import BucketSorting_Environment diff --git a/server/Gym/environments/bucket_sorting/environment.py b/server/Gym/environments/bucket_sorting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..a1df4118f453b575b869e0307706d2e9f91b654c --- /dev/null +++ b/server/Gym/environments/bucket_sorting/environment.py @@ -0,0 +1,76 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class BucketSorting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given the following array: {array} + +Please find the number that appears **most frequently** in the array. 
If there are multiple numbers with the same highest frequency, you may output **any one** of them.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the BucketSoring_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_answer": invalid_answer, + "correct_answer": correct_answer, + "wrong_answer": wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "MAX" in self.parameter, "MAX is required in parameter" + MAX = self.parameter["MAX"] + assert MAX >= 1, "MAX should be greater than or equal to 1" + + + self.parameter["array"] = [random.randint(0, MAX) for _ in range(N)] + + self.parameter["value2count"] = {} + for value in self.parameter["array"] : + if value not in self.parameter["value2count"] : + self.parameter["value2count"][value] = 0 + self.parameter["value2count"][value] += 1 + + self.parameter["reference_answer"] = max(self.parameter["value2count"].items(), key = lambda x : x[1])[0] + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(array = " ".join(map(str, self.parameter["array"]))) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result not in self.parameter["value2count"] : + return self.rewards["invalid_answer"] + + if self.parameter["value2count"][processed_result] == max(self.parameter["value2count"].values()) : + return self.rewards["correct_answer"] + 
# File: server/Gym/environments/campfire_party/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class CampfireParty_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1053
    """
    Circular seating task: every student names two desired neighbours, and the
    solver must reach a valid circle using cyclic moves of minimum total cost.
    """
    prompt_template = \
r"""There are {N} students labeled from `0` to `{N_minus_1}`. At the beginning, they are sitting in a **circle** in the order: `0, 1, ..., {N_minus_1}`. Each student has **two specific friends** they want to sit next to. Your task is to rearrange the students around the circle so that **every student is adjacent to both of their desired neighbors**.
{desired_neighbors}

To achieve this, you may perform a series of operations. Each operation is represented as a tuple `(b_1, b_2, ..., b_m)`, where:
- The student `b_1` moves to the position of `b_2`, `b_2` moves to the position of `b_3`, ..., and `b_m` moves to the position of `b_1`.
- The cost of an operation is equal to the number of students involved (`m`).
- No student may appear more than once in a single operation.

Your goal is to achieve the desired circular arrangement using the **minimum total cost** across all operations.

**Output Format:**
Your final answer should contain K lines, where K is the number of operations you perform. The K lines should each describe one operation: a space-separated list of the students involved in that operation, in the order `(b_1, b_2, ..., b_m)`.
Example (do **NOT** include the backticks or quotes):
```
0 1 2
1 2
2 3
```
This means:
- There are 3 operations,
- The first operation rotates students 0 → 1 → 2 → 0,
- The second rotates (swaps) students 1 ↔ 2,
- The third rotates (swaps) students 2 ↔ 3,
- And the total cost is `3 + 2 + 2 = 7`.
"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_beta : float = +3.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Initialize the CampfireParty_Environment instance.

        The keyword arguments configure the values returned by `scorer`:
        `wrong_format` (unparseable answer), `invalid_solution` (out-of-range
        or repeated student inside one operation), `unsuccessful_solution`
        (final circle violates a preference) and the strategy / beta / weight
        used to grade a successful answer. Remaining kwargs go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "unsuccessful_solution" : unsuccessful_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }

    def _generate(self) -> None :
        """
        Build a random instance from `self.parameter["N"]` (N >= 3).

        Writes `desired_neighbors` (one `(a, b)` pair per student),
        `reference_answer` (operations, one per line) and
        `reference_answer_cost` (total size of those operations).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # A hidden random circle defines everybody's two neighbours.
        permutation = list(range(N))
        random.shuffle(permutation)

        adjacent = self.parameter["desired_neighbors"] = [None] * N
        for i, student in enumerate(permutation) :
            a, b = permutation[(i - 1 + N) % N], permutation[(i + 1) % N]
            adjacent[student] = (a, b)

        # Sanity check: the neighbour relation must be symmetric.
        for student, (a, b) in enumerate(adjacent) :
            assert student in adjacent[a], f"Student {student} is not adjacent to {a}"
            assert student in adjacent[b], f"Student {student} is not adjacent to {b}"


        # Re-derive the circle from the neighbour lists, starting at student 0,
        # by always stepping to the neighbour we did not just come from.
        permutation = []
        x, parent = 0, -1
        while True :
            if x == 0 and parent != -1 :
                break
            permutation.append(x)
            for y in adjacent[x] :
                assert y is not None
                if y == parent :
                    continue
                x, parent = y, x
                break

        assert len(permutation) == N, "Permutation length should be equal to N"

        def solve() :
            """
            Return `(cost, cycles)` for the current orientation of `permutation`.

            The initial circle is rotated so that as many students as possible
            already sit on their target seat; the remaining displacement is
            decomposed into cycles, each of which becomes one operation.
            """
            target = permutation.copy()
            positions = [None] * N
            for i, p in enumerate(target) :
                positions[p] = i

            # diff = target seat - initial seat (mod N); the most common diff is
            # the rotation that leaves the most students untouched.
            counting = {}
            for i, position in enumerate(positions) :
                diff = (position - i + N) % N
                counting[diff] = counting.get(diff, 0) + 1
            optimal_diff = max(counting, key = lambda x : counting[x])

            # start[i] = student on seat i after that rotation.
            start = [(i - optimal_diff) % N for i in range(N)]
            for i, p in enumerate(start) :
                positions[p] = i

            target_positions = [None] * N
            for i, p in enumerate(target) :
                target_positions[p] = i

            cycles = []

            # point[s] = student currently occupying the seat s has to reach.
            point = [None] * N
            for s, position, target_position in zip(range(N), positions, target_positions) :
                if position == target_position :
                    continue
                point[s] = start[target_position]

            visited = [False] * N
            for s in range(N) :
                if visited[s] :
                    continue
                if point[s] is None :
                    continue
                cycle = []
                x = s
                while True :
                    cycle.append(x)
                    visited[x] = True
                    x = point[x]
                    if x == s :
                        break
                cycles.append(cycle)

            def operation(cycle) :
                """Apply one cyclic move to `start` / `positions`; return its cost."""
                assert len(cycle) >= 2
                assert len(cycle) == len(set(cycle))
                new_positions = [positions[i] for i in cycle]
                new_positions = new_positions[1 :] + [new_positions[0]]
                for i, new_position in zip(cycle, new_positions) :
                    start[new_position] = i
                    positions[i] = new_position
                return len(cycle)

            cost = sum(operation(cycle) for cycle in cycles)

            # Replaying the operations must yield exactly the target circle.
            for s, t in zip(start, target) :
                assert s == t
            for i, p in enumerate(start) :
                assert positions[p] == i

            return cost, cycles

        # Try both orientations of the circle and keep the cheaper plan.
        cost, cycles = solve()
        permutation.reverse()
        candidate_cost, candidate_cycles = solve()
        if cost > candidate_cost :
            cost, cycles = candidate_cost, candidate_cycles

        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, cycle)) for cycle in cycles)
        self.parameter["reference_answer_cost"] = cost
        assert cost == sum(len(cycle) for cycle in cycles)

    def _prompt_generate(self) -> str :
        """Render the prompt with N and one preference line per student."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            desired_neighbors = "\n".join("Student {} prefers neighbors: {} and {}".format(student, a, b) for student, (a, b) in enumerate(self.parameter["desired_neighbors"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Parse the answer into a list of operations (lists of ints).

        Blank lines are ignored. Returns None when `answer` is None or any
        token is not an integer.
        """
        if answer is not None :
            answer = answer.strip()
            try :
                cycles = []
                for line in answer.splitlines() :
                    line = line.strip()
                    if line :
                        cycles.append(list(map(int, line.split())))
                return cycles
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Replay the submitted operations from the initial circle and grade them.

        Returns `wrong_format` when nothing parseable was produced,
        `invalid_solution` for an out-of-range or repeated student,
        `unsuccessful_solution` when some preference is unmet, otherwise a
        reward derived from reference cost vs. submitted cost.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm.
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            # permutation[seat] = student, positions[student] = seat.
            permutation = list(range(self.parameter["N"]))
            positions = list(range(self.parameter["N"]))
            for cycle in processed_result :
                for student in cycle :
                    if not (0 <= student < self.parameter["N"]) :
                        return self.rewards["invalid_solution"]
                if len(cycle) == 1 :
                    # A one-student operation moves nobody (it still counts towards cost below).
                    continue
                if len(cycle) != len(set(cycle)) :
                    return self.rewards["invalid_solution"]

                new_positions = [positions[i] for i in cycle]
                new_positions = new_positions[1 :] + [new_positions[0]]
                for i, new_position in zip(cycle, new_positions) :
                    permutation[new_position] = i
                    positions[i] = new_position
                for i, p in enumerate(permutation) :
                    assert positions[p] == i

            for student, (a, b) in enumerate(self.parameter["desired_neighbors"]) :
                p, pa, pb = positions[student], positions[a], positions[b]
                if pa not in ((p - 1 + self.parameter["N"]) % self.parameter["N"], (p + 1) % self.parameter["N"]) :
                    return self.rewards["unsuccessful_solution"]
                if pb not in ((p - 1 + self.parameter["N"]) % self.parameter["N"], (p + 1) % self.parameter["N"]) :
                    return self.rewards["unsuccessful_solution"]

            cost = sum(len(cycle) for cycle in processed_result)
            gold = self.parameter["reference_answer_cost"]
            assert gold <= cost, "cost should be greater than or equal to reference_answer_cost"

            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
                if cost == 0 :
                    # Only reachable when gold is 0 as well; avoids 0 / 0.
                    return self.rewards["rewarding_weight"]
                return self.rewards["rewarding_weight"] * ((gold / cost) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (gold == cost)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/campsite_puzzle/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class CampsitePuzzle_Environment(VerifiableEnvironment) :
    """
    Grid-filling puzzle: complete a 0/1 matrix so that no two orthogonally
    adjacent cells are both `1` and the given row / column counts of `1` hold.
    """
    prompt_template = \
r"""You are given a {N} × {M} matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
1. No two (horizontally or vertically) adjacent cells in a row or column can both contain `1`.
2. The number of `1`s in each row (from top to bottom) is: {row_counts}.
3. The number of `1`s in each column (from left to right) is: {col_counts}.

The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
{matrix}

**Output Format:** Output {N} lines, each containing {M} characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the CampsitePuzzle_Environment instance.

        The keyword arguments configure the values returned by `scorer`;
        remaining kwargs are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Build a random instance from `MAX_N_M` (>= 2) and `sparsity` (0 < s < 1).

        Writes `N`, `M`, `reference_answer`, `row_counts`, `col_counts` and
        `matrix` (the solved grid with a random subset of cells blanked to '*').
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)

        def generate_matrix(N, M) :
            """
            Fill an N x M grid with '0'/'1' so that no two orthogonal
            neighbours are both '1'.

            Cells are visited in random order and each receives the first of a
            randomly ordered ("0", "1") pair that fits. "0" always fits, so no
            backtracking is needed; a plain loop (instead of one recursion
            level per cell) keeps large grids clear of the recursion limit.
            """
            grid = [[None] * M for _ in range(N)]
            all_cells = [(i, j) for i in range(N) for j in range(M)]
            random.shuffle(all_cells) # Shuffle to ensure randomness in placement

            for i, j in all_cells :
                for v in random.sample(["0", "1"], 2) :
                    if v == "1" :
                        # A '1' may not touch an already placed '1'.
                        if j >= 1 and grid[i][j - 1] == "1" :
                            continue
                        if j + 1 < M and grid[i][j + 1] == "1" :
                            continue
                        if i >= 1 and grid[i - 1][j] == "1" :
                            continue
                        if i + 1 < N and grid[i + 1][j] == "1" :
                            continue
                    grid[i][j] = v
                    break
                else :
                    # Unreachable in practice ("0" is always accepted); kept as a guard.
                    raise AssertionError("Failed to generate a valid matrix")

            return grid

        matrix = generate_matrix(N, M)
        self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)

        self.parameter["row_counts"] = [sum(int(cell == "1") for cell in row) for row in matrix]
        self.parameter["col_counts"] = [sum(int(matrix[i][j] == "1") for i in range(N)) for j in range(M)]

        assert "sparsity" in self.parameter, "sparsity is required in parameter"
        sparsity = self.parameter["sparsity"]
        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
        # Blank out at least one cell; cells are addressed by flat row-major index.
        empty_cells = random.sample(range(N * M), max(1, int(N * M * sparsity)))
        for cell in empty_cells :
            row, column = divmod(cell, M)
            matrix[row][column] = '*'
        self.parameter["matrix"] = ["".join(row) for row in matrix]


    def _prompt_generate(self) -> str :
        """Render the prompt with the grid size, the masked grid and both count lists."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            matrix = "\n".join("".join(map(str, row)) for row in self.parameter["matrix"]),
            row_counts = ", ".join(map(str, self.parameter["row_counts"])),
            col_counts = ", ".join(map(str, self.parameter["col_counts"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Split the answer into its non-empty, stripped lines.

        Returns None only when `answer` is None; content validation is done
        by `scorer`.
        """
        if answer is None :
            return None
        matrix = []
        for line in answer.strip().splitlines() :
            line = line.strip()
            if line :
                matrix.append(line)
        return matrix


    def scorer(self, output : str) -> float :
        """
        Grade a submitted grid.

        Returns `wrong_format` for a wrong shape or characters other than
        '0'/'1', `invalid_solution` when a given cell was changed or two
        neighbouring cells are both '1', otherwise a reward based on how many
        of the N + M row / column counts match.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm.
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            N, M = self.parameter["N"], self.parameter["M"]
            solution = processed_result

            if len(solution) != N or any(len(row) != M for row in solution) :
                return self.rewards["wrong_format"]
            for row in solution :
                if not all(c in "01" for c in row) :
                    return self.rewards["wrong_format"]

            # Pre-filled cells must be left untouched.
            for row, original_row in zip(solution, self.parameter["matrix"]) :
                for cell, original_cell in zip(row, original_row) :
                    if original_cell != '*' and cell != original_cell :
                        assert (original_cell == '0' and cell == '1') or (original_cell == '1' and cell == '0')
                        return self.rewards["invalid_solution"]

            delta = [
                (+1, 0),
                (-1, 0),
                (0, +1),
                (0, -1),
            ]
            for i in range(N) :
                for j in range(M) :
                    for di, dj in delta :
                        ni, nj = i + di, j + dj
                        if 0 <= ni < N and 0 <= nj < M and solution[i][j] == solution[ni][nj] == "1" :
                            return self.rewards["invalid_solution"]

            row_counts = [sum(int(cell == "1") for cell in row) for row in solution]
            col_counts = [sum(int(solution[i][j] == "1") for i in range(N)) for j in range(M)]

            satisfied = sum(int(answer == gold) for answer, gold in zip(row_counts, self.parameter["row_counts"])) + \
                        sum(int(answer == gold) for answer, gold in zip(col_counts, self.parameter["col_counts"]))
            assert satisfied <= N + M, "satisfied should not exceed N + M"

            if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" :
                return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "satisfied=all" :
                return self.rewards["rewarding_weight"] * (satisfied == (N + M))
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
# File: server/Gym/environments/canon/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class Canon_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3214
    """
    Counting task: ordered sequences of M distinct non-empty subsets of
    {1..N} in which every element occurs an even number of times, modulo MOD.
    """
    prompt_template = \
r"""Let S be the set of integers from 1 to {N} ({N} integers in total).

Please count the number of sequences T[1], ..., T[{M}] such that:
- Each T[i] is a **non-empty subset** of S.
- For each integer x in [1, {N}], the total number of subsets T[i] that contain x is an **even number** (including 0).
- T[1], ..., T[{M}] are **distinct** subsets.

**Output Format:** Output a single integer — the number of valid sequences T, modulo {MOD}."""


    def __init__(self,
                 max_MOD : int = 1000000,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the Canon_Environment instance.

        `max_MOD` is the inclusive upper bound of the randomly drawn modulus;
        the other keyword arguments are the values returned by `scorer`.
        Remaining kwargs are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Draw N, M in [2, MAX_N_M] and MOD in [2, max_MOD], then store the
        reference count in `self.parameter["reference_answer"]`.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, MAX_N_M)
        M = self.parameter["M"] = random.randint(2, MAX_N_M)
        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)


        # tot = number of non-empty subsets of S, i.e. 2^N - 1, kept modulo MOD
        # (it may be -1 here; every later use reduces it with `% MOD`).
        tot = pow(2, N, MOD) - 1

        # A[i] = tot * (tot - 1) * ... * (tot - (i - 1)) mod MOD:
        # ordered choices of i distinct non-empty subsets.
        A = [0] * (M + 1)
        A[0] = 1
        for i in range(1, M + 1) :
            A[i] = A[i - 1] * ((tot - (i - 1)) % MOD) % MOD

        # f[i] = number of valid ordered sequences of i distinct non-empty subsets (mod MOD).
        f = [0] * (M + 1)
        f[0] = 1
        # f[1] stays 0: a single non-empty subset leaves its elements with odd count.
        for i in range(2, M + 1) :
            # Pick the first i-1 subsets freely; the parity condition then
            # determines the i-th subset uniquely.
            val = A[i - 1]
            # Remove the cases where that forced subset is empty, i.e. the
            # first i-1 subsets were already a valid sequence.
            val = (val - f[i - 1]) % MOD
            # Remove the cases where the forced subset equals one of the first
            # i-1: the other i-2 subsets form a valid sequence, the duplicate
            # sits at one of i-1 positions and can be any of the
            # tot - (i-2) subsets not used by them.
            correction = f[i - 2] * (i - 1) % MOD * ((tot - (i - 2)) % MOD) % MOD
            val = (val - correction) % MOD
            f[i] = val

        self.parameter["reference_answer"] = f[M]


    def _prompt_generate(self) -> str :
        """Render the prompt with N, M and MOD."""
        return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"], MOD = self.parameter["MOD"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Grade an answer: `wrong_format` if unparseable, `wrong_range` if
        outside [0, MOD), else `correct_answer` / `wrong_answer`.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm.
        processed_result = self.processor(output)
        if processed_result is not None :
            if not (0 <= processed_result < self.parameter["MOD"]) :
                return self.rewards["wrong_range"]
            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
# File: server/Gym/environments/cantor_expansion/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class CantorExpansion_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3477
    """
    Rank-of-a-multiset-permutation task: count the distinct rearrangements of
    a sequence that are lexicographically smaller than it, modulo MOD.
    """
    prompt_template = \
r"""Given a sequence of integers: {A}

Please count the number of distinct permutations of this sequence that are **lexicographically smaller** than the original sequence. Output a single integer — the number of such permutations modulo {MOD}.
Note: Permutations that only differ by the positions of equal elements are considered the **same**."""

    def __init__(self,
                 max_MOD : int = 100000,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the CantorExpansion_Environment instance.

        `max_MOD` bounds the randomly drawn modulus (inclusive); the reward
        arguments are the values handed back by `scorer`. Everything else is
        passed through to the base class.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Draw a sequence A of length N (N >= 3) and a modulus, then store the
        rank of A among its distinct permutations as `reference_answer`.

        MOD need not be prime, so the running value k! / prod(cnt!) is kept as
        (part coprime to MOD) x (explicit exponents of MOD's prime factors).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        value_cap = random.randint(2, N)
        A = self.parameter["A"] = [random.randint(1, value_cap) for _ in range(N)]
        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

        largest = max(A)

        # Distinct prime factors of MOD and phi(MOD), by trial division.
        phi, remainder, primes = MOD, MOD, []
        divisor = 2
        while divisor * divisor <= remainder :
            if remainder % divisor == 0 :
                primes.append(divisor)
                phi = phi // divisor * (divisor - 1)
                while remainder % divisor == 0 :
                    remainder //= divisor
            divisor += 1
        if remainder > 1 :
            primes.append(remainder)
            phi = phi // remainder * (remainder - 1)

        # Fenwick tree over values: how many suffix elements are <= a value.
        fenwick = [0] * (largest + 1)

        def fenwick_add(pos) :
            while pos <= largest :
                fenwick[pos] += 1
                pos += pos & -pos

        def fenwick_prefix(pos) :
            total = 0
            while pos > 0 :
                total += fenwick[pos]
                pos -= pos & -pos
            return total

        # inverse[k] = k^(phi-1) mod MOD, a true inverse only when gcd(k, MOD) = 1;
        # it is only ever read at indices already stripped of MOD's primes.
        inverse = [0] * (N + 2)
        inverse[1] = 1
        exponents = [0] * len(primes)      # exponent of each prime in the running value
        multiplicity = [0] * (largest + 1) # occurrences of each value in the suffix

        def split_off_primes(value, step) :
            """Move MOD's prime factors of `value` into `exponents` (+1 multiply, -1 divide); return the coprime rest."""
            for j, prime in enumerate(primes) :
                while value % prime == 0 :
                    value //= prime
                    exponents[j] += step
            return value

        unit_part = 1 # part of the running value that is coprime to MOD
        rank = 0

        # The last element forms the initial one-element suffix.
        fenwick_add(A[N - 1])
        multiplicity[A[N - 1]] += 1

        for idx in reversed(range(N - 1)) :
            here = A[idx]
            # Suffix elements strictly smaller than the current one.
            smaller = fenwick_prefix(here - 1)

            # Extend the factorial: multiply by the current suffix length.
            suffix_len = (N - 1) - idx
            unit_part = unit_part * split_off_primes(suffix_len, +1) % MOD

            # Put the current element into the suffix statistics.
            fenwick_add(here)
            inverse[suffix_len + 1] = pow(suffix_len + 1, phi - 1, MOD)
            multiplicity[here] += 1

            # Divide by the new multiplicity of this value.
            unit_part = unit_part * inverse[split_off_primes(multiplicity[here], -1)] % MOD

            if smaller > 0 :
                # Contribution of this position: smaller * (running value).
                unit_part = unit_part * split_off_primes(smaller, +1) % MOD

                term = unit_part
                for prime, exponent in zip(primes, exponents) :
                    if exponent :
                        term = term * pow(prime, exponent, MOD) % MOD
                rank = (rank + term) % MOD

                # Undo the temporary factor so the state matches the suffix again.
                unit_part = unit_part * inverse[split_off_primes(smaller, -1)] % MOD

        self.parameter["reference_answer"] = rank % MOD


    def _prompt_generate(self) -> str :
        """Render the prompt with the comma-separated sequence and the modulus."""
        sequence_text = ", ".join(map(str, self.parameter["A"]))
        return self.prompt_template.format(A = sequence_text, MOD = self.parameter["MOD"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Turn the answer text into an int; None when absent or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Grade an answer: `wrong_format` when unparseable, `wrong_range` when
        outside [0, MOD), otherwise `correct_answer` or `wrong_answer`.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm.
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]) :
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
# File: server/Gym/environments/capital_city_effect/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class CapitalCityEffect_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3754
    """
    Digit-run task: f(x) sums digit * (run length)^2 over the maximal runs of
    equal digits of x; the answer is the sum of f(x) for x in [L, R].
    """
    prompt_template = r"""Let’s define f(x) as follows, where x is a positive integer in its base-10 representation:
- Divide x into **segments**, where each segment is a maximal substring consisting of the same digit.
- For each segment, compute `digit × (length of segment)^2`.
- Then, f(x) is the **sum** over all segments.
- For example, f(2334222) = 2×1² + 3×2² + 4×1² + 2×3² = 2 + 12 + 4 + 18 = 36, where the segments are `2` (length 1), `33` (length 2), `4` (length 1), and `222` (length 3).

Please output the sum of f(x) for all integers x in the range [{L}, {R}] (inclusive)."""
    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the CapitalCityEffect_Environment instance.

        The keyword arguments configure the values returned by `scorer`;
        remaining kwargs are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta
        }


    def _generate(self) -> None :
        """
        Draw R in [20, MAX_R] and L in [1, R]; store the sum of f over [L, R]
        as `reference_answer`.
        """
        assert "MAX_R" in self.parameter, "MAX_R is required in parameter"
        MAX_R = self.parameter["MAX_R"]
        assert MAX_R >= 20, "MAX_R should be greater than or equal to 20"
        R = self.parameter["R"] = random.randint(20, MAX_R)
        L = self.parameter["L"] = random.randint(1, R)


        def solve(x):
            """Return the sum of f(y) over all y in [0, x] via a digit DP."""
            digits = list(map(int, str(x)))
            n = len(digits)
            # memo for non-tight states: key = (pos, last, length, sum_),
            # value = total of f over all completions of that prefix
            dp = {}

            def dfs(pos, last, length, sum_, tight):
                # pos: next digit index; last / length: digit and size of the
                # open run (last == -1 before any digit); sum_: value of the
                # runs already closed; tight: prefix still equals x's prefix.
                # Leading zeros form a run of digit 0, which contributes 0.
                if pos == n:
                    # All digits placed: add the still-open run.
                    return sum_ + (length * length * last if last != -1 else 0)

                # Only memoize when we're not tight
                if not tight:
                    key = (pos, last, length, sum_)
                    if key in dp:
                        return dp[key]

                maxd = digits[pos] if tight else 9
                ans = 0
                for d in range(maxd + 1):
                    if d == last:
                        # extend current segment
                        new_sum = sum_
                        new_len = length + 1
                    else:
                        # close off previous segment (if any) and start a new one
                        closed = (length * length * last) if last != -1 else 0
                        new_sum = sum_ + closed
                        new_len = 1
                    ans += dfs(pos + 1, d, new_len, new_sum, tight and d == maxd)

                if not tight:
                    dp[key] = ans
                return ans

            return dfs(0, -1, 0, 0, True)
        # Prefix-sum difference gives the inclusive range [L, R].
        self.parameter["reference_answer"] = solve(R) - solve(L - 1)


    def _prompt_generate(self) -> str :
        """Render the prompt with L and R."""
        return self.prompt_template.format(L = self.parameter["L"], R = self.parameter["R"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None

    def scorer(self, output : str) -> float :
        """
        Grade an answer: `wrong_format` if unparseable or negative, otherwise
        a reward from comparing it with the reference (ratio-based or exact).
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm.
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result < 0 :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                # The reference is a sum of f(x) >= 1 over a non-empty range,
                # so max(a, b) is positive and the division is safe.
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/card_coloring_counting/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment
from collections import deque


class CardColoringCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1446
    """
    Burnside-counting task: number of red/green/blue colourings of N cards,
    up to the permutation group generated by the given shuffles.
    """
    prompt_template = \
r"""You have **{N}** cards, and there are some card shuffling methods. Each shuffle method is described by a row of {N} integers: X_1, X_2, ..., X_{N}. After applying a shuffle method, the card at position i moves to the position X_i. The shuffling methods are as follows:
{shuffling_methods}

Now, you need to color these cards into **{R}** red cards, **{G}** green cards, and **{B}** blue cards.
Determine how many distinct coloring schemes exist, where two colorings are considered the same if and only if one can be transformed into the other by any combination of the available shuffling methods (shuffles may be applied multiple times and in any order).

**Output Format:** Your final answer should be a single integer — the total number of distinct coloring schemes, considering equivalence under the shuffling methods."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the CardColoringCounting_Environment instance.

        The keyword arguments configure the values returned by `scorer`;
        remaining kwargs are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Build a random instance from `N` (>= 2 cards) and `K` (>= 0 shuffles).

        Writes `R`, `G`, `B` (summing to N), `shuffling_methods` (K random
        1-based permutations as tuples) and `reference_answer`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "K" in self.parameter, "K is required in parameter"
        K = self.parameter["K"]
        assert K >= 0, "K should be greater than or equal to 0"

        # Randomly generate R, G, B such that R + G + B = N
        R = random.randint(0, N)
        remaining = N - R
        G = random.randint(0, remaining)
        B = remaining - G
        RGB = [R, G, B]
        random.shuffle(RGB)
        R, G, B = RGB
        self.parameter["R"] = R
        self.parameter["G"] = G
        self.parameter["B"] = B

        # Generate K random shuffling methods (permutations) directly;
        # duplicates are possible and harmless.
        selected_perms = []
        while len(selected_perms) < K :
            selected_perms.append(tuple(random.sample(range(1, N + 1), N)))

        self.parameter["shuffling_methods"] = selected_perms

        def perm_compose(p1, p2) :
            """Compose two 1-based permutations: result[i] = p1[p2[i]]."""
            return tuple(p1[x - 1] for x in p2)

        # Closure of the generators under composition, explored breadth-first
        # from the identity. In a finite group this reaches every element, and
        # the identity is always a member.
        identity = tuple(range(1, N + 1))
        subgroup = {identity}
        queue = deque([identity])
        while queue :
            current = queue.popleft()
            for perm in selected_perms :
                composed = perm_compose(current, perm)
                if composed not in subgroup :
                    subgroup.add(composed)
                    queue.append(composed)

        def cycle_type(perm) :
            """Return the sorted tuple of cycle lengths of a 1-based permutation."""
            seen = [False] * N
            sizes = []
            for first in range(N) :
                if seen[first] :
                    continue
                length = 0
                x = first
                while not seen[x] :
                    seen[x] = True
                    length += 1
                    x = perm[x] - 1
                sizes.append(length)
            return tuple(sorted(sizes))

        def count_fixed_colorings(sizes) :
            """
            Count colourings fixed by a permutation with the given cycle sizes.

            A colouring is fixed iff every cycle is monochromatic, so this
            counts assignments of cycles to colours with R red and G green
            cards in total; blue then holds the remaining N - R - G = B cards.
            """
            dp = [[0] * (G + 1) for _ in range(R + 1)]
            dp[0][0] = 1
            for size in sizes :
                # Descending order so each cycle is used exactly once (in-place knapsack).
                for r in range(R, -1, -1) :
                    for g in range(G, -1, -1) :
                        ways = dp[r][g]                # this cycle is blue
                        if r >= size :
                            ways += dp[r - size][g]    # this cycle is red
                        if g >= size :
                            ways += dp[r][g - size]    # this cycle is green
                        dp[r][g] = ways
            return dp[R][G]

        # Burnside's lemma: the number of orbits is the average number of fixed
        # colourings. The fixed count depends only on the cycle type, so it is
        # computed once per type instead of once per group element.
        fixed_by_type = {}
        total_fixed = 0
        for perm in subgroup :
            sizes = cycle_type(perm)
            if sizes not in fixed_by_type :
                fixed_by_type[sizes] = count_fixed_colorings(sizes)
            total_fixed += fixed_by_type[sizes]

        self.parameter["reference_answer"] = total_fixed // len(subgroup)
        assert self.parameter["reference_answer"] > 0


    def _prompt_generate(self) -> str :
        """Render the prompt with N, the colour counts and one shuffle per line."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            R = self.parameter["R"],
            G = self.parameter["G"],
            B = self.parameter["B"],
            shuffling_methods = "\n".join(" ".join(map(str, perm)) for perm in self.parameter["shuffling_methods"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None

    def scorer(self, output : str) -> float :
        """
        Grade an answer: `wrong_format` if unparseable or not positive,
        otherwise a reward from comparing it with the reference (ratio-based
        or exact).
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm.
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result <= 0 :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
< A[2×{N}] (all elements at even indices form a strictly increasing sequence) +- For all i = 1 to {N}, A[2i - 1] < A[2i] (each adjacent pair forms an increasing pair) + +Please compute the total number of such valid permutations. Output the result modulo {MOD}.""" + + def __init__(self, + max_MOD : int = 1000000000, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the CatalanNumberMod_Environment instance. + """ + super().__init__(**kwargs) + + self.max_MOD = max_MOD + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N) + MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD) + + + limit = 2 * N + + # Linear sieve to compute smallest prime factor (spf) for each number up to 2N + spf = [0] * (limit + 1) + primes = [] + for i in range(2, limit + 1): + if spf[i] == 0: + spf[i] = i + primes.append(i) + for p in primes: + ip = i * p + if p > spf[i] or ip > limit: + break + spf[ip] = p + + # cnt[i] will hold the exponent contribution of i in the product: + # numerator: product of (n+2)*(n+3)*...*(2n) + # denominator: product of 1*2*...*n + cnt = [0] * (limit + 1) + # subtract denominator + for i in range(1, N + 1): + cnt[i] = -1 + # add numerator (skip N+1, since it's neither in numerator nor denominator) + for i in range(N + 2, limit + 1): + cnt[i] = 1 + + # Propagate those counts down to prime factors + for i in range(limit, 1, -1): + if spf[i] < i: + c = cnt[i] + cnt[spf[i]] += c + cnt[i // spf[i]] += c + + # Multiply out primes^cnt[p] mod P + result = 1 + for p in primes: + exp = cnt[p] + if exp: + 
result = result * pow(p, exp, MOD) % MOD + + self.parameter["reference_answer"] = result + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], MOD = self.parameter["MOD"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/check_all_cycle_xor_zero/__init__.py b/server/Gym/environments/check_all_cycle_xor_zero/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f68004f3586ecce10a5235b4b8bbfba289af9a12 --- /dev/null +++ b/server/Gym/environments/check_all_cycle_xor_zero/__init__.py @@ -0,0 +1 @@ +from .environment import CheckAllCycleXorZero_Environment diff --git a/server/Gym/environments/check_all_cycle_xor_zero/environment.py b/server/Gym/environments/check_all_cycle_xor_zero/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..82f890ad8b844c669873edfc0385d39ef6590ef4 --- /dev/null +++ b/server/Gym/environments/check_all_cycle_xor_zero/environment.py @@ -0,0 +1,103 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class CheckAllCycleXorZero_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3907 + prompt_template = \ +r"""We have an **undirected graph** with {N} vertices labeled from `1` to `{N}`. 
The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge connects vertex `u` to vertex `v` with weight `w`: +{edges} + +A cycle is defined as a path that starts and ends at the same vertex. Determine whether **every** cycle in the graph has an XOR sum of its edge weights equal to 0; output YES if the condition holds for every cycle in the graph, otherwise output NO.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the CheckAllCycleXorZero_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + weight_range = 2 ** (N * (N - 1) // 2).bit_length() - 1 + + edges = self.parameter["edges"] = [] + + permutations = list(range(1, N + 1)) + random.shuffle(permutations) + XORs = [0] * (N + 1) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v, w = vertex, random.choice(permutations[: index]), random.randint(0, weight_range) + XORs[u] = XORs[v] ^ w + u, v = min(u, v), max(u, v) + edges.append((u, v, w)) + + must_YES = random.choice(["YES", "NO"]) + self.parameter["reference_answer"] = "YES" + + assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter" + edge_ratio = self.parameter["edge_ratio"] + num_edges = int(edge_ratio * N) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(1, N + 1) for v in range(u + 1, N + 1)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + 
for u, v in remaining_edges : + if must_YES == "YES" : + w = XORs[u] ^ XORs[v] + else : + w = random.randint(0, weight_range) + if (XORs[u] ^ XORs[v]) != w : + self.parameter["reference_answer"] = "NO" + edges.append((u, v, w)) + else : + assert False, "The number of edges should be less than num_edges" + if must_YES == "YES" : + assert self.parameter["reference_answer"] == "YES", "The reference answer should be YES" + random.shuffle(edges) + + for u, v, w in edges : + assert 1 <= u < v <= N + assert 0 <= w <= weight_range, "edge weight should be within the specified range" + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result not in ("YES", "NO") : + return self.rewards["invalid_answer"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/cho_hamsters/__init__.py b/server/Gym/environments/cho_hamsters/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f05b6c87fe88e6b50f66c22ed4464a5cf937505 --- /dev/null +++ b/server/Gym/environments/cho_hamsters/__init__.py @@ -0,0 +1 @@ +from .environment import ChoHamsters_Environment diff --git a/server/Gym/environments/cho_hamsters/environment.py b/server/Gym/environments/cho_hamsters/environment.py new file mode 100644 index 
0000000000000000000000000000000000000000..c45c1350bc28b9eae2e8ef56f939e1e83001ce6a --- /dev/null +++ b/server/Gym/environments/cho_hamsters/environment.py @@ -0,0 +1,164 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class ChoHamsters_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3502 + prompt_template = \ +r"""You are given {N} strings, listed below (it is guaranteed that for all i ≠ j, the string S[i] is **NOT** a contiguous substring of S[j]): +{strings} + +Please construct a string T such that the **sum** (for all i) of `counting(T, S[i])` is **at least {M}**, where `counting(T, s)` is the number of (possibly overlapping) occurrences of the string `s` in `T`. +Try your best to **minimize the length** of such a string `T`. Output a single integer — the minimum possible length of `T`.""" + + def __init__(self, + length_multiple_min : int = 2, length_multiple_max : int = 3, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the ChoHamsters_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.length_multiple_min, self.length_multiple_max = length_multiple_min, length_multiple_max + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N should be greater than or equal to 1" + + while True : + S = self.parameter["S"] = [] + for _ in range(N) : + length = random.randint(N * self.length_multiple_min, N * self.length_multiple_max) + a_probability = random.random() + Si = "".join("a" if random.random() < a_probability else "b" for _ in range(length)) + S.append(Si) + if all(Si not in Sj for i, Si in enumerate(S) for j, Sj in enumerate(S) if i != j) : + break + + assert "MAX_M" in self.parameter, "MAX_M is required in parameter" + MAX_M = self.parameter["MAX_M"] + assert MAX_M >= 1, "MAX_M should be greater than or equal to 1" + + M = self.parameter["M"] = random.randint(1, MAX_M) + + + # Compute prefix-function (KMP) for each string in S + # pi[i][k] = length of longest proper prefix of S[i] which is also a suffix of S[i][:k+1] + pi = [] + for s in S: + L = len(s) + p = [0] * L + j = 0 + for i in range(1, L): + while j > 0 and s[j] != s[i]: + j = p[j-1] + if s[j] == s[i]: + j += 1 + p[i] = j + pi.append(p) + + # Determine an upper bound INF based on maximum possible cost: + # worst case, no overlaps, each added name costs its full length, + # so M * max_len + something. 
+ max_len = max(len(s) for s in S) + INF = M * max_len + 1 + + # Build the transition matrix Tra of size (N+1) x (N+1) + # Node 0 is the start; nodes 1..N correspond to S[0]..S[N-1] + Tra = [[INF] * (N+1) for _ in range(N+1)] + + # From start (0) to each name x: cost = full length of name x + for x in range(1, N+1): + Tra[0][x] = len(S[x-1]) + # From any name x back to start is impossible (set to INF) + # Tra[x][0] already INF + + # Precompute transition costs between names + # Tra[x][y] = extra letters needed to append name y after name x + for x in range(1, N+1): + sx = S[x-1] + len_x = len(sx) + for y in range(1, N+1): + sy = S[y-1] + len_y = len(sy) + # Find overlap: longest suffix of sx matching prefix of sy + j = 0 + # iterate over sx[1..end] (0-based: positions 1..len_x-1) + for i in range(1, len_x): + while j > 0 and sy[j] != sx[i]: + j = pi[y-1][j-1] + if sy[j] == sx[i]: + j += 1 + # j is the overlap length + Tra[x][y] = len_y - j + + # Matrix multiplication in min-plus (tropical) semiring + def mat_mult(A, B): + C = [[INF] * (N+1) for _ in range(N+1)] + for i in range(N+1): + for j in range(N+1): + # we can skip if A[i][j] is INF + aij = A[i][j] + if aij == INF: + continue + row_i = C[i] + bj = B[j] + for k in range(N+1): + v = aij + bj[k] + if v < row_i[k]: + row_i[k] = v + return C + + # Fast exponentiation: compute Ans = Tra^M + # Ans initially Tra^1 + Ans = [row[:] for row in Tra] + exp = M - 1 # we already account for one application of Tra + base = [row[:] for row in Tra] + while exp > 0: + if exp & 1: + Ans = mat_mult(Ans, base) + base = mat_mult(base, base) + exp >>= 1 + + # The answer is the minimum cost from start (0) to any name after M transitions + result = min(Ans[0][1:]) # ignore Ans[0][0] + self.parameter["reference_answer"] = result + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + strings = "\n".join("S[{}]={}".format(i, Si) for i, Si in enumerate(self.parameter["S"])), + M = 
self.parameter["M"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/cinema/__init__.py b/server/Gym/environments/cinema/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f4de87de90dfedeaeba275778bc70357b1eb7b6 --- /dev/null +++ b/server/Gym/environments/cinema/__init__.py @@ -0,0 +1 @@ +from .environment import Cinema_Environment diff --git a/server/Gym/environments/cinema/environment.py b/server/Gym/environments/cinema/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..450f870c8dd52d17a0a3f0bf7ea15481e15f1bf2 --- /dev/null +++ b/server/Gym/environments/cinema/environment.py @@ -0,0 +1,79 @@ +import math +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class Cinema_Environment(VerifiableEnvironment) : # Source: https://www.luogu.com.cn/problem/P3330 + prompt_template = \ +r"""There are {N} people entering a cinema and {K} numbered seats labeled from 1 to {K}. + +Each person, in order from 1 to {N}, independently picks a random integer L from 1 to {K}, uniformly at random. +- If seat L is unoccupied, they take it. +- If it's taken, they try seat L + 1, then L + 2, ..., up to seat {K}, until they find a free seat. +- If all seats from L to {K} are occupied, the person must stand. + +Please compute the **probability that all {N} people get a seat** (i.e., no one ends up standing). 
Output the probability as a reduced fraction `A B`, where A/B is the probability and gcd(A, B) = 1. + +**Output Format:** A single line with two integers `A B`, separated by a space — the reduced fraction representing the answer.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the Cinema_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N_K" in self.parameter, "MAX_N_K is required in parameter" + MAX_N_K = self.parameter["MAX_N_K"] + assert MAX_N_K >= 2, "MAX_N_K should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N_K) + K = self.parameter["K"] = random.randint(N, MAX_N_K) + assert N <= K, "N should be less than or equal to K" + + + ans1 = ((K + 1) ** (N - 1)) * (K - N + 1) + ans2 = K ** N + tmp = math.gcd(ans1,ans2) + ans1 //= tmp + ans2 //= tmp + self.parameter["gold_answer"] = (ans1, ans2) + self.parameter["reference_answer"] = "{} {}".format(ans1, ans2) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + a, b = map(int, answer.split()) + return a, b + except : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, tuple) and len(processed_result) == 2, "Processed result should be a tuple of two integers" + if processed_result == self.parameter["gold_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ 
No newline at end of file diff --git a/server/Gym/environments/circuit/__init__.py b/server/Gym/environments/circuit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..512d6f78647d72927c50f41784c0c2817767f5b5 --- /dev/null +++ b/server/Gym/environments/circuit/__init__.py @@ -0,0 +1 @@ +from .environment import Circuit_Environment diff --git a/server/Gym/environments/circuit/environment.py b/server/Gym/environments/circuit/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..b8668d49290d5b7a6a95506f592062deeb33547e --- /dev/null +++ b/server/Gym/environments/circuit/environment.py @@ -0,0 +1,137 @@ +import random +from typing import Optional, List, Dict +from ...environment import VerifiableEnvironment + + +class Circuit_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""There are {N} boolean (0/1) values x[0], x[1], ..., x[{N_minus_1}]. + +Given a Boolean expression (where `&` is bitwise AND, `|` is bitwise OR, and `^` is bitwise XOR): {expression} +Please find any solution x[0], x[1], ..., x[{N_minus_1}] that makes the expression evaluate to 1. + +Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**. +Example: `{N_boolean}` (do **NOT** include quotes or backticks).""" + + def __init__(self, + binary_ops_probs : Dict[str, float] = None, + wrong_format : float = -1.0, invalid_solution : float = -0.5, correct_solution : float = +1.0, wrong_solution : float = 0.0, + **kwargs) : + """ + Initialize the Circuit_Environment instance. 
+ """ + super().__init__(**kwargs) + + if binary_ops_probs is None : + binary_ops_probs = { + "&" : 0.25, + "|" : 0.25, + "^" : 0.5, + } + assert abs(sum(binary_ops_probs.values()) - 1.0) < 1E-8, "binary_ops_probs values should sum to 1" + self.binary_ops_probs = binary_ops_probs + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "correct_solution" : correct_solution, + "wrong_solution" : wrong_solution, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "M" in self.parameter, "M is required in parameter" + M = self.parameter["M"] + assert M >= N, "M should be greater than or equal to N" + + binary_ops, binary_probs = zip(*self.binary_ops_probs.items()) + + while True : + x = [random.randint(0, 1) for i in range(N)] + + def build_tree(n) : + if n == 1 : + index = random.randint(0, N - 1) + return index, x[index] + left_n = random.randint(1, n - 1) + right_n = n - left_n + left_tree, left_value = build_tree(left_n) + right_tree, right_value = build_tree(right_n) + op = random.choices(binary_ops, weights = binary_probs, k = 1)[0] + if op == "&" : + value = left_value & right_value + elif op == "|" : + value = left_value | right_value + elif op == "^" : + value = left_value ^ right_value + else : + raise ValueError("Invalid operator") + return (left_tree, op, right_tree), value + tree, value = build_tree(M) + + if value == 1 : + self.parameter["reference_answer"] = " ".join(map(str, x)) + self.parameter["tree"] = tree + break + + def build_expression(self, tree) : + if isinstance(tree, int) : + return "x[{}]".format(tree) + left_tree, op, right_tree = tree + return "({} {} {})".format(self.build_expression(left_tree), op, self.build_expression(right_tree)) + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N 
- 1, + expression = self.build_expression(self.parameter["tree"])[1 : -1], + N_boolean = " ".join(str(i % 2) for i in range(self.parameter["N"])), + ) + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + x = processed_result + if len(x) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if not all(xi in (0, 1) for xi in x) : + return self.rewards["invalid_solution"] + + def compute(tree) : + if isinstance(tree, int) : + return x[tree] + left_tree, op, right_tree = tree + left_value = compute(left_tree) + right_value = compute(right_tree) + if op == "&" : + return left_value & right_value + elif op == "|" : + return left_value | right_value + elif op == "^" : + return left_value ^ right_value + else : + raise ValueError("Invalid operator") + + if compute(self.parameter["tree"]) == 1 : + return self.rewards["correct_solution"] + else : + return self.rewards["wrong_solution"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/circulating_decimal_counting/__init__.py b/server/Gym/environments/circulating_decimal_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08aad2a11b86f06037a96b683be4efd0786e93d0 --- /dev/null +++ b/server/Gym/environments/circulating_decimal_counting/__init__.py @@ -0,0 +1 @@ +from .environment import CirculatingDecimalCounting_Environment diff --git a/server/Gym/environments/circulating_decimal_counting/environment.py 
b/server/Gym/environments/circulating_decimal_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..83918c8d64377b98594c098ab1600106863b872f --- /dev/null +++ b/server/Gym/environments/circulating_decimal_counting/environment.py @@ -0,0 +1,147 @@ +from math import gcd +from functools import lru_cache +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class CirculatingDecimalCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1587 + prompt_template = \ +r"""Please count how many **distinct pure repeating decimals** (in terms of numeric value) exist in base ${K}$, that can be written as a reduced fraction $\frac{x}{y}$ where $1 \le x \le {N}$ and $1 \le y \le {M}$, with $x$ and $y$ being integers. +A number is called a **pure repeating decimal** if and only if it can be written in the form of $$a.\dot{c_1} c_2 c_3 \dots c_{p - 1} \dot{c_p}$$, where $a$ is an integer, $p \ge 1$, and each $c_i$ ($1 \le i \le p$) is a digit in base ${K}$. + +Examples: +- In base 10, $0.454545\ldots = 0.\dot{4}\dot{5}$ is a pure repeating decimal; it can be written as $\frac{5}{11}$ or $\frac{10}{22}$. +- In contrast, $0.166666\ldots = 0.1\dot{6}$ is **not** pure repeating in base 10; it can be written as $\frac{1}{6}$. + +Note: +- **Integers are considered pure repeating**, because their decimal part can be represented as a repeating sequence of 0s. +- **Finite decimals with non-zero fractional parts** are **not** considered pure repeating. + +**Output Format:** Your final answer should be a single integer — the total number of such distinct pure repeating decimals.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the CirculatingDecimalCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 1, "MAX_N should be greater than or equal to 1" + N = self.parameter["N"] = random.randint(1, MAX_N) + + assert "MAX_M" in self.parameter, "MAX_M is required in parameter" + MAX_M = self.parameter["MAX_M"] + assert MAX_M >= 1, "MAX_M should be greater than or equal to 1" + M = self.parameter["M"] = random.randint(1, MAX_M) + + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K >= 2, "MAX_K should be greater than or equal to 2" + K = self.parameter["K"] = random.randint(2, MAX_K) + + + LIM = min(M, max(K, int(M ** 0.5) + 1)) + + g = [0] * (K + 1) + for i in range(1, K + 1): + g[i] = g[i - 1] + (1 if gcd(i, K) == 1 else 0) + + mu = [0] * (LIM + 1) + is_comp = [False] * (LIM + 1) + f = [0] * (LIM + 1) + primes = [] + + mu[1] = 1 + f[1] = 1 + + def G(x): + return (x // K) * g[K] + g[x % K] + + for i in range(2, LIM + 1): + if not is_comp[i]: + primes.append(i) + mu[i] = -1 + for p in primes: + ip = i * p + if ip > LIM: + break + is_comp[ip] = True + if i % p == 0: + mu[ip] = 0 + break + else: + mu[ip] = -mu[i] + f[i] = f[i - 1] + mu[i] * (G(i) - G(i - 1)) + + @lru_cache(None) + def F(x): + if x <= LIM: + return f[x] + res = 1 + l = 2 + while l <= x: + t = x // l + r = x // t + res -= F(t) * (G(r) - G(l - 1)) + l = r + 1 + return res + + ans = 0 + l = 1 + up = min(N, M) + while l <= up: + n_div = N // l + m_div = M // l + r = min(N // n_div, M // m_div) + ans += n_div * G(m_div) * (F(r) - F(l - 1)) + l = r + 1 + + assert ans > 0 + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return 
self.prompt_template.replace(r"{K}", str(self.parameter["K"])) \ + .replace(r"{N}", str(self.parameter["N"])) \ + .replace(r"{M}", str(self.parameter["M"])) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/circulating_grid/__init__.py b/server/Gym/environments/circulating_grid/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0e1408f5b6bd4073681d0fb4e9c6cb5ad281a590 --- /dev/null +++ b/server/Gym/environments/circulating_grid/__init__.py @@ -0,0 +1 @@ +from .environment import CirculatingGrid_Environment diff --git a/server/Gym/environments/circulating_grid/environment.py b/server/Gym/environments/circulating_grid/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..58ff6f7f25ac0b75bfc7771c4d2495a1eadfd117 --- /dev/null +++ b/server/Gym/environments/circulating_grid/environment.py @@ -0,0 +1,232 @@ +import random +from collections import deque +from typing import Optional, List +from ...environment import 
class CirculatingGrid_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3965
    """
    Toroidal arrow-grid task: change as few cells as possible so every cell lies on a cycle.

    The instance is a random R x C grid of `L`/`R`/`U`/`D` arrows (moves wrap around).
    The stored gold answer is the cost of a min-cost perfect matching between cells and
    the cells they point to, where keeping a cell's current arrow costs 0 and changing it costs 1.
    """
    prompt_template = \
r"""Consider a {R} × {C} grid, where each cell has coordinates (i, j) (0 ≤ i < {R}, 0 ≤ j < {C}). Each cell contains one of the characters `L`, `R`, `U`, or `D`, meaning:
- `L`: moves to (i, (j - 1) MOD {C})
- `R`: moves to (i, (j + 1) MOD {C})
- `U`: moves to ((i - 1) MOD {R}, j)
- `D`: moves to ((i + 1) MOD {R}, j)
Here, (-1 MOD N) = N - 1.

You are given such a grid:
{grid}

Modify any number of cells so that the resulting grid satisfies the following condition: Starting from any cell, it must be possible to eventually return to the same cell (simply standing there at the beginning does not count). Can you use as small the number of changes (i.e., number of cells modified) as possible? Output the modified grid in the same format — exactly {R} lines, each containing {C} characters (`L`, `R`, `U`, or `D`) with **no separators**."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Store the reward configuration; every other keyword is forwarded to the base class.

        wrong_format       -- reward for an unparsable or wrongly shaped answer
        invalid_solution   -- reward for a well-formed grid that violates the cycle condition
        rewarding_strategy -- "(gold/answer)^beta" or "gold=answer"
        rewarding_weight   -- multiplier applied to the strategy's score
        rewarding_beta     -- exponent used by "(gold/answer)^beta"
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """Draw a random grid (sides in [2, MAX_R_C]) and compute the minimum number of changes."""
        assert "MAX_R_C" in self.parameter, "MAX_R_C is required in parameter"
        side_limit = self.parameter["MAX_R_C"]
        assert side_limit >= 3, "MAX_R_C must be at least 3"

        # The order of the random calls below (R, C, 4 weights, then cells row by row) is
        # the order used by the original generator, so seeded runs stay reproducible.
        rows = random.randint(2, side_limit)
        cols = random.randint(2, side_limit)
        self.parameter["R"], self.parameter["C"] = rows, cols

        letters = ['L', 'R', 'U', 'D']
        letter_weights = [random.randint(1, rows * cols) for _ in range(4)]
        board = []
        for _ in range(rows) :
            board.append([random.choices(letters, weights = letter_weights)[0] for _ in range(cols)])
        self.parameter["grid"] = board

        # (row delta, column delta) per direction, listed in the same order as `letters`.
        moves = [(0, -1), (0, 1), (-1, 0), (1, 0)]
        direction_index = {letter : index for index, letter in enumerate(letters)}

        cell_count = rows * cols
        source = 2 * cell_count
        sink = source + 1
        node_count = sink + 1
        # Larger than any path cost in this network (each cell contributes at most cost 1).
        unreachable = rows * cols * 4 + 5

        # Flat residual network: edge 2k is a forward edge and 2k + 1 its reverse, so the
        # partner of edge `e` is `e ^ 1`.
        out_edges = [[] for _ in range(node_count)]
        target, capacity, weight = [], [], []

        def connect(u, v, cap, cost) :
            out_edges[u].append(len(target))
            target.append(v)
            capacity.append(cap)
            weight.append(cost)
            out_edges[v].append(len(target))
            target.append(u)
            capacity.append(0)
            weight.append(-cost)

        # Left copy of a cell -> right copy of each of its 4 (wrapped) neighbours;
        # free when that is the arrow already written in the cell, otherwise one change.
        for i in range(rows) :
            for j in range(cols) :
                current = direction_index[board[i][j]]
                for k, (di, dj) in enumerate(moves) :
                    neighbour = ((i + di) % rows) * cols + (j + dj) % cols
                    connect(i * cols + j, cell_count + neighbour, 1, 0 if k == current else 1)
        for left in range(cell_count) :
            connect(source, left, 1, 0)
        for right in range(cell_count, 2 * cell_count) :
            connect(right, sink, 1, 0)

        def cheapest_saturating_flow() -> int :
            """Successive shortest augmenting paths (queue-based Bellman-Ford); returns the total cost."""
            total_cost = 0
            while True :
                dist = [unreachable] * node_count
                queued = [False] * node_count
                reached_by = [-1] * node_count  # edge id through which each node was last relaxed
                dist[source] = 0
                pending = deque([source])
                queued[source] = True

                while pending :
                    u = pending.popleft()
                    queued[u] = False
                    for edge in out_edges[u] :
                        if capacity[edge] <= 0 :
                            continue
                        v = target[edge]
                        candidate = dist[u] + weight[edge]
                        if candidate < dist[v] :
                            dist[v] = candidate
                            reached_by[v] = edge
                            if not queued[v] :
                                queued[v] = True
                                pending.append(v)

                if reached_by[sink] == -1 :
                    return total_cost  # no augmenting path is left

                # Walk back from the sink to collect the path, then push its bottleneck.
                path = []
                node = sink
                while node != source :
                    edge = reached_by[node]
                    path.append(edge)
                    node = target[edge ^ 1]
                pushed = min([unreachable] + [capacity[edge] for edge in path])
                for edge in path :
                    capacity[edge] -= pushed
                    capacity[edge ^ 1] += pushed
                    total_cost += pushed * weight[edge]

        self.parameter["gold_answer"] = cheapest_saturating_flow()
        assert self.parameter["gold_answer"] >= 0, "Gold answer must be non-negative"


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the grid dimensions and the grid, one row per line."""
        rendered_rows = ["".join(row) for row in self.parameter["grid"]]
        return self.prompt_template.format(
            R = self.parameter["R"],
            C = self.parameter["C"],
            grid = "\n".join(rendered_rows),
        )


    def _process(self, answer : Optional[str]) -> Optional[List[str]] :
        """Split the answer into stripped, non-empty lines; `None` stays `None`."""
        if answer is None :
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]


    def scorer(self, output : str) -> float :
        """
        Score a submitted grid.

        Returns `wrong_format` when the answer cannot be parsed or is not an R x C grid over
        `LRUD`, `invalid_solution` when some cell does not receive exactly one incoming arrow,
        and otherwise the configured reward computed from the gold and the submitted number
        of changed cells.
        """
        submitted = self.processor(output)
        if submitted is None :
            return self.rewards["wrong_format"]

        rows, cols = self.parameter["R"], self.parameter["C"]
        if len(submitted) != rows :
            return self.rewards["wrong_format"]
        if any(len(row) != cols for row in submitted) :
            return self.rewards["wrong_format"]
        if any(ch not in "LRUD" for row in submitted for ch in row) :
            return self.rewards["wrong_format"]

        # Every cell has out-degree 1, so "every cell lies on a cycle" is checked as
        # "every cell has in-degree exactly 1".
        step = {"L" : (0, -1), "R" : (0, 1), "U" : (-1, 0), "D" : (1, 0)}
        arrivals = [[0] * cols for _ in range(rows)]
        for i, row in enumerate(submitted) :
            for j, ch in enumerate(row) :
                di, dj = step[ch]
                arrivals[(i + di) % rows][(j + dj) % cols] += 1
        if any(count != 1 for line in arrivals for count in line) :
            return self.rewards["invalid_solution"]

        original = self.parameter["grid"]
        changed = sum(1 for i in range(rows) for j in range(cols) if submitted[i][j] != original[i][j])
        gold = self.parameter["gold_answer"]
        assert gold <= changed, "Gold answer is greater than the computed answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta" :
            if changed == 0 :
                assert gold == 0, "Gold answer is non-zero but computed answer is zero"
                return self.rewards["rewarding_weight"] * 1.0
            return self.rewards["rewarding_weight"] * ((gold / changed) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold == changed)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# --- server/Gym/environments/cleaning_up/environment.py ---
# (the package __init__.py re-exports this class: `from .environment import CleaningUp_Environment`)
import math
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class CleaningUp_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2943
    """
    Partition a sequence into consecutive batches minimising the sum of (distinct values)^2.

    The model answers with the batch end indices; the score compares the cost of that
    division with the optimal cost stored as `gold_answer`.
    """
    prompt_template = \
r"""You are given {N} numbers A[1], A[2], ..., A[{N}]. The values are: {A}
You may divide these numbers (in order) into **consecutive non-empty batches**. Let the total number of batches be k, and let end[1], end[2], ..., end[k] (1 ≤ end[1] < end[2] < ... < end[k] = {N}) denote the last index of each batch. This means:
- Batch 1 contains A[1] to A[end[1]]
- Batch 2 contains A[end[1] + 1] to A[end[2]]
- ...
- Batch k contains A[end[k − 1] + 1] to A[end[k]] (with end[k] = {N})

Define the cost of a division as follows:
- For each batch i (1 <= i <= k), let K[i] be the number of **distinct** values in that batch.
- The total cost is the sum of K[i]^2 (i.e., the square of K[i]) over all batches.

Can you find a division that **minimizes the total cost**?

**Output Format:**
Output a single line: `end[1] end[2] ... end[k]` (space-separated, with `end[k] = {N}`).
Example: `1 2 {N}` means there are 3 batches ending at indices 1, 2, and {N}, respectively."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = 1.0,
                 **kwargs) :
        """
        Initialize the CleaningUp_Environment instance.

        wrong_format       -- reward for an answer that is not a list of integers
        invalid_solution   -- reward for end indices that do not form a valid division
        rewarding_strategy -- "(gold/answer)^beta" or "gold=answer"
        rewarding_beta     -- exponent used by "(gold/answer)^beta"
        rewarding_weight   -- multiplier applied to the strategy's score
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    @staticmethod
    def _minimum_total_cost(A : List[int]) -> Optional[int] :
        """
        Return the minimum total cost of dividing `A` into consecutive batches.

        f[i] is the best cost for the first i elements. `last` is a move-to-front list:
        last[0] is the current index and, for j >= 1, the batch starting right after
        last[j] and ending at the current index contains exactly j distinct values.
        Only batches with at most isqrt(len(A)) distinct values are considered.
        """
        N = len(A)
        # 1-indexed copy; P[0] = 0 is a sentinel (generated values are >= 1).
        P = [0] + list(A)

        k = int(math.isqrt(N))
        last = [-1] * (k + 2)  # one spare slot: the search below may stop at j = k + 1
        last[0] = 0

        f = [None] * (N + 1)
        f[0] = 0

        for i in range(1, N + 1) :
            x = P[i]

            # Locate the entry holding the previous occurrence of x (or the insertion point).
            j = 0
            while j <= k and last[j] != -1 and P[last[j]] != x :
                j += 1

            # Move-to-front: shift entries [0, j) right by one and put i first.
            while j > 0 :
                last[j] = last[j - 1]
                j -= 1
            last[0] = i

            best = None
            j = 1
            while j <= k and last[j] != -1 :
                prev = f[last[j]]
                cand = None if prev is None else prev + j * j
                if best is None or (cand is not None and cand < best) :
                    best = cand
                j += 1

            f[i] = best

        return f[N]


    def _generate(self) -> None :
        """
        Generate `A` and its optimal cost, retrying until the instance is non-trivial.

        A random division into segments is drawn first; a segment of length x is filled
        from a palette of floor(sqrt(x)) distinct values. The instance is accepted only
        when the optimum beats both "one batch per element" (cost N) and "a single batch"
        (cost distinct^2).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        while True :
            endpoints = random.sample(range(1, N), k = random.randint(1, N - 1))
            endpoints.sort()
            endpoints += [N]

            # Turn sorted end positions into segment lengths (in place, right to left).
            for i in range(len(endpoints) - 1, 0, -1) :
                endpoints[i] -= endpoints[i - 1]

            A = self.parameter["A"] = []
            for x in endpoints :
                assert x > 0
                palette_size = math.isqrt(x)  # largest r with r * r <= x; at least 1 since x > 0
                palette = random.sample(range(1, N + 1), k = palette_size)
                A.extend([random.choice(palette) for _ in range(x)])
            assert len(A) == N

            self.parameter["gold_answer"] = self._minimum_total_cost(A)
            assert self.parameter["gold_answer"] > 0

            if self.parameter["gold_answer"] < min(N, len(set(A)) ** 2) :
                break


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and the values rendered as `A[i]=v`, 1-indexed."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            A = ", ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1)),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return `None` if empty or malformed."""
        if answer is None :
            return None  # Invalid answer format
        try :
            answer_array = list(map(int, answer.strip().split()))
        except ValueError :
            return None  # Invalid answer format
        return answer_array if answer_array else None


    def scorer(self, output : str) -> float :
        """
        Score a submitted division.

        Returns `wrong_format` when parsing fails, `invalid_solution` when the end indices
        are out of range, not strictly increasing, or do not finish at N, and otherwise the
        configured reward computed from the gold cost and the submitted division's cost.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]

        ends = processed_result
        if not (1 <= len(ends) <= N) :
            return self.rewards["invalid_solution"]
        for i in range(len(ends)) :
            if not (1 <= ends[i] <= N) :
                return self.rewards["invalid_solution"]
            if i and not (ends[i - 1] < ends[i]) :
                return self.rewards["invalid_solution"]
        if ends[-1] != N :
            return self.rewards["invalid_solution"]

        A = [None] + self.parameter["A"]  # shift to 1-based indexing
        answer = 0
        last = 0
        for end in ends :
            K = len(set(A[last + 1 : end + 1]))
            answer += K ** 2
            last = end
        gold = self.parameter["gold_answer"]
        assert 0 < gold <= answer, "Gold answer should be less than or equal to the computed answer"

        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold == answer)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))


# --- server/Gym/environments/clear_symmetry/environment.py ---
# (the package __init__.py re-exports this class: `from .environment import ClearSymmetry_Environment`)
import random
import math
from typing import Optional
from ...environment import VerifiableEnvironment


class ClearSymmetry_Environment(VerifiableEnvironment):
    """
    Ask for the smallest side n of a clear, symmetrical 0/1 matrix with sharpness x.

    The reference answer is computed in closed form and the model's integer is
    compared with it for exact equality.
    """
    prompt_template = \
r"""Consider some square matrix A with side n consisting of zeros and ones. There are n rows numbered from 1 to n from top to bottom and n columns numbered from 1 to n from left to right in this matrix. We'll denote the element of the matrix which is located at the intersection of the i-row and the j-th column as A(i, j).

Let's call matrix A clear if no two cells containing ones have a common side.
Let's call matrix A symmetrical if it matches the matrices formed from it by a horizontal and/or a vertical reflection. Formally, for each pair (i, j) (1 ≤ i, j ≤ n) both of the following conditions must be met: A(i, j) = A(n - i + 1, j) and A(i, j) = A(i, n - j + 1).
Let's define the sharpness of matrix A as the number of ones in it.

Given integer x = {x}, your task is to find the smallest positive integer n such that there exists a clear symmetrical matrix A with side n and sharpness x.
Please output only the integer n in your response without any other text.
"""

    def __init__(self,
                 wrong_format: float = -1.0, correct_solution: float = 1.0, incorrect_solution: float = 0.0,
                 **kwargs):
        """
        Initialize the ClearSymmetry_Environment instance.

        wrong_format       -- reward when the answer is not an integer
        correct_solution   -- reward when the answer equals the reference
        incorrect_solution -- reward for any other integer
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "correct_solution": correct_solution,
            "incorrect_solution": incorrect_solution,
        }

    def _generate(self) -> None:
        """Draw x uniformly from [1, MAX_X] and store the reference side length."""
        assert "MAX_X" in self.parameter, "MAX_X is required in parameter"
        MAX_X = self.parameter["MAX_X"]
        assert MAX_X >= 1, "MAX_X should be greater than or equal to 1"

        self.parameter["x"] = random.randint(1, MAX_X)
        x = self.parameter["x"]

        # Compute the reference answer using the provided solution, source: https://codeforces.com/contest/201/submission/163120300
        def find_smallest_positive_integer(sharpness: int) -> int:
            if sharpness == 3:
                return 5
            # ceil(sqrt(m)) for an integer m >= 1 equals isqrt(m - 1) + 1. Integer
            # arithmetic keeps this exact where float sqrt would round for very large x.
            side = math.isqrt(2 * sharpness - 2) + 1
            # Round an even side up to the next odd number.
            return side + 1 - side % 2

        self.parameter["reference_answer"] = find_smallest_positive_integer(x)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with x."""
        return self.prompt_template.format(x=self.parameter["x"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return `None` when missing or not an integer."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return `correct_solution`, `incorrect_solution` or `wrong_format` for `output`."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_solution"]
        return self.rewards["incorrect_solution"]
a/server/Gym/environments/clique_independent_set_partitioning_counting/__init__.py b/server/Gym/environments/clique_independent_set_partitioning_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5625f66da73e71bd58347479cd781f314c6b76a0 --- /dev/null +++ b/server/Gym/environments/clique_independent_set_partitioning_counting/__init__.py @@ -0,0 +1 @@ +from .environment import Clique_IndependentSet_Partitioning_Counting_Environment diff --git a/server/Gym/environments/clique_independent_set_partitioning_counting/environment.py b/server/Gym/environments/clique_independent_set_partitioning_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..304e9d9d24d6924b231f1b554856a4f5b0142dba --- /dev/null +++ b/server/Gym/environments/clique_independent_set_partitioning_counting/environment.py @@ -0,0 +1,207 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class Clique_IndependentSet_Partitioning_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3513 + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges: +{edges} + +Please output the number of ways to partition the vertices into two **non-empty** sets S and T such that: +- S is a **clique** (i.e., every pair of distinct vertices in S is connected by an edge), +- T is an **independent set** (i.e., no pair of distinct vertices in T is connected by an edge), +- S and T are **disjoint** (i.e., S ∩ T = ∅).""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the Clique_IndependentSet_Partitioning_Counting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + clique = random.sample(range(N), random.randint(2, N - 1)) + independent_set = list(set(range(N)) - set(clique)) + edges = self.parameter["edges"] = [] + for u in clique : + for v in clique : + if u < v : + edges.append((u, v)) + edges += random.sample([(min(u, v), max(u, v)) for u in clique for v in independent_set], random.randint(0, len(clique) * len(independent_set))) + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + + flg = [ [False]*N for _ in range(N) ] + for u, v in edges : + flg[u][v]= flg[v][u] = True + + # 2-SAT implication graph on 2*N nodes: + # 0..N-1 == X_i (i is in the support group) + # N..2N-1 == ¬X_i (i is in the conspiracy group) + dfn = [0] * (2*N) + low = [0] * (2*N) + in_stack = [False] * (2*N) + col = [0] * (2*N) + stack = [] + tot, colid = 0, 0 + + def tarjan(u): + nonlocal tot, colid + tot += 1 + dfn[u] = low[u] = tot + in_stack[u] = True + stack.append(u) + + pos = u % N + for i in range(N): + if i == pos: + continue + v = -1 + # if u represents ¬X_pos (i.e. 
u>=N) and pos knows i, + # then add implication ¬X_pos → X_i + if u >= N and flg[pos][i]: + v = i + # if u represents X_pos (u 0 + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/coin_square_game/__init__.py b/server/Gym/environments/coin_square_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fa1f4f911ca0b272118358b1787cff44f3b3e824 --- /dev/null +++ b/server/Gym/environments/coin_square_game/__init__.py @@ -0,0 +1 @@ +from .environment import CoinSquareGame_Environment diff --git a/server/Gym/environments/coin_square_game/environment.py b/server/Gym/environments/coin_square_game/environment.py new file mode 100644 index 
# --- server/Gym/environments/coin_square_game/environment.py ---
import random
from array import array
from typing import Optional, List
from ...environment import VerifiableEnvironment


class CoinSquareGame_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2964
    """
    Two-player coin-taking game: compute the first player's optimal total.

    Coin values are drawn from [1, N * weight_multiple]; the model's integer answer is
    compared with the reference for exact equality.
    """
    prompt_template = \
r"""You are given {N} coins in a row (1-indexed from left to right). The i-th coin has value C[i]: {C}
Alice and Bob play alternately, with Alice going first. On a turn, a player removes some **positive number** of **leftmost** coins and adds the sum of their values to their own score. The game ends when no coins remain.

Rules:
- On Alice’s **first** turn, she may take either 1 or 2 coins.
- Thereafter, if the previous player took k coins, the current player may take any number of coins from 1 to min(k * 2, the number of remaining coins).

Assuming both players play optimally, what is the **maximum total value** Alice can obtain?"""

    def __init__(self,
                 weight_multiple : int = 2,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the CoinSquareGame_Environment instance.

        weight_multiple -- coin values are drawn from [1, N * weight_multiple]
        wrong_format    -- reward when the answer is not an integer
        correct_answer  -- reward when the answer equals the reference
        wrong_answer    -- reward for any other integer
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.weight_multiple = weight_multiple
        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Draw the coin values and compute the reference answer by dynamic programming.

        With S[i] the sum of the last i coins, dp[i][j] is the maximum over takes
        t in 1..2j (t <= i) of S[i] - dp[i - t][t]. Row i is stored only for
        j <= (i + 1) // 2; larger j is read by clamping to the last stored entry.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 5, "N should be greater than or equal to 5"

        C = self.parameter["C"] = [random.randint(1, N * self.weight_multiple) for _ in range(N)]

        # Prefix sums of the reversed sequence: S[i] is the total of the last i coins.
        S = [0] * (N + 1)
        for i in range(1, N + 1) :
            S[i] = S[i - 1] + C[N - i]

        dp_rows = [None] * (N + 1)
        dp_rows[0] = array('I', [0])

        for i in range(1, N + 1) :
            max_j = (i + 1) // 2
            row = array('I', [0] * (max_j + 1))
            for j in range(1, max_j + 1) :
                # dp[i][j] extends dp[i][j - 1] with the two newly allowed takes 2j - 1 and 2j.
                best = row[j - 1]
                for take in (2 * j - 1, 2 * j) :
                    remaining = i - take
                    if remaining < 0 :
                        continue
                    prev_row = dp_rows[remaining]
                    idx = min(take, len(prev_row) - 1)  # clamp to the stored part of the row
                    cand = S[i] - prev_row[idx]
                    if cand > best :
                        best = cand
                row[j] = best
            dp_rows[i] = row

        self.parameter["reference_answer"] = dp_rows[N][1]


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and the values rendered as `C[i]=v`, 1-indexed."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            C = " ".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"], start = 1)),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return `None` when missing or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Return `correct_answer`, `wrong_answer` or `wrong_format` for `output`."""
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]


# --- server/Gym/environments/coloring_counting/environment.py ---
# (the package __init__.py re-exports this class: `from .environment import ColoringCounting_Environment`)
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class ColoringCounting_Environment(VerifiableEnvironment) :
    """
    Sum, over all proper colorings with per-vertex color bounds, the number of distinct colors used.

    The graph is a random edge subset of the requested density and R[u] is drawn from
    [deg(u), 2 * deg(u)]. The reference is computed by a DP over vertex subsets.
    """
    prompt_template = \
r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges:
{edges}

You are also given an array `R` of length {N}, where `R[u]` denotes the **maximum allowed color** for vertex `u`:
{R}

A coloring assigns an integer `C[u]` to each vertex `u`, satisfying the following conditions:
- `0 <= C[u] <= R[u]` for all vertices `u`
- For every edge `(u, v)`, `C[u] ≠ C[v]` (i.e., adjacent vertices must have different colors)

The **value** of a valid coloring is the number of **distinct colors used** (i.e., the count of unique values among `C[0], C[1], ..., C[{N_minus_1}]`). Please compute the **total value of all valid colorings**.

**Output Format:** Your final answer should be a single integer — the **sum of values** over all valid colorings of the graph."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the ColoringCounting_Environment instance.

        wrong_format       -- reward for a non-integer or non-positive answer
        rewarding_strategy -- "(min/max)^beta" or "gold=answer"
        rewarding_weight   -- multiplier applied to the strategy's score
        rewarding_beta     -- exponent used by "(min/max)^beta"
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """Draw the graph and the bounds R, then compute the reference answer."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        # The message used to say "1" although the check requires at least 2 vertices.
        assert N >= 2, "N should be greater than or equal to 2"

        assert "edge_density" in self.parameter, "edge_density is required in parameter"
        edge_density = self.parameter["edge_density"]
        assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

        edges = self.parameter["edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2))
        random.shuffle(edges)

        Deg = [0] * N
        for u, v in edges :
            assert 0 <= u < v < N
            Deg[u] += 1
            Deg[v] += 1
        assert len(edges) == len(set(edges)), "edges should be unique"

        R = self.parameter["R"] = tuple(random.randint(Deg[u], 2 * Deg[u]) for u in range(N))

        # Relabel vertices in non-decreasing order of R; the DP below always processes
        # the lowest-indexed remaining vertex, i.e. the one with the smallest bound.
        nodes = list(enumerate(R))
        nodes.sort(key = lambda x : x[1])
        sorted_R = [r for _, r in nodes]
        orig_to_sorted = [0] * N
        for new_idx, (orig_idx, _) in enumerate(nodes) :
            orig_to_sorted[orig_idx] = new_idx

        G = [[False] * N for _ in range(N)]
        for u, v in edges :
            u = orig_to_sorted[u]
            v = orig_to_sorted[v]
            G[u][v] = G[v][u] = True

        # Can[S] is True exactly when vertex set S contains no edge (an independent set).
        total_S = 1 << N
        Can = [True] * total_S
        for S in range(total_S) :
            for u in range(N) :
                if not (S >> u) & 1 :
                    continue
                for v in range(u + 1, N) :
                    if (S >> v) & 1 and G[u][v] :
                        Can[S] = False
                        break
                if not Can[S] :
                    break

        # F[S][k]: weighted count of ways to reach "S still uncolored" having formed k color classes.
        F = [[0] * (N + 1) for _ in range(total_S)]
        F[total_S - 1][0] = 1

        for S in range(total_S - 1, 0, -1) :
            for i in range(N) :
                if (S >> i) & 1 :
                    Min = i
                    break
            max_k = min(sorted_R[Min], N - 1)
            for k in range(max_k + 1) :
                ways = F[S][k]
                if ways == 0 :
                    continue
                W = S & ~(1 << Min)
                # Enumerate every subset T of W; T plus Min forms the next color class.
                T = W
                while True :
                    if Can[T | (1 << Min)] :
                        new_S = W & ~T
                        # Min's class may take any of its R + 1 colors not used by the k earlier classes.
                        F[new_S][k + 1] += ways * (sorted_R[Min] + 1 - k)
                    if T == 0 :
                        break
                    T = (T - 1) & W

        self.parameter["reference_answer"] = sum(F[0][k] * k for k in range(1, N + 1))
        assert self.parameter["reference_answer"] > 0


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N, the edge list and the bounds, one item per line."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
            R = "\n".join("R[{}]={}".format(u, Ru) for u, Ru in enumerate(self.parameter["R"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return `None` when missing or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score an integer answer against the reference.

        Non-integers and values <= 0 get `wrong_format`; otherwise the configured
        strategy is applied and scaled by `rewarding_weight`.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result <= 0 :
            return self.rewards["wrong_format"]

        if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
            a, b = self.parameter["reference_answer"], processed_result
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class CombinationOddSubsequenceCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3773
    """Count subsequences whose consecutive binomial coefficients are all odd.

    The reference answer relies on the fact that C(x, y) is odd exactly when
    the set bits of y are a subset of the set bits of x (Lucas' theorem), so
    a valid subsequence is a chain in which every element is a bit-submask of
    its predecessor.
    """

    prompt_template = \
r"""You are given a sequence of **distinct** integers: {array}

Please count the number of subsequences (not necessarily contiguous, but the order must be preserved) a[1], ..., a[k] such that:
1. k ≥ 2 (the subsequence must have at least two elements);
2. C(a[1], a[2]) × C(a[2], a[3]) × ... × C(a[k−1], a[k]) is **odd**, where C(x, y) denotes the binomial coefficient "x choose y".

**Output Format:** A single integer — the number of such valid subsequences."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """Store the reward configuration; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward for unparsable or negative answers.
            rewarding_strategy: Either "(min/max)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's raw score.
            rewarding_beta: Exponent used by the "(min/max)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample N distinct values from [1, 2N) and compute the reference count."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        A = self.parameter["A"] = random.sample(range(1, 2 * N), N)
        random.shuffle(A)

        # Values are distinct, so each value maps to exactly one position.
        position_of = {value: index for index, value in enumerate(A)}

        # chains_from[i] = number of valid chains that start at position i,
        # counting the single-element chain [A[i]] as well.
        chains_from = [0] * N
        total = 0

        for start in reversed(range(N)):
            value = A[start]
            count = 1  # the chain consisting of A[start] alone

            # Walk every non-zero proper submask of `value`, largest first.
            sub = (value - 1) & value
            while sub != 0:
                later = position_of.get(sub, -1)
                if later > start:
                    # `sub` occurs to the right, so every chain starting there
                    # can be prefixed with A[start].
                    count += chains_from[later]
                sub = (sub - 1) & value

            chains_from[start] = count
            total += count

        # Remove the N single-element chains: only k >= 2 is asked for.
        self.parameter["reference_answer"] = total - N

    def _prompt_generate(self) -> str:
        """Render the prompt with the sampled array, space separated."""
        rendered = " ".join(str(value) for value in self.parameter["A"])
        return self.prompt_template.format(array=rendered)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when that is impossible."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score an output against the reference count using the configured strategy."""
        guess = self.processor(output)
        if guess is None or guess < 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        gold = self.parameter["reference_answer"]

        if strategy == "(min/max)^beta":
            if guess == 0:
                # Avoids 0/0: a zero guess is only right when gold is zero too.
                return weight * (gold == 0)
            ratio = min(gold, guess) / max(gold, guess)
            return weight * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (guess == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class ConcatenationPartitionCountingSum_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3176
    """Sum F[sum of parts] over all partitions of a digit string, modulo MOD.

    F is an M-term linear recurrence, so the reference answer is computed with
    M x M matrices: one matrix per substring (the recurrence's step matrix
    raised to the substring's integer value) combined by a prefix DP over
    split points.
    """

    prompt_template = \
r"""Define F[n] as follows:
- F[0] = 1
- For all n ≥ 1: F[n] = sum(F[n - m] for m in range(1, min(n, {M}) + 1)) (Python-like syntax)

You are given a number string S: {S}
Consider all possible partitions of S into non-empty substrings s[1], s[2], ..., s[k] (for any k ≥ 1), such that concatenating s[1] through s[k] gives exactly {S}. Note that leading zeros are allowed in any s[i]. For each such partition, compute the value F[int(s[1]) + int(s[2]) + ... + int(s[k])]. Please compute the total sum of this value over all such partitions, modulo {MOD}."""

    def __init__(self,
                 max_MOD: int = 10000,
                 wrong_format: float = -1.0,
                 wrong_range: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """Store the modulus bound and rewards; remaining kwargs go to the base class.

        Args:
            max_MOD: Inclusive upper bound for the randomly drawn modulus.
            wrong_format: Reward for unparsable answers.
            wrong_range: Reward for integers outside [0, MOD).
            correct_answer: Reward for the exact reference value.
            wrong_answer: Reward for an in-range but incorrect value.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """Draw S (N random digits) and MOD, then compute the reference answer."""
        params = self.parameter

        assert "N" in params, "N is required in parameter"
        N = params["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        S = params["S"] = "".join(random.choices("0123456789", k=N))

        assert "M" in params, "M is required in parameter"
        M = params["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        MOD = params["MOD"] = random.randint(2, self.max_MOD)

        # --- M x M matrix helpers over plain lists of rows -------------------
        def zero_matrix():
            return [[0] * M for _ in range(M)]

        def identity_matrix():
            mat = zero_matrix()
            for d in range(M):
                mat[d][d] = 1
            return mat

        def step_matrix():
            # Last column all ones plus a sub-diagonal of ones: one step of
            # the recurrence.
            mat = zero_matrix()
            for r in range(M):
                mat[r][M - 1] = 1
            for r in range(1, M):
                mat[r][r - 1] = 1
            return mat

        def mat_mul(left, right):
            product = zero_matrix()
            for r in range(M):
                out_row = product[r]
                for k in range(M):
                    coeff = left[r][k]
                    if coeff == 0:
                        continue
                    right_row = right[k]
                    for c in range(M):
                        out_row[c] = (out_row[c] + coeff * right_row[c]) % MOD
            return product

        def mat_add(left, right):
            return [
                [(x + y) % MOD for x, y in zip(left_row, right_row)]
                for left_row, right_row in zip(left, right)
            ]

        def mat_pow(base, exponent):
            # Square-and-multiply.
            result = identity_matrix()
            while exponent > 0:
                if exponent & 1:
                    result = mat_mul(result, base)
                base = mat_mul(base, base)
                exponent >>= 1
            return result

        # step_pow10[i] = step ** (10 ** i)
        step_pow10 = [step_matrix()]
        for _ in range(1, N):
            step_pow10.append(mat_pow(step_pow10[-1], 10))

        # segment[i][j] = step ** int(S[i..j]), built by prepending digit S[i]
        # (worth digit * 10 ** (j - i)) to the already known segment[i + 1][j].
        segment = [[None] * N for _ in range(N)]
        for end in range(N):
            for start in range(end, -1, -1):
                leading = mat_pow(step_pow10[end - start], int(S[start]))
                if start == end:
                    segment[start][end] = leading
                else:
                    segment[start][end] = mat_mul(segment[start + 1][end], leading)

        # prefix[k] = sum over all partitions of S[:k] of the product of the
        # segment matrices of its parts.
        prefix = [None] * (N + 1)
        prefix[0] = identity_matrix()
        for length in range(1, N + 1):
            total = zero_matrix()
            for split in range(length):
                total = mat_add(total, mat_mul(prefix[split], segment[split][length - 1]))
            prefix[length] = total

        # The answer is read off as the sum of the first row of prefix[N].
        params["reference_answer"] = sum(prefix[N][0]) % MOD

    def _prompt_generate(self) -> str:
        """Fill the prompt template with S, M and MOD."""
        params = self.parameter
        return self.prompt_template.format(S=params["S"], M=params["M"], MOD=params["MOD"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when that is impossible."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for an output: format, range, then exact match."""
        value = self.processor(output)
        if value is None:
            return self.rewards["wrong_format"]
        if value < 0 or value >= self.parameter["MOD"]:
            return self.rewards["wrong_range"]
        if value == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class CongruentEquation_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1082
    """Ask for the smallest positive x with A * x ≡ 1 (mod B)."""

    prompt_template = \
r"""Find the **smallest positive integer solution** `x` to the following congruence equation:

`{A} * x ≡ 1 (mod {B})`

Output Format:
Your final answer should be a single positive integer representing the smallest solution `x`.
Example: `17` (do **NOT** include the backticks or quotes).
"""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_answer: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """Store the reward table; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward for unparsable or non-positive answers.
            invalid_answer: Reward for an x that does not satisfy the congruence.
            correct_answer: Reward for the smallest positive solution.
            wrong_answer: Reward for a valid solution that is not the smallest.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_answer": invalid_answer,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    @staticmethod
    def _extended_gcd(a: int, b: int):
        """Return (g, x, y) with a * x + b * y == g == gcd(a, b), iteratively."""
        prev_r, r = a, b
        prev_x, x = 1, 0
        prev_y, y = 0, 1
        while r:
            q = prev_r // r
            prev_r, r = r, prev_r - q * r
            prev_x, x = x, prev_x - q * x
            prev_y, y = y, prev_y - q * y
        return prev_r, prev_x, prev_y

    def _generate(self) -> None:
        """Draw coprime A in [1, MAX_A_B], B in [2, MAX_A_B] and store the inverse of A mod B."""
        assert "MAX_A_B" in self.parameter, "MAX_A_B is required in parameter"
        MAX_A_B = self.parameter["MAX_A_B"]
        # B is drawn from [2, MAX_A_B], so the bound really is 2 (the message
        # used to say 1).
        assert MAX_A_B >= 2, "MAX_A_B should be greater than or equal to 2"

        while True:
            A = self.parameter["A"] = random.randint(1, MAX_A_B)
            B = self.parameter["B"] = random.randint(2, MAX_A_B)

            g, x, _ = self._extended_gcd(A, B)
            if g != 1:
                continue  # no inverse exists; resample

            # Python's % already yields a value in [0, B) for positive B.
            x %= B
            assert x > 0, "x should be positive, but got {}".format(x)
            assert A * x % B == 1, "A * x % B should be equal to 1, but got {} != {}".format(A * x % B, 1)
            self.parameter["reference_answer"] = x
            return

    def _prompt_generate(self) -> str:
        """Fill the prompt template with A and B."""
        return self.prompt_template.format(
            A=self.parameter["A"],
            B=self.parameter["B"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when that is impossible."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score an output: format, congruence validity, then minimality."""
        candidate = self.processor(output)
        if candidate is None or candidate <= 0:
            return self.rewards["wrong_format"]
        if self.parameter["A"] * candidate % self.parameter["B"] != 1:
            return self.rewards["invalid_answer"]

        reference = self.parameter["reference_answer"]
        if candidate == reference:
            return self.rewards["correct_answer"]

        # The reference lies in [1, B), so any other positive solution is larger.
        assert candidate > reference, "processed_result should be greater than reference_answer, but got {} <= {}".format(candidate, reference)
        return self.rewards["wrong_answer"]
class ConstructHackInterval_Environment(VerifiableEnvironment):  # Source: https://codeforces.com/problemset/problem/468/C
    """Ask for an interval [L, R] whose total digit sum is divisible by MOD."""

    prompt_template = \
r"""Let's define f(x) as the sum of digits in the decimal representation of number x (for example, f(1234) = 1 + 2 + 3 + 4). Please construct an interval [L, R], such that the sum of f(x) for all x in the interval is divisible by {MOD}.
Note that L and R should be both positive integers, L should be less than or equal to R, and R should be less than or equal to 10 * {MOD}.

Output Format: Your final answer should be **two integers** on a line by itself, representing the value of L and R of the interval. Example: `5 123` (do **NOT** include the backticks or quotes).
"""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 correct_solution: float = +1.0,
                 wrong_solution: float = 0.0,
                 **kwargs):
        """Store the reward table; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward when the output is not two integers.
            invalid_solution: Reward when 1 <= L <= R <= 10 * MOD is violated.
            correct_solution: Reward when the digit sum is divisible by MOD.
            wrong_solution: Reward for an in-range interval with a wrong sum.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "correct_solution": correct_solution,
            "wrong_solution": wrong_solution,
        }

    def _generate(self) -> None:
        """Draw MOD uniformly from [1, MAX_MOD]."""
        assert "MAX_MOD" in self.parameter, "MAX_MOD is required in parameter"
        MAX_MOD = self.parameter["MAX_MOD"]
        assert MAX_MOD >= 1, "MAX_MOD should be greater than or equal to 1"

        self.parameter["MOD"] = random.randint(1, MAX_MOD)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with MOD."""
        return self.prompt_template.format(MOD=self.parameter["MOD"])

    def _process(self, answer: Optional[str]) -> Optional[Tuple[int, int]]:
        """Parse "L R" into a tuple of ints; return None on any parse failure."""
        if answer is None:
            return None
        try:
            # Both a non-integer token and a wrong token count raise
            # ValueError, so nothing broader needs to be caught.
            L, R = map(int, answer.strip().split())
        except ValueError:
            return None
        return L, R

    def count_digit_sum(self, L, R):
        """Return the sum of decimal digit sums of every integer in [L, R]."""

        def digit_sum_up_to(n):
            """Sum of digits of all numbers in the interval [0, n]."""
            if n < 0:
                return 0
            if n < 10:
                return n * (n + 1) // 2

            digits = len(str(n))
            top_power = 10 ** (digits - 1)
            first_digit, remaining = divmod(n, top_power)

            # Numbers 0 .. first_digit * top_power - 1: the (digits - 1) low
            # positions each run through 0..9 uniformly, and the leading
            # position takes each value 0 .. first_digit - 1 top_power times.
            total = (digits - 1) * 45 * (top_power // 10) * first_digit
            total += first_digit * (first_digit - 1) // 2 * top_power

            # Numbers first_digit * top_power .. n: the leading digit appears
            # remaining + 1 times, plus the digit sums of 0 .. remaining.
            total += digit_sum_up_to(remaining) + first_digit * (remaining + 1)
            return total

        return digit_sum_up_to(R) - digit_sum_up_to(L - 1)

    def scorer(self, output: str) -> float:
        """Score an output: format, range constraints, then divisibility."""
        interval = self.processor(output)
        if interval is None:
            return self.rewards["wrong_format"]

        L, R = interval
        MOD = self.parameter["MOD"]
        if not (1 <= L <= R <= 10 * MOD):
            return self.rewards["invalid_solution"]

        if self.count_digit_sum(L, R) % MOD == 0:
            return self.rewards["correct_solution"]
        return self.rewards["wrong_solution"]
class ConvexHull_Environment(VerifiableEnvironment):
    """Ask for twice the area of the convex hull of random lattice points."""

    prompt_template = \
r"""You are given a set of {N} points on a 2D plane labeled from 0 to {N_minus_1}.
It is guaranteed that:
(1) all the coordinates are integers;
(2) no two points have the same coordinates;
(3) no three points are on the same line.
Below is the set of points:
{points}

Your task is to find the **convex hull** of these points, which is the smallest convex polygon that contains all the points.

**Output Format:** Your output should be one single **integer**, representing the value of 2 times the area of the convex hull (which can be proven to be an integer)."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Store the reward configuration; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward for unparsable or non-positive answers.
            rewarding_strategy: Either "(min/max)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's raw score.
            rewarding_beta: Exponent used by the "(min/max)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample N points in [0, N]^2 with no three collinear and store 2 * hull area."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        def gcd(a, b):
            while b:
                a, b = b, a % b
            return a

        def line_through(px, py, x, y):
            """Canonical (a, b, c) of the line a*X + b*Y + c = 0 through both points."""
            if px == x:
                a, b, c = 1, 0, -x
            else:
                a, b = py - y, x - px
                c = -(a * x + b * y)
            g = gcd(abs(a), gcd(abs(b), abs(c)))
            a, b, c = a // g, b // g, c // g
            # Fix the sign so each line has exactly one representation.
            if a < 0:
                return -a, -b, -c
            if a == 0 and b < 0:
                return a, -b, -c
            return a, b, c

        chosen = set()
        known_lines = set()  # every line through two already chosen points
        self.parameter["points"] = chosen
        for _ in range(N):
            while True:
                x = random.randint(0, N)
                y = random.randint(0, N)
                if (x, y) in chosen:
                    continue

                candidate_lines = set()
                collinear = False
                for px, py in chosen:
                    line = line_through(px, py, x, y)
                    if line in known_lines:
                        # The candidate lies on a line that already holds two points.
                        collinear = True
                        break
                    candidate_lines.add(line)
                if collinear:
                    continue

                chosen.add((x, y))
                known_lines |= candidate_lines
                break

        points = self.parameter["points"] = list(chosen)

        def cross(o, a, b):
            return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])

        def half_hull(ordered_labels):
            """One chain of Andrew's monotone-chain algorithm, as point labels."""
            chain = []
            for label in ordered_labels:
                while len(chain) >= 2 and cross(points[chain[-2]], points[chain[-1]], points[label]) <= 0:
                    chain.pop()
                chain.append(label)
            return chain

        by_xy = sorted(range(len(points)), key=lambda label: (points[label][0], points[label][1]))
        lower = half_hull(by_xy)
        upper = half_hull(by_xy[::-1])

        # Each chain's last label is the first label of the other chain.
        hull = [points[label] for label in lower[:-1] + upper[:-1]]

        # Shoelace formula; the sum is exactly twice the signed area.
        doubled_area = 0
        for index, (x1, y1) in enumerate(hull):
            x2, y2 = hull[(index + 1) % len(hull)]
            doubled_area += x1 * y2 - x2 * y1

        self.parameter["reference_answer"] = abs(doubled_area)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and one "(x, y)" line per point."""
        N = self.parameter["N"]
        listing = "\n".join("({}, {})".format(x, y) for x, y in self.parameter["points"])
        return self.prompt_template.format(N=N, N_minus_1=N - 1, points=listing)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when that is impossible."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score an output against the reference doubled area."""
        guess = self.processor(output)
        if guess is None or guess <= 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        gold = self.parameter["reference_answer"]

        if strategy == "(min/max)^beta":
            ratio = min(gold, guess) / max(gold, guess)
            return weight * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (guess == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class Cornfield_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3287
    """Maximise the longest non-decreasing subsequence using at most K range increments."""

    prompt_template = \
r"""You are given an array `H` of length {N}. The initial values of the array are: {H}
You may perform **at most {K} operations**. In each operation, you choose an interval [L, R] (0 ≤ L ≤ R < {N}), and increment each element H[i] by 1 for all i in the range L ≤ i ≤ R. Try your best to **maximize the length of the longest non-decreasing subsequence** (not necessarily contiguous) in the final array after performing the operations.

**Output Format:** Output at most {K} lines. Each line should contain two integers L and R (0-indexed), separated by a space, indicating an interval you chose for an operation."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(answer/gold)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Store the reward configuration; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward when the output cannot be parsed.
            invalid_solution: Reward for too many operations or a bad interval.
            rewarding_strategy: Either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's raw score.
            rewarding_beta: Exponent used by the "(answer/gold)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample H and K, then compute the best achievable subsequence length."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        H = self.parameter["H"] = [random.randint(1, 2 * N) for _ in range(N)]
        # K is capped by N and by the total size of the descents in H, but is
        # always at least 1.
        total_descent = sum(max(H[i - 1] - H[i], 0) for i in range(1, N))
        K = self.parameter["K"] = random.randint(1, max(1, min(N, total_descent)))

        def lowbit(x: int) -> int:
            """Return the lowest set bit of x."""
            return x & -x

        def add(bit, X, Y, x, y, value):
            """2-D Fenwick tree: raise point (x, y) to at least `value`."""
            while x <= X:
                yy = y
                row = bit[x]
                while yy <= Y:
                    if value > row[yy]:
                        row[yy] = value
                    yy += lowbit(yy)
                x += lowbit(x)

        def query(bit, x, y):
            """2-D Fenwick tree: maximum over the rectangle (1..x, 1..y)."""
            res = 0
            while x:
                yy = y
                row = bit[x]
                while yy:
                    v = row[yy]
                    if v > res:
                        res = v
                    yy -= lowbit(yy)
                x -= lowbit(x)
            return res

        X = K + 1        # first dimension: number of increments used, shifted by one
        Y = max(H) + K   # second dimension: largest height reachable after increments

        # 1-based storage, as Fenwick indexing requires.
        BIT = [[0] * (Y + 2) for _ in range(X + 2)]

        answer = 0
        for h in H:
            # Descending j so that states written for this element are not
            # read back while still processing the same element.
            for j in range(K, -1, -1):
                cur_height = h + j
                best = query(BIT, j + 1, cur_height) + 1
                if best > answer:
                    answer = best
                add(BIT, X, Y, j + 1, cur_height, best)

        self.parameter["gold_answer"] = answer

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, K and the labelled values of H."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            K=self.parameter["K"],
            H=" ".join("H[{}]={}".format(i, Hi) for i, Hi in enumerate(self.parameter["H"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse one "L R" pair per non-blank line; return None on any parse failure."""
        if answer is None:
            return None

        operations = []
        try:
            for line in answer.strip().splitlines():
                line = line.strip()
                if not line:
                    continue
                # A non-integer token or a wrong token count both raise
                # ValueError; nothing broader is caught, so interrupts and
                # genuine bugs are no longer swallowed.
                L, R = map(int, line.split())
                operations.append((L, R))
        except ValueError:
            return None
        return operations

    def scorer(self, output: str) -> float:
        """Apply the proposed operations and score the resulting subsequence length."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(processed_result) > self.parameter["K"]:
            return self.rewards["invalid_solution"]

        # Difference array: +1 at L, -1 just past R.
        delta = [0] * N
        for L, R in processed_result:
            if not (0 <= L <= R < N):
                return self.rewards["invalid_solution"]
            delta[L] += 1
            if R + 1 < N:
                delta[R + 1] -= 1

        H = self.parameter["H"].copy()
        for i in range(N):
            if i > 0:
                delta[i] += delta[i - 1]
            H[i] += delta[i]

        # Quadratic longest-non-decreasing-subsequence DP on the final array.
        F = [1] * N
        for i in range(N):
            for j in range(i):
                if H[j] <= H[i]:
                    F[i] = max(F[i], F[j] + 1)

        answer, gold = max(F), self.parameter["gold_answer"]
        assert 1 <= answer <= gold, "answer should be between 1 and gold_answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta":
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * int(answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class Countdown_Environment(VerifiableEnvironment):
    """Shared base for Countdown-style tasks: combine operands to reach a target.

    Subclasses supply the prompt text, the instance filter
    (``_check_parameter``) and the scoring rule.
    """

    operations = ("+", "-", "*", "/")
    epsilon = 1E-5

    @abstractmethod
    def _check_parameter(self) -> bool:
        """
        Check if the parameter is valid.

        Returns:
            bool: True if the parameter is valid, False otherwise.
        """
        pass

    def _generate(self) -> None:
        """Resample target and operands until ``_check_parameter`` accepts them."""
        assert "max_target" in self.parameter, "max_target is required in parameter"
        max_target = self.parameter["max_target"]
        assert max_target >= 0, "max_target should be greater than or equal to 0"

        assert "max_operand" in self.parameter, "max_operand is required in parameter"
        max_operand = self.parameter["max_operand"]
        assert max_operand >= 1, "max_operand should be greater than or equal to 1"

        assert "num_operands" in self.parameter, "num_operands is required in parameter"
        num_operands = self.parameter["num_operands"]
        assert num_operands >= 2, "num_operands should be greater than or equal to 2"

        while True:
            self.parameter["target"] = random.randint(0, max_target)
            self.parameter["operands"] = [random.randint(1, max_operand) for _ in range(num_operands)]
            if self._check_parameter():
                break

    def _prompt_generate(self) -> str:
        """Fill the subclass prompt with the target, operands and operation list."""
        return self._prompt_template().format(
            target=self.parameter["target"],
            operands=" ".join(map(str, self.parameter["operands"])),
            operations=", ".join(self.operations),
        )

    @abstractmethod
    def _prompt_template(self) -> str:
        """Return the prompt template with {operands}, {target}, {operations} fields."""
        pass

    @staticmethod
    def _evaluate_arithmetic(expression: str):
        """Evaluate an expression built only from + - * /, unary sign and integers.

        The expression is parsed to an AST and walked by hand rather than
        passed to ``eval``. Anything outside the whitelist (``**``, ``//``,
        decimal literals, calls, ...) raises ``ValueError``; malformed input
        raises ``SyntaxError``; division by zero raises ``ZeroDivisionError``.
        """
        # Imported locally so the block does not rely on edits to the file's
        # import section.
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def walk(node):
            if isinstance(node, ast.Expression):
                return walk(node.body)
            if isinstance(node, ast.Constant):
                value = node.value
                # Integers only: a literal such as 1.5 would otherwise merge
                # the operands 1 and 5 without using any operation.
                if isinstance(value, int) and not isinstance(value, bool):
                    return value
            elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                return binary_ops[type(node.op)](walk(node.left), walk(node.right))
            elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](walk(node.operand))
            raise ValueError("Unsupported syntax in expression")

        return walk(ast.parse(expression, mode="eval"))

    def _process(self, answer: Optional[str]) -> Dict:
        """Validate and evaluate a candidate expression.

        Returns:
            ``{"format": False}`` when the answer is missing, contains
            characters or constructs outside the allowed arithmetic, or fails
            to evaluate; otherwise ``{"format": True, "result": value}`` where
            ``value`` is None if the numbers used differ from the given
            operands or the result is not finite.
        """
        if answer is None:
            return {"format": False}
        answer = answer.strip()

        try:
            used_operands = sorted(int(token) for token in re.findall(r"\d+", answer))
            if used_operands != sorted(self.parameter["operands"]):
                return {"format": True, "result": None}

            if not re.match(r"^[\d+\-*/().\s]+$", answer):
                raise ValueError("Invalid characters in expression")

            # NOTE: this used to be eval(); the character filter alone still
            # admitted "**" and "//", which are not allowed operations and
            # let inputs like 9**9**9**9 stall the scorer.
            value = self._evaluate_arithmetic(answer)
        except (ValueError, SyntaxError, ArithmeticError, RecursionError, MemoryError):
            return {"format": False}

        try:
            is_finite = math.isfinite(float(value))
        except (OverflowError, ValueError):
            # e.g. an integer too large to convert to float
            is_finite = False
        return {"format": True, "result": value if is_finite else None}
class CountdownEqual_Environment(Countdown_Environment):
    """Countdown variant in which the expression must hit the target exactly."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_expression: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """Store the reward table; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward when the output is not a parsable expression.
            invalid_expression: Reward when the expression yields no usable result.
            correct_answer: Reward when the result is within epsilon of the target.
            wrong_answer: Reward for any other valid expression.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_expression=invalid_expression,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _prompt_template(self) -> str:
        """Return the prompt asking for an expression equal to the target."""
        return r"""You are given the following numbers: {operands}
Your task is to create a valid mathematical expression that evaluates to the target number: **{target}**.

Constraints:
- Use **each number exactly once**
- Only these basic arithmetic operations are allowed: {operations}
- You may use **parentheses** to define order of operations
- Your answer should be a **single valid expression**, with no extra explanation

Provide only the expression as your final answer."""

    def _check_parameter(self) -> bool:
        """Return True when the target is reachable from the operands.

        The search repeatedly merges two operands with one operation and only
        keeps non-negative integer intermediates (subtraction needs
        left >= right, division must be exact).
        """
        target = self.parameter["target"]
        seen = set()

        def merge(op, i, j, left, right):
            """Result of `left op right`, or None when the move is skipped."""
            if op == "+":
                # Commutative: only the i < j ordering is tried.
                return left + right if i < j else None
            if op == "-":
                return left - right if left >= right else None
            if op == "*":
                return left * right if i < j else None
            if op == "/":
                if left >= 0 and right > 0 and left % right == 0:
                    return left // right
                return None
            raise NotImplementedError("Unsupported operation")

        def reachable(values: List[int]) -> bool:
            if len(values) == 1:
                return values[0] == target

            key = tuple(sorted(values))
            if key in seen:
                return False
            seen.add(key)

            count = len(values)
            for i in range(count):
                for j in range(count):
                    if i == j:
                        continue
                    untouched = [values[k] for k in range(count) if k not in (i, j)]
                    for op in self.operations:
                        merged = merge(op, i, j, values[i], values[j])
                        if merged is None:
                            continue
                        if reachable(untouched + [merged]):
                            return True
            return False

        return reachable(self.parameter["operands"])

    def scorer(self, output: str) -> float:
        """Score an output: format, expression validity, then closeness to the target."""
        outcome = self.processor(output)
        if not outcome["format"]:
            return self.rewards["wrong_format"]

        value = outcome["result"]
        if value is None:
            return self.rewards["invalid_expression"]

        if abs(value - self.parameter["target"]) < self.epsilon:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class CountdownClose_Environment(Countdown_Environment):
    """Countdown variant rewarded by how close the expression gets to the target."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_expression: float = -0.5,
                 rewarding_strategy: str = "1/(1+|answer-target|)",
                 rewarding_weight: float = 1.0,
                 **kwargs):
        """Store the reward configuration; remaining kwargs go to the base class.

        Args:
            wrong_format: Reward when the output is not a parsable expression.
            invalid_expression: Reward when the expression yields no usable result.
            rewarding_strategy: Only "1/(1+|answer-target|)" is supported.
            rewarding_weight: Numerator of the distance-based reward.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_expression=invalid_expression,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
        )

    def _prompt_template(self) -> str:
        """Return the prompt asking for an expression as close to the target as possible."""
        return r"""You are given the following numbers: {operands}
Your task is to create a valid mathematical expression whose result has the **minimal absolute difference** from the target number: **{target}**. Try your best to get as close to the target as possible.

Constraints:
- Use **each number exactly once**
- Only these basic arithmetic operations are allowed: {operations}
- You may use **parentheses** to define order of operations
- Your answer should be a **single valid expression**, with no extra explanation

Provide only the expression as your final answer."""

    def _check_parameter(self) -> bool:
        """Accept every instance; for small ones, also record the best reachable value.

        With the "1/(1+|answer-target|)" strategy and at most 6 operands, an
        exhaustive search stores the closest achievable result in
        ``parameter["reference_result"]`` and the reward it would earn in
        ``passing_reward_threshold``.
        """
        small_enough = self.parameter["num_operands"] <= 6
        if not (self.rewards["rewarding_strategy"] == "1/(1+|answer-target|)" and small_enough):
            return True

        target = self.parameter["target"]
        self.parameter["reference_result"] = None
        seen = set()

        def merge(op, i, j, left, right):
            """Result of `left op right`, or None when the move is skipped."""
            if op == "+":
                # Commutative: only the i < j ordering is tried.
                return left + right if i < j else None
            if op == "-":
                return left - right
            if op == "*":
                return left * right if i < j else None
            if op == "/":
                return left / right if right != 0 else None
            raise NotImplementedError("Unsupported operation")

        def explore(values: List[float]) -> None:
            if len(values) == 1:
                best = self.parameter["reference_result"]
                # Strict comparison: the first value found at a given distance wins.
                if best is None or abs(values[0] - target) < abs(best - target):
                    self.parameter["reference_result"] = values[0]
                return

            # States are deduplicated on values rounded to 5 decimals.
            key = tuple(sorted(str(round(v, 5)) for v in values))
            if key in seen:
                return
            seen.add(key)

            count = len(values)
            for i in range(count):
                for j in range(count):
                    if i == j:
                        continue
                    untouched = [values[k] for k in range(count) if k not in (i, j)]
                    for op in self.operations:
                        merged = merge(op, i, j, values[i], values[j])
                        if merged is None:
                            continue
                        explore(untouched + [merged])

        explore([float(operand) for operand in self.parameter["operands"]])
        assert self.parameter["reference_result"] is not None

        distance = abs(self.parameter["reference_result"] - target)
        self.passing_reward_threshold = self.rewards["rewarding_weight"] / (1 + distance)
        return True

    def scorer(self, output: str) -> float:
        """Score an output: format, expression validity, then distance to the target."""
        outcome = self.processor(output)
        if not outcome["format"]:
            return self.rewards["wrong_format"]

        value = outcome["result"]
        if value is None:
            return self.rewards["invalid_expression"]

        if self.rewards["rewarding_strategy"] != "1/(1+|answer-target|)":
            raise NotImplementedError("Unsupported rewarding strategy")
        return self.rewards["rewarding_weight"] / (1 + abs(value - self.parameter["target"]))
class CowDanceShow_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3611
    """Task: given dance durations and a stage of size K, report when the last cow finishes."""

    prompt_template = \
r"""There are {N} cows labeled from 1 to {N}, and the i-th cow takes d[i] time to dance. The array d is given as: {d}

The cows dance on the stage as follows:
- Initially, the first {K} cows (cows 1 through {K}) are on the stage.
- Each cow dances for its own time d[i]. When a cow finishes dancing, it leaves the stage.
- As soon as a cow leaves, the next available cow in label order (if any) **immediately** takes its place. For example, when the first cow leaves, cow {K} + 1 enters the stage.

I am asking you to output the time when all cows have finished dancing."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the CowDanceShow_Environment instance.

        Args:
            wrong_format: Reward when the answer cannot be parsed as an integer.
            correct_answer: Reward when the answer equals the reference answer.
            wrong_answer: Reward for any other integer answer.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """Draw random durations and a stage size, then simulate the show for the reference answer."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        durations = [random.randint(1, N) for _ in range(N)]
        self.parameter["d"] = durations
        stage_size = random.randint(2, N - 1)
        self.parameter["K"] = stage_size

        # Min-heap of finish times for the cows currently on stage
        # (the slice is a copy, so the stored durations are left untouched).
        finish_times = durations[:stage_size]
        heapq.heapify(finish_times)
        for duration in durations[stage_size:]:
            # The next cow starts the moment the earliest-finishing cow leaves.
            heapq.heapreplace(finish_times, finish_times[0] + duration)

        # The show ends when the last cow on stage finishes.
        self.parameter["reference_answer"] = max(finish_times)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, K and the 1-indexed duration list."""
        listing = ", ".join(
            "d[{}]={}".format(label, duration)
            for label, duration in enumerate(self.parameter["d"], start=1)
        )
        return self.prompt_template.format(
            N=self.parameter["N"],
            K=self.parameter["K"],
            d=listing,
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None:
            return None
        stripped = answer.strip()
        try:
            return int(stripped)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for a raw output: wrong_format, correct_answer or wrong_answer."""
        parsed = self.processor(output)
        if parsed is None:
            return self.rewards["wrong_format"]
        if parsed == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class CRT_Environment(VerifiableEnvironment):
    """Task: find a non-negative integer satisfying a system of modular congruences."""

    prompt_template = \
r"""You are given a system of {M} modular congruences:
{equations}

Your task is to find **any non-negative integer x** that satisfies all of the above congruences.

**Output Format:** Your output should be a **single integer x** that satisfies all the equations."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(satisfied/all)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the CRT_Environment instance.

        Args:
            wrong_format: Reward for an unparsable or negative answer.
            rewarding_strategy: Either "(satisfied/all)^beta" or "satisfied=all".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(satisfied/all)^beta" strategy.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Pick a hidden X in [2, MAX_X], distinct moduli in [2, X], and record X's residues."""
        assert "MAX_X" in self.parameter, "MAX_X is required in parameter"
        MAX_X = self.parameter["MAX_X"]
        assert MAX_X >= 2, "MAX_X should be greater than or equal to 2"

        hidden = random.randint(2, MAX_X)
        self.parameter["reference_answer"] = hidden

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        # Only X - 1 moduli exist in [2, X], so fewer than M equations may be produced.
        moduli = random.sample(range(2, hidden + 1), min(M, hidden - 1))
        self.parameter["B"] = moduli
        self.parameter["X_mod_B"] = [hidden % modulus for modulus in moduli]

    def _prompt_generate(self) -> str:
        """Render one line per congruence; {M} is the number of equations actually generated."""
        lines = [
            "x ≡ {} (mod {})".format(residue, modulus)
            for residue, modulus in zip(self.parameter["X_mod_B"], self.parameter["B"])
        ]
        return self.prompt_template.format(
            M=len(self.parameter["B"]),
            equations="\n".join(lines),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None:
            return None
        stripped = answer.strip()
        try:
            return int(stripped)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score an output by how many congruences the submitted integer satisfies.

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        candidate = self.processor(output)
        if candidate is None or candidate < 0:
            return self.rewards["wrong_format"]

        moduli = self.parameter["B"]
        satisfied = sum(
            1
            for residue, modulus in zip(self.parameter["X_mod_B"], moduli)
            if candidate % modulus == residue
        )

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta":
            return self.rewards["rewarding_weight"] * ((satisfied / len(moduli)) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all":
            return self.rewards["rewarding_weight"] * (satisfied == len(moduli))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class Cryptarithmetic_Environment(VerifiableEnvironment):
    """Task: recover a hidden digit permutation from an addition written with digit labels.

    The digits of base ``N`` are relabelled by a secret permutation
    ``digits`` (label ``i`` stands for the value ``digits[i]``). The prompt
    shows ``addend_1 + addend_2 = sum_result`` using labels only, and the
    solver must supply an assignment of values to labels that makes the
    equation hold.
    """

    prompt_template = \
r"""Now consider a number system with base {N}, which uses digits d[0], d[1], ..., d[{N_minus_1}].
Each d[i] is a unique integer in the range [0, {N_minus_1}], but their actual values are unknown.

We define the number `d[i0]d[i1]...d[ik]` to represent the value `d[i0] * {N}^k + d[i1] * {N}^(k-1) + ... + d[ik] * {N}^0`,
where `d[i]` is the actual digit assigned to index `i`, and the number is visually written using the digits `d[i0]`, `d[i1]`, ..., `d[ik]`.

You are given the following equation in this unknown base-{N} digit system:
{addend_1}
+
{addend_2}
=
{sum_result}

Your task is to find one possible assignment of values (in decimal) for d[0], d[1], ..., d[{N_minus_1}] such that the equation holds true.

Output Format:
Your final answer should be a single line containing the decimal values of d[0], d[1], ..., d[{N_minus_1}], in order, separated by spaces.
Example: `{all_digits_in_order}` (do **NOT** include the backticks or quotes); this means d[0] = 0, d[1] = 1, ..., d[{N_minus_1}] = {N_minus_1}.
"""

    def __init__(self,
                 wrong_format : float = -1.0, not_permutation : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the Cryptarithmetic_Environment instance.

        Args:
            wrong_format: Reward when the answer is not a whitespace-separated list of integers.
            not_permutation: Reward when the answer is not a permutation of 0..N-1.
            rewarding_strategy: Either "mean([gold=answer])^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "mean([gold=answer])^beta" strategy.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "not_permutation" : not_permutation,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """Draw the secret permutation and two random addends, and store their sum.

        The addends and the sum are stored as lists of actual digit values,
        least-significant digit first, with a non-zero most-significant digit.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        digits = self.parameter["digits"] = list(range(N))
        random.shuffle(digits)
        self.parameter["reference_answer"] = " ".join(str(digit) for digit in digits)

        assert "addend_length" in self.parameter, "addend_length is required in parameter"
        addend_length = self.parameter["addend_length"]
        # An empty addend would produce an empty equation and a zero-length sum,
        # which the scorer later divides by (or vacuously accepts).
        assert addend_length >= 1, "addend_length should be greater than or equal to 1"

        def random_addend() -> List[int] :
            # Little-endian digits; the last (most significant) digit is never 0.
            return [random.randint(0 if position < addend_length - 1 else 1, N - 1) for position in range(addend_length)]

        addend_1 = self.parameter["addend_1"] = random_addend()
        addend_2 = self.parameter["addend_2"] = random_addend()
        self.parameter["sum_result"] = Add(addend_1, addend_2, N)
        # self.parameter["addend_1"], self.parameter["addend_2"], self.parameter["sum_result"] are all the actual digits


    def _prompt_generate(self) -> str :
        """Render the equation with every digit value replaced by its label ``d[i]``."""
        N = self.parameter["N"]
        # Inverse of the secret permutation: actual digit value -> label index.
        gold_digit2i = {digit : i for i, digit in enumerate(self.parameter["digits"])}

        def print_dis(digits : List[int]) -> str :
            # Stored little-endian, printed most-significant digit first.
            return "".join("d[{}]".format(gold_digit2i[digit]) for digit in reversed(digits))

        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            addend_1 = print_dis(self.parameter["addend_1"]),
            addend_2 = print_dis(self.parameter["addend_2"]),
            sum_result = print_dis(self.parameter["sum_result"]),
            all_digits_in_order = " ".join(str(i) for i in range(N)),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse the answer into a list of integers; return None when it is missing or malformed."""
        if answer is None :
            return None # Invalid answer format
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """Score a submitted assignment of values to labels.

        The addends are re-read under the submitted assignment, added in base
        N, and the resulting sum is compared position by position (as labels)
        with the sum shown in the prompt.

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(processed_result) != N or len(set(processed_result)) != N :
            return self.rewards["not_permutation"]
        if any(not (0 <= value < N) for value in processed_result) :
            return self.rewards["not_permutation"]

        digits = processed_result

        # Translate each gold digit value to its label, then to the submitted value of that label.
        gold_digit2i = {digit : i for i, digit in enumerate(self.parameter["digits"])}
        addend_1 = [digits[gold_digit2i[digit]] for digit in self.parameter["addend_1"]]
        addend_2 = [digits[gold_digit2i[digit]] for digit in self.parameter["addend_2"]]
        sum_result = Add(addend_1, addend_2, N)
        gold_sum_result = self.parameter["sum_result"].copy()

        # The two sums can differ by at most one (carry) digit; pad the shorter one with a 0.
        addend_length = self.parameter["addend_length"]
        if len(sum_result) < len(gold_sum_result) :
            assert len(sum_result) == addend_length and len(gold_sum_result) == addend_length + 1
            sum_result.append(0)
        elif len(sum_result) > len(gold_sum_result) :
            assert len(sum_result) == addend_length + 1 and len(gold_sum_result) == addend_length
            gold_sum_result.append(0)
        assert len(sum_result) == len(gold_sum_result), "sum_result and gold_sum_result should have the same length"

        # Compare as labels, since labels are what the prompt displays.
        digit2i = {digit : i for i, digit in enumerate(digits)}
        sum_result = [digit2i[digit] for digit in sum_result]
        gold_sum_result = [gold_digit2i[digit] for digit in gold_sum_result]

        if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" :
            matched = sum(float(a == b) for a, b in zip(sum_result, gold_sum_result))
            return self.rewards["rewarding_weight"] * ((matched / len(sum_result)) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * all(a == b for a, b in zip(sum_result, gold_sum_result))
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
A cell is called **dominant** if its value is strictly greater than all other cells that share at least one coordinate (i.e., same x, y, or z index). Please compute the probability that **exactly** {K} dominant cells exist after filling the grid. + +**Output Format:** Output a single integer — the required probability modulo {MOD}.""" + MOD = 998244353 + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the Cube_FixedLocalMaximumCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N_M_L" in self.parameter, "MAX_N_M_L is required in parameter" + MAX_N_M_L = self.parameter["MAX_N_M_L"] + assert MAX_N_M_L >= 2, "MAX_N_M_L should be greater than or equal to 2" + + N, M, L = self.parameter["N"], self.parameter["M"], self.parameter["L"] = random.randint(2, MAX_N_M_L), random.randint(2, MAX_N_M_L), random.randint(2, MAX_N_M_L) + K = self.parameter["K"] = random.randint(2, min(N, M, L)) + + + def inv_list(n): + """Compute modular inverses of 1..n under MOD.""" + invs = [0] * (n + 1) + invs[1] = 1 + for i in range(2, n + 1): + invs[i] = (-(self.MOD // i) * invs[self.MOD % i]) % self.MOD + return invs + + def modinv(x): + """Modular inverse of x under MOD (MOD is prime).""" + return pow(x, self.MOD - 2, self.MOD) + + def compute(): + Q = min(N, M, L) + invs = inv_list(Q) + + # R(x) = (N-x)*(M-x)*(L-x) mod MOD + def R(x): + return (N - x) * (M - x) % self.MOD * (L - x) % self.MOD + + # Prepare arrays of length Q+1 + vals = [0] * (Q + 1) + iprod = [0] * (Q + 1) # corresponds to iVals in C++ + iprod[0] = 1 + + R0 = R(0) + # Build prefix products of (R0 - R(i)) + for i in range(1, Q + 1): + vals[i] = (R0 - R(i)) % self.MOD + iprod[i] = 
iprod[i - 1] * vals[i] % self.MOD + + # Compute inverses of those prefix products by reversing + inv_total = modinv(iprod[Q]) + for i in range(Q, 0, -1): + prev = iprod[i - 1] + iprod[i] = inv_total * prev % self.MOD + inv_total = inv_total * vals[i] % self.MOD + + # Now do the main summation for the answer + ans = 0 + C = 0 + S = 1 + for i in range(1, Q + 1): + # S accumulates product over R(i-1) * iprod[i] + S = S * R(i - 1) % self.MOD * iprod[i] % self.MOD + + # update C according to i vs K + if i == K: + C = 1 + elif i > K: + # C = -C * i * invs[i - K] (all mod MOD) + C = (-C * i * invs[i - K]) % self.MOD + + ans = (ans + C * S) % self.MOD + + return ans + + self.parameter["reference_answer"] = compute() + + + def _prompt_generate(self) -> str : + N, M, L = self.parameter["N"], self.parameter["M"], self.parameter["L"] + return self.prompt_template.format( + N = N, M = M, L = L, + total = N * M * L, + K = self.parameter["K"], + MOD = self.MOD, + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.MOD) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/cycle_counting/__init__.py b/server/Gym/environments/cycle_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..067a55538fa8e4e7ee3506b2416b4fc1d586b5f4 --- /dev/null +++ b/server/Gym/environments/cycle_counting/__init__.py @@ -0,0 +1 @@ +from .environment import CycleCounting_Environment 
class CycleCounting_Environment(VerifiableEnvironment):  # Source : https://codeforces.com/problemset/problem/11/D
    """Task: count the simple cycles of a random undirected graph."""

    prompt_template = \
r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges:
{edges}

Please count the number of simple cycles in the graph. A simple cycle is a cycle with at least 3 vertices, with no repeated vertices or edges.
Two cycles are considered equivalent if they consist of the same set of edges, regardless of the order or starting point; for example, the cycles `(0, 1, 2, 3)` and `(1, 0, 3, 2)` are identical, while `(0, 1, 2, 3)` and `(1, 0, 2, 3)` are NOT.

Output Format: Your final answer should be a single line containing the number of simple cycles in the graph.
"""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the CycleCounting_Environment instance.

        Args:
            wrong_format: Reward for an unparsable or negative answer.
            rewarding_strategy: Either "(min/max)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(min/max)^beta" strategy.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample a random edge set and count its simple cycles with a subset DP."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "edge_density" in self.parameter, "edge_density is required in parameter"
        edge_density = self.parameter["edge_density"]
        assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

        all_pairs = [(u, v) for u in range(N) for v in range(u + 1, N)]
        edges = random.sample(all_pairs, int(edge_density * N * (N - 1) / 2))
        self.parameter["edges"] = edges
        random.shuffle(edges)

        assert len(edges) == len(set(edges)), "edges should be unique"
        adjacent = [[False] * N for _ in range(N)]
        for u, v in edges:
            assert 0 <= u < v < N
            adjacent[u][v] = adjacent[v][u] = True

        # paths[mask][end]: number of simple paths that visit exactly the vertices
        # in `mask`, start at the lowest-numbered vertex of `mask`, and stop at `end`.
        paths = [[0] * N for _ in range(1 << N)]
        for vertex in range(N):
            paths[1 << vertex][vertex] = 1

        closed_walks = 0
        for mask in range(1, 1 << N):
            start = (mask & -mask).bit_length() - 1  # index of the lowest set bit
            for end in range(N):
                if not (mask >> end) & 1:
                    continue
                ways = paths[mask][end]
                if not ways:
                    continue

                # Close the path into a cycle; it needs a third vertex besides start and end.
                if adjacent[end][start] and mask - (1 << start) - (1 << end) > 0:
                    closed_walks += ways

                # Extend only to unvisited vertices above `start`, so `start` stays the minimum.
                for successor in range(start + 1, N):
                    if (mask >> successor) & 1:
                        continue
                    if not adjacent[end][successor]:
                        continue
                    paths[mask | (1 << successor)][successor] += ways

        # Every cycle was closed once per traversal direction.
        self.parameter["reference_answer"] = closed_walks // 2

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the vertex count and one `(u, v)` line per edge."""
        N = self.parameter["N"]
        edge_lines = ["({}, {})".format(u, v) for u, v in self.parameter["edges"]]
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            edges="\n".join(edge_lines),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None:
            return None
        stripped = answer.strip()
        try:
            return int(stripped)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a submitted cycle count against the reference count.

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        submitted = self.processor(output)
        if submitted is None or submitted < 0:
            return self.rewards["wrong_format"]

        reference = self.parameter["reference_answer"]
        if reference == 0:
            # The ratio-based score is undefined for a zero reference; fall back to exact match.
            return self.rewards["rewarding_weight"] * (submitted == 0)

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            smaller, larger = min(reference, submitted), max(reference, submitted)
            return self.rewards["rewarding_weight"] * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (submitted == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class DecreasingDigitCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1066
    """Task: count base-2^K numbers with strictly increasing digits that fit in W bits."""

    prompt_template = \
r"""Let R be a number in base 2^{K} = {power_2_K}, satisfying the following conditions:
- R must be **at least a 2-digit** number in base 2^{K} (leading zeros are ignored; i.e., we don’t count numbers like `01` or `0005`).
- When viewed as a number in base 2^{K}, each digit of R, except for the last one, must be **strictly less than** its immediate right neighbor. (Digits are read from **left to right**, with the leftmost digit being the most significant — following natural reading order.)
- When R is converted to its binary representation, the total number of bits (ignoring leading zeros) must not exceed {W}.

Your task is to determine how many **distinct valid values of R** satisfy all the above conditions.

**Output Format:**
Your final answer should be a single integer — the total number of distinct values of R.
Example: `10` (do **NOT** include the backticks or quotes); this means there are 10 valid values of R that satisfy the conditions.
"""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the DecreasingDigitCounting_Environment instance.

        Args:
            wrong_format: Reward for an unparsable or negative answer.
            rewarding_strategy: Either "(min/max)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(min/max)^beta" strategy.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw K and W, then count the valid numbers combinatorially."""
        assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
        MAX_K = self.parameter["MAX_K"]
        assert MAX_K >= 2, "MAX_K should be greater than or equal to 2"

        assert "MAX_W" in self.parameter, "MAX_W is required in parameter"
        MAX_W = self.parameter["MAX_W"]
        assert MAX_W >= 1, "MAX_W should be greater than or equal to 1"

        K = random.randint(2, MAX_K)
        self.parameter["K"] = K

        # W is drawn from [K + 1, min(MAX_W, K * 2^K)] when that interval is
        # non-empty; otherwise it falls back to MAX_W.
        width_cap = min(MAX_W, K * (1 << K))
        if K + 1 <= width_cap:
            W = random.randint(K + 1, width_cap)
        else:
            W = MAX_W
        self.parameter["W"] = W

        top_bits = W % K                                  # bits left for a partial leading digit
        max_digits = W // K + (1 if top_bits != 0 else 0)  # most digits that fit in W bits
        largest_digit = (1 << K) - 1

        count = 0
        for length in range(2, max_digits + 1):  # empty when fewer than 2 digits fit
            if length > largest_digit:
                # More digits than distinct non-zero digit values are available.
                continue
            if length < max_digits or top_bits == 0:
                # Every digit may use the full range: choose `length` distinct non-zero digits.
                count += math.comb(largest_digit, length)
            else:
                # The leading digit is limited to `top_bits` bits; fix it, then
                # choose the remaining digits from the larger values.
                count += sum(
                    math.comb(largest_digit - lead, length - 1)
                    for lead in range(1, 1 << top_bits)
                    if largest_digit - lead >= length - 1
                )
        self.parameter["reference_answer"] = count

    def _prompt_generate(self) -> str:
        """Fill the prompt template with K, W and the numeric value of 2^K."""
        K = self.parameter["K"]
        return self.prompt_template.format(K=K, W=self.parameter["W"], power_2_K=2 ** K)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None:
            return None
        stripped = answer.strip()
        try:
            return int(stripped)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a submitted count against the reference count.

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        submitted = self.processor(output)
        if submitted is None or submitted < 0:
            return self.rewards["wrong_format"]

        reference = self.parameter["reference_answer"]
        if reference == 0:
            # The ratio-based score is undefined for a zero reference; fall back to exact match.
            return self.rewards["rewarding_weight"] * (submitted == 0)

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            smaller, larger = min(reference, submitted), max(reference, submitted)
            return self.rewards["rewarding_weight"] * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (submitted == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5,
             rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Store the reward configuration for the fixed-degree spanning-tree task.

    Args:
        wrong_format: reward when the answer cannot be parsed or has an odd
            number of integers.
        invalid_solution: reward when the answer is not a spanning tree made
            of edges of the given graph.
        rewarding_strategy: "(satisfied/all)^beta" or "satisfied=all".
        rewarding_weight: multiplier applied to the strategy score.
        rewarding_beta: exponent used by "(satisfied/all)^beta".
        **kwargs: forwarded unchanged to the parent initializer.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        rewarding_strategy = rewarding_strategy,
        rewarding_weight = rewarding_weight,
        rewarding_beta = rewarding_beta,
    )


def _generate(self) -> None :
    """
    Build a random instance: a hidden spanning tree plus extra distractor edges.

    Reads "N" and "edge_density" from self.parameter and writes "degrees"
    (degree of every vertex in the hidden tree), "edges" (shuffled, each
    stored as (min, max)) and "reference_answer" (the hidden tree as
    space-separated endpoints).
    """
    params = self.parameter

    assert "N" in params, "N is required in parameter"
    N = params["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    assert "edge_density" in params, "edge_density is required in parameter"
    edge_density = params["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    degrees = params["degrees"] = [0] * N
    edges = params["edges"] = []

    # Random tree: every vertex after the first attaches to a random earlier one.
    order = list(range(N))
    random.shuffle(order)
    tree_tokens = []
    for index in range(1, N) :
        child = order[index]
        parent = random.choice(order[: index])
        tree_tokens.append("{} {}".format(child, parent))
        lo, hi = min(child, parent), max(child, parent)
        edges.append((lo, hi))
        degrees[lo] += 1
        degrees[hi] += 1
    params["reference_answer"] = " ".join(tree_tokens)

    # Pad with non-tree edges until the requested density is reached (if possible).
    num_edges = int(edge_density * N * (N - 1) / 2)
    if len(edges) < num_edges :
        remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set(edges))
        extra = min(len(remaining_edges), num_edges - len(edges))
        edges += random.sample(remaining_edges, extra)
    random.shuffle(edges)

    assert all(0 <= u < v < N for u, v in edges)
    assert len(edges) == len(set(edges)), "edges should be unique"


def _prompt_generate(self) -> str :
    """Fill the prompt template with N, the edge list and the required degrees."""
    N = self.parameter["N"]
    edge_lines = ["({}, {})".format(u, v) for u, v in self.parameter["edges"]]
    degree_items = ["d_{}={}".format(i, degree) for i, degree in enumerate(self.parameter["degrees"])]
    return self.prompt_template.format(
        N = N,
        N_minus_1 = N - 1,
        edges = "\n".join(edge_lines),
        degrees = ", ".join(degree_items),
    )


def _process(self, answer : Optional[str]) -> Optional[List] :
    """Parse a whitespace-separated list of integers; return None if that fails."""
    if answer is None :
        return None # Invalid answer format
    try :
        return [int(token) for token in answer.strip().split()]
    except ValueError :
        return None # Invalid answer format


def scorer(self, output : str) -> float :
    """
    Score an answer for the fixed-degree spanning-tree task.

    Returns the "wrong_format" reward for unparsable or odd-length answers,
    the "invalid_solution" reward when the edges are not a spanning tree of
    the given graph, and otherwise a reward based on how many vertices have
    the required degree.
    """
    flat = self.processor(output)
    if flat is None :
        return self.rewards["wrong_format"]
    assert isinstance(flat, list), "processed_result should be a list"

    if len(flat) % 2 != 0 :
        return self.rewards["wrong_format"]
    chosen = list(zip(flat[0::2], flat[1::2]))

    N = self.parameter["N"]
    if len(chosen) != N - 1 :
        return self.rewards["invalid_solution"]

    touched = set()
    for a, b in chosen :
        touched.add(a)
        touched.add(b)
    if touched != set(range(N)) :
        return self.rewards["invalid_solution"]

    allowed = set(map(tuple, self.parameter["edges"]))
    degrees = [0] * N
    subgraph = networkx.Graph()
    for a, b in chosen :
        lo, hi = min(a, b), max(a, b)
        if (lo, hi) not in allowed :
            return self.rewards["invalid_solution"]
        subgraph.add_edge(lo, hi)
        degrees[lo] += 1
        degrees[hi] += 1

    if not networkx.is_connected(subgraph) :
        return self.rewards["invalid_solution"]
    assert networkx.is_tree(subgraph), "The answer should be a tree as it has N - 1 edges and is connected"

    strategy = self.rewards["rewarding_strategy"]
    gold_degrees = self.parameter["degrees"]
    if strategy == "(satisfied/all)^beta" :
        satisfied = sum(int(d_answer == d_gold) for d_answer, d_gold in zip(degrees, gold_degrees))
        return self.rewards["rewarding_weight"] * ((satisfied / N) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * all(d_answer == d_gold for d_answer, d_gold in zip(degrees, gold_degrees))
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class DeltaMinPopcount_Environment(VerifiableEnvironment) :
    """Task: minimise popcount(n XOR (n + d)) over n >= 0 for a random binary d."""

    prompt_template = \
r"""Define `popcount(x)` as the number of 1s in the binary representation of a non-negative integer `x`. For example, `popcount(5) = 2` because `(5)_10 = (101)_2`.

You are given a binary number `d = ({binary_string})_2` (i.e., the base-2 representation of a decimal integer `d`).
Please compute the **minimum value of** `popcount(n XOR (n + d))` over all non-negative integers `n`, where `XOR` denotes the bitwise exclusive OR operation.

**Output Format:** Your final answer should be a single base-10 integer — the minimum `popcount(n XOR (n + d))` over all `n >= 0`."""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Store the three reward values; **kwargs go to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )

    def _generate(self) -> None :
        """
        Draw a random binary string of "digit_num" digits (leading digit 1)
        and store it together with the reference answer in self.parameter.
        """
        assert "digit_num" in self.parameter, "digit_num is required in parameter"
        digit_num = self.parameter["digit_num"]
        assert digit_num >= 1, "digit_num should be greater than or equal to 1"

        digits = ["1"]
        digits.extend(str(random.randint(0, 1)) for _ in range(digit_num - 1))
        self.parameter["binary_string"] = "".join(digits)

        # Scan from the least significant bit, with two padding zeros on top.
        padded = self.parameter["binary_string"][::-1] + "00"
        state = 0
        count = 0
        for bit, following in zip(padded, padded[1 :]) :
            if int(bit) != state :
                count += 1
                # The new state is decided by the next higher bit.
                state = 1 if following == "1" else 0

        self.parameter["reference_answer"] = count

    def _prompt_generate(self) -> str :
        """Insert the binary string into the prompt template."""
        return self.prompt_template.format(binary_string = self.parameter["binary_string"])

    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as a single integer; return None if that fails."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """Return the reward for an exact match, a mismatch, or an unparsable answer."""
        value = self.processor(output)
        if value is None :
            return self.rewards["wrong_format"]
        if value == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class DeltaNimGame_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3480
    """Nim variant on a non-decreasing pile array; the task is to name the winner."""

    prompt_template = \
r"""Alice and Bob are playing a game with {N} piles of stones. The number of stones in the i-th pile is A[i], for 0 <= i < {N}. The initial array A is: {A}

Game rules:
- Players alternate turns, with Alice going first.
- On a turn, a player chooses a pile `i` (0 <= i < {N}) and removes any number of stones (at least 1 and at most A[i]). After the move, the array A must still satisfy the condition: A[i] <= A[i + 1] for all 0 <= i < {N} - 1.
- A player who cannot make a valid move loses.

Assuming both players play optimally, determine who will win. Output a single word: `Alice` or `Bob` (do NOT include quotes or backticks), indicating the winner."""


    def __init__(self,
                 wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Store the four reward values; **kwargs go to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_answer = invalid_answer,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Pick the winner first, then build an array A that produces that winner.

        A is generated through its consecutive differences C; the XOR of the
        differences at positions with the same parity as N - 1 is forced to be
        non-zero for "Alice" and zero for "Bob" by choosing the last difference.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        winner = self.parameter["reference_answer"] = "Alice" if random.random() < 0.5 else "Bob"

        last = N - 1
        C = [None] * N
        ans = 0
        for i in range(N) :
            if i != last :
                # First difference is at least 1 so that A[0] >= 1.
                C[i] = random.randint(1 if i == 0 else 0, N)
            elif winner == "Alice" :
                candidate = random.randint(0, N)
                while (ans ^ candidate) == 0 :
                    candidate = random.randint(0, N)
                C[i] = candidate
            elif winner == "Bob" :
                C[i] = ans
            else :
                assert False, "Invalid reference answer"
            if (i & 1) == (last & 1) :
                ans ^= C[i]
        assert (ans == 0) == (winner == "Bob"), "Reference answer does not match computed answer"

        # Prefix sums of the differences give the pile sizes.
        A = self.parameter["A"] = []
        running = 0
        for gap in C :
            running += gap
            A.append(running)
        assert all(later >= earlier for earlier, later in zip(A, A[1 :])), "A should be non-decreasing"
        assert A[0] >= 1


    def _prompt_generate(self) -> str :
        """Insert N and the pile array into the prompt template."""
        piles = ["A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])]
        return self.prompt_template.format(
            N = self.parameter["N"],
            A = " ".join(piles),
        )


    def _process(self, answer : Optional[str]) -> Optional[str] :
        """Return the stripped answer text, or None when there is no answer."""
        return None if answer is None else answer.strip()


    def scorer(self, output : str) -> float :
        """Reward a correct winner; penalise anything other than `Alice`/`Bob`."""
        name = self.processor(output)
        if name is None :
            return self.rewards["wrong_format"]
        if name not in ("Alice", "Bob") :
            return self.rewards["invalid_answer"]
        if name == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class DerangementExtension_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4071
    """Count permutations of 1..N with exactly M fixed points, modulo MOD."""

    prompt_template = r"""What's the number of permutations p of 1, 2, ..., {N} such that exactly {M} indices i satisfy p[i] = i (1-indexed)? Let me know the result modulo {MOD}."""
    MODs = (666623333, 998244353, 10 ** 9 + 7)

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Store the four reward values; **kwargs go to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            wrong_range = wrong_range,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Draw M in [0, N] and a modulus from MODs, then store the reference
        answer C(N, M) * D(N - M) mod MOD, where D is the derangement count.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        M = self.parameter["M"] = random.randint(0, N)
        MOD = self.parameter["MOD"] = random.choice(self.MODs)

        # factorial[i] = i! mod MOD
        factorial = [1] * (N + 1)
        for i in range(1, N + 1) :
            factorial[i] = (factorial[i - 1] * i) % MOD

        # derange[i] for i >= 2; entries 0 and 1 stay 0 and are never read
        # for a case that needs D(0) = 1 (those cases are special-cased below).
        derange = [0] * (N + 1)
        derange[2] = 1
        for i in range(3, N + 1) :
            derange[i] = (i - 1) * ((derange[i - 1] + derange[i - 2]) % MOD) % MOD

        if M == 0 :
            answer = derange[N] % MOD
        elif M == N :
            answer = 1
        elif M == N - 1 :
            answer = 0
        else :
            # Modular inverses through pow(x, MOD - 2, MOD).
            inv_m = pow(factorial[M], MOD - 2, MOD)
            inv_rest = pow(factorial[N - M], MOD - 2, MOD)
            comb = (factorial[N] * inv_m % MOD) * inv_rest % MOD
            answer = (comb * derange[N - M]) % MOD

        self.parameter["reference_answer"] = answer


    def _prompt_generate(self) -> str :
        """Insert N, M and MOD into the prompt template."""
        params = self.parameter
        return self.prompt_template.format(N = params["N"], M = params["M"], MOD = params["MOD"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as a single integer; return None if that fails."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Reward an exact match; penalise values outside [0, MOD) and unparsable answers."""
        value = self.processor(output)
        if value is None :
            return self.rewards["wrong_format"]
        if not (0 <= value < self.parameter["MOD"]) :
            return self.rewards["wrong_range"]
        if value == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class DifferenceConstraintSystem_Environment(VerifiableEnvironment) :
    """Task: find integers x[0..N-1] satisfying a set of constraints x[i] - x[j] <= c."""

    prompt_template = \
r"""There are {N} integers x[0], x[1], ..., x[{N_minus_1}]. They satisfy the following {M} inequations:
{inequations}

Please find any solution x[0], x[1], ..., x[{N_minus_1}] that satisfies the inequations.

Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**."""

    def __init__(self,
                 num_range : int = 5,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Store the generation range and the reward configuration.

        Args:
            num_range: upper bound of the random slack added to every
                right-hand side (slack is drawn from [0, num_range]).
            wrong_format: reward for unparsable or wrong-length answers.
            rewarding_strategy: "(satisfied/all)^beta" or "satisfied=all".
            rewarding_weight: multiplier applied to the strategy score.
            rewarding_beta: exponent used by "(satisfied/all)^beta".
            **kwargs: forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.number_range = num_range

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Draw a hidden solution x and derive constraints it is known to satisfy.

        At most N * (N - 1) distinct ordered pairs exist, so fewer than M
        inequations are produced when M exceeds that number.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        x = self.parameter["x"] = [random.randint(-N, +N) for _ in range(N)]
        self.parameter["reference_answer"] = " ".join(map(str, x))

        ordered_pairs = [(i, j) for i in range(N) for j in range(N) if i != j]
        inequations = self.parameter["inequations"] = random.sample(ordered_pairs, min(M, N * (N - 1)))
        # Right-hand side = true difference plus non-negative slack, so x is feasible.
        self.parameter["results"] = [x[i] - x[j] + random.randint(0, self.number_range) for i, j in inequations]


    def _prompt_generate(self) -> str :
        """
        Render the prompt.

        The announced count is the number of inequations actually listed,
        which can be smaller than the requested M (see _generate).
        """
        inequations = self.parameter["inequations"]
        results = self.parameter["results"]
        return self.prompt_template.format(
            N = self.parameter["N"],
            N_minus_1 = self.parameter["N"] - 1,
            M = len(inequations),
            inequations = "\n".join("x[{}] - x[{}] <= {}".format(i, j, result) for (i, j), result in zip(inequations, results)),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None if that fails."""
        if answer is None :
            return None # Invalid answer format
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format

    def scorer(self, output : str) -> float :
        """
        Score an answer by the number of satisfied inequations.

        Returns the "wrong_format" reward when the answer cannot be parsed or
        does not contain exactly N integers.
        """
        x = self.processor(output)
        if x is None :
            return self.rewards["wrong_format"]
        assert isinstance(x, list), "processed_result should be a list"

        if len(x) != self.parameter["N"] :
            return self.rewards["wrong_format"]

        inequations = self.parameter["inequations"]
        satisfied = sum(int(x[i] - x[j] <= result) for (i, j), result in zip(inequations, self.parameter["results"]))

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta" :
            return self.rewards["rewarding_weight"] * ((satisfied / len(inequations)) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all" :
            return self.rewards["rewarding_weight"] * (satisfied == len(inequations))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class DifferenceConstraintSystemDAG_Environment(VerifiableEnvironment) :
    """Task: satisfy =, <, >, <=, >= relations between positive integers with minimal sum."""

    prompt_template = \
r"""There are {N} **positive integers** x[0], x[1], ..., x[{N_minus_1}]. They satisfy the following {M} equations/inequations:
{relations}

Please find any solution x[0], x[1], ..., x[{N_minus_1}] that satisfies all of the equations/inequations. Try your best to minimize x[0] + x[1] + ... + x[{N_minus_1}].

Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**."""

    def __init__(self,
                 wrong_format : float = -1.0,
                 invalid_solution : float = 0.0,
                 rewarding_strategy_relation : str = "(satisfied/all)^beta", rewarding_weight_relation : float = +0.5, rewarding_beta_relation : float = 5.0,
                 rewarding_strategy_sum : str = "(gold/answer)^beta", rewarding_weight_sum : float = +0.5, rewarding_beta_sum : float = 5.0,
                 **kwargs) :
        """
        Store the reward configuration.

        The total reward has two parts: one for the fraction of satisfied
        relations and one, granted only when every relation holds, for how
        close the sum is to the optimum. **kwargs go to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy_relation" : rewarding_strategy_relation,
            "rewarding_weight_relation" : rewarding_weight_relation,
            "rewarding_beta_relation" : rewarding_beta_relation,
            "rewarding_strategy_sum" : rewarding_strategy_sum,
            "rewarding_weight_sum" : rewarding_weight_sum,
            "rewarding_beta_sum" : rewarding_beta_sum,
        }


    def _generate(self) -> None :
        """
        Build relations consistent with a hidden assignment and solve for the
        minimum-sum solution.

        Writes "relations" (triples (X, A, B)), "reference_answer" and
        "gold_answer" (the minimal sum) into self.parameter. At most
        N * (N - 1) relations are produced, even when M is larger.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        Xs = [random.randint(1, N) for i in range(N)]

        relations = self.parameter["relations"] = random.sample([(i, j) for i in range(N) for j in range(N) if i != j], min(M, N * (N - 1)))
        # Relation codes:
        #   X = 1: A = B
        #   X = 2: A < B
        #   X = 3: A ≥ B
        #   X = 4: A > B
        #   X = 5: A ≤ B
        # Only codes that hold for the hidden assignment Xs are eligible.
        for i, (A, B) in enumerate(relations) :
            if Xs[A] == Xs[B] :
                X_choices = (1, 3, 5)
            elif Xs[A] < Xs[B] :
                X_choices = (2, 5)
            elif Xs[A] > Xs[B] :
                X_choices = (3, 4)
            else :
                assert False, "Invalid relation: X[{}]={} and X[{}]={}".format(A, Xs[A], B, Xs[B])
            relations[i] = (random.choice(X_choices), A, B)


        # Edge u -> v with weight w encodes x[v] >= x[u] + w.
        adj = [[] for _ in range(N)] # adjacency[u] = list[(v, w)]

        for X, A, B in relations :
            if X == 1 : # equal
                adj[A].append((B, 0))
                adj[B].append((A, 0))
            elif X == 2 : # A < B ⇒ A→B, +1
                adj[A].append((B, 1))
            elif X == 3 : # A ≥ B ⇒ B→A, +0
                adj[B].append((A, 0))
            elif X == 4 : # A > B ⇒ B→A, +1
                adj[B].append((A, 1))
            else : # X == 5 A ≤ B ⇒ A→B, +0
                adj[A].append((B, 0))

        # ---------- Tarjan SCC ----------
        dfn = [-1] * N
        low = [0] * N
        stack, in_stk = [], [False] * N
        scc_id = [-1] * N
        time = 0
        sizes = [] # size per component
        scc_cnt = 0

        def tarjan(u : int) :
            nonlocal time, scc_cnt
            dfn[u] = low[u] = time
            time += 1
            stack.append(u)
            in_stk[u] = True

            for v, _ in adj[u] :
                if dfn[v] == -1 :
                    tarjan(v)
                    low[u] = min(low[u], low[v])
                elif in_stk[v] :
                    low[u] = min(low[u], dfn[v])

            if low[u] == dfn[u] : # root of an SCC
                sizes.append(0)
                while True :
                    node = stack.pop()
                    in_stk[node] = False
                    scc_id[node] = scc_cnt
                    sizes[scc_cnt] += 1
                    if node == u :
                        break
                scc_cnt += 1

        for i in range(N) :
            if dfn[i] == -1 :
                tarjan(i)

        # ---------- build condensed DAG ----------
        dag = [[] for _ in range(scc_cnt)]
        indeg = [0] * scc_cnt

        for u in range(N) :
            su = scc_id[u]
            for v, w in adj[u] :
                sv = scc_id[v]
                if su == sv :
                    if w == 1 : # c ≥ c + 1 impossible
                        assert False, "Impossible relation: c >= c + 1"
                else :
                    dag[su].append((sv, w))
                    indeg[sv] += 1

        # ---------- longest path on DAG ----------
        dp = [0] * scc_cnt
        q = deque(i for i in range(scc_cnt) if indeg[i] == 0)
        for i in q : # sources start at the smallest positive value, 1
            dp[i] = 1

        while q :
            u = q.popleft()
            for v, w in dag[u] :
                if dp[v] < dp[u] + w :
                    dp[v] = dp[u] + w
                indeg[v] -= 1
                if indeg[v] == 0 :
                    if dp[v] == 0 : # isolated source
                        dp[v] = 1
                    q.append(v)

        # ---------- final answer ----------
        self.parameter["reference_answer"] = " ".join(str(dp[scc_id[i]]) for i in range(N))
        self.parameter["gold_answer"] = sum(dp[comp] * sizes[comp] for comp in range(scc_cnt))
        assert self.parameter["gold_answer"] == sum(map(int, self.parameter["reference_answer"].split())) <= sum(Xs), "Gold answer should be less than or equal to sum(X)"


    def _prompt_generate(self) -> str :
        """
        Render the prompt.

        The announced count is the number of relations actually listed,
        which can be smaller than the requested M (see _generate).
        """
        X2symbol = {
            1 : "=",
            2 : "<",
            3 : "≥",
            4 : ">",
            5 : "≤",
        }
        relations = self.parameter["relations"]
        return self.prompt_template.format(
            N = self.parameter["N"],
            N_minus_1 = self.parameter["N"] - 1,
            M = len(relations),
            relations = "\n".join("x[{}] {} x[{}]".format(A, X2symbol[X], B) for X, A, B in relations),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None if that fails."""
        if answer is None :
            return None # Invalid answer format
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score an answer: relation part plus, if all relations hold, sum part.

        Returns the "wrong_format" reward for unparsable or wrong-length
        answers and the "invalid_solution" reward when any value is below 1.
        """
        x = self.processor(output)
        if x is None :
            return self.rewards["wrong_format"]
        assert isinstance(x, list), "processed_result should be a list"

        if len(x) != self.parameter["N"] :
            return self.rewards["wrong_format"]
        if not all(xi >= 1 for xi in x) :
            return self.rewards["invalid_solution"]

        reward = 0.0

        X2function = {
            1 : lambda a, b : a == b,
            2 : lambda a, b : a < b,
            3 : lambda a, b : a >= b,
            4 : lambda a, b : a > b,
            5 : lambda a, b : a <= b,
        }
        relations = self.parameter["relations"]
        satisfied = sum(int(X2function[X](x[A], x[B])) for X, A, B in relations)
        assert satisfied <= len(relations), "satisfied should be less than or equal to the number of relations"

        strategy_relation = self.rewards["rewarding_strategy_relation"]
        if strategy_relation == "(satisfied/all)^beta" :
            reward += self.rewards["rewarding_weight_relation"] * ((satisfied / len(relations)) ** self.rewards["rewarding_beta_relation"])
        elif strategy_relation == "satisfied=all" :
            reward += self.rewards["rewarding_weight_relation"] * (satisfied == len(relations))
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy_relation))

        # The sum is only compared with the optimum for fully feasible answers.
        if satisfied == len(relations) :
            gold, answer = self.parameter["gold_answer"], sum(x)
            assert gold <= answer, "Gold answer should be less than or equal to the answer"
            strategy_sum = self.rewards["rewarding_strategy_sum"]
            if strategy_sum == "(gold/answer)^beta" :
                reward += self.rewards["rewarding_weight_sum"] * ((gold / answer) ** self.rewards["rewarding_beta_sum"])
            elif strategy_sum == "gold=answer" :
                reward += self.rewards["rewarding_weight_sum"] * (gold == answer)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy_sum))

        return reward
class DifferentColorPairing_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2902
    """Task: pair up N coloured pearls so that both pearls of every pair differ in colour."""

    prompt_template = \
r"""There are {N} pearls, and each pearl has a color labeled from 1 to {M}. The number of pearls of each color is given as follows:
{C}

Please form exactly {N_div_2} pairs of pearls such that (1) each pearl belongs to exactly one pair; (2) the two pearls in each pair must have different colors. Output {N_div_2} lines, each containing two integers (separated by a space), representing the colors of the two pearls in one pair."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Initialize the DifferentColorPairing_Environment instance.

        Args:
            wrong_format: reward for unparsable answers or a wrong pair count.
            invalid_solution: reward when a pair uses an unknown colour or
                two equal colours.
            rewarding_strategy: "(satisfied/all)^beta" or "satisfied=all".
            rewarding_beta: exponent used by "(satisfied/all)^beta".
            rewarding_weight: multiplier applied to the strategy score.
            **kwargs: forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    def _generate(self) -> None :
        """
        Draw colour counts that admit a valid pairing and store one such pairing.

        Writes "M" (number of colours), "C" (pearls per colour, summing to N,
        no colour holding more than half of the pearls) and "reference_answer".
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 6, "N should be greater than or equal to 6"
        assert N % 2 == 0, "N should be even"

        M = self.parameter["M"] = random.randint(3, N - 1)

        while True :
            # M - 1 distinct cut points split 1..N into M positive parts.
            C = random.sample(range(1, N), M - 1)
            C.sort()
            C += [N]
            for i in range(M - 1, 0, -1) :
                C[i] -= C[i - 1]
            assert len(C) == M
            assert sum(C) == N
            assert all(Ci > 0 for Ci in C)
            # Retry while some colour outnumbers all other pearls together.
            if not any(Ci > N - Ci for Ci in C) :
                self.parameter["C"] = C
                break

        # Expand colors: 1 repeated C[0] times, 2 repeated C[1] times, ...
        colors = []
        for idx, cnt in enumerate(C, start = 1) :
            colors.extend([idx] * cnt)

        # Output pairs: i with i + N//2
        half = N // 2
        self.parameter["reference_answer"] = "".join(
            "{} {}\n".format(colors[i], colors[i + half]) for i in range(half)
        )


    def _prompt_generate(self) -> str :
        """Insert N, M and the per-colour counts into the prompt template."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_div_2 = N // 2,
            M = self.parameter["M"],
            C = "\n".join("Color {} has {} pearls".format(i, Ci) for i, Ci in enumerate(self.parameter["C"], start = 1)),
        )


    def _process(self, answer : Optional[str]) -> Optional[List[Tuple[int, int]]] :
        """
        Parse one "c1 c2" pair per non-empty line.

        Returns None when there is no answer, a token is not an integer, or
        a line does not hold exactly two tokens.
        """
        if answer is None :
            return None
        pairs = []
        try :
            for line in answer.strip().splitlines() :
                line = line.strip()
                if line :
                    c1, c2 = map(int, line.split())
                    pairs.append((c1, c2))
        except ValueError :
            # int() on a bad token and unpacking a wrong number of tokens both
            # raise ValueError; anything else (e.g. KeyboardInterrupt) propagates.
            return None
        return pairs


    def scorer(self, output : str) -> float :
        """
        Score an answer by the number of colours used exactly as often as given.

        Returns the "wrong_format" reward for unparsable answers or a wrong
        number of pairs, and the "invalid_solution" reward when a pair has an
        out-of-range colour or two equal colours.
        """
        pairs = self.processor(output)
        if pairs is None :
            return self.rewards["wrong_format"]

        M = self.parameter["M"]
        if len(pairs) != self.parameter["N"] // 2 :
            return self.rewards["wrong_format"]
        if not all(1 <= c1 <= M and 1 <= c2 <= M and c1 != c2 for c1, c2 in pairs) :
            return self.rewards["invalid_solution"]

        C = [0] * M
        for c1, c2 in pairs :
            C[c1 - 1] += 1
            C[c2 - 1] += 1
        satisfied = sum(Ci == gold_Ci for Ci, gold_Ci in zip(C, self.parameter["C"]))
        assert satisfied <= M, "Satisfaction level exceeded"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta" :
            return self.rewards["rewarding_weight"] * ((satisfied / M) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all" :
            return self.rewards["rewarding_weight"] * (satisfied == M)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def generate_test_points(num : int, low : float, high : float) -> List[float] :
    """
    Return `num` evenly spaced points from `low` to `high`, both ends included.

    Raises AssertionError when `num` is smaller than 2.
    """
    assert num >= 2, "num should be greater than or equal to 2"
    span = high - low
    last_index = num - 1
    points = []
    for step in range(num) :
        points.append(low + span * step / last_index)
    return points
+Example: `sin(2*x)/2` (do **NOT** include quotes or backticks).""" + test_points = generate_test_points(1024, -2, +2) + epsilon = 1E-5 + max_val = 1E+4 + + def __init__(self, + node_type_probs : Optional[List[float]] = None, + unary_ops_probs : Dict[str, float] = None, + binary_ops_probs : Dict[str, float] = None, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the Differentiate_Environment instance. + """ + super().__init__(**kwargs) + + if node_type_probs is None : + node_type_probs = (0.5, 0.5) + assert len(node_type_probs) == 2 and abs(sum(node_type_probs) - 1.0) < 1E-8, "node_type_probs should have length 2 and sum to 1" + self.node_type_probs = node_type_probs + + if unary_ops_probs is None : + unary_ops_probs = { + "sin" : 0.1, + "cos" : 0.1, + "exp" : 0.05, + "log" : 0.05, + "const_pow" : 0.1, + "const_add" : 0.25, + "const_mul" : 0.25, + "const_div" : 0.1, + } + assert abs(sum(unary_ops_probs.values()) - 1.0) < 1E-8, "unary_ops_probs values should sum to 1" + self.unary_ops_probs = unary_ops_probs + + if binary_ops_probs is None : + binary_ops_probs = { + "+" : 0.2, + "-" : 0.2, + "*" : 0.3, + "/" : 0.2, + "**" : 0.1, + } + assert abs(sum(binary_ops_probs.values()) - 1.0) < 1E-8, "binary_ops_probs values should sum to 1" + self.binary_ops_probs = binary_ops_probs + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + def _generate(self) -> None : + assert "node_num" in self.parameter, "node_num is required in parameter" + node_num = self.parameter["node_num"] + assert isinstance(node_num, int) and node_num >= 1, "node_num should be a positive integer" + + unary_ops, unary_probs = zip(*self.unary_ops_probs.items()) + binary_ops, binary_probs = zip(*self.binary_ops_probs.items()) + + x = sympy.symbols("x") + + def build_expr(n : int) -> sympy.Expr : + assert n >= 1, "n should be greater than or equal to 1" + 
if n == 1 : + return x + + if (random.choices(("unary", "binary"), weights = self.node_type_probs, k = 1)[0] if n >= 3 else "unary") == "unary" : + op = random.choices(unary_ops, weights = unary_probs, k = 1)[0] + sub = build_expr(n - 1) + if op == "sin" : + return sympy.sin(sub) + elif op == "cos" : + return sympy.cos(sub) + elif op == "exp" : + return sympy.exp(sub) + elif op == "log" : + return sympy.log(sub) + elif op == "const_pow" : + try : + if random.random() < 0.5 : + return sub ** (1 / sympy.Integer(random.randint(2, 4))) + else : # power + return sub ** sympy.Integer(random.randint(2, 4)) + except : + # Fall back to a safer option if fractional power fails + return sub ** sympy.Integer(random.randint(2, 4)) + elif op == "const_add" : + return sub + sympy.Integer(random.choice([-2, -1, +1, +2])) + elif op == "const_mul" : + if random.random() < 0.5 : # negative + return sub * -sympy.Integer(random.randint(2, 4)) + else : # positive + return sub * sympy.Integer(random.randint(2, 4)) + elif op == "const_div" : + return sub / sympy.Integer(random.randint(2, 4)) + else : + raise NotImplementedError(f"Unknown unary op: {op}") + else : # binary + op = random.choices(binary_ops, weights = binary_probs, k = 1)[0] + assert 1 <= (n - 1) - 1 + left_n = random.randint(1, (n - 1) - 1) + left = build_expr(left_n) + right = build_expr((n - 1) - left_n) + if op == "+" : + return left + right + elif op == "-" : + return left - right + elif op == "*" : + return left * right + elif op == "/" : + return left / right + elif op == "**" : + return left ** right + else : + raise NotImplementedError(f"Unknown binary op: {op}") + + while True : + try : + f_expr = build_expr(node_num) + # Add complexity check after building expression + if sympy.count_ops(f_expr) > 1000: + continue + self.parameter["function"] = str(f_expr) + + f_prime = sympy.diff(f_expr, x) + # Add complexity check after differentiation + if sympy.count_ops(f_prime) > 1000: + continue + 
self.parameter["reference_answer"] = str(f_prime) + + if not f_expr.free_symbols : + continue + if sympy.zoo in f_expr.atoms() or sympy.nan in f_expr.atoms() : + continue + elif sympy.zoo in f_prime.atoms() or sympy.nan in f_prime.atoms() : + continue + else : + f_prime_compute = sympy.lambdify(x, f_prime, modules = ["math"]) + valid_count = 0 + for pt in self.test_points : + try : + val = float(f_prime_compute(pt)) + except : + continue + if not math.isfinite(val) : + continue + if abs(val) > self.max_val : + valid_count = 0 + break + valid_count += 1 + if valid_count >= len(self.test_points) // 2 : + break + else : + continue + except : + continue + + def _prompt_generate(self) -> str : + return self.prompt_template.format(function = self.parameter["function"]) + + def _process(self, answer : Optional[str]) -> Optional[sympy.Expr] : + if answer is not None : + answer = answer.strip() + # Limit input string length to prevent parsing explosion + if len(answer) > 10000: + return None + try : + expr = sympy.sympify(answer) + return expr + except : + return None + else : + return None + + def scorer(self, output : str) -> float : + @timeout(10) # 10 second timeout + def _scorer_impl(): + processed_result = self.processor(output) + if processed_result is not None and isinstance(processed_result, sympy.Expr) : + x = sympy.symbols("x") + if processed_result.free_symbols - {x} : + return self.rewards["wrong_format"] + + # Check if processed_result is excessively complex compared to reference + try : + if sympy.count_ops(processed_result) > 4 * sympy.count_ops(sympy.sympify(self.parameter["reference_answer"])) : + return self.rewards["wrong_answer"] + except : + return self.rewards["wrong_format"] + + try : + expr = processed_result - sympy.sympify(self.parameter["reference_answer"]) + # Add complexity check after subtraction in scorer + if sympy.count_ops(expr) > 5000: + return self.rewards["wrong_answer"] + except : + return self.rewards["wrong_format"] + + eq = 
expr.is_zero + if eq is not None : + assert isinstance(eq, bool), "eq should be a boolean value" + if eq : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + + try : + expr_compute = sympy.lambdify(x, expr, modules = ["math"]) + except : + return self.rewards["wrong_answer"] + zero_count = 0 + for pt in self.test_points : + try : + val = float(expr_compute(pt)) + except : + continue + if not math.isfinite(val) : + continue + if abs(val) > self.epsilon : + return self.rewards["wrong_answer"] + else : + zero_count += 1 + + if zero_count >= len(self.test_points) // 4 : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] + + try: + return _scorer_impl() + except TimeoutException: # Catch the specific timeout exception + return -1.0 \ No newline at end of file diff --git a/server/Gym/environments/digit_lis_counting/__init__.py b/server/Gym/environments/digit_lis_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..93bd50b8ca9c7c071b5f2d75fdb3871bbb9fdc29 --- /dev/null +++ b/server/Gym/environments/digit_lis_counting/__init__.py @@ -0,0 +1 @@ +from .environment import DigitLISCounting_Environment \ No newline at end of file diff --git a/server/Gym/environments/digit_lis_counting/environment.py b/server/Gym/environments/digit_lis_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..4dc0ae2cfc0afa45dc7c30ad541c9cbd4cfe2dcc --- /dev/null +++ b/server/Gym/environments/digit_lis_counting/environment.py @@ -0,0 +1,115 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class DigitLISCounting_Environment(VerifiableEnvironment) : # Source : https://acm.hdu.edu.cn/showproblem.php?pid=4352 + prompt_template = \ +r"""Consider all integers N in the inclusive range **[{L}, {R}]**. Interpret each N as a string of decimal digits. 
+The **power** of N is defined as **the length of the longest strictly increasing subsequence** of its digits. + +Please count how many integers N within the range [{L}, {R}] have a **power value exactly equal to {K}**. + +**Output Format:** Your final answer should be a single integer — the total number of integers between {L} and {R} inclusive, for which the power is exactly {K}.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the DigitLISCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + # Generate a random integer R with exactly N digits (no leading zeros) + R = self.parameter["R"] = random.randint(10 ** (N - 1), 10 ** N - 1) + # Generate a random integer L, L <= R + L = self.parameter["L"] = random.randint(0, R) + K = self.parameter["K"] = random.randint(1, min(N, 10)) + + def new_sta(x, n) : + for i in range(n, 10) : + if (1 << i) & x : + return (x ^ (1 << i)) | (1 << n) + return x | (1 << n) + + def cal(x) : + return bin(x).count('1') + + def dfs(pos, sta, limit, lead) : + if pos == -1 : + return int(cal(sta) == K) + if not limit and not lead and dp[pos][sta][K] != -1 : + return dp[pos][sta][K] + up = a[pos] if limit else 9 + ans = 0 + for i in range(up + 1) : + new_state = 0 if lead and i == 0 else new_sta(sta, i) + ans += dfs(pos - 1, new_state, limit and i == up, lead and i == 0) + if not limit and not lead : + dp[pos][sta][K] = ans + return ans + + def solve(x) : + nonlocal a + pos = -1 + while x > 0 : + pos += 1 + a[pos] = x % 10 + x 
//= 10 + return dfs(pos, 0, True, True) + + dp = [[[-1 for _ in range(K + 1)] for _ in range(1025)] for _ in range(N + 1)] + a = [0] * N + self.parameter["reference_answer"] = solve(R) - solve(L-1) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + L = self.parameter["L"], + R = self.parameter["R"], + K = self.parameter["K"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/discrete_logarithm/__init__.py b/server/Gym/environments/discrete_logarithm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9bfeb1e17d25efc9e12f30ab91ad0a80b3ff75b2 --- /dev/null +++ b/server/Gym/environments/discrete_logarithm/__init__.py @@ -0,0 +1 @@ +from .environment import DiscreteLogarithm_Environment \ No newline at end of file diff --git a/server/Gym/environments/discrete_logarithm/environment.py 
b/server/Gym/environments/discrete_logarithm/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..93c1d6e722b1b09edddd166dc116a23d29e7e6bf --- /dev/null +++ b/server/Gym/environments/discrete_logarithm/environment.py @@ -0,0 +1,146 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment +import math + + +class DiscreteLogarithm_Environment(VerifiableEnvironment) : # Source : https://www.spoj.com/problems/MOD/ + prompt_template = \ +r"""Please find the **smallest** non-negative integer **y** such that **({X}^y) MOD {Z} = {K} MOD {Z}**. + +**Output Format:** Your final answer should be a single non-negative integer — the smallest **y** satisfying the equation.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = 0.0, rewarding_strategy : str = "(gold/answer)^beta", rewarding_beta : float = 2.0, rewarding_weight : float = 1.0, + **kwargs) : + """ + Initialize the DiscreteLogarithm_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + def _generate(self) -> None : + assert "MAX_Z" in self.parameter, "MAX_Z is required in parameter" + assert self.parameter["MAX_Z"] >= 2, "MAX_Z should be greater than or equal to 2" + Z = self.parameter["Z"] = random.randint(2, self.parameter["MAX_Z"]) + X = self.parameter["X"] = random.randint(2, Z) + Y = self.parameter["Y"] = random.randint(2, Z) + K = self.parameter["K"] = pow(X, Y, Z) + + def modular_log_solver(a, mod, r): + + def adjust(x, mod): + return (x % mod + mod) % mod + + def check(x, mod): + return adjust(x, mod) + + def power(a, n, mod): + s = 1 + x = a % mod + while n: + if n & 1: + s = s * x % mod + x = x * x % mod + n >>= 1 + return s + + def gcd(a, b): + return math.gcd(a, b) + + def exgcd(a, b): + if b == 0: + return (1, 0) + else: + x1, y1 = exgcd(b, a % b) + x, y = y1, x1 - (a // b) * y1 + return (x, y) + + def BSGS(a, r, mod): + a %= mod + r %= mod + T = int(round(math.sqrt(mod))) + a_T = power(a, T, mod) + H = {} + cur = r + for i in range(1, T+1): + cur = cur * a % mod + H[cur] = i + cur = a_T + for i in range(1, T+2): + val = cur + if val in H: + return i * T - H[val] + cur = cur * a_T % mod + return -1 + + def exBSGS(a, r, mod): + a %= mod + r %= mod + g = gcd(mod, a) + if r % g != 0: + if r == 1: + return 0 + else: + return -1 + if g == 1: + return BSGS(a, r, mod) + else: + iv, y = exgcd(a // g, mod // g) + iv = check(iv, mod // g) + res = exBSGS(a, r // g * iv % (mod // g), mod // g) + if res < 0: + return -1 + return res + 1 + + x = exBSGS(a, r, mod) + return x + + self.parameter["reference_answer"] = modular_log_solver(X, Z, K) + assert self.parameter["reference_answer"] >= 0, "ans should be non-negative" + + def _prompt_generate(self) -> str : + return 
self.prompt_template.format(X = self.parameter["X"], Z = self.parameter["Z"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if processed_result == self.parameter["reference_answer"] : + return self.rewards["rewarding_weight"] + + if pow(self.parameter["X"], processed_result, self.parameter["Z"]) != self.parameter["K"] : + return self.rewards["invalid_answer"] + + assert processed_result >= self.parameter["reference_answer"], "processed_result should be greater than or equal to reference_answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((self.parameter["reference_answer"] / processed_result) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + assert self.parameter["reference_answer"] != processed_result + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/disinfection/__init__.py b/server/Gym/environments/disinfection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f0d5fb1c3cb061d215b8303f6ae962736ab8ede8 --- /dev/null +++ b/server/Gym/environments/disinfection/__init__.py @@ -0,0 +1 @@ +from .environment import Disinfection_Environment diff --git a/server/Gym/environments/disinfection/environment.py 
b/server/Gym/environments/disinfection/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..028c34cd4b756149d735197ad4e52a972ec4fef8 --- /dev/null +++ b/server/Gym/environments/disinfection/environment.py @@ -0,0 +1,198 @@ +import random +from typing import Optional, List, Tuple +from ...environment import VerifiableEnvironment + + +class Disinfection_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a 3D cube of dimensions {A} × {B} × {C} (0-indexed). Some cells in the cube contain the value 1, and the rest are 0. The coordinates of the cells with value 1 are: +{one_coordinates} + +In one operation, you may select a contiguous sub-cube defined by ranges: x ∈ [x1, x2) y ∈ [y1, y2) z ∈ [z1, z2), where 0 ≤ x1 < x2 ≤ {A}, 0 ≤ y1 < y2 ≤ {B}, and 0 ≤ z1 < z2 ≤ {C}. This operation sets **all** values in the sub-cube to 0. The cost of this operation is defined as min(x2 - x1, y2 - y1, z2 - z1). +Please set **all** values in the cube to 0 using a set of such operations with the **minimum total cost**. + +**Output Format:** Output multiple lines. Each line should contain six integers `x1 x2 y1 y2 z1 z2` (do **NOT** include quotes or backticks), separated by spaces, representing one operation.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the Disinfection_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "unsuccessful_solution" : unsuccessful_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_A_B_C" in self.parameter, "MAX_A_B_C is required in parameter" + MAX_A_B_C = self.parameter["MAX_A_B_C"] + assert MAX_A_B_C >= 2, "MAX_A_B_C should be greater than or equal to 2" + + while True : + A, B, C = self.parameter["A"], self.parameter["B"], self.parameter["C"] = random.randint(1, MAX_A_B_C), random.randint(1, MAX_A_B_C), random.randint(1, MAX_A_B_C) + if A != 1 or B != 1 or C != 1 : + break + subA, subB, subC = random.sample(range(A), random.randint(1, A)), random.sample(range(B), random.randint(1, B)), random.sample(range(C), random.randint(1, C)) + one_coordinates = self.parameter["one_coordinates"] = random.sample([(x, y, z) for x in subA for y in subB for z in subC], random.randint(1, len(subA) * len(subB) * len(subC))) + random.shuffle(one_coordinates) + + + def solve_one_case() -> None: + DIMS = [A, B, C] + + # ---------- find the shortest axis ---------- + pos = DIMS.index(min(DIMS)) # 0, 1 or 2 + SMALL = DIMS[pos] # length of the short axis + + # Decide which of the remaining two axes is "left" (U side of the + # bipartite graph) and which is "right" (V side). The original + # code always put the first coordinate **not equal to `pos`** on + # the left, so we do the same. 
+ if pos == 0: + left_len, right_len = B, C # U = j, V = k + elif pos == 1: + left_len, right_len = A, C # U = i, V = k + else: # pos == 2 + left_len, right_len = A, B # U = i, V = j + + CNT = max(left_len, right_len) # array size used in the C++ code + + # ---------- build the 3-D grid and the edge list ---------- + GRID = [[[0] * C for _ in range(B)] for _ in range(A)] + adjacency = [[] for _ in range(CNT)] # list[ list[ (v, layer) ] ] + + # helper to add an (undirected) edge with its layer index + def add_edge(u: int, v: int, layer: int) -> None: + adjacency[u].append((v, layer)) + + for i, j, k in one_coordinates: + if pos == 0: # short axis = i + u, v, layer = j, k, i + elif pos == 1: # short axis = j + u, v, layer = i, k, j + else: # short axis = k + u, v, layer = i, j, k + add_edge(u, v, layer) + + # ---------- variables used in the recursive search ---------- + SEL = [False] * SMALL # which layers of the short axis are chosen + VIS = [0] * CNT # time-stamped visitation array + MATCH = [-1] * CNT # right-side match array (my[ ] in C++) + cur_time = 0 # global DFS clock + best_answer = [10 ** 9] # wrapped in list for closure mutability + + # ---------- depth-first search for augmenting paths ---------- + def dfs(u: int) -> bool: + nonlocal cur_time + for v, lay in adjacency[u]: + if SEL[lay]: # layer already paid for + continue + if VIS[v] == cur_time: # already visited in this search + continue + VIS[v] = cur_time + if MATCH[v] == -1 or dfs(MATCH[v]): + MATCH[v] = u + return True + return False + + # ---------- run a Hungarian style matching on surviving edges ---------- + def run_matching(paid: int) -> int: + """Return paid + |maximum matching| (early-terminate if ≥ best).""" + nonlocal cur_time, MATCH + MATCH = [-1] * CNT + matched = 0 + for u in range(CNT): + cur_time += 1 + if dfs(u): + matched += 1 + if paid + matched >= best_answer[0]: + return paid + matched # prune + return paid + matched + + # ---------- enumerate every subset of the short axis 
---------- + def enumerate_layers(depth: int, paid: int) -> None: + if depth == SMALL: # considered all layers + cost = run_matching(paid) + if cost < best_answer[0]: + best_answer[0] = cost + return + # Case 1: pay for this layer + SEL[depth] = True + enumerate_layers(depth + 1, paid + 1) + # Case 2: do not pay + SEL[depth] = False + enumerate_layers(depth + 1, paid) + + enumerate_layers(0, 0) + self.parameter["gold_answer"] = best_answer[0] + assert self.parameter["gold_answer"] > 0, "Gold answer should be greater than 0" + solve_one_case() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + A = self.parameter["A"], + B = self.parameter["B"], + C = self.parameter["C"], + one_coordinates = "\n".join("({},{},{})".format(x, y, z) for x, y, z in self.parameter["one_coordinates"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[Tuple[int, int, int, int, int, int]]] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + matrix.append(tuple(map(int, line.split()))) + if not all(len(row) == 6 for row in matrix) : + return None + return matrix + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + answer, gold = 0, self.parameter["gold_answer"] + disinfected = [[[False] * self.parameter["C"] for _ in range(self.parameter["B"])] for _ in range(self.parameter["A"])] + for x1, x2, y1, y2, z1, z2 in processed_result : + if not (0 <= x1 < x2 <= self.parameter["A"]) : + return self.rewards["invalid_solution"] + if not (0 <= y1 < y2 <= self.parameter["B"]) : + return self.rewards["invalid_solution"] + if not (0 <= z1 < z2 <= self.parameter["C"]) : + return self.rewards["invalid_solution"] + for x in range(x1, x2) : + for y 
in range(y1, y2) : + for z in range(z1, z2) : + disinfected[x][y][z] = True + answer += min(x2 - x1, y2 - y1, z2 - z1) + for x, y, z in self.parameter["one_coordinates"] : + if not disinfected[x][y][z] : + return self.rewards["unsuccessful_solution"] + assert gold <= answer, "Gold answer should be less than or equal to the answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/distinct_array_permutation/__init__.py b/server/Gym/environments/distinct_array_permutation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7bbcad3a79dce6e06e4db1f512108fd1dde49c16 --- /dev/null +++ b/server/Gym/environments/distinct_array_permutation/__init__.py @@ -0,0 +1 @@ +from .environment import DistinctArrayPermutation_Environment \ No newline at end of file diff --git a/server/Gym/environments/distinct_array_permutation/environment.py b/server/Gym/environments/distinct_array_permutation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..3d356658b7b962f62683f76cc80f0ac254be04dd --- /dev/null +++ b/server/Gym/environments/distinct_array_permutation/environment.py @@ -0,0 +1,119 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class DistinctArrayPermutation_Environment(VerifiableEnvironment): + prompt_template = \ +r"""You are given an array A with {N} distinct integers (1-indexing): {array} + +Construct an array B by permuting A such that for every non-empty proper subset of indices S = {{x1, x2, ..., xk}} 
(1 ≤ xi ≤ {N}, 0 < k < {N}) the sums of elements on that positions in A and B are different. + +Your final answer should be a single line containing the permuted array B's elements in order, separated by spaces.""" + + def __init__(self, + wrong_format: float = -1.0, + invalid_solution: float = -0.5, + incorrect_solution: float = 0, + correct_solution: float = 1.0, + **kwargs): + """ + Initialize the DistinctArrayPermutation_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "incorrect_solution": incorrect_solution, + "correct_solution": correct_solution, + } + + def _find_valid_permutation(self, arr: List[int]) -> List[int]: + """ + Find a valid permutation of arr such that all subset sums are different. + Uses the elegant solution: sort indices by values, then cyclically assign next value. + """ + n = len(arr) + + # Sort indices by the values in the array + p = sorted([i for i in range(n)], key=lambda x: arr[x]) + + # Create the permutation + b = [0] * n + for i in range(n): + b[p[i]] = arr[p[(i + 1) % n]] + + return b + + def _is_valid_permutation(self, arr_a: List[int], arr_b: List[int]) -> bool: + """ + Check if arr_b is a valid permutation that satisfies the condition. 
+ """ + n = len(arr_a) + + # Check if it's actually a permutation + if sorted(arr_a) != sorted(arr_b): + return False + + # Check all non-empty proper subsets + for mask in range(1, (1 << n) - 1): # From 1 to 2^n - 2 + sum_a = 0 + sum_b = 0 + for i in range(n): + if mask & (1 << i): + sum_a += arr_a[i] + sum_b += arr_b[i] + + if sum_a == sum_b: + return False + + return True + + + def _generate(self) -> None: + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be at least 3" + + # Generate array with distinct integers using max_value = 2*N + # Yes, random.sample() returns a list of N unique elements sampled from range(max_value) + self.parameter["array"] = random.sample(range(2 * N), N) + self.parameter["reference_answer"] = " ".join(map(str, self._find_valid_permutation(self.parameter["array"]))) + + + def _prompt_generate(self) -> str: + return self.prompt_template.format( + N = self.parameter["N"], + array = " ".join(map(str, self.parameter["array"])), + ) + + + def _process(self, answer: Optional[str]) -> Optional[List[int]]: + if answer is not None: + answer = answer.strip() + try: + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError: + return None # Invalid answer format + else: + return None # Invalid answer format + + + def scorer(self, output: str) -> float: + processed_result = self.processor(output) + if processed_result is not None: + assert isinstance(processed_result, list), "processed_result should be a list" + # Check if it's a valid permutation + if sorted(processed_result) != sorted(self.parameter["array"]): + return self.rewards["invalid_solution"] + + # Check if it satisfies the distinct subset sum condition + if self._is_valid_permutation(self.parameter["array"], processed_result): + return self.rewards["correct_solution"] # Correct solution + else: + return self.rewards["incorrect_solution"] # Invalid permutation + + else: + return 
self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/distinct_edge_colored_complete_graph_counting/__init__.py b/server/Gym/environments/distinct_edge_colored_complete_graph_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bcd7f58eb67263bb1cdf0b7bb17088ddda7fe689 --- /dev/null +++ b/server/Gym/environments/distinct_edge_colored_complete_graph_counting/__init__.py @@ -0,0 +1 @@ +from .environment import DistinctEdgeColoredCompleteGraphCounting_Environment diff --git a/server/Gym/environments/distinct_edge_colored_complete_graph_counting/environment.py b/server/Gym/environments/distinct_edge_colored_complete_graph_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..5bd70858a4e48e72012f81f08aeefea6e49a3639 --- /dev/null +++ b/server/Gym/environments/distinct_edge_colored_complete_graph_counting/environment.py @@ -0,0 +1,120 @@ +import random +from math import gcd +from typing import Optional +from ...environment import VerifiableEnvironment + + +class DistinctEdgeColoredCompleteGraphCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4128 + prompt_template = r"""Consider all **complete undirected graphs** on vertices 1, 2, ..., {N}, where each edge is assigned a color from {M} colors (labeled from 1 to {M}). Two such graphs G and G' are considered **the same** if there exists a permutation p of the vertices such that for every unordered pair (u, v), the color of edge (u, v) in G equals the color of edge (p(u), p(v)) in G'. 
What's the number of **distinct** graphs under this equivalence (i.e., the number of non-isomorphic M-colored complete graphs on N vertices) (output the result modulo {MOD})?""" + MODs = (666623333, 998244353, 10 ** 9 + 7) + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the DistinctEdgeColoredCompleteGraphCountingProblem instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + M = self.parameter["M"] = random.randint(2, N * (N - 1) // 2) + MOD = self.parameter["MOD"] = random.choice(self.MODs) + + + # Modular exponentiation + def qPow(b, e): + a = 1 + b %= MOD + while e: + if e & 1: + a = (a * b) % MOD + b = (b * b) % MOD + e >>= 1 + return a + + # Precompute inverses, factorials, inverse factorials up to N + Inv = [0] * (N + 1) + Fac = [0] * (N + 1) + iFac = [0] * (N + 1) + + def Init(limit): + Inv[1] = 1 + for i in range(2, limit + 1): + Inv[i] = (MOD - MOD // i) * Inv[MOD % i] % MOD + Fac[0] = 1 + iFac[0] = 1 + for i in range(1, limit + 1): + Fac[i] = (Fac[i - 1] * i) % MOD + iFac[i] = (iFac[i - 1] * Inv[i]) % MOD + + # Globals mirroring the C++ code + Sum = 0 + stk = [0] # sentinel to mimic C++ global zero-initialized array + t = 0 + n1 = 0 + n2 = 1 + + def DFS(s, mx, c): + nonlocal Sum, t, n1, n2 + if s == 0: + Sum = (Sum + qPow(M, n1) * n2) % MOD + return + a = n1 + b = n2 + for i in range(1, mx + 1): + stk.append(i) + t += 1 + n1 = a + i // 2 + for j in range(1, t): + n1 += gcd(stk[j], i) + n2 = b * Inv[i] % MOD + if i == stk[t - 1]: + n2 = n2 * Fac[c] % MOD * iFac[c + 1] % MOD + DFS(s - i, min(s - i, i), c + 1 
class Division_Environment(VerifiableEnvironment) :
    """
    Integer-division task.

    A dividend `a` and a divisor `b` are sampled, one of `prompt_templates`
    is chosen to phrase the question, and the expected answer is `a // b`.
    """

    # Every template is filled with `.format(a, b)`, i.e. (dividend, divisor).
    # The "fit into" phrasing names the divisor first, so it uses explicit
    # indices; with auto-numbering it would ask for b // a instead of a // b.
    prompt_templates = (
        "What is the result of {} divided by {}? Round down to the nearest integer.",
        "Compute {} divided by {}, rounding down to the nearest whole number.",
        "Find the integer part of {} divided by {}.",
        "Compute {} divided by {}, discarding the remainder.",
        "What is the quotient when {} is divided by {}, using integer division?",
        "If you divide {} by {}, what is the whole number result?",
        "Give me the result of {} divided by {} (rounded down).",
        "How many full times does {1} fit into {0}?",
        "What do you get when you divide {} by {} and round down?",
        "Determine the integer result of {} divided by {}.",
    ) # This is probably unnecessary, but just in case we need to diversify the prompt templates.

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the Division_Environment instance.

        Args:
            wrong_format: reward returned when the answer cannot be parsed as an integer.
            correct_answer: reward returned when the answer equals the reference.
            wrong_answer: reward returned for any other integer answer.
            **kwargs: forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Sample a division instance.

        Reads `divisor_digit_num` and `answer_digit_num` from `self.parameter`
        and writes `a`, `b`, `reference_answer` and the index of the chosen
        prompt template (`prompt_template`).
        """
        assert "divisor_digit_num" in self.parameter, "divisor_digit_num is required in parameter"
        divisor_digit_num = self.parameter["divisor_digit_num"]
        assert divisor_digit_num >= 1, "divisor_digit_num should be greater than or equal to 1"

        assert "answer_digit_num" in self.parameter, "answer_digit_num is required in parameter"
        answer_digit_num = self.parameter["answer_digit_num"]
        assert answer_digit_num >= 1, "answer_digit_num should be greater than or equal to 1"

        # a = b * quotient + remainder with 0 <= remainder < b, so a // b is
        # exactly the sampled quotient (at most `answer_digit_num` digits).
        self.parameter["b"] = random.randint(1, 10 ** divisor_digit_num - 1)
        self.parameter["a"] = self.parameter["b"] * random.randint(0, 10 ** answer_digit_num - 1) + random.randint(0, self.parameter["b"] - 1)

        self.parameter["reference_answer"] = self.parameter["a"] // self.parameter["b"]

        self.parameter["prompt_template"] = random.randrange(len(self.prompt_templates))

    def _prompt_generate(self) -> str :
        """Render the chosen template with (dividend, divisor)."""
        return self.prompt_templates[self.parameter["prompt_template"]].format(self.parameter["a"], self.parameter["b"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the extracted answer as an integer; return None when it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """Return the reward for `output`: wrong_format, correct_answer or wrong_answer."""
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
def _generate(self) -> None :
    """
    Sample a random light configuration and compute its reference answer.

    Reads `N` from `self.parameter`. Writes `K`, `state` (a 1-indexed list
    whose slot 0 is None) and `reference_answer`, which is the computed
    expectation multiplied by N! and reduced modulo `self.MOD`.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 1, "N should be greater than or equal to 1"

    mod = self.MOD

    K = random.randint(0, N)
    self.parameter["K"] = K

    # One shared bias per instance, then one draw per light.
    one_probability = random.random()
    lights = [None]
    for _ in range(N) :
        lights.append(1 if random.random() < one_probability else 0)
    self.parameter["state"] = list(lights)

    # Modular inverses of 1..N.
    inverse = [0, 1] + [0] * (N - 1)
    for i in range(2, N + 1) :
        inverse[i] = (mod - mod // i) * inverse[mod % i] % mod

    # divisors_of[j] lists every d in 1..N that divides j.
    divisors_of = [[] for _ in range(N + 1)]
    for d in range(1, N + 1) :
        for multiple in range(d, N + 1, d) :
            divisors_of[multiple].append(d)

    # Greedy from the largest index down: a lit light i is cleared by
    # pressing switch i, which toggles every divisor of i.
    required = 0
    for switch in reversed(range(1, N + 1)) :
        if lights[switch] != 1 :
            continue
        for d in divisors_of[switch] :
            lights[d] ^= 1
        required += 1

    if required <= K :
        expected = required
    else :
        # step[i] follows step[i] = 1 + (N - i) * (step[i + 1] + 1) / i (mod MOD);
        # presumably the expected random presses to go from i to i - 1
        # outstanding switches -- confirm against the problem source.
        step = [0] * (N + 1)
        step[N] = 1
        for i in range(N - 1, 0, -1) :
            step[i] = (1 + (N - i) * ((step[i + 1] + 1) % mod) * inverse[i]) % mod
        expected = (sum(step[K + 1 : required + 1]) % mod + K) % mod

    factorial = 1
    for i in range(1, N + 1) :
        factorial = factorial * i % mod

    self.parameter["reference_answer"] = expected * factorial % mod
+from typing import Optional +from ...environment import VerifiableEnvironment + + +class DoubleCrossCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3221 + prompt_template = \ +r"""A **double cross** is a specific shape consisting of two horizontal and one vertical segments of `1`s. For example: +``` +.......... +....1..... ..1.. +..11111... .111. +....1..... ..1.. +.1111111.. 11111 +....1..... ..1.. +....1..... +.......... +``` +A valid double cross must satisfy the following conditions: +- The two horizontal segments must not lie on adjacent rows. +- The vertical segment must extend strictly above and strictly below the two horizontal segments. +- The vertical segment must divide both horizontal segments into two equal halves. +- The upper horizontal segment must be strictly shorter than the lower one. +- (Thus, the example on the right is the smallest valid double cross.) + +In the following example, we are given a 0/1 matrix: +``` +10001011 +10111111 +10001101 +11111110 +11111111 +11101011 +``` +There are 5 valid double crosses in this matrix: +``` +....1... ....1... ....1... +...111.. ...111.. ...111.. +....1... ....1... ....1... +..11111. ..11111. ....1... +....1... ....1... ..11111. +........ ....1... ....1... + +....1... ....1... +...111.. ..11111. +....1... ....1... +....1... ....1... +.1111111 .1111111 +....1... ....1... +``` + +Now, given a 0/1 matrix of size {N} × {M}, where each cell is either `1` or `0`. 
The coordinates of 0-cells are given as follows (0-indexed): +{zero_coordinates} + +More formally, a double cross in the matrix (assuming 0-indexed rows and columns) is defined by the following parameters: +- Four row indices: x_top, x_up, x_down, x_bottom, satisfying: 0 ≤ x_top < x_up, x_up + 1 < x_down < x_bottom < {N} +- One column index y_mid, and two integers up_len and down_len, such that: 0 ≤ y_mid < {M}, 1 ≤ up_len < down_len, and y_mid - down_len ≥ 0, y_mid + down_len < {M} +- The vertical segment of the cross is formed by the column y_mid spanning from x_top to x_bottom, and all cells (x, y_mid) for x_top ≤ x ≤ x_bottom must be `1` +- The upper horizontal segment lies on row x_up, and all cells (x_up, y) for y_mid - up_len ≤ y ≤ y_mid + up_len must be `1`; The lower horizontal segment lies on row x_down, and all cells (x_down, y) for y_mid - down_len ≤ y ≤ y_mid + down_len must be `1` + + +Please compute how many valid double crosses exist in the matrix.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 3.0, + **kwargs) : + """ + Initialize the DoubleCrossCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 5, "MAX_N_M should be greater than or equal to 5" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(5, MAX_N_M), random.randint(5, MAX_N_M) + assert N >= 5 and M >= 5, "N and M should be greater than or equal to 5" + zero_coordinates = self.parameter["zero_coordinates"] = random.sample([(x, y) for x in range(N) for y in range(M)], random.randint(1, int(N * M * 0.25))) + + + size = N * M + 1 # 1-based indexing + vis = [True] * size # True => '1', False => '0' + + for x, y in zero_coordinates: + x += 1 + y += 1 + vis[(x - 1) * M + y] = False + + # ------------------------------------------------------------ + # 2. 
pre-compute arm lengths + # ------------------------------------------------------------ + L = [0] * size # horizontal half-length (min of both sides) – 1 + U = [0] * size # vertical length upward – 1 + D = [0] * size # vertical length downward – 1 + + # left sweep + for r in range(1, N + 1): + streak = 0 + base = (r - 1) * M + for c in range(1, M + 1): + idx = base + c + streak = streak + 1 if vis[idx] else 0 + L[idx] = streak + + # right sweep + for r in range(1, N + 1): + streak = 0 + base = (r - 1) * M + for c in range(M, 0, -1): + idx = base + c + streak = streak + 1 if vis[idx] else 0 + L[idx] = min(L[idx], streak) + if L[idx]: + L[idx] -= 1 # exclude the centre cell + + # upward sweep + for c in range(1, M + 1): + streak = 0 + idx = c + for r in range(1, N + 1): + streak = streak + 1 if vis[idx] else 0 + U[idx] = streak - 1 if streak else 0 + idx += M + + # downward sweep + for c in range(1, M + 1): + streak = 0 + idx = (N - 1) * M + c + for r in range(N, 0, -1): + streak = streak + 1 if vis[idx] else 0 + D[idx] = streak - 1 if streak else 0 + idx -= M + + # ------------------------------------------------------------ + # 3. 
Fenwick tree with “three-dimensional” coefficient arrays A, B, C + # (range-update, prefix-sum query for quadratic weights) + # ------------------------------------------------------------ + A = [0] * (M + 1) + B = [0] * (M + 1) + C = [0] * (M + 1) + tag = [0] * (M + 1) # lazy versioning for O(#updates) clearing + version = 1 + + def lb(x: int) -> int: # lowest set bit + return x & -x + + def fenwick_add(x: int, w: int) -> None: + """point-update helper used by the range-add routine""" + i = x + while i <= M: + if tag[i] != version: # clear lazily if we are in a new version + tag[i] = version + A[i] = B[i] = C[i] = 0 + A[i] += w + B[i] += x * w + C[i] += (x * x) * w + i += lb(i) + + def range_add(l: int, r: int, w: int) -> None: + """add w to every position in [l, r] (1-based, inclusive)""" + if l > r or w == 0: + return + fenwick_add(l, w) + fenwick_add(r + 1, -w) + + def prefix_query(x: int) -> int: + """∑_{t ≤ x} ( (t + 3)·t + 2 )/2 * freq(t) where freq(t) is the value after range adds""" + if x <= 0: + return 0 + s1 = s2 = s3 = 0 + i = x + while i: + if tag[i] == version: + s1 += A[i] + s2 += B[i] + s3 += C[i] + i -= lb(i) + res = ((x + 3) * x + 2) + res = (res * s1 + s3 - (2 * x + 3) * s2) + return res // 2 + + # ------------------------------------------------------------ + # 4. 
sweep each column, building counts on the fly + # ------------------------------------------------------------ + answer = 0 + + for col in range(2, M): # centres cannot be on the very first/last column + version += 1 # “clear” the Fenwick tree for this column + + for row in range(3, N): # need at least two rows above & below + idx = (row - 1) * M + col + + if not vis[idx]: # a ‘0’ breaks the vertical arm + version += 1 # (lazy clear) + continue + + # take current cell as *lower* horizontal bar + if L[idx]: + answer += D[idx] * prefix_query(L[idx] - 1) + + # push the row immediately above as a candidate *upper* bar + upper = idx - M + if L[upper] and U[upper]: + range_add(1, L[upper], U[upper]) + + self.parameter["reference_answer"] = answer + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + zero_coordinates = "\n".join("({}, {})".format(x, y) for x, y in self.parameter["zero_coordinates"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + if processed_result == 0 : + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == 0) + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: 
class DoublePalindromicStringCounting_Environment(VerifiableEnvironment) :
    """
    Counting task: number of distinct strings of length at most N over the
    alphabet {1..C} that are a concatenation of two non-empty palindromes.
    """

    prompt_template = \
r"""We define a string `S` as **double palindromic** if it satisfies all of the following conditions:
- Each character in `S` is an integer between `1` and `{C}` (inclusive).
- `S` can be written as the concatenation of two **non-empty palindromic strings**, `S1` and `S2`, such that `S = S1 + S2`.

Please count the number of **distinct double palindromic strings** of length **at most** `{N}`.

**Output Format:** Your final answer should be a single integer — the total number of such distinct double palindromic strings.
"""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the DoublePalindromicStringCounting_Environment instance.

        Args:
            wrong_format: reward for an unparsable or non-positive answer.
            rewarding_strategy: "(min/max)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by "(min/max)^beta".
            **kwargs: forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Sample N in [2, MAX_N] and compute the reference answer for (N, C).

        Reads `MAX_N` and `C` from `self.parameter`; writes `N` and
        `reference_answer`.
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 2, "MAX_N should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, MAX_N)

        assert "C" in self.parameter, "C is required in parameter"
        C = self.parameter["C"]
        assert C >= 1, "C should be greater than or equal to 1"

        if C == 1 :
            # The closed forms below divide by C - 1. With a single symbol
            # there is exactly one string per length, and every length >= 2
            # splits into two non-empty palindromes, so lengths 2..N count.
            self.parameter["reference_answer"] = N - 1
            return


        def pre(N):
            # Linear sieve. Returns prefix sums of the Moebius function and of
            # the multiplicative function f with f(p^k) = 1 - p.
            mu = [0] * (N+1)
            f_pref = [0] * (N+1)
            is_comp = [False] * (N+1)
            primes = []

            mu[1] = 1
            f_pref[1] = 1

            for i in range(2, N+1):
                if not is_comp[i]:
                    primes.append(i)
                    mu[i] = -1
                    f_pref[i] = 1 - i
                for p in primes:
                    ip = i * p
                    if ip > N:
                        break
                    is_comp[ip] = True
                    if i % p == 0:
                        # p already divides i: mu stays 0, f is unchanged
                        f_pref[ip] = f_pref[i]
                        break
                    mu[ip] = -mu[i]
                    f_pref[ip] = f_pref[i] * (1 - p)
            for i in range(1, N+1):
                mu[i] += mu[i-1]
                f_pref[i] += f_pref[i-1]
            return mu, f_pref

        def S(n):
            # 1 + 2 + ... + n
            return n*(n+1)//2

        def make_calc1(f_pref, N):
            # Prefix sum of f: table lookup for n <= N, memoised
            # divisor-block recursion above the table.
            memo = {}
            def calc1(n):
                if n <= N:
                    return f_pref[n]
                if n in memo:
                    return memo[n]
                res = n
                i = 2
                while i <= n:
                    t = n // i
                    last = n // t
                    res -= (S(last) - S(i-1)) * calc1(t)
                    i = last + 1
                memo[n] = res
                return res
            return calc1

        def make_calc2(mu_pref, N):
            # Prefix sum of the Moebius function, same scheme as calc1.
            memo = {}
            def calc2(n):
                if n <= N:
                    return mu_pref[n]
                if n in memo:
                    return memo[n]
                res = 1
                i = 2
                while i <= n:
                    t = n // i
                    last = n // t
                    res -= (last - i + 1) * calc2(t)
                    i = last + 1
                memo[n] = res
                return res
            return calc2

        def query1(n, C, den):
            # (t*(4n-2) - 4*(t-C)/(C-1)) / (C-1) with t = C^(n+1)
            t = pow(C, n+1)
            # first subtract the geometric-sum piece:
            part = 4 * (t - C) // den
            return (t * (4*n - 2) - part) // den

        def querysum(n, C, den):
            half = n // 2
            # sum up to half:
            s_half = query1(half, C, den)
            t = pow(C, half+1)
            extra = (n + half) if (n & 1) else half
            return s_half + t * extra

        def solve1(N, C, calc1_fn, den):
            # Sum over blocks of i on which N // i is constant.
            ans = 0
            i = 1
            while i <= N:
                t = N // i
                last = N // t
                ans += (querysum(last, C, den) - querysum(i-1, C, den)) * calc1_fn(t)
                i = last + 1
            return ans

        def query2(n, C, den):
            half = n // 2
            t = pow(C, half+1)
            # 2*(t-C)/(C-1) + (t if odd)
            base = 2 * (t - C) // den
            return base + (t if (n & 1) else 0)

        def solve2(N, C, calc2_fn, den):
            # Same block decomposition as solve1, weighted by the Moebius prefix sums.
            ans = 0
            i = 1
            while i <= N:
                t = N // i
                last = N // t
                ans += (query2(last, C, den) - query2(i-1, C, den)) * calc2_fn(t)
                i = last + 1
            return ans

        den = C - 1 # non-zero here: C == 1 was handled above
        mu_pref, f_pref = pre(N)
        calc1_fn = make_calc1(f_pref, N)
        calc2_fn = make_calc2(mu_pref, N)
        answer = solve1(N, C, calc1_fn, den) - solve2(N, C, calc2_fn, den)

        self.parameter["reference_answer"] = answer

    def _prompt_generate(self) -> str :
        """Render the prompt for the sampled N and the configured C."""
        return self.prompt_template.format(N = self.parameter["N"], C = self.parameter["C"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the extracted answer as an integer; return None when it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None

    def scorer(self, output : str) -> float :
        """
        Score `output` against the reference answer.

        Non-positive or unparsable answers get the wrong_format reward;
        otherwise the configured rewarding strategy is applied.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result <= 0 :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
+ +Please find a sequence of operations that transforms the initial queue into the output sequence: {sequence} + +**Output Format:** A single line containing the sequence of operations (`a`, `b`, `c`, or `d`) without spaces or additional characters.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the DoubleStackSorting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + operation_distribution = [random.randint(1, N) for _ in range(4)] + operation_distribution = [weight / sum(operation_distribution) for weight in operation_distribution] + + self.parameter["reference_answer"] = "" + + S1, S2 = [], [] + output_sequence = self.parameter["output_sequence"] = [] + queue_front = 0 + while len(output_sequence) < N : + operation = random.choices(["a", "b", "c", "d"], weights = operation_distribution, k = 1)[0] + if operation == "a" and queue_front < N : + self.parameter["reference_answer"] += "a" + S1.append(queue_front) + queue_front += 1 + elif operation == "b" and S1 : + self.parameter["reference_answer"] += "b" + output_sequence.append(S1.pop()) + elif operation == "c" and queue_front < N : + self.parameter["reference_answer"] += "c" + S2.append(queue_front) + queue_front += 1 + elif operation == "d" and S2 : + self.parameter["reference_answer"] += "d" + output_sequence.append(S2.pop()) + assert len(self.parameter["reference_answer"]) == N * 2, "reference_answer should have 
def scorer(self, output : str) -> float :
    """
    Replay the submitted operation string and score the sequence it produces.

    Returns the wrong_format reward when nothing could be extracted or an
    unknown operation character is met, the invalid_solution reward when an
    operation is impossible or the run does not output exactly N elements,
    and otherwise the reward given by the configured strategy.
    """
    operations = self.processor(output)
    if operations is None :
        return self.rewards["wrong_format"]

    total = self.parameter["N"]
    first_stack, second_stack = [], []
    push_target = {"a" : first_stack, "c" : second_stack}
    pop_source = {"b" : first_stack, "d" : second_stack}

    produced = []
    next_item = 0 # value currently at the front of the queue

    for symbol in operations :
        if symbol in push_target :
            if next_item >= total :
                return self.rewards["invalid_solution"]
            push_target[symbol].append(next_item)
            next_item += 1
        elif symbol in pop_source :
            stack = pop_source[symbol]
            if not stack :
                return self.rewards["invalid_solution"]
            produced.append(stack.pop())
        else :
            return self.rewards["wrong_format"]

    if len(produced) != total :
        return self.rewards["invalid_solution"]

    strategy = self.rewards["rewarding_strategy"]
    target = self.parameter["output_sequence"]
    if strategy == "mean([gold=answer])^beta" :
        matches = sum(int(expected == actual) for expected, actual in zip(target, produced))
        return self.rewards["rewarding_weight"] * ((matches / total) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (target == produced)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class DynDynamite_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3523
    """
    Tree k-center task restricted to key vertices: choose M centers so that
    the largest distance from a key vertex to its nearest center is minimal.
    """

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices, labeled from `0` to `{N_minus_1}`. It contains the following {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v`**:
{edges}

You are also given a list of key vertices: {key_vertices}
Please select exactly {M} vertices (from all {N} vertices) to serve as **centers**. Your goal is to **minimize the maximum distance** (measured in number of edges) from any key vertex to its nearest selected center.
Output format: A single line containing the {M} selected centers, separated by spaces."""


    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the DynDynamite_Environment instance.

        Args:
            wrong_format: reward when the answer is not a list of integers.
            invalid_solution: reward when the list has the wrong length or an out-of-range vertex.
            rewarding_strategy: "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by "(gold/answer)^beta".
            **kwargs: forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta
        }


    def _generate(self) -> None :
        """
        Sample a random tree, key vertices and M, then compute the optimal radius.

        Reads `N` from `self.parameter`; writes `edges`, `key_vertices`, `M`
        and `gold_answer` (the minimal achievable maximum distance).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Random tree: attach each vertex to a random earlier one in a shuffled order.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations) :
            if index == 0 :
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v))
        random.shuffle(edges)

        for u, v in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)) == N - 1

        # M is strictly smaller than the number of key vertices, so at least
        # one key vertex is not itself a center and the optimum is >= 1.
        key_vertices = self.parameter["key_vertices"] = random.sample(range(N), random.randint(2, N))
        M = self.parameter["M"] = random.randint(1, len(key_vertices) - 1)


        # d[x] == 1 marks x as a key vertex
        d = [0] * N
        for key_vertex in key_vertices :
            d[key_vertex] = 1

        # Build adjacency list (0-indexed)
        adj = [[] for _ in range(N)]
        for a, b in edges:
            adj[a].append(b)
            adj[b].append(a)

        # Build a parent array and a preorder traversal 'order' rooted at 0
        parent = [-1] * N
        order = []
        stack = [0]
        parent[0] = -1
        while stack:
            x = stack.pop()
            order.append(x)
            for v in adj[x]:
                if v == parent[x]:
                    continue
                parent[v] = x
                stack.append(v)

        # Sentinels for DP
        NEG_INF = -(N + 1)
        INF = N + 1

        # Given a radius t, greedily compute how many centers are needed
        def needed(t: int) -> int:
            # f[x]: distance from x to the farthest still-uncovered key vertex below it
            # g[x]: distance from x to the nearest center already placed below it
            f = [NEG_INF] * N
            g = [INF] * N
            cnt = 0

            # Process in reverse preorder (children before parent)
            for x in reversed(order):
                # A center already placed in the subtree reaches the farthest
                # uncovered key vertex within t: nothing is left uncovered
                if f[x] + g[x] <= t:
                    f[x] = NEG_INF

                # x is a key vertex that no placed center reaches: it becomes
                # an uncovered key vertex at distance 0
                if g[x] > t and d[x] == 1:
                    if f[x] < 0:
                        f[x] = 0

                # The farthest uncovered key vertex is exactly t away: placing
                # the center any higher would miss it, so place it at x
                if f[x] == t:
                    f[x] = NEG_INF
                    g[x] = 0
                    cnt += 1

                # Propagate distances up to the parent
                p = parent[x]
                if p != -1:
                    # farthest uncovered key vertex
                    val_f = f[x] + 1
                    if val_f > f[p]:
                        f[p] = val_f
                    # nearest placed center
                    val_g = g[x] + 1
                    if val_g < g[p]:
                        g[p] = val_g

            # Key vertices still uncovered at the root need one more center
            if f[0] >= 0:
                cnt += 1
            return cnt

        # Binary search on the answer t in [0, N]
        l, r = 0, N
        while l < r:
            mid = (l + r) // 2
            if needed(mid) <= M:
                r = mid
            else:
                l = mid + 1

        self.parameter["gold_answer"] = l


    def _prompt_generate(self) -> str :
        """Render the prompt; edges are printed as `(u, v)` tuples, as the prompt text describes."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
            key_vertices = " ".join(map(str, self.parameter["key_vertices"])),
            M = self.parameter["M"],
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None when that fails."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score the submitted centers by the radius they achieve.

        A multi-source BFS from the submitted centers gives each key vertex its
        distance to the nearest center; the largest such distance is compared
        with `gold_answer` using the configured rewarding strategy.
        """
        # Function-scope import: a plain FIFO is enough here, no thread-safe queue needed.
        from collections import deque

        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            if len(processed_result) != self.parameter["M"] :
                return self.rewards["invalid_solution"]
            if not all(0 <= vertex < self.parameter["N"] for vertex in processed_result) :
                return self.rewards["invalid_solution"]

            adj = [[] for _ in range(self.parameter["N"])]
            for a, b in self.parameter["edges"] :
                adj[a].append(b)
                adj[b].append(a)

            frontier = deque()
            distance = [None] * self.parameter["N"]
            for start in processed_result :
                distance[start] = 0
                frontier.append(start)
            while frontier :
                u = frontier.popleft()
                for v in adj[u] :
                    if distance[v] is None :
                        distance[v] = distance[u] + 1
                        frontier.append(v)

            answer, gold = max(distance[u] for u in self.parameter["key_vertices"]), self.parameter["gold_answer"]
            assert 0 < gold <= answer, "gold should be greater than 0 and less than or equal to answer"
            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
                return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (gold == answer)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
At any time, you can **swap the `0`** with one of its four (existing) neighbors: +- `U` = up +- `D` = down +- `L` = left +- `R` = right + +You start with the following grid: +{start_grid} + +Your goal is to reach the following grid: +{destination_grid} + +**Output Format:** Output a single line containing the sequence of moves made by the `0`, represented by a string of characters (`U`, `D`, `L`, `R`). For example, `RRDDLLUU` (do **NOT** include backticks or quotes) means: right, right, down, down, left, left, up, up.""" + + action2delta = { + "L" : (0, -1), + "R" : (0, +1), + "U" : (-1, 0), + "D" : (+1, 0), + } + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the EightDigitPuzzle_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + + start_permutation = list(range(N * M)) + random.shuffle(start_permutation) + start_grid = self.parameter["start_grid"] = [[start_permutation[i * M + j] for j in range(M)] for i in range(N)] + + assert "steps" in self.parameter, "steps is required in parameter" + steps = self.parameter["steps"] + assert steps >= 1, "steps should be greater than or equal to 1" + + self.parameter["zero_i"], self.parameter["zero_j"] = zero_i, zero_j = [(i, j) for i in range(N) for j in range(M) if start_grid[i][j] == 0][0] + 
destination_grid = self.parameter["destination_grid"] = [row.copy() for row in start_grid] + + action_distribution = [random.randint(1, N * M) for _ in range(4)] + action_distribution = [weight / sum(action_distribution) for weight in action_distribution] + + self.parameter["reference_answer"] = "" + for step in range(steps) : + while True : + action = random.choices(["U", "D", "L", "R"], weights = action_distribution, k = 1)[0] + new_zero_i, new_zero_j = zero_i + self.action2delta[action][0], zero_j + self.action2delta[action][1] + if 0 <= new_zero_i < N and 0 <= new_zero_j < M : + self.parameter["reference_answer"] += action + destination_grid[zero_i][zero_j], destination_grid[new_zero_i][new_zero_j] = destination_grid[new_zero_i][new_zero_j], destination_grid[zero_i][zero_j] + zero_i, zero_j = new_zero_i, new_zero_j + break + + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = N, + M = M, + NM_minus_1 = N * M - 1, + start_grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["start_grid"]), + destination_grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["destination_grid"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + destination_grid = [row.copy() for row in self.parameter["start_grid"]] + zero_i, zero_j = self.parameter["zero_i"], self.parameter["zero_j"] + + for action in processed_result : + if action not in self.action2delta : + return self.rewards["wrong_format"] + new_zero_i, new_zero_j = zero_i + self.action2delta[action][0], zero_j + self.action2delta[action][1] + if 0 <= new_zero_i < self.parameter["N"] and 0 <= new_zero_j < self.parameter["M"] : + destination_grid[zero_i][zero_j], 
destination_grid[new_zero_i][new_zero_j] = destination_grid[new_zero_i][new_zero_j], destination_grid[zero_i][zero_j] + zero_i, zero_j = new_zero_i, new_zero_j + else : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(sum(int(a == b) for a, b in zip(gold_row, answer_row)) for gold_row, answer_row in zip(self.parameter["destination_grid"], destination_grid)) / (self.parameter["N"] * self.parameter["M"])) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * all(all(a == b for a, b in zip(gold_row, answer_row)) for gold_row, answer_row in zip(self.parameter["destination_grid"], destination_grid)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/emperor_worries/__init__.py b/server/Gym/environments/emperor_worries/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d9e8cae617e76f252ff992cfe2d90ace865137fe --- /dev/null +++ b/server/Gym/environments/emperor_worries/__init__.py @@ -0,0 +1 @@ +from .environment import EmperorWorries_Environment diff --git a/server/Gym/environments/emperor_worries/environment.py b/server/Gym/environments/emperor_worries/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f4e4e789cf4018938ef1fb3e1b1f2aacbf695d27 --- /dev/null +++ b/server/Gym/environments/emperor_worries/environment.py @@ -0,0 +1,81 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class EmperorWorries_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4409 + prompt_template = \ +r"""There are {N} generals numbered from 0 to {N_minus_1}. 
The medal requirements are: {A} +Assign medals of various **types** to the generals so that: (1) The medals given to the same general are all of **distinct types** (no duplicate type for one general); (2) Adjacent generals (i and (i+1) mod {N}) share **no common medal type**. What is the **minimum number of medal types** required to satisfy all constraints?""" + def __init__(self, + A_range : int = 2, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the EmperorWorries_Environment instance. + """ + super().__init__(**kwargs) + + self.A_range = A_range + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "K" in self.parameter, "K is required in parameter" + K = self.parameter["K"] + assert K >= 1, "K should be greater than or equal to 1" + + N = self.parameter["N"] = random.choice((2 * K, 2 * K + 1)) + self.parameter["A"] = [random.randint(1, N * self.A_range) for _ in range(N)] + + + A = [None] + self.parameter["A"] # 1-indexed like the C++ array + S = 0 + for i in range(1, N + 1): + S += A[i] + + candidates = [] + for i in range(1, N): + candidates.append(A[i] + A[i + 1]) + candidates.append(A[1] + A[N]) + + K = N // 2 + candidates.append((S + K - 1) // K) # ceil(S / K) without importing math + + self.parameter["reference_answer"] = max(candidates) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + A = "; ".join("General {} needs {} medals of distinct types".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> 
float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/energy_storage_meter/__init__.py b/server/Gym/environments/energy_storage_meter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ba7728b9c0a33dac786f6fa2ad0707016da00ea3 --- /dev/null +++ b/server/Gym/environments/energy_storage_meter/__init__.py @@ -0,0 +1 @@ +from .environment import EnergyStorageMeter_Environment diff --git a/server/Gym/environments/energy_storage_meter/environment.py b/server/Gym/environments/energy_storage_meter/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..994477db84a0bed41ceb314ca83110bbb0c6a612 --- /dev/null +++ b/server/Gym/environments/energy_storage_meter/environment.py @@ -0,0 +1,113 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + +class EnergyStorageMeter_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4067 + prompt_template = prompt_template = r"""I want to know the sum of max((i XOR j) − {K}, 0) over all pairs (i, j) such that 0 ≤ i < {N} and 0 ≤ j < {M}, where XOR denotes the bitwise XOR operation.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the EnergyStorageMeter_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 4, "MAX_N_M should be greater than or equal to 4" + + N = self.parameter["N"] = random.randint(2, MAX_N_M) + M = self.parameter["M"] = random.randint(2, MAX_N_M) + K = self.parameter["K"] = random.randint(0, MAX_N_M) + + + def S(l, r): + # sum of integers from l to r inclusive + if l > r: + return 0 + cnt = r - l + 1 + return (l + r) * cnt // 2 + + def calc(l, r, x): + # corresponds to the C++ inline calc + if l <= x <= r: + return S(0, r - x) + elif r < x: + return 0 + else: # x < l + return S(l - x, r - x) + + def solve(): + # collect set bit positions (0..59) for N and M + bitsN = [i for i in range(N.bit_length() + 1) if (N >> i) & 1] + bitsM = [j for j in range(M.bit_length() + 1) if (M >> j) & 1] + + ans = 0 + + for i in bitsN: + for j in bitsM: + u = i if i < j else j + v = i ^ j ^ u # equals max(i, j) + + # Clear lower (i+1) bits of N and (j+1) bits of M, then XOR + ni = (N >> (i + 1)) << (i + 1) + mj = (M >> (j + 1)) << (j + 1) + x = ni ^ mj + + # Clear lower v bits of x + if v > 0: + x = (x >> v) << v + + # r = x with its lower v bits set to 1 + r = x | ((1 << v) - 1) if v > 0 else x + + contrib = (1 << u) * calc(x, r, K) + ans += contrib + + return ans + + self.parameter["reference_answer"] = solve() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return 
def scorer(self, output: str) -> float:
    """Score an integer answer against ``parameter["reference_answer"]``.

    Returns ``rewards["wrong_format"]`` when the processor yields ``None`` or
    a negative number. Otherwise the reward is ``rewarding_weight`` times
    either ``(min/max) ** rewarding_beta`` of the two values (an answer of 0
    only scores when the reference is 0 too) or an exact-match indicator,
    depending on ``rewards["rewarding_strategy"]``.

    Raises:
        NotImplementedError: if the rewarding strategy is unknown.
    """
    guess = self.processor(output)
    if guess is None or guess < 0:
        return self.rewards["wrong_format"]

    strategy = self.rewards["rewarding_strategy"]
    if strategy not in ("(min/max)^beta", "gold=answer"):
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))

    weight = self.rewards["rewarding_weight"]
    truth = self.parameter["reference_answer"]

    if strategy == "gold=answer":
        return weight * (guess == truth)
    if guess == 0:
        # Avoids 0/0 when both values are zero.
        return weight * (truth == 0)
    smaller, larger = sorted((truth, guess))
    return weight * ((smaller / larger) ** self.rewards["rewarding_beta"])
+ +On each turn, a player may subtract any **positive multiple** of one integer from the other, as long as the result is **non-negative**. The player who makes one of the numbers become **zero** wins the game. + +If both players always play optimally, who will win — Stan or Ollie? + +**Output Format:** Your final answer should be a single word: either `Stan` or `Ollie` (do **NOT** include quotes or backticks), indicating the winner.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the EuclidGame_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + def _generate(self) -> None : + assert "MAX_X_Y" in self.parameter, "MAX_X_Y is required in parameter" + MAX_X_Y = self.parameter["MAX_X_Y"] + assert MAX_X_Y >= 1, "MAX_X_Y should be greater than or equal to 1" + + self.parameter["reference_answer"] = "Stan" if random.random() < 0.5 else "Ollie" + + while True : + X = self.parameter["X"] = random.randint(1, MAX_X_Y) + Y = self.parameter["Y"] = random.randint(1, MAX_X_Y) + def check(x : int, y : int) -> bool : + if not y : + return False + if x // y != 1 : + return True + return not check(y, x - y) + if ("Stan" if check(max(X, Y), min(X, Y)) else "Ollie") == self.parameter["reference_answer"] : + break + + def _prompt_generate(self) -> str : + return self.prompt_template.format(X = self.parameter["X"], Y = self.parameter["Y"]) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result not in ("Stan", "Ollie") : + return 
self.rewards["invalid_answer"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/even_degree_graph_partitioning/__init__.py b/server/Gym/environments/even_degree_graph_partitioning/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5269ccb883b9f8c30c65d17867b981040278fbbe --- /dev/null +++ b/server/Gym/environments/even_degree_graph_partitioning/__init__.py @@ -0,0 +1 @@ +from .environment import EvenDegreeGraphPartitioning_Environment diff --git a/server/Gym/environments/even_degree_graph_partitioning/environment.py b/server/Gym/environments/even_degree_graph_partitioning/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..05e38b42480ad57b7a43cb94ac92a6cb51edea0f --- /dev/null +++ b/server/Gym/environments/even_degree_graph_partitioning/environment.py @@ -0,0 +1,131 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class EvenDegreeGraphPartitioning_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3429 + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from 0 to {N_minus_1}. The graph contains the following undirected edges: +{edges} + +Please partition the vertices into two groups (labeled 1 and 2) such that: +1. Each vertex belongs to exactly one group. +2. For each vertex, the number of edges connecting it to vertices in the **same** group is even. 
+ +**Output Format:** A single line containing {N} integers (separated by spaces), where the i-th integer is the group number (1 or 2) assigned to vertex i (from 0 to {N_minus_1}).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + while True : + vertex_permutation = list(range(N)) + random.shuffle(vertex_permutation) + group_1 = vertex_permutation[: random.randint(0, N)] + group_2 = vertex_permutation[len(group_1) :] + + edges = self.parameter["edges"] = [] + + degrees = [0] * N + def build(group) : + if len(group) <= 2 : + return + for i in range(1, len(group) - 1) : + neighbors = random.sample(group[: i], random.randint(0, i)) + for neighbor in neighbors : + u, v = min(group[i], neighbor), max(group[i], neighbor) + edges.append((u, v)) + degrees[u] += 1 + degrees[v] += 1 + for vertex in group[: -1] : + if degrees[vertex] % 2 == 1 : + u, v = min(group[-1], vertex), max(group[-1], vertex) + edges.append((u, v)) + degrees[u] += 1 + degrees[v] += 1 + assert all(degrees[vertex] % 2 == 0 for vertex in group), "All vertices in the group should have even degree" + build(group_1) + build(group_2) + + if len(group_1) and len(group_2) : + edges += random.sample([(min(u, v), max(u, v)) for u in group_1 for v in group_2], random.randint(0, len(group_1) * len(group_2))) + + if len(edges) > 0 : + break + + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert 
len(edges) == len(set(edges)), "edges should be unique" + + labels = [0] * N + for i in range(len(group_1)) : + labels[group_1[i]] = 1 + for i in range(len(group_2)) : + labels[group_2[i]] = 2 + self.parameter["reference_answer"] = " ".join(map(str, labels)) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + labels = processed_result + if len(labels) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if not all(label in (1, 2) for label in labels) : + return self.rewards["invalid_solution"] + + degrees = [0] * self.parameter["N"] + for u, v in self.parameter["edges"] : + degrees[u] += (labels[u] == labels[v]) + degrees[v] += (labels[u] == labels[v]) + + satisfied = sum(degree % 2 == 0 for degree in degrees) + assert satisfied <= self.parameter["N"], "satisfied should be less than or equal to N" + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == self.parameter["N"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] 
\ No newline at end of file diff --git a/server/Gym/environments/expression_adding_parenthese_counting/__init__.py b/server/Gym/environments/expression_adding_parenthese_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..56c779e3bb15978fc2ea9ef5e10e583366079bee --- /dev/null +++ b/server/Gym/environments/expression_adding_parenthese_counting/__init__.py @@ -0,0 +1 @@ +from .environment import Expression_AddingParenthese_Counting_Environment diff --git a/server/Gym/environments/expression_adding_parenthese_counting/environment.py b/server/Gym/environments/expression_adding_parenthese_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..a487b943819168ded39289c465f99f7e127ef115 --- /dev/null +++ b/server/Gym/environments/expression_adding_parenthese_counting/environment.py @@ -0,0 +1,87 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + +class Expression_AddingParenthese_Counting_Environment(VerifiableEnvironment): + prompt_template = \ +r"""Given an expression {expression}, please count the number of **distinct values** that can be obtained by inserting parentheses in the expression (but rearranging terms is NOT allowed).""" + operation_options = ("+", "-", "*") + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the Expression_AddingParenthese_Counting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "num_operands" in self.parameter, "num_operands is required in parameter" + num_operands = self.parameter["num_operands"] + assert num_operands >= 3, "num_operands should be greater than or equal to 3" + + operands = self.parameter["operands"] = [random.randint(1, num_operands * num_operands) for _ in range(num_operands)] + operations = self.parameter["operations"] = [random.choice(self.operation_options) for _ in range(num_operands - 1)] + + dpF = [[set() for _ in range(num_operands)] for _ in range(num_operands)] + def dp(l, r) -> set : + if l == r: + dpF[l][r] = {operands[l]} + return dpF[l][r] + if dpF[l][r] : + return dpF[l][r] + for i in range(l, r) : + left_values = dp(l, i) + right_values = dp(i + 1, r) + for lv in left_values : + for rv in right_values : + if operations[i] == "+" : + dpF[l][r].add(lv + rv) + elif operations[i] == "-" : + dpF[l][r].add(lv - rv) + elif operations[i] == "*" : + dpF[l][r].add(lv * rv) + else : + raise NotImplementedError(f"Operation {operations[i]} is not implemented") + return dpF[l][r] + self.parameter["reference_answer"] = len(dp(0, num_operands - 1)) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(expression = " ".join(str(self.parameter["operands"][i // 2] if i % 2 == 0 else self.parameter["operations"][i // 2]) for i in range(2 * self.parameter["num_operands"] - 1))) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if 
processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/face_right_way/__init__.py b/server/Gym/environments/face_right_way/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08b02811fd92c860bc45d00702a995aeb467af57 --- /dev/null +++ b/server/Gym/environments/face_right_way/__init__.py @@ -0,0 +1 @@ +from .environment import FaceRightWay_Environment diff --git a/server/Gym/environments/face_right_way/environment.py b/server/Gym/environments/face_right_way/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..4177bd899a4b8c90fa95c2cf6ccedfe52e059192 --- /dev/null +++ b/server/Gym/environments/face_right_way/environment.py @@ -0,0 +1,167 @@ +import random +from typing import Optional, List, Tuple +from ...environment import VerifiableEnvironment + + +class FaceRightWay_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2882 + prompt_template = \ +r"""There is a 0/1 array A of length {N}, and initially it is: {A} + +Please do the following: +- First, pick a positive integer K, which must remain fixed throughout the process. +- Then, perform M operations. In each operation, you choose an index l (1 ≤ l ≤ {N} - K + 1) and flip all values A[i] with l ≤ i < l + K (i.e., a contiguous subarray of length K). +- Finally, all elements of A must become 0. 
+ +Your goal is: +1. Minimize M (the total number of operations). +2. Among all strategies with minimal M, minimize K. + +**Output Format:** Output M lines, each containing two integers l and l + K - 1 (separated by a space), representing the closed interval [l, l + K - 1] flipped in that operation. All intervals must have the same length K.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, + rewarding_strategy_M : str = "(gold/answer)^beta", rewarding_weight_M : float = +0.5, rewarding_beta_M : float = 5.0, + rewarding_strategy_K : str = "(gold/answer)^beta", rewarding_weight_K : float = +0.5, rewarding_beta_K : float = 5.0, + **kwargs): + """ + Initialize the FaceRightWay_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "unsuccessful_solution": unsuccessful_solution, + "rewarding_strategy_M": rewarding_strategy_M, + "rewarding_weight_M": rewarding_weight_M, + "rewarding_beta_M": rewarding_beta_M, + "rewarding_strategy_K": rewarding_strategy_K, + "rewarding_weight_K": rewarding_weight_K, + "rewarding_beta_K": rewarding_beta_K, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + A = self.parameter["A"] = [0] * N + K = random.randint(2, N) + + left_endpoints = list(range(0, N - K + 1)) + left_endpoints = random.sample(left_endpoints, k = random.randint(1, len(left_endpoints))) + for l in left_endpoints : + for i in range(l, l + K) : + A[i] ^= 1 + + assert any(A), "A should not be all zeros initially" + + + ansK = 1 + ansM = sum(A) + self.parameter["reference_answer"] = "\n".join("{} {}".format(i, i) for i, Ai in enumerate(A, start = 1) if Ai) + + A = [None] + A # 1-indexed + + # Try every K and compute the minimal number of flips M for that K in O(N) + 
for K in range(1, N + 1): + flip = [0] * (N + 1) # flip[i] == 1 if we start a flip at position i + curr = 0 # parity of active flips affecting current position + m = 0 + possible = True + + currect_answer = "" + + for i in range(1, N + 1): + # Remove the effect of a flip that ends before i + if i - K >= 1: + curr ^= flip[i - K] + + # After applying current parity, do we still see a 'B' at i? + need_flip = A[i] ^ (curr == 1) + if need_flip: + # Can't start a K-flip if it would exceed N + if i + K - 1 > N: + possible = False + break + currect_answer += "{} {}\n".format(i, i + K - 1) + flip[i] = 1 + curr ^= 1 + m += 1 + + if possible and m < ansM: + ansM = m + ansK = K + self.parameter["reference_answer"] = currect_answer.strip() + + self.parameter["gold_answer"] = {"K" : ansK, "M" : ansM} + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = "; ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[Tuple[int, int]]] : + if answer is not None : + answer = answer.strip() + try : + operations = [] + for line in answer.splitlines() : + line = line.strip() + if line : + l, r = map(int, line.split()) + operations.append((l, r)) + return operations + except : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + A = self.parameter["A"].copy() + + K = None + for l, r in processed_result : + if not (1 <= l <= r <= self.parameter["N"]) : + return self.rewards["invalid_solution"] + if K is None : + K = r - l + 1 + if K != r - l + 1 : + return self.rewards["invalid_solution"] + for i in range(l, r + 1) : + A[i - 1] ^= 1 + + if any(A) : + return self.rewards["unsuccessful_solution"] + + reward = 0.0 + + answer_M, gold_M = len(processed_result), self.parameter["gold_answer"]["M"] + assert 0 < gold_M <= 
answer_M, "Gold M should be less than or equal to answer M" + if self.rewards["rewarding_strategy_M"] == "(gold/answer)^beta": + reward += self.rewards["rewarding_weight_M"] * ((gold_M / answer_M) ** self.rewards["rewarding_beta_M"]) + elif self.rewards["rewarding_strategy_M"] == "gold=answer": + reward += self.rewards["rewarding_weight_M"] * (gold_M == answer_M) + else : + raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy_M']}") + + if gold_M == answer_M : + answer_K, gold_K = K, self.parameter["gold_answer"]["K"] + assert 0 < gold_K <= answer_K, "Gold K should be less than or equal to answer K" + if self.rewards["rewarding_strategy_K"] == "(gold/answer)^beta": + reward += self.rewards["rewarding_weight_K"] * ((gold_K / answer_K) ** self.rewards["rewarding_beta_K"]) + elif self.rewards["rewarding_strategy_K"] == "gold=answer": + reward += self.rewards["rewarding_weight_K"] * (gold_K == answer_K) + else : + raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy_K']}") + + return reward + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/factorial_trailing_zero_count/__init__.py b/server/Gym/environments/factorial_trailing_zero_count/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e712410c72088a14b5792ad38069e8abeb1018f4 --- /dev/null +++ b/server/Gym/environments/factorial_trailing_zero_count/__init__.py @@ -0,0 +1 @@ +from .environment import FactorialTrailingZeroCount_Environment diff --git a/server/Gym/environments/factorial_trailing_zero_count/environment.py b/server/Gym/environments/factorial_trailing_zero_count/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..0d128e9303886867ccc253178fbaedaedeb88099 --- /dev/null +++ b/server/Gym/environments/factorial_trailing_zero_count/environment.py @@ -0,0 +1,88 @@ +import random +from typing import Optional +from 
def _generate(self) -> None :
    """
    Draw N and K and store the number of trailing zeros of N! in base K.

    Reads `self.parameter["MAX_N_K"]` (must be >= 10) and writes
    `self.parameter["N"]`, `["K"]` and `["reference_answer"]`.
    """
    assert "MAX_N_K" in self.parameter, "MAX_N_K is required in parameter"
    upper = self.parameter["MAX_N_K"]
    assert upper >= 10, "MAX_N_K should be greater than or equal to 10"

    # N is drawn before K; both are stored unchanged for the prompt.
    N = random.randint(3, upper)
    K = random.randint(2, upper)
    self.parameter["N"], self.parameter["K"] = N, K

    # Trial-division factorisation of K into (prime, exponent) pairs.
    factors = []
    remaining, divisor = K, 2
    while divisor * divisor <= remaining :
        exponent = 0
        while remaining % divisor == 0 :
            remaining //= divisor
            exponent += 1
        if exponent :
            factors.append((divisor, exponent))
        divisor += 1
    if remaining > 1 :
        factors.append((remaining, 1))

    def prime_power_in_factorial(p) :
        # Legendre's formula: sum of floor(N / p^j) over j >= 1.
        total, quotient = 0, N // p
        while quotient :
            total += quotient
            quotient //= p
        return total

    # The scarcest prime (relative to its exponent in K) limits the zero count.
    self.parameter["reference_answer"] = min(
        (prime_power_in_factorial(p) // c for p, c in factors),
        default = 0,
    )
`0`s is called a **B-string**. +- A string consisting of only `1`s is called an **I-string**. +- A string that contains both `0` and `1` is called an **F-string**. + +An **FBI tree** is a binary tree where each node is labeled as either F, B, or I, based on the type of the substring it represents. +Given a binary string `S`, construct an FBI tree `T` using the following recursive rules: +1. The **root node** corresponds to the entire string `S`, and its type is determined using the rules above. +2. If the length of `S` is greater than 1, divide `S` exactly in half into two equal substrings: `S₁` (left) and `S₂` (right). Recursively build the **left subtree** from `S₁`, and the **right subtree** from `S₂`. + +Your task is to construct the FBI tree from the following binary string of length 2^{N}: +{string} + +Then, output the **postorder traversal** of the tree — a string consisting of the node types in postorder (left, right, root). + +Output Format: +Your output should be a single line containing the postorder traversal of the tree. Each node type (F, B, or I) should appear **without any separators**. +Example: `{all_B_answer}` (do **NOT** include the backticks or quotes). +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + probability_same_as_before : float = 0.7, + **kwargs) : + """ + Initialize the FBI_BinaryTree_Environment instance. 
def scorer(self, output : str) -> float :
    """
    Score a postorder-traversal answer against the reference answer.

    Args:
        output: Raw model output, passed to `self.processor`.

    Returns:
        `rewards["wrong_format"]` when the processor yields None;
        `rewards["invalid_solution"]` when the answer has the wrong length or
        contains a character other than F, B or I; otherwise the reward given
        by the configured `rewarding_strategy`.

    Raises:
        NotImplementedError: if `rewarding_strategy` is not recognised.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]

    reference = self.parameter["reference_answer"]
    if len(processed_result) != len(reference) :
        return self.rewards["invalid_solution"]
    if any(char not in ("F", "B", "I") for char in processed_result) :
        return self.rewards["invalid_solution"]

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "mean([gold=answer])^beta" :
        # Fraction of positions that match, sharpened by the beta exponent.
        matches = sum(float(a == b) for a, b in zip(reference, processed_result))
        return self.rewards["rewarding_weight"] * ((matches / len(reference)) ** self.rewards["rewarding_beta"])
    elif strategy == "gold=answer" :
        # Compare against "reference_answer"; the "reference" key is never set.
        return self.rewards["rewarding_weight"] * all(a == b for a, b in zip(reference, processed_result))
    else :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def _generate(self) -> None :
    """
    Draw a second-order linear recurrence instance and store A[N] mod modulo.

    Reads `self.parameter["MAX_N"]` (must be >= 3) and `self.modulo`; writes
    `self.parameter["N"]`, `["A1"]`, `["A2"]`, `["P"]`, `["Q"]` and
    `["reference_answer"]`.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    assert self.parameter["MAX_N"] >= 3, "MAX_N should be greater than or equal to 3"

    mod = self.modulo

    # Draw order matters for reproducibility: N, A1, A2, P, Q.
    index = self.parameter["N"] = random.randint(3, self.parameter["MAX_N"])
    first = self.parameter["A1"] = random.randint(0, mod - 1)
    second = self.parameter["A2"] = random.randint(0, mod - 1)
    coef_one_back = self.parameter["P"] = random.randint(1, mod - 1)
    coef_two_back = self.parameter["Q"] = random.randint(1, mod - 1)

    def compose(x, y) :
        # Product of two 2x2 matrices, entries reduced modulo `mod`.
        (a, b), (c, d) = x
        (e, f), (g, h) = y
        return (
            ((a * e + b * g) % mod, (a * f + b * h) % mod),
            ((c * e + d * g) % mod, (c * f + d * h) % mod),
        )

    # [A[n], A[n-1]]^T = T * [A[n-1], A[n-2]]^T with T = [[P, Q], [1, 0]].
    step = ((coef_one_back % mod, coef_two_back % mod), (1, 0))
    power = ((1, 0), (0, 1))
    exponent = index - 2
    while exponent :
        if exponent & 1 :
            power = compose(power, step)
        step = compose(step, step)
        exponent >>= 1

    # First row of T^(N-2) applied to the base vector [A2, A1].
    self.parameter["reference_answer"] = (power[0][0] * (second % mod) + power[0][1] * (first % mod)) % mod
def _generate(self) -> None :
    """
    Draw K and count the pairs (a, b) whose Fibonacci-like sequence contains K.

    Reads `self.parameter["MAX_K"]` (must be >= 2); writes `self.parameter["K"]`
    and `["reference_answer"]`.
    """
    assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
    limit = self.parameter["MAX_K"]
    assert limit >= 2, "MAX_K should be greater than or equal to 2"

    target = self.parameter["K"] = random.randint(2, limit)

    def common_divisor(u, v) :
        # Euclid's algorithm, iterative form.
        while v :
            u, v = v, u % v
        return u

    # f(n) = coeffs[n-2] * a + coeffs[n-1] * b, with coeffs = 1, 1, 2, 3, 5, ...
    coeffs = [1, 1]
    while coeffs[-1] + coeffs[-2] <= target :
        coeffs.append(coeffs[-1] + coeffs[-2])

    total = 0
    # The last coefficient pair is skipped: its sum already exceeds the target.
    for lo, hi in zip(coeffs[:-2], coeffs[1:-1]) :
        multiplier = 1
        while True :
            rest = target - hi * multiplier
            if rest <= 0 :
                # No positive solution of lo * a + hi * b == target for this pair.
                break
            if rest % lo == 0 :
                # Smallest valid b found; further solutions are lcm(lo, hi) apart.
                period = lo // common_divisor(lo, hi) * hi
                total += (rest - 1) // period + 1
                break
            multiplier += 1

    assert total > 0, "The answer should be positive."
    self.parameter["reference_answer"] = total
https://www.luogu.com.cn/problem/P1011 + prompt_template = \ +r"""A train departs from its starting station (Station 1) with {A} passengers onboard. There are {N} stations in total, numbered from 1 to {N}. + +At Station 2, an equal number of passengers get on and off, so the total number of passengers onboard remains unchanged at {A}. + +From Station 3 onward (including Station 3) up to Station {N_minus_1}, the boarding and alighting follow a specific rule: +- The number of **boarding** passengers at each station is the **sum of the number of boarding passengers at the previous two stations**. +- The number of **alighting** passengers at each station is **equal to the number of boarding passengers at the previous station**. + +At the final station (Station {N}), **all remaining passengers get off**, and the number of passengers who get off is {M}. + +Given this setup, what is the number of passengers **on the train after it departs from Station {X}**? + +Output Format: +Your final answer should be a **single integer** on a line by itself, representing the number of passengers onboard **after the train departs from Station {X}**. +""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the Fibtrain_Environment instance. 
def _generate(self) -> None :
    """
    Draw a train instance and store the passenger count after Station X.

    Reads `self.parameter["MAX_N"]` (>= 5) and `["MAX_A_B"]` (>= 1); writes
    `self.parameter["N"]`, `["A"]`, `["B"]`, `["M"]`, `["X"]` and
    `["reference_answer"]`.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    max_stations = self.parameter["MAX_N"]
    assert max_stations >= 5, "MAX_N should be greater than or equal to 5"

    stations = self.parameter["N"] = random.randint(5, max_stations)

    assert "MAX_A_B" in self.parameter, "MAX_A_B is required in parameter"
    max_passengers = self.parameter["MAX_A_B"]
    assert max_passengers >= 1, "MAX_A_B should be greater than or equal to 1"

    initial = self.parameter["A"] = random.randint(1, max_passengers)
    second_boarding = self.parameter["B"] = random.randint(1, max_passengers)

    # onboard[i] = passengers aboard after departing station i (index 0 unused).
    # Station 2 boards and alights the same number, so the count stays at A.
    onboard = [0, initial, initial]
    two_back, one_back = initial, second_boarding
    for _ in range(3, stations) :
        boarding_now = one_back + two_back
        # Alighting at this station equals boarding at the previous station.
        onboard.append(onboard[-1] + boarding_now - one_back)
        two_back, one_back = one_back, boarding_now

    # Everyone aboard after station N - 1 gets off at the final station.
    self.parameter["M"] = onboard[stations - 1]

    query = self.parameter["X"] = random.randint(3, stations - 1)
    self.parameter["reference_answer"] = onboard[query]
b/server/Gym/environments/firework_show/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5d8d914454997182084f4b28c1db8b9ef72a8134 --- /dev/null +++ b/server/Gym/environments/firework_show/__init__.py @@ -0,0 +1 @@ +from .environment import FireworkShow_Environment diff --git a/server/Gym/environments/firework_show/environment.py b/server/Gym/environments/firework_show/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..66c887a0b788f3d99a6e6f333e3a258c31b6b03f --- /dev/null +++ b/server/Gym/environments/firework_show/environment.py @@ -0,0 +1,127 @@ +import heapq +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class FireworkShow_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3642 + prompt_template = \ +r"""You are given a **tree** with {N} vertices labeled from `1` to `{N}`, where vertex `1` is the **root**. Each vertex (except the root) has a parent `p`, and the edge connecting the vertex to its parent has length `w`. The list of (parent, weight) pairs for each non-root vertex is given as: +{parents} + +Note that these vertices are leaf nodes (i.e., vertices with no children): {leaves} +You can reduce the length of any edge. Specifically, you can change an edge's length `w` to any integer `w'` such that `0 ≤ w'`; the cost of changing an edge from `w` to `w'` is abs(w - w'). You need to make the sum of the edge lengths on the path from each leaf node to the root `1` equal — in other words, all leaf-to-root paths should have the same total length. Output the **minimum total cost** required to achieve this.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the FirworkShow_Environment instance. 
def _generate(self) -> None :
    """
    Build a random rooted tree and compute the reference minimum total cost.

    Reads `self.parameter["N"]` (must be >= 2). Writes `self.parameter["parents"]`
    (one (parent, weight) tuple per vertex 2..N), `["leaves"]` (childless
    vertices among 2..N) and `["reference_answer"]`.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    parents = self.parameter["parents"] = []
    # Index 0 is a placeholder so that is_leaf[v] lines up with vertex label v.
    is_leaf = [None] + [True] * N
    for i in range(2, N + 1) :
        # The parent always has a smaller label, so vertex 1 is the root.
        parent= random.randint(1, i - 1)
        parents.append((parent, random.randint(1, N)))
        is_leaf[parent] = False
    self.parameter["leaves"] = [i for i in range(2, N + 1) if is_leaf[i]]


    # adjacency and weights
    children = [[] for _ in range(N + 1)]
    w = [0] * (N + 1)  # w[v] = weight of the edge from v to its parent; w[1] stays 0
    res = 0  # starts as the sum of all edge weights

    for i in range(2, N + 1):
        p, c = parents[i - 2]  # parents[i - 2] belongs to vertex i
        children[p].append(i)
        w[i] = c
        res += c

    # NOTE(review): this looks like the slope-trick solution (a heap of
    # breakpoints of a convex cost function) — confirm against the source problem.
    def dfs(x):
        assert 1 <= x <= N, "Node index out of bounds"
        # we store values as negatives so that heapq (a min-heap)
        # can pop the "largest" original value first
        heap = []
        for y in children[x]:
            child_heap = dfs(y)
            # small-to-large merge: always push the smaller heap into the larger
            if len(heap) < len(child_heap):
                heap, child_heap = child_heap, heap
            for val in child_heap:
                heapq.heappush(heap, val)

        l = r = 0
        if not is_leaf[x]:
            d = len(children[x])
            assert len(children[x]) > 0, "There should be at least one child for non-leaf nodes"
            # remove the d-1 largest values
            for _ in range(d - 1):
                if heap:
                    heapq.heappop(heap)
            # then pop the next two largest into r and l
            if heap:
                r = -heapq.heappop(heap)
            if heap:
                l = -heapq.heappop(heap)
        else :
            assert len(children[x]) == 0, "Leaf nodes should not have children"

        # push back with the current edge weight
        heapq.heappush(heap, -(l + w[x]))
        heapq.heappush(heap, -(r + w[x]))
        return heap

    root_heap = dfs(1)

    # Discard the single largest, then subtract every remaining value
    if root_heap:
        heapq.heappop(root_heap)
    while root_heap:
        # Entries are stored negated, so negating the popped entry recovers the original value.
        res -= -heapq.heappop(root_heap)

    self.parameter["reference_answer"] = res
def _generate(self) -> None :
    """
    Draw (N, K, R, MOD) and store sum_i C(N*K, i*K + R) mod MOD.

    Reads `self.parameter["MAX_N"]` (>= 1), `["MAX_K"]` (>= 2) and
    `self.MOD_range`; writes `self.parameter["N"]`, `["K"]`, `["R"]`, `["MOD"]`
    and `["reference_answer"]`.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"
    N = self.parameter["N"] = random.randint(1, MAX_N)

    assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
    MAX_K = self.parameter["MAX_K"]
    # K is drawn from [2, MAX_K], so MAX_K == 1 must be rejected here rather
    # than surfacing as an opaque ValueError from random.randint.
    assert MAX_K >= 2, "MAX_K should be greater than or equal to 2"
    K = self.parameter["K"] = random.randint(2, MAX_K)
    R = self.parameter["R"] = random.randint(0, K - 1)

    MOD = self.parameter["MOD"] = random.randint(2, self.MOD_range)

    def multiply(lhs, rhs) :
        # Cyclic convolution of length K with coefficients reduced modulo MOD.
        product = [0] * K
        for i, left in enumerate(lhs) :
            for j, right in enumerate(rhs) :
                slot = (i + j) % K
                product[slot] = (product[slot] + left * right) % MOD
        return product

    # (1 + x)^(N*K) modulo (x^K - 1): coefficient R collects every binomial
    # C(N*K, j) with j congruent to R mod K. K >= 2 here, so base[1] exists.
    base = [0] * K
    base[0] = base[1] = 1

    # Identity element for the cyclic convolution.
    result = [0] * K
    result[0] = 1

    # Exponentiation by squaring.
    exponent = N * K
    while exponent > 0 :
        if exponent & 1 :
            result = multiply(result, base)
        base = multiply(base, base)
        exponent >>= 1

    self.parameter["reference_answer"] = result[R]
def scorer(self, output : str) -> float :
    """
    Score an integer answer against the stored reference answer.

    Returns `rewards["wrong_format"]` when the processor yields None,
    `rewards["wrong_range"]` when the value lies outside [0, MOD), and
    otherwise `rewards["correct_answer"]` or `rewards["wrong_answer"]`.
    """
    value = self.processor(output)
    if value is None :
        return self.rewards["wrong_format"]

    # A residue modulo MOD must lie in [0, MOD).
    if value < 0 or value >= self.parameter["MOD"] :
        return self.rewards["wrong_range"]

    outcome = "correct_answer" if value == self.parameter["reference_answer"] else "wrong_answer"
    return self.rewards[outcome]
Each edge is represented as a tuple `(u, v, w)` (`w` is either 0 or 1), meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Please select a subset of edges `T = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that: +- k = {N_minus_1} (i.e., you select exactly {N_minus_1} edges), +- The selected edges form a **spanning tree** — that is, they connect all {N} vertices without forming any cycles, +- There are exactly {K} edges with weight 1 in the selected edges, + +**Output Format:** Your final answer should be a single line containing the endpoints of the selected edges in order: `u_1 v_1 u_2 v_2 ... u_k v_k`, separated by **spaces**. +Example: `0 1 1 2 2 3` (do **NOT** include the backticks or quotes); this means the spanning tree includes the edges `(0, 1, w_1)`, `(1, 2, w_2)`, and `(2, 3, w_3)` (assuming 4 vertices in total).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = +1.0, + **kwargs) : + """ + Initialize the FixedOneEdgeNum_SpanningTree_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "wrong_solution" : wrong_solution, + "correct_solution" : correct_solution, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter" + edge_ratio = self.parameter["edge_ratio"] + + edges = self.parameter["edges"] = [] + + permutations = list(range(N)) + random.shuffle(permutations) + one_probability = random.random() + self.parameter["K"], self.parameter["reference_answer"] = 0, [] + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + self.parameter["reference_answer"].append("{} {}".format(u, v)) + u, v, w = min(u, v), max(u, v), int(random.random() < one_probability) + edges.append((u, v, w)) + self.parameter["K"] += w + self.parameter["reference_answer"] = " ".join(self.parameter["reference_answer"]) + + num_edges = int(edge_ratio * N) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + one_probability = random.random() + for u, v in remaining_edges : + edges.append((u, v, int(random.random() < one_probability))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert w in (0, 1), "edge weight should be either 0 or 1" + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + K = self.parameter["K"], + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in 
self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + mst = processed_result + if len(mst) % 2 != 0 : + return self.rewards["wrong_format"] + mst = [(mst[i], mst[i + 1]) for i in range(0, len(mst), 2)] + + if len(mst) != self.parameter["N"] - 1 : + return self.rewards["invalid_solution"] + if not ((set(u for u, v in mst) | set(v for u, v in mst)) == set(range(self.parameter["N"]))) : + return self.rewards["invalid_solution"] + + subgraph = networkx.Graph() + edge2weight = {(u, v) : w for u, v, w in self.parameter["edges"]} + answer_weight = 0 + for u, v in mst : + u, v = min(u, v), max(u, v) + if (u, v) not in edge2weight : + return self.rewards["invalid_solution"] + answer_weight += edge2weight[(u, v)] + subgraph.add_edge(u, v) + if not networkx.is_connected(subgraph) : + return self.rewards["invalid_solution"] + assert networkx.is_tree(subgraph), "The answer should be a tree as it has N - 1 edges and is connected" + + if answer_weight != self.parameter["K"] : + return self.rewards["wrong_solution"] + else : + return self.rewards["correct_solution"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/fractional_programming/__init__.py b/server/Gym/environments/fractional_programming/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..925d573eccd6729920d20b960b38a40a3e2b2046 --- /dev/null +++ b/server/Gym/environments/fractional_programming/__init__.py @@ -0,0 +1 @@ +from .environment import 
class FractionalProgramming_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P10505
    """Task: choose K distinct indices maximising sum(A) / sum(B)."""

    prompt_template = \
r"""You are given two arrays `A` and `B`, each containing {N} integers:
{A_and_B}

Please select {K} **distinct indices** `i_1, ..., i_{K}` to maximize the value of `(A[i_1] + ... + A[i_{K}]) / (B[i_1] + ... + B[i_{K}])`

**Output Format:** Your final answer should be a single line containing the {K} selected indices in any order, separated by spaces."""

    def __init__(self,
                 max_proportion: int = 2,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(answer/gold)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Store the generation bound and the reward configuration.

        ``max_proportion`` caps each ``A[i]`` at ``max_proportion * B[i]``.
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.max_proportion = max_proportion

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample K, B and A, then solve the instance by bisection on the ratio.

        Writes ``K``, ``B``, ``A``, ``reference_answer``, ``gold_SumA`` and
        ``gold_SumB`` into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = random.randint(2, N - 1)
        self.parameter["K"] = K

        B = [random.randint(1, N) for _ in range(N)]
        self.parameter["B"] = B
        A = [random.randint(1, self.max_proportion * b) for b in B]
        self.parameter["A"] = A

        # A ratio is achievable iff the K largest values of A[i] - ratio * B[i]
        # sum to at least zero; bisect on that predicate.
        low, high = 0.0, max(a / b for a, b in zip(A, B))
        best = None
        for _ in range(256):
            ratio = (low + high) / 2
            ranked = sorted(range(N), key=lambda i: A[i] - ratio * B[i], reverse=True)
            top = ranked[:K]
            if sum(A[i] - ratio * B[i] for i in top) >= 0:
                low, best = ratio, top
            else:
                high = ratio
        self.parameter["reference_answer"] = " ".join(map(str, best))

        self.parameter["gold_SumA"] = sum(A[i] for i in best)
        self.parameter["gold_SumB"] = sum(B[i] for i in best)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the generated arrays."""
        N = self.parameter["N"]
        A, B = self.parameter["A"], self.parameter["B"]
        rows = ["A[{}]={} B[{}]={}".format(i, A[i], i, B[i]) for i in range(N)]
        return self.prompt_template.format(
            N=N,
            K=self.parameter["K"],
            A_and_B="\n".join(rows),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated integer list; return None if unparsable."""
        if answer is None:
            return None
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score the selected indices by their ratio relative to the gold ratio.

        ``self.processor`` comes from the base class; presumably it extracts
        the answer and delegates to ``_process`` (confirm against the base).
        """
        picked = self.processor(output)
        if picked is None:
            return self.rewards["wrong_format"]
        assert isinstance(picked, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(picked) != self.parameter["K"]:
            return self.rewards["invalid_solution"]
        if any(index < 0 or index >= N for index in picked):
            return self.rewards["invalid_solution"]
        if len(set(picked)) != len(picked):
            return self.rewards["invalid_solution"]

        answer_SumA = sum(self.parameter["A"][index] for index in picked)
        answer_SumB = sum(self.parameter["B"][index] for index in picked)
        gold_SumA = self.parameter["gold_SumA"]
        gold_SumB = self.parameter["gold_SumB"]
        # gold_SumA / gold_SumB >= answer_SumA / answer_SumB, cross-multiplied
        # so the comparison stays in exact integer arithmetic.
        assert gold_SumA * answer_SumB >= answer_SumA * gold_SumB, "gold_SumA * answer_SumB should be greater than or equal to answer_SumA * gold_SumB"

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "(answer/gold)^beta":
            # (answer ratio) / (gold ratio) written as a single fraction.
            relative = (answer_SumA * gold_SumB) / (answer_SumB * gold_SumA)
            return weight * (relative ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * ((answer_SumA * gold_SumB) == (answer_SumB * gold_SumA))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class FractionalProgramming_BipartiteGraphMatching_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3705
    """Task: find a permutation P maximising sum(A[i][P[i]]) / sum(B[i][P[i]])."""

    prompt_template = \
r"""You are given two matrices A and B of size {N} × {N} (0-indexed).

Matrix A is (given in **row-major order**, with each row represented as a list of integers separated by spaces):
{A}

Matrix B is (given in **row-major order**, with each row represented as a list of integers separated by spaces):
{B}

Please find a permutation P of indices from 0 to {N_minus_1}, i.e., P[0], P[1], ..., P[{N_minus_1}], such that the following value is maximized: (A[0][P[0]] + A[1][P[1]] + ... + A[{N_minus_1}][P[{N_minus_1}]]) / (B[0][P[0]] + B[1][P[1]] + ... + B[{N_minus_1}][P[{N_minus_1}]])

**Output Format:** A single line containing P[0], P[1], ..., P[{N_minus_1}], separated by spaces."""

    def __init__(self,
                 max_proportion: int = 2,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(answer/gold)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Store the generation bound and the reward configuration.

        ``max_proportion`` caps each ``A[i][j]`` at ``max_proportion * B[i][j]``.
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.max_proportion = max_proportion
        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample B and A, then solve by bisection on the ratio.

        Each bisection step runs a maximum-cardinality, maximum-weight
        bipartite matching on the weights ``A - ratio * B``. Writes ``B``,
        ``A``, ``reference_answer``, ``gold_SumA`` and ``gold_SumB`` into
        ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        B = [[random.randint(1, N) for _ in range(N)] for _ in range(N)]
        self.parameter["B"] = B
        A = [[random.randint(1, self.max_proportion * b) for b in B_row] for B_row in B]
        self.parameter["A"] = A

        def max_weight_matching_networkx(W):
            """Return (assignment, total weight) of a best matching for W."""
            size = len(W)
            G = networkx.Graph()

            # Rows are nodes 0..size-1, columns are nodes size..2*size-1.
            G.add_nodes_from(list(range(size)), bipartite=0)
            G.add_nodes_from(list(range(size, 2 * size)), bipartite=1)
            for i in range(size):
                for j in range(size):
                    G.add_edge(i, size + j, weight=W[i][j])

            matching = networkx.max_weight_matching(G, maxcardinality=True)

            # Matched edges come back in arbitrary orientation; the smaller
            # endpoint is always the row node, the larger the column node.
            assignment = [-1] * size
            for edge in matching:
                row, col = min(edge), max(edge)
                assignment[row] = col - size

            total = sum(W[i][assignment[i]] for i in range(size) if assignment[i] != -1)
            return assignment, total

        low = 0.0
        high = max(max(a / b for a, b in zip(A_row, B_row)) for A_row, B_row in zip(A, B))
        P = None
        for _ in range(256):
            ratio = (low + high) / 2
            W = [[A[i][j] - ratio * B[i][j] for j in range(N)] for i in range(N)]

            candidate, total_weight = max_weight_matching_networkx(W)

            # The ratio is achievable iff the best matching has weight >= 0.
            if total_weight >= 0:
                low = ratio
                P = list(candidate)
            else:
                high = ratio
        self.parameter["reference_answer"] = " ".join(map(str, P))

        self.parameter["gold_SumA"] = sum(A[i][P[i]] for i in range(N))
        self.parameter["gold_SumB"] = sum(B[i][P[i]] for i in range(N))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with both matrices, one row per line."""
        N = self.parameter["N"]

        def render(matrix):
            return "\n".join(" ".join(map(str, row)) for row in matrix)

        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            A=render(self.parameter["A"]),
            B=render(self.parameter["B"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated integer list; return None if unparsable."""
        if answer is None:
            return None
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a permutation by its ratio relative to the gold ratio.

        ``self.processor`` comes from the base class; presumably it extracts
        the answer and delegates to ``_process`` (confirm against the base).
        """
        P = self.processor(output)
        if P is None:
            return self.rewards["wrong_format"]
        assert isinstance(P, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(P) != N:
            return self.rewards["invalid_solution"]
        if set(P) != set(range(N)):
            return self.rewards["invalid_solution"]

        answer_SumA = sum(self.parameter["A"][i][P[i]] for i in range(N))
        answer_SumB = sum(self.parameter["B"][i][P[i]] for i in range(N))
        gold_SumA = self.parameter["gold_SumA"]
        gold_SumB = self.parameter["gold_SumB"]
        # gold_SumA / gold_SumB >= answer_SumA / answer_SumB, cross-multiplied
        # so the comparison stays in exact integer arithmetic.
        assert gold_SumA * answer_SumB >= answer_SumA * gold_SumB, "gold_SumA * answer_SumB should be greater than or equal to answer_SumA * gold_SumB"

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "(answer/gold)^beta":
            # (answer ratio) / (gold ratio) written as a single fraction.
            relative = (answer_SumA * gold_SumB) / (answer_SumB * gold_SumA)
            return weight * (relative ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * ((answer_SumA * gold_SumB) == (answer_SumB * gold_SumA))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class FutoshikiPuzzle_Environment(VerifiableEnvironment):
    """Task: complete a Latin square subject to cell-to-cell inequalities."""

    prompt_template = \
r"""You are given a {N} × {N} matrix. Some cells are already filled with integers in the range [0, {N_minus_1}], and the rest are empty (denoted by `-1`). Please fill the empty cells with integers in the same range such that:
- Each **row** and each **column** contains all integers from `0` to `{N_minus_1}` **exactly once**.
- The following **inequality constraints** between cells are satisfied (use `c[i][j]` to denote the cell at row `i`, column `j`, 0-indexed):
{inequalities}

The original matrix is as follows:
{matrix}

**Output Format:** Your final answer should contain {N} lines, each with {N} integers separated by spaces. Each line represents a row of the completed matrix, matching the format of the input."""

    def __init__(self,
                 inequality_constraint_num_multiple: int = 2,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(satisfied/all)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Store the constraint-count multiplier and the reward configuration.

        At most ``inequality_constraint_num_multiple * N`` inequalities are
        sampled per instance. Remaining keyword arguments are forwarded to
        the base class.
        """
        super().__init__(**kwargs)

        self.inequality_constraint_num_multiple = inequality_constraint_num_multiple

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Build a Latin square, sample true inequalities, then blank out cells.

        Writes ``matrix`` (with blanks as -1), ``reference_answer`` (the full
        square) and ``inequalities`` into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # (row_shift[i] + col_shift[j]) % N is a Latin square for any two
        # permutations of 0..N-1.
        row_shift = list(range(N))
        col_shift = list(range(N))
        random.shuffle(row_shift)
        random.shuffle(col_shift)

        matrix = [[(row_shift[i] + col_shift[j]) % N for j in range(N)] for i in range(N)]
        self.parameter["matrix"] = matrix
        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in matrix)

        # Every ordered cell pair whose values are strictly increasing.
        all_inequalities = [
            (x1, y1, x2, y2)
            for x1 in range(N)
            for y1 in range(N)
            for x2 in range(N)
            for y2 in range(N)
            if matrix[x1][y1] < matrix[x2][y2]
        ]
        constraint_count = random.randint(1, min(len(all_inequalities), self.inequality_constraint_num_multiple * N))
        self.parameter["inequalities"] = random.sample(all_inequalities, k=constraint_count)

        assert "sparsity" in self.parameter, "sparsity is required in parameter"
        sparsity = self.parameter["sparsity"]
        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"

        # Blank at least one cell; the reference answer keeps the full square.
        for cell in random.sample(range(N * N), max(1, int(N * N * sparsity))):
            row, column = divmod(cell, N)
            matrix[row][column] = -1

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the puzzle and its inequalities."""
        N = self.parameter["N"]
        matrix_text = "\n".join(" ".join(map(str, row)) for row in self.parameter["matrix"])
        inequality_text = "\n".join(
            "c[{}][{}] < c[{}][{}]".format(x1, y1, x2, y2)
            for x1, y1, x2, y2 in self.parameter["inequalities"]
        )
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            matrix=matrix_text,
            inequalities=inequality_text,
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse non-blank lines into rows of integers; None if unparsable."""
        if answer is None:
            return None
        try:
            return [
                [int(token) for token in line.split()]
                for line in answer.strip().splitlines()
                if line.strip()
            ]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a completed grid by the fraction of inequalities it satisfies.

        ``self.processor`` comes from the base class; presumably it extracts
        the answer and delegates to ``_process`` (confirm against the base).
        """
        solution = self.processor(output)
        if solution is None:
            return self.rewards["wrong_format"]
        assert isinstance(solution, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(solution) != N:
            return self.rewards["wrong_format"]
        if any(len(row) != N for row in solution):
            return self.rewards["wrong_format"]

        symbols = set(range(N))
        if any(set(row) != symbols for row in solution):
            return self.rewards["invalid_solution"]
        if any(set(column) != symbols for column in zip(*solution)):
            return self.rewards["invalid_solution"]

        # Pre-filled cells must be left untouched.
        for given_row, row in zip(self.parameter["matrix"], solution):
            for given, value in zip(given_row, row):
                if given != -1 and given != value:
                    return self.rewards["invalid_solution"]

        inequalities = self.parameter["inequalities"]
        satisfied = sum(int(solution[x1][y1] < solution[x2][y2]) for x1, y1, x2, y2 in inequalities)
        assert satisfied <= len(inequalities), "satisfied should not exceed the number of inequalities"

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "(satisfied/all)^beta":
            return weight * ((satisfied / len(inequalities)) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (satisfied == len(inequalities))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class GasFireExtinguishers_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3479
    """Task: assign every tree vertex u a target P[u] within distance K.

    The objective is to minimise sum(ceil(C[v] / S)), where C[v] counts the
    vertices assigned to v.
    """

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` to `{N_minus_1}`. The tree has the following {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v)`, meaning there is an undirected edge connecting vertex `u` and vertex `v`:
{edges}

There is an array C[0], C[1], ..., C[{N_minus_1}], all initially set to 0. For each vertex `u` (0 ≤ u < {N}), you must choose a vertex `P[u]` such that the distance (in number of edges) from `u` to `P[u]` is at most {K}; then, increment C[P[u]] by 1.
Try your best to **minimize** the total value of ceil(C[0] / {S}) + ceil(C[1] / {S}) + ... + ceil(C[{N_minus_1}] / {S}), where `ceil(x)` means rounding `x` up to the nearest integer. Output a single line containing `P[0]`, `P[1]`, ..., `P[{N_minus_1}]`, separated by spaces."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the GasFireExtinguishers_Environment instance.

        Stores the reward for each scoring outcome; remaining keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Generate a random tree, pick K and S, and compute the gold answer.

        Writes ``edges``, ``K``, ``valid_P``, ``S`` and ``gold_answer`` into
        ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Random tree: every vertex after the first attaches to an earlier one.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations) :
            if index == 0 :
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v))
        random.shuffle(edges)

        for u, v in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)) == N - 1

        G = networkx.Graph()
        G.add_edges_from(edges)
        distances = dict(networkx.all_pairs_shortest_path_length(G))
        # K is at most half of the largest pairwise distance (but at least 1).
        K = self.parameter["K"] = random.randint(1, max(1, max(distances[u][v] for u in range(N) for v in range(N)) // 2))
        # valid_P[u]: every vertex within distance K of u (used by the scorer).
        self.parameter["valid_P"] = [[v for v in range(N) if distances[u][v] <= K] for u in range(N)]
        S = self.parameter["S"] = random.randint(2, max(2, N // K))


        # Build adjacency list for 0-indexed rooms
        graph = [[] for _ in range(N)]
        for u, v in edges:
            graph[u].append(v)
            graph[v].append(u)

        # f[u][i]: number of rooms in subtree u at distance exactly i that still need an extinguisher
        # g[u][i]: capacity of extinguishers at u that can serve rooms at distance exactly i
        f = [[0] * (K + 1) for _ in range(N)]
        g = [[0] * (K + 1) for _ in range(N)]
        ans = 0

        def dfs(u, parent):
            nonlocal ans
            f[u][0] = 1
            # accumulate from children
            for v in graph[u]:
                if v == parent:
                    continue
                dfs(v, u)
                for i in range(K):
                    f[u][i + 1] += f[v][i]
                    g[u][i + 1] += g[v][i]
            # place new extinguishers for rooms at distance K in subtree
            need = (f[u][K] + S - 1) // S
            ans += need
            # capacity left in newly placed extinguishers
            spare = need * S - f[u][K]
            f[u][K] = 0
            g[u][0] += spare
            # match needs and capacities within K
            # first for exact K distance pairs
            for i in range(K + 1):
                j = K - i
                d = min(f[u][i], g[u][j])
                f[u][i] -= d
                g[u][j] -= d
            # then for distance K-1 pairs
            for i in range(K):
                j = K - 1 - i
                d = min(f[u][i], g[u][j])
                f[u][i] -= d
                g[u][j] -= d

        # run DFS from root 0
        dfs(0, -1)

        # final matching at root
        for i in range(K + 1):
            for j in range(K + 1):
                if i + j <= K:
                    d = min(f[0][i], g[0][j])
                    f[0][i] -= d
                    g[0][j] -= d
        # remaining rooms need extinguishers
        tot = sum(f[0][i] for i in range(K + 1))
        ans += (tot + S - 1) // S

        assert ans > 0, "The answer should be greater than 0"
        self.parameter["gold_answer"] = ans


    def _prompt_generate(self) -> str :
        """
        Fill the prompt template with the generated instance.

        Edges are rendered as ``(u, v)`` tuples, which is the representation
        the prompt text announces.
        """
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
            K = self.parameter["K"],
            S = self.parameter["S"],
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Parse a whitespace-separated integer list; return None if unparsable.
        """
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score an assignment P by its cost relative to the gold cost.

        ``self.processor`` comes from the base class; presumably it extracts
        the answer and delegates to ``_process`` (confirm against the base).
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            if len(processed_result) != self.parameter["N"] :
                return self.rewards["invalid_solution"]

            # C[v] counts how many vertices chose v as their target.
            C = [0] * self.parameter["N"]
            for u, P_u in enumerate(processed_result) :
                # Targets that are out of range or farther than K are rejected.
                if P_u not in self.parameter["valid_P"][u] :
                    return self.rewards["invalid_solution"]
                C[P_u] += 1

            answer, gold = sum((C[u] + self.parameter["S"] - 1) // self.parameter["S"] for u in range(self.parameter["N"])), self.parameter["gold_answer"]
            assert 0 < gold <= answer, "gold should be greater than 0 and less than or equal to answer"
            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
                return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (gold == answer)
            else :
                raise ValueError("Invalid rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
class GaussianElimination_Environment(VerifiableEnvironment):
    """Task: find integers x[0..N-1] satisfying M linear equations."""

    prompt_template = \
r"""There are {N} integers x[0], x[1], ..., x[{N_minus_1}]. They satisfy the following {M} equations:
{equations}

Please find any solution x[0], x[1], ..., x[{N_minus_1}] that satisfies the equations.

Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**.
Example: `{one_to_N}` (do **NOT** include quotes or backticks); this means: x[0] = 1, x[1] = 2, ..., x[{N_minus_1}] = {N}.
"""

    def __init__(self,
                 coefficient_non_zero_probability: float = 0.5,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(satisfied/all)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """Store the coefficient density and the reward configuration.

        ``coefficient_non_zero_probability`` is the chance that any single
        coefficient is non-zero. Remaining keyword arguments are forwarded to
        the base class.
        """
        super().__init__(**kwargs)

        self.coefficient_non_zero_probability = coefficient_non_zero_probability

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample a hidden solution and M equations consistent with it.

        Writes ``x``, ``reference_answer``, ``equations`` and ``results``
        into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        hidden = [random.randint(1, N) for _ in range(N)]
        self.parameter["x"] = hidden
        self.parameter["reference_answer"] = " ".join(map(str, hidden))

        equations = []
        self.parameter["equations"] = equations
        results = []
        self.parameter["results"] = results

        largest_coefficient = max(1, N // 5)
        for _ in range(M):
            # Resample until the equation has at least one non-zero coefficient.
            while True:
                equation = [
                    random.randint(1, largest_coefficient)
                    if random.random() < self.coefficient_non_zero_probability
                    else 0
                    for _ in range(N)
                ]
                if any(equation):
                    break
            equations.append(equation)
            results.append(sum(coefficient * xi for coefficient, xi in zip(equation, hidden)))

    def _prompt_generate(self) -> str:
        """Fill the prompt template, omitting zero-coefficient terms."""
        N = self.parameter["N"]

        def render(equation, result):
            terms = [
                "{} * x[{}]".format(coefficient, i)
                for i, coefficient in enumerate(equation)
                if coefficient != 0
            ]
            return " + ".join(terms) + " = {}".format(result)

        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            M=self.parameter["M"],
            equations="\n".join(
                render(equation, result)
                for equation, result in zip(self.parameter["equations"], self.parameter["results"])
            ),
            one_to_N=" ".join(map(str, range(1, N + 1))),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated integer list; return None if unparsable."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a candidate solution by the fraction of equations it satisfies.

        ``self.processor`` comes from the base class; presumably it extracts
        the answer and delegates to ``_process`` (confirm against the base).
        """
        x = self.processor(output)
        if x is None:
            return self.rewards["wrong_format"]
        assert isinstance(x, list), "processed_result should be a list"

        if len(x) != self.parameter["N"]:
            return self.rewards["wrong_format"]

        equations = self.parameter["equations"]
        satisfied = 0
        for equation, result in zip(equations, self.parameter["results"]):
            left_side = sum(coefficient * xi for coefficient, xi in zip(equation, x))
            satisfied += int(left_side == result)

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "(satisfied/all)^beta":
            return weight * ((satisfied / len(equations)) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all":
            return weight * (satisfied == len(equations))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class GCDFibonacciProduct_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3704
    prompt_template = \
r"""The Fibonacci sequence is defined as follows: f(0) = 0, f(1) = 1, and f(n) = f(n - 1) + f(n - 2) for all n ≥ 2.
Please compute the product of all f(gcd(i, j)) for all pairs (i, j) such that 1 ≤ i ≤ {N} and 1 ≤ j ≤ {M}. Output the result modulo {MOD}."""
    MODs = (666623333, 998244353, 10 ** 9 + 7)

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Create the environment and record the reward for each scoring outcome.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            wrong_range = wrong_range,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Sample N, M and a modulus, then store the reference product in self.parameter.

        Requires self.parameter["MAX_N_M"] >= 3.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        upper = self.parameter["MAX_N_M"]
        assert upper >= 3, "MAX_N_M should be greater than or equal to 3"

        N = self.parameter["N"] = random.randint(3, upper)
        M = self.parameter["M"] = random.randint(3, upper)

        MOD = self.parameter["MOD"] = random.choice(self.MODs)

        def mobius_table(limit) :
            # Linear sieve: mu[1..limit]; entries for non-squarefree numbers stay 0
            mu = [0] * (limit + 1)
            mu[1] = 1
            sieved = [False] * (limit + 1)
            primes = []
            for value in range(2, limit + 1) :
                if not sieved[value] :
                    primes.append(value)
                    mu[value] = -1
                for prime in primes :
                    product = value * prime
                    if product > limit :
                        break
                    sieved[product] = True
                    if value % prime == 0 :
                        break  # product has a squared prime factor, mu stays 0
                    mu[product] = -mu[value]
            return mu

        def prefix_tables(limit) :
            # g[t] = prod_{d | t} fib(d)^{mu(t / d)}; g_inv[t] is built with the opposite exponents.
            # Both tables are returned as prefix products over t.
            mu = mobius_table(limit)
            g = [1] * (limit + 1)
            g_inv = [1] * (limit + 1)

            fib_prev, fib_cur = 1, 0
            for d in range(1, limit + 1) :
                fib_prev, fib_cur = fib_cur, (fib_prev + fib_cur) % MOD  # fib_cur == fib(d) mod MOD
                fib_inv = pow(fib_cur, MOD - 2, MOD)
                by_sign = {1 : (fib_cur, fib_inv), -1 : (fib_inv, fib_cur)}
                for multiple in range(d, limit + 1, d) :
                    sign = mu[multiple // d]
                    if sign == 0 :
                        continue
                    forward, backward = by_sign[sign]
                    g[multiple] = g[multiple] * forward % MOD
                    g_inv[multiple] = g_inv[multiple] * backward % MOD

            for t in range(1, limit + 1) :
                g[t] = g[t - 1] * g[t] % MOD
                g_inv[t] = g_inv[t - 1] * g_inv[t] % MOD
            return g, g_inv

        g, g_inv = prefix_tables(max(N, M))

        small, large = (M, N) if N > M else (N, M)
        answer = 1
        left = 1
        while left <= small :
            # floor(small / t) and floor(large / t) are both constant for t in [left, right]
            q_small, q_large = small // left, large // left
            right = min(small // q_small, large // q_large)
            block = g[right] * g_inv[left - 1] % MOD
            answer = answer * pow(block, q_small * q_large, MOD) % MOD
            left = right + 1
        self.parameter["reference_answer"] = answer


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the sampled N, M and MOD."""
        values = self.parameter
        return self.prompt_template.format(N = values["N"], M = values["M"], MOD = values["MOD"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None :
            return None
        answer = answer.strip()
        try :
            return int(answer)
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Return the reward for the output: wrong format, out of range, correct or wrong."""
        value = self.processor(output)
        if value is None :
            return self.rewards["wrong_format"]
        if value < 0 or value >= self.parameter["MOD"] :
            return self.rewards["wrong_range"]
        if value == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
The **greatest common divisor (GCD)** of `P` and `Q` is **{gcd}**. +3. The **least common multiple (LCM)** of `P` and `Q` is **{lcm}**. + +Your task is to determine how many such pairs `(P, Q)` satisfy **all** of the above conditions. + +Output Format: +Your final answer should be a single integer — the number of valid `(P, Q)` pairs. +Example: `4` (do **NOT** include the backticks or quotes); this means there are 4 valid pairs that meet the criteria. +""" + + def __init__(self, + wrong_format : float = -1.0, not_power_2 : float = 0.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + answer_being_0_probability : float = 0.01, + **kwargs) : + """ + Initialize the GcdLcmCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "not_power_2" : not_power_2, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + self.answer_being_0_probability = answer_being_0_probability + + + def _generate(self) -> None : + assert "MAX_LCM" in self.parameter, "MAX_LCM is required in parameter" + MAX_LCM = self.parameter["MAX_LCM"] + assert MAX_LCM >= 3, "MAX_LCM should be greater than or equal to 3" + + if random.random() < self.answer_being_0_probability : + while True : + LCM = self.parameter["LCM"] = random.randint(1, MAX_LCM) + GCD = self.parameter["GCD"] = random.randint(1, LCM) + if LCM % GCD != 0 : + break + else : + LCM = self.parameter["LCM"] = random.randint(1, MAX_LCM) + def all_factors(n) : + factors = set() + for i in range(1, int(n**0.5) + 1) : + if n % i == 0 : + factors.add(i) + factors.add(n // i) + return factors + factors = all_factors(LCM) + GCD = self.parameter["GCD"] = random.choice(list(set(factors))) + + def solve(gcd, lcm) : + def prime_factorization(n) : + prime2count = {} + for x in range(2, int(n**0.5) + 1) : + while n % x == 0 : + prime2count[x] = 
prime2count.get(x, 0) + 1 + n //= x + if n > 1 : + prime2count[n] = prime2count.get(n, 0) + 1 + return prime2count + + gcd, lcm = prime_factorization(gcd), prime_factorization(lcm) + + counting = 1 + for p in set(gcd.keys()) | set(lcm.keys()) : + x_count, y_count = gcd.get(p, 0), lcm.get(p, 0) + if x_count > y_count : + counting = 0 + break + if x_count == y_count : + counting *= 1 + else : # x_count < y_count + counting *= 2 + return counting + self.parameter["reference_answer"] = solve(self.parameter["GCD"], self.parameter["LCM"]) + assert (self.parameter["reference_answer"] == 0) == (self.parameter["LCM"] % self.parameter["GCD"] != 0) + + def _prompt_generate(self) -> str : + return self.prompt_template.format(gcd = self.parameter["GCD"], lcm = self.parameter["LCM"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + else : + def power_2(n) : + while n and n % 2 == 0 : + n //= 2 + return n <= 1 + if not power_2(processed_result) : + return self.rewards["not_power_2"] + else : + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + 
else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/gcd_one_counting/__init__.py b/server/Gym/environments/gcd_one_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8d6d841aed3e5bed3e967b800e9a2b2d4eada421 --- /dev/null +++ b/server/Gym/environments/gcd_one_counting/__init__.py @@ -0,0 +1 @@ +from .environment import GCDOne_Counting_Environment diff --git a/server/Gym/environments/gcd_one_counting/environment.py b/server/Gym/environments/gcd_one_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..5c898ff2d5286ad2fadaf25829e2718a8b5a18ea --- /dev/null +++ b/server/Gym/environments/gcd_one_counting/environment.py @@ -0,0 +1,105 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class GCDOne_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2257 + prompt_template = \ +r"""How many pairs (x, y) satisfy gcd(x, y) being exactly 1, where 1 ≤ x ≤ {N} and 1 ≤ y ≤ {M}? Here, gcd(x, y) denotes the **greatest common divisor** of integers x and y. + +**Output Format:** Your final answer should be a single integer — the number of pairs (x, y) such that x and y are coprime, i.e., gcd(x, y) = 1.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the GCDOne_Counting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N_M) + M = self.parameter["M"] = random.randint(2, MAX_N_M) + + + mu = [0] * (min(N, M) + 1) + mu[1] = 1 + flag = [False] * (min(N, M) + 1) + primes = [] + for i in range(2, min(N, M) + 1) : + if not flag[i] : + primes.append(i) + mu[i] = -1 + for p in primes : + ip = i * p + if ip > min(N, M) : + break + flag[ip] = True + if i % p == 0 : + break + else : + mu[ip] = -mu[i] + + f = [0] * (min(N, M) + 1) + for i in range(1, min(N, M) + 1) : + f[i] = mu[i] + + prefix = [0] * (min(N, M) + 1) + s = 0 + for i in range(1, min(N, M) + 1) : + s += f[i] + prefix[i] = s + + ans = 0 + l = 1 + while l <= N and l <= M : + an = N // l + am = M // l + r = min(N // an, M // am) + ans += (prefix[r] - prefix[l-1]) * an * am + l = r + 1 + + self.parameter["reference_answer"] = ans + assert ans > 0 + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * 
class GCDPrime_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2257
    prompt_template = \
r"""How many pairs (x, y) satisfy gcd(x, y) being a prime number, where 1 ≤ x ≤ {N} and 1 ≤ y ≤ {M}? Here, gcd(x, y) denotes the **greatest common divisor** of integers x and y.

**Output Format:** Your final answer should be a single integer — the number of pairs (x, y) such that gcd(x, y) is a prime number."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Create the environment and record the reward configuration used by the scorer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    def _generate(self) -> None :
        """
        Sample N and M and store the number of pairs (x, y) whose gcd is prime.

        Requires self.parameter["MAX_N_M"] >= 2.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        upper = self.parameter["MAX_N_M"]
        assert upper >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, upper)
        M = self.parameter["M"] = random.randint(2, upper)

        limit = min(N, M)

        # Linear sieve for the Moebius function on [1, limit]
        mu = [0] * (limit + 1)
        mu[1] = 1
        composite = [False] * (limit + 1)
        primes = []
        for value in range(2, limit + 1) :
            if not composite[value] :
                primes.append(value)
                mu[value] = -1
            for prime in primes :
                product = value * prime
                if product > limit :
                    break
                composite[product] = True
                if value % prime == 0 :
                    break
                mu[product] = -mu[value]

        # weight[t] = sum of mu(t / p) over the primes p dividing t
        weight = [0] * (limit + 1)
        for prime in primes :
            for multiple in range(prime, limit + 1, prime) :
                weight[multiple] += mu[multiple // prime]

        weight_prefix = [0] * (limit + 1)
        for t in range(1, limit + 1) :
            weight_prefix[t] = weight_prefix[t - 1] + weight[t]

        total = 0
        left = 1
        while left <= limit :
            # N // t and M // t are both constant for t in [left, right]
            q_n, q_m = N // left, M // left
            right = min(N // q_n, M // q_m)
            total += (weight_prefix[right] - weight_prefix[left - 1]) * q_n * q_m
            left = right + 1

        self.parameter["reference_answer"] = total

    def _prompt_generate(self) -> str :
        """Fill the prompt template with the sampled N and M."""
        values = self.parameter
        return self.prompt_template.format(N = values["N"], M = values["M"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None :
            return None
        answer = answer.strip()
        try :
            return int(answer)
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score the output against the reference count.

        Unparsable or negative answers get the "wrong_format" reward. A reference
        of 0 is matched exactly; otherwise the configured strategy is applied.
        """
        guess = self.processor(output)
        if guess is None :
            return self.rewards["wrong_format"]
        if guess < 0 :
            return self.rewards["wrong_format"]

        if self.parameter["reference_answer"] == 0 :
            return self.rewards["rewarding_weight"] * (guess == 0)

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            gold = self.parameter["reference_answer"]
            ratio = min(gold, guess) / max(gold, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (guess == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
Output the sum of the largest {K} values (i.e., the first {K} values in the sorted list).""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the GoldWashing_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 1" + + N = self.parameter["N"] = random.randint(2, MAX_N) + K = self.parameter["K"] = random.randint(1, N) + + + S = str(N) + n = len(S) + + # 1) Generate all products 2^a * 3^b * 5^c * 7^d <= N + primes = [2, 3, 5, 7] + products = [] + def gen(idx, cur): + if idx == 4: + products.append(cur) + return + p = primes[idx] + x = cur + while x <= N: + gen(idx + 1, x) + x *= p + gen(0, 1) + + prod_list = sorted(products) + M_prime = len(prod_list) + index_of = {v: i for i, v in enumerate(prod_list)} + + # 2) Precompute counts for all lengths < n (numbers without zeros) + # fLen[L][j] = number of L-digit numbers (all digits 1..9) whose digit‐product = prod_list[j] + fLen = [None] * (n + 1) + # length = 1 + f1 = [0] * M_prime + for d in range(1, 10): + if d > N: + break + j = index_of.get(d) + if j is not None: + f1[j] += 1 + fLen[1] = f1 + + for L in range(2, n): + prev = fLen[L - 1] + curr = [0] * M_prime + for j_idx, cnt in enumerate(prev): + if cnt == 0: + continue + base = prod_list[j_idx] + for d in range(1, 10): + newp = base * d + if newp > N: + break + newj = index_of[newp] + curr[newj] += cnt + fLen[L] = curr + + # 3) Digit‐DP for length = n, counting numbers in [1..N] with no zeros + digits = list(map(int, S)) + dp_tight = [0] * 
M_prime # prefix == N so far + dp_loose = [0] * M_prime # prefix < N so far + dp_tight[index_of[1]] = 1 # product = 1 at start + + for pos in range(n): + new_tight = [0] * M_prime + new_loose = [0] * M_prime + ub = digits[pos] + + # transitions from loose (already < N) + for j_idx, cnt in enumerate(dp_loose): + if cnt == 0: + continue + base = prod_list[j_idx] + for d in range(1, 10): + newp = base * d + if newp > N: + break + newj = index_of[newp] + new_loose[newj] += cnt + + # transitions from tight (== N so far) + if ub > 0: + for j_idx, cnt in enumerate(dp_tight): + if cnt == 0: + continue + base = prod_list[j_idx] + # choose d < ub -> becomes loose + for d in range(1, ub): + newp = base * d + if newp > N: + break + newj = index_of[newp] + new_loose[newj] += cnt + # choose d == ub -> stays tight + newp_eq = base * ub + if newp_eq <= N: + newj_eq = index_of[newp_eq] + new_tight[newj_eq] += cnt + + dp_tight, dp_loose = new_tight, new_loose + + # fBound[j] = count of n-digit numbers <= N, no zeros, product = prod_list[j] + fBound = [dp_tight[i] + dp_loose[i] for i in range(M_prime)] + + # 4) Total counts A[j] = sum over lengths 1..n-1 plus fBound for length n + A = fBound[:] # copy + for L in range(1, n): + row = fLen[L] + for j_idx, cnt in enumerate(row): + if cnt: + A[j_idx] += cnt + + # 5) We have sums A[j]; sort them ascending + sums = sorted(A) + + # 6) Take the top K products from the multiset { sums[i]*sums[j] } + # using a max-heap over pairs (i, j) with i, j in [0..M'-1]. 
+ if K > M_prime * M_prime: + K = M_prime * M_prime + + heap = [] + last = M_prime - 1 + for i in range(M_prime): + # push initial pair (i, last) + heap.append((-sums[i] * sums[last], i, last)) + heapq.heapify(heap) + + ans = 0 + for _ in range(K): + negval, i, j = heapq.heappop(heap) + val = -negval + ans += val + if j > 0: + new_pair = sums[i] * sums[j - 1] + heapq.heappush(heap, (-new_pair, i, j - 1)) + + assert ans > 0 + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/gra_minima_game/__init__.py b/server/Gym/environments/gra_minima_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9471ba01721e47a9989949d818f06bad53c17317 --- /dev/null +++ b/server/Gym/environments/gra_minima_game/__init__.py @@ -0,0 +1 @@ +from .environment import GraMinimaGame_Environment diff --git 
class GraMinimaGame_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3507
    prompt_template = \
r"""There are {N} numbers: {A}
Alice and Bob are playing a game with these numbers. Alice goes first, and they take turns. On each turn, a player may choose any **non-empty subset** of the remaining numbers, add the **minimum** of that subset to their score, and then remove the entire subset from the game. The game ends when there are no numbers left.
Each player plays optimally to maximize **their score minus their opponent's score**. Please compute the final value of (Alice's score − Bob's score)."""

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the GraMinimaGame_Environment instance.

        wrong_format, correct_answer and wrong_answer are the rewards returned by
        the scorer. wrong_range occupies the second positional slot and is not
        used by this environment's scorer.
        """
        # The second slot was previously declared as a parameter literally named `float`,
        # shadowing the builtin. Swallow that legacy keyword so it is not forwarded to the
        # base class.
        kwargs.pop("float", None)
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Sample N numbers in [1, 2N] and store the optimal score difference.

        Requires self.parameter["N"] >= 1.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        A = self.parameter["A"] = [random.randint(1, N * 2) for _ in range(N)]

        # Scan the values in ascending order: best is the mover's optimal lead on the
        # values seen so far; taking `a` as the minimum hands the smaller values to the opponent.
        best = 0
        for a in sorted(A) :
            best = max(best, a - best)
        self.parameter["reference_answer"] = best


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and the space-separated numbers."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            A = " ".join(map(str, self.parameter["A"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None when it is missing or not an integer."""
        if answer is None :
            return None
        answer = answer.strip()
        try :
            return int(answer)
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Return the wrong-format, correct-answer or wrong-answer reward for the output."""
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
b/server/Gym/environments/grade_ranking_counting/environment.py @@ -0,0 +1,219 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class GradeRankingCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3270 + prompt_template = \ +r"""Count the number of matrices A of size {N} × {M} (0-indexed) that satisfy the following conditions: +1. Each element A[i][j] (0 ≤ i < {N}, 0 ≤ j < {M}) is an integer in the range [1, U[j]]. U is: {U} +2. For each column j (0 ≤ j < {M}), there are exactly R[j] rows i (1 ≤ i < {N}) such that A[i][j] > A[0][j]. R is: {R} +3. There are exactly {K} rows i (1 ≤ i < {N}) such that A[0][j] ≥ A[i][j] holds for **all** j (0 ≤ j < {M}). + +Output the number of such matrices modulo {MOD}.""" + + MODs = (10 ** 9 + 7, 998244353) + + def __init__(self, + wrong_format: float = -1.0, wrong_range: float = -0.5, correct_answer: float = +1.0, wrong_answer: float = 0.0, + **kwargs): + """ + Initialize the GradeRankingCountingProblem instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "wrong_range": wrong_range, + "correct_answer": correct_answer, + "wrong_answer": wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + MOD = self.parameter["MOD"] = random.choice(self.MODs) + + A = [[None] * M for _ in range(N)] + losers = set(random.sample(range(1, N), k = random.randint(0, N - 1))) + U = self.parameter["U"] = [random.randint(1, N) for j in range(M)] + R = self.parameter["R"] = [0] * M + for j in range(M) : + A[0][j] = random.randint(1, U[j]) + for i in range(1, N) : + if i in losers : + A[i][j] = random.randint(1, A[0][j]) + else : + A[i][j] = random.randint(1, U[j]) + R[j] += int(A[i][j] > A[0][j]) + K = self.parameter["K"] = sum(int(all(A[0][j] >= A[i][j] for j in range(M))) for i in range(1, N)) + assert K >= len(losers), "K should be at least the number of losers" + + + # ---------- basic combinatorics ---------- + def prepare_factorials(limit: int): + """pre-compute factorials and inverse factorials up to (inclusive)""" + fact = [1] * (limit + 1) + for i in range(1, limit + 1): + fact[i] = fact[i - 1] * i % MOD + inv_fact = [1] * (limit + 1) + inv_fact[limit] = pow(fact[limit], MOD - 2, MOD) + for i in range(limit, 0, -1): + inv_fact[i - 1] = inv_fact[i] * i % MOD + return fact, inv_fact + + + def C(n: int, k: int) -> int: + if k < 0 or k > n: + return 0 + return FACT[n] * INV_FACT[k] % MOD * INV_FACT[n - k] % MOD + + + # ---------- Σ k^p for huge k (Faulhaber via Lagrange) ---------- + def power_sum(p: int, n: int) -> int: + """ + S_p(n) = Σ_{k=1..n} k^p (0 ≤ p ≤ 2N ≈ 200, n may be 1e9) + evaluated in O(p) with Lagrange interpolation over equally–spaced nodes 0 … 
p+1 + """ + if n == 0: + return 0 + d = p + 1 # degree of the polynomial + if n <= d: # tiny n – direct loop is faster + s = 0 + for k in range(1, n + 1): + s = (s + pow(k, p, MOD)) % MOD + return s + + # pre-compute y[i] = Σ_{k=1..i} k^p for i = 0 … d (0 ≤ d ≤ 200) + y = [0] * (d + 1) + partial = 0 + for i in range(1, d + 1): + partial = (partial + pow(i, p, MOD)) % MOD + y[i] = partial + + x = n % MOD + + # total product P := Π_{j=0..d} (x − j) + P = 1 + for j in range(d + 1): + P = P * ((x - j) % MOD) % MOD + + # Lagrange + res = 0 + for i in range(d + 1): + # numerator = P / (x - i) + num = P * pow((x - i) % MOD, MOD - 2, MOD) % MOD + + # denominator = (-1)^{d-i} · i! · (d-i)! + sign = MOD - 1 if (d - i) & 1 else 1 + denom_inv = sign * INV_FACT[i] % MOD * INV_FACT[d - i] % MOD + + res = (res + y[i] * num % MOD * denom_inv) % MOD + return res + + + # ---------- single course contribution ---------- + def course_contribution(U_i: int, A_i: int, N: int) -> int: + """ + A_i students must be strictly above B in this course + B_i = N-1-A_i students are ≤ B. + f_i = Σ_{S=1..U_i} (U_i-S)^{A_i} · S^{B_i} + = Σ_{j=0..A_i} (-1)^j C(A_i,j) U_i^{A_i-j} · Σ_{k=1..U_i} k^{B_i+j} + """ + B_i = N - 1 - A_i + V = U_i + res = 0 + for j in range(A_i + 1): + coeff = C(A_i, j) + if j & 1: # (-1)^j + coeff = MOD - coeff + term = coeff * pow(V, A_i - j, MOD) % MOD + term = term * power_sum(B_i + j, V) % MOD + res = (res + term) % MOD + return res + + + # ---------- inclusion–exclusion over dominated students ---------- + def pattern_count(N: int, K: int, A_list): + """ + Count ways to pick, for every course i, a subset of size A_i + (taken from the S = N-1-K non-dominated students) + so that every non-dominated student appears ≥1 time. 
+ """ + S = N - 1 - K + total = 0 + for t in range(S + 1): # t = number of non-dominated students *omitted* + if t: # early bailout for impossible A_i > S-t + ok = all(A <= S - t for A in A_list) + if not ok: + continue + prod = 1 + for A in A_list: + prod = prod * C(S - t, A) % MOD + term = C(S, t) * prod % MOD + if t & 1: + total = (total - term) % MOD + else: + total = (total + term) % MOD + # finally multiply by ways to choose which K students are dominated + total = total * C(N - 1, K) % MOD + return total + + R = [r + 1 for r in R] + # factorials up to 2N ≈ 200 cover everything (exponents ≤ 2N-1) + MAX_F = 2 * N + 2 + FACT, INV_FACT = prepare_factorials(MAX_F) + + # per-course numeric factor f_i + F_product = 1 + A_list = [] + for i in range(M): + A_i = R[i] - 1 # number strictly above B + A_list.append(A_i) + F_product = F_product * course_contribution(U[i], A_i, N) % MOD + + # combinatorial patterns for the “> B” sets + PATTERNS = pattern_count(N, K, A_list) + + answer = F_product * PATTERNS % MOD + self.parameter["reference_answer"] = answer + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + MOD = self.parameter["MOD"], + K = self.parameter["K"], + U = " ".join("U[{}]={}".format(j, Uj) for j, Uj in enumerate(self.parameter["U"])), + R = " ".join("R[{}]={}".format(j, Rj) for j, Rj in enumerate(self.parameter["R"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + 
class GraphContainTreeCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3349
    prompt_template = \
r"""You are given an **undirected graph** G and a **tree** T, each with {N} vertices labeled from `0` to `{N_minus_1}`.

- Graph G has the following undirected edge set E1:
{G_edges}

- Tree T has the following undirected edge set E2:
{T_edges}

Please compute the number of **bijections** `p` (i.e., permutations) from the vertices of T to the vertices of G such that: for every edge `(u, v)` in E2, the edge `(p(u), p(v))` exists in E1.

**Output Format:** A single integer representing the number of valid bijections."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Store the reward configuration; every other keyword argument is
        forwarded to the base environment.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Build a random tree T and a graph G that contains a relabelled copy
        of T, then count the edge-preserving bijections T -> G.

        Reads "N" and "edge_density" from self.parameter and writes
        "T_edges", "G_edges" and "reference_answer".
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # ---- tree T: every vertex of a shuffled order hooks onto an earlier one ----
        tree_edges = self.parameter["T_edges"] = []
        order = list(range(N))
        random.shuffle(order)
        for position in range(1, N) :
            child, anchor = order[position], random.choice(order[: position])
            tree_edges.append((min(child, anchor), max(child, anchor)))
        random.shuffle(tree_edges)

        assert all(0 <= u < v < N for u, v in tree_edges)
        assert len(tree_edges) == len(set(tree_edges)) == N - 1

        # ---- graph G: relabelled copy of T, so at least one bijection exists ----
        graph_edges = self.parameter["G_edges"] = []
        random.shuffle(order)
        for a, b in tree_edges :
            image_a, image_b = order[a], order[b]
            graph_edges.append((image_a, image_b) if image_a < image_b else (image_b, image_a))

        assert "edge_density" in self.parameter, "edge_density is required in parameter"
        edge_density = self.parameter["edge_density"]

        # pad G with random extra edges until the requested density is reached
        target_edge_count = int(edge_density * N * (N - 1) / 2)
        if len(graph_edges) < target_edge_count :
            unused = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set(graph_edges))
            graph_edges += random.sample(unused, min(len(unused), target_edge_count - len(graph_edges)))
        random.shuffle(graph_edges)

        assert all(0 <= u < v < N for u, v in graph_edges)
        assert len(graph_edges) == len(set(graph_edges)), "edges should be unique"

        # ---- counting: inclusion-exclusion over the allowed image set ----
        has_edge = [[False] * N for _ in range(N)]
        for u, v in graph_edges :
            has_edge[u][v] = has_edge[v][u] = True

        neighbours = [[] for _ in range(N)]
        for u, v in tree_edges :
            neighbours[u].append(v)
            neighbours[v].append(u)

        def subtree_ways(node, parent, allowed) :
            """
            Map each x in `allowed` to the number of edge-preserving maps of
            the subtree rooted at `node` into `allowed` that send `node` to x.
            """
            ways = {x : 1 for x in allowed}
            for child in neighbours[node] :
                if child == parent :
                    continue
                child_ways = subtree_ways(child, node, allowed)
                for x in allowed :
                    ways[x] *= sum(child_ways[y] for y in allowed if has_edge[x][y])
            return ways

        ans = 0
        for mask in range(1 << N) :
            allowed = [x for x in range(N) if (mask >> x) & 1]
            homomorphisms = sum(subtree_ways(0, -1, allowed).values())
            # sign follows the parity of the number of excluded graph vertices
            if (N - len(allowed)) & 1 :
                ans -= homomorphisms
            else :
                ans += homomorphisms

        assert ans > 0
        self.parameter["reference_answer"] = ans


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and both edge lists (one edge per line)."""
        N = self.parameter["N"]

        def render(edge_list) :
            return "\n".join("({}, {})".format(u, v) for u, v in edge_list)

        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            G_edges = render(self.parameter["G_edges"]),
            T_edges = render(self.parameter["T_edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; None when missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output against the reference count.

        Unparseable or non-positive answers receive the wrong_format reward;
        otherwise the configured rewarding strategy is applied.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result <= 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            a, b = self.parameter["reference_answer"], processed_result
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
GraphIsomorphism_Environment diff --git a/server/Gym/environments/graph_isomorphism/environment.py b/server/Gym/environments/graph_isomorphism/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..75ba80c082eae3823783bf4c19160c1283111b6c --- /dev/null +++ b/server/Gym/environments/graph_isomorphism/environment.py @@ -0,0 +1,136 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class GraphIsomorphism_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given two **undirected graphs**, G1 and G2, each with {N} vertices labeled from `0` to `{N_minus_1}`. Both graphs contain exactly {M} **undirected** edges. + +- Graph G1 has the following (undirected) edge set E1: +{G1_edges} + +- Graph G2 has the following (undirected) edge set E2: +{G2_edges} + +Your task is to find a **bijection** (i.e., a permutation) `p` from the vertices of G1 to the vertices of G2 such that: For every edge `(u, v)` in E1, the edge `(p(u), p(v))` exists in E2, and vice versa. + +**Output Format:** Your final answer should be a single line containing the permutation `p(0), p(1), ..., p({N_minus_1})`, separated by spaces. Example: `{reversed_permutation}` (do **NOT** include backticks or quotes); this means `p(0) = {N_minus_1}, ..., p({N_minus_1}) = 0`.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(overlap/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the GraphIsomorphism_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 < edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + assert int(edge_density * N * (N - 1) / 2) > 0 + + G1_edges = self.parameter["G1_edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2)) + random.shuffle(G1_edges) + + mapping = list(range(N)) + random.shuffle(mapping) + G2_edges = self.parameter["G2_edges"] = [] + for u, v in G1_edges : + u, v = mapping[u], mapping[v] + if u > v : + u, v = v, u + G2_edges.append((u, v)) + random.shuffle(G2_edges) + + for edges in (G1_edges, G2_edges) : + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + self.parameter["reference_answer"] = " ".join(map(str, mapping)) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + G1_edges, G2_edges = self.parameter["G1_edges"], self.parameter["G2_edges"] + assert len(G1_edges) == len(G2_edges), "G1_edges and G2_edges should have the same length" + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + M = len(G1_edges), + G1_edges = "\n".join("({}, {})".format(u, v) for u, v in G1_edges), + G2_edges = "\n".join("({}, {})".format(u, v) for u, v in G2_edges), + reversed_permutation = " ".join(map(str, range(N - 1, -1, -1))), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + 
answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + permutation = processed_result + if len(permutation) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if len(set(permutation)) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if not all(0 <= i < self.parameter["N"] for i in permutation) : + return self.rewards["invalid_solution"] + + new_G2_edges = set() + for u, v in self.parameter["G1_edges"] : + u, v = permutation[u], permutation[v] + if u > v : + u, v = v, u + new_G2_edges.add((u, v)) + assert len(new_G2_edges) == len(self.parameter["G1_edges"]), "new_G2_edges should have the same length as G1_edges" + overlap = len(new_G2_edges & set(map(tuple, self.parameter["G2_edges"]))) + assert overlap <= len(self.parameter["G2_edges"]), "overlap should be less than or equal to len(G2_edges)" + + # ---------------------------------------- Sanity Check ---------------------------------------- + G1_edge_set, G2_edges_set = set(map(tuple, self.parameter["G1_edges"])), set(map(tuple, self.parameter["G2_edges"])) + unsatisfied = 0 + for u in range(self.parameter["N"]) : + for v in range(u + 1, self.parameter["N"]) : + G2_u, G2_v = permutation[u], permutation[v] + if G2_u > G2_v : + G2_u, G2_v = G2_v, G2_u + unsatisfied += int(((u, v) in G1_edge_set) != ((G2_u, G2_v) in G2_edges_set)) + assert unsatisfied == (len(self.parameter["G2_edges"]) - overlap) * 2 + # ---------------------------------------- Sanity Check ---------------------------------------- + + if self.rewards["rewarding_strategy"] == "(overlap/all)^beta" : + return self.rewards["rewarding_weight"] * ((overlap / len(self.parameter["G2_edges"])) 
class GridBFS_Environment(VerifiableEnvironment) :
    prompt_template = \
r"""You are given a {N} × {M} grid. Each cell contains `0`, `1`, or `X`. For each cell, compute its **shortest distance** to any cell containing `1`, where distance is defined as the minimum number of steps required to move from one cell to another under the following rules:
1. You may move **up**, **down**, **left**, or **right** to an adjacent cell.
2. You **cannot** move through cells containing `X`.
3. If a cell **cannot reach** any `1`, its distance should be -1.
4. Obviously, the distance for a `1` cell is 0; the distance for an `X` cell is also -1.

The grid is given as follows:
{grid}

**Output Format:** Output {N} lines, each containing {M} integers (separated by spaces), representing the distance of each cell to the nearest `1` cell."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Initialize the GridBFS_Environment instance.

        wrong_format is the reward for unparseable or wrongly shaped output;
        rewarding_strategy selects how a well-formed answer is scored, with
        rewarding_weight as the scale and rewarding_beta as the exponent of
        the partial-credit strategy. Remaining keyword arguments go to the
        base environment.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    def _generate(self) -> None :
        """
        Sample a random grid of `0` / `1` / `X` cells and compute, by a
        multi-source BFS, each cell's distance to the nearest `1`.

        Reads "MAX_N_M" from self.parameter and writes "N", "M", "grid",
        "gold_answer" (matrix of ints) and "reference_answer" (its text form).
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)

        # random mixing proportions for the three cell kinds
        cell_distribution = [random.randint(1, N * M) for _ in range(3)]
        cell_distribution = [x / sum(cell_distribution) for x in cell_distribution]
        grid = self.parameter["grid"] = [[random.choices(["0", "1", "X"], weights = cell_distribution)[0] for _ in range(M)] for _ in range(N)]

        # -1 doubles as "not visited yet" and as the final value for X / unreachable cells
        distances = self.parameter["gold_answer"] = [[-1] * M for _ in range(N)]
        queue = deque()
        for i in range(N) :
            for j in range(M) :
                if grid[i][j] == "1" :
                    distances[i][j] = 0
                    queue.append((i, j))
        while queue :
            x, y = queue.popleft()
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)] :
                nx, ny = x + dx, y + dy
                if 0 <= nx < N and 0 <= ny < M and grid[nx][ny] != "X" and distances[nx][ny] == -1 :
                    distances[nx][ny] = distances[x][y] + 1
                    queue.append((nx, ny))
        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in distances)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the grid size and the grid, one row per line."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            grid = "\n".join("".join(row) for row in self.parameter["grid"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Parse the answer text into a matrix of ints, one row per non-blank
        line. Returns None when the answer is missing or a token is not an
        integer.
        """
        if answer is not None :
            answer = answer.strip()
            try :
                matrix = []
                for line in answer.splitlines() :
                    line = line.strip()
                    if line :
                        matrix.append(list(map(int, line.split())))
                return matrix
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output against the gold distance matrix.

        Returns the wrong_format reward when the output cannot be parsed or
        does not have exactly N rows of M integers. Otherwise:
        - "mean([gold=answer])^beta": weight * (fraction of matching cells) ** beta
        - "gold=answer": weight * (1 if the whole matrix matches else 0)

        Raises NotImplementedError for any other rewarding strategy.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            distance = processed_result
            if len(distance) != self.parameter["N"] :
                return self.rewards["wrong_format"]
            if not all(len(row) == self.parameter["M"] for row in distance) :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" :
                matching_cells = sum(
                    sum(answer == gold for answer, gold in zip(answer_row, gold_row))
                    for answer_row, gold_row in zip(distance, self.parameter["gold_answer"])
                )
                accuracy = matching_cells / (self.parameter["N"] * self.parameter["M"])
                return self.rewards["rewarding_weight"] * (accuracy ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                # scale by rewarding_weight like every other strategy, rather than returning a bare bool
                return self.rewards["rewarding_weight"] * (distance == self.parameter["gold_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
class GridColoringCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3158
    prompt_template = \
r"""You are given a grid of size {N} × {M}. You may color some cells (and leave others uncolored) using {C} colors labeled from 0 to {C_minus_1}, such that:
1. No two different colors appear in the same row or the same column.
2. Color `i` is used exactly X[i] times. The array X is given as: {Xs}

Please compute the number of valid colorings modulo {MOD}."""

    def __init__(self,
                 max_MOD : int = 10000,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Store the modulus upper bound and the reward table; every other
        keyword argument is forwarded to the base environment.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format = wrong_format,
            wrong_range = wrong_range,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Sample grid size, colour counts and modulus until the number of
        valid colourings is non-zero modulo MOD, then record it.

        Reads "MAX_N_M" from self.parameter and writes "N", "M", "C", "Xs",
        "MOD" and "reference_answer".
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        while True :
            N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
            sum_X = random.randint(1, N * M)
            C = self.parameter["C"] = random.randint(1, min(N, M, sum_X))

            # split sum_X into C positive parts via C - 1 distinct cut points
            cuts = random.sample(range(1, sum_X), C - 1)
            cuts.sort()
            cuts = [0] + cuts + [sum_X]
            self.parameter["Xs"] = Xs = [hi - lo for lo, hi in zip(cuts, cuts[1 :])]
            assert len(Xs) == C and all(x > 0 for x in Xs), "Xs should be a non-empty list of positive integers"

            MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

            # Pascal's triangle modulo MOD, up to N*M
            total_cells = N * M
            binom = [[0] * (total_cells + 1) for _ in range(total_cells + 1)]
            for n in range(total_cells + 1) :
                row = binom[n]
                row[0] = 1
                for r in range(1, n + 1) :
                    row[r] = (binom[n - 1][r] + binom[n - 1][r - 1]) % MOD

            # placed[i][j]: ways for the colours handled so far to occupy
            # exactly i rows and j columns of the board
            placed = [[0] * (M + 1) for _ in range(N + 1)]
            placed[0][0] = 1

            for x in Xs :
                # exact[a][b]: ways to put x cells of one colour into an a×b
                # rectangle touching every row and column (inclusion–exclusion)
                exact = [[0] * (M + 1) for _ in range(N + 1)]
                for a in range(1, N + 1) :
                    for b in range(1, M + 1) :
                        if a * b < x :
                            continue
                        smaller = sum(
                            exact[la][lb] * binom[a][la] * binom[b][lb]
                            for la in range(1, a + 1)
                            for lb in range(1, b + 1)
                            if (la, lb) != (a, b)
                        )
                        exact[a][b] = (binom[a * b][x] - smaller) % MOD

                # extend: earlier colours use l rows / r columns, this colour
                # takes ti of the remaining rows and tj of the remaining columns
                extended = [[0] * (M + 1) for _ in range(N + 1)]
                for i in range(1, N + 1) :
                    for j in range(1, M + 1) :
                        acc = 0
                        for l in range(i) :
                            for r in range(j) :
                                ti, tj = i - l, j - r
                                if ti * tj < x :
                                    continue
                                acc = (acc + placed[l][r] * exact[ti][tj] * binom[N - l][ti] * binom[M - r][tj]) % MOD
                        extended[i][j] = acc
                placed = extended

            # sum over every non-empty used sub-board
            answer = sum(placed[i][j] for i in range(1, N + 1) for j in range(1, M + 1)) % MOD

            if answer > 0 :
                self.parameter["reference_answer"] = answer
                break


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the grid size, colour counts and modulus."""
        C = self.parameter["C"]
        rendered_counts = " ".join("X[{}]={}".format(i, x) for i, x in enumerate(self.parameter["Xs"]))
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            C = C,
            C_minus_1 = C - 1,
            Xs = rendered_counts,
            MOD = self.parameter["MOD"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; None when missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output: wrong_format when unparseable, wrong_range when
        outside [0, MOD), otherwise correct_answer or wrong_answer.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]) :
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class GridComponent_Environment(VerifiableEnvironment) :
    prompt_template = \
r"""You are given a {N} × {M} grid. Each cell contains either `0` or `1`. Please compute the **largest connected component** of `1`s in the grid, where a connected component is defined as a group of `1` cells that are reachable from each other by moving **up**, **down**, **left**, or **right** to an adjacent `1` cell.

The grid is given as follows:
{grid}

**Output Format:** Output a single integer — the size of the largest connected component (i.e., the number of `1`s in it). If there are no `1`s in the grid, output `0`.
"""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Store the reward table; every other keyword argument is forwarded
        to the base environment.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Sample a random 0/1 grid and record the size of its largest
        4-connected component of `1` cells (0 when the grid has no `1`).

        Reads "MAX_N_M" from self.parameter and writes "N", "M", "grid"
        (list of row strings) and "reference_answer".
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)

        one_probability = random.uniform(0.1, 0.9)
        grid = self.parameter["grid"] = [
            "".join("1" if random.random() < one_probability else "0" for _ in range(M))
            for _ in range(N)
        ]

        # flood-fill every unvisited `1` cell and keep the biggest region size
        seen = [[False] * M for _ in range(N)]
        largest = 0
        for start_x in range(N) :
            for start_y in range(M) :
                if grid[start_x][start_y] != "1" or seen[start_x][start_y] :
                    continue
                seen[start_x][start_y] = True
                pending = [(start_x, start_y)]
                size = 0
                while pending :
                    x, y = pending.pop()
                    size += 1
                    for nx, ny in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)) :
                        if 0 <= nx < N and 0 <= ny < M and grid[nx][ny] == "1" and not seen[nx][ny] :
                            seen[nx][ny] = True
                            pending.append((nx, ny))
                largest = max(largest, size)
        self.parameter["reference_answer"] = largest


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the grid size and the grid, one row per line."""
        rows = self.parameter["grid"]
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            grid = "\n".join(rows),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; None when missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output: wrong_format when unparseable, otherwise
        correct_answer on an exact match and wrong_answer on a mismatch.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
The grid is given as follows: +{grid} + +**Output Format:** Output a single integer — the number of valid labelings.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the GridLocalMinimumCountingProblem instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + + permutation = list(range(1, N * M + 1)) + random.shuffle(permutation) + def get_num(i, j) : + return permutation[i * M + j] + self.parameter["grid"] = grid = [['.'] * M for _ in range(N)] + for i in range(N) : + for j in range(M) : + local_minimum = True + for dx, dy in [(-1, -1), (-1, 0), (-1, +1), (0, -1), (0, +1), (+1, -1), (+1, 0), (+1, +1)] : + ni, nj = i + dx, j + dy + if 0 <= ni < N and 0 <= nj < M and get_num(ni, nj) <= get_num(i, j) : + local_minimum = False + break + if local_minimum : + grid[i][j] = 'X' + + + def compute(raw): + # Build boolean map of required local minima + grid = [[(raw[i][j] == 'X') for j in range(M)] for i in range(N)] + + # Quick invalid check: no two required 'X's may be adjacent (including diagonals) + for i in range(N): + for j in range(M): + if grid[i][j]: + for di in (-1, 0, 1): + for dj in (-1, 0, 1): + if di == 0 and dj == 0: + continue + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M and grid[ni][nj]: + assert False, "Invalid grid: two local minima are adjacent" + return + + ans = 0 + + def inrange(x, y): + return 0 <= x < N 
and 0 <= y < M + + def calc(): + """ + For the current grid of local-minima flags, use inclusion-exclusion DP + to count the number of labelings of the N*M cells so that exactly these + cells are local minima. + """ + pos = [(i, j) for i in range(N) for j in range(M) if grid[i][j]] + cntX = len(pos) + total = N * M + + # dp[used_cells][subset_mask] + # We need rows up to total+1 because we transition from i=total -> i+1=total+1 + dp = [[0] * (1 << cntX) for _ in range(total + 2)] + dp[0][0] = 1 + + for s in range(1 << cntX): + # mark all cells "blocked" by the minima NOT in subset s + blocked = [[False] * M for _ in range(N)] + free_cells = total + for k in range(cntX): + if not (s & (1 << k)): + x, y = pos[k] + for di in (-1, 0, 1): + for dj in (-1, 0, 1): + ni, nj = x + di, y + dj + if inrange(ni, nj) and not blocked[ni][nj]: + blocked[ni][nj] = True + free_cells -= 1 + + for used in range(free_cells + 1): + v = dp[used][s] + if not v: + continue + # place a non-min in one of the remaining free cells + dp[used + 1][s] += v * (free_cells - used) + # or turn one of the excluded minima into an actual minima + for k in range(cntX): + if not (s & (1 << k)): + dp[used + 1][s | (1 << k)] += v + + # We want all total cells assigned, and all minima chosen + return dp[total][(1 << cntX) - 1] + + def dfs(i, j, sign): + nonlocal ans + if i == N: + ans += sign * calc() + return + + # move to next cell + ni, nj = (i, j + 1) if j + 1 < M else (i + 1, 0) + + # option 1: don't add a minima here + dfs(ni, nj, sign) + + # option 2: if this cell is not already a minima, and none of its neighbors is one, we can add it + if not grid[i][j]: + ok = True + for di in (-1, 0, 1): + for dj in (-1, 0, 1): + if di == 0 and dj == 0: + continue + ai, aj = i + di, j + dj + if inrange(ai, aj) and grid[ai][aj]: + ok = False + break + if not ok: + break + if ok: + grid[i][j] = True + dfs(ni, nj, -sign) + grid[i][j] = False + + dfs(0, 0, 1) + assert ans > 0 + return ans + 
self.parameter["reference_answer"] = compute(grid) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + grid = "\n".join("".join(row) for row in self.parameter["grid"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/grid_parity_construction/__init__.py b/server/Gym/environments/grid_parity_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..faed452358539f9ae9422834fe007550ff4dcaa5 --- /dev/null +++ b/server/Gym/environments/grid_parity_construction/__init__.py @@ -0,0 +1 @@ +from .environment import GridParityConstruction_Environment diff --git a/server/Gym/environments/grid_parity_construction/environment.py b/server/Gym/environments/grid_parity_construction/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f1afb09d39629e6e22bd75d879cf795f8cbc07fd --- /dev/null +++ 
b/server/Gym/environments/grid_parity_construction/environment.py @@ -0,0 +1,109 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class GridParityConstruction_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Please construct a {N} × {M} binary matrix (i.e., a matrix where each cell is either 0 or 1) such that its **parity matrix** is: +{parity} + +**Definition (Parity Matrix):** For each cell (i, j), its parity is the XOR of the cell’s value and the values of its four neighbors (up, down, left, right). A neighbor outside the grid is treated as 0. + +**Output Format:** Output {N} lines, each with {M} characters (each '0' or '1'), without separators. The format must match the input: one line per row.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the GridParityConstruction_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + + one_probability = random.random() + grid = ["".join("01"[random.random() < one_probability] for _ in range(M)) for _ in range(N)] + self.parameter["reference_answer"] = "\n".join("".join(map(str, row)) for row in grid) + + parity = self.parameter["parity"] = [[0] * M for _ in range(N)] + for i in range(N) : + for j in range(M) : + parity[i][j] ^= int(grid[i][j]) + for di, dj in [(-1, 0), (+1, 0), (0, -1), (0, +1)] : + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M : + parity[i][j] ^= int(grid[ni][nj]) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + parity = "\n".join("".join(map(str, row)) for row in self.parameter["parity"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + matrix.append(line.strip()) + return matrix + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + N, M = self.parameter["N"], self.parameter["M"] + grid = processed_result + + if len(grid) != N or any(len(row) != M for row in grid) : + return self.rewards["wrong_format"] + for row 
in grid : + if not all(c in "01" for c in row) : + return self.rewards["wrong_format"] + + parity = [[0] * M for _ in range(N)] + for i in range(N) : + for j in range(M) : + parity[i][j] ^= int(grid[i][j]) + for di, dj in [(-1, 0), (+1, 0), (0, -1), (0, +1)] : + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M : + parity[i][j] ^= int(grid[ni][nj]) + + satisfied = sum(int(parity[i][j] == self.parameter["parity"][i][j]) for i in range(N) for j in range(M)) + assert satisfied <= N * M, "satisfied should be less than or equal to N * M" + + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / (N * M)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == (N * M)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/grid_triangle_counting/__init__.py b/server/Gym/environments/grid_triangle_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6ab8b10a521ff2bd87eeb256750366e84c272464 --- /dev/null +++ b/server/Gym/environments/grid_triangle_counting/__init__.py @@ -0,0 +1 @@ +from .environment import GridTriangleCounting_Environment diff --git a/server/Gym/environments/grid_triangle_counting/environment.py b/server/Gym/environments/grid_triangle_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..6559e0055f22ee2821f416bfbe0e8df1a7795e8f --- /dev/null +++ b/server/Gym/environments/grid_triangle_counting/environment.py @@ -0,0 +1,109 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class GridTriangleCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3166 + 
prompt_template = r"""How many non-degenerate triangles have all three vertices located at integer coordinate points (x, y) where 0 ≤ x ≤ {N} and 0 ≤ y ≤ {M}?""" + + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the GridTriangleCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 1, "MAX_N_M must be at least 1" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(1, MAX_N_M), random.randint(1, MAX_N_M) + + + if N > M: + N, M = M, N + + # Sieve to compute phi up to N + phi = [0] * (N + 1) + mark = [False] * (N + 1) + primes = [] + phi[1] = 1 + for i in range(2, N + 1): + if not mark[i]: + primes.append(i) + phi[i] = i - 1 + for p in primes: + ip = i * p + if ip > N: + break + mark[ip] = True + if i % p == 0: + phi[ip] = phi[i] * p + break + else: + phi[ip] = phi[i] * (p - 1) + + # Combination function C(x, 3) = x*(x-1)*(x-2)/6 + def C(x): + return x * (x - 1) * (x - 2) // 6 + + # Compute the contribution from degenerate (colinear) triples + degenerate = 0 + for d in range(2, N + 1): + term = phi[d] + term *= (N - d + N % d + 2) * (N // d) + term *= (M - d + M % d + 2) * (M // d) + degenerate += term // 2 + + # Total number of triples of points minus colinear ones + total_points = (N + 1) * (M + 1) + total_triples = C(total_points) + subtract_N_lines = (M + 1) * C(N + 1) + subtract_M_lines = (N + 1) * C(M + 1) + + self.parameter["reference_answer"] = total_triples - subtract_N_lines - subtract_M_lines - degenerate + assert 
self.parameter["reference_answer"] > 0 + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/halving_chain_counting/__init__.py b/server/Gym/environments/halving_chain_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b79a008d702594a33509a2a8ee1902f33c7f8d3 --- /dev/null +++ b/server/Gym/environments/halving_chain_counting/__init__.py @@ -0,0 +1 @@ +from .environment import HalvingChainCounting_Environment diff --git a/server/Gym/environments/halving_chain_counting/environment.py b/server/Gym/environments/halving_chain_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9f1a485899b16b0e8d2f805350d292e549428c10 --- /dev/null +++ b/server/Gym/environments/halving_chain_counting/environment.py @@ -0,0 +1,81 @@ +import random +from typing import Optional +from ...environment import 
VerifiableEnvironment + + +class HalvingChainCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1028 + prompt_template = \ +r"""Construct sequences based on the following rules: + +1. A sequence that contains only a single number `{N}` is considered a valid sequence. +2. Given any valid sequence, you can create a new valid sequence by appending a positive integer to the end — but the new number must be **at most half** of the last number in the current sequence (i.e., ≤ last_element / 2). + +Your task is to determine how many **distinct valid sequences** can be constructed following these rules. + +Output Format: +Your answer should be a single integer — the total number of valid sequences. +Example: `10` (do **NOT** include the backticks or quotes); this means there are 10 distinct valid sequences. +""" + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the HalvingChainCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 1, "MAX_N should be greater than or equal to 1" + + N = self.parameter["N"] = random.randint(1, MAX_N) + + + # Use dynamic programming to count the number of valid sequences + dpF = [0] * (N + 1) + for x in range(1, N + 1) : + dpF[x] = 1 + for y in range(1, x // 2 + 1) : + dpF[x] += dpF[y] + self.parameter["reference_answer"] = dpF[N] + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/hamiltonian_path/__init__.py b/server/Gym/environments/hamiltonian_path/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..213dee2ada3f96cae19702d25441fd5e26ea462b --- /dev/null +++ b/server/Gym/environments/hamiltonian_path/__init__.py @@ -0,0 +1 @@ +from .environment import HamiltonianPath_Environment diff --git a/server/Gym/environments/hamiltonian_path/environment.py b/server/Gym/environments/hamiltonian_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..15530c1e9a94952b382dc7ab0f4997cd95991045 --- /dev/null +++ b/server/Gym/environments/hamiltonian_path/environment.py @@ -0,0 +1,162 @@ +import heapq +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class HamiltonianPath_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following directed edges. Each edge is represented as a tuple `(s, t, w)`, meaning there is a directed edge **from vertex `s` to vertex `t` with weight `w`**: +{edges} + +Your task is to find a path `p1, p2, ..., pk` such that: +- The path **visits every vertex at least once** (revisiting vertices is allowed). +- Your goal is to **minimize the total weight** of the path. The total weight is the sum of the weights of all edges used in the path. + +Output Format: +Your final answer should be a single line containing the path in order: `p1, p2, ..., pk`, separated by **spaces**. +Example: `0 1 0 2` (do **NOT** include the backticks or quotes); this means the path starts at vertex 0, goes to 1, returns to 0, and then to 2 — thus visiting all three vertices at least once (assuming 3 vertices in total). +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the HamiltonianPath_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + constructed_path = list(range(N)) + random.shuffle(constructed_path) + self.parameter["reference_answer"] = " ".join(map(str, constructed_path)) + self.parameter["reference_answer_weight"] = 0 + for s, t in zip(constructed_path, constructed_path[1 :]) : + w = random.randint(1, N) + edges.append((s, t, w)) + self.parameter["reference_answer_weight"] += w + + num_edges = int(edge_density * N * (N - 1)) + if len(edges) < num_edges : + remaining_edges = list(set((s, t) for s in range(N) for t in range(N) if s != t) - set((s, t) for s, t, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for s, t in remaining_edges : + edges.append((s, t, random.randint(1, max(1, N // 2)))) + random.shuffle(edges) + + assert len(edges) == len(set((s, t) for s, t, w in edges)), "edges should be unique" + for s, t, w in edges : + assert 0 <= s < N, "s should be in range" + assert 0 <= t < N, "t should be in range" + assert s != t, "s should not be equal to t" + + + adjacent = [[] for s in range(N)] + for s, t, w in edges : + adjacent[s].append((t, w)) + priority_queue = [(0, (1 << start, start)) for start in range(N)] + visited_states, dist, prev = set(), {(1 << start, start) : 0 for start in range(N)}, {(1 << start, start) : (0, -1) for 
start in range(N)} + + while priority_queue : + current_dist, (visited, s) = heapq.heappop(priority_queue) + + if visited == (1 << N) - 1 : + assert current_dist < self.parameter["reference_answer_weight"], "current_dist should be less than or equal to reference_answer_weight" + self.parameter["reference_answer_weight"] = current_dist + + self.parameter["reference_answer"] = [] + while True : + assert (visited == 0) == (s == -1), "visited should be 0 if and only if s is -1" + if visited == 0 : + break + self.parameter["reference_answer"].append(s) + visited, s = prev[(visited, s)] + self.parameter["reference_answer"].reverse() + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"])) + + break + + if (visited, s) in visited_states : + continue + visited_states.add((visited, s)) + + for t, w in adjacent[s] : + new_visited = visited | (1 << t) + new_dist = current_dist + w + if dist.get((new_visited, t), self.parameter["reference_answer_weight"]) > new_dist : + dist[(new_visited, t)] = new_dist + prev[(new_visited, t)] = (visited, s) + heapq.heappush(priority_queue, (new_dist, (new_visited, t))) + + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(s, t, w) for s, t, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + path = processed_result + for vertex in path : + if not (0 <= vertex < self.parameter["N"]) : # 
check if vertex is in range + return self.rewards["invalid_solution"] + if len(set(path)) != self.parameter["N"] : # check if all vertices are visited + return self.rewards["invalid_solution"] + + edge2weight = {(s, t) : w for s, t, w in self.parameter["edges"]} + answer_weight = 0 + for s, t in zip(path, path[1 :]) : + if (s, t) not in edge2weight : + return self.rewards["invalid_solution"] + answer_weight += edge2weight[(s, t)] + assert self.parameter["reference_answer_weight"] <= answer_weight, "answer weight should be greater than or equal to reference_answer_weight" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((self.parameter["reference_answer_weight"] / answer_weight) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer_weight == self.parameter["reference_answer_weight"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/hamiltonian_path_existence/__init__.py b/server/Gym/environments/hamiltonian_path_existence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8cb19fe0209fb4a8c2f52dd9add4dc419e0fe1f2 --- /dev/null +++ b/server/Gym/environments/hamiltonian_path_existence/__init__.py @@ -0,0 +1 @@ +from .environment import HamiltonianPathExistence_Environment diff --git a/server/Gym/environments/hamiltonian_path_existence/environment.py b/server/Gym/environments/hamiltonian_path_existence/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..2f4cc50010b57e7e7a0dda59cea02a1918f38613 --- /dev/null +++ b/server/Gym/environments/hamiltonian_path_existence/environment.py @@ -0,0 +1,109 @@ +import heapq +import random +from typing import Optional, List +from ...environment 
import VerifiableEnvironment + + +class HamiltonianPathExistence_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following directed edges. Each edge is represented as a tuple `(s, t)`, meaning there is a directed edge **from vertex `s` to vertex `t`**: +{edges} + +Please find a path `p_1, p_2, ..., p_{N}` such that the path **visits every vertex exactly once** (revisiting vertices is NOT allowed). + +Output Format: +Your final answer should be a single line containing the path in order: `p_1, p_2, ..., p_{N}`, separated by **spaces**. +Example: `0 2 1` (do **NOT** include the backticks or quotes); this means the path starts at vertex 0, then goes to vertex 2, and finally to vertex 1 (assuming 3 vertices in total).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(existing/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the HamiltonianPathExistence_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + constructed_path = list(range(N)) + random.shuffle(constructed_path) + self.parameter["reference_answer"] = " ".join(map(str, constructed_path)) + for s, t in zip(constructed_path, constructed_path[1 :]) : + edges.append((s, t)) + + num_edges = int(edge_density * N * (N - 1)) + if len(edges) < num_edges : + remaining_edges = list(set((s, t) for s in range(N) for t in range(N) if s != t) - set(edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + edges += remaining_edges + random.shuffle(edges) + + assert len(edges) == len(set(edges)), "edges should be unique" + for s, t in edges : + assert 0 <= s < N, "s should be in range" + assert 0 <= t < N, "t should be in range" + assert s != t, "s should not be equal to t" + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(s, t) for s, t in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer 
format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + path = processed_result + if len(path) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if set(path) != set(range(self.parameter["N"])) : + return self.rewards["invalid_solution"] + + edges = set(map(tuple, self.parameter["edges"])) + existing = sum(int((s, t) in edges) for s, t in zip(path, path[1 :])) + assert existing <= self.parameter["N"] - 1, "existing should be less than or equal to len(path) - 1" + + if self.rewards["rewarding_strategy"] == "(existing/all)^beta" : + return self.rewards["rewarding_weight"] * ((existing / (self.parameter["N"] - 1)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "existing=all" : + return self.rewards["rewarding_weight"] * (existing == (self.parameter["N"] - 1)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/heap_counting/__init__.py b/server/Gym/environments/heap_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..70f44b680dc0d70a98c7de3d1e5e958e80061ff3 --- /dev/null +++ b/server/Gym/environments/heap_counting/__init__.py @@ -0,0 +1 @@ +from .environment import HeapCounting_Environment diff --git a/server/Gym/environments/heap_counting/environment.py b/server/Gym/environments/heap_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..df0a0bed819b828792f533be3ef8cb41698d4377 --- /dev/null +++ b/server/Gym/environments/heap_counting/environment.py @@ -0,0 +1,117 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class 
HeapCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2606 + prompt_template = r"""Compute the number of permutations `P` of the numbers 1 through {N} such that for all `2 ≤ i ≤ {N}`, it holds that `P[i] > P[i // 2]`. Since the answer may be large, output the result modulo {P}, where {P} is a prime number.""" + + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the HeapCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 3, "MAX_N should be greater than or equal to 3" + N = self.parameter["N"] = random.randint(3, MAX_N) + + is_prime = [True] * ((5 * N) + 1) + is_prime[0] = is_prime[1] = False + for i in range(2, int((5 * N) ** 0.5) + 1) : + if is_prime[i] : + for j in range(i * i, (5 * N) + 1, i): + is_prime[j] = False + P = self.parameter["P"] = random.choice([i for i in range(2, (5 * N) + 1) if is_prime[i]]) + + + def mod_pow(a: int, b: int, p: int) -> int: + """a^b mod p with binary exponentiation""" + res = 1 + while b: + if b & 1: + res = res * a % p + a = a * a % p + b >>= 1 + return res + + + def comb_small(n: int, k: int, p: int, fact: list) -> int: + """C(n,k) mod p with 0 ≤ n,k < p (prime p)""" + if k < 0 or k > n: + return 0 + return fact[n] * mod_pow(fact[k] * fact[n - k] % p, p - 2, p) % p + + + def lucas(n: int, k: int, p: int, fact: list) -> int: + """C(n,k) mod p prime p via Lucas""" + if k == 0: + return 1 + return (lucas(n // p, k // p, p, fact) * + comb_small(n % p, k % p, p, fact)) % p + + # ---------- factorials mod P up to N ---------- + fact = [1] * (N + 1) + 
for i in range(1, N + 1): + fact[i] = fact[i - 1] * i % P + + # ---------- subtree sizes ---------- + S = [0] * (5 * N + 2) # S[i] = size of subtree rooted at i + for i in range(1, N + 1): + S[i] = 1 + for i in range(N, 1, -1): # bottom-up (skip the root’s "parent") + S[i >> 1] += S[i] + + # ---------- number of labellings ---------- + DP = [1] * (2 * N + 2) # leaves already 1 + for i in range(N, 0, -1): + left = i * 2 + right = left + 1 + dp_left = DP[left] # 1 if child beyond n + dp_right = DP[right] + choose_left = lucas(S[i] - 1, S[left], P, fact) + DP[i] = (choose_left * dp_left * dp_right) % P + + self.parameter["reference_answer"] = DP[1] % P + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], P = self.parameter["P"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["P"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/hitori_puzzle/__init__.py b/server/Gym/environments/hitori_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c750f3882474dc51e60dfaea8832f2c551caa962 --- /dev/null +++ b/server/Gym/environments/hitori_puzzle/__init__.py @@ -0,0 +1 @@ +from .environment import HitoriPuzzle_Environment diff --git a/server/Gym/environments/hitori_puzzle/environment.py b/server/Gym/environments/hitori_puzzle/environment.py new file mode 100644 index 
0000000000000000000000000000000000000000..e617341b4adb2a07616624c321dd902c56a603ac --- /dev/null +++ b/server/Gym/environments/hitori_puzzle/environment.py @@ -0,0 +1,172 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class HitoriPuzzle_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a {N} × {M} matrix. Each cell contains an integer. Please "black out" some cells such that: +1. In each row and each column, no number appears more than once **among the remaining (non-blacked-out) cells**. +2. No two blacked-out cells are **adjacent** (horizontally or vertically). +3. All remaining cells must form a **single connected region** — you must be able to reach any remaining cell from any other by moving up, down, left, or right. + +The matrix is given in **row-major order**, with each row represented as a list of integers separated by spaces: +{matrix} + +**Output Format:** Output {N} lines, each containing {M} characters with no separators (also in **row-major order**). Use `.` for a remaining cell and `*` for a blacked-out cell.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the HitoriPuzzle_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def check_connected(self, grid, N, M) : + visited = [[False] * M for _ in range(N)] + def DFS(x, y) : + visited[x][y] = True + for dx, dy in [(-1, 0), (+1, 0), (0, -1), (0, +1)] : + nx, ny = x + dx, y + dy + if 0 <= nx < N and 0 <= ny < M and not visited[nx][ny] and grid[nx][ny] == "." 
: + DFS(nx, ny) + for i in range(N) : + for j in range(M) : + if grid[i][j] == "." : + DFS(i, j) + return all(visited[_i][_j] for _i in range(N) for _j in range(M) if grid[_i][_j] == ".") + assert False + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + + def generate(N, M) : + matrix = [[None] * M for _ in range(N)] + reference_answer = [["."] * M for _ in range(N)] + + all_cells = [(i, j) for i in range(N) for j in range(M)] + random.shuffle(all_cells) + + def backtrack(idx) : + if idx == len(all_cells) : + return True + i, j = all_cells[idx] + + remaining_numbers = set(matrix[i][_j] for _j in range(M) if reference_answer[i][_j] == "." and matrix[i][_j] is not None) | \ + set(matrix[_i][j] for _i in range(N) if reference_answer[_i][j] == "." and matrix[_i][j] is not None) + + for color in random.sample([".", "*"], 2) : + if color == "." : + num = 0 + while num in remaining_numbers : + num += 1 + matrix[i][j] = num + else : + if not remaining_numbers : + continue + ok = True + for di, dj in [(-1, 0), (+1, 0), (0, -1), (0, +1)] : + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M and reference_answer[ni][nj] == "*" : + ok = False + break + if not ok : + continue + reference_answer[i][j] = "*" + if not self.check_connected(reference_answer, N, M) : + reference_answer[i][j] = "." 
+ continue + matrix[i][j] = random.choice(list(remaining_numbers)) + assert backtrack(idx + 1) + return True + + return False + + assert backtrack(0), "Failed to generate a valid matrix" + return matrix, reference_answer + + self.parameter["matrix"], self.parameter["reference_answer"] = generate(N, M) + self.parameter["reference_answer"] = "\n".join("".join(row) for row in self.parameter["reference_answer"]) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + matrix = "\n".join(" ".join(map(str, row)) for row in self.parameter["matrix"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + matrix.append(line.strip()) + return matrix + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + N, M = self.parameter["N"], self.parameter["M"] + solution = processed_result + + if len(solution) != N or any(len(row) != M for row in solution) : + return self.rewards["wrong_format"] + if not all(c in ".*" for row in solution for c in row) : + return self.rewards["wrong_format"] + + for i in range(N) : + for j in range(M) : + if solution[i][j] == "*" : + for di, dj in [(-1, 0), (+1, 0), (0, -1), (0, +1)] : + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M and solution[ni][nj] == "*" : + return self.rewards["invalid_solution"] + if not self.check_connected(solution, N, M) : + return self.rewards["invalid_solution"] + + satisfied = 0 + for i in range(N) : + row_numbers = [self.parameter["matrix"][i][j] for j in range(M) if solution[i][j] == "."] + if len(row_numbers) == len(set(row_numbers)) : + satisfied += 1 + 
for j in range(M) : + col_numbers = [self.parameter["matrix"][i][j] for i in range(N) if solution[i][j] == "."] + if len(col_numbers) == len(set(col_numbers)) : + satisfied += 1 + assert satisfied <= N + M, "satisfied should not exceed N + M" + + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == (N + M)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/hungry_rabbit/__init__.py b/server/Gym/environments/hungry_rabbit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08f9b39a0b17ec1b84c0824871b205a90c3ebb89 --- /dev/null +++ b/server/Gym/environments/hungry_rabbit/__init__.py @@ -0,0 +1 @@ +from .environment import HungryRabbit_Environment diff --git a/server/Gym/environments/hungry_rabbit/environment.py b/server/Gym/environments/hungry_rabbit/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..372afd9090ea1107c0fcdf22f86180c856d7ff52 --- /dev/null +++ b/server/Gym/environments/hungry_rabbit/environment.py @@ -0,0 +1,110 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class HungryRabbit_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3895 + prompt_template = \ +r"""Let's construct {M} sets of integers S(1), S(2), ..., S(M), where each set contains exactly {K} integers chosen from 1 to {N}. The following conditions must hold: +- For all i (2 ≤ i ≤ {M}), we have {K} - |S(i) ∩ S(i - 1)| ≤ {L}. 
+{constraints} + +Output {M} lines, where the i-th line contains the {K} integers (in the range of [1, {N}]) in S(i), separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the HungryRabbit_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "unsuccessful_solution": unsuccessful_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 4, "MAX_N_M should be greater than or equal to 4" + + N = self.parameter["N"] = random.randint(4, MAX_N_M) + M = self.parameter["M"] = random.randint(3, MAX_N_M) + K = self.parameter["K"] = random.randint(2, N - 2) + L = self.parameter["L"] = random.randint(1, K - 1) + + + self.parameter["reference_answer"] = [] + forbidden = self.parameter["forbidden"] = [] + for i in range(M) : + if i == 0 : + S_i = random.sample(range(1, N + 1), k = K) + else : + S_i_minus_1 = self.parameter["reference_answer"][-1] + S_i_minus_1_complement = list(set(range(1, N + 1)) - set(S_i_minus_1)) + num_diff = random.randint(0, min((L, len(S_i_minus_1), len(S_i_minus_1_complement)))) + S_i = random.sample(S_i_minus_1, k = K - num_diff) + random.sample(S_i_minus_1_complement, k = num_diff) + random.shuffle(S_i) + assert len(S_i) == K, "Length of S(i) must be K" + self.parameter["reference_answer"].append(S_i) + S_i_complement = list(set(range(1, N + 1)) - set(S_i)) + forbidden.append(sorted(random.sample(S_i_complement, k = random.randint(1, len(S_i_complement))))) + 
self.parameter["reference_answer"] = "\n".join(" ".join(map(str, S_i)) for S_i in self.parameter["reference_answer"]) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + K = self.parameter["K"], + L = self.parameter["L"], + constraints = "\n".join("- S({}) must not contain any of the forbidden integers: {}".format(i + 1, " ".join(map(str, forbidden_i))) for i, forbidden_i in enumerate(self.parameter["forbidden"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[List[int]]] : + if answer is not None : + answer = answer.strip() + try : + Sets = [] + for line in answer.splitlines() : + line = line.strip() + if line : + Sets.append(list(map(int, line.split()))) + return Sets + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + Sets = processed_result + if len(Sets) != self.parameter["M"] : + return self.rewards["invalid_solution"] + if not all(len(Set) == self.parameter["K"] and len(set(Set)) == self.parameter["K"] for Set in Sets) : + return self.rewards["invalid_solution"] + if not all(1 <= x <= self.parameter["N"] for Set in Sets for x in Set) : + return self.rewards["invalid_solution"] + + if not all(not (set(Set_i) & set(forbidden_i)) for Set_i, forbidden_i in zip(Sets, self.parameter["forbidden"])) : + return self.rewards["unsuccessful_solution"] + + satisfied = sum(int(self.parameter["K"] - len(set(Sets[i]) & set(Sets[i - 1])) <= self.parameter["L"]) for i in range(1, self.parameter["M"])) + assert 0 <= satisfied <= self.parameter["M"] - 1, "satisfied should be between 0 and M-1" + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / (self.parameter["M"] - 1)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" 
: + return self.rewards["rewarding_weight"] * (satisfied == (self.parameter["M"] - 1)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/hur_warehouse_store/__init__.py b/server/Gym/environments/hur_warehouse_store/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97c42ae7dd146742d05b01eec9428a7e8634af84 --- /dev/null +++ b/server/Gym/environments/hur_warehouse_store/__init__.py @@ -0,0 +1 @@ +from .environment import HURWarehouseStore_Environment diff --git a/server/Gym/environments/hur_warehouse_store/environment.py b/server/Gym/environments/hur_warehouse_store/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..98b10c5bea5c4f3ace1d0e3d06a769b34bfe648a --- /dev/null +++ b/server/Gym/environments/hur_warehouse_store/environment.py @@ -0,0 +1,134 @@ +import heapq +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class HURWarehouseStore_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3545 + prompt_template = \ +r"""You are running a warehouse store for {N} days. On the morning of day i, you receive A[i] items; in the evening of the same day, a customer arrives and demands B[i] items. You can choose to satisfy the customer only if you have at least B[i] items in stock. The arrays A and B are given as follows: +{A_and_B} + +Please maximize the number of customers you can satisfy. 
Output a single line containing the indices of the days when you satisfy the customers, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the HURWarehouseStore_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + while True : + A = self.parameter["A"] = [random.randint(0, N) for _ in range(N)] + B = self.parameter["B"] = [random.randint(1, N) for _ in range(N)] + + answer_not_zero, stock = False, 0 + for Ai, Bi in zip(A, B) : + stock += Ai + if stock >= Bi : + answer_not_zero = True + break + if answer_not_zero : + break + + + tot = 0 + count = 0 + # max-heap of (b_value, index), implemented by pushing (-b_value, index) + heap = [] + vis = [False] * N + + for i in range(N): + tot += A[i] + + # If we can't satisfy B[i], but there's a previously accepted day with a larger demand, + # remove that day instead to free up space + if heap and tot < B[i]: + # peek at largest b so far + largest_b, idx = heap[0] + largest_b = -largest_b + if largest_b > B[i]: + # remove it + heapq.heappop(heap) + vis[idx] = False + tot += largest_b + count -= 1 + + # Try to accept today + if tot >= B[i]: + tot -= B[i] + heapq.heappush(heap, (-B[i], i)) + vis[i] = True + count += 1 + + assert count > 0, "There should be at least one customer satisfied" + self.parameter["gold_answer"] = count + self.parameter["reference_answer"] = " ".join(str(i+1) for i, v in enumerate(vis) if v) + + + 
def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A_and_B = "\n".join("A[{}]={} B[{}]={}".format(i + 1, Ai, i + 1, Bi) for i, (Ai, Bi) in enumerate(zip(self.parameter["A"], self.parameter["B"]))), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + satisfy = [False] * self.parameter["N"] + for day in processed_result : + day -= 1 + if not (0 <= day < self.parameter["N"]) : + return self.rewards["invalid_solution"] + if satisfy[day] : + return self.rewards["invalid_solution"] + satisfy[day] = True + + stock = 0 + for sold, Ai, Bi in zip(satisfy, self.parameter["A"], self.parameter["B"]) : + stock += Ai + if sold : + if stock < Bi : + return self.rewards["invalid_solution"] + stock -= Bi + + gold, answer = self.parameter["gold_answer"], len(processed_result) + assert answer <= gold, "The answer should not exceed the gold answer" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise ValueError("Invalid rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/imp_party/__init__.py b/server/Gym/environments/imp_party/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..7ba99944902973776779997dcbe06a3f9cf8c064 --- /dev/null +++ b/server/Gym/environments/imp_party/__init__.py @@ -0,0 +1 @@ +from .environment import ImpParty_Environment diff --git a/server/Gym/environments/imp_party/environment.py b/server/Gym/environments/imp_party/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..6e3ae27d862704e4123a70159213554307554e57 --- /dev/null +++ b/server/Gym/environments/imp_party/environment.py @@ -0,0 +1,105 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class ImpParty_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3524 + prompt_template = \ +r"""You are given an **undirected graph** with 3 × {N} vertices, labeled from `0` to `{ThreeN_minus_1}`. The graph contains the following undirected edges: +{edges} + +It is guaranteed that the graph contains a **clique of size 2 × {N}** — a set of 2 × {N} vertices in which every pair is connected by an edge. +Your task is to find any **clique of size {N}** in the graph. Output the indices of the selected {N} vertices, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the ImpParty_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + edges = self.parameter["edges"] = [] + + constructed_clique = random.sample(range(3 * N), 2 * N) + for u in constructed_clique : + for v in constructed_clique : + if u < v : + edges.append((u, v)) + + not_in_constructed_clique = list(set(range(3 * N)) - set(constructed_clique)) + edges += random.sample([(min(u, v), max(u, v)) for u in constructed_clique for v in not_in_constructed_clique], random.randint(0, len(constructed_clique) * len(not_in_constructed_clique))) + + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < 3 * N, "edges should be within the range of 0 to 3N-1" + assert len(edges) == len(set(edges)), "edges should be unique" + + self.parameter["reference_answer"] = " ".join(map(str, random.sample(constructed_clique, N))) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + ThreeN_minus_1 = 3 * N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + clique = processed_result + if len(clique) != 
self.parameter["N"] : + return self.rewards["invalid_solution"] + if not all(0 <= vertex < 3 * self.parameter["N"] for vertex in clique) : + return self.rewards["invalid_solution"] + if len(set(clique)) != len(clique) : + return self.rewards["invalid_solution"] + + satisfied = 0 + edges = set(map(tuple, self.parameter["edges"])) + for u in clique : + for v in clique : + if u < v : + satisfied += int((u, v) in edges) + assert satisfied <= self.parameter["N"] * (self.parameter["N"] - 1) // 2, "satisfied edges should not exceed N choose 2" + + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / (self.parameter["N"] * (self.parameter["N"] - 1) // 2)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == (self.parameter["N"] * (self.parameter["N"] - 1) // 2)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/individual_sum_bounded_sequence_counting/__init__.py b/server/Gym/environments/individual_sum_bounded_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..17af48710de2d777f7277099a5f3b434b21d246b --- /dev/null +++ b/server/Gym/environments/individual_sum_bounded_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .environment import IndividualSumBounded_SequenceCounting_Environment diff --git a/server/Gym/environments/individual_sum_bounded_sequence_counting/environment.py b/server/Gym/environments/individual_sum_bounded_sequence_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..6a3338f74b33dddc94b9d8f41b4885c996e30443 --- /dev/null +++ b/server/Gym/environments/individual_sum_bounded_sequence_counting/environment.py @@ -0,0 +1,87 @@ 
+import math +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class IndividualSumBounded_SequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3228 + prompt_template = \ +r"""Count the number of sequences X[1], ..., X[{K}] such that: +- X[1] ≥ 1 +- For all i in [2, {K}]: 1 ≤ X[i] ≤ {M} +- The total sum X[1] + X[2] + ... + X[{K}] ≤ {N} + +Output the count modulo {MOD}.""" + + def __init__(self, + max_MOD : int = 1000000000, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the IndividualSumBounded_SequenceCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.max_MOD = max_MOD + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N) + K = self.parameter["K"] = int(2 ** random.uniform(1.0, math.log2(N))) + M = self.parameter["M"] = random.randint(1, (N - 1) // (K - 1)) + assert K >= 2, "K should be at least 2" + assert 1 + M * (K - 1) <= N, "N should be at least 1 + M * (K - 1)" + + MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD) + + + pow1 = pow(M, K-1, MOD) + pow2 = pow(M, K-2, MOD) + # term1 = N * M^(K-1) mod MOD + term1 = (N % MOD) * pow1 % MOD + # x = M*(M+1)/2 mod MOD + x = (M * (M + 1) // 2) % MOD + # term2 = (K-1) * x * M^(K-2) mod MOD + term2 = ( (K-1) % MOD ) * x % MOD * pow2 % MOD + # answer = term1 - term2 (mod MOD) + ans = (term1 - term2) % MOD + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = 
self.parameter["N"], K = self.parameter["K"], M = self.parameter["M"], MOD = self.parameter["MOD"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/integer_factorization_counting/__init__.py b/server/Gym/environments/integer_factorization_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..66dde1911eab31cdddbea4ff1c0b8b6959a0db81 --- /dev/null +++ b/server/Gym/environments/integer_factorization_counting/__init__.py @@ -0,0 +1 @@ +from .environment import IntegerFactorizationCounting_Environment diff --git a/server/Gym/environments/integer_factorization_counting/environment.py b/server/Gym/environments/integer_factorization_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..af09a574952b2b3e6867cd6e2a5675a49af67f39 --- /dev/null +++ b/server/Gym/environments/integer_factorization_counting/environment.py @@ -0,0 +1,100 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class IntegerFactorizationCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3861 + prompt_template = \ +r"""Count the number of ways to factorize {N} into (multiple, i.e., more than 1) **distinct** positive integers greater than 1 such that their product is {N}. 
The order of factors does not matter. For example, $688 = 2 × 4 × 86 = 2 × 8 × 43 = 2 × 344 = 4 × 172 = 8 × 86 = 16 × 43$, so there are 6 valid ways in total.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the IntegerFactorizationCountingProblem instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 4, "MAX_N should be greater than or equal to 4" + + N = self.parameter["N"] = random.randint(4, MAX_N) + + + def count_factorizations(N: int) -> int: + # 1. enumerate divisors of N + divs = [] + i = 1 + while i * i <= N: + if N % i == 0: + divs.append(i) + if i != N // i: + divs.append(N // i) + i += 1 + divs.sort() + total = len(divs) + + # 2. map each divisor to its index (0-based) + idx = {d: i for i, d in enumerate(divs)} + + # 3. dp[i] = number of ways to get product = divs[i] using distinct divisors seen so far + dp = [0] * total + dp[0] = 1 # one way to make 1 (the empty product) + + # 4. for each divisor x = divs[j] (skip the first which is 1), + # update dp in place from high i down to j + for j in range(1, total): + xj = divs[j] + for i in range(total - 1, j - 1, -1): + di = divs[i] + if di % xj == 0: + dp[i] += dp[idx[di // xj]] + + # 5. 
dp[total-1] counts also the trivial factorization [N] → subtract 1 + return dp[total - 1] - 1 + self.parameter["reference_answer"] = count_factorizations(N) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/integer_programming/__init__.py b/server/Gym/environments/integer_programming/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..416c938e426e3961e1b1903cf6189003aa19d35f --- /dev/null +++ b/server/Gym/environments/integer_programming/__init__.py @@ -0,0 +1 @@ +from .environment import IntegerProgramming_Environment diff --git a/server/Gym/environments/integer_programming/environment.py b/server/Gym/environments/integer_programming/environment.py new file mode 100644 index 
# File: server/Gym/environments/integer_programming/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class IntegerProgramming_Environment(VerifiableEnvironment) :
    """Find integers x[0..N-1] satisfying M linear inequalities `sum(c_i * x[i]) >= r`.

    The instance is built around a hidden random solution, so at least one
    satisfying assignment always exists.
    """

    prompt_template = \
r"""There are {N} integers x[0], x[1], ..., x[{N_minus_1}]. They satisfy the following {M} inequations:
{inequations}

Please find any solution x[0], x[1], ..., x[{N_minus_1}] that satisfies the inequations.

Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**.
Example: `{one_to_N}` (do **NOT** include quotes or backticks); this means: x[0] = 1, x[1] = 2, ..., x[{N_minus_1}] = {N}.
"""

    def __init__(self,
                 number_range : int = 4,
                 coefficient_non_zero_probability : float = 0.5,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the IntegerProgramming_Environment instance.

        number_range bounds the absolute value of non-zero coefficients and
        (halved) the slack subtracted from each right-hand side.
        coefficient_non_zero_probability is the chance that a coefficient is non-zero.
        The remaining arguments configure the rewards returned by `scorer`.
        """
        super().__init__(**kwargs)

        self.number_range = number_range

        self.coefficient_non_zero_probability = coefficient_non_zero_probability

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """Draw a hidden solution and M inequalities it satisfies; store them in self.parameter."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        self.parameter["x"] = [random.randint(-N, +N) for i in range(N)]
        self.parameter["reference_answer"] = " ".join(map(str, self.parameter["x"]))

        inequations = self.parameter["inequations"] = []
        results = self.parameter["results"] = []
        for m in range(M) :
            # Redraw until at least one coefficient is non-zero.
            # NOTE(review): this never terminates if coefficient_non_zero_probability <= 0.
            while True :
                inequation = []
                for i in range(N) :
                    if random.random() < self.coefficient_non_zero_probability :
                        coefficient = random.randint(1, self.number_range)
                        if random.random() < 0.5 :
                            coefficient = -coefficient
                    else :
                        coefficient = 0
                    inequation.append(coefficient)
                if any(inequation) :
                    break
            inequations.append(inequation) # left >= right
            # Right-hand side = value at the hidden solution minus a non-negative slack,
            # so the hidden solution satisfies the inequality.
            results.append(sum(coefficient * xi for coefficient, xi in zip(inequation, self.parameter["x"])) - random.randint(0, self.number_range // 2))

    def _prompt_generate(self) -> str :
        """Render the prompt; zero coefficients are omitted from each inequality."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            N_minus_1 = self.parameter["N"] - 1,
            M = self.parameter["M"],
            inequations = "\n".join(" + ".join("({}) * x[{}]".format(coefficient, i) for i, coefficient in enumerate(inequation) if coefficient != 0) + " >= {}".format(result) for inequation, result in zip(self.parameter["inequations"], self.parameter["results"])),
            one_to_N = " ".join(map(str, range(1, self.parameter["N"] + 1))),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None if missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format

    def scorer(self, output : str) -> float :
        """Score `output` by the number of satisfied inequalities.

        Returns the "wrong_format" reward when the answer cannot be parsed or
        does not contain exactly N integers. Raises NotImplementedError for an
        unknown rewarding strategy.
        """
        # self.processor is provided by the base class (presumably it extracts
        # the answer and delegates to _process -- not visible here).
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            x = processed_result
            if len(x) != self.parameter["N"] :
                return self.rewards["wrong_format"]

            satisfied = sum(int(sum(coefficient * xi for coefficient, xi in zip(inequation, x)) >= result) for inequation, result in zip(self.parameter["inequations"], self.parameter["results"]))

            if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" :
                return self.rewards["rewarding_weight"] * ((satisfied / len(self.parameter["inequations"])) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "satisfied=all" :
                return self.rewards["rewarding_weight"] * (satisfied == len(self.parameter["inequations"]))
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/integral/__init__.py
#     from .environment import Integral_Environment

# File: server/Gym/environments/integral/environment.py
import math
import sympy
import random
from typing import Optional, List, Dict
from ...environment import VerifiableEnvironment
from ...environment import timeout, TimeoutException


def generate_test_points(num : int, low : float, high : float) -> List[float] :
    """Return `num` evenly spaced points from `low` to `high`, both endpoints included."""
    assert num >= 2, "num should be greater than or equal to 2"
    return [low + (high - low) * i / (num - 1) for i in range(num)]


class Integral_Environment(VerifiableEnvironment) :
    """Given F'(x), find an antiderivative F(x) written in SymPy syntax.

    A random expression tree is generated as F; its derivative is shown to the
    solver. Answers are verified by differentiating them and comparing with
    F'(x), symbolically when SymPy can decide and numerically otherwise.
    """

    prompt_template = \
r"""You are given the derivative of a function: F'(x) = {f_prime}

Your task is to find **an antiderivative** F(x) such that its derivative is equal to the given expression.

**Output Format:** Your answer should be the expression for F(x), written in **SymPy syntax**. Do not omit any symbols (e.g., always use `*` for multiplication).
Example: `sin(2*x)/2` (do **NOT** include quotes or backticks)."""
    test_points = generate_test_points(1024, -2, +2) # sample grid for numeric checks
    epsilon = 1E-5 # numeric tolerance for "derivative difference is zero"
    max_val = 1E+4 # generated derivatives exceeding this magnitude are rejected

    def __init__(self,
                 node_type_probs : Optional[List[float]] = None,
                 unary_ops_probs : Optional[Dict[str, float]] = None,
                 binary_ops_probs : Optional[Dict[str, float]] = None,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the Integral_Environment instance.

        node_type_probs: (unary, binary) weights, must sum to 1; defaults to (0.5, 0.5).
        unary_ops_probs / binary_ops_probs: operator name -> probability, each summing to 1.
        wrong_format / correct_answer / wrong_answer: rewards returned by `scorer`.
        """
        super().__init__(**kwargs)

        if node_type_probs is None :
            node_type_probs = (0.5, 0.5)
        assert len(node_type_probs) == 2 and abs(sum(node_type_probs) - 1.0) < 1E-8, "node_type_probs should have length 2 and sum to 1"
        self.node_type_probs = node_type_probs

        if unary_ops_probs is None :
            unary_ops_probs = {
                "sin" : 0.1,
                "cos" : 0.1,
                "exp" : 0.05,
                "log" : 0.05,
                "const_pow" : 0.1,
                "const_add" : 0.25,
                "const_mul" : 0.25,
                "const_div" : 0.1,
            }
        assert abs(sum(unary_ops_probs.values()) - 1.0) < 1E-8, "unary_ops_probs values should sum to 1"
        self.unary_ops_probs = unary_ops_probs

        if binary_ops_probs is None :
            binary_ops_probs = {
                "+" : 0.2,
                "-" : 0.2,
                "*" : 0.3,
                "/" : 0.2,
                "**" : 0.1,
            }
        assert abs(sum(binary_ops_probs.values()) - 1.0) < 1E-8, "binary_ops_probs values should sum to 1"
        self.binary_ops_probs = binary_ops_probs

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }

    def _generate(self) -> None :
        """Generate a random F(x) with `node_num` nodes and store F and F' as strings.

        Candidates are redrawn until the derivative depends on x, contains no
        zoo/nan, is not overly complex, and evaluates to a finite value within
        max_val on at least half of the test points.
        """
        assert "node_num" in self.parameter, "node_num is required in parameter"
        node_num = self.parameter["node_num"]
        assert isinstance(node_num, int) and node_num >= 1, "node_num should be a positive integer"

        unary_ops, unary_probs = zip(*self.unary_ops_probs.items())
        binary_ops, binary_probs = zip(*self.binary_ops_probs.items())

        x = sympy.symbols("x")

        def build_expr(n : int) -> sympy.Expr :
            """Build a random expression tree with exactly n nodes (leaves are x)."""
            assert n >= 1, "n should be greater than or equal to 1"
            if n == 1 :
                return x

            # A binary node needs at least one node on each side, hence n >= 3.
            if (random.choices(("unary", "binary"), weights = self.node_type_probs, k = 1)[0] if n >= 3 else "unary") == "unary" :
                op = random.choices(unary_ops, weights = unary_probs, k = 1)[0]
                sub = build_expr(n - 1)
                if op == "sin" :
                    return sympy.sin(sub)
                elif op == "cos" :
                    return sympy.cos(sub)
                elif op == "exp" :
                    return sympy.exp(sub)
                elif op == "log" :
                    return sympy.log(sub)
                elif op == "const_pow" :
                    try :
                        if random.random() < 0.5 :
                            return sub ** (1 / sympy.Integer(random.randint(2, 4)))
                        else : # power
                            return sub ** sympy.Integer(random.randint(2, 4))
                    except TimeoutException :
                        raise
                    except Exception :
                        # Fall back to a safer option if fractional power fails
                        return sub ** sympy.Integer(random.randint(2, 4))
                elif op == "const_add" :
                    return sub + sympy.Integer(random.choice([-2, -1, +1, +2]))
                elif op == "const_mul" :
                    if random.random() < 0.5 : # negative
                        return sub * -sympy.Integer(random.randint(2, 4))
                    else : # positive
                        return sub * sympy.Integer(random.randint(2, 4))
                elif op == "const_div" :
                    return sub / sympy.Integer(random.randint(2, 4))
                else :
                    raise NotImplementedError(f"Unknown unary op: {op}")
            else : # binary
                op = random.choices(binary_ops, weights = binary_probs, k = 1)[0]
                assert 1 <= (n - 1) - 1
                left_n = random.randint(1, (n - 1) - 1)
                left = build_expr(left_n)
                right = build_expr((n - 1) - left_n)
                if op == "+" :
                    return left + right
                elif op == "-" :
                    return left - right
                elif op == "*" :
                    return left * right
                elif op == "/" :
                    return left / right
                elif op == "**" :
                    return left ** right
                else :
                    raise NotImplementedError(f"Unknown binary op: {op}")

        while True :
            try :
                f_expr = build_expr(node_num)
                # Add complexity check after building expression
                if sympy.count_ops(f_expr) > 1000:
                    continue
                self.parameter["reference_answer"] = str(f_expr)

                f_prime = sympy.diff(f_expr, x)
                # Add complexity check after differentiation
                if sympy.count_ops(f_prime) > 1000:
                    continue
                self.parameter["f_prime"] = str(f_prime)

                if not f_prime.free_symbols :
                    continue
                if sympy.zoo in f_expr.atoms() or sympy.nan in f_expr.atoms() :
                    continue
                elif sympy.zoo in f_prime.atoms() or sympy.nan in f_prime.atoms() :
                    continue
                else :
                    f_prime_compute = sympy.lambdify(x, f_prime, modules = ["math"])
                    valid_count = 0
                    for pt in self.test_points :
                        try :
                            val = float(f_prime_compute(pt))
                        except TimeoutException :
                            raise
                        except Exception :
                            # Outside the domain at this point (e.g. log of a negative number).
                            continue
                        if not math.isfinite(val) :
                            continue
                        if abs(val) > self.max_val :
                            valid_count = 0
                            break
                        valid_count += 1
                    if valid_count >= len(self.test_points) // 2 :
                        break
                    else :
                        continue
            except TimeoutException :
                # A timeout imposed by a caller must not be turned into "try again".
                raise
            except Exception :
                # Any SymPy failure on this candidate: draw a new one. Deliberately not a
                # bare except, so KeyboardInterrupt / SystemExit can still stop the loop.
                continue


    def _prompt_generate(self) -> str :
        """Render the prompt with the stored derivative string."""
        return self.prompt_template.format(f_prime = self.parameter["f_prime"])

    def _process(self, answer : Optional[str]) -> Optional[sympy.Expr] :
        """Parse the answer text into a SymPy object; return None if missing, too long, or unparseable."""
        if answer is not None :
            answer = answer.strip()
            # Limit input string length to prevent parsing explosion
            if len(answer) > 10000:
                return None
            try :
                # SECURITY NOTE(review): sympify evaluates the text (SymPy documents that it
                # uses eval and must not be fed unsanitized input). `answer` is model output;
                # consider a restricted parser or sandboxing.
                expr = sympy.sympify(answer)
                return expr
            except TimeoutException :
                # Let the scorer's timeout propagate instead of reporting a format error.
                raise
            except Exception :
                return None
        else :
            return None

    def scorer(self, output : str) -> float :
        """Score `output` by checking that its derivative equals the stored F'(x).

        Returns "wrong_format" for unparseable answers, answers with symbols
        other than x, or SymPy failures; "wrong_answer" for overly complex or
        incorrect answers; "correct_answer" when the difference of derivatives
        is symbolically zero, or numerically within epsilon on every evaluable
        test point with at least a quarter of the points evaluable.
        Returns -1.0 if scoring exceeds the 10 second timeout.
        """
        @timeout(10) # 10 second timeout
        def _scorer_impl():
            processed_result = self.processor(output)
            if processed_result is not None and isinstance(processed_result, sympy.Expr) :
                x = sympy.symbols("x")
                if processed_result.free_symbols - {x} :
                    return self.rewards["wrong_format"]

                # Check if processed_result is excessively complex compared to reference
                try :
                    if sympy.count_ops(processed_result) > 4 * sympy.count_ops(sympy.sympify(self.parameter["reference_answer"])) :
                        return self.rewards["wrong_answer"]
                except TimeoutException :
                    raise
                except Exception :
                    return self.rewards["wrong_format"]

                try :
                    expr = sympy.diff(processed_result, x) - sympy.sympify(self.parameter["f_prime"])
                    # Add complexity check after differentiation in scorer
                    if sympy.count_ops(expr) > 5000:
                        return self.rewards["wrong_answer"]
                except TimeoutException :
                    raise
                except Exception :
                    return self.rewards["wrong_format"]

                # is_zero is True/False when SymPy can decide, None otherwise.
                eq = expr.is_zero
                if eq is not None :
                    assert isinstance(eq, bool), "eq should be a boolean value"
                    if eq :
                        return self.rewards["correct_answer"]
                    else :
                        return self.rewards["wrong_answer"]

                # Undecided symbolically: fall back to a numeric comparison.
                try :
                    expr_compute = sympy.lambdify(x, expr, modules = ["math"])
                except TimeoutException :
                    raise
                except Exception :
                    return self.rewards["wrong_answer"]
                zero_count = 0
                for pt in self.test_points :
                    try :
                        val = float(expr_compute(pt))
                    except TimeoutException :
                        # Previously swallowed by a bare except, which let this loop
                        # keep running after the deadline had fired.
                        raise
                    except Exception :
                        continue
                    if not math.isfinite(val) :
                        continue
                    if abs(val) > self.epsilon :
                        return self.rewards["wrong_answer"]
                    else :
                        zero_count += 1

                if zero_count >= len(self.test_points) // 4 :
                    return self.rewards["correct_answer"]
                else :
                    return self.rewards["wrong_answer"]
            else :
                return self.rewards["wrong_format"]

        try:
            return _scorer_impl()
        except TimeoutException: # Catch the specific timeout exception
            # NOTE(review): hard-coded; equals the default "wrong_format" reward but does
            # not follow a custom one -- confirm whether that is intended.
            return -1.0


# File: server/Gym/environments/inversion_pair/__init__.py
#     from .environment import InversionPair_Environment

# File: server/Gym/environments/inversion_pair/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class InversionPair_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1966
    """Minimum adjacent swaps that align two arrays rank-by-rank (an inversion count)."""

    prompt_template = \
r"""You are given two arrays A and B, each containing {N} **distinct** integers:
{A}
{B}

You may perform the following operation any number of times: Swap two **adjacent elements** (i.e., elements at indices i and i+1) in either A or B.
Your goal is to **minimize** the sum: (A[0] - B[0])² + (A[1] - B[1])² + ... + (A[{N_minus_1}] - B[{N_minus_1}])²
Among all ways to achieve the minimum possible sum, please output the **minimum number of adjacent swaps** needed."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the InversionPair_Environment instance.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """Sample A and B (N distinct values each from range(2N)) and compute the reference answer."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        A = self.parameter["A"] = random.sample(range(2 * N), N)
        B = self.parameter["B"] = random.sample(range(2 * N), N)


        # indices of A and of B, each ordered by the value stored at that index
        a_idx = list(range(N))
        b_idx = list(range(N))
        a_idx.sort(key=lambda i: A[i])
        b_idx.sort(key=lambda i: B[i])

        # l[i] = index in B of the element with the same rank as A[i]
        # (the k-th smallest of A is matched with the k-th smallest of B)
        l = [0] * N
        for rank in range(N):
            l[a_idx[rank]] = b_idx[rank]

        # Fenwick (BIT) for counting how many already seen have smaller rank
        BIT = [0] * (N + 1)
        def add(pos, val):
            while pos <= N:
                BIT[pos] += val
                pos += pos & -pos

        def query(pos):
            s = 0
            while pos > 0:
                s += BIT[pos]
                pos -= pos & -pos
            return s

        # count inversions in l[] by scanning from right to left
        ans = 0
        for i in range(N - 1, -1, -1):
            # values in l are 0..N-1, so use pos = l[i]+1 in the 1-indexed BIT
            pos = l[i] + 1
            # count how many already-added positions < pos
            ans += query(pos - 1)
            add(pos, 1)

        self.parameter["reference_answer"] = ans


    def _prompt_generate(self) -> str :
        """Render the prompt, listing both arrays as `A[i]=value` / `B[i]=value`."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
            B = " ".join("B[{}]={}".format(i, Bi) for i, Bi in enumerate(self.parameter["B"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as a single integer; return None if missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """Score `output` against the reference swap count.

        Negative or unparseable answers get "wrong_format". Under
        "(min/max)^beta" the reward decays with the ratio between answer and
        reference; under "gold=answer" only an exact match is rewarded.
        Raises NotImplementedError for an unknown rewarding strategy.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result < 0 :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                # Handle 0 separately so that max(a, b) below is never zero.
                if processed_result == 0 :
                    return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == 0)
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/inversion_pair_k_counting/__init__.py
#     from .environment import InversionPairK_Counting_Environment
# File: server/Gym/environments/inversion_pair_k_counting/environment.py
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class InversionPairK_Counting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P2513
    """Count the permutations of 1..N that have exactly K inversion pairs, modulo MOD."""

    prompt_template = \
r"""Consider all permutations of the numbers `1` through `{N}`. Your task is to **count how many of them have exactly {K} inversion pairs**.
Since the number may be large, output the result **modulo {MOD}**.

**Definitions:**
- A **permutation of 1 to {N}** is an arrangement of the numbers `1` through `{N}`, where each number appears exactly once.
- An **inversion pair** in a permutation `a_1, a_2, ..., a_{N}` is a pair of indices `(i, j)` such that `i < j` and `a_i > a_j`.

**Output Format:**
Your final answer should be a single integer — the number of permutations with exactly {K} inversion pairs, **modulo {MOD}**.
Example: `9999` (do **NOT** include the backticks or quotes).
"""

    def __init__(self,
                 max_MOD : int = 1000000,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs):
        """
        Set up the environment.

        max_MOD is the largest modulus that may be drawn; the other keyword
        arguments are the rewards handed out by `scorer`.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format = wrong_format,
            wrong_range = wrong_range,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )

    def _generate(self) -> None:
        """Draw K and MOD for the configured N, then compute the reference count by DP."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        # K is drawn before MOD; keep this order so seeded runs stay reproducible.
        K = random.randint(0, N * (N - 1) // 2)
        self.parameter["K"] = K
        MOD = random.randint(2, self.max_MOD)
        self.parameter["MOD"] = MOD

        # ways[k] = number of permutations of the first `length` values with k inversions (mod MOD).
        ways = [1] + [0] * K
        for length in range(1, N + 1):
            # Running totals of the previous row, so every window sum is O(1).
            running = 0
            cumulative = []
            for count in ways:
                running += count
                cumulative.append(running)

            # Placing the new largest value adds between 0 and length - 1 inversions.
            reachable = min(K, length * (length - 1) // 2)
            for inversions in range(reachable + 1):
                lowest = inversions - (length - 1)
                window = cumulative[inversions]
                if lowest > 0:
                    window -= cumulative[lowest - 1]
                ways[inversions] = window % MOD

        self.parameter["reference_answer"] = ways[K]

    def _prompt_generate(self) -> str:
        """Fill N, K and MOD into the prompt template."""
        values = self.parameter
        return self.prompt_template.format(N = values["N"], K = values["K"], MOD = values["MOD"])

    def _process(self, answer : Optional[str]) -> Optional[int]:
        """Turn the answer text into an int; None when absent or not an integer."""
        if answer is None:
            return None
        text = answer.strip()
        try:
            return int(text)
        except ValueError:
            return None

    def scorer(self, output : str) -> float:
        """Return the reward for `output`: format error, out of [0, MOD), correct, or wrong."""
        value = self.processor(output)
        if value is None:
            return self.rewards["wrong_format"]
        if value < 0 or value >= self.parameter["MOD"]:
            return self.rewards["wrong_range"]
        verdict = "correct_answer" if value == self.parameter["reference_answer"] else "wrong_answer"
        return self.rewards[verdict]
# File: server/Gym/environments/josephus/__init__.py
#     from .environment import Josephus_Environment

# File: server/Gym/environments/josephus/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class Josephus_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1996
    """Josephus problem: report the full elimination order for N people and step M."""

    prompt_template = \
r"""{N} people are standing in a circle (labeled from 1 to {N}). Starting from the person labeled 1, they count off in order. The person who counts to {M} is eliminated, and the next person resumes counting from 1. This process continues until everyone is eliminated. Please determine the order in which people are eliminated.

**Output Format:** Your final answer should be a single line containing the labels of the people in the order they are eliminated, separated by spaces."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_answer : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Initialize the Josephus_Environment instance.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_answer" : invalid_answer,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    def _generate(self) -> None :
        """Draw N in [3, MAX_N] and M in [2, N], then simulate the eliminations."""
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 3, "MAX_N should be greater than or equal to 3"

        N = self.parameter["N"] = random.randint(3, MAX_N)
        M = self.parameter["M"] = random.randint(2, N)


        # Fenwick tree over labels 1..N; a 1 marks a person still in the circle.
        bit = [0] * (N + 1)

        def lowbit(x) :
            return x & -x

        def add(pos, val) :
            while pos <= N :
                bit[pos] += val
                pos += lowbit(pos)

        def find_kth(k) :
            # Binary descent: the largest idx whose prefix sum is < k; the answer is idx + 1.
            idx = 0
            curr = 0
            max_bit = N.bit_length()
            for i in range(max_bit, -1, -1) :
                next_idx = idx + (1 << i)
                if next_idx <= N and curr + bit[next_idx] < k:
                    idx = next_idx
                    curr += bit[next_idx]
            return idx + 1

        for i in range(1, N + 1) :
            add(i, 1)

        result = []
        remaining = N
        cur = 1
        for _ in range(N) :
            # cur is the 1-based rank (among the remaining people) of the next person to go.
            cur = (cur - 1 + M - 1) % remaining + 1
            person = find_kth(cur)
            result.append(person)
            add(person, -1)
            remaining -= 1

        self.parameter["gold_answer"] = result
        assert len(result) == N, "The length of the result should be equal to N"
        self.parameter["reference_answer"] = " ".join(map(str, result))

    def _prompt_generate(self) -> str :
        """Render the prompt with N and M."""
        return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"])


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None if missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """Score `output` against the gold elimination order.

        Answers that are not a permutation of 1..N get "invalid_answer";
        unparseable answers get "wrong_format". Raises NotImplementedError for
        an unknown rewarding strategy.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            if len(processed_result) != self.parameter["N"] :
                return self.rewards["invalid_answer"]
            if len(set(processed_result)) != self.parameter["N"] :
                return self.rewards["invalid_answer"]
            if not all(1 <= x <= self.parameter["N"] for x in processed_result) :
                return self.rewards["invalid_answer"]

            if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" :
                return self.rewards["rewarding_weight"] * ((sum(float(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["N"]) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == processed_result)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/jug_puzzle/__init__.py
#     from .environment import JugPuzzle_Environment

# File: server/Gym/environments/jug_puzzle/environment.py
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class JugPuzzle_Environment(VerifiableEnvironment) :
    """Water-jug puzzle: reach a target volume in some jug using Fill / Empty / Pour actions."""

    prompt_template = \
r"""You are given {N} jugs (initially empty) with the following capacities:
{jug_capacities}

Please fill a jug (you pick the one) with exactly {target_volumn} liters of water. You may perform the following actions:
- `Fill i` — Fill jug `i` to its full capacity.
- `Empty i` — Empty all water from jug `i`.
- `Pour i j` — Pour water from jug `i` to jug `j` until either jug `i` is empty or jug `j` is full.

**Output Format:** Each action should be written on its own line in the format shown above (without backticks or quotes). Output one action per line, in the order they should be performed."""

    def __init__(self,
                 max_capacity_multiple : int = 10,
                 operation_probabilities : Optional[List[float]] = None,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0,
                 **kwargs) :
        """
        Initialize the JugPuzzle_Environment instance.

        max_capacity_multiple: jug capacities are drawn from [2, N * max_capacity_multiple].
        operation_probabilities: weights for (Fill, Empty, Pour) used by the random
            walk in `_generate`; None selects the default [0.1, 0.1, 0.8].
        The remaining arguments are the rewards returned by `scorer`.
        """
        super().__init__(**kwargs)

        self.max_capacity_multiple = max_capacity_multiple

        # A None sentinel replaces the former mutable list default (one list object shared
        # by every instance) and makes the Optional annotation truthful. The copy keeps
        # the caller's list from being aliased.
        if operation_probabilities is None :
            operation_probabilities = [0.1, 0.1, 0.8]
        else :
            operation_probabilities = list(operation_probabilities)
        assert len(operation_probabilities) == 3, "operation_probabilities should have exactly 3 elements for Fill, Empty, and Pour operations"
        assert all(probability >= 0 for probability in operation_probabilities), "operation_probabilities should not contain negative values"
        assert sum(operation_probabilities) > 0, "operation_probabilities should sum to a positive value"
        self.operation_probabilities = operation_probabilities

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "wrong_solution" : wrong_solution,
            "correct_solution" : correct_solution,
        }


    def _generate(self) -> None :
        """Draw jug capacities, random-walk `steps` valid actions and pick a target volume.

        The target is the most recent volume seen during the walk that is not a
        capacity, not a difference of two capacities, and not seen earlier. If
        no such volume appears, the fallback is a trivial one-action "Fill".
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        capacities = self.parameter["jug_capacities"] = [random.randint(2, N * self.max_capacity_multiple) for _ in range(N)]
        differences = set(capacity_i - capacity_j for capacity_j in capacities for capacity_i in capacities if capacity_i != capacity_j)

        # Fallback instance: filling one jug to the brim.
        jug = random.randint(0, N - 1)
        self.parameter["reference_answer"] = "Fill {}".format(jug)
        self.parameter["target_volumn"] = capacities[jug]

        assert "steps" in self.parameter, "steps is required in parameter"
        steps = self.parameter["steps"]
        assert steps >= 2, "steps should be greater than or equal to 2"

        volumns = [0] * N
        actions = ""
        existing_volumns = set()
        for step in range(steps) :
            # Redraw until an operation is drawn that actually changes the state.
            # NOTE(review): with weights that allow only Empty/Pour this cannot leave
            # the all-empty start state and would loop forever.
            while True :
                operation = random.choices(["Fill", "Empty", "Pour"], self.operation_probabilities)[0]
                if operation == "Fill" :
                    jug = random.randint(0, N - 1)
                    if volumns[jug] < capacities[jug] :
                        actions += "Fill {}\n".format(jug)
                        volumns[jug] = capacities[jug]
                        break
                elif operation == "Empty" :
                    jug = random.randint(0, N - 1)
                    if volumns[jug] > 0 :
                        actions += "Empty {}\n".format(jug)
                        volumns[jug] = 0
                        break
                elif operation == "Pour" :
                    jug_i = random.randint(0, N - 1)
                    jug_j = random.randint(0, N - 1)
                    if jug_i != jug_j and volumns[jug_i] > 0 and volumns[jug_j] < capacities[jug_j] :
                        actions += "Pour {} {}\n".format(jug_i, jug_j)
                        pour_amount = min(volumns[jug_i], capacities[jug_j] - volumns[jug_j])
                        volumns[jug_i] -= pour_amount
                        volumns[jug_j] += pour_amount
                        break

            # New, non-trivial volumes present after this step become target candidates.
            target_volumns = set(volumn for volumn in volumns if volumn > 0) - existing_volumns - differences - set(capacities)
            if target_volumns :
                self.parameter["reference_answer"] = actions
                self.parameter["target_volumn"] = random.choice(list(target_volumns))
                existing_volumns |= target_volumns


    def _prompt_generate(self) -> str :
        """Render the prompt with the capacities and the target volume."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            target_volumn = self.parameter["target_volumn"],
            jug_capacities = "\n".join("Jug {}'s capacity: {} liters".format(i, capacity) for i, capacity in enumerate(self.parameter["jug_capacities"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse one action per non-blank line.

        Returns a list of `["Fill"|"Empty", i]` / `["Pour", i, j]` entries with
        integer indices, or None if the answer is missing or any line is malformed.
        """
        if answer is not None :
            answer = answer.strip()
            actions = []
            for line in answer.splitlines() :
                line = line.strip()
                if line :
                    actions.append(line.split())
                    action = actions[-1]
                    if not action :
                        return None
                    if action[0] in ("Fill", "Empty") :
                        if len(action) != 2 :
                            return None
                        try :
                            action[1] = int(action[1])
                        except ValueError :
                            return None
                    elif action[0] == "Pour" :
                        if len(action) != 3 :
                            return None
                        try :
                            action[1] = int(action[1])
                            action[2] = int(action[2])
                        except ValueError :
                            return None
                    else :
                        return None
            return actions
        else :
            return None


    def scorer(self, output : str) -> float :
        """Replay the answer's actions and reward reaching the target volume.

        "wrong_format" for unparseable answers, "invalid_solution" for jug
        indices out of range or a pour from a jug into itself, otherwise
        "correct_solution" if some jug holds the target volume after the last
        action and "wrong_solution" if not.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            volumns = [0] * self.parameter["N"]
            for action in processed_result :
                if action[0] == "Fill" :
                    jug = action[1]
                    if not (0 <= jug < self.parameter["N"]) :
                        return self.rewards["invalid_solution"]
                    volumns[jug] = self.parameter["jug_capacities"][jug]
                elif action[0] == "Empty" :
                    jug = action[1]
                    if not (0 <= jug < self.parameter["N"]) :
                        return self.rewards["invalid_solution"]
                    volumns[jug] = 0
                elif action[0] == "Pour" :
                    jug_i, jug_j = action[1], action[2]
                    if not (0 <= jug_i < self.parameter["N"] and 0 <= jug_j < self.parameter["N"] and jug_i != jug_j) :
                        return self.rewards["invalid_solution"]
                    pour_amount = min(volumns[jug_i], self.parameter["jug_capacities"][jug_j] - volumns[jug_j])
                    volumns[jug_i] -= pour_amount
                    volumns[jug_j] += pour_amount
                else :
                    assert False, "Should be unreachable"

            if self.parameter["target_volumn"] in volumns :
                return self.rewards["correct_solution"]
            else :
                return self.rewards["wrong_solution"]
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/k_partition/__init__.py
#     from .environment import KPartition_Environment
0000000000000000000000000000000000000000..461a4fa322c7d8f6af16b5748e15774576e14457 --- /dev/null +++ b/server/Gym/environments/k_partition/environment.py @@ -0,0 +1,114 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class KPartition_Environment(VerifiableEnvironment) : # Source : https://en.wikipedia.org/wiki/3-partition_problem + prompt_template = \ +r"""You are given a **multiset S** containing **{N}** positive integers: {Multiset_S}. +Given K=**{K}**, the **target value T** is calculated as the total sum of elements in **S**, divided by **{N} / K = {N} / {K} = {N_divided_by_K}**. +Your task is to find a partition that divides **S** into **{N_divided_by_K}** disjoint **K-tuples** (S_1, S_2, ..., S_{K}), where these tuples **cover the entire set S**, and the sum of the elements in each **K-tuple** equals **T**. + +**Output Format:** Your final answer should contain **{N_divided_by_K} lines**, each containing **{K}** integers representing a valid K-tuple from the partition (with elements separated by spaces).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the KPartition_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + assert "K" in self.parameter, "K is required in parameter" + K = self.parameter["K"] + assert K >= 2, "K should be greater than or equal to 2" + assert N % K == 0, "K should be a factor of N" + + T = self.parameter["T"] = random.randint(max(K, N * K // 10), N * K) # This can be adjusted + + # Generate N // K K-tuples, each summing to T + N_divided_by_K = N // K + Multiset_S = [] + tuples = [] + for _ in range(N_divided_by_K) : + # Generate K - 1 random positive integers less than T + cuts = sorted(random.sample(range(1, T), K - 1)) + tuple_vals = [cuts[0]] + [cuts[i] - cuts[i - 1] for i in range(1, K - 1)] + [T - cuts[-1]] + random.shuffle(tuple_vals) + tuples.append(tuple_vals) + Multiset_S.extend(tuple_vals) + random.shuffle(Multiset_S) + self.parameter["Multiset_S"] = Multiset_S + self.parameter["reference_answer"] = "\n".join([" ".join(map(str, t)) for t in tuples]) + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + K = self.parameter["K"] + N_divided_by_K = self.parameter["N"] // self.parameter["K"] + Multiset_S = self.parameter["Multiset_S"] + assert len(Multiset_S) == N, "N should be the size of the multiset S" + assert sum(Multiset_S) % N_divided_by_K == 0, "The sum of the multiset S should be a multiple of N/K" + return self.prompt_template.format( + N = N, + K = K, + N_divided_by_K = N_divided_by_K, + Multiset_S = ", ".join(map(str, Multiset_S)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + tuples = [] + for line in answer.splitlines() : + line = line.strip() + if line 
class Kakurasu_Environment(VerifiableEnvironment) :
    """Kakurasu puzzle: rebuild a 0/1 grid from weighted row and column sums.

    A hidden grid is sampled at random. ``A[i]`` is the sum of the 1-based
    column indices of the ones in row ``i``; ``B[j]`` is the sum of the 1-based
    row indices of the ones in column ``j``. An answer is rewarded according to
    how many of these N + M sums it reproduces.
    """

    prompt_template = \
r"""You are given a {N} × {M} grid (1-indexed). Fill the grid with `0`s and `1`s such that:
- For each row `i`, the sum of the **column indices** where there are `1`s is equal to `A[i]`. Array `A` is given as: {A}
- For each column `j`, the sum of the **row indices** where there are `1`s is equal to `B[j]`. Array `B` is given as: {B}

**Output Format:** Your final answer should consist of {N} lines, each containing {M} characters (`0` or `1`, with no separators), representing the filled grid."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the Kakurasu_Environment instance.

        ``wrong_format`` is returned for malformed answers; the remaining
        arguments select and scale the reward computed in ``scorer``.
        """
        super().__init__(**kwargs)
        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """Sample the grid size, a random grid, and the clue arrays A and B."""
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        limit = self.parameter["MAX_N_M"]
        assert limit >= 3, "MAX_N_M should be greater than or equal to 3"

        n_rows = random.randint(2, limit)
        n_cols = random.randint(2, limit)
        self.parameter["N"], self.parameter["M"] = n_rows, n_cols

        # Every cell is independently "1" with the same randomly drawn probability.
        density = random.uniform(0.1, 0.9)
        grid = []
        for _ in range(n_rows) :
            grid.append(["1" if random.random() < density else "0" for _ in range(n_cols)])
        self.parameter["reference_answer"] = "\n".join("".join(cells) for cells in grid)

        self.parameter["A"] = [sum(j for j, cell in enumerate(cells, start = 1) if cell == "1") for cells in grid]
        self.parameter["B"] = [sum(i for i, cells in enumerate(grid, start = 1) if cells[j] == "1") for j in range(n_cols)]


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the grid size and both clue arrays."""
        clues_a = " ".join(f"A[{i}]={a}" for i, a in enumerate(self.parameter["A"], start = 1))
        clues_b = " ".join(f"B[{j}]={b}" for j, b in enumerate(self.parameter["B"], start = 1))
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            A = clues_a,
            B = clues_b,
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Return the non-empty, stripped lines of ``answer`` (``None`` stays ``None``)."""
        if answer is None :
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]

    def scorer(self, output : str) -> float :
        """Score ``output`` by the number of row/column sums it reproduces.

        Returns ``wrong_format`` when the answer is missing, has the wrong
        dimensions, or contains characters other than ``0``/``1``.
        """
        grid = self.processor(output)
        if grid is None :
            return self.rewards["wrong_format"]

        n_rows, n_cols = self.parameter["N"], self.parameter["M"]
        if len(grid) != n_rows :
            return self.rewards["wrong_format"]
        if any(len(row) != n_cols for row in grid) :
            return self.rewards["wrong_format"]
        if any(cell not in "01" for row in grid for cell in row) :
            return self.rewards["wrong_format"]

        row_sums = [sum(j for j, cell in enumerate(row, start = 1) if cell == "1") for row in grid]
        col_sums = [sum(i for i, row in enumerate(grid, start = 1) if row[j] == "1") for j in range(n_cols)]
        assert len(row_sums) == len(self.parameter["A"]) and len(col_sums) == len(self.parameter["B"]), "Length of A or B does not match the expected length"

        total = n_rows + n_cols
        satisfied = sum(int(mine == gold) for mine, gold in zip(row_sums, self.parameter["A"]))
        satisfied += sum(int(mine == gold) for mine, gold in zip(col_sums, self.parameter["B"]))
        assert satisfied <= total, "Satisfied count exceeds the number of rows and columns"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta" :
            return self.rewards["rewarding_weight"] * ((satisfied / total) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (satisfied == total)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class KiddingMe_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3266
    """Counting task: number of constrained N × M matrices modulo a prime.

    ``_generate`` samples N, M and a modulus, then stores the count computed by
    ``_count_matrices`` as ``reference_answer``. An answer is a single integer
    in ``[0, MOD)``.
    """

    prompt_template = \
r"""Please compute the number of {N} × {M} matrices X, such that:
- For each 1 <= i <= {N}, 1 <= j <= {M}, we have 0 <= X[i][j] <= {M}
- For each 1 <= i <= {N}, 1 <= j < {M}, we have X[i][j] < X[i][j + 1]
- For each 1 < i <= {N}, 1 <= j < {M}, we have X[i][j] < X[i - 1][j + 1]

Please output the result modulo {MOD}
"""

    # Moduli the generator picks from.
    MODs = (10 ** 9 + 7, 998244353)

    def __init__(self,
                 wrong_format: float = -1.0, wrong_range: float = -0.5, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the KiddingMe_Environment instance.

        Args:
            wrong_format: reward when the answer is not an integer.
            wrong_range: reward when the integer is outside ``[0, MOD)``.
            correct_answer: reward when the integer equals the reference.
            wrong_answer: reward for any other in-range integer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "wrong_range": wrong_range,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }


    @staticmethod
    def _count_matrices(N : int, M : int, MOD : int) -> int :
        """Return the reference count for an N × M instance, reduced modulo ``MOD``.

        The value is an alternating sum of binomial coefficients obtained by
        repeatedly applying two coordinate "reflections" to the start point
        ``(N + M + 1, N)``. ``MOD`` must be a prime larger than
        ``3 * max(N, M) + 5`` so that every table entry is invertible.
        """
        # ---------- pre-compute factorials and inverse factorials ----------
        # x + y is invariant under both flips and equals 2N + M + 1 <= UP.
        UP = max(N, M) * 3 + 5
        inv = [0] * (UP + 1)       # modular inverses of 1 … UP
        fact = [1] * (UP + 1)      # factorials
        inv_fact = [1] * (UP + 1)  # inverse factorials (1 / k!)

        inv[1] = 1
        for i in range(2, UP + 1):
            inv[i] = MOD - MOD // i * inv[MOD % i] % MOD

        for i in range(1, UP + 1):
            fact[i] = fact[i - 1] * i % MOD
            inv_fact[i] = inv_fact[i - 1] * inv[i] % MOD

        # ---------- helpers ----------
        def comb(x: int, y: int) -> int:
            """C(x + y, x) under MOD (return 0 if any index is negative)."""
            if x < 0 or y < 0:
                return 0
            return fact[x + y] * inv_fact[x] % MOD * inv_fact[y] % MOD

        def flip1(x: int, y: int) -> tuple[int, int]:
            """Perform the first reflection: swap, then (x--, y++)."""
            return y - 1, x + 1

        def flip2(x: int, y: int) -> tuple[int, int]:
            """Perform the second reflection: swap, then (x += M + 2, y -= M + 2)."""
            return y + M + 2, x - (M + 2)

        # ---------- main inclusion–exclusion ----------
        x, y = N + M + 1, N
        ans = comb(x, y)

        # Chain starting with the first reflection; y shrinks by M + 3 per
        # round, so the loop terminates.
        while x >= 0 and y >= 0:
            x, y = flip1(x, y)
            ans = (ans - comb(x, y)) % MOD
            x, y = flip2(x, y)
            ans = (ans + comb(x, y)) % MOD

        # Chain starting with the second reflection; x shrinks by M + 3 per round.
        x, y = N + M + 1, N
        while x >= 0 and y >= 0:
            x, y = flip2(x, y)
            ans = (ans - comb(x, y)) % MOD
            x, y = flip1(x, y)
            ans = (ans + comb(x, y)) % MOD

        return ans


    def _generate(self) -> None :
        """Sample N, M and MOD and store them together with the reference answer."""
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
        MOD = self.parameter["MOD"] = random.choice(self.MODs)

        self.parameter["reference_answer"] = self._count_matrices(N, M, MOD)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N, M and the modulus."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            MOD = self.parameter["MOD"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse ``answer`` as an integer; return ``None`` if missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """Return the reward for ``output`` (format, range, then exact-match checks)."""
        processed_result = self.processor(output)
        if processed_result is not None :
            if not (0 <= processed_result < self.parameter["MOD"]) :
                return self.rewards["wrong_range"]
            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
the {N} pairs** `(a[i], b[i])` for `1 ≤ i ≤ {N}` in some order (there are `{N}!` possible permutations). After rearrangement, define the new sequence of `{N_plus_1}` pairs as: `(A[0], B[0])`, `(A[1], B[1])`, ..., `(A[{N}], B[{N}])`, where `(A[i], B[i])` comes from the chosen permutation for `i ≥ 1`. + +Your goal is to **minimize** the following value: `max ( A[0] * A[1] * ... * A[i - 1] // B[i] | 1 ≤ i ≤ {N} )` (Note: `//` means **integer division**, i.e., rounded down just like in Python). +That is, for each `i` from `1` to `{N}`, compute the product of all previous `A` values (`A[0]` to `A[i - 1]`) divided by `B[i]`, take the maximum of these, and find a permutation that minimizes this maximum. + +Output Format: +Your final answer should be a single line containing a permutation of integers from `1` to `{N}` (space-separated). +Example: `{REVERSE_INDICES}` (do **NOT** include the backticks or quotes); this means: `(A[1], B[1]) = (a[{N}], b[{N}])`, `(A[2], B[2]) = (a[{N_minus_1}], b[{N_minus_1}])`, ..., `(A[{N}], B[{N}]) = (a[1], b[1])` +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = +5.0, + **kwargs) : + """ + Initialize the KingSorting_Environment instance. 
def scorer(self, output : str) -> float :
    """Score a proposed ordering of pairs 1..N against the stored optimum.

    Returns ``wrong_format`` when the processor yields ``None`` and
    ``invalid_solution`` when the answer is not a permutation of ``1..N``.
    Otherwise the objective ``max(prefix product of A // B[i])`` is evaluated
    for the ordering and compared with ``gold_answer`` using the configured
    rewarding strategy.
    """
    order = self.processor(output)
    if order is None :
        return self.rewards["wrong_format"]
    assert isinstance(order, list), "processed_result should be a list"

    N = self.parameter["N"]
    if len(order) != N or len(set(order)) != N :
        return self.rewards["invalid_solution"]
    if any(not (1 <= idx <= N) for idx in order) :
        return self.rewards["invalid_solution"]

    pairs = self.parameter["array"]
    answer = 0
    product = pairs[0]["A"]  # running product A[0] * ... * A[i - 1]
    for idx in order :
        chosen = pairs[idx]
        assert chosen["index"] == idx
        answer = max(answer, product // chosen["B"])
        product *= chosen["A"]

    gold = self.parameter["gold_answer"]
    assert gold <= answer, "answer should be greater than or equal to gold"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        if answer == 0 :
            # Avoid 0 / 0: an objective of zero can only match a zero optimum.
            assert gold == 0, "gold_answer should be 0 if answer is 0"
            return self.rewards["rewarding_weight"]
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class KloBlocks_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3503
    """Array-levelling task: longest block that can be raised to at least K.

    The reference answer is the length of the longest contiguous subarray whose
    average is at least ``K``, found with prefix sums of ``A[i] - K`` and a
    stack of prefix minima. Instances whose answer is 1 or N are rejected and
    resampled.
    """

    prompt_template = \
r"""You have an array A of {N} integers, initially it is: {A}
You can perform any number of actions. One action is to pick one item that is **greater than** {K}, subtract 1 from it, and add 1 to an **adjacent** item (either to the left or right, if such an item exists).
Please maximize the length of the longest contiguous subarray where each item is **greater than or equal to** {K}; output its length."""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the KloBlocks_Environment instance.

        The three arguments are the rewards for an unparsable answer, the
        correct integer, and any other integer respectively.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """Sample ``A`` and ``K`` until the answer is neither 1 nor N, then store it."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        while True :
            A = self.parameter["A"] = [random.randint(1, 2 * N) for _ in range(N)]
            lowest_k, highest_k = min(A) + 1, max(A) - 1
            if lowest_k > highest_k :
                continue  # no threshold strictly between min(A) and max(A)
            K = self.parameter["K"] = random.randint(lowest_k, highest_k)

            # prefix[i] = sum of (A[j] - K) over the first i items; prefix[0] = 0
            prefix = [0]
            for value in A :
                prefix.append(prefix[-1] + value - K)

            best = 0
            minima = []  # indices whose prefix values are strictly decreasing
            for i in range(1, N + 1) :
                if prefix[i] >= 0 :
                    best = i  # the whole prefix 1..i qualifies
                if not minima or prefix[i] < prefix[minima[-1]] :
                    minima.append(i)

            # Walk right ends from the back, pairing each with the stacked
            # left ends it can close a non-negative window with.
            for i in range(N, 0, -1) :
                while minima and prefix[i] >= prefix[minima[-1]] :
                    best = max(best, i - minima.pop())

            if best not in (1, N) :
                self.parameter["reference_answer"] = best
                break


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N, the array and the threshold K."""
        array_text = " ".join(map(str, self.parameter["A"]))
        return self.prompt_template.format(N = self.parameter["N"], A = array_text, K = self.parameter["K"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse ``answer`` as an integer; return ``None`` if missing or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Return the reward for ``output``: format check, then exact match."""
        guess = self.processor(output)
        if guess is None :
            return self.rewards["wrong_format"]
        if guess == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
def _generate(self) -> None :
    """Sample a 0/1 knapsack instance and store its optimal value.

    Reads ``N`` (>= 2) from ``self.parameter`` and writes:

    * ``W`` - N weights, each in ``[1, N]``;
    * ``V`` - N values, ``V[i]`` in ``[1, W[i] * self.value_range_multiple]``;
    * ``W_max`` - capacity in ``[min(W), sum(W)]``, so at least one item fits;
    * ``gold_answer`` - the maximum total value, computed by the classic
      one-dimensional knapsack DP.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    W = self.parameter["W"] = [random.randint(1, N) for _ in range(N)]
    V = self.parameter["V"] = [random.randint(1, Wi * self.value_range_multiple) for Wi in W]
    W_max = self.parameter["W_max"] = random.randint(min(W), sum(W))

    # F[w] = best value achievable with total weight <= w using the items seen so far.
    F = [0] * (W_max + 1)
    for Wi, Vi in zip(W, V) :
        # Descending capacities keep every item single-use.
        for w in range(W_max, Wi - 1, -1) :
            F[w] = max(F[w], F[w - Wi] + Vi)
    self.parameter["gold_answer"] = F[W_max]
    assert F[W_max] > 0, "gold_answer should be positive because at least one item fits"
def scorer(self, output : str) -> float :
    """Score a proposed item selection against the stored optimal value.

    Returns ``wrong_format`` when the processor yields ``None`` and
    ``invalid_solution`` when the selection repeats an index, uses an index
    outside ``0..N-1`` or exceeds the capacity ``W_max``. Otherwise the total
    value is compared with ``gold_answer`` using the configured strategy.
    """
    chosen = self.processor(output)
    if chosen is None :
        return self.rewards["wrong_format"]
    assert isinstance(chosen, list), "processed_result should be a list"

    has_duplicates = len(set(chosen)) != len(chosen)
    if has_duplicates :
        return self.rewards["invalid_solution"]
    if any(not (0 <= i < self.parameter["N"]) for i in chosen) :
        return self.rewards["invalid_solution"]
    total_weight = sum(self.parameter["W"][i] for i in chosen)
    if total_weight > self.parameter["W_max"] :
        return self.rewards["invalid_solution"]

    answer = sum(self.parameter["V"][i] for i in chosen)
    gold = self.parameter["gold_answer"]
    assert answer <= gold, "answer should be less than or equal to gold"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
.environment import KnightsAndKnaves_Environment diff --git a/server/Gym/environments/knights_and_knaves/environment.py b/server/Gym/environments/knights_and_knaves/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..07f719786bbc8492abfd8e1e44b374825fe1ec3e --- /dev/null +++ b/server/Gym/environments/knights_and_knaves/environment.py @@ -0,0 +1,1095 @@ +from typing import Dict, Optional +import numpy as np +from ...environment import VerifiableEnvironment +import re + +import copy +import enum +import itertools +import unittest + +import numpy as np + + + +"""Knight and Knave problems. + +Each person can have the following (recursive) statements: + - assertion: (telling-truth, i), (lying, i) + - negation: (not, statement) + - conjunction: (and, statement1, statement2), could support more than 2 + - disjunction: (or, statement1, statement2), could support more than 2 + - implication: (->, statement1, statement2) + - equivalence: (<=>, statement1, statement2) + +Please see the unit tests at the bottom on examples of how to use each API. 
+ +Original link: https://github.com/AlphaPav/mem-kk-logic/blob/main/data_prep/lib_kk.py +""" + +#################################################################################### +# Problem Solving +#################################################################################### +def find_solution(statements): + """Find solutions given a list of statements.""" + n_people = len(statements) + single_statement = ('and',) + tuple(('<=>', ('telling-truth', i), statements[i]) + for i in range(len(statements))) + # brute force + solutions = [] + for assignments in itertools.product([True, False], repeat=n_people): + if test_satisfiability(single_statement, assignments): + solutions.append(assignments) + + return solutions + + +def test_satisfiability(statement, assignments): + """Dumb recursive testing.""" + if statement[0] == 'telling-truth': + return assignments[statement[1]] + if statement[0] == 'lying': + return not assignments[statement[1]] + if statement[0] == 'not': + return not test_satisfiability(statement[1], assignments) + if statement[0] == 'and': + return np.all([test_satisfiability(statement[i], assignments) + for i in range(1, len(statement))]) + if statement[0] == 'or': + return np.any([test_satisfiability(statement[i], assignments) + for i in range(1, len(statement))]) + if statement[0] == '->': + val1 = test_satisfiability(statement[1], assignments) + val2 = test_satisfiability(statement[2], assignments) + return (not val1) or val2 + if statement[0] == '<=>': + val1 = test_satisfiability(statement[1], assignments) + val2 = test_satisfiability(statement[2], assignments) + return (val1 and val2) or ((not val1) and (not val2)) + raise KeyError(f'Unknown statement: {statement}') + + +#################################################################################### +# Problem Sampling +#################################################################################### +class KKProblemSampler: + """Problem Sampler for Knight and Knave. 
+ + Args: + rand_seed: seed for random number generators. + n_people: number of people for K&K problems. + depth_constraint: the max depth of each person's statement. The depth refer to the level of + recursion of operators such as 'and', 'or', etc. Increasing the depth would allow + increasing the difficulty. Though currently the automatic formatting of the problems + into nautral languages does not support depth more than 2. + width_constraint: the max width (number of branches in operators such as 'and', 'or') of each + person's statement. + """ + + def __init__(self, rand_seed: int, n_people: int, depth_constraint: int = 2, width_constraint: int = 2): + self.rng = np.random.default_rng(rand_seed) + self.rng_wrong = np.random.default_rng(rand_seed+1) + self.n_people = n_people + self.depth_constraint = depth_constraint + self.width_constraint = width_constraint + + def sample(self): + """Sample a single K&K problem.""" + statements = tuple(self._sample_statement(person_id, self.depth_constraint) + for person_id in range(self.n_people)) + return self._immutable_statements(statements) + + def sample_valid_problems(self, n_problems: int, max_retry: int = 1000, + skip_no_solution: bool = True, skip_multiple_solutions: bool = True): + """Sample valid (has 1 unique solution) problems. + + Args: + n_problems: how many problems to sample. + max_retry: max number of retries per problem before giving up. + skip_no_solution: skip problems without a valid solution. + skip_multiple_solutions: skip problems with more than one solutions. + + Returns + A list of problems, each a dict with keys 'statements' and 'solution'. 
+ """ + problems = [] + unique_statements = set() + for i_problem in range(n_problems): + for _ in range(max_retry): + statements = self.sample() + if statements in unique_statements: + continue # duplicated problem, retry + solutions = find_solution(statements) + if len(solutions) == 0 and skip_no_solution: + continue # retry + if len(solutions) > 1 and skip_multiple_solutions: + continue # retry + sol = solutions[0] if len(solutions) > 0 else None + problems.append({'statements': statements, 'solution': sol, + 'all_solutions': solutions}) + unique_statements.add(statements) + break # continue to next problem + if i_problem + 1 < len(problems): + raise RuntimeError(f'Failed to generate a valid problem after {max_retry} retries.') + return problems + + def sample_flipped_solution(self, solution): + length_of_solution = len(solution) + # Randomly decide how many items to flip (at least one) + num_to_perturb = self.rng_wrong.integers(1, length_of_solution) + + # Randomly choose indices to perturb + indices_to_perturb = list(self.rng_wrong.choice(list(range(length_of_solution)), size=num_to_perturb, replace=False)) + + # Create a new solution with perturbed values + perturbed_solution = tuple( + not solution[i] if i in indices_to_perturb else solution[i] + for i in range(length_of_solution) + ) + return perturbed_solution + + + def sample_invalid_problems(self, n_problems: int, max_retry: int = 1000, + skip_no_solution: bool = True, skip_multiple_solutions: bool = True): + """Sample valid (has 1 unique solution) problems and then perturb the solution. + + Args: + n_problems: how many problems to sample. + max_retry: max number of retries per problem before giving up. + skip_no_solution: skip problems without a valid solution. + skip_multiple_solutions: skip problems with more than one solutions. + + Returns + A list of problems, each a dict with keys 'statements' and 'solution'. 
+ """ + problems = [] + unique_statements = set() + for i_problem in range(n_problems): + for _ in range(max_retry): + statements = self.sample() + if statements in unique_statements: + continue # duplicated problem, retry + solutions = find_solution(statements) + if len(solutions) == 0 and skip_no_solution: + continue # retry + if len(solutions) > 1 and skip_multiple_solutions: + continue # retry + sol = solutions[0] if len(solutions) > 0 else None + ## perturbed + perturbed_sol=self.sample_flipped_solution(sol) + problems.append({'statements': statements, 'solution': perturbed_sol, + 'all_solutions': [perturbed_sol]}) + unique_statements.add(statements) + break # continue to next problem + if i_problem + 1 < len(problems): + raise RuntimeError(f'Failed to generate a valid problem after {max_retry} retries.') + return problems + + + def perturb_problems(self, problems, max_retry: int = 1000, perturb_type: str = 'statement', + num_perturb: int = 1): + """Perturb the problems (generated by this sampler). + + The perturbed problems will change in one place, and is guaranteed to have a different + solution. The 'leaf' perturbation type allows "small" perturbation, but it will have a + high chance of not able to generate valid perturbations when n_people is small (i.e. all + the single-step perturbations do not lead to a valid solution). One potential solution is + to enable `allow_failure` and filter out invalid ones (marked as None). + + Args: + problems: a list of problems generated by this sampler. + max_retry: max number of retries to generate an alternative and valid problem. + perturb_type: 'leaf' means perturbing only a random leaf node (i.e. not compond statements); + 'statement' means change the entire statement from a random person. + num_perturb: number of perturbations to generate. Note the actual returned perturbations + might be fewer than this number (or even an empty list), if max_retry is exhausted. + + Returns: + A list of perturbed problems. 
+ """ + return [self._perturb_problem(p, max_retry=max_retry, perturb_type=perturb_type, num_perturb=num_perturb) + for p in problems] + + def _perturb_problem(self, problem, max_retry: int, perturb_type: str, num_perturb: int): + assert len(problem['statements']) == self.n_people # make sure parameters match + results_set = set() + results_list = [] + for _ in range(max_retry): + statements = self._copy_statements_as_mutable(problem['statements']) + if perturb_type == 'statement': + person = self.rng.integers(0, self.n_people) + statements[person] = self._sample_statement(person, depth_constraint=self.depth_constraint) + elif perturb_type == 'leaf': + person = self.rng.integers(0, self.n_people) + idx = person + container = statements + while not self._is_leaf_node(container[idx]): + container = container[idx] + idx = self.rng.integers(1, len(container)) + assert self._is_leaf_node(container[idx]) + # set depth_constraint to 1 to only sample new leaf node + container[idx] = self._sample_statement(person, depth_constraint=1) + + statements = self._immutable_statements(statements) + if len(set([statements, problem['statements']])) <= 1: + continue # perturbation is identical to the original, retry + + solutions = find_solution(statements) + if len(solutions) != 1: + continue # Not single unique solution, retry + + if len(set([solutions[0], problem['solution']])) <= 1: + continue # solution does not change after perturbation, retry + + if statements in results_set: + continue # duplicate perturbation, retry + + results_set.add(statements) + results_list.append({'statements': statements, 'solution': solutions[0]}) + if len(results_list) >= num_perturb: + break + + if len(results_list)==0: + return [None] + + return results_list + + def _copy_statements_as_mutable(self, statements): + """Make a deep copy of the statements of a problem, turning the tuples into (mutable) lists.""" + statements = copy.deepcopy(statements) + def _make_mutable(x): + if isinstance(x, tuple): 
def _immutable_statements(self, mutable_statements):
    """Convert nested list-based statements back into hashable nested tuples.

    Lists and tuples are rebuilt recursively as tuples. NumPy scalars are replaced by their
    built-in Python equivalents so the result contains only plain ``str`` / ``int`` values:
    ``np.str_`` becomes ``str`` and any NumPy integer type (not only ``np.int64``) becomes
    ``int``. All other values are passed through unchanged.

    Args:
        mutable_statements: iterable of statements, each a nested list/tuple structure.

    Returns:
        A tuple with one immutable statement per input statement.
    """
    def _make_immutable(x):
        if isinstance(x, (list, tuple)):
            return tuple(_make_immutable(child) for child in x)
        if isinstance(x, np.str_):
            return str(x)
        # np.integer is the common base of every NumPy integer width (int16/int32/int64/...),
        # so the conversion does not depend on the dtype the values were sampled with.
        if isinstance(x, np.integer):
            return int(x)
        return x
    return tuple(_make_immutable(s) for s in mutable_statements)
+ """ + sub_statements = [] + dedup_set = set() + while True: + stmt = self._sample_statement(person_id, depth_constraint-1) + if dedup: + if stmt in dedup_set: + continue + dedup_set.add(stmt) + + sub_statements.append(stmt) + if len(sub_statements) == count: + break + return tuple(sub_statements) + + +#################################################################################### +# Problem Formatting in natural language +#################################################################################### +COMMON_NAMES = ['Emma', 'Liam', 'Olivia', 'Noah', 'Ava', 'Ethan', 'Sophia', + 'Mason', 'Isabella', 'William', 'Mia', 'James', 'Charlotte', + 'Benjamin', 'Amelia', 'Lucas', 'Harper', 'Henry', 'Evelyn', + 'Alexander', 'Abigail', 'Michael', 'Emily', 'Daniel', 'Elizabeth', + 'Jacob', 'Sofia', 'Logan', 'Avery', 'Jackson', 'Ella', 'Sebastian', + 'Scarlett', 'Jack', 'Grace', 'Aiden', 'Chloe', 'Owen', 'Victoria', + 'Samuel', 'Riley', 'Matthew', 'Aria', 'Joseph', 'Lily', 'Luke', + 'Aurora', 'David', 'Zoey', 'Oliver', 'Penelope'] +UNCOMMON_NAMES = [ + 'Zephyr', 'Elowen', 'Caspian', 'Isolde', 'Osiris', 'Vesper', 'Thaddeus', 'Ondine', + 'Lysander', 'Xanthe', 'Oberon', 'Calliope', 'Leander', 'Eulalia', 'Florian', 'Forsythe', + 'Nephele', 'Peregrine', 'Ianthe', 'Lazarus', 'Elodie', 'Cillian', 'Ottoline', 'Evander', + 'Saffron', 'Caius', 'Zora', 'Cyprian', 'Amaryllis', 'Theron', 'Perdita', 'Ignatius', + 'Zephyrine', 'Balthazar', 'Melisande', 'Zinnia', 'Sylvester', 'Cosima', 'Leocadio', + 'Percival', 'Oceane', 'Evanthe', 'Zenobia', 'Eurydice', 'Quillan', 'Aeronwen', + 'Thorsten', 'Xiomara', 'Zephyrus', 'Ysolde' +] + +KNIGHT_KNAVE_PAIRS = [ + # NOTE: we simply add 's' to make plural, so be careful when choosing words + ['a pioneer', 'a laggard'], + ['a saint', 'a sinner'], + ['a hero', 'a villain'], + ['an angel', 'a devil'], + ['an altruist', 'an egoist'], + ['a sage', 'a fool'], +] +PREFIX = ('A very special island is inhabited only by {knight}s and {knave}s. 
class KKProblemFormatter:
    """Render one K&K problem (statements + solution) as an English quiz."""

    def __init__(self, rand_seed, problem):
        """Store the problem and create the random generators.

        Args:
            rand_seed: integer seed. ``rand_seed`` seeds the generator used for names, role
                words and saying templates; ``rand_seed + 1`` seeds a separate generator used
                only for statement reordering.
            problem: dict with the keys ``'statements'`` and ``'solution'``.
        """
        self.rng = np.random.default_rng(rand_seed)
        self.rng_perturb = np.random.default_rng(rand_seed+1)
        self.problem = problem

    @staticmethod
    def _join_english(items):
        """Join items as 'a, b, and c'; a single item is returned as-is."""
        if len(items) == 1:
            return items[0]
        return ', '.join(items[:-1]) + ', and ' + items[-1]

    def format_problem(self, random_names=True, random_saying_template=True,
                       random_knight_knave_pairs=False,
                       flip_knight_knave_pair=False, uncommon_name=False, reorder_statement=False):
        """Format the stored problem in natural language.

        Args:
            random_names: sample names without replacement instead of taking the first
                ``n_people`` entries of ``COMMON_NAMES``.
            random_saying_template: pick a random entry of ``TEMPLATES`` per statement instead
                of always using ``TEMPLATES[0]``.
            random_knight_knave_pairs: pick the role words from ``KNIGHT_KNAVE_PAIRS`` instead
                of 'a knight' / 'a knave'.
            flip_knight_knave_pair: swap the truth-teller and liar role words.
            uncommon_name: when sampling names, draw from ``UNCOMMON_NAMES``.
            reorder_statement: present the statements in a shuffled order that differs from
                the original one. Ignored when there are fewer than two statements, because
                no different order exists.

        Returns:
            Dict with the keys ``'quiz'``, ``'names'``, ``'knight_knave'``, ``'solution'`` and
            ``'solution_text'``.
        """
        # The statements are only read below, so no defensive copy is needed.
        statements = self.problem['statements']
        n_people = len(statements)

        if random_names:
            pool = UNCOMMON_NAMES if uncommon_name else COMMON_NAMES
            sampled = self.rng.choice(pool, size=n_people, replace=False)
            names = [str(x) for x in sampled]  # convert np.str_ to str
        else:
            names = COMMON_NAMES[:n_people]

        pair = ['a knight', 'a knave']
        if random_knight_knave_pairs:
            pair = [str(x) for x in self.rng.choice(KNIGHT_KNAVE_PAIRS)]  # convert np.str_ to str
        if flip_knight_knave_pair:
            pair = pair[::-1]

        a_knight, a_knave = pair
        # 'a hero' -> 'hero': the bare noun is the second word of the article + noun phrase.
        knight_knave = {'knight': a_knight.split()[1],
                        'knave': a_knave.split()[1],
                        'a_knight': a_knight, 'a_knave': a_knave}
        knight_knave['Knight'] = knight_knave['knight'].capitalize()
        knight_knave['Knave'] = knight_knave['knave'].capitalize()

        text = PREFIX.format(**knight_knave)
        text += f'You meet {n_people} inhabitants: '
        text += self._join_english(names) + '.'

        text_statements = []
        for i, stmt in enumerate(statements):
            tmpl = TEMPLATES[0]
            if random_saying_template:
                tmpl = self.rng.choice(TEMPLATES)
            content = format_statement(names, knight_knave, stmt)
            text_statements.append(' ' + tmpl.format(name=names[i], content=content))

        if reorder_statement and n_people > 1:
            original_order = list(range(n_people))
            shuffled_order = original_order.copy()
            # Shuffle (at least once) until the order differs from the original. With fewer
            # than two statements this could never succeed, hence the guard above.
            while shuffled_order == original_order:
                self.rng_perturb.shuffle(shuffled_order)
            text += ''.join(text_statements[i] for i in shuffled_order)
        else:
            text += ''.join(text_statements)

        text += ' ' + POSTFIX.format(**knight_knave)

        solution = self.problem['solution']
        if solution is None:
            solution_text = 'No valid solution exists.'
        else:
            solution_stmts = [
                name + ' is ' + (knight_knave['a_knight'] if indicator else knight_knave['a_knave'])
                for name, indicator in zip(names, solution)
            ]
            solution_text = self._join_english(solution_stmts) + '.'

        return {'quiz': text, 'names': names, 'knight_knave': knight_knave,
                'solution': solution,
                'solution_text': solution_text}
def generate_chain_of_thoughts(statements, dynamic_person_order: bool = True):
    """Generate reasoning steps that can solve the problem.

    The search is a depth-first backtracking over people: each person is first assumed to be
    telling the truth (``True``) and then lying (``False``), and every proposal is checked with
    ``can_be_falsified_v2`` against the assignments made so far.

    Args:
        statements: the statements of the K&K problem.
        dynamic_person_order: if False, it will always go through the list of person in the
            original order. If True, it will use a more "natural" order. For example, if
            person1 mention person5 and person4, then the engine will check person5 and
            person4 next, instead of checking person2 next.

    Returns:
        A list ("tape") of steps. Each step is one of

        * ``('proposal', {'person', 'assignment', 'outcome': 'ok'})``
        * ``('proposal', {'person', 'assignment', 'outcome': 'conflict',
          'conflict_statement': (person_id, that person's assignment)})``
        * ``('reconsider', {'person', 'exhausted'})`` when backtracking
        * ``('success', {'assignments': tuple})`` or ``('fail',)`` as the final step.
    """
    n_people = len(statements)
    tape = []
    assignments = [None] * n_people
    # Remaining candidate values per person; pop() takes from the end, so True is tried first.
    options = {p: [False, True] for p in range(n_people)}
    persons_to_consider = tuple(range(n_people))
    p_cursor = 0
    while True:
        if p_cursor >= n_people:
            tape.append(('success', {'assignments': tuple(assignments)}))
            break

        if not options[persons_to_consider[p_cursor]]:
            # Backtrack: reset every person whose options ran out and step back to the most
            # recent person that still has an untried value.
            exhausted = []
            while p_cursor >= 0 and not options[persons_to_consider[p_cursor]]:
                options[persons_to_consider[p_cursor]] = [False, True]
                assignments[persons_to_consider[p_cursor]] = None
                exhausted.append(persons_to_consider[p_cursor])
                p_cursor -= 1
            if p_cursor >= 0:
                tape.append(('reconsider', {'person': persons_to_consider[p_cursor], 'exhausted': exhausted}))
            else:
                # we have exhausted all options
                tape.append(('fail',))
                break

        person = persons_to_consider[p_cursor]
        assignments[person] = options[person].pop()
        # Despite its name, the first return value is True when the partial assignment is
        # still consistent, and False together with the offending person's id otherwise.
        result, stmt_id = can_be_falsified_v2(statements, assignments)
        if result:
            tape.append(('proposal', {'person': person, 'assignment': assignments[person],
                                      'outcome': 'ok'}))
            p_cursor += 1
            if dynamic_person_order:
                # re-order the next people to consider based on who is mentioned in the
                # current statement; with a static order the sequence is left untouched
                mentioned_people = _find_mentioned_people(statements[person])
                persons_to_consider = persons_to_consider[:p_cursor] + _reorder_people_sequence(
                    persons_to_consider[p_cursor:], mentioned_people)
        else:
            tape.append(('proposal', {'person': person, 'assignment': assignments[person],
                                      'outcome': 'conflict',
                                      'conflict_statement': (stmt_id, assignments[stmt_id])}))
    return tape
def can_be_falsified_v2(statements, assignments):
    """Check a partial assignment by brute force over the undecided people (v2).

    For every person that already has an assignment, the person's statement (negated when the
    person is assumed to lie) must be satisfiable by at least one completion of the undecided
    assignments. All completions are enumerated, which is less efficient than v1, but v1 cannot
    easily detect a self contradictory statement such as
    `('<=>', ('lying', 4), ('telling-truth', 4))` while person 4 is still undecided.

    Returns:
        ``(True, None)`` when every decided person's statement can still be satisfied,
        otherwise ``(False, idx)`` with the index of the first person whose statement cannot.
    """
    undecided = [idx for idx, value in enumerate(assignments) if value is None]

    def _completions():
        # Yield one full assignment per True/False combination of the undecided slots.
        for proposal in itertools.product([True, False], repeat=len(undecided)):
            candidate = copy.copy(assignments)
            for idx, value in zip(undecided, proposal):
                candidate[idx] = value
            yield candidate

    for speaker in range(len(statements)):
        role = assignments[speaker]
        if role is None:
            continue
        # A liar's statement must be false, i.e. its negation must hold.
        claim = statements[speaker] if role else ('not', statements[speaker])
        if not any(test_satisfiability(claim, candidate) for candidate in _completions()):
            return (False, speaker)  # this person's statement cannot be satisfied

    return (True, None)
def format_chain_of_thoughts(problem, formatted_problem, tape,
                             repeat_claim_for_assumption: bool = True,
                             repeat_claim_for_contradiction: bool = False):
    """Turn a generated chain-of-thoughts tape into English sentences.

    Repeating the claims reads more naturally but makes the output longer.

    Args:
        problem: the K&K problem (its ``'statements'`` are used).
        formatted_problem: the formatted problem (its ``'names'`` and ``'knight_knave'`` are used).
        tape: the generated chain of thoughts; the last entry must be a success or fail step.
        repeat_claim_for_assumption: restate a person's claim after assuming their role.
        repeat_claim_for_contradiction: restate the contradicted claim when a conflict is found.

    Returns:
        (header, [step1, step2, ...], footer). The footer only states success or failure; the
        final solution itself is not part of it.

    Raises:
        KeyError: on an unknown step kind, an unknown proposal outcome, or a final step that
            is neither success nor fail.
    """
    substitutions = copy.copy(formatted_problem['knight_knave'])
    for idx in range(len(problem['statements'])):
        substitutions[f'P{idx}'] = formatted_problem['names'][idx]

    def _placeholder(person_idx):
        return '{P' + str(person_idx) + '}'

    def _claim_of(person_idx):
        return format_statement(formatted_problem['names'], formatted_problem['knight_knave'],
                                problem['statements'][person_idx])

    def _proposal_template(step):
        info = step[1]
        who = _placeholder(info['person'])
        role = '{a_knight}' if info['assignment'] else '{a_knave}'
        outcome = info['outcome']
        if outcome == 'ok':
            parts = ['Assume ', who, ' is ', role, '.']
            if repeat_claim_for_assumption:
                claim = _claim_of(info['person'])
                parts.append(' No contradiction is found in their ')
                if not info['assignment']:
                    parts.append('false ')
                parts.extend(['claim that ', claim, '.'])
            return ''.join(parts)
        if outcome == 'conflict':
            conflict_p, conflict_assignment = info['conflict_statement']
            parts = [who, ' cannot be ', role, ', because this would contradict the ']
            if not conflict_assignment:
                parts.append('false ')
            parts.append('claim of ')
            parts.append('their own' if conflict_p == info['person'] else _placeholder(conflict_p))
            if repeat_claim_for_contradiction:
                parts.extend([' that ', _claim_of(conflict_p), '.'])
            else:
                parts.append('.')
            return ''.join(parts)
        raise KeyError(f'Unknown outcome for CoT step: {step}')

    def _reconsider_template(step):
        info = step[1]
        exhausted = [_placeholder(p_idx) for p_idx in info['exhausted']]
        assert len(exhausted) > 0
        if len(exhausted) <= 2:
            listing = ' and '.join(exhausted)
        else:
            listing = ', '.join(exhausted[:-1] + ['and ' + exhausted[-1]])
        return ('We have exhausted all possibilities for ' + listing
                + ', so let us go back and reconsider ' + _placeholder(info['person']) + '.')

    header = "Let's think step by step, by considering whether each person is lying and if that leads to contradiction."

    templates = []
    for step in tape[:-1]:  # last step is fail / success
        if step[0] == 'proposal':
            templates.append(_proposal_template(step))
        elif step[0] == 'reconsider':
            templates.append(_reconsider_template(step))
        else:
            raise KeyError(f'Unknown CoT step: {step}')

    final_kind = tape[-1][0]
    if final_kind == 'success':
        footer = 'This leads to a feasible solution.'
    elif final_kind == 'fail':
        footer = 'All the configurations lead to contradictions.'
    else:
        raise KeyError(f'Expect success or fail, but get {tape[-1]}')

    # Names and role words are substituted only now, after all templates have been built.
    steps = [template.format(**substitutions) for template in templates]
    return (header, steps, footer)
len(perturbed_problems)) + for p1, p2_list in zip(problems, perturbed_problems): + self.assertEqual(len(p2_list), n_perturb) # note this can actual fail, esp for small n_people + self.assertNotEqual(p1['solution'], p2_list[0]['solution']) + n_stmt_diff = 0 + for s1, s2 in zip(p1['statements'], p2_list[0]['statements']): + if s1 != s2: + n_stmt_diff += 1 + self.assertEqual(n_stmt_diff, 1) # exactly 1 statement is different + + def test_chain_of_thoughts(self): + n_people = 5 + n_problems = 120 + problem_sampler = KKProblemSampler(1234, n_people=n_people) + problems = problem_sampler.sample_valid_problems(n_problems, skip_no_solution=False) + for p in problems: + for dynamic_person_order in [False, True]: + tape = generate_chain_of_thoughts(p['statements'], dynamic_person_order=dynamic_person_order) + if p['solution'] is None: + self.assertTupleEqual(tape[-1], ('fail',)) + else: + self.assertEqual(tape[-1][0], ('success')) + self.assertTupleEqual(tape[-1][1]['assignments'], p['solution']) + + def test_chain_of_thoughts_regression(self): + # Regression test: NOTE the correct answer is not unique and it can change when the CoT generator code + # is changed. So the failure of this test does not necessarily mean the code is incorrect. If the code + # is changed and verified to be correct, this test can be updated with the new target outputs. 
# NOTE(review): this ``def`` starts at column 0, unlike the ``TestKK`` methods before it, so it is a
# module-level function that takes ``self`` rather than a ``TestKK`` test method. Presumably it is
# therefore never run as part of ``TestKK`` -- confirm whether it was meant to be indented into the class.
# NOTE(review): the expectations below also disagree with ``format_chain_of_thoughts`` as it appears in
# this file: (1) the formatter's header starts with "Let's think step by step", while both expected
# tuples start with 'Let us think step by step'; (2) with ``repeat_claim_for_contradiction=True`` the
# formatter appends ' that <claim>.' to every conflict sentence, while the first expected list ends
# those sentences right after the person. Re-derive the expected outputs before enabling this test.
def test_chain_of_thoughts_format_regression(self):
    # Regression test: NOTE the correct answer is not unique and it can change when the CoT generator code
    # is changed. So the failure of this test does not necessarily mean the code is incorrect. If the code
    # is changed and verified to be correct, this test can be updated with the new target outputs.
    problem = {
        'statements': (('and', ('telling-truth', 2), ('lying', 3)),
                       ('telling-truth', 2),
                       ('<=>', ('lying', 4), ('telling-truth', 4)),
                       ('and', ('lying', 2), ('lying', 4)),
                       ('lying', 2)),
        'solution': (False, False, False, False, True),
        # NOTE(review): this key is spelled 'all_slutions' while the sampler test checks for
        # 'all_solutions'; it is not read by the functions called in this test.
        'all_slutions': [(False, False, False, False, True)]
    }
    chain_of_thoughts = generate_chain_of_thoughts(problem['statements'])
    # Hand-written stand-in for the output of KKProblemFormatter.format_problem().
    formatted_problem = {'knight_knave': {'Knave': 'Knave',
                                          'Knight': 'Knight',
                                          'a_knave': 'a knave',
                                          'a_knight': 'a knight',
                                          'knave': 'knave',
                                          'knight': 'knight'},
                         'names': ['David', 'Zoey', 'Alexander', 'Aurora', 'Isabella'],
                         'quiz': 'A very special island is inhabited only by knights and knaves. '
                                 'Knights always tell the truth, and knaves always lie. You meet 5 '
                                 'inhabitants: David, Zoey, Alexander, Aurora, and Isabella. In '
                                 'David\'s words: "Alexander is a knight and Aurora is a knave". Zoey '
                                 'expressed that Alexander is a knight. Alexander said, "Isabella is a '
                                 'knave if and only if Isabella is a knight." Aurora commented, '
                                 '"Alexander is a knave and Isabella is a knave". According to '
                                 'Isabella, "Alexander is a knave". So who is a knight and who is a '
                                 'knave?',
                         'solution': (False, False, False, False, True),
                         'solution_text': 'David is a knave, Zoey is a knave, Alexander is a knave, '
                                          'Aurora is a knave, and Isabella is a knight.'}
    # Case 1: both the assumed claims and the contradicted claims are requested to be repeated.
    cot_format = format_chain_of_thoughts(problem, formatted_problem, chain_of_thoughts,
                                          repeat_claim_for_assumption=True,
                                          repeat_claim_for_contradiction=True)
    expected_cot = ('Let us think step by step, by considering whether each person is lying and if that leads to contradiction.',
                    ['Assume David is a knight. No contradiction is found in their claim that Alexander is a knight and Aurora is a knave.',
                     'Alexander cannot be a knight, because this would contradict the claim of their own.',
                     'Alexander cannot be a knave, because this would contradict the claim of David.',
                     'We have exhausted all possibilities for Alexander, so let us go back and reconsider David.',
                     'Assume David is a knave. No contradiction is found in their false claim that Alexander is a knight and Aurora is a knave.',
                     'Alexander cannot be a knight, because this would contradict the claim of their own.',
                     'Assume Alexander is a knave. No contradiction is found in their false claim that Isabella is a knave if and only if Isabella is a knight.',
                     'Assume Isabella is a knight. No contradiction is found in their claim that Alexander is a knave.',
                     'Aurora cannot be a knight, because this would contradict the claim of their own.',
                     'Assume Aurora is a knave. No contradiction is found in their false claim that Alexander is a knave and Isabella is a knave.',
                     'Zoey cannot be a knight, because this would contradict the claim of their own.',
                     'Assume Zoey is a knave. No contradiction is found in their false claim that Alexander is a knight.'],
                    'This leads to a feasible solution.')
    self.assertEqual(cot_format, expected_cot)

    # Case 2: no claims are repeated, so every step is a single short sentence.
    cot_format = format_chain_of_thoughts(problem, formatted_problem, chain_of_thoughts,
                                          repeat_claim_for_assumption=False,
                                          repeat_claim_for_contradiction=False)
    expected_cot = ('Let us think step by step, by considering whether each person is lying and if that leads to contradiction.',
                    ['Assume David is a knight.',
                     'Alexander cannot be a knight, because this would contradict the claim of their own.',
                     'Alexander cannot be a knave, because this would contradict the claim of David.',
                     'We have exhausted all possibilities for Alexander, so let us go back and reconsider David.',
                     'Assume David is a knave.',
                     'Alexander cannot be a knight, because this would contradict the claim of their own.',
                     'Assume Alexander is a knave.',
                     'Assume Isabella is a knight.',
                     'Aurora cannot be a knight, because this would contradict the claim of their own.',
                     'Assume Aurora is a knave.',
                     'Zoey cannot be a knight, because this would contradict the claim of their own.',
                     'Assume Zoey is a knave.'],
                    'This leads to a feasible solution.')
    self.assertEqual(cot_format, expected_cot)
+ """ + + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight": rewarding_weight, + } + + def _generate(self) -> None: + """ + Keys in parameter: + N: number of people for K&K problems. + depth_constraint: the max depth of each person's statement. The depth refer to the level of + recursion of operators such as 'and', 'or', etc. Increasing the depth would allow + increasing the difficulty. Though currently the automatic formatting of the problems + into nautral languages does not support depth more than 2. + width_constraint: the max width (number of branches in operators such as 'and', 'or') of each + person's statement. + """ + + """Generate a new instance of the Knights and Knaves problem.""" + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "depth_constraint" in self.parameter, "depth_constraint is required in parameter" + depth_constraint = self.parameter["depth_constraint"] + assert depth_constraint >= 1, "depth_constraint should be greater than or equal to 1" + + assert "width_constraint" in self.parameter, "width_constraint is required in parameter" + width_constraint = self.parameter["width_constraint"] + assert width_constraint >= 1, "width_constraint should be greater than or equal to 1" + + # Generate problem using the KKProblemSampler + sampler = KKProblemSampler( + rand_seed=self.seed, + n_people=N, + depth_constraint=depth_constraint, + width_constraint=width_constraint + ) + + # Get a valid problem with unique solution + problems = sampler.sample_valid_problems( + n_problems=1, + max_retry=1000, + skip_no_solution=True, + skip_multiple_solutions=True + ) + + if not problems: + raise RuntimeError("Failed to generate a valid problem") + + problem = problems[0] + + # Format the problem into natural language + formatter 
def _process(self, answer: str) -> Optional[Dict[str, str]]:
    """Parse an answer text into a ``{name: role}`` dictionary.

    The answer is accepted only if the words 'knight' and 'knave' occur (case-insensitively)
    exactly ``self.parameter["N"]`` times in total and every name in
    ``self.parameter["names"]`` appears in a phrase of the form ``<name> is a knight|knave``.

    Args:
        answer: text extracted from the model's output (or the reference answer).

    Returns:
        Dictionary mapping each character name to ``'knight'`` or ``'knave'``, or ``None`` if
        the answer is not a string, has the wrong number of roles, or misses a character.
    """
    if not isinstance(answer, str):
        return None

    lowered = answer.lower()
    if lowered.count('knight') + lowered.count('knave') != self.parameter["N"]:
        return None

    status_dict = {}
    for name in self.parameter["names"]:
        # The leading \b keeps a short name from matching the tail of a longer one, e.g.
        # 'Ella' inside 'Isabella is a knight' or 'Liam' inside 'William is a knave'.
        pattern = re.compile(rf'\b{re.escape(name)}\b\s+is\s+a\s+\b(knight|knave)', re.IGNORECASE)
        match = pattern.search(answer)
        if match is None:
            return None  # missing identification for this character
        status_dict[name] = match.group(1).lower()

    return status_dict
true_solution.append(self.parameter["gold_answer"][name]) + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta": + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(model_solution, true_solution)) / len(model_solution)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer": + return self.rewards["rewarding_weight"] * (model_solution == true_solution) + else: + raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy']}") diff --git a/server/Gym/environments/kos_dicing/__init__.py b/server/Gym/environments/kos_dicing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0bfa8907508ccef1bc7ca9defb0a0b9b218bb9b6 --- /dev/null +++ b/server/Gym/environments/kos_dicing/__init__.py @@ -0,0 +1 @@ +from .environment import KosDicing_Environment diff --git a/server/Gym/environments/kos_dicing/environment.py b/server/Gym/environments/kos_dicing/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..bc3aff001e107c9491cee47da4420c57de4d6b0a --- /dev/null +++ b/server/Gym/environments/kos_dicing/environment.py @@ -0,0 +1,95 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class KosDicing_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3425 + prompt_template = \ +r"""There are {N} players (labeled from 0 to {N_minus_1}) participating in a game consisting of {M} rounds. Each round (a, b) involves two distinct players a and b, given as: +{rounds} + +In each round, exactly one of the two players wins. Please determine the outcome of all rounds such that the **maximum number of total wins by any player** is exactly {K} (basically, each player has a number of wins, and the maximum of these numbers is exactly {K}). + +**Output Format:** Output {M} integers, separated by spaces. 
def scorer(self, output : str) -> float :
    """
    Score a proposed list of round winners.

    Returns ``rewards["wrong_format"]`` when the processor yields ``None``,
    ``rewards["invalid_solution"]`` when the number of winners differs from
    ``parameter["M"]`` or a winner did not play in its round,
    ``rewards["correct_solution"]`` when the largest win count equals
    ``parameter["K"]``, and ``rewards["wrong_solution"]`` otherwise.
    """
    winners = self.processor(output)
    if winners is None :
        return self.rewards["wrong_format"]
    assert isinstance(winners, list), "processed_result should be a list"

    if len(winners) != self.parameter["M"] :
        return self.rewards["invalid_solution"]

    # Tally wins per player, rejecting any winner who is not one of the two participants.
    wins = [0] * self.parameter["N"]
    for pair, winner in zip(self.parameter["rounds"], winners) :
        if winner not in pair :
            return self.rewards["invalid_solution"]
        wins[winner] += 1

    verdict = "correct_solution" if max(wins) == self.parameter["K"] else "wrong_solution"
    return self.rewards[verdict]
+ - If their left subtree indices are equal, then the tree with the smaller right subtree index has the smaller overall index. +4. Indices are continuous and unique: each non-negative integer maps to exactly one binary tree, and vice versa. + +Find the binary tree with index {N} and output its postorder traversal using the following format: +- A single-node tree is represented as `X`. +- For a tree with left subtree L and right subtree R (represented as L' and R' respectively), the postorder is `(L')X(R')`. +- If the left subtree is empty, omit its parentheses: `X(R')`. +- If the right subtree is empty, omit its parentheses: `(L')X`. + +**Output Format:** Your output should be a single line containing the postorder traversal. +Example: `((X)X(X))X` (do **NOT** include quotes or backticks; this is the binary tree with index 20).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the Kth_BinaryTree_Environment instance. 
def _generate(self) -> None :
    """
    Sample a tree index and store the rendering of the corresponding binary tree.

    Reads ``self.parameter["MAX_N"]`` (must be >= 1), draws ``N`` uniformly from
    ``[1, MAX_N]`` and writes ``self.parameter["N"]`` together with
    ``self.parameter["reference_answer"]``, the ``(L')X(R')`` string of tree ``N``.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"

    N = self.parameter["N"] = random.randint(1, MAX_N)

    # Work with 1-based ranks: rank 1 is the empty tree (index 0), so tree N has rank N + 1.
    target = N + 1

    # shapes[k] : number of binary trees with exactly k nodes (Catalan recurrence).
    # upto[k]   : number of binary trees with at most k nodes, i.e. the largest rank of a k-node tree.
    shapes = [1, 1]
    upto = [1, 2]
    while upto[-1] < target :
        k = len(shapes)
        shapes.append(sum(shapes[j] * shapes[k - 1 - j] for j in range(k)))
        upto.append(upto[-1] + shapes[-1])

    def ranks_before(nodes) :
        # How many trees have strictly fewer than `nodes` nodes.
        return upto[nodes - 1] if nodes > 0 else 0

    def render(rank, parenthesize) :
        # The empty tree renders as nothing, not even as a pair of parentheses.
        if rank <= 1 :
            return ""

        size = next(k for k, bound in enumerate(upto) if rank <= bound)
        within = rank - ranks_before(size)  # 1-based position among the trees with `size` nodes

        body = ""
        for left_size in range(size) :
            right_size = size - 1 - left_size
            block = shapes[left_size] * shapes[right_size]
            if within > block :
                within -= block
                continue
            # Inside one block the left subtree is the major key and the right subtree the minor key.
            left_rank = (within - 1) // shapes[right_size] + 1
            right_rank = within - (left_rank - 1) * shapes[right_size]
            body = (
                render(left_rank + ranks_before(left_size), True)
                + "X"
                + render(right_rank + ranks_before(right_size), True)
            )
            break

        return "(" + body + ")" if parenthesize else body

    self.parameter["reference_answer"] = render(target, False)
self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/kth_semi_balanced_bracket_sequence/__init__.py b/server/Gym/environments/kth_semi_balanced_bracket_sequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4afb3e256ccc25b67bd17b1c73cd5a3c13935041 --- /dev/null +++ b/server/Gym/environments/kth_semi_balanced_bracket_sequence/__init__.py @@ -0,0 +1 @@ +from .environment import Kth_SemiBalancedBracketSequence_Environment diff --git a/server/Gym/environments/kth_semi_balanced_bracket_sequence/environment.py b/server/Gym/environments/kth_semi_balanced_bracket_sequence/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..a375d599007c651ddec1410fad74cb9e38434492 --- /dev/null +++ b/server/Gym/environments/kth_semi_balanced_bracket_sequence/environment.py @@ -0,0 +1,107 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class Kth_SemiBalancedBracketSequence_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Consider strings that only contain the characters `(` and `)`: +- A string is called a **balanced bracket sequence** if, after inserting digits and operators, it can form a valid arithmetic expression. For example, `(())` is a balanced bracket sequence, while `)(()` is not. +- A string is called a **semi-balanced bracket sequence** if removing **exactly one bracket** from it can result in a balanced bracket sequence. + +We define the lexicographical order such that `(` comes **before** `)`. Please find the **{K}-th semi-balanced bracket sequence of length {N}**, when all such sequences are sorted in lexicographical order. 
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0,
             **kwargs) :
    """
    Initialize the Kth_SemiBalancedBracketSequence_Environment instance.

    All reward settings are stored unchanged in ``self.rewards``; every other
    keyword argument is forwarded to the base-class initializer.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        rewarding_strategy = rewarding_strategy,
        rewarding_beta = rewarding_beta,
        rewarding_weight = rewarding_weight,
    )
def scorer(self, output : str) -> float :
    """
    Score a candidate bracket sequence against the reference answer.

    Returns ``rewards["wrong_format"]`` when the processor yields ``None`` and
    ``rewards["invalid_solution"]`` when the candidate does not have length
    ``parameter["N"]`` or contains characters other than ``(`` and ``)``.
    Otherwise the reward follows ``rewards["rewarding_strategy"]``:
    the position-wise match ratio raised to ``rewarding_beta``, or exact equality,
    each scaled by ``rewarding_weight``.

    Raises:
        NotImplementedError: if the configured rewarding strategy is unknown.
    """
    candidate = self.processor(output)
    if candidate is None :
        return self.rewards["wrong_format"]

    N = self.parameter["N"]
    if len(candidate) != N or any(c not in "()" for c in candidate) :
        return self.rewards["invalid_solution"]

    reference = self.parameter["reference_answer"]
    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]

    if strategy == "mean([gold=answer])^beta" :
        matches = sum(1 for a, b in zip(reference, candidate) if a == b)
        return weight * ((matches / N) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return weight * (reference == candidate)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None :
    """
    Sample a random string over {a, b} and a rank K, then store the K-th distinct subsequence.

    Reads ``self.parameter["N"]`` (must be >= 2) and writes:
      - ``S``: a random string of length N over the letters ``a`` and ``b``;
      - ``K``: a rank drawn uniformly from 1 to the number of distinct non-empty subsequences of S;
      - ``reference_answer``: the K-th distinct non-empty subsequence of S in lexicographical order.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    # Single source of truth for the alphabet: the transition table and the scan in
    # `compute` must agree on its size, otherwise the scan indexes past the table row.
    ALPHABET = "ab"
    SIGMA = len(ALPHABET)

    a_probability = random.random()
    S = self.parameter["S"] = "".join("a" if random.random() < a_probability else "b" for _ in range(N))
    assert len(S) == N, "Generated string S does not match the specified length N"

    # Next[i][c] : smallest position j >= i with S[j] == ALPHABET[c], or None if there is none.
    # F[i]       : number of distinct strings S[i] + t, where t is a (possibly empty)
    #              subsequence of S[i + 1 :].
    Next = [[None] * SIGMA for _ in range(N)]
    F = [0] * N
    for i in range(N - 1, -1, -1) :
        if i + 1 < N :
            Next[i] = list(Next[i + 1])
        Next[i][ALPHABET.index(S[i])] = i
        F[i] = 1
        if i + 1 < N :
            F[i] += sum(F[j] for j in Next[i + 1] if j is not None)

    total = sum(F[j] for j in Next[0] if j is not None)
    K = self.parameter["K"] = random.randint(1, total)

    def compute(K : int) -> str :
        # Walk the table from the left, choosing at each step the smallest letter
        # whose subtree still contains the K-th string.
        chars = []
        index = 0
        while True :
            assert 0 <= index < N, "Index out of bounds"
            found = False
            for c in range(SIGMA) :
                j = Next[index][c]
                if j is None :
                    continue
                if F[j] < K :
                    # The K-th string does not start with this letter: skip its whole subtree.
                    K -= F[j]
                    continue
                chars.append(ALPHABET[c])
                if K == 1 :
                    # A string sorts before all of its extensions, so rank 1 ends here.
                    return "".join(chars)
                index = j + 1
                K -= 1
                found = True
                break
            assert found, "No valid character found, this should not happen"

    self.parameter["reference_answer"] = compute(K)
def _generate(self) -> None :
    """
    Sample (N, A, B, P) with gcd(N, A) == 1, then pick a pattern T and its occurrence count.

    Reads ``self.parameter["MAX_N"]`` (>= 8) and ``self.parameter["MAX_M"]`` (>= 2).
    Writes ``N``, ``A``, ``B``, ``P``, ``T`` and ``reference_answer``, where
    ``reference_answer`` is the number of positions at which T occurs in the string
    C defined by C[i] = 0 iff (A * i + B) mod N < P.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 8, "MAX_N should be greater than or equal to 8"

    assert "MAX_M" in self.parameter, "MAX_M is required in parameter"
    MAX_M = self.parameter["MAX_M"]
    assert MAX_M >= 2, "MAX_M should be greater than or equal to 2"

    # Resample until A is invertible modulo N, so that i -> A * i mod N is a bijection.
    while True :
        N = random.randint(8, MAX_N)
        A = random.randint(2, N - 1)
        B = random.randint(0, N - 1)
        P = random.randint(1, N - 1)
        self.parameter.update(N = N, A = A, B = B, P = P)
        if math.gcd(N, A) == 1 :
            break

    def count_occurrences(pattern : str) -> int :
        # Count start positions i by their residue v = A * i mod N: collect the closed
        # ranges of v that cannot start a match and subtract the size of their union from N.
        length = len(pattern)
        blocked = []
        for offset, bit in enumerate(pattern) :
            shift = (A * offset) % N
            if bit == '0' :
                lo, hi = (P - shift - B) % N, (N - shift - B) % N
            else :
                lo, hi = (-shift - B) % N, (P - shift - B) % N
            if lo <= hi :
                blocked.append((lo, hi - 1))
            else :
                # The forbidden range wraps around modulo N: split it in two.
                blocked += [(0, hi - 1), (lo, N - 1)]

        # Start positions too close to the end of C cannot fit the whole pattern.
        blocked.extend(((A * i) % N,) * 2 for i in range(N - length + 1, N))

        remaining = N
        covered_to = -1
        for lo, hi in sorted(blocked) :
            if lo > covered_to :
                remaining -= hi - lo + 1
                covered_to = hi
            else :
                remaining -= max(0, hi - covered_to)
                covered_to = max(covered_to, hi)
        return remaining

    # Grow a real substring of C one character at a time and bucket every prefix by its count.
    start_i = random.randint(0, N - 2)
    by_answer = {}
    prefix = ""
    for i in range(start_i, min(N, start_i + MAX_M)) :
        prefix += "0" if (A * i + B) % N < P else "1"
        occurrences = count_occurrences(prefix)
        assert occurrences >= 1, "Answer should be at least 1"
        by_answer.setdefault(occurrences, []).append(prefix)

    # Choose the answer first and the pattern second, so answers are uniform over distinct values.
    gold = self.parameter["reference_answer"] = random.choice(list(by_answer))
    self.parameter["T"] = random.choice(by_answer[gold])
newline at end of file diff --git a/server/Gym/environments/lamp_changing/__init__.py b/server/Gym/environments/lamp_changing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b9591ad8df0479b99e299675c17456ee79a535df --- /dev/null +++ b/server/Gym/environments/lamp_changing/__init__.py @@ -0,0 +1 @@ +from .environment import LampChanging_Environment diff --git a/server/Gym/environments/lamp_changing/environment.py b/server/Gym/environments/lamp_changing/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..6923529235dd39dde37a30e8f81420584a960439 --- /dev/null +++ b/server/Gym/environments/lamp_changing/environment.py @@ -0,0 +1,77 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class LampChanging_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3937 + prompt_template = \ +r"""There are {N} lamps arranged in a circle, labeled clockwise from 1 to {N}. At each next moment, the state of each lamp depends on its current state and the state of the next lamp in the clockwise direction: +- If the two lamps have the same state, then the lamp will be OFF in the next moment. +- If the two lamps have different states, then the lamp will be ON in the next moment. + +The initial moment is time 0, and the initial states of all lamps are: {situations} +What's the state of lamp {K} at time {T} (Output either ON or OFF)?""" + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the LampChanging_Environment instance. 
def _generate(self) -> None :
    """
    Sample a lamp circle whose queried lamp ends in a pre-chosen state.

    Reads ``self.parameter["MAX_N_T"]`` (>= 3). The target answer ("ON" or "OFF") is
    drawn first; then N, the initial states B, the time T and the lamp K are resampled
    until lamp K at time T matches it. Writes ``reference_answer``, ``N``, ``B``,
    ``T`` and ``K``.
    """
    assert "MAX_N_T" in self.parameter, "MAX_N_T is required in parameter"
    MAX_N_T = self.parameter["MAX_N_T"]
    assert MAX_N_T >= 3, "MAX_N_T should be greater than or equal to 3"

    wanted = self.parameter["reference_answer"] = random.choice(["ON", "OFF"])

    while True :
        N = random.randint(3, MAX_N_T)
        ON_probability = random.random()
        B = [int(random.random() < ON_probability) for _ in range(N)]
        T = random.randint(2, MAX_N_T)
        K = random.randint(1, N)
        self.parameter.update(N = N, B = B, T = T, K = K)

        # Each step XORs a lamp with its clockwise neighbour, so after T steps lamp K is the
        # XOR of B[K - 1 + i] over all i with C(T, i) odd, i.e. all i that are submasks of T.
        parity = sum(B[(i + K - 1) % N] for i in range(T + 1) if (T & i) == i) % 2
        if ("OFF", "ON")[parity] == wanted :
            break
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Initialize the LandAcquisition_Environment instance.

    All reward settings are stored unchanged in ``self.rewards``; every other
    keyword argument is forwarded to the base-class initializer.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        rewarding_strategy = rewarding_strategy,
        rewarding_weight = rewarding_weight,
        rewarding_beta = rewarding_beta,
    )
def scorer(self, output : str) -> float :
    """
    Score a proposed partition of the items into groups.

    Returns ``rewards["wrong_format"]`` when the processor yields ``None`` and
    ``rewards["invalid_solution"]`` unless the groups together contain each index
    1..N exactly once. Otherwise the partition cost (sum over groups of max W times
    max L) is compared with ``parameter["gold_answer"]`` according to
    ``rewards["rewarding_strategy"]``.

    Raises:
        NotImplementedError: if the configured rewarding strategy is unknown.
    """
    groups = self.processor(output)
    if groups is None :
        return self.rewards["wrong_format"]

    N = self.parameter["N"]
    members = [item for group in groups for item in group]
    # Right count plus right set of indices means every item appears exactly once.
    if len(members) != N :
        return self.rewards["invalid_solution"]
    if set(members) != set(range(1, N + 1)) :
        return self.rewards["invalid_solution"]

    W, L = self.parameter["W"], self.parameter["L"]
    gold = self.parameter["gold_answer"]
    answer = 0
    for group in groups :
        # Item indices in the answer are 1-based.
        answer += max(W[i - 1] for i in group) * max(L[i - 1] for i in group)
    assert gold <= answer, f"Gold answer {gold} is greater than computed answer {answer}"

    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]
    if strategy == "(gold/answer)^beta" :
        return weight * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return weight * (gold == answer)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
(Two permutations producing the same `H`-sequence count as one.) Output the result modulo {MOD}.""" + + def __init__(self, + max_MOD : int = 1000000000, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the LandformGenerationCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.max_MOD = max_MOD + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + example_H = [random.randint(1, N) for _ in range(N)] + A = [None] * N + for i, Hi in enumerate(example_H) : + A[i] = (Hi, random.randint(sum(int(Hj > Hi) for Hj in example_H[: i]) + 1, sum(int(Hj > Hi) for Hj in example_H) + 1)) + random.shuffle(A) + self.parameter["A"] = A.copy() + + MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD) + + + # ---------- pre-processing ---------- + # sort by height desc, key asc + A.sort(key=lambda x: (-x[0], x[1])) + + # ---------- 2. 
contour (height) sequences ---------- + ans_heights = 1 + start = 0 + while start < N: + end = start + h_cur = A[start][0] + while end + 1 < N and A[end + 1][0] == h_cur: # same-height block + end += 1 + + processed = start + 1 # 1-based + dp = [0] * (processed + 2) # dp[0 … processed] + + first_key = A[start][1] + for j in range(1, min(processed, first_key) + 1): + dp[j] = 1 + + for i in range(start + 1, end + 1): # remaining in block + key = A[i][1] + limit = min(processed, key) + for j in range(1, limit + 1): # prefix sums + dp[j] = (dp[j] + dp[j - 1]) % MOD + + last_key = A[end][1] + res = sum(dp[1:min(processed, last_key) + 1]) % MOD + ans_heights = (ans_heights * res) % MOD + + start = end + 1 # next block + + # ---------- output ---------- + self.parameter["reference_answer"] = ans_heights + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + H = " ".join("H[{}]={}".format(i, Ai[0]) for i, Ai in enumerate(self.parameter["A"])), + C = " ".join("C[{}]={}".format(i, Ai[1]) for i, Ai in enumerate(self.parameter["A"])), + MOD = self.parameter["MOD"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/largest_convex_polygon/__init__.py b/server/Gym/environments/largest_convex_polygon/__init__.py new file mode 100644 index 
# File: server/Gym/environments/largest_convex_polygon/environment.py
import math
import random
from functools import cmp_to_key
from typing import Optional, List, Tuple
from ...environment import VerifiableEnvironment


class LargestConvexPolygon_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P2924
    """Environment asking for the largest subset of points in convex position.

    ``_generate`` samples ``N`` lattice points with no three collinear and
    stores the size of the largest convex polygon as ``gold_answer``;
    ``scorer`` checks a proposed subset and rewards it by its size relative
    to that gold value.
    """

    prompt_template = \
r"""You are given {N} points in the 2D plane, labeled from 1 to {N}. No two points share the same coordinates, and no three points are collinear:
{points}

Find a subset of distinct points that forms the vertices of a **convex polygon**, and maximize the number of points in this subset; please output the labels of the selected points in one line, separated by spaces (in any order); if multiple answers exist, output any one."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, unsuccessful_solution: float = -0.2, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the LargestConvexPolygon_Environment instance.

        Args:
            wrong_format: Reward when the output cannot be parsed.
            invalid_solution: Reward for out-of-range or repeated labels.
            unsuccessful_solution: Reward when the chosen points are not in
                convex position (or fewer than three points are chosen).
            rewarding_strategy: ``"(answer/gold)^beta"`` or ``"gold=answer"``.
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by ``"(answer/gold)^beta"``.
            **kwargs: Forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "unsuccessful_solution": unsuccessful_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample the point set and compute ``gold_answer``.

        Reads ``self.parameter["N"]`` (must be >= 3) and writes
        ``self.parameter["points"]`` (list of ``(x, y)`` tuples with
        coordinates in ``[0, N]``) and ``self.parameter["gold_answer"]``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        def line_through(x, y, px, py):
            """Return the normalized (a, b, c) of the line a*X + b*Y + c = 0 through two distinct points."""
            if px == x:
                a, b, c = 1, 0, -x
            else:
                a, b = py - y, x - px
                c = -(a * x + b * y)

            # math.gcd works on absolute values, so no explicit abs() is needed.
            g = math.gcd(a, math.gcd(b, c))
            a, b, c = a // g, b // g, c // g

            # Fix the sign so the same line always maps to the same triple.
            if a < 0:
                a, b, c = -a, -b, -c
            elif a == 0 and b < 0:
                b, c = -b, -c
            return a, b, c

        # Rejection sampling gives no guarantee that an admissible cell still
        # exists for the current partial set, so cap the attempts per point and
        # restart from scratch instead of spinning forever. As long as a point
        # is found within the cap, the random draws match an uncapped loop.
        max_attempts = 50 * (N + 1) ** 2
        while True:
            points = set()
            lines = set()  # normalized lines through every pair of accepted points
            dead_end = False
            for _ in range(N):
                for _attempt in range(max_attempts):
                    x = random.randint(0, N)
                    y = random.randint(0, N)
                    if (x, y) in points:
                        continue

                    collinear = False
                    new_lines = set()
                    for (px, py) in points:
                        line = line_through(x, y, px, py)
                        # The candidate lies on a line already spanned by two accepted points.
                        if line in lines:
                            collinear = True
                            break
                        new_lines.add(line)

                    if collinear:
                        continue

                    points.add((x, y))
                    lines.update(new_lines)
                    break
                else:
                    dead_end = True
                    break
            if not dead_end:
                break

        self.parameter["points"] = list(points)

        P = self.parameter["points"]

        def octant(dx, dy):
            """Classify a non-zero direction into 8 sectors, clockwise starting from straight up."""
            if dx == 0 and dy > 0:  # up
                return 1
            elif dx > 0 and dy > 0:  # NE
                return 2
            elif dx > 0 and dy == 0:  # right
                return 3
            elif dx > 0 and dy < 0:  # SE
                return 4
            elif dx == 0 and dy < 0:  # down
                return 5
            elif dx < 0 and dy < 0:  # SW
                return 6
            elif dx < 0 and dy == 0:  # left
                return 7
            else:  # dx < 0 and dy > 0 -> NW
                return 8

        # Build all directed edges with precomputed (dx, dy, oct)
        edges = []
        for u in range(N):
            xu, yu = P[u]
            for v in range(N):
                if u == v:
                    continue
                xv, yv = P[v]
                dx = xv - xu
                dy = yv - yu
                edges.append((u, v, dx, dy, octant(dx, dy)))

        def cmp_edges(e1, e2):
            """Order edges by octant first, then by cross product inside an octant."""
            if e1[4] != e2[4]:
                return -1 if e1[4] < e2[4] else 1
            cross = e1[3] * e2[2] - e2[3] * e1[2]  # dy1*dx2 - dy2*dx1
            if cross > 0:
                return -1
            elif cross < 0:
                return 1
            else:
                return 0

        edges.sort(key=cmp_to_key(cmp_edges))

        # Only keep (u, v) for the DP loop
        EV = [(u, v) for (u, v, _, _, _) in edges]

        # For every start vertex i, relax the edges once in angular order;
        # mx[v] is the largest number of edges on such a path from i to v,
        # and the value that comes back to mx[i] is a closed polygon's size.
        ans = 0
        for i in range(N):
            mx = [None] * N
            mx[i] = 0
            for u, v in EV:
                val = mx[u]
                if val is not None:
                    cand = val + 1
                    if mx[v] is None or cand > mx[v]:
                        mx[v] = cand
            if mx[i] is not None and mx[i] > ans:
                ans = mx[i]
        assert ans >= 3, "The answer should be greater than or equal to 3"
        self.parameter["gold_answer"] = ans

    def _prompt_generate(self) -> str:
        """Render the prompt, listing the points with 1-based labels."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            points="\n".join("Point {}: ({}, {})".format(i, x, y) for i, (x, y) in enumerate(self.parameter["points"], start=1)),
        )

    def _process(self, answer: Optional[str]) -> Optional[List[int]]:
        """Parse whitespace-separated integers; return None when parsing fails."""
        if answer is not None:
            answer = answer.strip()
            try:
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError:
                return None  # Invalid answer format
        else:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a model output.

        Returns ``wrong_format`` when the output cannot be parsed,
        ``invalid_solution`` for out-of-range or duplicate labels,
        ``unsuccessful_solution`` when the selected points are not exactly the
        vertices of their convex hull, and otherwise the reward given by the
        configured strategy.

        Raises:
            NotImplementedError: If ``rewarding_strategy`` is not recognised.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm there.
        processed_result = self.processor(output)
        if processed_result is not None:
            if not all(1 <= i <= self.parameter["N"] for i in processed_result):
                return self.rewards["invalid_solution"]
            if len(processed_result) != len(set(processed_result)):
                return self.rewards["invalid_solution"]

            def cross(o: Tuple[int, int], a: Tuple[int, int], b: Tuple[int, int]) -> int:
                return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])

            def can_form_convex_polygon(points: List[Tuple[int, int]]) -> bool:
                """Return True when every given point is a vertex of the points' convex hull."""
                pts = sorted(set(points))
                n = len(pts)
                if n < 3:
                    return False

                # Monotone-chain hull; "<= 0" also drops collinear points.
                lower = []
                for p in pts:
                    while len(lower) >= 2 and cross(lower[-2], lower[-1], p) <= 0:
                        lower.pop()
                    lower.append(p)

                upper = []
                for p in reversed(pts):
                    while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0:
                        upper.pop()
                    upper.append(p)

                hull = lower[:-1] + upper[:-1]
                return len(hull) == n

            if not can_form_convex_polygon([self.parameter["points"][i - 1] for i in processed_result]):
                return self.rewards["unsuccessful_solution"]

            answer, gold = len(processed_result), self.parameter["gold_answer"]
            assert answer <= gold, "The answer should be less than or equal to the gold answer"
            if self.rewards["rewarding_strategy"] == "(answer/gold)^beta":
                return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer":
                return self.rewards["rewarding_weight"] * (answer == gold)
            else:
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else:
            return self.rewards["wrong_format"]


# File: server/Gym/environments/largest_rectangle_among_points/environment.py
# (the package __init__.py re-exports LargestRectangle_AmongPoints_Environment)
import random
from typing import Optional, Tuple, List
from ...environment import VerifiableEnvironment


class LargestRectangle_AmongPoints_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3217
    """Environment asking for four points that form a maximum-area rectangle.

    ``_generate`` plants one square among random points and stores the
    largest rectangle area found as ``gold_answer``; ``scorer`` validates the
    four chosen indices and rewards their rectangle's area against it.
    """

    prompt_template = \
r"""You are given a set of {N} points in a 2D plane, each represented by its coordinates `(x, y)`:
{points}

Your task is to find four **distinct** points such that they form a rectangle (NOT necessarily axis-aligned). Among all such rectangles, choose one with the **maximum possible area**.

**Output Format:** Output one line containing the indices (0-based) of the four selected points, separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the LargestRectangle_AmongPoints_Environment instance.

        Args:
            wrong_format: Reward when the output cannot be parsed.
            invalid_solution: Reward for out-of-range indices or four points
                that do not form a rectangle.
            rewarding_strategy: ``"(answer/gold)^beta"`` or ``"gold=answer"``.
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by ``"(answer/gold)^beta"``.
            **kwargs: Forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample the points and compute ``gold_answer``.

        Reads ``self.parameter["N"]`` (must be >= 5) and writes
        ``self.parameter["points"]`` and ``self.parameter["gold_answer"]``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 5, "N should be greater than or equal to 5"

        points = self.parameter["points"] = []
        points.append((random.randint(-N // 2, +N // 2), random.randint(-N // 2, +N // 2)))
        # Plant a square with a non-zero side vector (dx, dy) so that at least
        # one rectangle with positive area exists.
        while True:
            dx, dy = random.randint(-N // 2, +N // 2), random.randint(-N // 2, +N // 2)
            if dx == 0 and dy == 0:
                continue
            x, y = points[0]
            points.append((x + dx, y + dy))
            points.append((x - dy, y + dx))
            points.append((x + dx - dy, y + dy + dx))
            break
        for _ in range(4, N):
            points.append((random.randint(-N, +N), random.randint(-N, +N)))
        random.shuffle(points)

        # Build list of all point pairs (candidate diagonals), storing:
        # (squared_length, sum_x, sum_y, idx1, idx2)
        lines = []
        for i in range(N):
            xi, yi = points[i]
            for j in range(i + 1, N):
                xj, yj = points[j]
                dx = xi - xj
                dy = yi - yj
                s = dx * dx + dy * dy
                # midpoint * 2 is (xi+xj, yi+yj)
                sx = xi + xj
                sy = yi + yj
                lines.append((s, sx, sy, i, j))

        # Sort by (length, midpoint_x, midpoint_y)
        lines.sort(key=lambda t: (t[0], t[1], t[2]))

        ans = 0
        M = len(lines)
        # Scan through sorted diagonals, grouping by equal (s, sx, sy):
        # two pairs with the same length and midpoint are a rectangle's diagonals.
        i = 0
        while i < M:
            s0, sx0, sy0, idx1, idx2 = lines[i]
            j = i + 1
            # For each other diagonal with same length and midpoint...
            while j < M and lines[j][0] == s0 and lines[j][1] == sx0 and lines[j][2] == sy0:
                _, _, _, idx3, _ = lines[j]
                # Rectangle area via the cross product:
                # area = |(C-A) x (B-A)|, with A=points[idx1], C=points[idx2], B=points[idx3]
                x1, y1 = points[idx1]  # A
                x2, y2 = points[idx2]  # C (opposite of A)
                x3, y3 = points[idx3]  # B (one endpoint of other diagonal)
                # Determinant = x1*y2 + x2*y3 + x3*y1 - x2*y1 - x3*y2 - x1*y3
                tmp = abs(x1 * y2 + x2 * y3 + x3 * y1 - x2 * y1 - x3 * y2 - x1 * y3)
                if tmp > ans:
                    ans = tmp
                j += 1
            i += 1

        assert ans > 0, "The maximum area should be greater than 0"
        self.parameter["gold_answer"] = ans

    def _prompt_generate(self) -> str:
        """Render the prompt, listing the points with 0-based indices."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            points="\n".join("Point {}: ({}, {})".format(i, x, y) for i, (x, y) in enumerate(self.parameter["points"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[Tuple[int, int, int, int]]:
        """Parse exactly four whitespace-separated integers; return None otherwise."""
        if answer is not None:
            answer = answer.strip()
            try:
                indices = list(map(int, answer.split()))
                if len(indices) != 4:
                    return None  # Invalid answer format
                return indices[0], indices[1], indices[2], indices[3]
            except ValueError:
                return None
        else:
            return None

    def scorer(self, output: str) -> float:
        """Score a model output.

        Returns ``wrong_format`` when the output cannot be parsed,
        ``invalid_solution`` for out-of-range indices or points that do not
        form a non-degenerate rectangle, and otherwise the reward given by the
        configured strategy.

        Raises:
            NotImplementedError: If ``rewarding_strategy`` is not recognised.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm there.
        processed_result = self.processor(output)
        if processed_result is not None:
            assert isinstance(processed_result, tuple), "processed_result should be a tuple of indices"

            if not all(0 <= idx < self.parameter["N"] for idx in processed_result):
                return self.rewards["invalid_solution"]

            def rectangle_area(P: List[Tuple[int, int]]) -> Optional[int]:
                """Return the area when the four points form a rectangle, else None."""
                A = P[0]
                others = P[1:]

                d2 = []
                for X in others:
                    dx, dy = X[0] - A[0], X[1] - A[1]
                    d2.append((dx * dx + dy * dy, X, dx, dy))
                d2.sort(key=lambda t: t[0])

                # The two nearest points are taken as A's neighbours, the farthest as the opposite corner.
                d1, B, dx1, dy1 = d2[0]
                d2_val, D, dx2, dy2 = d2[1]
                C = d2[2][1]

                # Zero-length sides mean duplicate points were selected.
                if d1 == 0 or d2_val == 0:
                    return None

                if dx1 * dx2 + dy1 * dy2 != 0:  # Perpendicular check
                    return None

                expected_C = (B[0] + D[0] - A[0], B[1] + D[1] - A[1])
                if expected_C != C:  # Parallelogram property
                    return None

                area = abs(dx1 * dy2 - dy1 * dx2)
                return area

            answer, gold = rectangle_area([self.parameter["points"][idx] for idx in processed_result]), self.parameter["gold_answer"]
            if answer is None:
                return self.rewards["invalid_solution"]
            assert answer <= gold, "The answer area should be less than or equal to the gold area"

            if self.rewards["rewarding_strategy"] == "(answer/gold)^beta":
                return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer":
                return self.rewards["rewarding_weight"] * (answer == gold)
            else:
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else:
            return self.rewards["wrong_format"]
# File: server/Gym/environments/las/environment.py
# (the package __init__.py re-exports LAS_Environment)
import random
from collections import deque
from typing import Optional, List
from ...environment import VerifiableEnvironment


class LAS_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3584
    """Environment asking for a stable food assignment on a ring.

    ``_generate`` samples the calorie array and builds a reference
    assignment with a DP around the ring; ``scorer`` counts how many people
    cannot gain by switching to their other food.
    """

    prompt_template = \
r"""There are {N} people labeled from 1 to {N}, and {N} foods also labeled from 1 to {N}. The i-th food has C[i] calories, and the array C is: {C}

Each person chooses one food as follows:
- Person i (1 ≤ i < {N}) can choose either food i or food i+1.
- Person {N} can choose either food {N} or food 1.
- If a food is chosen by only one person, that person receives all of its calories. If a food is chosen by two people, they share the calories of that food **equally**.

You are to find a valid food assignment (i.e., choose one food between the two choices for each person), such that for **every person**, if this person switches to the other food choice (while all other people keep their choices unchanged), this person does **NOT** receive more calories than this person currently does.
**Output Format:** Output a single line with {N} integers — the food chosen by person 1, 2, ..., {N}, separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(satisfied/all)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the LAS_Environment instance.

        Args:
            wrong_format: Reward when the output cannot be parsed or has the
                wrong number of entries.
            invalid_solution: Reward when a person picks a food that is not
                one of their two options.
            rewarding_strategy: ``"(satisfied/all)^beta"`` or ``"satisfied=all"``.
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by ``"(satisfied/all)^beta"``.
            **kwargs: Forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample the calories and build a reference assignment.

        Reads ``self.parameter["N"]`` (must be >= 3), writes
        ``self.parameter["A"]`` and, when one of the four start states closes
        around the ring, ``self.parameter["reference_answer"]``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N must be at least 3"

        A = self.parameter["A"] = [random.randint(1, 2 * N) for _ in range(N)]

        # B holds the calories 1-indexed, with B[N+1] = B[1] to close the ring
        B = [0] * (N + 2)
        for i in range(1, N + 1):
            B[i] = A[i - 1]
        B[N + 1] = B[1]

        # C is the DP table: (N+2) x 5. C[i][t] != 0 means state t is reachable
        # at position i; the stored value is the predecessor state at i-1.
        # Judging by the reconstruction below, the states describe who takes
        # the food at position i: 1 = the previous person, 2 = person i,
        # 3 = both, 4 = nobody.
        C = [[0] * 5 for _ in range(N + 2)]

        def Dynamic_Programming(s):
            """Fill C starting from state s at position 1; return whether position N+1 can be in state s again."""
            # reset
            for i in range(N + 2):
                for j in range(5):
                    C[i][j] = 0
            # base case: mark state s as reachable at position 1
            C[1][s] = 1

            # build DP up through i = N+1
            for i in range(2, N + 2):
                if C[i - 1][1] and B[i - 1] <= B[i] * 2:
                    C[i][1] = 1
                if C[i - 1][1] and B[i - 1] <= B[i]:
                    C[i][3] = 1
                if C[i - 1][2] and B[i] <= B[i - 1] * 2:
                    C[i][2] = 2
                if C[i - 1][2] and B[i] <= B[i - 1]:
                    C[i][4] = 2
                if C[i - 1][3] and B[i] <= B[i - 1]:
                    C[i][2] = 3
                if C[i - 1][3] and B[i] * 2 <= B[i - 1]:
                    C[i][4] = 3
                if C[i - 1][4] and B[i - 1] <= B[i]:
                    C[i][1] = 4
                if C[i - 1][4] and B[i - 1] * 2 <= B[i]:
                    C[i][3] = 4

            # the ring closes only if position N+1 ends in the start state s
            return C[N + 1][s] != 0

        # D stores the final choices (1-indexed)
        D = [0] * (N + 2)

        # Try all 4 possible start/end states
        for s in range(1, 5):
            if Dynamic_Programming(s):
                # reconstruct backwards by following the stored predecessors
                x = s
                for j in range(N + 1, 0, -1):
                    if x == 1:
                        D[j - 1] = ((j - 1) % N) + 1
                    if x == 2:
                        D[j] = ((j - 1) % N) + 1
                    if x == 3:
                        D[j - 1] = ((j - 1) % N) + 1
                        D[j] = ((j - 1) % N) + 1
                    # state 4 assigns nobody to this food, so there is nothing to record
                    x = C[j][x]

                # output persons 1..N
                self.parameter["reference_answer"] = " ".join(str(D[i]) for i in range(1, N + 1))
                break

    def _prompt_generate(self) -> str:
        """Render the prompt, listing the calories with 1-based indices."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            C=", ".join("C[{}]={}".format(i + 1, Ci) for i, Ci in enumerate(self.parameter["A"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None when parsing fails."""
        if answer is not None:
            answer = answer.strip()
            try:
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError:
                return None  # Invalid answer format
        else:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a model output.

        Returns ``wrong_format`` when the output cannot be parsed or does not
        contain N entries, ``invalid_solution`` when a person picks a food
        outside their two options, and otherwise the reward given by the
        configured strategy from the number of satisfied people.

        Raises:
            NotImplementedError: If ``rewarding_strategy`` is not recognised.
        """
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm there.
        processed_result = self.processor(output)
        if processed_result is not None:
            assert isinstance(processed_result, list), "processed_result should be a list"
            choices = [choice - 1 for choice in processed_result]  # Convert to 0-based index

            if len(choices) != self.parameter["N"]:
                return self.rewards["wrong_format"]
            if not all(choice in (person, (person + 1) % self.parameter["N"]) for person, choice in enumerate(choices)):
                return self.rewards["invalid_solution"]
            counting = [0] * self.parameter["N"]
            for choice in choices:
                counting[choice] += 1

            def get_calories(choice):
                # Doubled calories, so that an equal split stays an integer.
                if counting[choice] == 1:
                    return self.parameter["A"][choice] * 2
                elif counting[choice] == 2:
                    return self.parameter["A"][choice] * 1
                else:
                    raise ValueError("Invalid counting for choice {}: {}".format(choice, counting[choice]))

            satisfied = 0
            for person, choice in enumerate(choices):
                current = get_calories(choice)

                # The two options of a person are `person` and `person + 1` (mod N).
                other_choice = ((person + (person + 1)) - choice) % self.parameter["N"]
                # Only the other food's share is evaluated, so the count of the
                # current food does not have to be adjusted for the trial move.
                counting[other_choice] += 1
                changed = get_calories(other_choice)
                counting[other_choice] -= 1

                satisfied += int(current >= changed)

            assert satisfied <= self.parameter["N"], "satisfied should not exceed N, got {}".format(satisfied)
            if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta":
                return self.rewards["rewarding_weight"] * ((satisfied / self.parameter["N"]) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "satisfied=all":
                return self.rewards["rewarding_weight"] * (satisfied == self.parameter["N"])
            else:
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else:
            return self.rewards["wrong_format"]


# File: server/Gym/environments/las_laser/environment.py
# (the package __init__.py re-exports LASLaser_Environment)
import random
from typing import Optional
from functools import cmp_to_key
from ...environment import VerifiableEnvironment


class LASLaser_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3562
    """Environment asking how many segments up to K rays from the origin can hit.

    ``_generate`` samples the segments, runs a DP over discretized ray
    directions for increasing ray counts, and picks one ``(K, answer)`` pair
    as the instance; ``scorer`` compares the parsed integer with it.
    """

    # The closing sentence asks for the total over all rays: the reference
    # answer is the DP value for K rays, not the best single ray.
    prompt_template = \
r"""There are {N} segments in the 2D plane, given as:
{segments}

You may shoot at most {K} rays from the origin (0, 0) in any directions. Each segment is allowed to intersect with **at most one** of these rays. Please output the **maximum number of segments** that can be intersected by these rays in total."""

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = 1.0, incorrect_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the LASLaser_Environment instance.

        Args:
            wrong_format: Reward when the output cannot be parsed as an integer.
            correct_answer: Reward when the answer equals the reference.
            incorrect_answer: Reward for any other integer.
            **kwargs: Forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "correct_answer": correct_answer,
            "incorrect_answer": incorrect_answer,
        }

    def _generate(self) -> None:
        """Sample the segments and choose ``K`` with its reference answer.

        Reads ``self.parameter["N"]`` (must be >= 2) and writes
        ``self.parameter["segments"]``, ``self.parameter["K"]`` and
        ``self.parameter["reference_answer"]``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        self.parameter["segments"] = segments = [(random.randint(1, 2 * N), random.randint(1, 2 * N), random.randint(1, 2 * N), random.randint(1, 2 * N)) for _ in range(N)]

        # load all 2*N endpoint vectors
        p0 = [None] * (2 * N)
        for i, (x1, y1, x2, y2) in enumerate(segments):
            p0[i] = (x1, y1)
            p0[N + i] = (x2, y2)

        # comparator for sorting by angle via cross product
        def cmp(i, j):
            x1, y1 = p0[i]
            x2, y2 = p0[j]
            c = x1 * y2 - y1 * x2
            if c > 0:
                return -1  # i comes before j
            elif c < 0:
                return 1  # i comes after j
            else:
                return 0  # same direction

        # sort all endpoint indices by their angle from the origin
        p = list(range(2 * N))
        p.sort(key=cmp_to_key(cmp))

        # discretize unique directions into 1..top
        w = [0] * (2 * N)
        top = 1
        now = p[0]
        w[now] = 1
        for idx in p[1:]:
            # if this direction is not collinear with 'now', it's a new bucket
            if p0[idx][0] * p0[now][1] - p0[idx][1] * p0[now][0] != 0:
                top += 1
                now = idx
            w[idx] = top

        # prepare interval data structures
        size = top + 2
        INF = top + 1
        left = [INF] * size
        num = [0] * size

        # build intervals [a, b] on the angle-index line for each segment
        for i in range(N):
            a = w[i]
            b = w[N + i]
            if a > b:
                a, b = b, a
            # record the leftmost start for any interval ending at b
            if a < left[b]:
                left[b] = a
            # difference array to count how many intervals cover each point
            num[a] += 1
            num[b + 1] -= 1

        # prefix sum to get coverage count at each discrete angle
        for i in range(1, top + 1):
            num[i] += num[i - 1]

        # make left[i] = min(left[i..top])
        for i in range(top - 1, 0, -1):
            if left[i] > left[i + 1]:
                left[i] = left[i + 1]

        # DP: f[i] = max covered with last ray chosen at or before i
        f = [0] * size
        Ks, Answers = [], []
        for K in range(1, N + 1):
            # try placing one more ray at each i, in descending order so that
            # f[left[i] - 1] still holds the value for one ray fewer
            for i in range(top, 0, -1):
                cand = f[left[i] - 1] + num[i]
                if cand > f[i]:
                    f[i] = cand
            # allow skipping placing at i (carry forward max)
            for i in range(1, top + 1):
                if f[i - 1] > f[i]:
                    f[i] = f[i - 1]

            # keep only the ray counts at which the optimum strictly improves
            if len(Answers) == 0 or f[top] > Answers[-1]:
                Ks.append(K)
                Answers.append(f[top])
            if Answers[-1] == N:
                break
        index = random.randint(0, len(Answers) - 1)
        self.parameter["K"], self.parameter["reference_answer"] = Ks[index], Answers[index]

    def _prompt_generate(self) -> str:
        """Render the prompt with one segment per line and the chosen K."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            segments="\n".join("({}, {})-({}, {})".format(x1, y1, x2, y2) for (x1, y1, x2, y2) in self.parameter["segments"]),
            K=self.parameter["K"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse a single integer; return None when parsing fails."""
        if answer is not None:
            answer = answer.strip()
            try:
                int_answer = int(answer)
                return int_answer
            except ValueError:
                return None
        else:
            return None

    def scorer(self, output: str) -> float:
        """Return ``correct_answer``, ``incorrect_answer`` or ``wrong_format`` for a model output."""
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm there.
        processed_result = self.processor(output)
        if processed_result is not None:
            if processed_result == self.parameter["reference_answer"]:
                return self.rewards["correct_answer"]
            else:
                return self.rewards["incorrect_answer"]
        else:
            return self.rewards["wrong_format"]
# File: server/Gym/environments/lcm/environment.py
import math
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class LCM_Environment(VerifiableEnvironment):
    """Environment asking for the least common multiple of two integers.

    ``_generate`` draws both operands from ``[2, MAX_a_b]`` and one of the
    prompt wordings; ``scorer`` compares the parsed integer with
    ``math.lcm`` of the operands.
    """

    prompt_templates = (
        "Please calculate the least common multiple (LCM) of {} and {}.",
        "What is the least common multiple (LCM) of {} and {}?",
        "Find the least common multiple (LCM) of {} and {}.",
        "Calculate the LCM of {} and {}.",
        "Determine the least common multiple (LCM) of {} and {}.",
        "What is the smallest positive integer that is a multiple of both {} and {}? (This is the LCM.)",
        "What is the least common multiple (LCM) of the numbers {} and {}?",
        "Compute the least common multiple (LCM) of {} and {}.",
        "Find the smallest number that is a multiple of both {} and {}. (This is the LCM.)",
        "What is the least common multiple (LCM) of these two numbers: {} and {}?",
    )  # This is probably unnecessary, but just in case we need to diversify the prompt templates.

    def __init__(self,
                 wrong_format: float = -1.0,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the LCM_Environment instance.

        Args:
            wrong_format: Reward when the output cannot be parsed as an integer.
            correct_answer: Reward when the answer equals the reference.
            wrong_answer: Reward for any other integer.
            **kwargs: Forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {}
        self.rewards["wrong_format"] = wrong_format
        self.rewards["correct_answer"] = correct_answer
        self.rewards["wrong_answer"] = wrong_answer

    def _generate(self) -> None:
        """Draw the operands, the reference answer and the prompt wording.

        Reads ``self.parameter["MAX_a_b"]`` (must be >= 2) and writes the
        keys ``a``, ``b``, ``reference_answer`` and ``prompt_template``
        (an index into ``prompt_templates``).
        """
        assert "MAX_a_b" in self.parameter, "MAX_a_b is required in parameter"
        upper_bound = self.parameter["MAX_a_b"]
        assert upper_bound >= 2, "MAX_a_b should be greater than or equal to 2"

        # Draw order (a, then b, then the template index) is kept so that a
        # seeded random generator yields the same instance as before.
        first = random.randint(2, upper_bound)
        self.parameter["a"] = first
        second = random.randint(2, upper_bound)
        self.parameter["b"] = second
        self.parameter["reference_answer"] = math.lcm(first, second)

        template_count = len(self.prompt_templates)
        self.parameter["prompt_template"] = random.randrange(template_count)

    def _prompt_generate(self) -> str:
        """Fill the selected prompt wording with the two operands."""
        template = self.prompt_templates[self.parameter["prompt_template"]]
        return template.format(self.parameter["a"], self.parameter["b"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse a single integer; return None for a missing or malformed answer."""
        if answer is None:
            return None
        stripped = answer.strip()
        try:
            return int(stripped)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return ``correct_answer``, ``wrong_answer`` or ``wrong_format`` for a model output."""
        # NOTE(review): self.processor comes from the base class; presumably it
        # extracts the answer text and delegates to _process - confirm there.
        parsed = self.processor(output)
        if parsed is None:
            return self.rewards["wrong_format"]
        if parsed == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
# 0000000000000000000000000000000000000000..1b18b027eecf5c87b9cebb4f522268d95b677a10
# --- /dev/null
# +++ b/server/Gym/environments/lds_two_counting/environment.py
# @@ -0,0 +1,96 @@
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class LDSTwo_Counting_Environment(VerifiableEnvironment):
    """Counting task: permutations of 1..N with A[X] = Y and no decreasing subsequence of length 3.

    `self.parameter` and `self.processor` come from the `VerifiableEnvironment`
    base class, which is not part of this excerpt.
    """

    prompt_template = \
r"""Consider a permutation A[1], A[2], ..., A[{N}] of the integers 1 through {N} that satisfies the following conditions:
- `A` is a **permutation**, meaning each integer from 1 to {N} appears **exactly once**.
- The value at position {X} is fixed: A[{X}] = {Y}.
- The permutation must **not contain any decreasing subsequence of length 3**. That is, there must not exist indices 1 <= a < b < c <= {N} such that A[a] > A[b] > A[c].

Please count the number of such permutations.

**Output Format:** Your final answer should be a single integer — the total number of valid permutations."""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta", rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the LDSTwo_Counting_Environment instance.

        Args:
            wrong_format: Reward for an unparsable or negative answer.
            rewarding_strategy: Either "(min/max)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(min/max)^beta" strategy.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Pick X and Y uniformly from [1, N] and compute the reference count in closed form."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        X = self.parameter["X"] = random.randint(1, N)
        Y = self.parameter["Y"] = random.randint(1, N)

        def binomial(n: int, m: int):
            # Exact integer binomial coefficient; 0 outside the valid range.
            if m < 0 or n < m:
                return 0
            value = 1
            for i in range(m):
                value = value * (n - i) // (i + 1)
            return value

        def paths(sx, sy, tx, ty):
            # Binomial count for moving from (sx, sy) to (tx, ty).
            return binomial(tx - sx + ty - sy, tx - sx)

        def restricted_paths(sx, sy, tx, ty):
            # NOTE(review): subtracting the count towards the mirrored target
            # (ty + 1, tx - 1) looks like the reflection principle - confirm.
            return paths(sx, sy, tx, ty) - paths(sx, sy, ty + 1, tx - 1)

        # Only the local copies are swapped; the stored X and Y keep their drawn values.
        if Y < X:
            X, Y = Y, X
        before_pin = restricted_paths(0, 0, X - 1, Y - 1)
        after_pin = restricted_paths(X, Y, N, N)
        self.parameter["reference_answer"] = before_pin * after_pin

    def _prompt_generate(self) -> str:
        """Render the prompt with N and the pinned position/value."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            X=self.parameter["X"],
            Y=self.parameter["Y"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse `answer` as an integer; return None when it is missing or not an integer literal."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a raw output against the reference count using the configured strategy."""
        guess = self.processor(output)
        if guess is None:
            return self.rewards["wrong_format"]
        if guess < 0:
            return self.rewards["wrong_format"]

        gold = self.parameter["reference_answer"]
        weight = self.rewards["rewarding_weight"]
        if gold == 0:
            # A ratio is undefined for a zero reference, so only an exact 0 scores.
            return weight * (guess == 0)

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            return weight * ((min(gold, guess) / max(gold, guess)) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (guess == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# \ No newline at end of file
# diff --git a/server/Gym/environments/light_up_puzzle/__init__.py b/server/Gym/environments/light_up_puzzle/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..9f41f08b77d94354a0db16bb2e42f0d362dc12ff
# --- /dev/null
# +++ b/server/Gym/environments/light_up_puzzle/__init__.py
# @@ -0,0 +1 @@
# +from .environment import LightUpPuzzle_Environment
# diff --git a/server/Gym/environments/light_up_puzzle/environment.py b/server/Gym/environments/light_up_puzzle/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..ea809a346ac887aafec7a4e18a90737cd9ac3278
# --- /dev/null
# +++ b/server/Gym/environments/light_up_puzzle/environment.py
# @@ -0,0 +1,184 @@
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class LightUpPuzzle_Environment(VerifiableEnvironment):
    """Light-Up style grid puzzle: place bulbs on white cells subject to lighting rules.

    Cell alphabet used throughout: "W" white, "L" bulb, "B" plain black,
    "0".."4" black cell carrying a required count of adjacent bulbs.
    """

    prompt_template = \
r"""You are given a {N} × {M} grid. Each cell contains either a number from `0` to `4`, or a character `B` or `W`.
- All `W` cells are considered **white cells** (including those that may be replaced with `L` later).
- All other cells (`0`–`4` or `B`) are considered **black cells**.

You may replace some `W` cells with `L`, indicating the placement of a **light bulb**. A light bulb illuminates its own cell and extends light in all **four directions** (up, down, left, right), stopping when it hits a black cell or the edge of the grid. Please place light bulbs such that:
1. **Each white cell** is illuminated by **at least one** light bulb.
2. No light bulb is illuminated by another light bulb, i.e., no two light bulbs can be placed in the same row or column without a black cell in between.
3. **Each black cell** with a number from `0` to `4` must have **exactly that many** light bulbs in its 4 neighboring cells (up, down, left, right).

The grid is given in **row-major order**:
{grid}

**Output Format:** Output {N} lines, each containing {M} characters with no separators. Some `W` cells should be replaced with `L` to indicate light bulbs; all other cells remain unchanged."""

    def __init__(self,
                 black_cell_density_range: tuple = (0.6, 0.95),
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(satisfied/all)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the LightUpPuzzle_Environment instance.

        Args:
            black_cell_density_range: (low, high) bounds, strictly inside (0, 1),
                for the fraction of cells turned black during generation.
            wrong_format: Reward for an unparsable or wrongly sized grid.
            invalid_solution: Reward for a grid that breaks rule 1 or 2 or alters fixed cells.
            rewarding_strategy: Either "(satisfied/all)^beta" or "satisfied=all".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(satisfied/all)^beta" strategy.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)

        self.black_cell_density_range = black_cell_density_range
        assert len(black_cell_density_range) == 2 and 0.0 < black_cell_density_range[0] < black_cell_density_range[1] < 1.0, "black_cell_density_range should be a tuple of two floats in (0, 1)"

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Build a random solvable puzzle together with one valid bulb placement."""
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = random.randint(2, MAX_N_M)
        M = random.randint(2, MAX_N_M)
        self.parameter["N"], self.parameter["M"] = N, M
        directions = ((-1, 0), (+1, 0), (0, -1), (0, +1))

        grid = [["W"] * M for _ in range(N)]

        # Blacken a random subset, keeping at least one black and one white cell.
        black_cell_density = random.uniform(self.black_cell_density_range[0], self.black_cell_density_range[1])
        black_count = max(1, min(int(N * M * black_cell_density), N * M - 1))
        for cell in random.sample(range(N * M), black_count):
            row, column = divmod(cell, M)
            grid[row][column] = "B"

        white_cells = [(i, j) for i in range(N) for j in range(M) if grid[i][j] == "W"]
        assert len(white_cells) >= 1, "There should be at least one white cell"
        random.shuffle(white_cells)

        # Greedy placement: every white cell not yet lit receives a bulb, so the
        # final placement lights all white cells and no bulb sees another bulb.
        illuminated = [[False] * M for _ in range(N)]
        for i, j in white_cells:
            if illuminated[i][j]:
                continue
            grid[i][j] = "L"
            illuminated[i][j] = True

            for di, dj in directions:
                ni, nj = i + di, j + dj
                while 0 <= ni < N and 0 <= nj < M:
                    if grid[ni][nj] == "B":
                        break
                    assert grid[ni][nj] != "L", "There should be no light bulb in the same row or column without a black cell in between"
                    illuminated[ni][nj] = True
                    ni += di
                    nj += dj

        assert "density" in self.parameter, "density is required in parameter"
        density = self.parameter["density"]
        assert 0 < density < 1, "density should be between 0 and 1"

        # Turn a random share of the black cells into numbered clues.
        black_cells = [(i, j) for i in range(N) for j in range(M) if grid[i][j] == "B"]
        black_cells = random.sample(black_cells, max(1, int(len(black_cells) * density)))
        assert len(black_cells) > 0, "There should be at least one black cell with a number"
        for i, j in black_cells:
            counting = 0
            for di, dj in directions:
                ni, nj = i + di, j + dj
                if 0 <= ni < N and 0 <= nj < M and grid[ni][nj] == "L":
                    counting += 1
            grid[i][j] = str(counting)

        self.parameter["reference_answer"] = "\n".join("".join(row) for row in grid)

        # The published puzzle hides the bulbs again.
        self.parameter["grid"] = ["".join(cell if cell != "L" else "W" for cell in row) for row in grid]

    def _prompt_generate(self) -> str:
        """Render the prompt with the grid size and the bulb-free grid."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            grid="\n".join("".join(row) for row in self.parameter["grid"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Split `answer` into stripped, non-empty lines; return None when it is missing."""
        if answer is None:
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]

    def scorer(self, output: str) -> float:
        """Validate a submitted grid and reward the fraction of numbered clues it satisfies."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N, M = self.parameter["N"], self.parameter["M"]
        puzzle = self.parameter["grid"]
        solution = processed_result
        directions = ((-1, 0), (+1, 0), (0, -1), (0, +1))

        if len(solution) != N or any(len(row) != M for row in solution):
            return self.rewards["wrong_format"]

        # Fixed cells must be untouched; white cells may only stay "W" or become "L".
        for solution_row, original_row in zip(solution, puzzle):
            for solution_cell, original_cell in zip(solution_row, original_row):
                if original_cell == "W":
                    if solution_cell not in "WL":
                        return self.rewards["invalid_solution"]
                elif original_cell in "B01234":
                    if solution_cell != original_cell:
                        return self.rewards["invalid_solution"]
                else:
                    assert False, "Unknown cell type: {}".format(original_cell)

        # Cast light from every bulb; meeting another bulb breaks rule 2.
        illuminated = [[False] * M for _ in range(N)]
        for i in range(N):
            for j in range(M):
                if solution[i][j] != "L":
                    continue
                illuminated[i][j] = True
                for di, dj in directions:
                    ni, nj = i + di, j + dj
                    while 0 <= ni < N and 0 <= nj < M:
                        seen = solution[ni][nj]
                        if seen != "W":
                            if seen == "L":
                                return self.rewards["invalid_solution"]
                            elif seen in "B01234":
                                break
                            else:
                                assert False, "Unknown cell type: {}".format(solution[ni][nj])
                        illuminated[ni][nj] = True
                        ni += di
                        nj += dj

        # Rule 1: every white cell of the puzzle must be lit.
        if any(not illuminated[i][j] for i in range(N) for j in range(M) if puzzle[i][j] == "W"):
            return self.rewards["invalid_solution"]

        # Rule 3 is graded rather than enforced.
        satisfied, total = 0, 0
        for i in range(N):
            for j in range(M):
                if puzzle[i][j] not in "01234":
                    continue
                total += 1
                counting = 0
                for di, dj in directions:
                    ni, nj = i + di, j + dj
                    if 0 <= ni < N and 0 <= nj < M and solution[ni][nj] == "L":
                        counting += 1
                if counting == int(puzzle[i][j]):
                    satisfied += 1
        assert satisfied <= total and total > 0, "satisfied should be less than or equal to total and total should be greater than 0"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta":
            return self.rewards["rewarding_weight"] * ((satisfied / total) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all":
            return self.rewards["rewarding_weight"] * (satisfied == total)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# \ No newline at end of file
# diff --git a/server/Gym/environments/link_beads/__init__.py b/server/Gym/environments/link_beads/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..3d4e04fc22cb96dd2394ccf83ba13c8c0a9df73d
# --- /dev/null
# +++ b/server/Gym/environments/link_beads/__init__.py
# @@ -0,0 +1 @@
# +from .environment import LinkBeads_Environment
# diff --git a/server/Gym/environments/link_beads/environment.py b/server/Gym/environments/link_beads/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..dbc1154708ad08fa08ce27c07457c0adb394c227
# --- /dev/null
# +++ b/server/Gym/environments/link_beads/environment.py
# @@ -0,0 +1,209 @@
import random
import networkx
from typing import Optional
from ...environment import VerifiableEnvironment


class LinkBeads_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3647
    """Tree task: maximise the total weight of edges that could be blue under the Append/Insert rules."""

    prompt_template = \
r"""You are given a connected undirected graph with {N} nodes labeled from 0 to {N_minus_1}, connected by {N_minus_1} undirected edges (so this is a tree). Each edge is represented as a tuple `(u, v, w)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v` with weight `w`:
{edges}

These edges are the result of a sequence of operations, each either:
- `Append(x, v)`: Add a new node `x` and connect it to an existing node `v` with a **red edge**.
- `Insert(x, u, v)`: Remove the **red edge** between nodes `u` and `v`, and add **two blue edges** - one from `u` to `x` and one from `x` to `v`.

After all operations, the final tree is given (as above), but the **edge colors are unknown**. Your task is to determine the **maximum total length of blue edges** that could exist in any valid sequence of operations that produces the given graph.

**Output Format:** A single integer — the maximum possible total length of blue edges."""

    def __init__(self,
                 wrong_format: float = -1.0, wrong_answer: float = 0.0, correct_answer: float = +1.0,
                 **kwargs):
        """
        Initialize the LinkBeads_Environment instance.

        Args:
            wrong_format: Reward when the answer cannot be parsed as an integer.
            wrong_answer: Reward when the parsed answer differs from the reference.
            correct_answer: Reward when the parsed answer equals the reference.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "wrong_answer": wrong_answer,
            "correct_answer": correct_answer,
        }

    def _generate(self) -> None:
        """Generate a random weighted tree and compute the reference answer by a rerooting DP."""
        import heapq  # needed only by the helper multiset below

        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Random tree: every vertex after the first in a shuffled order attaches
        # to a uniformly chosen earlier vertex, with a weight in [1, N].
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations):
            if index == 0:
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v, random.randint(1, N)))
        random.shuffle(edges)

        for u, v, w in edges:
            assert 0 <= u < v < N
        assert len(edges) == len(set((u, v) for u, v, w in edges)) == N - 1

        tree = networkx.Graph()
        tree.add_weighted_edges_from(edges)
        assert networkx.is_tree(tree)

        class MultiSetMax:
            """Multiset answering "current maximum", with lazy deletion."""

            def __init__(self):
                # Values are stored negated so that heapq (a min-heap) acts as a max-heap.
                self._add = []
                self._rem = []

            def insert(self, x):
                heapq.heappush(self._add, -x)

            def remove(self, x):
                # Deletion is deferred: the value is only queued here.
                heapq.heappush(self._rem, -x)

            def _clean(self):
                # Cancel queued deletions for as long as both heap tops agree.
                while self._add and self._rem and self._add[0] == self._rem[0]:
                    heapq.heappop(self._add)
                    heapq.heappop(self._rem)

            def get_max(self):
                self._clean()
                if not self._add:
                    return None
                return -self._add[0]

        adj = [[] for _ in range(N)]
        total_weight = 0
        for u, v, w in edges:
            adj[u].append((v, w))
            adj[v].append((u, w))
            total_weight += w

        # A safe "-infinity" derived from the input size.
        NEG_INF = -(total_weight + 5)

        dp0 = [0] * N   # dp0[x] == dp[x][0]
        dp1 = [0] * N   # dp1[x] == dp[x][1]
        summ = [0] * N  # summ[x] accumulates the best contribution of each child
        st = [MultiSetMax() for _ in range(N)]

        def refresh(node):
            # Recompute both dp values of `node` from its running sum and multiset.
            dp0[node] = summ[node]
            best_delta = st[node].get_max()
            dp1[node] = summ[node] + best_delta if best_delta is not None else NEG_INF

        # First DFS: compute dp0, dp1, summ and fill each st[x] for the tree rooted at 0.
        def dfs(x, parent):
            for y, w in adj[x]:
                if y == parent:
                    continue
                dfs(y, x)
                v1 = max(dp0[y], dp1[y] + w)
                v2 = dp0[y] + w
                summ[x] += v1
                st[x].insert(v2 - v1)
            refresh(x)

        ans = 0

        # Second DFS: rerooting, so that every node is considered as the root.
        def dfs0(x, parent):
            nonlocal ans
            # The score only counts when the parent edge is red, i.e. dp0[x].
            if dp0[x] > ans:
                ans = dp0[x]

            for y, w in adj[x]:
                if y == parent:
                    continue

                # Back up all mutable state of x and y.
                saved_x = (dp0[x], dp1[x], summ[x])
                saved_y = (dp0[y], dp1[y], summ[y])

                # Remove y's contribution from x.
                v1y = max(dp0[y], dp1[y] + w)
                delta_xy = (dp0[y] + w) - v1y
                st[x].remove(delta_xy)
                summ[x] -= v1y
                refresh(x)

                # Add x's contribution to y, as if the tree were rerooted at y.
                v1x = max(dp0[x], dp1[x] + w)
                delta_yx = (dp0[x] + w) - v1x
                summ[y] += v1x
                st[y].insert(delta_yx)
                refresh(y)

                dfs0(y, x)

                # Restore both nodes and undo the multiset changes.
                dp0[x], dp1[x], summ[x] = saved_x
                st[x].insert(delta_xy)
                dp0[y], dp1[y], summ[y] = saved_y
                st[y].remove(delta_yx)

        dfs(0, -1)
        dfs0(0, -1)
        self.parameter["reference_answer"] = ans

    def _prompt_generate(self) -> str:
        """Render the prompt with the node count and one `(u, v, w)` line per edge."""
        N = self.parameter["N"]
        edge_lines = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"])
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            edges=edge_lines,
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse `answer` as an integer; return None when it is missing or not an integer literal."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Map a raw model output to one of the three configured rewards."""
        parsed = self.processor(output)
        if parsed is None:
            return self.rewards["wrong_format"]
        if parsed == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
# \ No newline at end of file
# diff --git a/server/Gym/environments/lis_lds_concatenation/__init__.py b/server/Gym/environments/lis_lds_concatenation/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..058c714bb2873d2dd63382a4656e99acf4b75861
# --- /dev/null
# +++ b/server/Gym/environments/lis_lds_concatenation/__init__.py
# @@ -0,0 +1 @@
# +from .environment import LIS_LDS_Concatenation_Environment
# diff --git a/server/Gym/environments/lis_lds_concatenation/environment.py b/server/Gym/environments/lis_lds_concatenation/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..45dba1a836dfc6caf7096d9627aa1c50071a0e41
# --- /dev/null
# +++ b/server/Gym/environments/lis_lds_concatenation/environment.py
# @@ -0,0 +1,124 @@
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class LIS_LDS_Concatenation_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1091
    """Subsequence task: pick indices whose values strictly rise and then strictly fall."""

    prompt_template = \
r"""You are given an array `A` of length {N}. The values are as follows (indexing starts at 0):
{A}

Your task is to select a strictly increasing sequence of indices `i1, i2, ..., ik` such that:
- `0 ≤ i1 < i2 < ... < ik < {N}`
- Let `a[1], a[2], ..., a[k]` be the values of `A` at the selected indices (i.e., `a[1] = A[i1]`, `a[2] = A[i2]`, ..., `a[k] = A[ik]).` We want the sequence `a[1] < a[2] < ... < a[m] > a[m + 1] > ... > a[k]` for some `m` that satisfies `1 <= m <= k`. In other words, it is allowed for the sequence to first be strictly increasing, then strictly decreasing. It is also allowed for the sequence to be entirely strictly increasing or entirely strictly decreasing.
- Your goal is to **maximize the length** of the selected sequence `k`.

**Output Format:**
Your final answer should be a single line containing the selected indices `i1, i2, ..., ik`, separated by **spaces**.
Example: `0 2 3` (do **NOT** include the backticks or quotes); this means the sequence has length `k = 3`, with `i1 = 0`, `i2 = 2`, and `i3 = 3`.
"""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the LIS_LDS_Concatenation_Environment instance.

        Args:
            wrong_format: Reward when the answer is not a list of integers.
            invalid_solution: Reward when the indices are out of range, not
                strictly increasing, or the values do not rise and then fall.
            rewarding_strategy: Either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Draw N values from [0, MAX] and compute the optimal length with two quadratic DPs."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        assert "MAX" in self.parameter, "MAX is required in parameter"
        MAX = self.parameter["MAX"]
        assert MAX >= 1, "MAX should be greater than or equal to 1"

        array = self.parameter["array"] = [random.randint(0, MAX) for _ in range(N)]
        assert len(self.parameter["array"]) == self.parameter["N"], "array should have the same length as N"

        # rising[i]: longest strictly increasing subsequence ending at i.
        rising = [1] * N
        for i in range(N):
            for j in range(i):
                if array[j] < array[i]:
                    rising[i] = max(rising[i], rising[j] + 1)

        # falling[i]: longest strictly decreasing subsequence starting at i.
        falling = [1] * N
        for i in range(N - 1, -1, -1):
            for j in range(i + 1, N):
                if array[i] > array[j]:
                    falling[i] = max(falling[i], falling[j] + 1)

        # The peak element is counted by both tables, hence the -1.
        self.parameter["gold_answer"] = max((rising[i] + falling[i] - 1 for i in range(N)), default=0)

    def _prompt_generate(self) -> str:
        """Render the prompt with N and the space-separated array."""
        return self.prompt_template.format(N=self.parameter["N"], A=" ".join(map(str, self.parameter["array"])))

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None when missing or malformed."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return list(map(int, answer.strip().split()))
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Validate the chosen indices and reward their count relative to the optimum."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        values = []
        previous_index = None
        for position, index in enumerate(processed_result):
            if not (0 <= index < N):
                return self.rewards["invalid_solution"]
            if position > 0 and not (previous_index < index):
                return self.rewards["invalid_solution"]
            values.append(self.parameter["array"][index])
            previous_index = index

        count = len(values)
        # rises_up_to[i]: values[0..i] is strictly increasing.
        rises_up_to = [True] * count
        for i in range(1, count):
            rises_up_to[i] = rises_up_to[i - 1] and (values[i - 1] < values[i])

        # Scan from the right for a peak i such that values[i..] is strictly
        # decreasing. An empty selection has no peak and is therefore rejected.
        falls_from_here = True
        found = False
        for i in range(count - 1, -1, -1):
            if i < count - 1:
                falls_from_here = falls_from_here and (values[i] > values[i + 1])
            if rises_up_to[i] and falls_from_here:
                found = True
                break

        if not found:
            return self.rewards["invalid_solution"]

        gold = self.parameter["gold_answer"]
        assert len(processed_result) <= gold, "The length of the answer should be less than or equal to the gold answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta":
            return self.rewards["rewarding_weight"] * ((len(processed_result) / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * int(len(processed_result) == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# \ No newline at end of file
# diff --git a/server/Gym/environments/liz_lollipop/__init__.py b/server/Gym/environments/liz_lollipop/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..bbc7a04380b3008a6d92e6c1134676c289f61cf1
# --- /dev/null
# +++ b/server/Gym/environments/liz_lollipop/__init__.py
# @@ -0,0 +1 @@
# +from .environment import LIZ_Lollipop_Environment
# diff --git a/server/Gym/environments/liz_lollipop/environment.py b/server/Gym/environments/liz_lollipop/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..b3021b404f80cd9ce6ff6c7048ce2d8daa050186
# ---
# /dev/null
# +++ b/server/Gym/environments/liz_lollipop/environment.py
# @@ -0,0 +1,153 @@
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class LIZ_Lollipop_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3514
    """Array task: for every target sum 1..S, name a contiguous subarray of 1s and 2s with that sum.

    `self.parameter` and `self.processor` come from the `VerifiableEnvironment`
    base class, which is not part of this excerpt.
    """

    prompt_template = \
r"""You are given an array `A` of length {N}: {A}
Each element in `A` is either 1 or 2, and the total sum of the array is {S}.

You need to output {S} lines. For the i-th line (1 ≤ i ≤ {S}), output two integers `l` and `r` (0-indexed, inclusive), separated by a space:
- If there exists a contiguous subarray A[l : r + 1] (Python-style slicing, so it includes A[l] & A[r] but NOT A[r + 1]) such that the sum of its elements is exactly `i`, output `l` and `r`.
- If no such subarray exists, output `-1 -1`."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(correct/all)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the LIZ_Lollipop_Environment instance.

        Args:
            wrong_format: Reward when the answer cannot be parsed into integer pairs.
            invalid_solution: Reward when the number of lines is wrong or a pair
                is neither `-1 -1` nor a valid index range.
            rewarding_strategy: Either "(correct/all)^beta" or "correct=all".
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by the "(correct/all)^beta" strategy.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Draw a random array of 1s and 2s and record, for each sum 1..S, whether it is reachable."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        two_probability = random.random()
        A = self.parameter["A"] = [2 if random.random() < two_probability else 1 for _ in range(N)]

        # 1-indexed working copy; self.parameter["A"] keeps the 0-indexed list.
        A = [0] + A.copy()
        # Prefix sums.
        pref = [0] * (N + 1)
        for i in range(1, N + 1):
            pref[i] = pref[i - 1] + A[i]
        S = pref[N]

        # For each sum k (0..S), one interval [l[k], r[k]] that sums to k, if known.
        # A (0, 0) entry means "not known yet".
        l = [0] * (S + 3)
        r = [0] * (S + 3)
        # Max[0] = largest even sum seen, Max[1] = largest odd sum seen.
        Max = [-1, -1]

        def up(val, ll, rr):
            p = val & 1
            if val > Max[p]:
                Max[p] = val
                l[val] = ll
                r[val] = rr

        # Record all proper prefixes and suffixes ...
        for i in range(1, N):
            up(S - pref[i], i + 1, N)  # suffix sum
            up(pref[i], 1, i)          # prefix sum
        # ... and the whole array.
        up(S, 1, N)

        # Propagate downward from S to 1: an interval for k is derived from the
        # interval for k + 2 by dropping a 2 from one end or a 1 from both ends.
        for k in range(S, 0, -1):
            if l[k] == 0 and r[k] == 0:
                pl, pr = l[k + 2], r[k + 2]
                if pl and pr:
                    ll, rr = pl, pr
                    if A[pl] == 2:
                        ll += 1
                    elif A[pr] == 2:
                        rr -= 1
                    else:
                        ll += 1
                        rr -= 1
                    l[k], r[k] = ll, rr

        self.parameter["reference_answer"] = []
        self.parameter["existence"] = []
        for x in range(1, S + 1):
            # Impossible if x > total sum, or no interval of that parity with sum >= x was seen.
            if x > S or x > Max[x & 1]:
                self.parameter["reference_answer"].append("-1 -1")
                self.parameter["existence"].append(False)
            else:
                # Convert back to the 0-indexed positions used in the prompt.
                self.parameter["reference_answer"].append("{} {}".format(l[x] - 1, r[x] - 1))
                self.parameter["existence"].append(True)
        self.parameter["reference_answer"] = "\n".join(self.parameter["reference_answer"])

    def _prompt_generate(self) -> str:
        """Render the prompt with N, the labelled array entries and their total."""
        A = self.parameter["A"]
        return self.prompt_template.format(
            N=self.parameter["N"],
            A=" ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(A)),
            S=sum(A),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse one `l r` integer pair per non-empty line.

        Returns:
            A list of `(l, r)` tuples, or None when `answer` is None or any
            non-empty line is not exactly two integers.
        """
        if answer is None:
            return None
        answers = []
        try:
            for line in answer.strip().splitlines():
                line = line.strip()
                if line:
                    l, r = map(int, line.split())
                    answers.append((l, r))
        except ValueError:
            # Both failure modes raise ValueError: a token that is not an integer
            # and a line without exactly two tokens. Catching only this, instead
            # of a bare `except`, lets KeyboardInterrupt/SystemExit propagate.
            return None
        return answers

    def scorer(self, output: str) -> float:
        """Validate every submitted pair and reward the fraction of correct lines."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]

        N = self.parameter["N"]
        # Inclusive prefix sums over the 0-indexed array.
        S = [0] * N
        for i, Ai in enumerate(self.parameter["A"]):
            S[i] = (S[i - 1] if i - 1 >= 0 else 0) + Ai

        if len(processed_result) != S[N - 1]:
            return self.rewards["invalid_solution"]
        assert len(processed_result) == len(self.parameter["existence"]), "Length of processed result does not match existence list"

        correct = 0
        for x in range(1, len(processed_result) + 1):
            l, r = processed_result[x - 1]
            existence = self.parameter["existence"][x - 1]
            in_range = 0 <= l <= r < N
            if not ((l, r) == (-1, -1) or in_range):
                return self.rewards["invalid_solution"]
            if existence:
                correct += int(in_range and (S[r] - (S[l - 1] if l > 0 else 0) == x))
            else:
                if in_range:
                    # Sanity check on the generator: a sum flagged unreachable
                    # must not be produced by any interval.
                    assert S[r] - (S[l - 1] if l > 0 else 0) != x
                correct += int((l, r) == (-1, -1))

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(correct/all)^beta":
            return self.rewards["rewarding_weight"] * ((correct / len(processed_result)) ** self.rewards["rewarding_beta"])
        if strategy == "correct=all":
            return self.rewards["rewarding_weight"] * (correct == len(processed_result))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# \ No newline at end of file
# diff --git a/server/Gym/environments/longest_double_palindrome/__init__.py b/server/Gym/environments/longest_double_palindrome/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..d5d74cc52a1cc59c8eb92cd66d2ed6580824c890
# --- /dev/null
# +++ b/server/Gym/environments/longest_double_palindrome/__init__.py
# @@ -0,0 +1 @@
# +from .environment import Longest_DoublePalindrome_Environment
# diff --git a/server/Gym/environments/longest_double_palindrome/environment.py b/server/Gym/environments/longest_double_palindrome/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..3345795528bf9ed344331cd56bcd5beaff84a4e8
# --- /dev/null
# +++ b/server/Gym/environments/longest_double_palindrome/environment.py
# @@ -0,0 +1,149 @@
# +import random
# +from typing import Optional, Tuple
# +from ...environment import VerifiableEnvironment
# +
# +
# +class Longest_DoublePalindrome_Environment(VerifiableEnvironment) :
# + prompt_template = \
# +r"""You are given a string S of length {N} (0-indexed): {S}
# +
# +Please find two non-empty intervals [A, B) and [B, C) (obviously, 0 <= A < B < C <= {N}) such that:
# +- S[A : B] and S[B : C] are both palindromes (S[a : b] refers to the substring starting at index a and ending at index b - 1, i.e., S[a] + S[a+1] + ... + S[b-1]).
# +- Try your best to maximize C - A.
# +
# +**Output Format:** Your final answer should be three integers A, B, and C, separated by spaces."""
# +
# + def __init__(self,
# + wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
# + **kwargs):
# + """
# + Initialize the Longest_DoublePalindrome_Environment instance.
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + one_probability = random.uniform(0.1, 0.9) + endpoints = random.sample(range(N + 1), 3) + endpoints.sort() + + def generate_random(length : int) -> str : + assert length >= 0, "length should be non-negative" + return "".join("1" if random.random() < one_probability else "0" for _ in range(length)) + def generate_palindrome(length : int) -> str : + assert length >= 1, "length should be at least 1" + half = length // 2 + first_half = "".join("1" if random.random() < one_probability else "0" for _ in range(half)) + if length % 2 == 0: + return first_half + first_half[::-1] + else: + return first_half + ("1" if random.random() < one_probability else "0") + first_half[::-1] + S = self.parameter["S"] = \ + generate_random(endpoints[0]) + \ + generate_palindrome(endpoints[1] - endpoints[0]) + \ + generate_palindrome(endpoints[2] - endpoints[1]) + \ + generate_random(N - endpoints[2]) + assert len(S) == N, "S should have length N" + + + modified = ['@', '#'] + for ch in S: + modified.append(ch) + modified.append('#') + modified.append('$') + M = len(modified) + + # Arrays for Manacher + p = [0] * M + # Arrays to record max palindromic radii ending/starting at positions + l = [0] * M + r = [0] * M + + center = 0 + right = 0 + + # Manacher's algorithm on the modified string + for i in range(1, M - 1): + mirror = 2 * center - i + if i < right: + p[i] = min(right - i, p[mirror]) + # Expand around center i + while modified[i + 1 + p[i]] == modified[i - 1 - p[i]]: + p[i] += 1 + # Update center and right boundary + if i + p[i] > right: + center = i + 
right = i + p[i] + # Record palindromic spans (adjusted from C++ p: p_python = p_cpp - 1) + if p[i] > 0: + l[i + p[i]] = max(l[i + p[i]], p[i]) + r[i - p[i]] = max(r[i - p[i]], p[i]) + + # Propagate the best spans outward + # For l: propagate from right to left on odd indices + for i in range(M - 4, 0, -2): + l[i] = max(l[i], l[i + 2] - 2) + # For r: propagate from left to right on odd indices + for i in range(3, M, 2): + r[i] = max(r[i], r[i - 2] - 2) + + # Compute the answer by checking split points at separator positions + ans = 0 + for i in range(1, M, 2): # only consider '#' positions + if l[i] > 0 and r[i] > 0: + ans = max(ans, l[i] + r[i]) + + self.parameter["gold_answer"] = ans + assert self.parameter["gold_answer"] >= endpoints[2] - endpoints[0] + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], S = self.parameter["S"]) + + + def _process(self, answer : Optional[str]) -> Optional[Tuple[int]] : + if answer is not None : + answer = answer.strip() + try : + A, B, C = map(int, answer.split()) + return A, B, C + except : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + A, B, C = processed_result + if not (0 <= A < B < C <= self.parameter["N"]) : + return self.rewards["invalid_solution"] + def check_palindrome(s : str) -> bool : + return s == s[:: -1] + if not (check_palindrome(self.parameter["S"][A : B]) and check_palindrome(self.parameter["S"][B : C])) : + return self.rewards["invalid_solution"] + + answer, gold = C - A, self.parameter["gold_answer"] + assert answer <= gold, "answer should not be greater than gold" + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return 
self.rewards["rewarding_weight"] * int(answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/longest_matching_subsequence/__init__.py b/server/Gym/environments/longest_matching_subsequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7281b0330f3c132d3488e529eb7d45c243344847 --- /dev/null +++ b/server/Gym/environments/longest_matching_subsequence/__init__.py @@ -0,0 +1 @@ +from .environment import Longest_MatchingSubsequence_Environment diff --git a/server/Gym/environments/longest_matching_subsequence/environment.py b/server/Gym/environments/longest_matching_subsequence/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..466ed8e423526fa9bd6c72c51eb04456916cffc7 --- /dev/null +++ b/server/Gym/environments/longest_matching_subsequence/environment.py @@ -0,0 +1,101 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class Longest_MatchingSubsequence_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1799 + prompt_template = \ +r"""You are given an array `A` of length {N}, indexed from 0 to {N_minus_1}. The array is as follows: +{A} + +Your task is to select a **strictly increasing sequence of indices** `i_1, i_2, ..., i_k` (0 ≤ i_1 < i_2 < ... < i_k < {N}) such that: +- Let B[1] = A[i_1], B[2] = A[i_2], ..., B[k] = A[i_k] (B's indices are 1-based, while A's indices are 0-based). +- Try your best to **maximize** the number of positions `j` (1 ≤ j ≤ k) such that B[j] = j. + +**Output Format:** Your final answer should be a single line containing the selected indices i_1, i_2, ..., i_k, separated by **spaces**. 
def scorer(self, output : str) -> float :
    """
    Score a model output for the matching-subsequence task.

    The parsed answer is a list of indices into `A`. It must be strictly
    increasing and every index must lie in `[0, N)`; otherwise the
    `invalid_solution` reward is returned. A valid answer is scored by the
    number of 1-based positions `j` whose selected value equals `j`, compared
    against the stored `gold_answer`.

    Returns:
        `wrong_format` when the processor yields None, `invalid_solution` for
        an illegal index list, otherwise the configured reward.
    """
    indices = self.processor(output)
    if indices is None :
        return self.rewards["wrong_format"]
    assert isinstance(indices, list), "processed_result should be a list"

    matched = 0
    previous = None
    for position, index in enumerate(indices, start = 1) :
        if index < 0 or index >= self.parameter["N"] :
            return self.rewards["invalid_solution"]
        if previous is not None and previous >= index :
            return self.rewards["invalid_solution"]
        # B[position] = A[index]; count it when the value equals its 1-based position
        if self.parameter["A"][index] == position :
            matched += 1
        previous = index

    answer = matched
    gold = self.parameter["gold_answer"]
    assert answer <= gold, "answer should be less than or equal to gold_answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * int(answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
LongestMaxDiffBoundedInterval_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3512 + prompt_template = \ +r"""You are given an array A of length {N}: {A} + +Please find the longest **contiguous** subarray A[l : r] (from index `l` to `r - 1`, inclusive) such that the **maximum difference between any two elements** in the subarray is at most {K}. Output `l` and `r`, separated by a space.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the LongestMaxDiffBoundedInterval_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "unsuccessful_solution" : unsuccessful_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + A = self.parameter["A"] = [random.randint(0, N) for _ in range(N)] + K = self.parameter["K"] = random.randint(0, max(max(A) - min(A) - 1, 0)) + + + # Deques to maintain indices of potential max/min in the current window + max_deque = deque() # will store indices of A in decreasing order of values + min_deque = deque() # will store indices of A in increasing order of values + + left = 0 + answer = 0 + + for right, value in enumerate(A): + # Maintain max_deque: pop smaller elements from the tail + while max_deque and A[max_deque[-1]] <= value: + max_deque.pop() + max_deque.append(right) + + # Maintain min_deque: pop larger elements from the tail + while min_deque and A[min_deque[-1]] >= value: + min_deque.pop() + min_deque.append(right) + + # 
Shrink window from the left until the max − min ≤ K + while A[max_deque[0]] - A[min_deque[0]] > K: + # Advance left past whichever extreme comes first + if max_deque[0] < min_deque[0]: + left = max_deque[0] + 1 + max_deque.popleft() + else: + left = min_deque[0] + 1 + min_deque.popleft() + + # Update the answer with the current valid window size + answer = max(answer, right - left + 1) + + assert answer > 0, "The answer should be greater than 0" + self.parameter["gold_answer"] = answer + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + K = self.parameter["K"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] : + if answer is not None : + answer = answer.strip() + try : + l, r = map(int, answer.split()) + return l, r + except : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + l, r = processed_result + if not (0 <= l < r <= self.parameter["N"]) : + return self.rewards["invalid_solution"] + if max(self.parameter["A"][l : r]) - min(self.parameter["A"][l : r]) > self.parameter["K"] : + return self.rewards["unsuccessful_solution"] + + answer, gold = r - l, self.parameter["gold_answer"] + assert 0 < answer <= gold, "The answer should not be greater than the gold answer" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * int(answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of 
file diff --git a/server/Gym/environments/longest_path/__init__.py b/server/Gym/environments/longest_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2951ba3db3ac43d179b50d71409ad9cc12f165c5 --- /dev/null +++ b/server/Gym/environments/longest_path/__init__.py @@ -0,0 +1 @@ +from .environment import LongestPath_Environment diff --git a/server/Gym/environments/longest_path/environment.py b/server/Gym/environments/longest_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d4a9837a059bb4c478bd3215f54963eabc928169 --- /dev/null +++ b/server/Gym/environments/longest_path/environment.py @@ -0,0 +1,127 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class LongestPath_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following directed edges. Each edge is represented as a tuple `(s, t, w)`, meaning there is a directed edge **from vertex `s` to vertex `t` with weight `w`** : +{edges} + +Your task is to find a path `p1, p2, ..., pk` such that: +- **No vertex appears more than once** in the path. +- Try your best to **maximize** the total weight of the path (i.e., the sum of all edge weights used). + +**Output Format:** Your final answer should be a single line containing the path in order: `p1 p2 ... pk`, separated by **spaces**. +Example: `0 1 {N_minus_1}` (do **NOT** include the backticks or quotes); this means the path (k = 3) goes from `0` to `1` to `{N_minus_1}`.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the LongestPath_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 < edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = random.sample([(s, t, random.randint(1, N)) for s in range(N) for t in range(N) if s != t], int(edge_density * N * (N - 1))) + random.shuffle(edges) + assert len(edges) + + assert len(edges) == len(set((s, t) for s, t, w in edges)), "edges should be unique" + for s, t, w in edges : + assert 0 <= s < N, "s should be in range" + assert 0 <= t < N, "t should be in range" + assert s != t, "s should not be equal to t" + + + adjacent = [[] for s in range(N)] + for s, t, w in edges : + adjacent[s].append((t, w)) + + self.parameter["gold_answer"] = 0 + dpF = dict() + def dp(s : int, visited : int) -> int : + if visited == (1 << N) - 1 : + return 0 + if (s, visited) in dpF : + return dpF[(s, visited)] + ans = 0 + for t, w in adjacent[s] : + if visited & (1 << t) == 0 : + ans = max(ans, dp(t, visited | (1 << t)) + w) + dpF[(s, visited)] = ans + return ans + for s in range(N) : + self.parameter["gold_answer"] = max(self.parameter["gold_answer"], dp(s, 1 << s)) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(s, t, w) for s, t, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = 
def scorer(self, output : str) -> float :
    """
    Score a model output for the longest-path task.

    The parsed answer is a list of vertices. Every vertex must lie in `[0, N)`,
    no vertex may repeat, and each consecutive pair must be an existing
    directed edge; otherwise the `invalid_solution` reward is returned.
    A valid path is scored by its total edge weight against `gold_answer`.

    Returns:
        `wrong_format` when the processor yields None, `invalid_solution` for
        an illegal path, otherwise the configured reward.
    """
    path = self.processor(output)
    if path is None :
        return self.rewards["wrong_format"]
    assert isinstance(path, list), "processed_result should be a list"

    if any(vertex < 0 or vertex >= self.parameter["N"] for vertex in path) :
        return self.rewards["invalid_solution"]
    if len(set(path)) != len(path) :
        return self.rewards["invalid_solution"]

    weights = {}
    for s, t, w in self.parameter["edges"] :
        weights[(s, t)] = w

    answer_weight = 0
    for step in range(len(path) - 1) :
        hop = (path[step], path[step + 1])
        if hop not in weights :
            return self.rewards["invalid_solution"]
        answer_weight += weights[hop]

    gold = self.parameter["gold_answer"]
    assert answer_weight <= gold and gold > 0, "answer_weight should be less than or equal to gold and gold should be greater than 0"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer_weight / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (gold == answer_weight)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
a/server/Gym/environments/longest_repeated_palindrome/environment.py b/server/Gym/environments/longest_repeated_palindrome/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..5347b932b5090cb5dcb39829c0d617cb24fef329 --- /dev/null +++ b/server/Gym/environments/longest_repeated_palindrome/environment.py @@ -0,0 +1,150 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class Longest_RepeatedPalindrome_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a string S: {S} + +Please find a **substring T of S** such that: +- T = A + A^R + A + A^R, where A^R denotes the reverse of string A, and + represents string concatenation. +- Try your best to **maximize the length** of T. + +**Output Format:** Output a single line containing the substring T.""" + + def __init__(self, + wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0, + **kwargs): + """ + Initialize the Longest_RepeatedPalindrome_Environment instance. 
def _generate(self) -> None :
    """
    Build a random binary string with a planted `A + A^R + A + A^R` block
    and store the length of the longest such substring.

    Reads `N` from `self.parameter`, then writes:
      - `S`: a length-N string over "0"/"1" made of a random prefix, the
        planted block `A + A^R + A + A^R`, and a random suffix;
      - `gold_answer`: the value returned by the nested `compute(S)`, which
        is asserted to be at least the planted block's length `4 * A_length`.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 4, "N should be greater than or equal to 4"

    # Probability of emitting "1" for every random character of this instance
    one_probability = random.uniform(0.1, 0.9)

    # Planted block: A of length 1..N//4, so 4 * A_length always fits in N
    A_length = random.randint(1, N // 4)
    A = "".join("1" if random.random() < one_probability else "0" for _ in range(A_length))
    A_reverse = A[::-1]
    # Random padding on both sides so the block's position varies
    first_length = random.randint(0, N - 4 * A_length)
    first_part = "".join("1" if random.random() < one_probability else "0" for _ in range(first_length))
    second_length = N - first_length - 4 * A_length
    second_part = "".join("1" if random.random() < one_probability else "0" for _ in range(second_length))
    S = self.parameter["S"] = first_part + A + A_reverse + A + A_reverse + second_part
    assert len(S) == N, "S should have length N"


    def compute(S):
        # Returns the largest palindrome length L found in S such that L is a
        # multiple of 4 and the node's auxiliary link `f` has length exactly L // 2.
        n = len(S)

        # Prepare for Palindromic Tree (PAM)
        # We use two root nodes: node 0 for even-length palindromes (length 0)
        # and node 1 for odd-length root (length -1).
        # The arrays below are allocated with n + 3 slots.
        size = n + 3
        ch = [[0] * 2 for _ in range(size)] # transitions (one per character "0"/"1")
        fail = [0] * size # failure links
        f = [0] * size # auxiliary links for double palindrome
        length = [0] * size # palindrome lengths

        tot = 1 # index of the last allocated node (nodes 0 and 1 are the roots)
        fail[0] = 1 # fail of even root -> odd root
        length[1] = -1 # length of odd root
        las = 0 # last added node (start at even root)

        # Shift string to 1-indexed for convenience
        S = ' ' + S

        for i in range(1, n + 1):
            cur = las
            # Find the largest suffix-palindrome we can extend
            while S[i] != S[i - length[cur] - 1]:
                cur = fail[cur]
            c = int(S[i])

            # If this extension hasn't been created, build a new node
            if ch[cur][c] == 0:
                tot += 1
                length[tot] = length[cur] + 2

                # Compute failure link for the new node
                x = fail[cur]
                while S[i] != S[i - length[x] - 1]:
                    x = fail[x]
                fail[tot] = ch[x][c]

                # Register the node only after its failure link was looked up,
                # so the lookup above cannot return the node itself
                ch[cur][c] = tot

                # Compute auxiliary link f for checking double palindrome:
                # reuse the failure link when it is no longer than half of the
                # new palindrome, otherwise continue from the parent's f link
                if length[fail[tot]] <= length[tot] // 2:
                    f[tot] = fail[tot]
                else:
                    p = f[cur]
                    # Traverse until we find a valid half-length palindrome to extend
                    while (length[p] + 2 > length[tot] // 2) or (S[i] != S[i - length[p] - 1]):
                        p = fail[p]
                    f[tot] = ch[p][c]

            # Move last pointer
            las = ch[cur][c]

        # Compute the answer: longest double palindrome length
        ans = 0
        # Nodes start from index 2 (skip the two roots)
        for i in range(2, tot + 1):
            if length[i] % 4 == 0 and length[f[i]] == length[i] // 2:
                ans = max(ans, length[i])
        return ans
    self.parameter["gold_answer"] = compute(S)
    assert self.parameter["gold_answer"] >= 4 * A_length, "gold_answer should be at least 4 * A_length"
processed_result = self.processor(output) + if processed_result is not None : + T = processed_result + if T not in self.parameter["S"] : + return self.rewards["invalid_solution"] + if len(T) == 0 or len(T) % 4 != 0 : + return self.rewards["invalid_solution"] + A = T[: len(T) // 4] + A_reverse = A[::-1] + if T != A + A_reverse + A + A_reverse : + return self.rewards["invalid_solution"] + + answer, gold = len(T), self.parameter["gold_answer"] + assert answer <= gold, "Answer should not be greater than gold answer" + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * int(answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maf_mafia/__init__.py b/server/Gym/environments/maf_mafia/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cf5bee4a6c049277b7841c71b4ab22c26a142706 --- /dev/null +++ b/server/Gym/environments/maf_mafia/__init__.py @@ -0,0 +1 @@ +from .environment import MafMafia_Environment diff --git a/server/Gym/environments/maf_mafia/environment.py b/server/Gym/environments/maf_mafia/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..6e11653bdfd02265f95f5ca5468c30e44570195f --- /dev/null +++ b/server/Gym/environments/maf_mafia/environment.py @@ -0,0 +1,190 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MafMafia_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3472 + prompt_template = \ +r"""There are {N} participants in a game, labeled from 0 to {N_minus_1}. 
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5,
             rewarding_strategy_min : str = "(gold/answer)^beta", rewarding_weight_min : float = +1.0, rewarding_beta_min : float = 5.0,
             rewarding_strategy_max : str = "(answer/gold)^beta", rewarding_weight_max : float = +1.0, rewarding_beta_max : float = 5.0,
             **kwargs) :
    """
    Initialize the MafMafia_Environment instance.

    The `*_min` settings are used by the scorer when the instance asks to
    minimize the number of kills, the `*_max` settings when it asks to
    maximize it. Remaining keyword arguments are forwarded to the base class.
    """
    super().__init__(**kwargs)

    # Penalties shared by both objectives, then one reward triple per objective
    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        rewarding_strategy_max = rewarding_strategy_max,
        rewarding_weight_max = rewarding_weight_max,
        rewarding_beta_max = rewarding_beta_max,
        rewarding_strategy_min = rewarding_strategy_min,
        rewarding_weight_min = rewarding_weight_min,
        rewarding_beta_min = rewarding_beta_min,
    )
def scorer(self, output : str) -> float :
    """
    Score a model output for the mafia ordering task.

    The parsed answer must be a permutation of `0 .. N-1`; otherwise the
    `invalid_solution` reward is returned. The game is then simulated in that
    order (a participant who is still alive marks their target as killed) and
    the number of killed participants is compared with `gold_answer`, using
    the `*_min` or `*_max` reward settings according to
    `parameter["minimize_or_maximize"]`.

    Returns:
        `wrong_format` when the processor yields None, `invalid_solution` for
        a non-permutation, otherwise the configured reward.
    """
    order = self.processor(output)
    if order is None :
        return self.rewards["wrong_format"]
    assert isinstance(order, list), "processed_result should be a list"

    N = self.parameter["N"]
    if len(order) != N :
        return self.rewards["invalid_solution"]
    if set(order) != set(range(N)) :
        return self.rewards["invalid_solution"]

    # Replay the game: only participants not yet killed get to act
    targets = self.parameter["TO"]
    dead = set()
    for actor in order :
        if actor not in dead :
            dead.add(targets[actor])
    answer = len(dead)
    gold = self.parameter["gold_answer"]

    objective = self.parameter["minimize_or_maximize"]
    if objective == "minimize" :
        assert 0 < gold <= answer, "For minimization, answer should be greater than 0 and at least as large as the gold answer"
        strategy = self.rewards["rewarding_strategy_min"]
        if strategy == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight_min"] * ((gold / answer) ** self.rewards["rewarding_beta_min"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight_min"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_min"]))
    if objective == "maximize" :
        assert 0 < answer <= gold, "For maximization, answer should be greater than 0 and at most as large as the gold answer"
        strategy = self.rewards["rewarding_strategy_max"]
        if strategy == "(answer/gold)^beta" :
            return self.rewards["rewarding_weight_max"] * ((answer / gold) ** self.rewards["rewarding_beta_max"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight_max"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_max"]))
    assert False, "minimize_or_maximize should be either 'minimize' or 'maximize'"
def magic_square(n):
    """
    Return an n x n magic square as a NumPy integer array.

    Odd `n` uses `_magic_odd`, multiples of 4 use `_magic_doubly_even`.

    Raises:
        NotImplementedError: for every other even `n` (e.g. 6, 10).
    """
    if n == 1:
        return np.array([[1]], dtype=int)

    if n % 2 == 1:
        return _magic_odd(n)
    elif n % 4 == 0:
        return _magic_doubly_even(n)
    else:
        raise NotImplementedError("Magic square for singly even n (e.g., 6, 10) is not implemented.")


def _magic_odd(n):
    """Build an odd-order magic square by walking up-right and stepping down on collision."""
    magic = np.zeros((n, n), dtype=int)
    num = 1
    # Start in the middle of the top row
    i, j = 0, n // 2
    while num <= n * n:
        magic[i, j] = num
        num += 1
        # Candidate cell: one up, one right, wrapping around the edges
        ni, nj = (i - 1) % n, (j + 1) % n
        if magic[ni, nj] != 0:
            # Occupied: drop one row in the current column instead
            i = (i + 1) % n
        else:
            i, j = ni, nj
    return magic


def _magic_doubly_even(n):
    """Build a magic square for `n` divisible by 4 by complementing a fixed cell pattern."""
    magic = np.arange(1, n * n + 1, dtype=int).reshape(n, n)
    for i in range(n):
        for j in range(n):
            # Cells on the diagonals of each 4 x 4 tile are replaced by n*n + 1 - value
            if (i % 4 == j % 4) or ((i % 4) + (j % 4) == 3):
                magic[i, j] = n * n + 1 - magic[i, j]
    return magic


def rotate(square):
    """Return `square` rotated by a random number (1 to 3) of quarter turns."""
    return np.rot90(square, random.randint(1, 3))


def mirror(square):
    """Return `square` flipped left-to-right."""
    return np.fliplr(square)


def swap_rows(square, i, j):
    """
    Return a copy of `square` with lines `i` and `j` exchanged, keeping it magic.

    The same permutation is applied to rows and columns: `i <-> j` together
    with the mirrored pair `n-1-i <-> n-1-j`. Permuting rows/columns keeps
    every row and column sum, and because the permutation is symmetric under
    index reversal both diagonals are mapped onto themselves.

    The previous implementation built a modified copy but returned the
    untouched input, so the operation never had any effect; returning that
    copy as-is would not have been valid either, because swapping rows
    `i, j` with columns `n-1-i, n-1-j` changes the main-diagonal sum.

    When `i == j`, or when either index is the centre line of an odd-sized
    square (which cannot move without breaking a diagonal), an unchanged
    copy is returned. The input array is never modified.
    """
    n = square.shape[0]
    mirror_i, mirror_j = n - 1 - i, n - 1 - j
    order = list(range(n))
    if i != j and i != mirror_i and j != mirror_j:
        order[i], order[j] = j, i
        # When j is the mirror of i this repeats the same assignment, which is harmless
        order[mirror_i], order[mirror_j] = mirror_j, mirror_i
    # Advanced indexing returns a new array: result[r, c] = square[order[r], order[c]]
    return square[np.ix_(order, order)]
def _generate(self) -> None :
    """
    Build a random magic-square puzzle and store it in ``self.parameter``.

    Reads:
        ``N``        -- side length of the square, must be >= 3.
        ``sparsity`` -- fraction of cells to blank out, strictly between 0 and 1.

    Writes:
        ``reference_answer`` -- the complete square, one row per line, cells separated by spaces.
        ``grid``             -- the puzzle as a list of lists of ints, blanked cells set to 0.

    Note: ``magic_square`` raises ``NotImplementedError`` for singly even ``N`` (6, 10, ...).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    # Validate every input before doing any work so a bad configuration fails fast.
    assert "sparsity" in self.parameter, "sparsity is required in parameter"
    sparsity = self.parameter["sparsity"]
    assert 0 < sparsity < 1, "sparsity should be between 0 and 1"

    grid = magic_square(N)
    operations = ["rotate", "mirror", "swap_rows"]
    operation_distribution = [0.1, 0.1, 0.8]
    for _ in range(N * N) :
        operation = random.choices(operations, weights = operation_distribution)[0]
        if operation == "rotate" :
            grid = rotate(grid)
        elif operation == "mirror" :
            grid = mirror(grid)
        elif operation == "swap_rows" :
            # random.sample draws without replacement, so the two rows are always distinct.
            row1, row2 = random.sample(range(N), 2)
            grid = swap_rows(grid, row1, row2)
        else :
            assert False
    self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in grid)

    # Convert numpy scalars to plain Python ints before blanking cells.
    self.parameter["grid"] = grid = [[cell.item() for cell in row] for row in grid]
    empty_cells = random.sample(range(N * N), max(1, int(N * N * sparsity)))  # always blank at least one cell
    for cell in empty_cells :
        row, column = divmod(cell, N)
        grid[row][column] = 0
def scorer(self, output : str) -> float :
    """
    Score a candidate completion of the magic-square puzzle.

    Returns ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None``
    or the grid is not N x N, ``rewards["invalid_solution"]`` when the cells are not
    exactly 1..N^2 or a pre-filled cell was changed, and otherwise a reward derived
    from how many of the 2N + 2 lines (rows, columns, two diagonals) hit the magic sum.
    """
    candidate = self.processor(output)
    if candidate is None :
        return self.rewards["wrong_format"]
    assert isinstance(candidate, list), "processed_result should be a list"

    size = self.parameter["N"]
    if len(candidate) != size :
        return self.rewards["wrong_format"]
    for line in candidate :
        if len(line) != size :
            return self.rewards["wrong_format"]

    # Every integer 1..N^2 must be present (and hence appear exactly once).
    used_values = {value for line in candidate for value in line}
    if used_values != set(range(1, size * size + 1)) :
        return self.rewards["invalid_solution"]

    # Cells that were given in the puzzle (non-zero) must be left untouched.
    for line, given_line in zip(candidate, self.parameter["grid"]) :
        for value, given in zip(line, given_line) :
            if given != 0 and value != given :
                return self.rewards["invalid_solution"]

    magic_constant = size * (size * size + 1) // 2
    line_sums = [sum(line) for line in candidate]
    line_sums.extend(sum(candidate[r][c] for r in range(size)) for c in range(size))
    line_sums.append(sum(candidate[k][k] for k in range(size)))
    line_sums.append(sum(candidate[k][size - k - 1] for k in range(size)))

    line_count = 2 * size + 2
    satisfied = sum(1 for total in line_sums if total == magic_constant)
    assert satisfied <= line_count, "satisfied should be less than or equal to 2 * N + 2"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((satisfied / line_count) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (satisfied == line_count)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def scorer(self, output : str) -> float :
    """
    Score a proposed monotone array B against the stored array A.

    Returns ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None``,
    ``rewards["invalid_solution"]`` when B has the wrong length or is neither
    non-decreasing nor non-increasing, and otherwise a reward comparing the cost
    sum(|A[i] - B[i]|) with the stored optimum ``gold_answer``.
    """
    candidate = self.processor(output)
    if candidate is None :
        return self.rewards["wrong_format"]
    assert isinstance(candidate, list), "processed_result should be a list"

    if len(candidate) != self.parameter["N"] :
        return self.rewards["invalid_solution"]
    is_monotone = self.non_decreasing(candidate) or self.non_increasing(candidate)
    if not is_monotone :
        return self.rewards["invalid_solution"]

    gold = self.parameter["gold_answer"]
    answer = 0
    for original, chosen in zip(self.parameter["A"], candidate) :
        answer += abs(original - chosen)
    # gold is the stored minimum cost, so a valid B can never do better than it.
    assert 0 < gold <= answer, "gold should be less than or equal to answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
prompt_template = \ +r"""We use the integer in the $i$-th row and $j$-th column to represent the element $A[i][j]$ of a matrix. + +You are given a square matrix $A$ of size {N}×{N}: +{matrix} + +Please compute the matrix $A^{K}$ (i.e., matrix $A$ raised to the power of ${K}$). Since the values may become very large, take each element **modulo {modulo}**. + +**Output Format:** +Your final answer — the matrix $A^{K}$ — should be printed as ${N}$ lines separated by **line breaks**. Each line should contain ${N}$ integers separated by **spaces**. +Example (do **NOT** include the backticks or quotes): +``` +{all_zeros} +``` +""" + def __init__(self, + modulo : int = 10000, + wrong_format : float = -1.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the Matrix_BinaryExponentiation_Environment instance. + """ + super().__init__(**kwargs) + + self.modulo = modulo + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N should be greater than or equal to 1" + + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K >= 2, "MAX_K should be greater than or equal to 2" + + K = self.parameter["K"] = random.randint(2, MAX_K) + + A = self.parameter["A"] = [[random.randint(0, self.modulo - 1) for j in range(N)] for i in range(N)] + + + def matrix_multiply(A, B, mod) : + n = len(A) + C = [[0] * n for _ in range(n)] + + B_T = [[B[j][i] for j in range(n)] for i in range(n)] + + for i in range(n) : + for j in range(n) : + sum_val = 0 + for k in range(n) : + sum_val += A[i][k] * B_T[j][k] + C[i][j] = sum_val % mod + + return C + + def matrix_power(A, k, mod) : + n 
def scorer(self, output : str) -> float :
    """
    Score a proposed matrix against the stored ``gold_answer`` (A^K modulo ``self.modulo``).

    Returns ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None``
    or the matrix is not N x N.  Otherwise:

    * ``"mean([gold=answer])^beta"`` -- ``rewarding_weight * (fraction of matching cells) ** rewarding_beta``
    * ``"gold=answer"``              -- ``rewarding_weight`` for an exact match, else ``0``

    Raises:
        NotImplementedError: for any other ``rewarding_strategy``.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    A_K = processed_result
    N = self.parameter["N"]
    if len(A_K) != N or any(len(row) != N for row in A_K) :
        return self.rewards["wrong_format"]

    gold_answer = self.parameter["gold_answer"]
    strategy = self.rewards["rewarding_strategy"]
    if strategy == "mean([gold=answer])^beta" :
        matches = sum(
            sum(answer == gold for answer, gold in zip(answer_row, gold_row))
            for answer_row, gold_row in zip(A_K, gold_answer)
        )
        return self.rewards["rewarding_weight"] * ((matches / (N * N)) ** self.rewards["rewarding_beta"])
    elif strategy == "gold=answer" :
        # Scale by rewarding_weight so this branch returns a float on the same
        # scale as the other strategy instead of a bare bool.
        return self.rewards["rewarding_weight"] * (A_K == gold_answer)
    else :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+ +Matrix A is given as follows: +{A} + +**Output Format:** Output two lines: +- The first line contains the row permutation: a[0] a[1] ... a[{N_minus_1}] +- The second line contains the column permutation: b[0] b[1] ... b[{N_minus_1}] +(Use spaces to separate adjacent integers. Do **not** include backticks or quotes.)""" + + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the MatrixPermutation_BothDiagonalOne_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter + N = self.parameter["N"] + assert N >= 2, "N must be at least 2." 
def scorer(self, output : str) -> float :
    """
    Score a (row permutation, column permutation) pair for the both-diagonals task.

    Returns ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None``
    and ``rewards["invalid_solution"]`` when either sequence is not a permutation of
    0..N-1.  Otherwise the reward depends on how many cells of the main diagonal and
    the anti-diagonal of the permuted matrix hold a 1.
    """
    parsed = self.processor(output)
    if parsed is None :
        return self.rewards["wrong_format"]

    rows, cols = parsed
    size = self.parameter["N"]
    expected_indices = set(range(size))
    if len(rows) != size or set(rows) != expected_indices :
        return self.rewards["invalid_solution"]
    if len(cols) != size or set(cols) != expected_indices :
        return self.rewards["invalid_solution"]

    matrix = self.parameter["A"]
    satisfied, total = 0, 0
    for i in range(size) :
        # A set merges the two positions when the diagonals cross at the centre cell.
        for j in {i, size - 1 - i} :
            total += 1
            satisfied += matrix[rows[i]][cols[j]]
    assert satisfied <= total, "satisfied must be less than or equal to total"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((satisfied / total) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * (satisfied == total)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+ +Please find: +- a permutation of the row indices `a[0], ..., a[{N_minus_1}]` (a reordering of `0` to `{N_minus_1}`), and +- a permutation of the column indices `b[0], ..., b[{M_minus_1}]` (a reordering of `0` to `{M_minus_1}`), +- such that after permuting the rows and columns of matrix A accordingly, the resulting matrix matches B. Formally, for all `0 ≤ i < {N}` and `0 ≤ j < {M}`, it must hold that A[a[i]][b[j]] = B[i][j]. + +A is given as follows: +{A} + +B is given as follows: +{B} + +**Output Format:** Output two lines: +- The first line contains the row permutation: `a[0] ... a[{N_minus_1}]` +- The second line contains the column permutation: `b[0] ... b[{M_minus_1}]` +(Use spaces to separate the adjacent integers on the same line. Do **not** include backticks or quotes.)""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the MatrixPermutationEquivalence_Environment instance. 
def scorer(self, output : str) -> float :
    """
    Score a (row permutation, column permutation) pair that should turn A into B.

    Returns ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None``
    and ``rewards["invalid_solution"]`` when the first sequence is not a permutation
    of 0..N-1 or the second is not a permutation of 0..M-1.  Otherwise the permuted
    matrix A[a[i]][b[j]] is compared cell by cell with the stored B.
    """
    parsed = self.processor(output)
    if parsed is None :
        return self.rewards["wrong_format"]

    rows, cols = parsed
    height, width = self.parameter["N"], self.parameter["M"]
    rows_ok = len(rows) == height and set(rows) == set(range(height))
    if not rows_ok :
        return self.rewards["invalid_solution"]
    cols_ok = len(cols) == width and set(cols) == set(range(width))
    if not cols_ok :
        return self.rewards["invalid_solution"]

    source = self.parameter["A"]
    target = self.parameter["B"]
    permuted = []
    for r in rows :
        permuted.append([source[r][c] for c in cols])

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "mean([gold=answer])^beta" :
        matching = 0
        for permuted_row, target_row in zip(permuted, target) :
            for got, want in zip(permuted_row, target_row) :
                matching += (got == want)
        return self.rewards["rewarding_weight"] * ((matching / (height * width)) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (permuted == target)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
b/server/Gym/environments/matrix_permutation_main_diagonal_one/environment.py @@ -0,0 +1,103 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MatrixPermutation_MainDiagonalOne_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a square matrix of size {N} × {N}, where each element is either `0` or `1`. This matrix is 0-indexed. + +Please find: +- a permutation of the row indices: a[0], ..., a[{N_minus_1}] (a reordering of `0` to `{N_minus_1}`), +- a permutation of the column indices: b[0], ..., b[{N_minus_1}] (a reordering of `0` to `{N_minus_1}`), +- such that after applying these permutations to the rows and columns of the matrix A (i.e., the element at position (i, j) becomes A[a[i]][b[j]]), the **main diagonal** of the resulting matrix contains only `1`s (main diagonal refers to the elements at position (i, i) for i from `0` to `{N_minus_1}`). + +Matrix A is given as follows: +{A} + +**Output Format:** Output two lines: +- The first line contains the row permutation: a[0] a[1] ... a[{N_minus_1}] +- The second line contains the column permutation: b[0] b[1] ... b[{N_minus_1}] +(Use spaces to separate adjacent integers. Do **not** include backticks or quotes.)""" + + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the MatrixPermutation_MainDiagonalOne_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter + N = self.parameter["N"] + assert N >= 2, "N must be at least 2." 
def scorer(self, output : str) -> float :
    """
    Score a (row permutation, column permutation) pair for the main-diagonal task.

    Returns ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None``
    and ``rewards["invalid_solution"]`` when either sequence is not a permutation of
    0..N-1.  Otherwise the reward depends on the main diagonal of the permuted
    matrix, i.e. the cells A[a[i]][b[i]].
    """
    parsed = self.processor(output)
    if parsed is None :
        return self.rewards["wrong_format"]

    rows, cols = parsed
    size = self.parameter["N"]
    valid_indices = set(range(size))
    if len(rows) != size or set(rows) != valid_indices :
        return self.rewards["invalid_solution"]
    if len(cols) != size or set(cols) != valid_indices :
        return self.rewards["invalid_solution"]

    # Only the diagonal of the permuted matrix matters: cell (i, i) is A[a[i]][b[i]].
    matrix = self.parameter["A"]
    diagonal = [matrix[r][c] for r, c in zip(rows, cols)]

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((sum(diagonal) / size) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * all(diagonal)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def scorer(self, output : str) -> float :
    """
    Score a proposed max-pooling result against the stored ``gold_answer``.

    The expected shape is (N - K + 1) rows by (M - K + 1) columns.  Returns
    ``rewards["wrong_format"]`` when ``self.processor(output)`` yields ``None`` or the
    shape differs.  Otherwise:

    * ``"mean([gold=answer])^beta"`` -- ``rewarding_weight * (fraction of matching cells) ** rewarding_beta``
    * ``"gold=answer"``              -- ``rewarding_weight`` for an exact match, else ``0``

    Raises:
        NotImplementedError: for any other ``rewarding_strategy``.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    pool = processed_result
    expected_rows = self.parameter["N"] - self.parameter["K"] + 1
    expected_columns = self.parameter["M"] - self.parameter["K"] + 1
    if len(pool) != expected_rows :
        return self.rewards["wrong_format"]
    if not all(len(row) == expected_columns for row in pool) :
        return self.rewards["wrong_format"]

    gold_answer = self.parameter["gold_answer"]
    strategy = self.rewards["rewarding_strategy"]
    if strategy == "mean([gold=answer])^beta" :
        matches = sum(
            sum(answer == gold for answer, gold in zip(answer_row, gold_row))
            for answer_row, gold_row in zip(pool, gold_answer)
        )
        return self.rewards["rewarding_weight"] * ((matches / (expected_rows * expected_columns)) ** self.rewards["rewarding_beta"])
    elif strategy == "gold=answer" :
        # Scale by rewarding_weight so this branch returns a float on the same
        # scale as the other strategy instead of a bare bool.
        return self.rewards["rewarding_weight"] * (pool == gold_answer)
    else :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None :
    """
    Sample a problem instance and store its reference answer.

    Reads ``self.parameter["N"]`` (number of constraints, >= 2) plus
    ``self.H_W_range`` and ``self.max_MOD``; writes ``H``, ``W``, ``M``,
    ``constraints``, ``MOD`` and ``reference_answer`` into ``self.parameter``.
    Constraints are derived from a hidden random matrix, so at least one
    matrix satisfies all of them.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    H = self.parameter["H"] = random.randint(1, N * self.H_W_range)
    W = self.parameter["W"] = random.randint(1, N * self.H_W_range)
    M = self.parameter["M"] = random.randint(1, (N * self.H_W_range) ** 2)

    # Hidden witness matrix: every constraint value is the true maximum of a
    # random sub-rectangle of it.
    A = [[random.randint(1, M) for _ in range(W)] for _ in range(H)]
    self.parameter["constraints"] = constraints = []
    for _ in range(N) :
        row_length, col_length = random.randint(1, H), random.randint(1, W)
        x1 = random.randint(1, H - row_length + 1)
        y1 = random.randint(1, W - col_length + 1)
        x2, y2 = x1 + row_length - 1, y1 + col_length - 1
        v = max(A[i - 1][j - 1] for i in range(x1, x2 + 1) for j in range(y1, y2 + 1))
        constraints.append((x1, y1, x2, y2, v))

    MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

    def count_matrices(H, W, M, constraints, MOD) :
        """Count H x W matrices over [1, M] whose listed sub-rectangle maxima match, modulo MOD."""
        # Cut coordinates (half-open): every rectangle edge becomes a grid line.
        xs, ys = {1, H + 1}, {1, W + 1}
        for x1, y1, x2, y2, v in constraints :
            assert 1 <= x1 <= x2 <= H, "Invalid x1, x2 range"
            assert 1 <= y1 <= y2 <= W, "Invalid y1, y2 range"
            assert 1 <= v <= M, "Invalid value v"
            xs.update((x1, x2 + 1))
            ys.update((y1, y2 + 1))
        xs, ys = sorted(xs), sorted(ys)
        x_index = {x : idx for idx, x in enumerate(xs)}
        y_index = {y : idx for idx, y in enumerate(ys)}

        # Each constraint as a half-open range of compressed row/column bands.
        banded = [
            (x_index[x1], x_index[x2 + 1], y_index[y1], y_index[y2 + 1], v)
            for x1, y1, x2, y2, v in constraints
        ]
        row_bands, col_bands = len(xs) - 1, len(ys) - 1
        # Number of original cells inside each compressed block (mask-independent).
        areas = [
            [(xs[r + 1] - xs[r]) * (ys[c + 1] - ys[c]) for c in range(col_bands)]
            for r in range(row_bands)
        ]

        total = 0
        # Inclusion-exclusion: constraints in the subset are tightened from
        # "all cells <= v" to "all cells <= v - 1"; the alternating sum keeps
        # exactly the matrices whose maximum in every rectangle equals v.
        for mask in range(1 << len(banded)) :
            cap = [[M] * col_bands for _ in range(row_bands)]
            for j, (xl, xr, yl, yr, v) in enumerate(banded) :
                limit = v - ((mask >> j) & 1)
                for r in range(xl, xr) :
                    row = cap[r]
                    for c in range(yl, yr) :
                        if row[c] > limit :
                            row[c] = limit

            ways = 1
            for r in range(row_bands) :
                for c in range(col_bands) :
                    # A cap of 0 yields pow(0, area, MOD) == 0, i.e. no fillings.
                    ways = ways * pow(cap[r][c], areas[r][c], MOD) % MOD
                if ways == 0 :
                    break

            if bin(mask).count("1") & 1 :
                total = (total - ways) % MOD
            else :
                total = (total + ways) % MOD
        return total

    self.parameter["reference_answer"] = count_matrices(H, W, M, constraints, MOD)
def scorer(self, output : str) -> float :
    """
    Grade a single-integer answer against the stored reference answer.

    Returns the ``wrong_format`` reward when ``self.processor`` yields None,
    ``wrong_range`` when the value lies outside [0, MOD), and otherwise
    ``correct_answer`` or ``wrong_answer`` depending on equality with
    ``self.parameter["reference_answer"]``.
    """
    value = self.processor(output)
    if value is None :
        return self.rewards["wrong_format"]
    if value < 0 or value >= self.parameter["MOD"] :
        return self.rewards["wrong_range"]
    matched = value == self.parameter["reference_answer"]
    return self.rewards["correct_answer" if matched else "wrong_answer"]
+- Split the block into two parts: the first ends at A[i], the second starts at A[i + 1]. +- Let `sum1` and `sum2` be the sums of the two blocks. Then, update S += sum1 × sum2. + +After {K} operations, you will have {K} + 1 blocks. Try your best to **maximize** the final value of S. + +**Output Format:** A single line containing {K} integers — the positions `i` you chose in order, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxDifferentGroupPairDivision_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + K = self.parameter["K"] = random.randint(2, N - 2) + A = self.parameter["A"] = [random.randint(0, N) for _ in range(N)] + + + B = K + 1 # number of blocks after K splits + + # Read sequence and build prefix sums + prefix_sum = [0] * (N + 1) + for i in range(1, N + 1): + prefix_sum[i] = prefix_sum[i - 1] + A[i - 1] + sum_N = prefix_sum[N] + + # pre[j][i] will store the split position for the j-th block ending at i + # Use array('I') for memory efficiency (stores unsigned 32-bit ints) + pre = [[0] * (N + 1) for _ in range(B + 1)] + + # We'll keep only two rows of DP at a time + prev_f = [0] * (N + 1) + cur_f = [0] * (N + 1) + + # DP over number of blocks j = 1..B + for j in range(1, B + 1): + # Convex-hull trick: maintain deque of candidate split-points + qx = [0] * (N + 1) # x = prefix_sum[p] + qy = [0] * (N + 1) # y = prev_f[p] + qp = [0] * (N + 1) 
def scorer(self, output : str) -> float :
    """
    Replay the submitted split positions and reward the resulting S.

    Each position ``i`` (0-based) must separate A[i] and A[i + 1] while both
    are still in the same block; the product of the two new block sums is
    added to S. Wrong count or an illegal split yields ``invalid_solution``;
    unparseable output yields ``wrong_format``.

    Raises:
        NotImplementedError: if the rewarding strategy is unknown.
    """
    cuts = self.processor(output)
    if cuts is None :
        return self.rewards["wrong_format"]
    assert isinstance(cuts, list), "processed_result should be a list"

    if len(cuts) != self.parameter["K"] :
        return self.rewards["invalid_solution"]

    size = self.parameter["N"]
    total = 0
    owner = [0] * size  # owner[p] = id of the block currently holding A[p]
    for new_id, cut in enumerate(cuts, start = 1) :
        # Both cut and cut + 1 must be valid indices.
        if cut < 0 or cut + 1 >= size :
            return self.rewards["invalid_solution"]
        current = owner[cut]
        if owner[cut + 1] != current :
            return self.rewards["invalid_solution"]

        # Left part: walk back to the start of the current block.
        left_sum, pos = 0, cut
        while pos >= 0 and owner[pos] == current :
            left_sum += self.parameter["A"][pos]
            pos -= 1

        # Right part: walk forward, relabelling it as a fresh block.
        right_sum, pos = 0, cut + 1
        while pos < size and owner[pos] == current :
            right_sum += self.parameter["A"][pos]
            owner[pos] = new_id
            pos += 1

        total += left_sum * right_sum

    gold = self.parameter["gold_answer"]
    assert total <= gold, "answer should be less than or equal to gold"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        if gold == 0 :
            assert total == 0, "If gold is 0, answer should also be 0"
            return self.rewards["rewarding_weight"] * 1.0
        return self.rewards["rewarding_weight"] * ((total / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (total == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
a/server/Gym/environments/max_grid_path_intersection/environment.py b/server/Gym/environments/max_grid_path_intersection/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..3e02caca608e64765400aabea827ca3a8559da98 --- /dev/null +++ b/server/Gym/environments/max_grid_path_intersection/environment.py @@ -0,0 +1,165 @@ +import random +from typing import Optional +from collections import deque +from ...environment import VerifiableEnvironment + + +class MaxGridPathIntersection_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2045 + prompt_template = \ +r"""You are given an {N} × {N} grid (0-indexed) of non-negative integers (given in **row-major order**): +{grid} + +You will start at cell (0, 0) and move to cell ({N_minus_1}, {N_minus_1}) exactly {K} times. Each time, you can only move **right** or **down** at each step. When you step on a cell during a path, you collect its value and set it to 0 (so future paths will see it as 0). Your goal is to **maximize the total sum** collected across all {K} paths. + +**Output Format:** A single integer — the maximum total sum that can be collected after {K} such paths.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MaxGridPathIntersection_Environment instance. 
def _generate(self) -> None :
    """
    Sample K and the grid, then store the optimal total as the reference answer.

    Reads ``self.parameter["N"]`` (>= 3); writes ``K`` (1 .. N // 2), ``grid``
    (N x N values in [0, N]) and ``reference_answer`` into ``self.parameter``.
    The optimum is computed as a maximum-cost flow of K units on a
    node-split grid graph.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    K = self.parameter["K"] = random.randint(1, N // 2)

    A = self.parameter["grid"] = [[random.randint(0, N) for _ in range(N)] for _ in range(N)]

    def max_cost_flow(N, K, A) :
        """Return the maximum total collected by K right/down paths over grid A."""
        # Nothing to route: decide this before building the network.
        if K <= 0 :
            return 0

        # Each cell has an in-node (0 .. N*N-1) and an out-node (N*N .. 2*N*N-1).
        total_nodes = 2 * N * N
        NEG_INF = float("-inf")
        # Adjacency list; each edge is a mutable [to, capacity, cost, reverse_edge].
        ADJ = [[] for _ in range(total_nodes)]

        def add_edge(u, v, cap, cost) :
            forward = [v, cap, cost, None]
            backward = [u, 0, -cost, forward]
            forward[3] = backward
            ADJ[u].append(forward)
            ADJ[v].append(backward)

        def node_id(i, j, is_out) :
            return (N * N if is_out else 0) + i * N + j

        for i in range(N) :
            for j in range(N) :
                in_id = node_id(i, j, False)
                out_id = node_id(i, j, True)
                # The first visit collects the value; further visits are free.
                add_edge(in_id, out_id, 1, A[i][j])
                add_edge(in_id, out_id, K - 1, 0)
                # Moves to the right / downward neighbour (up to K walkers).
                if j + 1 < N :
                    add_edge(out_id, node_id(i, j + 1, False), K, 0)
                if i + 1 < N :
                    add_edge(out_id, node_id(i + 1, j, False), K, 0)

        s = node_id(0, 0, False)
        t = node_id(N - 1, N - 1, True)
        total_cost = 0

        # Successive longest augmenting paths (SPFA) in the residual graph.
        while True :
            DIST = [NEG_INF] * total_nodes
            FLOW = [0] * total_nodes
            INQUEUE = [False] * total_nodes
            PREV_NODE = [None] * total_nodes
            PREV_EDGE = [None] * total_nodes

            queue = deque([s])
            DIST[s] = 0
            FLOW[s] = K  # upper bound on what a single augmentation can push
            INQUEUE[s] = True

            while queue :
                u = queue.popleft()
                INQUEUE[u] = False
                for edge in ADJ[u] :
                    v, cap, cost, _ = edge
                    if cap > 0 and DIST[v] < DIST[u] + cost :
                        DIST[v] = DIST[u] + cost
                        FLOW[v] = min(FLOW[u], cap)
                        PREV_NODE[v] = u
                        PREV_EDGE[v] = edge
                        if not INQUEUE[v] :
                            queue.append(v)
                            INQUEUE[v] = True

            # Sink unreachable: no augmenting path is left.
            if DIST[t] == NEG_INF :
                break

            pushed = FLOW[t]
            total_cost += pushed * DIST[t]
            v = t
            while v != s :
                edge = PREV_EDGE[v]
                edge[1] -= pushed      # consume forward capacity
                edge[3][1] += pushed   # give it back on the reverse edge
                v = PREV_NODE[v]

        return total_cost

    self.parameter["reference_answer"] = max_cost_flow(N, K, A)
/dev/null +++ b/server/Gym/environments/max_minimum_after_interval_addition/__init__.py @@ -0,0 +1 @@ +from .environment import MaxMinimum_AfterIntervalAddition_Environment diff --git a/server/Gym/environments/max_minimum_after_interval_addition/environment.py b/server/Gym/environments/max_minimum_after_interval_addition/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..aac3e17976415df7c877805fe67baf2f20cb72b7 --- /dev/null +++ b/server/Gym/environments/max_minimum_after_interval_addition/environment.py @@ -0,0 +1,174 @@ +import heapq +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaxMinimum_AfterIntervalAddition_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4064 + prompt_template = \ +r"""You are given an array `ARRAY` of length {N}: {ARRAY} + +You are also given {M} intervals (numbered 1 to {M}): +{intervals} + +Let's select {K} **distinct** intervals; for each selected interval [l, r], add the value {A} to every element of `ARRAY` from index l to r (inclusive); all additions are cumulative. Can we **maximize the minimum value** in `ARRAY` after applying all {K} additions? You must output {K} integers in one line — the selected interval indices (in any order), separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxMinimum_AfterIntervalAddition_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3" + + N = self.parameter["N"] = random.randint(3, MAX_N_M) + M = self.parameter["M"] = random.randint(3, MAX_N_M) + K = self.parameter["K"] = random.randint(2, M - 1) + + A = self.parameter["A"] = random.randint(1, MAX_N_M) + ARRAY = self.parameter["ARRAY"] = [random.randint(1, MAX_N_M * random.randint(1, K)) for _ in range(N)] + + intervals = self.parameter["intervals"] = [] + for i in range(M) : + length = random.randint(1, N) + start = random.randint(1, N - length + 1) + intervals.append((start, start + length - 1)) + + + # Build operations list + # Each op is a tuple: (pos, tp, val) + # tp: 0 = left endpoint, 1 = sequence point, 2 = right endpoint + OPS = [] + # sequence points + for i in range(1, N + 1): + t = ARRAY[i - 1] + OPS.append((i, 1, t)) + + # intervals + # R[i] stores right endpoint of interval i (1-based) + R = [0] * (M + 1) + for i, (L_i, R_i) in enumerate(intervals, start = 1): + OPS.append((L_i, 0, i)) # left endpoint event + OPS.append((R_i, 2, i)) # right endpoint event + R[i] = R_i + + # sort by position, and for ties: left(0) < sequence(1) < right(2) + OPS.sort(key=lambda x: (x[0], x[1])) + + lf = min(ARRAY) # lower bound (minimum current value) + ri = lf + M * A # upper bound (loose, but faithful to the C++) + + # jud(mid) checks if we can achieve min >= mid using at most K intervals + def jud(mid: int) -> bool: + flow = 0 # current accumulated +a from chosen intervals covering current position + tot = 0 # total intervals chosen so far + # priority queue (max-heap by r[v]); 
def scorer(self, output : str) -> float :
    """
    Apply the chosen intervals and reward the resulting minimum of ARRAY.

    The answer must list exactly K distinct interval indices in [1, M];
    otherwise ``invalid_solution`` is returned (``wrong_format`` when the
    output cannot be parsed). The reward compares the achieved minimum with
    ``self.parameter["gold_answer"]`` under the configured strategy.

    Raises:
        ValueError: if the rewarding strategy is unknown.
    """
    chosen = self.processor(output)
    if chosen is None :
        return self.rewards["wrong_format"]

    expected = self.parameter["K"]
    if len(chosen) != expected or len(set(chosen)) != expected :
        return self.rewards["invalid_solution"]
    if any(idx < 1 or idx > self.parameter["M"] for idx in chosen) :
        return self.rewards["invalid_solution"]

    boosted = self.parameter["ARRAY"].copy()
    step = self.parameter["A"]
    for idx in chosen :
        left, right = self.parameter["intervals"][idx - 1]
        # Intervals are 1-based and inclusive; shift to 0-based positions.
        for pos in range(left - 1, right) :
            boosted[pos] += step

    answer = min(boosted)
    gold = self.parameter["gold_answer"]
    assert 0 < answer <= gold, "The answer should not exceed the gold answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (gold == answer)
    raise ValueError("Invalid rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
such that the **product** of the resulting integer values is **maximized**. + +Specifically, split the string into substrings: s_1, ..., s_{K_plus_1}, where: +- Each part s_i is a contiguous non-empty substring of `S`, +- The concatenation s_1 + ... + s_{K_plus_1} = S (here + means string concatenation), +- The value `int(s_1) * ... * int(s_{K_plus_1})` is as large as possible. + +Output Format: +Your final answer should be a single line containing the {K_plus_1} parts, separated by **spaces**. +Example: `31 2` (do **NOT** include the backticks or quotes); this means the string "312" is split into two parts: "31" and "2". +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0, + **kwargs) : + """ + Initialize the MaxMultSplit_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N should be greater than or equal to 1" + + assert "K" in self.parameter, "K is required in parameter" + K = self.parameter["K"] + assert K >= 1, "K should be greater than or equal to 1" + + assert K + 1 <= N, "K + 1 should be less than or equal to N" + + string = self.parameter["string"] = "".join([str(random.randint(1, 9)) for _ in range(N)]) + + + # Dynamic programming to find the maximum product of split + # dpF[k][i] = max{int(string[i : j]) * dpF[k - 1][j] | j in [i + 1, N - 1]} + dpF = [[0] * N for _ in range(K + 1)] + for k in range(0, K + 1) : + for i in range(N) : + if not k : + dpF[0][i] = int(string[: i + 1]) + else : + for j in range(1, i + 1) : + dpF[k][i] = max(dpF[k][i], 
def scorer(self, output : str) -> float :
    """
    Reward a proposed split of the digit string by the product of its parts.

    Args:
        output: raw model output; parsed by ``self.processor`` into integers.

    Returns:
        ``wrong_format`` if parsing fails; ``invalid_solution`` if the number
        of parts is not K + 1 or the parts do not concatenate back to the
        original string; otherwise a reward derived from product / gold.

    Raises:
        NotImplementedError: if the rewarding strategy is unknown.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    if len(processed_result) != self.parameter["K"] + 1 :
        return self.rewards["invalid_solution"]
    # The parts, written back as decimal strings, must rebuild the input exactly.
    if "".join([str(a) for a in processed_result]) != self.parameter["string"] :
        return self.rewards["invalid_solution"]

    answer = 1
    for val in processed_result :
        assert isinstance(val, int), "val should be an integer"
        answer *= val
    gold = self.parameter["gold_answer"]
    assert answer <= gold, "answer should be less than or equal to gold_answer"

    if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    elif self.rewards["rewarding_strategy"] == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    else :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
0000000000000000000000000000000000000000..cdcda3d2e1635a2eefd3c80ac00cbd07f5e493ca --- /dev/null +++ b/server/Gym/environments/max_multiplication_fixed_sum/__init__.py @@ -0,0 +1 @@ +from .environment import MaxMultiplicationFixedSum_Environment diff --git a/server/Gym/environments/max_multiplication_fixed_sum/environment.py b/server/Gym/environments/max_multiplication_fixed_sum/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a98f198b7fcb6cba45632b895871315ff0d5a0 --- /dev/null +++ b/server/Gym/environments/max_multiplication_fixed_sum/environment.py @@ -0,0 +1,66 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MaxMultiplicationFixedSum_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4157 + prompt_template = r"""Can you tell me the maximum product of positive integers (not necessarily distinct) whose sum is exactly {N}?""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MaxMultiplicationFixedSum_Environment instance. 
def _generate(self) -> None :
    """
    Sample N in [4, MAX_N] and store the maximum product of a partition of N.

    Reads ``self.parameter["MAX_N"]`` (>= 10); writes ``N`` and
    ``reference_answer`` into ``self.parameter``.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 10, "MAX_N should be greater than or equal to 10"

    N = self.parameter["N"] = random.randint(4, MAX_N)

    # Use as many 3s as possible; a remainder of 1 trades one 3 for a 4
    # (3 + 1 -> 4), a remainder of 2 adds a single factor 2.
    # divmod keeps the exponent in exact integer arithmetic.
    threes, remainder = divmod(N, 3)
    if remainder == 0 :
        ans = 3 ** threes
    elif remainder == 1 :
        ans = 3 ** (threes - 1) * 4
    else :
        ans = 3 ** threes * 2
    self.parameter["reference_answer"] = ans
# ---- server/Gym/environments/max_no_conflicting_bombs/environment.py ----
import sys
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class MaxNoConflictingBombs_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P2825
    """Place as many `B` as possible on `*` cells so that any two `B` in a
    row or column are separated by a `#`.

    The gold answer is the size of a maximum bipartite matching between the
    horizontal and the vertical `#`-delimited segments, with one edge per
    `*` cell.
    """

    prompt_template = r"""You are given a {N} × {M} grid. Each cell contains one of the following characters: `#`, `x`, or `*`. You may replace some `*` cells with `B`, under the following condition: no two `B` cells may appear in the same row or column **unless** there is at least one `#` between them (i.e., every pair of `B`s in the same row or column must be separated by at least one `#`). Try your best to maximize the number of `B` cells.

The grid is given in **row-major order**:
{grid}

**Output Format:** Output {N} lines, each containing {M} characters with no separators. Replace selected `*` cells with `B`; all other cells should remain unchanged."""

    # Minimum interpreter recursion limit requested for the augmenting-path DFS.
    _MIN_RECURSION_LIMIT = 10000

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 3.0,
                 **kwargs):
        """
        Initialize the MaxNoConflictingBombs_Environment instance.

        Args:
            wrong_format: Reward for an unparsable output or a grid of the wrong shape.
            invalid_solution: Reward for a grid that alters fixed cells or has conflicting `B`s.
            rewarding_strategy: Either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    @staticmethod
    def _label_segments(lines):
        """Number the maximal runs of non-`#` cells along each line.

        Returns ``(labels, count)``: ``labels[a][b]`` is the run id of the
        b-th cell of the a-th line (-1 for `#`), ids increase in scan order.
        """
        labels = []
        count = 0
        for line in lines:
            line_labels = []
            inside_run = False
            for cell in line:
                if cell == '#':
                    inside_run = False
                    line_labels.append(-1)
                    continue
                if not inside_run:
                    inside_run = True
                    count += 1
                line_labels.append(count - 1)
            labels.append(line_labels)
        return labels, count

    @staticmethod
    def _line_has_conflict(line) -> bool:
        """Return True when two `B` cells of ``line`` have no `#` between them."""
        bomb_in_run = False
        for cell in line:
            if cell == '#':
                bomb_in_run = False
            elif cell == 'B':
                if bomb_in_run:
                    return True
                bomb_in_run = True
        return False

    def _generate(self) -> None:
        """Sample a random grid and store it together with the gold answer.

        Reads ``MAX_N_M`` (>= 2) and writes ``N``, ``M``, ``grid`` and
        ``gold_answer`` into ``self.parameter``.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, MAX_N_M)
        M = self.parameter["M"] = random.randint(2, MAX_N_M)

        raw_weights = [random.randint(1, N * M) for _ in range(3)]
        weight_total = sum(raw_weights)
        weights = [w / weight_total for w in raw_weights]
        grid = self.parameter["grid"] = [
            random.choices(["#", "x", "*"], weights=weights, k=M) for _ in range(N)
        ]

        # row_ids is indexed [i][j]; col_ids comes from the transposed grid, so [j][i].
        row_ids, row_cnt = self._label_segments(grid)
        col_ids, col_cnt = self._label_segments(zip(*grid))

        # One edge per `*` cell: its row segment -> its column segment.
        adjacency = [[] for _ in range(row_cnt)]
        for i in range(N):
            for j in range(M):
                if grid[i][j] == '*':
                    adjacency[row_ids[i][j]].append(col_ids[j][i])

        match_of_col = [-1] * col_cnt

        # Only ever raise the limit: an unconditional setrecursionlimit(10000)
        # would lower a higher limit configured by the host process.
        if sys.getrecursionlimit() < self._MIN_RECURSION_LIMIT:
            sys.setrecursionlimit(self._MIN_RECURSION_LIMIT)

        def augment(u: int, seen: List[bool]) -> bool:
            # Augmenting-path search from row segment u.
            for v in adjacency[u]:
                if seen[v]:
                    continue
                seen[v] = True
                if match_of_col[v] == -1 or augment(match_of_col[v], seen):
                    match_of_col[v] = u
                    return True
            return False

        matched = 0
        for u in range(row_cnt):
            if augment(u, [False] * col_cnt):
                matched += 1

        self.parameter["gold_answer"] = matched

    def _prompt_generate(self) -> str:
        """Return the prompt with the dimensions and the grid (one row per line) filled in."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            grid="\n".join("".join(row) for row in self.parameter["grid"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Split the answer into stripped, non-empty lines; None when there is no answer."""
        if answer is None:
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]

    def scorer(self, output: str) -> float:
        """Validate the submitted grid and reward it by its number of `B` cells."""
        solution = self.processor(output)
        if solution is None:
            return self.rewards["wrong_format"]
        assert isinstance(solution, list), "processed_result should be a list"

        N, M = self.parameter["N"], self.parameter["M"]
        if len(solution) != N or any(len(row) != M for row in solution):
            return self.rewards["wrong_format"]

        # `*` may stay `*` or become `B`; every other cell must be unchanged.
        for solution_row, original_row in zip(solution, self.parameter["grid"]):
            for solution_cell, original_cell in zip(solution_row, original_row):
                if original_cell == "*":
                    if solution_cell not in "*B":
                        return self.rewards["invalid_solution"]
                else:
                    assert original_cell in "#x", "Original cell should be either '#' or 'x'"
                    if solution_cell != original_cell:
                        return self.rewards["invalid_solution"]

        if any(self._line_has_conflict(row) for row in solution):
            return self.rewards["invalid_solution"]
        if any(self._line_has_conflict(column) for column in zip(*solution)):
            return self.rewards["invalid_solution"]

        answer = sum(row.count('B') for row in solution)
        gold = self.parameter["gold_answer"]
        assert answer <= gold, "Answer should not exceed the gold answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta":
            if gold == 0:
                assert answer == 0, "If gold answer is 0, answer should also be 0"
                return self.rewards["rewarding_weight"] * 1.0
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))


# ---- server/Gym/environments/max_nonadjacent_k_element_sum/environment.py ----
import heapq
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class Max_NonAdjacent_KElementSum_Environment(VerifiableEnvironment):
    """Pick exactly K pairwise non-adjacent indices of an array with maximum sum."""

    prompt_template = r"""You are given an array A of {N} positive integers:
{array}

Please select **exactly {K}** indices i1, ..., i{K}, such that:
- No two selected indices are adjacent (i.e., there does not exist any i and i + 1 such that both i and i + 1 are selected).
- The sum A[i1] + ... + A[i{K}] is maximized.

**Output Format:** A single line containing the {K} selected indices in any order, separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the Max_NonAdjacent_KElementSum_Environment instance.

        Args:
            wrong_format: Reward for an output that cannot be parsed as integers.
            invalid_solution: Reward for a selection that breaks a constraint.
            rewarding_strategy: Either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample K and the array, then compute the gold sum.

        Reads ``N`` (>= 4) and writes ``K``, ``A`` and ``gold_answer``.
        The gold is computed greedily on a doubly-linked list: take the
        largest live value, merge it with both neighbours into one node of
        value ``left + right - taken``, and repeat K times.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        K = self.parameter["K"] = random.randint(2, N // 2)
        self.parameter["A"] = [random.randint(1, N) for _ in range(N)]

        values = self.parameter["A"].copy()
        # Sentinel magnitude strictly larger than any achievable sum.
        INF = sum(abs(v) for v in values) + 1

        # Positions 1..N hold the array; 0 and N+1 are sentinels.
        left = list(range(N + 2))
        right = list(range(N + 2))
        weight = [0] * (N + 2)
        removed = [False] * (N + 2)
        for i, v in enumerate(values, start=1):
            weight[i] = v
            left[i] = i - 1
            right[i] = i + 1
        weight[0] = weight[N + 1] = -INF
        left[0], right[0] = 0, 1
        left[N + 1], right[N + 1] = N, N + 1

        # Max-heap via negated weights.
        heap = [(-weight[i], i) for i in range(1, N + 1)]
        heapq.heapify(heap)

        total = 0
        for _ in range(K):
            # Skip entries whose position was merged into a neighbour.
            while removed[heap[0][1]]:
                heapq.heappop(heap)
            negated, pos = heapq.heappop(heap)
            taken = -negated
            total += taken

            l, r = left[pos], right[pos]
            # Splice l and r out of the list, keeping pos as the merged node.
            left[pos] = left[l]
            right[pos] = right[r]
            right[left[pos]] = pos
            left[right[pos]] = pos
            removed[l] = True
            removed[r] = True

            # Picking the merged node later swaps `pos` for its two neighbours.
            weight[pos] = weight[l] + weight[r] - taken
            heapq.heappush(heap, (-weight[pos], pos))

        self.parameter["gold_answer"] = total
        assert total > 0

    def _prompt_generate(self) -> str:
        """Return the prompt with N, K and the 0-indexed array filled in."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            K=self.parameter["K"],
            array=" ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; None when missing or malformed."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Validate the selected indices and reward them by their sum."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        if len(processed_result) != self.parameter["K"]:
            return self.rewards["invalid_solution"]
        if len(processed_result) != len(set(processed_result)):
            return self.rewards["invalid_solution"]
        if not all(0 <= i < self.parameter["N"] for i in processed_result):
            return self.rewards["invalid_solution"]
        ordered = sorted(processed_result)
        if any(b == a + 1 for a, b in zip(ordered, ordered[1:])):
            return self.rewards["invalid_solution"]

        answer = sum(self.parameter["A"][i] for i in ordered)
        gold = self.parameter["gold_answer"]
        assert answer <= gold, "answer should be less than or equal to gold"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta":
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))


# ---- server/Gym/environments/max_permutation/environment.py ----
import random
from functools import cmp_to_key
from typing import Optional, List
from ...environment import VerifiableEnvironment


class MaxPermutation_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1018
    """Order digit strings so that their concatenation is the largest integer."""

    prompt_template = r"""You are given an array `A` of {N} positive integers:
{array}

Your task is to rearrange **all** the elements of the array (each number must be used **exactly once**) to form the **largest possible integer** when the numbers are **concatenated in order**. Treat the numbers as **strings** during concatenation (not as digits or arithmetic values).


Output Format:
Your final answer should be a **single line** with the indices of the chosen arrangement, separated by **spaces**.
Example: `{ALL_INDICES}` (do **NOT** include the backticks or quotes) means the numbers are used in the order: {ALL_ITEMS}.
"""

    def __init__(self,
                 proportion_being_prefix: float = 0.6,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(answer/gold)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the MaxPermutation_Environment instance.

        Args:
            proportion_being_prefix (float): Proportion of the numbers in the array that are prefixes of other numbers.
            wrong_format: Reward for an output that cannot be parsed as integers.
            invalid_solution: Reward for an output that is not a permutation of 0..N-1.
            rewarding_strategy: Either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        assert 0.0 <= proportion_being_prefix < 1.0, "proportion_being_prefix should be in [0.0, 1.0)"
        self.proportion_being_prefix = proportion_being_prefix

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample the array and store it with the gold value and a reference order.

        Reads ``N`` (>= 2) and ``MAX_DIGIT_NUM`` (>= 1); writes ``array``
        (digit strings over {1, 2}), ``gold`` (int) and ``reference_answer``
        (space-separated indices).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "MAX_DIGIT_NUM" in self.parameter, "MAX_DIGIT_NUM is required in parameter"
        MAX_DIGIT_NUM = self.parameter["MAX_DIGIT_NUM"]
        assert MAX_DIGIT_NUM >= 1, "MAX_DIGIT_NUM should be greater than or equal to 1"

        # M full-length strings; the remaining N - M entries are prefixes of them.
        M = N - int(N * self.proportion_being_prefix)
        assert M >= 1, "M should be greater than or equal to 1"
        array = self.parameter["array"] = [
            "".join(str(random.randint(1, 2)) for _ in range(MAX_DIGIT_NUM))
            for _ in range(M)
        ]
        for _ in range(N - M):
            prefix = random.choice(array[:M])
            assert len(prefix) == MAX_DIGIT_NUM, "prefix should have the same length as MAX_DIGIT_NUM"
            array.append(prefix[: random.randint(1, MAX_DIGIT_NUM)])
        random.shuffle(array)

        def by_concatenation(first, second) -> int:
            # Items are (index, value); `first` goes earlier when first+second is larger.
            a, b = first[1], second[1]
            if a + b > b + a:
                return -1
            if a + b < b + a:
                return 1
            return 0

        ranked = sorted(enumerate(array), key=cmp_to_key(by_concatenation))
        self.parameter["gold"] = int("".join(value for _, value in ranked))
        self.parameter["reference_answer"] = " ".join(str(index) for index, _ in ranked)

    def _prompt_generate(self) -> str:
        """Return the prompt; the example arrangement lists the indices in reverse order."""
        N = self.parameter["N"]
        descending = list(reversed(range(N)))
        return self.prompt_template.format(
            N=N,
            array="\n".join("A[{}]={}".format(i, a) for i, a in enumerate(self.parameter["array"])),
            ALL_INDICES=" ".join(str(i) for i in descending),
            ALL_ITEMS=", ".join("A[{}]".format(i) for i in descending),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; None when missing or malformed."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Check that the output is a permutation of 0..N-1 and reward its concatenated value."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(processed_result) != N:
            return self.rewards["invalid_solution"]
        if len(set(processed_result)) != N:
            return self.rewards["invalid_solution"]
        if not all(0 <= i < N for i in processed_result):
            return self.rewards["invalid_solution"]

        answer = int("".join(self.parameter["array"][i] for i in processed_result))
        gold = self.parameter["gold"]
        assert answer <= gold, "answer should be less than or equal to gold"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta":
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
# ---- server/Gym/environments/max_rmq_expectation/environment.py ----
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class MaxRMQExpectation_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3600
    """Expected value of the maximum of interval minima over a random array, modulo a prime."""

    prompt_template = r"""Let's randomly generate an array A[1], ..., A[{N}], where each A[i] is independently and uniformly chosen from the integers 1 to {X} (so there are {X}^{N} possible arrays in total). You are also given {Q} intervals [L[i], R[i]] (1 ≤ i ≤ {Q}):
{intervals}

For each interval [L[i], R[i]], define M[i] = min(A[j]) for L[i] ≤ j ≤ R[i]. Please compute the **expected value** of max(M[1], ..., M[{Q}]) and output the result **modulo {MOD}**."""
    MODs = (666623333, 998244353, 10 ** 9 + 7)

    def __init__(self,
                 wrong_format: float = -1.0, wrong_range: float = -0.5, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the MaxRMQExpectation_Environment instance.

        Args:
            wrong_format: Reward for an output that cannot be parsed as an integer.
            wrong_range: Reward for an integer outside [0, MOD).
            correct_answer: Reward for the exact reference value.
            wrong_answer: Reward for any other in-range integer.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "wrong_range": wrong_range,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    @staticmethod
    def _solve(N: int, X: int, intervals, MOD: int) -> int:
        """Evaluate the reference DP for the given instance, modulo MOD.

        ``reach[i]`` is the largest left endpoint among intervals whose
        right endpoint is below i (0 when there is none). For each
        threshold 1..X, a DP over positions 1..N yields a value ``s`` and
        ``1 - s`` is added to the result.
        """
        reach = [0] * (N + 2)
        for lo, hi in intervals:
            reach[hi + 1] = max(reach[hi + 1], lo)
        for i in range(1, N + 2):
            reach[i] = max(reach[i], reach[i - 1])

        inv_x = pow(X, MOD - 2, MOD)  # modular inverse via Fermat's little theorem
        total = 0
        for threshold in range(1, X + 1):
            p = (threshold - 1) * inv_x % MOD  # (threshold - 1) / X in the field
            q = (1 - p) % MOD
            q_inv = pow(q, MOD - 2, MOD)

            # q_pow[j] = q^j and q_inv_pow[j] = q^(-j)
            q_pow = [1] * (N + 1)
            q_inv_pow = [1] * (N + 1)
            for j in range(1, N + 1):
                q_pow[j] = q_pow[j - 1] * q % MOD
                q_inv_pow[j] = q_inv_pow[j - 1] * q_inv % MOD

            # prefix[j] accumulates state[k] * q^(-k) for k <= j, seeded with prefix[0] = 1.
            state = [0] * (N + 1)
            prefix = [0] * (N + 1)
            prefix[0] = 1
            for j in range(1, N + 1):
                window = prefix[j - 1]
                if reach[j] > 0:
                    window = (window - prefix[reach[j] - 1]) % MOD
                state[j] = p * window % MOD * q_pow[j - 1] % MOD
                prefix[j] = (prefix[j - 1] + state[j] * q_inv_pow[j]) % MOD

            s = 0
            for j in range(reach[N + 1], N + 1):
                s = (s + state[j] * q_pow[N - j]) % MOD

            total = (total + 1 - s) % MOD
        return total

    def _generate(self) -> None:
        """Sample X, the intervals and the modulus, then store the reference answer.

        Reads ``N`` (>= 2); writes ``X``, ``Q``, ``intervals`` (1-based,
        inclusive ``(L, R)`` pairs), ``MOD`` and ``reference_answer``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        X = self.parameter["X"] = random.randint(2, N)
        Q = self.parameter["Q"] = random.randint(1, N)

        intervals = self.parameter["intervals"] = []
        for _ in range(Q):
            L, R = random.randint(1, N), random.randint(1, N)
            intervals.append((min(L, R), max(L, R)))

        MOD = self.parameter["MOD"] = random.choice(self.MODs)
        self.parameter["reference_answer"] = self._solve(N, X, intervals, MOD)

    def _prompt_generate(self) -> str:
        """Return the prompt with N, X, Q, the intervals and the modulus filled in."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            X=self.parameter["X"],
            Q=self.parameter["Q"],
            intervals="\n".join("[{}, {}]".format(L, R) for L, R in self.parameter["intervals"]),
            MOD=self.parameter["MOD"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the raw answer text as an integer; None when missing or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score ``output``: format check, then range check, then equality with the reference."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]):
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]


# ---- server/Gym/environments/max_segment_coverage_constraint/environment.py ----
# First two import lines of the next module; they sit on this patch line and are kept.
import heapq
import random
+from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaxSegmentCoverageConstraint_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3602 + prompt_template = \ +r"""You are given {N} segments (each is a closed interval [l, r]) on the x-axis: +{segments} + +You are also given a list of constraints, where each constraint is a pair (p, x), meaning that the number of selected segments covering point p must be **at most** x: +{constraints} + +Your task is to select the **maximum number of segments** (each can be selected at most once) such that all the constraints are satisfied. Output the indices of the selected segments in one line, separated by spaces.""" + + def __init__(self, + coordinate_multiple : int = 2, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxSegmentCoverageConstraint_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.coordinate_multiple = coordinate_multiple + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + coverages = [0] * (N * self.coordinate_multiple + 1) + + segments = self.parameter["segments"] = [] + for i in range(N) : + l = random.randint(0, N * self.coordinate_multiple) + r = random.randint(l, N * self.coordinate_multiple) + segments.append((l, r)) + coverages[l] += 1 + if r + 1 < len(coverages) : + coverages[r + 1] -= 1 + for p in range(1, len(coverages)) : + coverages[p] += coverages[p - 1] + assert coverages[p] >= 0, "Coverage should be non-negative" + + constraints = [p for p, coverage in enumerate(coverages) if coverage > 0] + constraints = random.sample(constraints, random.randint(1, len(constraints))) + constraints = self.parameter["constraints"] = [(p, random.randint(1, coverages[p])) for p in constraints] + random.shuffle(constraints) + + + # (3) make lists of exactly the needed length + segments = segments.copy() + points = constraints.copy() + + # sort segments by left endpoint, but keep an ID for each + segs = sorted([(l, r, idx) for idx, (l, r) in enumerate(segments)], + key=lambda x: x[0]) + # sort points by position + pts = sorted(points, key=lambda x: x[0]) + + # two heaps: min‐heap over (r, id), max‐heap over (-r, id) + min_heap = [] + max_heap = [] + removed_ids = set() # IDs of segments we've popped (expired or forcibly removed) + size = 0 # current # of active segments covering p + ans = N # start assuming we keep all N + i = 0 # pointer into segs + + def clean_min(): + # drop any heap‐entries whose segment‐id is in removed_ids + while min_heap and 
min_heap[0][1] in removed_ids: + heapq.heappop(min_heap) + + def clean_max(): + while max_heap and max_heap[0][1] in removed_ids: + heapq.heappop(max_heap) + + for p, x in pts: + # 1) add every segment whose left ≤ p + while i < N and segs[i][0] <= p: + _, r, sid = segs[i] + heapq.heappush(min_heap, (r, sid)) + heapq.heappush(max_heap, (-r, sid)) + size += 1 + i += 1 + + # 2) expire any that end before p + clean_min() + while min_heap and min_heap[0][0] < p: + r_exp, sid_exp = heapq.heappop(min_heap) + size -= 1 + removed_ids.add(sid_exp) + clean_min() + + # 3) if we exceed x overlap, remove the segments with the largest r + clean_max() + while size > x: + neg_r, sid_rem = heapq.heappop(max_heap) + size -= 1 + ans -= 1 + removed_ids.add(sid_rem) + clean_max() + + assert ans > 0, "The answer should be greater than 0" + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + segments = "\n".join("Segment {}: [{}, {}]".format(i, l, r) for i, (l, r) in enumerate(self.parameter["segments"])), + constraints = "\n".join("({}, {})".format(p, x) for p, x in self.parameter["constraints"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != len(set(processed_result)) : + return self.rewards["invalid_solution"] + if not all(0 <= i < self.parameter["N"] for i in processed_result) : + return self.rewards["invalid_solution"] + + coverages = [0] * (max(r for l, r in self.parameter["segments"]) + 1) + 
for i in processed_result : + l, r = self.parameter["segments"][i] + coverages[l] += 1 + if r + 1 < len(coverages) : + coverages[r + 1] -= 1 + for p in range(1, len(coverages)) : + coverages[p] += coverages[p - 1] + + for p, x in self.parameter["constraints"] : + assert coverages[p] >= 0, "Coverage should be non-negative" + if coverages[p] > x : + return self.rewards["invalid_solution"] + + answer, gold = len(processed_result), self.parameter["gold_answer"] + assert answer <= gold, "The answer should be less than or equal to the gold answer" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/max_sum_lds/__init__.py b/server/Gym/environments/max_sum_lds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3ce9d5fb8354235c4ee5595493e8e09bd890b82 --- /dev/null +++ b/server/Gym/environments/max_sum_lds/__init__.py @@ -0,0 +1 @@ +from .environment import MaxSumLDS_Environment diff --git a/server/Gym/environments/max_sum_lds/environment.py b/server/Gym/environments/max_sum_lds/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..587d76619e137171396a29c981e810e6021561e2 --- /dev/null +++ b/server/Gym/environments/max_sum_lds/environment.py @@ -0,0 +1,166 @@ +import random +from bisect import bisect_left +from typing import Optional, List, Tuple +from ...environment import VerifiableEnvironment + + +class MaxSumLDS_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3971 + prompt_template = \ +r"""Given a permutation of numbers from 1 
to {N}, denoted as P[1], P[2], ..., P[{N}], define: +- A[0] = 0. For 1 ≤ i ≤ {N}, A[i] = max(A[j]) + 1 such that: (i) 0 ≤ j ≤ i - 1, and (ii) j = 0 **or** P[j] < P[i]. +- B[{N} + 1] = 0. For {N} ≥ i ≥ 1, B[i] = max(B[j]) + 1 such that: (i) i + 1 ≤ j ≤ {N} + 1, and (ii) j = {N} + 1 **or** P[j] < P[i]. + +You are given the array A: {A} +Find a permutation P such that this A is obtained, and **maximize** the value of: B[1] + B[2] + ... + B[{N}]. Output P[1], P[2], ..., P[{N}] in one line, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxSumLDS_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "unsuccessful_solution" : unsuccessful_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def get_A_B(self, P : List[int]) -> Tuple[List[int], List[int]] : + assert len(P) == self.parameter["N"] + 1 + assert P[0] is None, "P[0] should be None" + + A = [0] * (self.parameter["N"] + 2) + for i in range(1, self.parameter["N"] + 1) : + A[i] = max(A[j] for j in range(i) if j == 0 or P[j] < P[i]) + 1 + A[self.parameter["N"] + 1] = None + + B = [0] * (self.parameter["N"] + 2) + for i in range(self.parameter["N"], 1 - 1, -1) : + B[i] = max(B[j] for j in range(i + 1, self.parameter["N"] + 1 + 1) if j == self.parameter["N"] + 1 or P[j] < P[i]) + 1 + B[0] = None + + return A, B + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + P = list(range(1, N + 1)) + random.shuffle(P) + P = [None] + P + + A, B = self.get_A_B(P) + 
self.parameter["A"] = A[: -1] + + B = B[1 : -1] + assert len(B) == N, "B should have length N" + sumB = sum(B) + + + A = A[1 : -1] + assert len(A) == N, "A should have length N" + # Build the adjacency list (nodes 0..N, with 0 as a dummy root) + adj = [[] for _ in range(N + 1)] + last_pos = [0] * (N + 1) # last_pos[k] = last index i with LIS length k seen so far + + for i, x in enumerate(A, start=1): + parent = last_pos[x - 1] + adj[parent].append(i) + adj[i].append(parent) + last_pos[x] = i + + # Match C++ head-insert neighbor order by reversing adjacency lists + for nbrs in adj: + nbrs.reverse() + + # Iterative DFS to get preorder numbers dfn[0..N] + dfn = [0] * (N + 1) + cnt = 0 + stack = [(0, -1, 0)] # (node, parent, next-neighbor-index) + while stack: + u, p, idx = stack.pop() + if idx == 0: + cnt += 1 + dfn[u] = cnt + if idx < len(adj[u]): + v = adj[u][idx] + stack.append((u, p, idx + 1)) + if v != p: + stack.append((v, u, 0)) + + # Shift dfn[1..N] down by 1 (ignore dfn[0]) + for i in range(1, N + 1): + dfn[i] -= 1 + + # Build sequence B: B[i] = dfn[N - i] for i = 0..N-1 (equivalent to b[i]=dfn[n-i+1] in 1-based) + B = [dfn[pos] for pos in range(N, 0, -1)] + + # Compute sum of LIS lengths over B (strictly increasing), using patience sorting with bisect_left + tails = [] + ans = 0 + for v in B: + pos = bisect_left(tails, v) + if pos == len(tails): + tails.append(v) + else: + tails[pos] = v + ans += pos + 1 + + assert 0 < sumB <= ans, "Sum of B should be less than or equal to the answer" + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = ", ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid 
def scorer(self, output : str) -> float :
    """
    Score a permutation answer.

    Returns the `wrong_format` reward when the processor yields None, the
    `invalid_solution` reward when the answer is not a permutation of 1..N,
    the `unsuccessful_solution` reward when its A array differs from the
    stored one, and otherwise a reward derived from sum(B) versus the gold
    answer according to the configured rewarding strategy.
    """
    permutation = self.processor(output)
    if permutation is None :
        return self.rewards["wrong_format"]
    assert isinstance(permutation, list), "processed_result should be a list"

    n = self.parameter["N"]
    is_permutation = len(permutation) == n and set(permutation) == set(range(1, n + 1))
    if not is_permutation :
        return self.rewards["invalid_solution"]

    prefix, suffix = self.get_A_B([None] + permutation)
    # The stored A keeps A[0] but drops the trailing None placeholder.
    if prefix[: -1] != self.parameter["A"] :
        return self.rewards["unsuccessful_solution"]

    answer = sum(suffix[1 : -1])
    gold = self.parameter["gold_answer"]
    assert answer <= gold, "answer should be less than or equal to gold"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
typing import Optional +from ...environment import VerifiableEnvironment + + +class MaxThreeSquareSum_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3625 + prompt_template = \ +r"""You are given a grid of size {N} × {M}, where each cell contains an integer. Please find three **non-overlapping** {K} × {K} squares in the grid such that the sum of all values in the three squares is maximized. The grid is provided as follows: +{grid} + +**Output Format:** Output a single integer — the maximum possible sum of values from the three non-overlapping {K} × {K} squares.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MaxThreeSquareSum_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 4, "MAX_N_M should be greater than or equal to 4" + + N = self.parameter["N"] = random.randint(4, MAX_N_M) + M = self.parameter["M"] = random.randint(4, MAX_N_M) + K = self.parameter["K"] = random.randint(2, min(N, M) // 2) + A = self.parameter["A"] = [[random.randint(0, MAX_N_M) for _ in range(M)] for _ in range(N)] + + + S = [[0]*(M+1) for _ in range(N+1)] + for i in range(N): + for j in range(M): + S[i+1][j+1] = A[i][j] + S[i][j+1] + S[i+1][j] - S[i][j] + + # cal(i,j) = sum of K×K ending at (i,j) + def cal(i, j): + if i < K-1 or j < K-1: + return 0 + return (S[i+1][j+1] + - S[i+1-K][j+1] + - S[i+1][j+1-K] + + S[i+1-K][j+1-K]) + + # mxx[i] = best K×K whose bottom row is i + # mxy[j] = best K×K whose right col is j + mxx = [0]*N + mxy = [0]*M + for i in range(K-1, N): + for j in range(K-1, M): + v = cal(i, j) + if v > mxx[i]: mxx[i] = v + if v > mxy[j]: mxy[j] 
= v + + # a[l][r] = max(mxx[t] for t in [l..r]) + a = [[0]*N for _ in range(N)] + for l in range(N): + a[l][l] = mxx[l] + for r in range(l+1, N): + a[l][r] = max(a[l][r-1], mxx[r]) + + # b[l][r] = max(mxy[t] for t in [l..r]) + b = [[0]*M for _ in range(M)] + for l in range(M): + b[l][l] = mxy[l] + for r in range(l+1, M): + b[l][r] = max(b[l][r-1], mxy[r]) + + # build the four quadrant-DP arrays + lu = [[0]*M for _ in range(N)] + for i in range(N): + for j in range(M): + best = cal(i, j) + if i>0: best = max(best, lu[i-1][j]) + if j>0: best = max(best, lu[i][j-1]) + lu[i][j] = best + + ru = [[0]*M for _ in range(N)] + for i in range(N): + for j in range(M-1, -1, -1): + best = cal(i, j+K-1) if j+K-1 < M else 0 + if i>0: best = max(best, ru[i-1][j]) + if j+10: best = max(best, ld[i][j-1]) + ld[i][j] = best + + rd = [[0]*M for _ in range(N)] + for i in range(N-1, -1, -1): + for j in range(M-1, -1, -1): + best = cal(i+K-1, j+K-1) if i+K-1 < N and j+K-1 < M else 0 + if i+1 ans: + ans = total + + # 2) three vertical strips + for i in range(M): + for j in range(i+K, M-K): + total = b[0][i] + b[i+K][j] + b[j+K][M-1] + if total > ans: + ans = total + + # 3) L-shaped splits + for i in range(N): + for j in range(M): + # top split then horizontal split + if i+K < N and j+1 < M: + ans = max(ans, lu[i][j] + ru[i][j+1] + a[i+K][N-1]) + # bottom split then horizontal split + if i >= K and j+1 < M: + ans = max(ans, ld[i][j] + rd[i][j+1] + a[0][i-1]) + # left split then vertical split + if j+K < M and i+1 < N: + ans = max(ans, lu[i][j] + ld[i+1][j] + b[j+K][M-1]) + # right split then vertical split + if j >= K and i+1 < N: + ans = max(ans, ru[i][j] + rd[i+1][j] + b[0][j-1]) + + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + K = self.parameter["K"], + grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["A"]), + ) + + + def _process(self, 
def scorer(self, output : str) -> float :
    """
    Score an integer answer: `wrong_format` when the processor yields None,
    `correct_answer` on an exact match with the stored reference answer,
    and `wrong_answer` otherwise.
    """
    value = self.processor(output)
    if value is None :
        return self.rewards["wrong_format"]
    outcome = "correct_answer" if value == self.parameter["reference_answer"] else "wrong_answer"
    return self.rewards[outcome]
+ +**Output Format:** Output one line containing the permutation P[1], ..., P[{N}], separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the Max_TreeConstrainedPermutation_Weight_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + self.parameter["W"] = [random.randint(1, N) for _ in range(N)] + self.parameter["A"] = [random.randint(0, i) for i in range(N)] + + + class Da: + __slots__ = ('u', 'sz', 'w') + def __init__(self, u, sz, w): + self.u = u + self.sz = sz + self.w = w + def __lt__(self, other): + # Compare by average weight: want to pop the smallest average first + return self.w * other.sz < other.w * self.sz + + def compute(): + parent = [0] + self.parameter["A"] + weights_input = [0] + self.parameter["W"] + + # Build children lists for the reversed graph + children = [[] for _ in range(N + 1)] + for i in range(1, N + 1): + children[parent[i]].append(i) + + # DFS from 0 to detect reachable nodes and cycles + visited = [False] * (N + 1) + stack = [0] + visited[0] = True + cnt = 1 + while stack: + u = stack.pop() + for v in children[u]: + if visited[v]: + print(-1) + return + visited[v] = True + cnt += 1 + stack.append(v) + # If not all nodes reachable (including 0), there's a cycle + if cnt <= N: + print(-1) + return + + # Initialize DSU, sizes, and weights + dsu = list(range(N + 1)) + size = [1] * (N + 1) + weight = [0] * (N + 1) + for i in range(1, N + 
1): + weight[i] = weights_input[i] + + def find(u): + # Path-compression find + while dsu[u] != u: + dsu[u] = dsu[dsu[u]] + u = dsu[u] + return u + + # Build priority queue of initial nodes + heap = [] + for i in range(1, N + 1): + heapq.heappush(heap, Da(i, 1, weight[i])) + + ans = 0 + # Merge components in order of increasing average weight + while heap: + s = heapq.heappop(heap) + u = find(s.u) + # Skip stale entries + if size[u] != s.sz: + continue + # Merge u into its parent component + p = find(parent[u]) + ans += weight[u] * size[p] + weight[p] += weight[u] + size[p] += size[u] + dsu[u] = p + # Push updated parent component if it's not the root 0 + if p != 0: + heapq.heappush(heap, Da(p, size[p], weight[p])) + + # Output the result + assert ans > 0 + return ans + + self.parameter["gold_answer"] = compute() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + W = " ".join("W[{}]={}".format(i + 1, Wi) for i, Wi in enumerate(self.parameter["W"])), + conditions = "\n".join( + "- The element {} has no constraint.".format(i + 1) + if Ai == 0 + else "- The element {} must come before element {}.".format(Ai, i + 1) + for i, Ai in enumerate(self.parameter["A"]) + ), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + P = processed_result + N = self.parameter["N"] + if len(P) != N : + return self.rewards["invalid_solution"] + if set(P) != set(range(1, N + 1)) : + return self.rewards["invalid_solution"] + + positions = [None] * (N + 1) + for i, Pi in 
enumerate(P) : + positions[Pi] = i + for i, Ai in enumerate(self.parameter["A"]) : + if Ai != 0 and positions[Ai] >= positions[i + 1] : + return self.rewards["invalid_solution"] + + answer, gold = sum(self.parameter["W"][Pi - 1] * (i + 1) for i, Pi in enumerate(P)), self.parameter["gold_answer"] + assert answer <= gold, "answer should be less than or equal to gold" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/max_tree_k_path_coverage/__init__.py b/server/Gym/environments/max_tree_k_path_coverage/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c934b51fb8c02b6e1563f3ea1d70abe995b52975 --- /dev/null +++ b/server/Gym/environments/max_tree_k_path_coverage/__init__.py @@ -0,0 +1 @@ +from .environment import MaxTree_KPathCoverahe_Environment diff --git a/server/Gym/environments/max_tree_k_path_coverage/environment.py b/server/Gym/environments/max_tree_k_path_coverage/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..008bbb6f23f404b20635507bf12b0c86cf3a169e --- /dev/null +++ b/server/Gym/environments/max_tree_k_path_coverage/environment.py @@ -0,0 +1,135 @@ +import random +import networkx +from typing import Optional +from collections import deque +from ...environment import VerifiableEnvironment + + +class MaxTree_KPathCoverahe_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4551 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` 
def _generate(self) -> None :
    """
    Sample a random labelled tree and a path count K, then store the
    reference answer.

    Reads `N` from `self.parameter`; writes `edges`, `K` and `reference_answer`.
    The reference answer is obtained by peeling the tree leaf layer by leaf
    layer and summing, over all layers, min(layer size, 2 * K).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 4, "N should be greater than or equal to 4"

    # Random tree: attach each vertex of a shuffled order to an earlier one.
    edges = self.parameter["edges"] = []
    degrees = [0] * N
    order = list(range(N))
    random.shuffle(order)
    for index in range(1, N) :
        a, b = order[index], random.choice(order[: index])
        low, high = (a, b) if a < b else (b, a)
        edges.append((low, high))
        degrees[low] += 1
        degrees[high] += 1
    random.shuffle(edges)

    assert all(0 <= u < v < N for u, v in edges)
    assert len(edges) == len(set(edges)) == N - 1

    tree = networkx.Graph()
    tree.add_edges_from(edges)
    assert networkx.is_tree(tree)

    leaf_count = degrees.count(1)
    K = self.parameter["K"] = random.randint(1, max(1, leaf_count // 2 - 1))

    # K paths have 2 * K endpoints in total.
    endpoint_budget = 2 * K

    neighbours = [[] for _ in range(N)]
    for u, v in edges :
        neighbours[u].append(v)
        neighbours[v].append(u)

    # remaining[v] reaches 0 when all but one neighbour of v has been peeled.
    remaining = [degree - 1 for degree in degrees]
    layer = [0] * N              # peeling round of each vertex (leaves are round 1)
    layer_sizes = [0] * (N + 1)  # number of vertices peeled in each round
    queue = deque()
    for vertex in range(N) :
        if remaining[vertex] == 0 :
            layer[vertex] = 1
            queue.append(vertex)

    deepest = 0
    while queue :
        x = queue.popleft()
        layer_sizes[layer[x]] += 1
        deepest = max(deepest, layer[x])
        for y in neighbours[x] :
            remaining[y] -= 1
            if remaining[y] == 0 :
                layer[y] = layer[x] + 1
                queue.append(y)

    self.parameter["reference_answer"] = sum(
        min(layer_sizes[k], endpoint_budget) for k in range(1, deepest + 1)
    )
+from .environment import MaxTreeXorPath_Environment diff --git a/server/Gym/environments/max_tree_xor_path/environment.py b/server/Gym/environments/max_tree_xor_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1d4891399fdcb7a41accaa37cdf0c0672de283 --- /dev/null +++ b/server/Gym/environments/max_tree_xor_path/environment.py @@ -0,0 +1,126 @@ +import random +import networkx +from typing import Optional, Tuple +from ...environment import VerifiableEnvironment + + +class MaxTreeXorPath_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4551 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices, labeled from `0` to `{N_minus_1}`. The tree contains the following {N} - 1 = {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Please find a pair of vertices (`u`, `v`) to **maximize the bitwise XOR of all edge weights on the unique path** connecting them. + +**Output Format:** Your final answer should be two integers `u` and `v` (the indices of the selected vertices), separated by a space. Example: `0 1` (do **NOT** include the backticks or quotes).""" + + def __init__(self, + lower_max_weight : int = 2 ** 4, + wrong_format : float = -1.0, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxTreeXorPath_Environment instance. 
def _generate(self) -> None :
    """
    Sample a random weighted tree and store the best XOR-path pair.

    Reads `N` from `self.parameter` and `self.lower_max_weight`; writes
    `edges`, `Xor` (XOR of edge weights from vertex 0 to each vertex),
    `reference_answer` ("u v") and `gold_answer` (the maximal XOR value).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    # Double the weight bound until it exceeds 2 * N.
    max_weight = self.lower_max_weight
    while max_weight <= N * 2 :
        max_weight *= 2

    # Random tree: attach each vertex of a shuffled order to an earlier one.
    edges = self.parameter["edges"] = []
    order = list(range(N))
    random.shuffle(order)
    for index in range(1, N) :
        a, b = order[index], random.choice(order[: index])
        edges.append((min(a, b), max(a, b), random.randint(1, max_weight - 2)))
    random.shuffle(edges)

    assert all(0 <= u < v < N for u, v, w in edges)
    assert len(edges) == len(set((u, v) for u, v, w in edges)) == N - 1

    tree = networkx.Graph()
    tree.add_weighted_edges_from(edges)
    assert networkx.is_tree(tree)

    adj = [[] for _ in range(N)]
    for u, v, w in edges :
        adj[u].append((v, w))
        adj[v].append((u, w))

    # Root-to-vertex XOR values via an explicit-stack traversal from vertex 0.
    Xor = self.parameter["Xor"] = [0] * N
    stack = [(0, -1)]
    while stack :
        node, came_from = stack.pop()
        for nxt, w in adj[node] :
            if nxt != came_from :
                Xor[nxt] = Xor[node] ^ w
                stack.append((nxt, node))

    # The XOR along the path u..v equals Xor[u] ^ Xor[v]; keep the first strict maximum.
    best_pair, best_value = (0, 1), Xor[0] ^ Xor[1]
    for u in range(N) :
        for v in range(u + 1, N) :
            value = Xor[u] ^ Xor[v]
            if value > best_value :
                best_pair, best_value = (u, v), value

    self.parameter["reference_answer"] = "{} {}".format(*best_pair)
    self.parameter["gold_answer"] = best_value
def scorer(self, output : str) -> float :
    """
    Score a vertex-pair answer.

    Returns the `wrong_format` reward when the processor yields None or a
    vertex is outside 0..N-1; otherwise compares the XOR of the chosen path
    with the gold value according to the configured rewarding strategy.
    """
    pair = self.processor(output)
    if pair is None :
        return self.rewards["wrong_format"]

    u, v = pair
    n = self.parameter["N"]
    if not (0 <= u < n) or not (0 <= v < n) :
        return self.rewards["wrong_format"]

    prefix_xor = self.parameter["Xor"]
    answer = prefix_xor[u] ^ prefix_xor[v]
    gold = self.parameter["gold_answer"]
    assert answer <= gold, "answer <= gold"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
Source : https://www.luogu.com.cn/problem/P3649 + prompt_template = \ +r"""You are given a string S: {S} +Please find a palindromic string T such that the product of T's length and the number of times T occurs in S is maximized. +**Output Format:** Output a single line containing the string T.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxWeightPalindromicSubstring_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + a_probability = random.uniform(0.3, 0.7) + S = self.parameter["S"] = "".join("a" if random.random() < a_probability else "b" for _ in range(N)) + + + def max_palindrome_existence_value(S: str) -> int: + """ + Build a palindromic tree (Eertree) for S and compute the maximum + existence value among all palindromic substrings: length * frequency. 
+ """ + N = len(S) + # We will have at most N+2 distinct palindromes plus two roots + size = 1 # the last-used node index + # length of palindrome at each node + length = [0] * (N + 3) + # failure link (longest proper palindromic suffix) for each node + fail = [0] * (N + 3) + # count of how many times this palindrome occurs as a suffix during construction + count = [0] * (N + 3) + # transitions: for each node, map character -> next node + trans = [dict() for _ in range(N + 3)] + + # two roots: + # node 1: imaginary palindrome of length -1 + # node 0: empty palindrome of length 0 + length[1] = -1 + fail[0] = 1 + fail[1] = 1 + + last = 0 # the node corresponding to the longest palindromic suffix of S[:i] + + for i, c in enumerate(S): + cur = last + # find the longest suffix-palindrome of S[:i] that we can extend by c + while True: + if i - length[cur] - 1 >= 0 and S[i - length[cur] - 1] == c: + break + cur = fail[cur] + + # if there is no outgoing edge for c, create a new node + if c not in trans[cur]: + size += 1 + length[size] = length[cur] + 2 + + # compute failure link for the new node + f = fail[cur] + while True: + if i - length[f] - 1 >= 0 and S[i - length[f] - 1] == c: + break + f = fail[f] + # may be 0 if it's the first single-character palindrome + fail[size] = trans[f].get(c, 0) + + # link cur --c--> size + trans[cur][c] = size + + # move to that node and count one occurrence + last = trans[cur][c] + count[last] += 1 + + # propagate the counts from longer palindromes to their suffix-palindromes + ans = 0 + for u in range(size, 1, -1): + # existence value = length[u] * total occurrences of this palindrome + ans = max(ans, length[u] * count[u]) + count[fail[u]] += count[u] + + assert ans > 0 + return ans + self.parameter["gold_answer"] = max_palindrome_existence_value(S) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(S = self.parameter["S"]) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not 
def scorer(self, output : str) -> float :
    """
    Score a palindrome answer.

    Returns the `wrong_format` reward when the processor yields None and the
    `invalid_solution` reward when the answer is not a palindrome.  Otherwise
    the answer's value is its length times its number of (possibly
    overlapping) occurrences in S, compared with the gold value according to
    the configured rewarding strategy.
    """
    candidate = self.processor(output)
    if candidate is None :
        return self.rewards["wrong_format"]
    if candidate != candidate[::-1] :
        return self.rewards["invalid_solution"]

    def overlapping_occurrences(text, pattern) :
        """Count start positions of `pattern` in `text`; 0 if either is empty."""
        if not pattern or not text :
            return 0
        total = 0
        start = text.find(pattern)
        while start != -1 :
            total += 1
            start = text.find(pattern, start + 1)
        return total

    answer = len(candidate) * overlapping_occurrences(self.parameter["S"], candidate)
    gold = self.parameter["gold_answer"]
    assert answer <= gold

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
b/server/Gym/environments/max_xor_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e3122f104a13b8864cbbcca88ac571551c09048f --- /dev/null +++ b/server/Gym/environments/max_xor_path/environment.py @@ -0,0 +1,155 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MaxXorPath_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4151 + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Find a path from vertex `0` to vertex `{N_minus_1}` such that the XOR of the weights of the edges in the path is maximized. Output the maximum XOR value.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MaxXorPath_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + assert "MAX_bit_length" in self.parameter, "MAX_bit_length is required in parameter" + MAX_bit_length = self.parameter["MAX_bit_length"] + assert MAX_bit_length >= 2, "MAX_bit_length should be greater than or equal to 2" + + while True : + adjacent = [[] for _ in range(N)] + for u, v in random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2)) : + adjacent[u].append(v) + adjacent[v].append(u) + + base_size_upper = random.randint(0, MAX_bit_length - 1) + + edges = self.parameter["edges"] = [] + + P = [0] * MAX_bit_length + base_size = 0 + + def insert_into_basis(x: int) -> None: + """ + Insert x into the XOR basis P. + """ + nonlocal P, base_size + cur = x + for i in range(MAX_bit_length - 1, -1, -1): + if not ((cur >> i) & 1): + continue + if P[i] == 0: + P[i] = cur + base_size += 1 + return + cur ^= P[i] + + def maximize_with_basis(x: int) -> int: + """ + Given a number x, maximize x XOR (any combination of basis vectors). 
+ """ + res = x + for i in range(MAX_bit_length - 1, -1, -1): + if P[i] != 0 and (res ^ P[i]) > res: + res ^= P[i] + return res + + # Arrays to track visited nodes and the XOR-distance from node 0 + visited = [False] * N + xor_to = [0] * N + + edge2weight = {} + + def DFS(u : int) -> None : + visited[u] = True + for nbr in adjacent[u]: + if not visited[nbr]: + w = random.randint(0, 2 ** MAX_bit_length - 1) + if (min(u, nbr), max(u, nbr)) not in edge2weight : + edges.append((min(u, nbr), max(u, nbr), w)) + edge2weight[(min(u, nbr), max(u, nbr))] = w + xor_to[nbr] = xor_to[u] ^ w + DFS(nbr) + else: + if (min(u, nbr), max(u, nbr)) not in edge2weight : + if base_size < base_size_upper : + w = random.randint(0, 2 ** MAX_bit_length - 1) + else : + w = xor_to[u] ^ xor_to[nbr] + for i in range(MAX_bit_length - 1, -1, -1) : + if random.random() < 0.5 : + w ^= P[i] + edges.append((min(u, nbr), max(u, nbr), w)) + edge2weight[(min(u, nbr), max(u, nbr))] = w + else : + w = edge2weight[(min(u, nbr), max(u, nbr))] + cycle_xor = xor_to[u] ^ w ^ xor_to[nbr] + insert_into_basis(cycle_xor) + DFS(0) + if not visited[N - 1] : + continue + + self.parameter["reference_answer"] = maximize_with_basis(xor_to[N - 1]) + if self.parameter["reference_answer"] < 2 ** MAX_bit_length - 1 : + random.shuffle(edges) + break + + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + for u, v, w in edges : + assert 0 <= u < v < N + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = 
self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/max_xor_set/__init__.py b/server/Gym/environments/max_xor_set/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed4e554e3fbece727ebaa48c60a2d32420c50087 --- /dev/null +++ b/server/Gym/environments/max_xor_set/__init__.py @@ -0,0 +1 @@ +from .environment import MaxXorSet_Environment diff --git a/server/Gym/environments/max_xor_set/environment.py b/server/Gym/environments/max_xor_set/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..6e44fecac1021e4661001d746e0b66287ec025d7 --- /dev/null +++ b/server/Gym/environments/max_xor_set/environment.py @@ -0,0 +1,114 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaxXorSet_Environment(VerifiableEnvironment): # Source: https://www.luogu.com.cn/problem/P3812 + prompt_template = \ +r"""You are given an array A of {N} positive integers: +{array} + +Please select indices i_1, ..., i_k (k is arbitrary) to maximize A[i_1] XOR ... XOR A[i_k] (i.e., the bitwise XOR of the selected elements). + +Output Format: Your final answer should be a **single line** containing i_1, ..., i_k (the indices of the selected elements), separated by **spaces**.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaxXorSet_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "MAX_bit_length" in self.parameter, "MAX_bit_length is required in parameter" + MAX_bit_length = self.parameter["MAX_bit_length"] + assert MAX_bit_length >= 2, "MAX_bit_length should be greater than or equal to 2" + + A = self.parameter["A"] = [random.randint(1, 2 ** MAX_bit_length - 2) for _ in range(N)] + + + max_value = max(A) + max_bit_index = max_value.bit_length() - 1 # if max_value is 0, this will be -1 + + # Initialize the basis array P with size = max_bit_index+1 + # If max_bit_index == -1 (all A are zero), P will be an empty list. + P = [0] * (max_bit_index + 1) + + def insert(x): + k = x + # Insert k into the basis + for i in range(max_bit_index, -1, -1): + if not (k >> i) & 1: + continue + if P[i] == 0: + P[i] = k + return + k ^= P[i] + + def max_xor(): + res = 0 + for i in range(max_bit_index, -1, -1): + res = max(res, res ^ P[i]) + return res + + # Build the basis + for x in A: + insert(x) + + self.parameter["gold_answer"] = max_xor() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + array = "\n".join("A[{}]={}".format(i, a) for i, a in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = 
self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if not all(0 <= i < self.parameter["N"] for i in processed_result) : + return self.rewards["invalid_solution"] + + answer = 0 + for i in processed_result : + answer ^= self.parameter["A"][i] + + assert answer <= self.parameter["gold_answer"], "answer should be less than or equal to gold" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / self.parameter["gold_answer"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == self.parameter["gold_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_achromatic_number/__init__.py b/server/Gym/environments/maximum_achromatic_number/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b8d1ddfb7f87270311d3d0235634a3a2d924139 --- /dev/null +++ b/server/Gym/environments/maximum_achromatic_number/__init__.py @@ -0,0 +1 @@ +from .environment import MaximumAchromaticNumber_Environment diff --git a/server/Gym/environments/maximum_achromatic_number/environment.py b/server/Gym/environments/maximum_achromatic_number/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9e97e4e2cc5b10241659232d69b789f82b6435b3 --- /dev/null +++ b/server/Gym/environments/maximum_achromatic_number/environment.py @@ -0,0 +1,141 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaximumAchromaticNumber_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to 
`{N_minus_1}`. +The graph contains the following undirected edges: +{edges} + +Your task is to assign a **non-negative integer color** to each vertex, represented as `c[0], c[1], ..., c[{N_minus_1}]`, such that: +- For every edge `(u, v)` in the graph, `c[u] ≠ c[v]` — adjacent vertices must have different colors. +- For every pair of two distinct used colors `x` and `y`, there exists **at least one edge** `(u, v)` such that `c[u] = x` and `c[v] = y`, i.e., this is a *complete coloring*. +- The total number of **distinct colors used** (i.e., the number of unique values among `c[0]` to `c[{N_minus_1}]`) is **maximized** - try your best to find a valid coloring using as many colors as possible. + +**Output Format:** +Your final answer should be a single line containing the color of each vertex in order: `c[0], c[1], ..., c[{N_minus_1}]`, separated by **spaces**.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaximumAchromaticNumber_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 1" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2)) + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + + self.parameter["reference_answer"] = None + self.parameter["gold_answer"] = 0 + + adjacent = [0] * N + smaller_adjacents = [[] for u in range(N)] + for u, v in edges : + adjacent[u] |= 1 << v + adjacent[v] |= 1 << u + smaller_adjacents[max(u, v)].append(min(u, v)) + + colors, color2set = [None] * N, [0] * N + def DFS(u : int, max_color : int) -> int : + if (max_color + 1) + (N - u) <= self.parameter["gold_answer"] : + return + if u == N : + color_adjacent = [[False] * (max_color + 1) for _ in range(max_color + 1)] + satisfied_color_pair_num = 0 + for u, v in edges : + color_u, color_v = min(colors[u], colors[v]), max(colors[u], colors[v]) + assert color_u != color_v, "Adjacent vertices should have different colors" + if not color_adjacent[color_u][color_v] : + color_adjacent[color_u][color_v] = True + satisfied_color_pair_num += 1 + assert satisfied_color_pair_num <= (max_color + 1) * max_color // 2, "The number of satisfied color pairs should not exceed the maximum possible pairs" + if satisfied_color_pair_num == (max_color + 1) * max_color // 2 : + 
self.parameter["reference_answer"], self.parameter["gold_answer"] = colors.copy(), max_color + 1 + return + for color in range((max_color + 1) + 1) : + if (color2set[color] & adjacent[u]) == 0 : + colors[u] = color + color2set[color] += 1 << u + DFS(u + 1, max(max_color, color)) + color2set[color] -= 1 << u + DFS(0, -1) + + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"])) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + colors = processed_result + if len(colors) != self.parameter["N"] : + return self.rewards["invalid_solution"] + adjacent_color_pairs = set() + for u, v in self.parameter["edges"] : + if colors[u] == colors[v] : + return self.rewards["invalid_solution"] + adjacent_color_pairs.add((min(colors[u], colors[v]), max(colors[u], colors[v]))) + + assert len(adjacent_color_pairs) <= len(set(colors)) * (len(set(colors)) - 1) // 2, "The number of adjacent color pairs should not exceed the maximum possible pairs" + if len(adjacent_color_pairs) < len(set(colors)) * (len(set(colors)) - 1) // 2 : + return self.rewards["invalid_solution"] + + gold, answer = self.parameter["gold_answer"], len(set(colors)) + assert answer <= gold, "The number of distinct colors used should not exceed the gold answer" + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return 
self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_clique/__init__.py b/server/Gym/environments/maximum_clique/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e8cdd906389fbaef6bf65a66e0ceb81d5fb46308 --- /dev/null +++ b/server/Gym/environments/maximum_clique/__init__.py @@ -0,0 +1 @@ +from .environment import MaximumClique_Environment diff --git a/server/Gym/environments/maximum_clique/environment.py b/server/Gym/environments/maximum_clique/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..da8877fdbce7702f66775e6a747fa618b1d17403 --- /dev/null +++ b/server/Gym/environments/maximum_clique/environment.py @@ -0,0 +1,128 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaximumClique_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges: +{edges} + +Your task is to select a subset of vertices `v1, v2, ..., vk` such that: +- 0 ≤ v1, v2, ..., vk < {N} and all selected vertices are **distinct**. +- The selected vertices form a **clique** — that is, **every pair** of distinct selected vertices is connected by **at least one edge**. +- Your goal is to **maximize** the number of selected vertices k. + +**Output Format:** +Your final answer should be a single line containing the selected vertex indices `v1, v2, ..., vk`, separated by **spaces**. 
+Example: `0 2 3` (do **NOT** include the backticks or quotes); this means the selected clique has size k = 3, with vertices 0, 2, and 3. +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MaximumClique_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2)) + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + + adjacent = [0] * N + for u, v in edges : + adjacent[u] |= 1 << v + adjacent[v] |= 1 << u + self.parameter["reference_answer"] = [] + clique = [] + + def DFS(u : int, allowed_set : int) -> None : + if len(clique) + (N - u) <= len(self.parameter["reference_answer"]) : + return + if u == N : + assert len(clique) > len(self.parameter["reference_answer"]) + self.parameter["reference_answer"] = clique.copy() + if allowed_set & (1 << u) : + clique.append(u) + DFS(u + 1, allowed_set & adjacent[u]) + clique.pop() + DFS(u + 1, allowed_set) + DFS(0, (1 << N) - 1) + + self.parameter["gold_answer"] = len(self.parameter["reference_answer"]) + 
self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"])) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + clique = processed_result + if len(clique) != len(set(clique)) : + return self.rewards["invalid_solution"] + for vertex in clique : + if not (0 <= vertex < self.parameter["N"]) : + return self.rewards["invalid_solution"] + edges = set(map(tuple, self.parameter["edges"])) + for u in clique : + for v in clique : + if u < v : + if (u, v) not in edges : + return self.rewards["invalid_solution"] + + gold, answer = self.parameter["gold_answer"], len(clique) + assert answer <= gold, "answer should be less than or equal to gold" + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_divisor/__init__.py b/server/Gym/environments/maximum_divisor/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..c8baf5a3bc4ab86f9840870554e1fcd343a58a4f --- /dev/null +++ b/server/Gym/environments/maximum_divisor/__init__.py @@ -0,0 +1 @@ +from .environment import MaximumDivisor_Environment diff --git a/server/Gym/environments/maximum_divisor/environment.py b/server/Gym/environments/maximum_divisor/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..055d9e982f187f797c03741a7ee741b1feb647f3 --- /dev/null +++ b/server/Gym/environments/maximum_divisor/environment.py @@ -0,0 +1,103 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MaximumDivisor_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2440 + prompt_template = \ +r"""You are given an array A of length {N}. The values are as follows (indexing starts at 0): +{A} + +Please find the **maximum positive integer L** such that the following inequality holds: [A[0] / L] + [A[1] / L] + ... + [A[{N_minus_1}] / L] >= {K}, where [x] denotes the **floor function** (i.e., rounding down to the nearest integer). + +**Output Format:** +Your final answer should be a single line containing the value of L. +""" + + + def __init__(self, + random_range_coefficient : int = 20, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = +5.0, + **kwargs) : + """ + Initialize the MaximumDivisor_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.random_range_coefficient = random_range_coefficient + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N must be at least 2" + + K = self.parameter["K"] = random.randint(1, N * max(1, N // self.random_range_coefficient)) + + A = self.parameter["A"] = [random.randint(1, N) for i in range(N)] + + if sum(A) < K : + A[0] += K - sum(A) + assert sum(A) >= K, "sum(A) must be at least K" + random.shuffle(A) + + + def check(l) : + return sum(li // l for li in A) >= K + + l, r = 1, max(A) + 1 + while l < r : + m = (l + r) // 2 + if check(m) : + l = m + 1 + else : + r = m + self.parameter["reference_answer"] = l - 1 + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + A = " ".join(map(str, self.parameter["A"])), + K = self.parameter["K"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if sum(li // processed_result for li in self.parameter["A"]) >= self.parameter["K"] : + assert processed_result <= self.parameter["reference_answer"] + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((processed_result / self.parameter["reference_answer"]) ** self.rewards["rewarding_beta"]) + elif 
self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == processed_result) + else : + raise ValueError("Invalid rewarding strategy") + else : + return self.rewards["invalid_solution"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_independent_set_grid/__init__.py b/server/Gym/environments/maximum_independent_set_grid/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3730d54cebc9656063279232b8203359fdadc2dd --- /dev/null +++ b/server/Gym/environments/maximum_independent_set_grid/__init__.py @@ -0,0 +1 @@ +from .environment import MaximumIndependentSetGrid_Environment diff --git a/server/Gym/environments/maximum_independent_set_grid/environment.py b/server/Gym/environments/maximum_independent_set_grid/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..452c1aa41239e351e3b8b30efb2f7e4feb554876 --- /dev/null +++ b/server/Gym/environments/maximum_independent_set_grid/environment.py @@ -0,0 +1,134 @@ +import random +import networkx as nx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaximumIndependentSetGrid_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2774 + prompt_template = \ +r"""You are given a matrix of size {N} × {M}. Select some cells such that **no two selected cells are adjacent** (i.e., no two selected cells share a horizontal or vertical edge). Try your best to maximize the sum of the values in the selected cells. The matrix is given below (in **row-major order**): +{matrix} + +**Output Format:** Output {N} lines, each with {M} digits (0 or 1) and no separators. 
A `1` means the corresponding cell is selected; a `0` means it is not.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0, + **kwargs) : + """ + Initialize the MaximumIndependentSetGrid_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + NUM = self.parameter["matrix"] = [[random.randint(1, max(N, M)) for _ in range(M)] for _ in range(N)] + + + # Total sum of all cell weights + TOTAL = sum(sum(row) for row in NUM) + # Use TOTAL as the "infinite" capacity for inter-cell edges + INF = TOTAL + + # Build a directed graph for the min-cut formulation + G = nx.DiGraph() + SOURCE, SINK = 's', 't' + + # Add edges from SOURCE→odd‐parity cells and even‐parity cells→SINK + # plus infinite‐capacity edges between adjacent cells + for i in range(N): + for j in range(M): + u = (i, j) + weight = NUM[i][j] + + if (i + j) % 2 == 1: + # Odd parity: source → u with capacity = weight + G.add_edge(SOURCE, u, capacity=weight) + # Connect to each of its neighbors with infinite capacity + for di, dj in [(-1, 0), (1, 0), (0, -1), (0, 1)]: + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M: + v = (ni, nj) + G.add_edge(u, v, capacity=INF) + else: + # Even parity: u → sink with capacity = weight + G.add_edge(u, SINK, capacity=weight) + + # Compute the maximum flow (which equals the minimum cut 
capacity) + flow_value, _ = nx.maximum_flow(G, SOURCE, SINK) + + # By König's theorem on bipartite graphs: + # max_weight_independent_set = TOTAL - min_vertex_cover_weight + # and min_vertex_cover_weight = flow_value + self.parameter["gold_answer"] = TOTAL - flow_value + assert self.parameter["gold_answer"] > 0 + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + matrix = "\n".join(" ".join(str(x) for x in row) for row in self.parameter["matrix"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + matrix.append(line.strip()) + return matrix + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + N, M = self.parameter["N"], self.parameter["M"] + solution = processed_result + + if len(solution) != N or any(len(row) != M for row in solution) : + return self.rewards["wrong_format"] + if any(c not in '01' for row in solution for c in row) : + return self.rewards["wrong_format"] + + answer, gold = 0, self.parameter["gold_answer"] + for i in range(N) : + for j in range(M) : + if solution[i][j] == '1' : + answer += self.parameter["matrix"][i][j] + for di, dj in ((-1, 0), (+1, 0), (0, -1), (0, +1)) : + ni, nj = i + di, j + dj + if 0 <= ni < N and 0 <= nj < M and solution[ni][nj] == '1' : + return self.rewards["invalid_solution"] + assert answer <= gold, "Answer should not exceed the gold answer" + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / self.parameter["gold_answer"]) ** self.rewards["rewarding_beta"]) + elif 
self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == self.parameter["gold_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_independent_set_tree/__init__.py b/server/Gym/environments/maximum_independent_set_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3c85c15ee4ddd8f366a89e1a3895a1e23cbb1ca4 --- /dev/null +++ b/server/Gym/environments/maximum_independent_set_tree/__init__.py @@ -0,0 +1 @@ +from .environment import Maximum_IndependentSet_Tree_Environment diff --git a/server/Gym/environments/maximum_independent_set_tree/environment.py b/server/Gym/environments/maximum_independent_set_tree/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..42c97041b54b8558d18245301537f79e02632a77 --- /dev/null +++ b/server/Gym/environments/maximum_independent_set_tree/environment.py @@ -0,0 +1,142 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class Maximum_IndependentSet_Tree_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1352 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The tree contains the following {N} - 1 = {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v`**: +{edges} + +Each vertex has a weight, given as a list `R` of length {N}, where `R[i]` is the weight of vertex `i`. 
The weights are as follows: +{R} + +Your task is to select a set of distinct vertices `x_1, x_2, ..., x_k` (you determine `k`), such that **no two selected vertices are adjacent**. +Your goal is to **maximize the total weight**: R[x_1] + R[x_2] + ... + R[x_k]. + +**Output Format:** +Your final answer should be a single line containing the selected vertices in **any order**, separated by **spaces**. +Example: `0 1 {N_minus_1}` (do **NOT** include the backticks or quotes); this means k = 3, with selected vertices x_1 = 0, x_2 = 1, and x_3 = {N_minus_1}. +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0, + **kwargs) : + """ + Initialize the Maximum_IndependentSet_Tree_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + edges = self.parameter["edges"] = [] + childrens = [[] for u in range(N)] + + permutations = list(range(N)) + random.shuffle(permutations) + root = permutations[0] + for index, child in enumerate(permutations) : + if index == 0 : + continue + parent = random.choice(permutations[: index]) + childrens[parent].append(child) + u, v = min(parent, child), max(parent, child) + edges.append((u, v)) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)) == N - 1 + + tree = networkx.Graph() + tree.add_edges_from(edges) + assert networkx.is_tree(tree) + + self.parameter["R"] = [random.randint(1, N) for vertex in range(N)] + + + dpF = [None] * N + def dp(u) : + dpF[u] = [0, 
self.parameter["R"][u]] + for child in childrens[u] : + dp(child) + dpF[u][0] += max(dpF[child]) + dpF[u][1] += dpF[child][0] + dp(root) + self.parameter["reference_weight"] = max(dpF[root]) + + picked = [] + def Pick(u, pick) : + if pick : + picked.append(u) + for child in childrens[u] : + if pick : + Pick(child, False) + else : + Pick(child, bool(dpF[child][0] < dpF[child][1])) + Pick(root, dpF[root][0] < dpF[root][1]) + + self.parameter["reference_answer"] = " ".join(map(str, picked)) + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + R = "\n".join("R[{}] = {}".format(i, self.parameter["R"][i]) for i in range(N)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + picked = processed_result + if len(set(picked)) != len(picked) : + return self.rewards["invalid_solution"] + if not all((0 <= vertex < self.parameter["N"]) for vertex in picked) : + return self.rewards["invalid_solution"] + picked = set(picked) + for u, v in self.parameter["edges"] : + if u in picked and v in picked : + return self.rewards["invalid_solution"] + + answer = sum(self.parameter["R"][u] for u in picked) + gold = self.parameter["reference_weight"] + assert answer <= gold + + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif 
self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise ValueError("Invalid rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_lexicographical_order_subsequence/__init__.py b/server/Gym/environments/maximum_lexicographical_order_subsequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..71003956ed1fbce029ebac921016281840d618b7 --- /dev/null +++ b/server/Gym/environments/maximum_lexicographical_order_subsequence/__init__.py @@ -0,0 +1 @@ +from .environment import MaximumLexicographicalOrderSubsequence_Environment diff --git a/server/Gym/environments/maximum_lexicographical_order_subsequence/environment.py b/server/Gym/environments/maximum_lexicographical_order_subsequence/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..57a8a494253ac8c86848f5b962619df51349d817 --- /dev/null +++ b/server/Gym/environments/maximum_lexicographical_order_subsequence/environment.py @@ -0,0 +1,105 @@ +import random +from collections import deque +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MaximumLexicographicalOrderSubsequence_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3487 + prompt_template = \ +r"""Given an array A of length {N}: {A} + +Please find a (not necessarily contiguous) subsequence of length {K} (i.e., select {K} elements with increasing indices: 0 <= i1 < ... < i{K} < {N}) such that the resulting subsequence A[i1], ..., A[i{K}] is **lexicographically maximal**. 
Output a single line containing the selected subsequence A[i1], ..., A[i{K}], separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the MaximumLexicographicalOrderSubsequence_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + K = self.parameter["K"] = random.randint(2, N - 1) + A = self.parameter["A"] = [random.randint(1, N) for _ in range(N)] + + + self.parameter["gold_answer"] = [] + q = deque() + # Process each element, maintaining a monotonic queue of at most K candidates + for i in range(N): + # Remove smaller elements from the back + while q and q[-1] < A[i]: + q.pop() + # Append current element if we still have fewer than K candidates + if len(q) < K: + q.append(A[i]) + # Once we've seen the first N-K+1 elements, start outputting + if i >= N - K: + # The front of the deque is the next lexicographically maximal element + self.parameter["gold_answer"].append(q[0]) + # Remove it before moving on + q.popleft() + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"])) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + K = self.parameter["K"], + A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + 
answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["K"] : + return self.rewards["invalid_solution"] + + i = 0 + for a in processed_result : + found = False + while i < self.parameter["N"] : + if self.parameter["A"][i] == a : + found = True + i += 1 + if found : + break + if not found : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["K"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/maximum_point_segment_matching/__init__.py b/server/Gym/environments/maximum_point_segment_matching/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0cff2317287550010366d06d0ccd68c384433d0c --- /dev/null +++ b/server/Gym/environments/maximum_point_segment_matching/__init__.py @@ -0,0 +1 @@ +from .environment import MaximumPointSegmentMatching_Environment diff --git a/server/Gym/environments/maximum_point_segment_matching/environment.py b/server/Gym/environments/maximum_point_segment_matching/environment.py new file mode 100644 index 
class MaximumPointSegmentMatching_Environment(VerifiableEnvironment) :
    """Ask for a maximum matching between integer points and closed segments.

    A point may be matched to a segment only if it lies inside it, and every
    point / segment takes part in at most one pair.  The gold value is the
    size of a maximum matching, computed greedily at generation time.
    """

    prompt_template = \
r"""You are given {C} points, indexed from 0 to {C_minus_1}:
{points}

You are also given {N} segments (each represented as a closed interval [l, r], meaning both endpoints are inclusive), indexed from 0 to {N_minus_1}:
{segments}

A valid matching is a set of pairs (c, n), where:
- `c` is the index of a point (0 <= c < {C}) and `n` is the index of a segment (0 <= n < {N}),
- point `c` lies within segment `n` (i.e., the point is contained in the segment),
- **no point is matched to more than one segment**, and **no segment is matched to more than one point**.

I want you to find the **maximum matching** between points and segments.
The number of your output lines should equal the size of your matching. Output one line for each matched pair - each line should contain two integers `c` and `n`, separated by a space, indicating a matched pair (point index, segment index)."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the MaximumPointSegmentMatching_Environment instance.

        Args:
            wrong_format: reward returned when the output cannot be parsed.
            invalid_solution: reward returned when the parsed pairs are not a valid matching.
            rewarding_strategy: either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the reward of a valid matching.
            rewarding_beta: exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    @staticmethod
    def _max_matching_size(points : List[int], segments : List[Tuple[int, int]]) -> int :
        """Return the size of a maximum point/segment matching (inputs are not modified)."""
        available = sorted(points)
        matched = 0
        # Handle segments by right endpoint ascending (ties: left endpoint descending)
        # and give each one the smallest still-unused point that is >= its left endpoint.
        for left, right in sorted(segments, key = lambda segment : (segment[1], -segment[0])) :
            idx = bisect.bisect_left(available, left)
            if idx < len(available) and available[idx] <= right :
                matched += 1
                available.pop(idx)  # each point can be used only once
        return matched


    def _generate(self) -> None :
        """Sample points and segments until at least one pair can be matched.

        Reads "MAX_C_N" from self.parameter and writes "C", "N", "points",
        "segments" and "gold_answer".
        """
        assert "MAX_C_N" in self.parameter, "MAX_C_N is required in parameter"
        MAX_C_N = self.parameter["MAX_C_N"]
        # C and N are drawn from [2, MAX_C_N]; a smaller bound would make
        # random.randint fail with an empty range instead of a clear message.
        assert MAX_C_N >= 2, "MAX_C_N should be greater than or equal to 2"

        C = self.parameter["C"] = random.randint(2, MAX_C_N)
        N = self.parameter["N"] = random.randint(2, MAX_C_N)
        while True :
            points = self.parameter["points"] = [random.randint(0, MAX_C_N) for _ in range(C)]

            segments = self.parameter["segments"] = []
            for _ in range(N) :
                length = random.randint(0, MAX_C_N)
                l = random.randint(0, MAX_C_N - length)
                segments.append((l, l + length))

            gold = self._max_matching_size(points, segments)
            # Resample instances whose optimum is 0: the scorer divides by the gold value.
            if gold > 0 :
                self.parameter["gold_answer"] = gold
                break


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the generated points and segments."""
        C = self.parameter["C"]
        N = self.parameter["N"]
        return self.prompt_template.format(
            C = C,
            C_minus_1 = C - 1,
            points = "\n".join("point {}: {}".format(i, p) for i, p in enumerate(self.parameter["points"])),
            N = N,
            N_minus_1 = N - 1,
            segments = "\n".join("segment {}: [{}, {}]".format(i, l, r) for i, (l, r) in enumerate(self.parameter["segments"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[List[Tuple[int, int]]] :
        """Parse one "c n" pair per non-empty line; return None on any malformed line."""
        if answer is None :
            return None

        pairs = []
        try :
            for line in answer.strip().splitlines() :
                line = line.strip()
                if line :
                    # Both a non-integer token and a wrong token count raise ValueError.
                    c, n = map(int, line.split())
                    pairs.append((c, n))
        except ValueError :
            return None
        return pairs


    def scorer(self, output : str) -> float :
        """Score a model output.

        Returns the "wrong_format" reward when the output cannot be parsed, the
        "invalid_solution" reward when the pairs are out of range, not contained,
        or reuse a point / segment, and otherwise a reward based on matching size.

        Raises:
            NotImplementedError: if the configured rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        C, N = self.parameter["C"], self.parameter["N"]
        points, segments = self.parameter["points"], self.parameter["segments"]
        used_points, used_segments = [False] * C, [False] * N
        for c, n in processed_result :
            if not (0 <= c < C) or not (0 <= n < N) :
                return self.rewards["invalid_solution"]
            if not (segments[n][0] <= points[c] <= segments[n][1]) :
                return self.rewards["invalid_solution"]
            if used_points[c] or used_segments[n] :
                return self.rewards["invalid_solution"]
            used_points[c] = used_segments[n] = True

        answer, gold = len(processed_result), self.parameter["gold_answer"]
        assert 0 <= answer <= gold, "Answer should be between 0 and gold_answer"
        if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (answer == gold)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class Maximum_SubsequenceNum_Environment(VerifiableEnvironment) :
    """Append N values in [0, K) to a sequence so that it has as many distinct subsequences as possible."""

    prompt_template = \
r"""We want to obtain a sequence of length {M} + {N} = {M_plus_N} from an initial sequence of length {M} by appending {N} integers, each in [0, {K}). The initial sequence of length {M}: {A_first_M}

Try your best to maximize the number of essentially different subsequences of the final sequence.
Subsequence: picking some (>= 1) integers from the sequence in order, not necessarily contiguous.
Essentially different: only the sequence of values matters — same values in the same relative order are considered the same.

Your final answer should be a single line containing the {N} integers you appended to the initial sequence, separated by spaces, each in [0, {K}).
"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the Maximum_SubsequenceNum_Environment instance.

        The five reward settings are stored in self.rewards; every other keyword
        argument is passed through to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    def subsequence_num(self, A : List[int]) :
        """Count the distinct non-empty subsequences of A[1:].

        A is 1-indexed: A[0] is a placeholder and is never read.
        """
        M, N, K = self.parameter["M"], self.parameter["N"], self.parameter["K"]
        total = M + N
        assert len(A) == total + 1

        # counts[i] = number of distinct subsequences (empty one included) of A[1..i]
        counts = [1] + [0] * total
        # previous[v] = latest position holding value v, 0 meaning "not seen yet"
        previous = [0] * K
        for pos in range(1, total + 1) :
            value = A[pos]
            counts[pos] = counts[pos - 1] * 2
            if previous[value] != 0 :
                # remove the subsequences already produced by the earlier occurrence
                counts[pos] -= counts[previous[value] - 1]
            previous[value] = pos
        return counts[total] - 1

    def _generate(self) -> None :
        """Draw the initial sequence, then build the reference answer and its gold count."""
        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        assert "K" in self.parameter, "K is required in parameter"
        K = self.parameter["K"]
        assert K >= 2, "K should be greater than or equal to 2"

        initial = self.parameter["A"] = [random.randint(0, K - 1) for _ in range(M)]

        sequence = [-1] + initial
        assert len(sequence) == M + 1

        last = [0] * K
        for pos, value in enumerate(initial, start = 1) :
            last[value] = pos
        # Always append the value whose latest occurrence is oldest (lowest value on ties).
        for pos in range(M + 1, M + N + 1) :
            choice = last.index(min(last))
            sequence.append(choice)
            last[choice] = pos

        self.parameter["reference_answer"] = " ".join(map(str, sequence[M + 1 :]))
        self.parameter["gold_answer"] = self.subsequence_num(sequence)

    def _prompt_generate(self) -> str :
        """Render the prompt for the current M, N, K and initial sequence."""
        M, N = self.parameter["M"], self.parameter["N"]
        initial_text = " ".join(str(a) for a in self.parameter["A"])
        return self.prompt_template.format(
            M = M,
            N = N,
            K = self.parameter["K"],
            M_plus_N = M + N,
            A_first_M = initial_text,
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse whitespace-separated integers; None when missing or not all integers."""
        if answer is None :
            return None  # Invalid answer format
        try :
            return [int(token) for token in answer.strip().split()]
        except ValueError :
            return None  # Invalid answer format

    def scorer(self, output : str) -> float :
        """Score the appended values by the number of distinct subsequences they yield."""
        appended = self.processor(output)
        if appended is None :
            return self.rewards["wrong_format"]
        assert isinstance(appended, list), "processed_result should be a list"

        if len(appended) != self.parameter["N"] :
            return self.rewards["invalid_solution"]
        K = self.parameter["K"]
        if any(a < 0 or a >= K for a in appended) :
            return self.rewards["invalid_solution"]

        sequence = [-1] + self.parameter["A"] + appended
        answer = self.subsequence_num(sequence)
        gold = self.parameter["gold_answer"]
        assert answer <= gold, "answer should be less than or equal to gold"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta" :
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class MaximumWeightMatching_Environment(VerifiableEnvironment) :
    """Ask for a maximum-weight matching in a random weighted undirected graph."""

    prompt_template = \
r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`.

The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**:
{edges}

Your task is to select a subset of edges `S = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that:
- Each selected edge must exist in the graph.
- **Each vertex appears in at most one edge** in the set `S` — in other words, no two edges in `S` share a vertex.
- Your goal is to **maximize** the total weight of the selected edges `w_1 + w_2 + ... + w_k`.

**Output Format:**
Your final answer should be a single line containing the endpoints of the selected edges in order: `u_1 v_1 u_2 v_2 ... u_k v_k`, separated by **spaces**.
Example: `0 1 3 4` (do **NOT** include the backticks or quotes); this means k = 2 edges are selected: `(0, 1, w_1)` and `(3, 4, w_2)`.
"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the MaximumWeightMatching_Environment instance.

        Args:
            wrong_format: reward returned when the output cannot be parsed into endpoint pairs.
            invalid_solution: reward returned when the pairs are not a matching of existing edges.
            rewarding_strategy: either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the reward of a valid matching.
            rewarding_beta: exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """Sample a weighted graph and solve it with networkx.

        Reads "N" and "edge_density" from self.parameter and writes "edges",
        "reference_answer" and "gold_weight".
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "edge_density" in self.parameter, "edge_density is required in parameter"
        edge_density = self.parameter["edge_density"]
        assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

        # Every pair u < v gets a weight first, then a density-sized subset is kept.
        # The shuffle is kept so that seeded runs produce the same instances as before.
        candidates = [(u, v, random.randint(1, N)) for u in range(N) for v in range(u + 1, N)]
        edges = self.parameter["edges"] = random.sample(candidates, int(edge_density * N * (N - 1) / 2))
        random.shuffle(edges)

        assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique"
        for u, v, w in edges :
            assert 0 <= u < v < N

        G = networkx.Graph()
        G.add_weighted_edges_from(edges)
        matching = networkx.max_weight_matching(G, maxcardinality = False)
        self.parameter["reference_answer"] = " ".join("{} {}".format(u, v) for u, v in matching)

        # Edges are stored with u < v, so normalise the solver's pairs before the lookup.
        edge2weight = {(u, v) : w for u, v, w in edges}
        self.parameter["gold_weight"] = sum(edge2weight[(min(u, v), max(u, v))] for u, v in matching)

    def _prompt_generate(self) -> str :
        """Fill the prompt template with the vertex count and the edge list."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse whitespace-separated integers; None when missing or not all integers."""
        if answer is None :
            return None # Invalid answer format
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """Score a model output.

        Returns the "wrong_format" reward when the output is unparsable or has an
        odd number of endpoints, the "invalid_solution" reward when a vertex is
        reused or a pair is not an edge of the graph, and otherwise a reward
        based on the total weight of the selected edges.

        Raises:
            NotImplementedError: if the configured rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        if len(processed_result) % 2 != 0 :
            return self.rewards["wrong_format"]
        # A matching never repeats a vertex, so all listed endpoints must be distinct.
        if len(set(processed_result)) != len(processed_result) :
            return self.rewards["invalid_solution"]
        matches = [(processed_result[i], processed_result[i + 1]) for i in range(0, len(processed_result), 2)]

        edge2weight = {(u, v) : w for u, v, w in self.parameter["edges"]}
        answer_weight = 0
        for u, v in matches :
            u, v = min(u, v), max(u, v)
            if (u, v) not in edge2weight :
                return self.rewards["invalid_solution"]
            answer_weight += edge2weight[(u, v)]

        gold_weight = self.parameter["gold_weight"]
        assert answer_weight <= gold_weight, "answer_weight should be less than or equal to gold_weight"

        if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
            # gold_weight is 0 only when the graph has no edges; the empty matching is then
            # the sole valid answer and is optimal, so score it as a perfect ratio
            # instead of dividing by zero.
            ratio = 1.0 if gold_weight == 0 else answer_weight / gold_weight
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (answer_weight == gold_weight)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class Maze_Environment(VerifiableEnvironment) :
    """Ask for a shortest path through a random N×N maze from (0, 0) to (N-1, N-1)."""

    prompt_template = \
r"""You are given a {N}×{N} grid representing a maze. Each cell in the grid is either a wall (`#`) or an open space (`.`). The maze is provided in the following format:
{maze}

Your task is to find the **shortest path** from the top-left corner `(0, 0)` to the bottom-right corner `({N_minus_1}, {N_minus_1})`.
You may move only in the four cardinal directions: **up, down, left, and right**, and only through open spaces (`.`).

**Output Format:**
Your final answer should be a single line containing the sequence of moves, where each move is represented by a character:
- `L` = left
- `R` = right
- `U` = up
- `D` = down
For example, `RRDDLLUU` (do **NOT** include the backticks or quotes) means: right, right, down, down, left, left, up, up.
"""
    # Move character -> (row delta, column delta).
    action2delta = {
        "L" : (0, -1),
        "R" : (0, +1),
        "U" : (-1, 0),
        "D" : (+1, 0),
    }

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the Maze_Environment instance.

        Args:
            wrong_format: reward returned when the output is missing or contains a non-move character.
            invalid_solution: reward returned when a move leaves the grid or enters a wall.
            unsuccessful_solution: reward returned when the walk does not end at the bottom-right corner.
            rewarding_strategy: either "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the reward of a path that reaches the goal.
            rewarding_beta: exponent used by the "(gold/answer)^beta" strategy.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "unsuccessful_solution" : unsuccessful_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """Sample mazes until the goal is reachable, then store one shortest path.

        Reads "N" and "density" from self.parameter and writes "maze" (a list of
        row strings) and "reference_answer" (a shortest move string).
        """
        # Imported here because the module only imports queue.Queue, whose locking
        # is unnecessary for a single-threaded breadth-first search.
        from collections import deque

        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "density" in self.parameter, "density is required in parameter"
        density = self.parameter["density"]
        assert 0.0 <= density < 1.0, "density should be between 0.0 and 1.0"

        while True :
            maze = [["#" if random.random() < density else "." for col in range(N)] for row in range(N)]
            maze[0][0] = maze[N - 1][N - 1] = "."

            # prev[x][y] is the cell from which (x, y) was first reached; None = unvisited.
            prev = [[None] * N for row in range(N)]
            prev[0][0] = (0, 0)
            frontier = deque([(0, 0)])
            while frontier :
                x, y = frontier.popleft()
                for dx, dy in self.action2delta.values() :
                    nx, ny = x + dx, y + dy
                    if 0 <= nx < N and 0 <= ny < N and maze[nx][ny] == "." and prev[nx][ny] is None :
                        prev[nx][ny] = (x, y)
                        frontier.append((nx, ny))

            # Keep only mazes in which the goal is reachable.
            if prev[N - 1][N - 1] is not None :
                break

        self.parameter["maze"] = ["".join(row) for row in maze]

        # Walk the predecessor links back from the goal, turning each step into its move.
        delta2action = {delta : action for action, delta in self.action2delta.items()}
        path = []
        x, y = N - 1, N - 1
        while (x, y) != (0, 0) :
            px, py = prev[x][y]
            path.append(delta2action[(x - px, y - py)])
            x, y = px, py
        path.reverse()
        self.parameter["reference_answer"] = "".join(path)

    def _prompt_generate(self) -> str :
        """
        Generate the prompt for the problem.
        """
        N = self.parameter["N"]
        N_minus_1 = N - 1
        return self.prompt_template.format(N = N, N_minus_1 = N_minus_1, maze = "\n".join(self.parameter["maze"]))


    def _process(self, answer : Optional[str]) -> Optional[str] :
        """Return the answer with surrounding whitespace removed, or None if it is missing."""
        if answer is not None :
            return answer.strip()
        else :
            return None


    def scorer(self, output : str) -> float :
        """Replay the moves on the maze and score the walk.

        Returns the "wrong_format" reward for a missing answer or an unknown move
        character, "invalid_solution" for a move off the grid or into a wall,
        "unsuccessful_solution" when the walk ends elsewhere than the goal, and
        otherwise a reward comparing its length with the reference path.

        Raises:
            NotImplementedError: if the configured rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        actions = processed_result
        N = self.parameter["N"]
        maze = self.parameter["maze"]
        x, y = 0, 0
        for action in actions :
            if action not in self.action2delta :
                return self.rewards["wrong_format"]
            dx, dy = self.action2delta[action]

            nx, ny = x + dx, y + dy
            if not (0 <= nx < N and 0 <= ny < N) :
                return self.rewards["invalid_solution"]
            if maze[nx][ny] == "#" :
                return self.rewards["invalid_solution"]
            x, y = nx, ny
        if (x, y) != (N - 1, N - 1) :
            return self.rewards["unsuccessful_solution"]

        # Reaching the goal from (0, 0) needs at least one move, so len(actions) > 0 below.
        gold_length = len(self.parameter["reference_answer"])
        assert len(actions) >= gold_length, "actions should be greater than or equal to reference_answer"

        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold_length / len(actions)) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold_length == len(actions))
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class MinConversionToCycleCost_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3651
    """Task: rewrite entries of a pointer array ``A`` so the graph becomes one directed cycle, cheaply.

    ``_generate`` draws a random instance (``A`` with ``A[i] != i`` and costs ``C``) and stores
    the computed optimum under ``parameter["gold_answer"]``.  ``scorer`` checks that a proposed
    array describes a single cycle through all vertices and rewards it relative to that optimum.
    """

    prompt_template = \
r"""You are given a **directed graph** with {N} vertices, labeled from 0 to {N_minus_1}. Each vertex `i` has exactly one incoming edge from vertex `A[i]` to vertex `i`. The initial array A is given as: {A}

You are allowed to modify A[i] to any other vertex `j` (0 ≤ j < {N}) at a cost of C[i]. The cost array is given as: {C}

Your goal is to make the entire graph form a **single directed cycle** (i.e., each vertex has exactly one incoming and one outgoing edge, and all vertices are reachable from each other). Try your best to **minimize the total cost** of modifications.

**Output Format:** A single line containing the final A[0], A[1], ..., A[{N_minus_1}], separated by **spaces**."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, unsuccessful_solution: float = -0.2,
                 rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 3.0,
                 **kwargs):
        """Record the reward configuration; remaining keyword arguments go to the base class.

        Args:
            wrong_format: Reward returned when the output cannot be parsed.
            invalid_solution: Reward for an array of the wrong length or with out-of-range entries.
            unsuccessful_solution: Reward for a well-formed array that is not one cycle over all vertices.
            rewarding_strategy: Either "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(gold/answer)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            unsuccessful_solution=unsuccessful_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw a random instance of size ``parameter["N"]`` and compute its gold answer.

        Writes ``A``, ``C`` and ``gold_answer`` into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # A[i] is redrawn until it differs from i, so no vertex points at itself.
        A = self.parameter["A"] = []
        for i in range(N):
            pick = random.randint(0, N - 1)
            while pick == i:
                pick = random.randint(0, N - 1)
            A.append(pick)
        assert len(A) == N, "A should have exactly N elements"

        C = self.parameter["C"] = [random.randint(1, N) for _ in range(N)]

        # indegree[v] = number of indices i with A[i] == v.
        indegree = [0] * N
        for target in A:
            indegree[target] += 1

        # Vertices that nobody points at are the starting points for peeling.
        pending = deque(v for v, degree in enumerate(indegree) if degree == 0)

        # kept_in[v]: the largest cost seen so far among peeled edges ending at v (0 = none yet).
        kept_in = [0] * N
        total = 0

        # With no such vertices the graph consists of cycles only; if the walk from
        # vertex 0 already visits all N vertices, nothing needs to change.
        if not pending:
            seen = [False] * N
            steps, cursor = 0, 0
            while not seen[cursor]:
                seen[cursor] = True
                steps += 1
                cursor = A[cursor]
            if steps == N:
                self.parameter["gold_answer"] = total
                return

        # Peel the non-cycle vertices inward.  Whenever two peeled edges compete for
        # the same target, the cheaper one is paid for and the dearer one is kept.
        while pending:
            x = pending.popleft()
            y = A[x]
            if kept_in[y]:
                cheaper, dearer = sorted((kept_in[y], C[x]))
                total += cheaper
                kept_in[y] = dearer
            else:
                kept_in[y] = C[x]
            indegree[y] -= 1
            if indegree[y] == 0:
                pending.append(y)

        # Only cycle vertices still have a positive indegree.
        for start in range(N):
            if indegree[start] <= 0:
                continue
            savings = []
            cursor = start
            # Walk the cycle once, zeroing indegrees so it is not processed again.
            while indegree[A[cursor]] > 0:
                following = A[cursor]
                indegree[following] = 0
                total += kept_in[following]
                savings.append(kept_in[following] - C[cursor])
                cursor = following
            savings.sort()
            # The largest difference is always taken back; smaller ones only when positive.
            total -= savings[-1]
            total -= sum(d for d in savings[:-1] if d > 0)
        self.parameter["gold_answer"] = total

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the current instance."""
        N = self.parameter["N"]
        listed_A = " ".join(f"A[{i}]={value}" for i, value in enumerate(self.parameter["A"]))
        listed_C = " ".join(f"C[{i}]={value}" for i, value in enumerate(self.parameter["C"]))
        return self.prompt_template.format(N=N, N_minus_1=N - 1, A=listed_A, C=listed_C)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return ``None`` for ``None`` input or non-integer tokens."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a model output against the stored instance.

        Returns the configured ``wrong_format`` / ``invalid_solution`` / ``unsuccessful_solution``
        reward on the corresponding failure, otherwise a value derived from the gold cost and the
        cost of the proposed array.  ``self.processor`` comes from the base class (presumably it
        wraps ``_process`` -- confirm there).

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        proposal = self.processor(output)
        if proposal is None:
            return self.rewards["wrong_format"]
        assert isinstance(proposal, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(proposal) != N:
            return self.rewards["invalid_solution"]
        if any(not (0 <= a < N) for a in proposal):
            return self.rewards["invalid_solution"]

        # Follow pointers from vertex 0 until a vertex repeats; a single cycle over all
        # vertices must return to 0 and must have touched every vertex on the way.
        seen = [False] * N
        node = 0
        while not seen[node]:
            seen[node] = True
            node = proposal[node]
        if node != 0:
            return self.rewards["unsuccessful_solution"]
        if not all(seen):
            return self.rewards["unsuccessful_solution"]

        gold = self.parameter["gold_answer"]
        answer = sum(
            cost
            for old, new, cost in zip(self.parameter["A"], proposal, self.parameter["C"])
            if old != new
        )
        assert gold <= answer

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            if answer == 0:
                assert gold == 0, "If answer is 0, gold should also be 0"
                return self.rewards["rewarding_weight"] * 1.0
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class MinCostReducingLNDS_Environment(VerifiableEnvironment):  # Submitted to https://www.luogu.com.cn/problem/P3308
    """Task: erase elements of ``A`` (paying ``B[i]`` each) so the longest non-decreasing subsequence shrinks.

    ``_generate`` builds a random instance, computes the minimum total cost with a max-flow /
    min-cut on a node-split layered graph, and also extracts one concrete optimal index set.
    ``scorer`` re-computes the subsequence length after the proposed erasures.
    """

    prompt_template = \
r"""You are given two arrays A and B, both of length {N}:
A: {A}
B: {B}
You may erase any (distinct) elements from A. When you erase element A[i], you must pay a cost of B[i]. Please reduce the length of the **longest non-decreasing subsequence** (not necessarily contiguous) of A by **at least 1**, while minimizing the total cost of the erased elements.
**Output Format:** Output a single line containing the **indices** of the elements you choose to erase, separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, unsuccessful_solution: float = -0.2,
                 rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 3.0,
                 **kwargs):
        """Record the reward configuration; remaining keyword arguments go to the base class.

        Args:
            wrong_format: Reward returned when the output cannot be parsed.
            invalid_solution: Reward for out-of-range or repeated indices.
            unsuccessful_solution: Reward when the subsequence length did not decrease.
            rewarding_strategy: Either "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(gold/answer)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            unsuccessful_solution=unsuccessful_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw a random instance of size ``parameter["N"]`` and solve it.

        Writes ``A``, ``B``, ``original_lnds_length``, ``gold_answer`` and
        ``reference_answer`` into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        self.parameter["A"] = [random.randint(1, max(1, N * 2)) for _ in range(N)]
        self.parameter["B"] = [random.randint(1, N) for _ in range(N)]

        class Edge:
            """Residual-graph edge; ``orig`` remembers the capacity to restore on reset."""
            __slots__ = ("to", "rev", "cap", "orig")

            def __init__(self, to, rev, cap):
                self.to, self.rev = to, rev
                self.cap = self.orig = cap

        # 1-based working copies; C[i] is the tie-breaking key used when extracting the cut.
        A = [0] + self.parameter["A"].copy()
        B = [0] + self.parameter["B"].copy()
        C = [0] + list(range(1, N + 1))

        # Element i is split into an in-node i and an out-node N + i.
        V = 2 * N + 2
        SRC, SINK = 0, V - 1
        adj = [[] for _ in range(V)]
        level = [-1] * V
        ptr = [0] * V

        def connect(u, v, c):
            """Add edge u->v with capacity c together with its zero-capacity reverse edge."""
            adj[u].append(Edge(v, len(adj[v]), c))
            adj[v].append(Edge(u, len(adj[u]) - 1, 0))

        def build_levels():
            """BFS from the source over positive-capacity edges; True when the sink is reached."""
            for node in range(V):
                level[node] = -1
            level[SRC] = 0
            frontier = deque([SRC])
            while frontier:
                u = frontier.popleft()
                for e in adj[u]:
                    if e.cap > 0 and level[e.to] < 0:
                        level[e.to] = level[u] + 1
                        frontier.append(e.to)
            return level[SINK] >= 0

        def augment(u, limit):
            """Push at most ``limit`` units from u to the sink along the level graph."""
            if u == SINK:
                return limit
            while ptr[u] < len(adj[u]):
                e = adj[u][ptr[u]]
                if e.cap > 0 and level[e.to] == level[u] + 1:
                    sent = augment(e.to, min(limit, e.cap))
                    if sent:
                        e.cap -= sent
                        adj[e.to][e.rev].cap += sent
                        return sent
                ptr[u] += 1
            return 0

        def max_flow():
            """Run Dinic's algorithm on the current capacities and return the flow value."""
            value = 0
            while build_levels():
                ptr[:] = [0] * V
                sent = augment(SRC, INF)
                while sent:
                    value += sent
                    sent = augment(SRC, INF)
            return value

        def residual_reaches(u, t):
            """True when t can be reached from u through positive-capacity residual edges."""
            marked = [False] * V
            marked[u] = True
            frontier = deque([u])
            while frontier:
                x = frontier.popleft()
                if x == t:
                    return True
                for e in adj[x]:
                    if e.cap > 0 and not marked[e.to]:
                        marked[e.to] = True
                        frontier.append(e.to)
            return False

        # 1) Split edges in->out with capacity B[i]; remember where they live for later removal.
        split_at = [None] * (N + 1)
        for i in range(1, N + 1):
            inner, outer = i, N + i
            pos_inner, pos_outer = len(adj[inner]), len(adj[outer])
            connect(inner, outer, B[i])
            split_at[i] = (inner, pos_inner, outer, pos_outer)

        # 2) dp[i] = length of the longest non-decreasing subsequence ending at position i.
        dp = [0] * (N + 1)
        for i in range(1, N + 1):
            dp[i] = max((dp[j] + 1 for j in range(1, i) if A[j] <= A[i]), default=1)

        K = max(dp[1:])
        self.parameter["original_lnds_length"] = K

        # 3) Layer-to-layer edges get a capacity larger than the sum of all costs.
        S = sum(B[1:]) + 1
        INF = S

        for i in range(1, N + 1):
            if dp[i] == 1:
                connect(SRC, i, INF)
            if dp[i] == K:
                connect(N + i, SINK, INF)
            for j in range(1, i):
                if A[j] <= A[i] and dp[j] + 1 == dp[i]:
                    connect(N + j, i, INF)

        # 4) The max-flow value is stored as the gold (minimum) cost.
        flow = max_flow()
        assert flow > 0, "The flow should be greater than 0"
        self.parameter["gold_answer"] = flow

        # 5) Scan elements by key; one whose split edge is saturated beyond repair in the
        #    residual graph is taken, removed for good, and the flow is recomputed.
        chosen = []
        for _, idx in sorted((C[i], i) for i in range(1, N + 1)):
            if residual_reaches(idx, N + idx):
                continue
            chosen.append(idx)
            inner, pos_inner, outer, pos_outer = split_at[idx]
            adj[inner][pos_inner].orig = 0
            adj[outer][pos_outer].orig = 0
            for edge_list in adj:
                for e in edge_list:
                    e.cap = e.orig
            level = [-1] * V
            ptr = [0] * V
            if max_flow() == 0:
                break

        # 6) Convert to 0-based indices and cross-check the cost against the flow value.
        ans = [i - 1 for i in chosen]
        assert self.parameter["gold_answer"] == sum(self.parameter["B"][i] for i in ans), \
            f"Gold answer {self.parameter['gold_answer']} does not match computed cost {sum(self.parameter['B'][i] for i in ans)}"
        self.parameter["reference_answer"] = " ".join(map(str, ans))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the current instance."""
        listed_A = " ".join(f"A[{i}]={value}" for i, value in enumerate(self.parameter["A"]))
        listed_B = " ".join(f"B[{i}]={value}" for i, value in enumerate(self.parameter["B"]))
        return self.prompt_template.format(N=self.parameter["N"], A=listed_A, B=listed_B)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return ``None`` for ``None`` input or non-integer tokens."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a model output: validate indices, re-measure the subsequence, compare costs.

        ``self.processor`` comes from the base class (presumably it wraps ``_process`` --
        confirm there).

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        indices = self.processor(output)
        if indices is None:
            return self.rewards["wrong_format"]
        assert isinstance(indices, list), "processed_result should be a list"

        N = self.parameter["N"]
        erased = [False] * N
        for index in indices:
            if not (0 <= index < N):
                return self.rewards["invalid_solution"]
            if erased[index]:
                return self.rewards["invalid_solution"]
            erased[index] = True

        # Quadratic DP for the longest non-decreasing subsequence of what is left.
        kept = [value for position, value in enumerate(self.parameter["A"]) if not erased[position]]
        best_ending = []
        for i, current in enumerate(kept):
            length = 1
            for j in range(i):
                if kept[j] <= current:
                    length = max(length, best_ending[j] + 1)
            best_ending.append(length)
        remaining_length = max(best_ending) if best_ending else 0

        assert remaining_length <= self.parameter["original_lnds_length"]
        if remaining_length == self.parameter["original_lnds_length"]:
            return self.rewards["unsuccessful_solution"]

        answer = sum(self.parameter["B"][index] for index in indices)
        gold = self.parameter["gold_answer"]
        assert gold <= answer, "Gold answer should be less than or equal to the answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * int(gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class MinCostTreeCoverage_Environment(VerifiableEnvironment):  # Submitted to https://www.luogu.com.cn/problem/P3267
    """Task: pick vertices of a tree so every required vertex lies within distance ``D`` of a pick, cheaply.

    ``_generate`` builds a random tree, a random set of vertices to cover, a radius ``D`` and
    costs ``W``, then computes the optimum with a tree DP.  ``scorer`` simulates the coverage
    of a proposed vertex set.
    """

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` to `{N_minus_1}`. The tree contains {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v`**:
{edges}

You may select any subset of vertices. When a vertex `u` is selected, it **covers** all vertices that are reachable from `u` by a path containing at most {D} edges (i.e., within distance ≤ {D} in terms of edge count). You are required to cover the following vertices: {covered_vertices}
Each selected vertex `u` incurs a cost of `W[u]`. The cost array is: {W}
Try your best to **minimize the total cost** of the selected vertices while ensuring all required vertices are covered.

**Output Format:** A single line containing the selected vertex indices in any order, separated by **spaces**."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, unsuccessful_solution: float = -0.2,
                 rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 3.0,
                 **kwargs):
        """Record the reward configuration; remaining keyword arguments go to the base class.

        Args:
            wrong_format: Reward returned when the output cannot be parsed.
            invalid_solution: Reward for repeated or out-of-range vertices.
            unsuccessful_solution: Reward when some required vertex stays uncovered.
            rewarding_strategy: Either "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(gold/answer)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            unsuccessful_solution=unsuccessful_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw a random instance of size ``parameter["N"]`` and compute its gold answer.

        Writes ``edges``, ``covered_vertices``, ``D``, ``W`` and ``gold_answer`` into
        ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Random tree: vertices arrive in shuffled order and attach to an earlier arrival.
        edges = self.parameter["edges"] = []
        arrival = list(range(N))
        random.shuffle(arrival)
        depths = [None] * N
        depths[arrival[0]] = 0
        for index in range(1, N):
            newcomer = arrival[index]
            anchor = random.choice(arrival[:index])
            depths[newcomer] = depths[anchor] + 1
            edges.append((min(newcomer, anchor), max(newcomer, anchor)))
        random.shuffle(edges)

        for u, v in edges:
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)) == N - 1

        how_many = random.randint(1, N)
        covered_vertices = self.parameter["covered_vertices"] = random.sample(range(N), k=how_many)

        # The radius is at most half the depth of the deepest required vertex (and at least 1).
        deepest = max(depths[vertex] for vertex in covered_vertices)
        D = self.parameter["D"] = random.randint(1, max(1, deepest // 2))

        W = self.parameter["W"] = [random.randint(1, N) for _ in range(N)]

        important = [False] * N
        for vertex in covered_vertices:
            important[vertex] = True

        neighbours = [[] for _ in range(N)]
        for u, v in edges:
            neighbours[u].append(v)
            neighbours[v].append(u)

        # ---------- DP tables ----------------------------------------------------
        K = D
        INF = sum(W) + 1  # exceeds the cost of selecting every vertex

        # Two tables indexed by vertex and a distance 0..K.  The original author's notes
        # describe dp as "u not yet covered by an ancestor" and fdp as "u already covered
        # by an ancestor" -- NOTE(review): confirm the exact state meaning against P3267.
        dp = [[INF] * (K + 1) for _ in range(N)]
        fdp = [[INF] * (K + 1) for _ in range(N)]
        for node in range(N):
            dp[node][K] = W[node]
            if important[node]:
                fdp[node][0] = 0
            else:
                dp[node][0] = 0

        # ---------- iterative DFS from vertex 0: parents, children, visiting order
        parent = [-1] * N
        parent[0] = 0  # sentinel so the root is never re-entered
        children = [[] for _ in range(N)]
        order = []
        stack = [0]
        while stack:
            u = stack.pop()
            order.append(u)
            for v in neighbours[u]:
                if parent[v] == -1:
                    parent[v] = u
                    children[u].append(v)
                    stack.append(v)

        def running_minima(node):
            """[min(dp[node])] followed by running minima that fold in fdp[node][0..K-1]."""
            minima = [min(dp[node])]
            for i in range(1, K + 1):
                minima.append(min(minima[-1], fdp[node][i - 1]))
            return minima

        # ---------- merge children into parents, children first -------------------
        for u in reversed(order):
            for v in children[u]:
                tru = running_minima(u)
                trv = running_minima(v)

                merged_dp = [
                    min(dp[u][i] + trv[i], dp[v][i + 1] + tru[i + 1], INF)
                    for i in range(K)
                ]
                merged_dp.append(min(dp[u][K] + trv[K], INF))

                merged_fdp = [min(fdp[u][0] + trv[0], INF)]
                merged_fdp.extend(
                    min(fdp[u][i] + trv[i], fdp[v][i - 1] + tru[i], INF)
                    for i in range(1, K + 1)
                )

                dp[u], fdp[u] = merged_dp, merged_fdp

        # ---------- answer --------------------------------------------------------
        self.parameter["gold_answer"] = min(dp[0])
        assert self.parameter["gold_answer"] > 0, "Gold answer should be greater than 0"

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the current instance."""
        N = self.parameter["N"]
        edge_lines = "\n".join(f"{u} {v}" for u, v in self.parameter["edges"])
        required = " ".join(str(vertex) for vertex in self.parameter["covered_vertices"])
        listed_W = " ".join(f"W[{i}]={value}" for i, value in enumerate(self.parameter["W"]))
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            edges=edge_lines,
            covered_vertices=required,
            D=self.parameter["D"],
            W=listed_W,
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return ``None`` for ``None`` input or non-integer tokens."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a model output: validate the vertex set, simulate coverage, compare costs.

        ``self.processor`` comes from the base class (presumably it wraps ``_process`` --
        confirm there).

        Raises:
            ValueError: If the configured rewarding strategy is unknown.
        """
        selected = self.processor(output)
        if selected is None:
            return self.rewards["wrong_format"]
        assert isinstance(selected, list), "processed_result should be a list"

        if len(set(selected)) != len(selected):
            return self.rewards["invalid_solution"]

        N = self.parameter["N"]
        radius = self.parameter["D"]
        answer, gold = 0, self.parameter["gold_answer"]

        adjacency_list = [[] for _ in range(N)]
        for u, v in self.parameter["edges"]:
            adjacency_list[u].append(v)
            adjacency_list[v].append(u)

        covered = [False] * N
        for vertex in selected:
            if not (0 <= vertex < N):
                return self.rewards["invalid_solution"]
            answer += self.parameter["W"][vertex]

            # Depth-limited traversal from the selected vertex.
            visited = [False] * N
            visited[vertex] = True
            stack = [(vertex, 0)]
            while stack:
                u, distance = stack.pop()
                covered[u] = True
                if distance == radius:
                    continue
                for v in adjacency_list[u]:
                    if visited[v]:
                        continue
                    visited[v] = True
                    stack.append((v, distance + 1))

        if any(not covered[vertex] for vertex in self.parameter["covered_vertices"]):
            return self.rewards["unsuccessful_solution"]
        assert gold <= answer, "Gold answer should be less than or equal to the answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise ValueError("Invalid rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class MinCubeAssignment_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3227
    """Task: assign a value in ``[0, R)`` to each cell of a P x Q grid, minimising total cost.

    Adjacent cells may differ by at most ``D``.  ``_generate`` draws the instance and computes
    the optimum as a max-flow on a layered graph; ``scorer`` validates a proposed grid and
    sums its cost.
    """

    prompt_template = \
r"""You are given a {P} × {Q} grid. You need to assign each cell (i, j) an integer value f(i, j) in the range [0, {R}). Each cell (i, j) contributes a cost of c(i, j, f(i, j)) to the total cost, where the cost function c is defined as:
{costs}

In addition, for every pair of **adjacent** cells (i, j) and (i', j') (i.e., cells such that |i - i'| + |j - j'| = 1), the assigned values must satisfy |f(i, j) - f(i', j')| ≤ {D}. Please find an assignment of values to the grid that minimizes the total cost.

**Output Format:** Output {P} lines, each with {Q} integers (space-separated), representing the values assigned to the grid in row-major order."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """Record the reward configuration; remaining keyword arguments go to the base class.

        Args:
            wrong_format: Reward returned when the output cannot be parsed.
            invalid_solution: Reward for a grid of the wrong shape, an out-of-range value,
                or a violated adjacency constraint.
            rewarding_strategy: Either "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: Multiplier applied to the strategy's value.
            rewarding_beta: Exponent used by the "(gold/answer)^beta" strategy.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw a random instance bounded by ``parameter["MAX_P_Q_R"]`` and compute its gold answer.

        Writes ``P``, ``Q``, ``R``, ``costs``, ``D`` and ``gold_answer`` into ``self.parameter``.
        """
        assert "MAX_P_Q_R" in self.parameter, "MAX_P_Q_R is required in parameter"
        MAX_P_Q_R = self.parameter["MAX_P_Q_R"]
        assert MAX_P_Q_R >= 2, "MAX_P_Q_R should be greater than or equal to 2"

        P = random.randint(2, MAX_P_Q_R)
        Q = random.randint(2, MAX_P_Q_R)
        R = random.randint(2, MAX_P_Q_R)
        self.parameter["P"], self.parameter["Q"], self.parameter["R"] = P, Q, R
        costs = self.parameter["costs"] = [
            [[random.randint(1, P * Q) for f in range(R)] for j in range(Q)]
            for i in range(P)
        ]
        D = self.parameter["D"] = random.randint(0, R - 1)

        # "Infinite" capacity: one more than the sum of every cost in the instance.
        INF = sum(costs[i][j][k] for k in range(R) for i in range(P) for j in range(Q)) + 1

        # Nodes: source 0, then (R + 1) layers of P*Q cell nodes, then the sink.
        cells = P * Q
        node_count = 1 + (R + 1) * cells + 1
        S, T = 0, node_count - 1

        class Edge:
            """Residual-graph edge: target node, remaining capacity, index of the reverse edge."""
            __slots__ = ("to", "cap", "rev")

            def __init__(self, to, cap, rev):
                self.to, self.cap, self.rev = to, cap, rev

        adj = [[] for _ in range(node_count)]

        def connect(u, v, c):
            """Add edge u->v with capacity c together with its zero-capacity reverse edge."""
            adj[u].append(Edge(v, c, len(adj[v])))
            adj[v].append(Edge(u, 0, len(adj[u]) - 1))

        def node_of(i, j, k):
            """Index of the layer-k node belonging to cell (i, j)."""
            return 1 + k * cells + i * Q + j

        # One chain per cell: source -> layer 0 -> ... -> layer R -> sink, where the edge
        # from layer k to layer k + 1 carries the cost of assigning value k.
        for i in range(P):
            for j in range(Q):
                connect(S, node_of(i, j, 0), INF)
                for k in range(R):
                    connect(node_of(i, j, k), node_of(i, j, k + 1), costs[i][j][k])
                connect(node_of(i, j, R), T, INF)

        # Adjacency constraint: uncuttable edges from layer h of a cell to layer h - D
        # of each neighbouring cell.
        for i in range(P):
            for j in range(Q):
                for di, dj in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                    ni, nj = i + di, j + dj
                    if not (0 <= ni < P and 0 <= nj < Q):
                        continue
                    for h in range(D, R + 1):
                        connect(node_of(i, j, h), node_of(ni, nj, h - D), INF)

        # Dinic's algorithm
        level = [0] * node_count
        it = [0] * node_count

        def bfs():
            """Label nodes by BFS distance from S; stops as soon as T gets a label."""
            for node in range(node_count):
                level[node] = -1
            level[S] = 0
            frontier = deque([S])
            while frontier:
                u = frontier.popleft()
                for e in adj[u]:
                    if e.cap > 0 and level[e.to] < 0:
                        level[e.to] = level[u] + 1
                        if e.to == T:
                            return True
                        frontier.append(e.to)
            return level[T] >= 0

        def dfs(u, limit):
            """Push at most ``limit`` units from u to T along edges that go to a higher level."""
            if u == T:
                return limit
            while it[u] < len(adj[u]):
                e = adj[u][it[u]]
                if e.cap > 0 and level[u] < level[e.to]:
                    sent = dfs(e.to, min(limit, e.cap))
                    if sent > 0:
                        e.cap -= sent
                        adj[e.to][e.rev].cap += sent
                        return sent
                it[u] += 1
            return 0

        flow = 0
        while bfs():
            it = [0] * node_count
            pushed = dfs(S, INF)
            while pushed != 0:
                flow += pushed
                pushed = dfs(S, INF)
        assert flow > 0, "Flow should be greater than 0, indicating a valid assignment exists"
        self.parameter["gold_answer"] = flow

    def _prompt_generate(self) -> str:
        """Fill the prompt template; the cost table gets one line per cell in row-major order."""
        P, Q = self.parameter["P"], self.parameter["Q"]
        cost_lines = []
        for i in range(P):
            for j in range(Q):
                cost_lines.append(" ".join(
                    "c({},{},{})={}".format(i, j, f, c)
                    for f, c in enumerate(self.parameter["costs"][i][j])
                ))
        return self.prompt_template.format(
            P=P,
            Q=Q,
            R=self.parameter["R"],
            costs="\n".join(cost_lines),
            D=self.parameter["D"],
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse the answer into a list of integer rows, skipping blank lines.

        Returns ``None`` for ``None`` input or when any token is not an integer.
        """
        if answer is None:
            return None
        rows = (line.strip() for line in answer.strip().splitlines())
        try:
            return [[int(token) for token in row.split()] for row in rows if row]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a model output: validate shape, ranges and adjacency, then compare costs.

        ``self.processor`` comes from the base class (presumably it wraps ``_process`` --
        confirm there).

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        assignment = self.processor(output)
        if assignment is None:
            return self.rewards["wrong_format"]
        assert isinstance(assignment, list), "processed_result should be a list"

        P, Q, R, D = (self.parameter[key] for key in ("P", "Q", "R", "D"))
        if len(assignment) != P or any(len(row) != Q for row in assignment):
            return self.rewards["invalid_solution"]

        answer, gold = 0, self.parameter["gold_answer"]
        for i in range(P):
            for j in range(Q):
                value = assignment[i][j]
                if not (0 <= value < R):
                    return self.rewards["invalid_solution"]
                for di, dj in ((-1, 0), (+1, 0), (0, -1), (0, +1)):
                    ni, nj = i + di, j + dj
                    if 0 <= ni < P and 0 <= nj < Q and abs(value - assignment[ni][nj]) > D:
                        return self.rewards["invalid_solution"]
                answer += self.parameter["costs"][i][j][value]
        assert gold <= answer, "Gold answer should be less than or equal to the computed answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class MinDivisionSumXor_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3646
    """Task: split an array into at most ``K`` consecutive batches, minimising the OR of batch sums.

    Despite the class name, the cost computed by both ``_generate`` and ``scorer`` is the
    bitwise OR (not XOR) of the batch sums, matching the prompt text.
    """

    prompt_template = \
r"""You are given {N} numbers A[1], A[2], ..., A[{N}]. The values are given as:
{A}

You may divide these numbers (in order) into some **consecutive batches**. Let the total number of batches be k (we must have 1 ≤ k ≤ {K}), and let end[1], end[2], ..., end[k] (1 ≤ end[1] < end[2] < ... < end[k] = {N}) denote the last index in each batch. This means:
- Batch 1 contains A[1] to A[end[1]]
- Batch 2 contains A[end[1] + 1] to A[end[2]]
- ...
- Batch k contains A[end[k−1] + 1] to A[end[k]] (with end[k] = {N})

Define the cost of one such division as follows:
- First compute the sum of values in each batch.
- Then take the **bitwise OR** of all batch sums. That is the cost.

Please find a batch division (with 1 ≤ k ≤ {K}) that **minimizes the total cost**.

**Output Format:**
A single line containing `end[1] end[2] ... end[k]`, separated by spaces (with `end[k]` always equal to {N}).
Example: `1 2 {N}` — this means:
- There are 3 batches,
- First batch ends at index 1,
- Second ends at index 2,
- Third ends at index {N} and includes the remaining numbers."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta", rewarding_beta: float = 5.0, rewarding_weight: float = 1.0,
                 **kwargs):
        """Record the reward configuration; remaining keyword arguments go to the base class.

        Args:
            wrong_format: Reward returned when the output cannot be parsed (or is empty).
            invalid_solution: Reward for a malformed list of batch end indices.
            rewarding_strategy: Either "(gold/answer)^beta" or "gold=answer".
            rewarding_beta: Exponent used by the "(gold/answer)^beta" strategy.
            rewarding_weight: Multiplier applied to the strategy's value.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_beta=rewarding_beta,
            rewarding_weight=rewarding_weight,
        )

    def _generate(self) -> None:
        """Draw a random instance of size ``parameter["N"]`` and compute its gold answer.

        Writes ``A``, ``K`` and ``gold_answer`` into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        A = self.parameter["A"] = [random.randint(0, N * N) for _ in range(N)]
        K = self.parameter["K"] = random.randint(2, N)

        # prefix[i] = A[0] + ... + A[i - 1], so a batch sum is a difference of two entries.
        prefix = [0]
        for value in A:
            prefix.append(prefix[-1] + value)

        def can_clear(bit, committed):
            """Can bit ``bit`` stay 0 given the higher bits already fixed in ``committed``?

            Computes the fewest batches covering each prefix when only batch sums are
            allowed whose bit ``bit`` is 0 and whose bits above it are a subset of
            ``committed``, then compares the count for the whole array with K.
            """
            unreachable = N + 1
            fewest = [unreachable] * (N + 1)
            fewest[0] = 0
            for right in range(1, N + 1):
                for left in range(right - 1, -1, -1):
                    upper = (prefix[right] - prefix[left]) >> bit
                    if upper & 1:
                        continue
                    if ((upper << bit) | committed) != committed:
                        continue
                    if fewest[left] + 1 < fewest[right]:
                        fewest[right] = fewest[left] + 1
            return fewest[N] <= K

        # Decide bits from the most significant downward; a bit is set only when forced.
        result = 0
        for bit in range(sum(A).bit_length() + 1, -1, -1):
            if not can_clear(bit, result):
                result |= 1 << bit
        self.parameter["gold_answer"] = result

    def _prompt_generate(self) -> str:
        """Fill the prompt template, listing the values with 1-based indices."""
        listed_A = "\n".join(f"A[{i + 1}]={value}" for i, value in enumerate(self.parameter["A"]))
        return self.prompt_template.format(N=self.parameter["N"], A=listed_A, K=self.parameter["K"])

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers.

        Returns ``None`` for ``None`` input, for non-integer tokens, and for an answer
        that contains no tokens at all.
        """
        if answer is None:
            return None  # Invalid answer format
        try:
            numbers = [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format
        return numbers if numbers else None

    def scorer(self, output: str) -> float:
        """Score a model output: validate the batch ends, OR the batch sums, compare with gold.

        ``self.processor`` comes from the base class (presumably it wraps ``_process`` --
        confirm there).

        Raises:
            NotImplementedError: If the configured rewarding strategy is unknown.
        """
        ends = self.processor(output)
        if ends is None:
            return self.rewards["wrong_format"]
        assert isinstance(ends, list), "processed_result should be a list"

        N = self.parameter["N"]
        if not (1 <= len(ends) <= self.parameter["K"]):
            return self.rewards["invalid_solution"]
        for position, end in enumerate(ends):
            if not (1 <= end <= N):
                return self.rewards["invalid_solution"]
            if position and not (ends[position - 1] < end):
                return self.rewards["invalid_solution"]
        if ends[-1] != N:
            return self.rewards["invalid_solution"]

        # Batch covering 1-based indices (start, end] is the 0-based slice [start:end].
        values = self.parameter["A"]
        answer = 0
        start = 0
        for end in ends:
            answer |= sum(values[start:end])
            start = end
        gold = self.parameter["gold_answer"]
        assert gold <= answer, "Gold answer should be less than or equal to the computed answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            if answer == 0:
                assert gold == 0, "If answer is 0, gold should also be 0"
                return self.rewards["rewarding_weight"] * 1.0
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
new file mode 100644 index 0000000000000000000000000000000000000000..170ef578b0c50bbf8b36097c88660a11ef51d1bd --- /dev/null +++ b/server/Gym/environments/min_inorder_binary_tree/__init__.py @@ -0,0 +1 @@ +from .environment import MinInorderBinaryTree_Environment diff --git a/server/Gym/environments/min_inorder_binary_tree/environment.py b/server/Gym/environments/min_inorder_binary_tree/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e1bf8249c2feabbb087548ef4403090cd7546901 --- /dev/null +++ b/server/Gym/environments/min_inorder_binary_tree/environment.py @@ -0,0 +1,196 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinInorderBinaryTree_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given {N} nodes numbered from 1 to {N}, along with the following edges (for each edge, the parent–child direction is not specified): +{edges} + +Please construct a valid **binary tree** using all these edges. Among all possible binary trees that can be formed, choose the one whose **inorder traversal** is lexicographically smallest. Output a single line containing {N} space-separated integers — the inorder traversal of the chosen binary tree.""" + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the MinInorderBinaryTree_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + edges = self.parameter["edges"] = [] + def construct(nodes : List[int]) -> int : + random.shuffle(nodes) + root = nodes[0] + left_size = random.randint(0, len(nodes) - 1) + right_size = len(nodes) - 1 - left_size + if left_size > 0 : + left_root = construct(nodes[1 : 1 + left_size]) + edges.append((min(root, left_root), max(root, left_root))) + if right_size > 0 : + right_root = construct(nodes[1 + left_size : ]) + edges.append((min(root, right_root), max(root, right_root))) + return root + construct(list(range(1, N + 1))) + random.shuffle(edges) + + assert len(edges) == len(set(edges)) == N - 1, "edges should be unique and of size N-1" + assert all(1 <= u < v <= N for u, v in edges), "edges should be between 1 and N" + + + G = [[] for _ in range(N + 1)] + SON = [[] for _ in range(N + 1)] + FA = [0] * (N + 1) + HEAD = [0] * (N + 1) + + for u, v in edges : + G[u].append(v) + G[v].append(u) + + # Choose a start node FIR: the smallest index (scanning from N down to 1) whose degree != 3 + FIR = 0 + for i in range(N, 0, -1): + if (len(G[i]) ^ 3) != 0: + FIR = i + + def build(start): + """Equivalent to dfs(u) in C++: builds SON and HEAD given a root 'start' using FA as parent array.""" + # Clear SON + for idx in range(1, N + 1): + SON[idx] = [] + order = [] + FA[start] = 0 + stack = [start] + while stack: + u = stack.pop() + order.append(u) + for v in G[u]: + if v != FA[u]: + SON[u].append(v) + FA[v] = u + stack.append(v) + # Post-order compute HEAD + for u in reversed(order): + if len(SON[u]) == 0: + HEAD[u] = u + elif len(SON[u]) == 1: 
+ c = SON[u][0] + HEAD[u] = u if u < HEAD[c] else HEAD[c] + else: + a, b = SON[u][0], SON[u][1] + HEAD[u] = HEAD[a] if HEAD[a] < HEAD[b] else HEAD[b] + + # First build from FIR + build(FIR) + + # dfs1(u): determine the root rt + u = FIR + while True: + if len(SON[u]) == 0: + rt = u + break + elif len(SON[u]) == 1: + c = SON[u][0] + if HEAD[c] < c: + rt = u + break + else: + u = c + else: # len == 2 + a, b = SON[u][0], SON[u][1] + if HEAD[a] < HEAD[b]: + u = b + else: + u = a + + # Rebuild with chosen root + FA[rt] = 0 + build(rt) + + # dfs2(u): inorder traversal with tie-breaking rules to get lexicographically smallest sequence + ans = [] + stack = [(rt, 'go')] + while stack: + node, typ = stack.pop() + if typ == 'emit': + ans.append(node) + continue + # typ == 'go' + if len(SON[node]) == 0: + ans.append(node) + elif len(SON[node]) == 1: + c = SON[node][0] + if node < HEAD[c]: + # output node, then child + stack.append((c, 'go')) + stack.append((node, 'emit')) + else: + # child, then node + stack.append((node, 'emit')) + stack.append((c, 'go')) + else: + a, b = SON[node][0], SON[node][1] + # choose left/right based on HEAD comparison + if HEAD[a] < HEAD[b]: + left, right = a, b + else: + left, right = b, a + # inorder: left, node, right => push in reverse + stack.append((right, 'go')) + stack.append((node, 'emit')) + stack.append((left, 'go')) + + self.parameter["gold_answer"] = ans + self.parameter["reference_answer"] = " ".join(map(str, ans)) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[int]] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, 
output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if set(processed_result) != set(range(1, self.parameter["N"] + 1)) : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/min_kdivisor_number/__init__.py b/server/Gym/environments/min_kdivisor_number/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dbc5c56bbdc2ca047f8c7cdd84ecc3ccb6f9460c --- /dev/null +++ b/server/Gym/environments/min_kdivisor_number/__init__.py @@ -0,0 +1 @@ +from .environment import MinKDivisorNumber_Environment diff --git a/server/Gym/environments/min_kdivisor_number/environment.py b/server/Gym/environments/min_kdivisor_number/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..77cb8a2ef55dc742a39653bb6a6d4396baef5cb3 --- /dev/null +++ b/server/Gym/environments/min_kdivisor_number/environment.py @@ -0,0 +1,141 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MinKDivisorNumber_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1128 + prompt_template = \ +r"""Find the **smallest positive integer 
`M`** such that it has **exactly `{K}` distinct positive divisors**. + +**Output Format:** +Your final answer should be a single integer representing the value of `M`. +Example: `10` (do **NOT** include the backticks or quotes).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = 0.0, rewarding_strategy : str = "(gold/answer)^beta", rewarding_beta : float = 2.0, rewarding_weight : float = 1.0, + **kwargs) : + """ + Initialize the MinKDivisorNumber_Environment instance. + """ + + super().__init__(**kwargs) + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def prime_factorization(self, n, limit) : + factors = [] + d = 2 + while d * d <= n : + e = 0 + while n % d == 0 : + n //= d + e += 1 + if e > 0 : + factors.append((d, e)) + d += 1 + if d > limit : + return None + if n > 1 : + factors.append((n, 1)) + return factors + + + def _generate(self) -> None : + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K >= 1, "MAX_K should be greater than or equal to 1" + + K = self.parameter["K"] = random.randint(1, MAX_K) + + sum_e = sum(e for d, e in self.prime_factorization(K, float("inf"))) + all_primes = [2] + while len(all_primes) < sum_e : + all_primes.append(all_primes[-1] + 1) + def check_prime(n) : + if n == 2 or n == 3 : + return True + if n < 2 or n % 2 == 0 : + return False + for i in range(3, int(n ** 0.5) + 1, 2) : + if n % i == 0 : + return False + return True + while not check_prime(all_primes[-1]) : + all_primes[-1] += 1 + + dpF = dict() + def dp(p, n) : + if n == 1 : + return 1 + if (p, n) in dpF : + return dpF[(p, n)] + Ans = (all_primes[p]) ** (n - 1) + if p + 1 < len(all_primes) : + factors = [] + for factor in range(1, int(n ** 0.5) + 1) : + if n % factor == 0 : + factors.append(factor) + if n // 
factor > factor : + factors.append(n // factor) + + for factor in factors : + if factor > 1 : + Ans = min(Ans, (all_primes[p] ** (factor - 1)) * dp(p + 1, n // factor)) + dpF[(p, n)] = Ans + return Ans + + self.parameter["reference_answer"] = dp(0, K) + + def _prompt_generate(self) -> str : + return self.prompt_template.format(K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if processed_result == self.parameter["reference_answer"] : + return self.rewards["rewarding_weight"] + + factorization_result = self.prime_factorization(processed_result, int(1E7)) + if factorization_result is None : + return 0.0 + all_e = [e for d, e in factorization_result] + divisor_number = 1 + for e in all_e : + divisor_number *= (e + 1) + + if divisor_number != self.parameter["K"] : + return self.rewards["invalid_answer"] + + assert processed_result >= self.parameter["reference_answer"], "processed_result should be greater than or equal to reference_answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((self.parameter["reference_answer"] / processed_result) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + assert self.parameter["reference_answer"] != processed_result + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git 
a/server/Gym/environments/min_no_solution_linear_diophantine_equation/__init__.py b/server/Gym/environments/min_no_solution_linear_diophantine_equation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b85a0be0074c3ac66005b0551767eca1e9862759 --- /dev/null +++ b/server/Gym/environments/min_no_solution_linear_diophantine_equation/__init__.py @@ -0,0 +1 @@ +from .environment import MinNoSolutionLinearDiophantineEquation_Environment diff --git a/server/Gym/environments/min_no_solution_linear_diophantine_equation/environment.py b/server/Gym/environments/min_no_solution_linear_diophantine_equation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..65b11c9ffec9dc0790ac642b69bb7cd08c341d93 --- /dev/null +++ b/server/Gym/environments/min_no_solution_linear_diophantine_equation/environment.py @@ -0,0 +1,66 @@ +import math +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MinNoSolutionLinearDiophantineEquation_Environment(VerifiableEnvironment) : # https://www.luogu.com.cn/problem/P3951 + prompt_template = \ +r"""Consider the equation {A}x + {B}y = z. Find the largest non-negative integer z ≥ 0 such that the equation has **no** non-negative integer solutions (x, y).""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MinNoSolutionLinearDiophantineEquation_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_A_B" in self.parameter, "MAX_A_B is required in parameter" + MAX_A_B = self.parameter["MAX_A_B"] + assert MAX_A_B >= 3, "A and B should be greater than or equal to 3" + + while True : + A = self.parameter["A"] = random.randint(2, MAX_A_B) + B = self.parameter["B"] = random.randint(2, MAX_A_B) + if math.gcd(A, B) == 1 : + break + + # The largest non-negative integer z such that the equation has no non-negative integer solutions is A * B - A - B (the Frobenius number of the coprime pair A, B). + self.parameter["reference_answer"] = A * B - A - B + assert self.parameter["reference_answer"] > 0 + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(A = self.parameter["A"], B = self.parameter["B"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/min_nonsubstring/__init__.py b/server/Gym/environments/min_nonsubstring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e678af4f6ec6ae18a6fdf0c22abdc566e50966ae --- /dev/null +++ b/server/Gym/environments/min_nonsubstring/__init__.py @@ -0,0 +1 @@ +from .environment import MinNonsubstring_Environment diff --git a/server/Gym/environments/min_nonsubstring/environment.py b/server/Gym/environments/min_nonsubstring/environment.py new file mode 100644
index 0000000000000000000000000000000000000000..994238dd19e7934e6c064801b666b74016a9f74e --- /dev/null +++ b/server/Gym/environments/min_nonsubstring/environment.py @@ -0,0 +1,80 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MinNonsubstring_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a string A = `{A}` + +Your task is to find a string B such that: +(1) B consists only of the characters `a` and `b`. +(2) B is **NOT** a (contiguous) substring of A. +(3) Among all strings satisfying (1) and (2), B has the **minimum possible length**. +(4) Among all strings satisfying (1), (2), and (3), B is **lexicographically smallest**. There is exactly one such string B. + +**Output Format:** Your final answer should be a single string B.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MinNonsubstring_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N should be greater than or equal to 1" + + a_probability = random.random() + + A = self.parameter["A"] = "".join("ab"[random.random() < a_probability] for _ in range(N)) + + + length = 1 + while True : + found = False + for B_mask in range(1 << length) : + B = "".join("ab"[(B_mask >> i) & 1] for i in range(length - 1, -1, -1)) + if B not in A : + self.parameter["reference_answer"] = B + found = True + break + if found : + break + length += 1 + + def _prompt_generate(self) -> str : + return self.prompt_template.format(A = self.parameter["A"]) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + answer = answer.strip() + return answer + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not all(c in "ab" for c in processed_result) : + return self.rewards["invalid_answer"] + + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/min_pairsum_multiplication_permutation/__init__.py b/server/Gym/environments/min_pairsum_multiplication_permutation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b88a4c7c833e9aa0192098ab49bc0024fb39c0e0 --- /dev/null +++ b/server/Gym/environments/min_pairsum_multiplication_permutation/__init__.py @@ -0,0 +1 @@ +from .environment import MinPairSumMultiplicationPermutation_Environment diff --git 
a/server/Gym/environments/min_pairsum_multiplication_permutation/environment.py b/server/Gym/environments/min_pairsum_multiplication_permutation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d71b9dada6693dfcc1502cbec980ed384b3bd68a --- /dev/null +++ b/server/Gym/environments/min_pairsum_multiplication_permutation/environment.py @@ -0,0 +1,189 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinPairSumMultiplicationPermutation_Environment(VerifiableEnvironment) : # Submitted to https://www.luogu.com.cn/problem/P3236 + prompt_template = \ +r"""You are given two matrices `A` and `B`, each of size {N} × {N}: +{matrix_A} +{matrix_B} + +You need to find a permutation P of indices from 0 to {N_minus_1} such that the value (sum of A[0][P[0]], A[1][P[1]], ..., A[{N_minus_1}][P[{N_minus_1}]]) multiplied by (sum of B[0][P[0]], B[1][P[1]], ..., B[{N_minus_1}][P[{N_minus_1}]]) is minimized. + +**Output Format:** A single line containing P[0], P[1], ..., P[{N_minus_1}], separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinPairSumMultiplicationPermutation_Environment instance. 
+ """ + + super().__init__(**kwargs) + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + A = self.parameter["A"] = [[random.randint(1, N) for _ in range(N)] for _ in range(N)] + B = self.parameter["B"] = [[random.randint(1, N) for _ in range(N)] for _ in range(N)] + + + def hungarian(CX: int, CY: int, A, B, N, BIG): + """ + Minimise Σ ( A[i][j]*CX + B[i][j]*CY ), i,j a permutation. + Returns the permutation as a list row_match[i] = chosen column. + """ + U = [0] * (N + 1) + V = [0] * (N + 1) + P = [0] * (N + 1) + WAY = [0] * (N + 1) + + for i in range(1, N + 1): # rows 1..N + P[0] = i + j0 = 0 + MINV = [BIG] * (N + 1) + USED = [False] * (N + 1) + USED[0] = True + while True: + USED[j0] = True + i0 = P[j0] + delta = BIG + j1 = 0 + for j in range(1, N + 1): + if not USED[j]: + cur = (A[i0 - 1][j - 1] * CX + B[i0 - 1][j - 1] * CY) - U[i0] - V[j] + if cur < MINV[j]: + MINV[j] = cur + WAY[j] = j0 + if MINV[j] < delta: + delta = MINV[j] + j1 = j + for j in range(N + 1): # shift potentials + if USED[j]: + U[P[j]] += delta + V[j] -= delta + else: + MINV[j] -= delta + j0 = j1 + if P[j0] == 0: + break # free column found + # ------- augment along the path ------- + while True: + j1 = WAY[j0] + P[j0] = P[j1] + j0 = j1 + if j0 == 0: + break + + row_match = [-1] * N + for j in range(1, N + 1): + if P[j] != 0: + row_match[P[j] - 1] = j - 1 + return row_match + + + # ---------- tiny Point helper ---------- + class Point: + __slots__ = ("x", "y") + + def __init__(self, x=0, y=0): + self.x = x + self.y = y + + def calc(self, A, B): # ⟨self , (A.y-B.y , B.x-A.x)⟩ + return self.x * (A.y - B.y) + self.y * (B.x - A.x) + + # ---------- 
solve one test case ---------- + def solve_case(): + # -------- derive a SAFE 'BIG' sentinel for this test case ---------- + MAX_A = max(max(row) for row in A) + MAX_B = max(max(row) for row in B) + # every CX or CY equals a difference of two sums of ≤ N*MAX_A / B + SUM_BOUND = N * max(MAX_A, MAX_B) # ≤ 14 000 with constraints + BIG = (MAX_A + MAX_B) * SUM_BOUND + 1 # > any possible edge cost + + # ------- closure: run Hungarian, return Point(sumA,sumB) ------- + def MM(cx: int, cy: int) -> Point: + match = hungarian(cx, cy, A, B, N, BIG) + sx = sy = 0 + for i in range(N): + j = match[i] + sx += A[i][j] + sy += B[i][j] + return Point(sx, sy) + + POINT_A = MM(1, 0) # minimal ΣA + POINT_B = MM(0, 1) # minimal ΣB + best = min(POINT_A.x * POINT_A.y, POINT_B.x * POINT_B.y) + + # ------- recursively walk the lower convex hull ------- + def recurse(P: Point, Q: Point): + nonlocal best + C = MM(P.y - Q.y, Q.x - P.x) + best = min(best, C.x * C.y) + if C.calc(P, Q) >= P.calc(P, Q): # C lies on / below PQ + return + recurse(P, C) + recurse(C, Q) + + recurse(POINT_A, POINT_B) + return best + self.parameter["gold_answer"] = solve_case() + assert self.parameter["gold_answer"] > 0 + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + matrix_A = "\n".join(" ".join("A[{}][{}]={}".format(i, j, self.parameter["A"][i][j]) for j in range(N)) for i in range(N)), + matrix_B = "\n".join(" ".join("B[{}][{}]={}".format(i, j, self.parameter["B"][i][j]) for j in range(N)) for i in range(N)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is 
not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + P = processed_result + if len(P) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if set(P) != set(range(self.parameter["N"])) : + return self.rewards["invalid_solution"] + + answer, gold = sum(self.parameter["A"][i][P[i]] for i in range(self.parameter["N"])) * sum(self.parameter["B"][i][P[i]] for i in range(self.parameter["N"])), self.parameter["gold_answer"] + assert gold <= answer, "The answer should be greater than or equal to the gold answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/min_path_cover_dag/__init__.py b/server/Gym/environments/min_path_cover_dag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad3b278dcf9ee67de7ba9d2a4395026d13d368fb --- /dev/null +++ b/server/Gym/environments/min_path_cover_dag/__init__.py @@ -0,0 +1 @@ +from .environment import MinPathCover_DAG_Environment diff --git a/server/Gym/environments/min_path_cover_dag/environment.py b/server/Gym/environments/min_path_cover_dag/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..bd2d63d3caf75ed67312b9eaec2f538bb4350050 --- /dev/null +++ b/server/Gym/environments/min_path_cover_dag/environment.py @@ -0,0 +1,233 @@ +import random +import networkx +from collections import deque +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinPathCover_DAG_Environment(VerifiableEnvironment) : # Source : 
https://www.luogu.com.cn/problem/P4043 + prompt_template = \ +r"""You are given a **directed acyclic graph (DAG)** with {N} vertices labeled from 1 to {N}. The graph contains the following directed edges (s, t, w), meaning there is an edge from `s` to `t` with weight `w`. It is guaranteed that vertex 1 can reach all other vertices: +{edges} + +Let's find a set of paths such that: +- Each path starts from vertex 1. According to the definition of paths, consecutive vertices in a path are connected by a directed edge (following the edge direction). +- All edges in the graph are covered by at least one path. + +Can we **minimize the total weight** of all paths, where the weight of a path is the sum of the weights of its edges? Please output K lines, where K is the number of paths you use; each line should list the vertices of one path in order (starting from 1), separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize MinPathCover_DAG_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "unsuccessful_solution" : unsuccessful_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + edges = self.parameter["edges"] = [] + topological_order = list(range(1, N + 1)) + random.shuffle(topological_order[1 :]) # Keep 1 as the first vertex + for i in range(1, N) : + t = topological_order[i] + for s in random.sample(topological_order[: i], random.randint(1, i)) : + edges.append((s, t, random.randint(1, N * (N - 1)))) + random.shuffle(edges) + + assert len(edges) == len(set((s, t) for s, t, w in edges)), "Duplicate edges detected" + + G = networkx.DiGraph() + G.add_weighted_edges_from(edges) + assert networkx.is_directed_acyclic_graph(G), "The generated graph is not a DAG" + assert all(networkx.has_path(G, 1, v) for v in range(2, N + 1)), "Vertex 1 cannot reach all other vertices" + + + # Read all edges first to compute INF based on input + A = [0] * (N + 4) # 1-based indexing; extra room for rT=N+1, vS=N+2, vT=N+3 + edges_data = [[] for _ in range(N + 4)] + total_cost_sum = 0 + M = len(edges) # total number of edges (sum of K_i) + + for i, u, t in edges : + edges_data[i].append((u, t)) + A[u] += 1 + A[i] -= 1 + total_cost_sum += t + + # Make INF depend on the input (covers both capacity sentinel and distance sentinel) + INF = total_cost_sum + M + 5 + + size = N + 4 # nodes: 1..N, rT=N+1, vS=N+2, vT=N+3 + Graph = [[] for _ in range(size)] + + class Edge: + __slots__ = ("to", "cap", "cost", "rev") + def __init__(self, to, cap, cost, rev): + self.to = to + self.cap = cap + self.cost = cost + self.rev = rev + + def add_edge(u, v, cap, cost): + Graph[u].append(Edge(v, cap, 
cost, len(Graph[v]))) + Graph[v].append(Edge(u, 0, -cost, len(Graph[u]) - 1)) + + rS = 1 + rT = N + 1 + vS = N + 2 + vT = N + 3 + + # Build edges as in the C++ code + for i in range(1, N + 1): + for (u, t) in edges_data[i]: + add_edge(i, u, INF - 1, t) + + for i in range(2, N + 1): + add_edge(i, rT, INF, 0) + + for i in range(1, N + 1): + if A[i] > 0: + add_edge(vS, i, A[i], 0) + elif A[i] < 0: + add_edge(i, vT, -A[i], 0) + + add_edge(rT, rS, INF, 0) + + S = vS + T = vT + + Dist = [0] * size + Cur = [0] * size + InQ = [False] * size + Vis = [False] * size + + # ret starts as the sum of all edge costs, then augmented during flow as in the original code + ret = total_cost_sum + + def spfa(): + for i in range(size): + Dist[i] = INF + InQ[i] = False + Dist[S] = 0 + q = deque([S]) + InQ[S] = True + while q: + u = q.popleft() + InQ[u] = False + for e in Graph[u]: + if e.cap > 0 and Dist[e.to] > Dist[u] + e.cost: + Dist[e.to] = Dist[u] + e.cost + if not InQ[e.to]: + InQ[e.to] = True + q.append(e.to) + return Dist[T] < INF + + def dfs(x, f): + nonlocal ret + if x == T: + return f + Vis[x] = True + flow = 0 + i = Cur[x] + while i < len(Graph[x]) and flow < f: + Cur[x] = i + e = Graph[x][i] + v = e.to + if (not Vis[v]) and e.cap > 0 and Dist[v] == Dist[x] + e.cost: + pushed = dfs(v, min(e.cap, f - flow)) + if pushed: + ret += pushed * e.cost + e.cap -= pushed + Graph[v][e.rev].cap += pushed + flow += pushed + i += 1 + Vis[x] = False + return flow + + def dinic(): + total = 0 + while spfa(): + for i in range(size): + Cur[i] = 0 + Vis[i] = False + while True: + pushed = dfs(S, INF) + if pushed == 0: + break + total += pushed + return total + + dinic() + self.parameter["gold_answer"] = ret + assert self.parameter["gold_answer"] > 0 + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + edges = "\n".join("({}, {}, {})".format(s, t, w) for s, t, w in self.parameter["edges"]), + ) + + + def _process(self, answer : 
Optional[str]) -> Optional[List[List[int]]] : + if answer is not None : + answer = answer.strip() + try : + paths = [] + for line in answer.splitlines() : + line = line.strip() + if line : + paths.append(list(map(int, line.split()))) + return paths + except : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + edges = {(s, t) : False for s, t, w in self.parameter["edges"]} + edge2weight = {(s, t) : w for s, t, w in self.parameter["edges"]} + gold, answer = self.parameter["gold_answer"], 0 + for path in processed_result : + if not path : + return self.rewards["invalid_solution"] + if path[0] != 1 : + return self.rewards["invalid_solution"] + for i in range(len(path) - 1) : + s = path[i] + t = path[i + 1] + if (s, t) in edges : + edges[(s, t)] = True + answer += edge2weight[(s, t)] + else : + return self.rewards["invalid_solution"] + + if not all(edges.values()) : + return self.rewards["unsuccessful_solution"] + + assert 0 < gold <= answer, "gold should be less than or equal to answer" + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/min_sum_chebyshev_distance/__init__.py b/server/Gym/environments/min_sum_chebyshev_distance/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c4d9e481a43e7cd0e0e24d599259d1741a34c002 --- /dev/null +++ b/server/Gym/environments/min_sum_chebyshev_distance/__init__.py @@ -0,0 +1 @@ +from .environment import MinSumChebyshevDistance_Environment 
def __init__(self,
             wrong_format : float = -1.0,
             rewarding_strategy : str = "(gold/answer)^beta",
             rewarding_weight : float = +1.0,
             rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Set up the environment and record its reward configuration.

    wrong_format       : reward returned by `scorer` when the output cannot be parsed.
    rewarding_strategy : "(gold/answer)^beta" or "gold=answer"; `scorer` raises
                         NotImplementedError for any other value.
    rewarding_weight   : multiplier applied to the strategy's score.
    rewarding_beta     : exponent used by the "(gold/answer)^beta" strategy.
    Any remaining keyword arguments are forwarded to the base-class initializer.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        rewarding_strategy = rewarding_strategy,
        rewarding_weight = rewarding_weight,
        rewarding_beta = rewarding_beta,
    )
def _process(self, answer : Optional[str]) -> Optional[Tuple] :
    """
    Parse a raw answer of the form "x y" into a pair of integers.

    Returns (x, y) when the stripped answer consists of exactly two
    whitespace-separated integer tokens, and None when `answer` is None or
    malformed.
    """
    if answer is None :
        return None

    try :
        # A non-integer token (from int()) and a token count other than two
        # (from the unpacking) both surface as ValueError. Catching only that
        # keeps KeyboardInterrupt / SystemExit from being swallowed, which the
        # previous bare `except` did.
        x, y = map(int, answer.strip().split())
    except ValueError :
        return None
    return x, y
should be less than or equal to the answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + if answer == 0 : + assert gold == 0, "If answer is 0, gold should also be 0" + return self.rewards["rewarding_weight"] * 1.0 + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/min_sum_distance_square/__init__.py b/server/Gym/environments/min_sum_distance_square/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6d22748d128f499cbba0fc8e37a0fcd8f996648c --- /dev/null +++ b/server/Gym/environments/min_sum_distance_square/__init__.py @@ -0,0 +1 @@ +from .environment import MinSumDistanceSquare_Environment diff --git a/server/Gym/environments/min_sum_distance_square/environment.py b/server/Gym/environments/min_sum_distance_square/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..2ce38c1f59b79a976a929b863ec071932e7fa7aa --- /dev/null +++ b/server/Gym/environments/min_sum_distance_square/environment.py @@ -0,0 +1,125 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MinSumDistanceSquare_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3162 + prompt_template = \ +r"""There are {N} groups of points located on the x-axis. The coordinates of each group are given as follows: +{points} + +Your task is to choose a point X on the x-axis. 
def compute_toal_cost(self, X_prime : int) -> int :
    """
    Return the total cost of choosing X = X_prime / N, scaled by N^2 so it stays an integer.

    For each group the squared distance to its closest point is taken, using
    (X_prime / N - x)^2 = (X_prime - N * x)^2 / N^2 and dropping the common
    1 / N^2 factor.
    """
    params = self.parameter
    total = 0
    for group in params["points"] :
        total += min((X_prime - params["N"] * x) ** 2 for x in group)
    return total
def scorer(self, output : str) -> float :
    """
    Score an answer X' by comparing its total cost with the stored gold cost.

    Returns rewards["wrong_format"] when the processor yields None. Otherwise the
    cost of the answer is evaluated with `compute_toal_cost` and rewarded with the
    configured strategy; a zero-cost answer under "(gold/answer)^beta" earns the
    full weight. Raises NotImplementedError for an unknown rewarding strategy.
    """
    X_prime = self.processor(output)
    if X_prime is None :
        return self.rewards["wrong_format"]

    gold = self.parameter["gold_answer"]
    answer = self.compute_toal_cost(X_prime)
    assert 0 <= gold <= answer, "gold_answer should be less than or equal to answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        if answer == 0 :
            # Avoid 0 / 0: a zero-cost answer can only tie a zero gold.
            assert gold == 0, "If answer is 0, gold should also be 0"
            return self.rewards["rewarding_weight"] * 1.0
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (gold == answer)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
b/server/Gym/environments/min_sum_pre_xor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..13e443afc1713a0d106e8e049d5027f19d5c78ae --- /dev/null +++ b/server/Gym/environments/min_sum_pre_xor/__init__.py @@ -0,0 +1 @@ +from .environment import MinSumPreXor_Environment diff --git a/server/Gym/environments/min_sum_pre_xor/environment.py b/server/Gym/environments/min_sum_pre_xor/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..20aabaaee9ca3f5be885fe84178f5ee12bd9d813 --- /dev/null +++ b/server/Gym/environments/min_sum_pre_xor/environment.py @@ -0,0 +1,128 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinSumPreXor_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4060 + prompt_template = \ +r"""You are given an array P of length {N}: {P} +Replace every entry P[i] that equals -1 (for 1 ≤ i ≤ {N}) with a **non-negative integer** (all other entries are fixed non-negative integers), so as to **minimize** the sum: B[1] + B[2] + ... + B[{N}], where B[1] = P[1] and for i ≥ 2, B[i] = B[i−1] XOR P[i] (XOR is the bitwise exclusive OR). Output the updated array P as {N} space-separated non-negative integers in one line.""" + + + def __init__(self, + element_range : int = 2, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinSumPreXor instance. 
def _generate(self) -> None :
    """
    Generate a random instance and compute its gold answer.

    Reads N from `self.parameter`; writes `self.parameter["P"]` (the array shown
    to the solver, with -1 marking free entries) and `self.parameter["gold_answer"]`
    (the value this routine computes as the minimum achievable sum of prefix XORs).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    # Draw N values from [0, element_range * N] (inclusive), then blank out
    # between 1 and N - 1 randomly chosen positions, so at least one entry is
    # free and at least one stays known.
    P = self.parameter["P"] = [random.randint(0, self.element_range * N) for _ in range(N)]
    for removed_indices in random.sample(range(N), random.randint(1, N - 1)) :
        P[removed_indices] = -1


    # A holds (1-based position, value) for every known entry; positions are
    # unique, so the sort orders A by position.
    A = []
    for i, ai in enumerate(P, start = 1) :
        if ai != -1 :
            A.append((i, ai))
    A.sort()
    M = len(A)

    # Compute bit width from input instead of using a magic number.
    # (At most N - 1 entries were blanked above, so M >= 1 here; the else branch is defensive.)
    if M > 0:
        max_val = max(x for _, x in A)
        BIT = max(1, max_val.bit_length())
    else:
        BIT = 1

    F = [] # per-block counts of set bits for each bit position
    LEN = [] # length of each block (number of known elements inside)
    tot = 0 # number of blocks opened so far
    now = 0 # XOR of the known values from the start of the current block

    # A block is a maximal run of known entries at consecutive positions.
    # For each block, F[b][j] counts how many of its running XORs have bit j set.
    for idx in range(M):
        if idx == 0 or A[idx][0] != A[idx - 1][0] + 1:
            F.append([0] * BIT)
            LEN.append(0)
            tot += 1
            now = 0
        now ^= A[idx][1]
        for j in range(BIT):
            F[tot - 1][j] += (now >> j) & 1
        LEN[tot - 1] += 1

    ans = 0
    for i in range(tot):
        # NOTE(review): `i` is a block index but indexes A (known entries) here.
        # Because A is sorted by position, A[i][0] == 1 can only hold for i == 0,
        # and A[0] is also the first element of block 0, so the test amounts to
        # "block i starts at position 1". Keep that invariant in mind when editing.
        if A[i][0] == 1:
            # Block starts at position 1: its running XORs are counted as they are.
            for j in range(BIT):
                ans += (F[i][j] << j)
        else:
            # Per bit, take the cheaper of keeping the running XORs (F ones) or
            # flipping them (LEN - F ones, plus 1 - presumably for the free entry
            # right before the block that carries the flipped bit; TODO confirm
            # against the referenced solution).
            for j in range(BIT):
                ans += (min(F[i][j], LEN[i] - F[i][j] + 1) << j)

    self.parameter["gold_answer"] = ans
def scorer(self, output : str) -> float :
    """
    Score a completed array by the sum of its prefix XORs.

    Returns rewards["wrong_format"] when the processor yields None, and
    rewards["invalid_solution"] when the array has the wrong length, changes a
    fixed entry, or fills a free (-1) entry with a negative number. Otherwise the
    sum B[1] + ... + B[N] of the submitted array is compared with the gold value
    using the configured strategy. Raises NotImplementedError for an unknown
    rewarding strategy.
    """
    filled = self.processor(output)
    if filled is None :
        return self.rewards["wrong_format"]
    assert isinstance(filled, list), "processed_result should be a list"

    if len(filled) != self.parameter["N"] :
        return self.rewards["invalid_solution"]
    for fixed, given in zip(self.parameter["P"], filled) :
        if fixed == -1 :
            acceptable = given >= 0
        elif fixed >= 0 :
            acceptable = given == fixed
        else :
            acceptable = False
        if not acceptable :
            return self.rewards["invalid_solution"]

    gold = self.parameter["gold_answer"]
    answer = 0
    prefix_xor = 0
    for value in filled :
        prefix_xor ^= value
        answer += prefix_xor
    assert 0 <= gold <= answer, "gold_answer should be non-negative and not greater than answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        if answer == 0 :
            # Avoid 0 / 0: a zero sum can only tie a zero gold.
            assert gold == 0, "If answer is 0, gold should also be 0"
            return self.rewards["rewarding_weight"] * 1.0
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def __init__(self,
             wrong_format : float = -1.0,
             invalid_solution : float = -0.5,
             unsuccessful_solution : float = -0.2,
             rewarding_strategy : str = "(gold/answer)^beta",
             rewarding_weight : float = +1.0,
             rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Set up the environment and record its reward configuration.

    wrong_format          : reward when the output cannot be parsed.
    invalid_solution      : reward when an index is outside [0, N).
    unsuccessful_solution : reward when duplicates remain after the swaps.
    rewarding_strategy    : "(gold/answer)^beta" or "gold=answer"; `scorer` raises
                            NotImplementedError for any other value.
    rewarding_weight      : multiplier applied to the strategy's score.
    rewarding_beta        : exponent used by the "(gold/answer)^beta" strategy.
    Any remaining keyword arguments are forwarded to the base-class initializer.
    """
    super().__init__(**kwargs)

    rewards = {}
    rewards["wrong_format"] = wrong_format
    rewards["invalid_solution"] = invalid_solution
    rewards["unsuccessful_solution"] = unsuccessful_solution
    rewards["rewarding_strategy"] = rewarding_strategy
    rewards["rewarding_weight"] = rewarding_weight
    rewards["rewarding_beta"] = rewarding_beta
    self.rewards = rewards
def scorer(self, output : str) -> float :
    """
    Score a list of swap indices.

    The swaps A[i] <-> B[i] are applied, in order, to copies of the stored arrays
    (the stored arrays are never modified). Returns rewards["wrong_format"] when
    the processor yields None, rewards["invalid_solution"] when any index is
    outside [0, N), and rewards["unsuccessful_solution"] when A or B still holds
    a duplicate afterwards. Otherwise the number of listed indices is compared
    with the gold count using the configured strategy. Raises NotImplementedError
    for an unknown rewarding strategy.
    """
    swapping_indices = self.processor(output)
    if swapping_indices is None :
        return self.rewards["wrong_format"]
    assert isinstance(swapping_indices, list), "processed_result should be a list"

    N = self.parameter["N"]
    # The arrays below are private copies, so rejecting a bad index up front is
    # indistinguishable from rejecting it halfway through the swaps.
    if any(not (0 <= index < N) for index in swapping_indices) :
        return self.rewards["invalid_solution"]

    A = self.parameter["A"].copy()
    B = self.parameter["B"].copy()
    for index in swapping_indices :
        A[index], B[index] = B[index], A[index]

    if len(set(A)) != N or len(set(B)) != N :
        return self.rewards["unsuccessful_solution"]

    gold = self.parameter["gold_answer"]
    answer = len(swapping_indices)
    assert 0 < gold <= answer, "gold should be less than or equal to answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (gold == answer)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
100644 index 0000000000000000000000000000000000000000..d7219af653f7c33c400390bdfaf7db1cf05bbad6 --- /dev/null +++ b/server/Gym/environments/min_xor_pair/__init__.py @@ -0,0 +1 @@ +from .environment import MinXorPair_Environment diff --git a/server/Gym/environments/min_xor_pair/environment.py b/server/Gym/environments/min_xor_pair/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..575eb08cfbd72dcb87f57bb8cf3c0cefb1cebefc --- /dev/null +++ b/server/Gym/environments/min_xor_pair/environment.py @@ -0,0 +1,95 @@ +import random +from typing import Optional, Tuple +from ...environment import VerifiableEnvironment + + +class MinXorPair_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Given an array of length {N} (index starting from 0): +{A} + +Please find a pair of (i, j) such that 0 <= i < j < {N}, and try your best to minimize the value of (A[i] AND A[j]) XOR (A[i] OR A[j]), where `AND`, `OR`, and `XOR` denote bitwise operations. + +Your final answer should be a single line containing the two integers i and j, separated by a space. For example: `0 2` (do **NOT** include quotes or backticks) means i = 0 and j = 2.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinXorPair_Environment instance. 
def scorer(self, output : str) -> float :
    """
    Score a pair of indices (i, j) by the value `compute(i, j)` it achieves.

    Returns:
    - rewards["wrong_format"] when the processor yields None;
    - rewards["invalid_solution"] unless 0 <= i < j < N;
    - otherwise the reward of the configured strategy, comparing the stored gold
      value with the value achieved by the pair.

    Raises NotImplementedError for an unknown rewarding strategy.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]

    i, j = processed_result
    if not (0 <= i < j < self.parameter["N"]) :
        return self.rewards["invalid_solution"]

    gold, answer = self.parameter["gold_answer"], self.compute(i, j)
    assert gold <= answer, "Gold answer should be less than or equal to answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        if answer == 0 :
            # `_generate` draws distinct values, so a zero value is not expected
            # from generated instances. If the array does hold equal elements,
            # the ratio would be 0 / 0, so handle it the way the sibling
            # environments do: a zero answer can only tie a zero gold and earns
            # the full weight.
            assert gold == 0, "If answer is 0, gold should also be 0"
            return self.rewards["rewarding_weight"] * 1.0
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def __init__(self,
             mine_density_range : tuple = (0.4, 0.7),
             wrong_format : float = -1.0,
             invalid_solution : float = -0.5,
             rewarding_strategy : str = "(satisfied/all)^beta",
             rewarding_weight : float = +1.0,
             rewarding_beta : float = 10.0,
             **kwargs) :
    """
    Set up the environment and record its generation and reward configuration.

    mine_density_range : (low, high) with 0 < low < high < 1; `_generate` draws the
                         fraction of cells that become mines uniformly from it.
    wrong_format       : reward when the output is unparsable or has the wrong shape.
    invalid_solution   : reward when a clue cell is not left as ".".
    rewarding_strategy : "(satisfied/all)^beta" or "satisfied=all".
    rewarding_weight   : multiplier applied to the strategy's score.
    rewarding_beta     : exponent used by the "(satisfied/all)^beta" strategy.
    Any remaining keyword arguments are forwarded to the base-class initializer.

    Raises AssertionError when `mine_density_range` is not a valid pair.
    """
    super().__init__(**kwargs)

    self.mine_density_range = mine_density_range
    range_is_valid = len(mine_density_range) == 2 and 0.0 < mine_density_range[0] < mine_density_range[1] < 1.0
    assert range_is_valid, "mine_density_range should be a tuple of two floats in (0, 1)"

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        rewarding_strategy = rewarding_strategy,
        rewarding_weight = rewarding_weight,
        rewarding_beta = rewarding_beta,
    )
def scorer(self, output : str) -> float :
    """
    Score a mine layout against the numeric clues of the stored grid.

    Returns rewards["wrong_format"] when the processor yields None, the layout is
    not N rows of M characters, or it contains a character other than "*" / ".".
    Returns rewards["invalid_solution"] when a clue cell (grid value != -1) is not
    ".". Otherwise every clue is checked against the number of "*" among its
    eight neighbours and the fraction of satisfied clues is rewarded with the
    configured strategy. Raises NotImplementedError for an unknown strategy.
    """
    solution = self.processor(output)
    if solution is None :
        return self.rewards["wrong_format"]
    assert isinstance(solution, list), "processed_result should be a list"

    N, M = self.parameter["N"], self.parameter["M"]
    if len(solution) != N :
        return self.rewards["wrong_format"]
    for row in solution :
        if len(row) != M :
            return self.rewards["wrong_format"]
    for row in solution :
        for c in row :
            if c not in "*." :
                return self.rewards["wrong_format"]

    # The eight neighbour offsets; the cell itself is excluded.
    neighbour_offsets = [(di, dj) for di in (-1, 0, +1) for dj in (-1, 0, +1) if not (di == 0 and dj == 0)]

    clues = self.parameter["grid"]
    satisfied = 0
    total = 0
    for i in range(N) :
        for j in range(M) :
            clue = clues[i][j]
            if clue == -1 :
                continue
            if solution[i][j] != "." :
                return self.rewards["invalid_solution"]
            counting = sum(
                1
                for di, dj in neighbour_offsets
                if 0 <= i + di < N and 0 <= j + dj < M and solution[i + di][j + dj] == "*"
            )
            assert 0 <= counting <= 8, "counting should be between 0 and 8"
            total += 1
            satisfied += int(counting == clue)

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((satisfied / total) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * (satisfied == total)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def scorer(self, output : str) -> float :
    """
    Score a submitted binary string against the stored reference answer.

    Returns rewards["wrong_format"] when the processor yields None, the string
    does not have length N, or it contains a character other than "0" / "1".
    Under "mean([gold=answer])^beta" the reward is the fraction of positions that
    match the reference, raised to beta; under "gold=answer" it is all-or-nothing.
    Raises NotImplementedError for an unknown rewarding strategy.
    """
    candidate = self.processor(output)
    if candidate is None :
        return self.rewards["wrong_format"]

    if len(candidate) != self.parameter["N"] :
        return self.rewards["wrong_format"]
    if any(c not in "01" for c in candidate) :
        return self.rewards["wrong_format"]

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "mean([gold=answer])^beta" :
        matches = 0
        for expected, got in zip(self.parameter["reference_answer"], candidate) :
            if expected == got :
                matches += 1
        return self.rewards["rewarding_weight"] * ((matches / self.parameter["N"]) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == candidate)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None:
    """Sample a random undirected graph and solve its chromatic number exactly.

    Reads ``self.parameter["N"]`` (number of vertices, at least 2) and
    ``self.parameter["edge_density"]`` (fraction in ``[0, 1]`` of the
    ``N * (N - 1) / 2`` possible edges to keep).

    Writes to ``self.parameter``:

    * ``"edges"``: list of distinct ``(u, v)`` pairs with ``u < v``.
    * ``"gold_answer"``: the minimum number of colors of a proper coloring.
    * ``"reference_answer"``: one optimal coloring, space-separated by vertex.

    Raises:
        AssertionError: if a required parameter is missing or out of range.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    # The message states the bound that is actually enforced (it used to say 1).
    assert N >= 2, "N should be greater than or equal to 2"

    assert "edge_density" in self.parameter, "edge_density is required in parameter"
    edge_density = self.parameter["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    all_pairs = [(u, v) for u in range(N) for v in range(u + 1, N)]
    edges = self.parameter["edges"] = random.sample(all_pairs, int(edge_density * N * (N - 1) / 2))
    random.shuffle(edges)

    for u, v in edges:
        assert 0 <= u < v < N
    assert len(edges) == len(set(edges)), "edges should be unique"

    # adjacent[u] is a bitmask of the neighbours of u.
    adjacent = [0] * N
    for u, v in edges:
        adjacent[u] |= 1 << v
        adjacent[v] |= 1 << u

    # Trivial upper bound: every vertex gets its own color.
    best_colors = list(range(N))
    best_count = N

    colors = [None] * N
    # color2set[c] is a bitmask of the vertices currently painted with color c.
    color2set = [0] * N

    def DFS(u: int, max_color: int) -> None:
        """Color vertices u..N-1, given that colors 0..max_color are in use."""
        nonlocal best_colors, best_count
        # Prune: this branch cannot beat the best coloring found so far.
        if max_color + 1 >= best_count:
            return
        if u == N:
            best_colors, best_count = colors.copy(), max_color + 1
            return
        # Only try used colors plus one fresh color; trying further fresh
        # colors would just relabel the same partition.
        for color in range(max_color + 2):
            if (color2set[color] & adjacent[u]) == 0:
                colors[u] = color
                color2set[color] += 1 << u
                DFS(u + 1, max(max_color, color))
                color2set[color] -= 1 << u

    DFS(0, -1)

    self.parameter["gold_answer"] = best_count
    self.parameter["reference_answer"] = " ".join(map(str, best_colors))
b/server/Gym/environments/minimum_chromatic_number_segment_overlap/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1466b201114c6db3ed56918051ec509b214105bb --- /dev/null +++ b/server/Gym/environments/minimum_chromatic_number_segment_overlap/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumChromaticNumber_SegmentOverlap_Environment diff --git a/server/Gym/environments/minimum_chromatic_number_segment_overlap/environment.py b/server/Gym/environments/minimum_chromatic_number_segment_overlap/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8beb403e861acaa6e4a6d20d35d3ecf4c0cb0717 --- /dev/null +++ b/server/Gym/environments/minimum_chromatic_number_segment_overlap/environment.py @@ -0,0 +1,133 @@ +import heapq +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumChromaticNumber_SegmentOverlap_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2859 + prompt_template = \ +r"""There are {N} segments (closed intervals) on the x-axis, labeled from `0` to `{N_minus_1}`: +{segments} + +Your task is to assign a **non-negative integer color** to each segment, represented as `c[0], c[1], ..., c[{N_minus_1}]`, such that: +- If segment `u` and segment `v` overlap (i.e., they share at least one point), then `c[u] ≠ c[v]`. +- The total number of **distinct colors used** (i.e., unique values among `c[0]` to `c[{N_minus_1}]`) is **minimized**. + +**Output Format:** A single line containing the color of each segment in order: `c[0] c[1] ... c[{N_minus_1}]` (separated by spaces). 
def _generate(self) -> None:
    """Sample ``N`` closed segments and color them with the fewest colors.

    Reads ``self.parameter["N"]`` (at least 3).

    Writes to ``self.parameter``:

    * ``"segments"``: list of ``N`` pairs ``(l, r)`` with ``l <= r``, endpoints
      drawn from ``1 .. 2N - 1``.
    * ``"gold_answer"``: number of colors used by the greedy sweep below.
    * ``"reference_answer"``: the greedy coloring (colors numbered from 1),
      space-separated in segment order.

    Raises:
        AssertionError: if ``N`` is missing or smaller than 3.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    # Split the N segments into groups: draw distinct cut points in 1..N,
    # close with N, and take consecutive differences (a group may be empty).
    answer_upperbound = random.randint(2, N)
    cut_points = sorted(random.sample(range(1, N + 1), k=answer_upperbound - 1))
    cut_points.append(N)
    group_sizes = [cut_points[0]] + [hi - lo for lo, hi in zip(cut_points, cut_points[1:])]

    # Inside one group, sorted endpoints are paired up consecutively.
    segments = self.parameter["segments"] = []
    for group_size in group_sizes:
        endpoints = sorted(random.choices(range(1, 2 * N), k=2 * group_size))
        segments.extend(zip(endpoints[0::2], endpoints[1::2]))
    random.shuffle(segments)
    assert len(segments) == N, "len(segments) should be equal to N"

    # Greedy sweep by left endpoint: reuse the color whose last segment ends
    # earliest if it ends strictly before the new segment starts (segments are
    # closed, so a shared endpoint counts as overlap); otherwise open a new one.
    by_start = sorted(range(N), key=lambda index: segments[index][0])
    active = []  # min-heap of (right endpoint, color)
    colors_used = 0
    assignment = [0] * N
    for index in by_start:
        left, right = segments[index]
        if active and active[0][0] < left:
            color = heapq.heappop(active)[1]
        else:
            colors_used += 1
            color = colors_used
        assignment[index] = color
        heapq.heappush(active, (right, color))

    self.parameter["gold_answer"] = colors_used
    self.parameter["reference_answer"] = " ".join(map(str, assignment))
self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_cost_maximum_flow/__init__.py b/server/Gym/environments/minimum_cost_maximum_flow/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3acd883e8b0e5b3ff843380d9f4f8fc17210226 --- /dev/null +++ b/server/Gym/environments/minimum_cost_maximum_flow/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumCost_MaximumFlow_Environment diff --git a/server/Gym/environments/minimum_cost_maximum_flow/environment.py b/server/Gym/environments/minimum_cost_maximum_flow/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f6825ef9a43e2021e3b7b3820f9c4e3be05a2329 --- /dev/null +++ b/server/Gym/environments/minimum_cost_maximum_flow/environment.py @@ -0,0 +1,201 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumCost_MaximumFlow_Environment(VerifiableEnvironment): + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The source vertex is `0` and the sink vertex is `{N_minus_1}`. + +The graph contains the following directed edges. Each edge is represented as a tuple `(s, t, c, w)`, meaning a directed edge **from vertex `s` to vertex `t` with positive capacity `c` and positive cost `w`**: +{edges} + +Your task is to find a **maximum flow** from source to sink that has the **minimum possible total cost**. A valid flow must satisfy these conditions: +1. The flow through each edge (which should not be negative) must not exceed its capacity +2. For each vertex (except source and sink), the total incoming flow must equal the total outgoing flow +3. 
The total flow leaving the source must be equal to the total flow entering the sink + +Among all possible maximum flows (flows that satisfy the above conditions and maximize the total flow from source to sink), you need to find the one with minimum total cost. The total cost is the sum of (flow x cost) for each edge. + +**Output Format:** +Your final answer should be a single line containing the flow values for each edge in the same order as they appear above, separated by **spaces**. +Example: `1 2 0 3` (do **NOT** include the backticks or quotes); this means the first edge has flow 1, second edge has flow 2, third edge has flow 0, and fourth edge has flow 3.""" + + def __init__(self, + max_capacity: int = 10, max_cost: int = 10, + wrong_format: float = -1.0, invalid_solution: float = -0.5, + rewarding_strategy_flow: str = "(answer/gold)^beta", rewarding_weight_flow: float = +0.5, rewarding_beta_flow: float = 5.0, + rewarding_strategy_cost: str = "(gold/answer)^beta", rewarding_weight_cost: float = +0.5, rewarding_beta_cost: float = 5.0, + **kwargs): + """ + Initialize the MaxFlow_Environment instance. 
def _generate(self) -> None:
    """Sample a random flow network and solve min-cost max-flow on it.

    Reads ``self.parameter["N"]`` (number of vertices, at least 3; vertex 0
    is the source and vertex ``N - 1`` the sink), ``self.parameter["edge_density"]``
    (in ``[0, 1]``), ``self.max_capacity`` and ``self.max_cost``.

    Writes to ``self.parameter``:

    * ``"edges"``: list of ``(s, t, capacity, cost)`` tuples, unique in
      ``(s, t)``, with no edge entering the source or leaving the sink.
    * ``"reference_answer"``: optimal flow on each edge, space-separated in
      edge order.
    * ``"gold_answer"``: ``{"flow": maximum flow, "cost": its minimum cost}``.

    Raises:
        AssertionError: if a required parameter is missing or out of range, or
            if a generated edge violates the invariants checked below.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    assert "edge_density" in self.parameter, "edge_density is required in parameter"
    edge_density = self.parameter["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    edges = self.parameter["edges"] = []

    # First lay down one source -> sink path so the maximum flow is positive.
    path_length = random.randint(2, min(5, N - 1))
    path = [0] + random.sample(range(1, N - 1), path_length - 1) + [N - 1]
    # Path edges get capacity in the upper half of the range. The lower bound
    # is clamped to 1: with max_capacity == 1 the plain `max_capacity // 2`
    # is 0 and a zero-capacity edge would violate the `c > 0` invariant below.
    min_path_capacity = max(1, self.max_capacity // 2)
    for s, t in zip(path, path[1:]):
        assert s != t
        capacity = random.randint(min_path_capacity, self.max_capacity)
        cost = random.randint(1, self.max_cost)
        edges.append((s, t, capacity, cost))

    # Top up with random extra edges until the requested density is reached.
    num_edges = int(edge_density * N * (N - 1))
    if len(edges) < num_edges:
        used_pairs = set((s, t) for s, t, c, w in edges)
        remaining_edges = list(
            set((s, t) for s in range(N) for t in range(N) if s != t and t != 0 and s != N - 1) - used_pairs
        )
        remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges)))
        for s, t in remaining_edges:
            capacity = random.randint(1, self.max_capacity)
            cost = random.randint(1, self.max_cost)
            edges.append((s, t, capacity, cost))
    random.shuffle(edges)

    for s, t, c, w in edges:
        assert 0 <= s < N and s != N - 1, "Source vertex out of bounds"
        assert 0 <= t < N and t != 0, "Target vertex out of bounds"
        assert s != t, "Source and target vertices must be different"
        assert c > 0, "Capacity must be positive"
        assert w > 0, "Cost must be positive"
    assert len(edges) == len(set((s, t) for s, t, c, w in edges)), "Edges must be unique"

    # Build the graph with every vertex present, even isolated ones.
    G = networkx.DiGraph()
    for v in range(N):
        G.add_node(v)
    for s, t, c, w in edges:
        G.add_edge(s, t, capacity=c, weight=w)

    flow_dict = networkx.max_flow_min_cost(G, 0, N - 1)

    # Reference flows are reported in the (shuffled) edge order of the prompt.
    reference_flows = [flow_dict[s].get(t, 0) for s, t, c, w in edges]
    self.parameter["reference_answer"] = " ".join(map(str, reference_flows))

    total_flow = sum(flow_dict[0][t] for t in flow_dict[0])  # flow leaving the source
    total_cost = sum(flow_dict[s][t] * G[s][t]['weight'] for s in flow_dict for t in flow_dict[s])
    assert total_flow > 0 and total_cost > 0
    self.parameter["gold_answer"] = {"flow" : total_flow, "cost": total_cost}
list), "processed_result should be a list" + + flows = processed_result + if len(flows) != len(self.parameter["edges"]): + return self.rewards["wrong_format"] + + # Check if flows are valid + N = self.parameter["N"] + + # Initialize flow arrays for each vertex + in_flows = [0] * N + out_flows = [0] * N + + # Check flows and compute vertex flows in one pass + for i, (s, t, capacity, cost) in enumerate(self.parameter["edges"]): + flow = flows[i] + # Check if flow is valid + if not (0 <= flow <= capacity): + return self.rewards["invalid_solution"] + + # Update vertex flows + out_flows[s] += flow + in_flows[t] += flow + + # Check flow conservation at intermediate vertices + for v in range(N): + if v == 0 or v == N - 1: + continue + if in_flows[v] != out_flows[v]: + return self.rewards["invalid_solution"] + + # Check flow balance between source and sink + if out_flows[0] != in_flows[N - 1]: + return self.rewards["invalid_solution"] + + + reward = 0.0 + + total_flow, gold_flow = out_flows[0], self.parameter["gold_answer"]["flow"] + assert total_flow <= gold_flow, "Total flow from source exceeds gold flow" + if self.rewards["rewarding_strategy_flow"] == "(answer/gold)^beta": + reward += self.rewards["rewarding_weight_flow"] * ((total_flow / gold_flow) ** self.rewards["rewarding_beta_flow"]) + elif self.rewards["rewarding_strategy_flow"] == "gold=answer": + reward += self.rewards["rewarding_weight_flow"] * (total_flow == gold_flow) + else : + raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy_flow']}") + + if total_flow == gold_flow: + total_cost, gold_cost = sum(flows[i] * cost for i, (_, _, _, cost) in enumerate(self.parameter["edges"])), self.parameter["gold_answer"]["cost"] + assert gold_cost <= total_cost, "Total cost exceeds gold cost" + if self.rewards["rewarding_strategy_cost"] == "(gold/answer)^beta": + reward += self.rewards["rewarding_weight_cost"] * ((gold_cost / total_cost) ** self.rewards["rewarding_beta_cost"]) + elif 
self.rewards["rewarding_strategy_cost"] == "gold=answer": + reward += self.rewards["rewarding_weight_cost"] * (total_cost == gold_cost) + else : + raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy_cost']}") + + return reward + else: + return self.rewards["wrong_format"] diff --git a/server/Gym/environments/minimum_crossing_edges_graph_partition/__init__.py b/server/Gym/environments/minimum_crossing_edges_graph_partition/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a883d9e9fa7d9e252e20ef1bcc7bf89b9f1d6046 --- /dev/null +++ b/server/Gym/environments/minimum_crossing_edges_graph_partition/__init__.py @@ -0,0 +1 @@ +from .environment import Minimum_CrossingEdges_GraphPartition_Environment diff --git a/server/Gym/environments/minimum_crossing_edges_graph_partition/environment.py b/server/Gym/environments/minimum_crossing_edges_graph_partition/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..b207d8aa568f25da4e1ca5086fed4efab3cb1236 --- /dev/null +++ b/server/Gym/environments/minimum_crossing_edges_graph_partition/environment.py @@ -0,0 +1,132 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class Minimum_CrossingEdges_GraphPartition_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges: +{edges} + +Partition all vertices into {K} **non-empty** sets, such that each vertex belongs to exactly one set. +Try your best to **minimize the number of crossing edges** — an edge `(u, v)` is considered crossing if `u` and `v` are in different sets. 
def _generate(self) -> None:
    """Sample a random graph and compute the minimum K-way cut size exactly.

    Reads ``self.parameter["N"]`` (at least 3) and
    ``self.parameter["edge_density"]`` (in ``[0, 1]``).

    Writes to ``self.parameter``:

    * ``"K"``: number of non-empty sets, drawn uniformly from ``2 .. N - 1``.
    * ``"edges"``: list of distinct ``(u, v)`` pairs with ``u < v``.
    * ``"gold_answer"``: the minimum number of crossing edges over all
      partitions of the vertices into ``K`` non-empty sets.

    Raises:
        AssertionError: if a required parameter is missing or out of range.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    K = self.parameter["K"] = random.randint(2, N - 1)

    assert "edge_density" in self.parameter, "edge_density is required in parameter"
    edge_density = self.parameter["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    candidate_pairs = [(u, v) for u in range(N) for v in range(u + 1, N)]
    edges = self.parameter["edges"] = random.sample(candidate_pairs, int(edge_density * N * (N - 1) / 2))
    random.shuffle(edges)

    assert all(0 <= u < v < N for u, v in edges)
    assert len(edges) == len(set(edges)), "edges should be unique"

    subset_count = 1 << N
    full_mask = subset_count - 1

    # inside[S] = number of edges with both endpoints in vertex subset S.
    # For each edge, walk every subset of the other vertices and add the pair.
    inside = [0] * subset_count
    for u, v in edges:
        pair = (1 << u) | (1 << v)
        others = full_mask ^ pair
        extra = others
        while True:
            inside[extra | pair] += 1
            if extra == 0:
                break
            extra = (extra - 1) & others

    # best[S] after i rounds = maximum number of non-crossing edges when S is
    # split into exactly i non-empty parts (None if that is impossible).
    best = [None] * subset_count
    best[0] = 0
    for _ in range(K):
        grown = [None] * subset_count
        for covered, kept in enumerate(best):
            if kept is None:
                continue
            free = full_mask ^ covered
            # Enumerate every non-empty subset of the still-uncovered vertices.
            part = free
            while part:
                value = kept + inside[part]
                target = covered | part
                if grown[target] is None or value > grown[target]:
                    grown[target] = value
                part = (part - 1) & free
        best = grown

    # Crossing edges = all edges minus the most edges kept inside parts.
    self.parameter["gold_answer"] = len(edges) - best[full_mask]
at end of file diff --git a/server/Gym/environments/minimum_directed_spanning_tree/__init__.py b/server/Gym/environments/minimum_directed_spanning_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1820901168e2e03bb0760f40d72250a6f4fcede5 --- /dev/null +++ b/server/Gym/environments/minimum_directed_spanning_tree/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumDirectedSpanningTree_Environment diff --git a/server/Gym/environments/minimum_directed_spanning_tree/environment.py b/server/Gym/environments/minimum_directed_spanning_tree/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8a728befccb76e6af1636879548eb043890d43ab --- /dev/null +++ b/server/Gym/environments/minimum_directed_spanning_tree/environment.py @@ -0,0 +1,168 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumDirectedSpanningTree_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following directed edges. Each edge is represented as a tuple `(s, t, w)`, meaning a directed edge **from vertex `s` to vertex `t` with weight `w`**: +{edges} + +Your task is to select a subset of edges `T = [(s_1, t_1, w_1), (s_2, t_2, w_2), ..., (s_k, t_k, w_k)]` such that: +- k = {N} - 1 = {N_minus_1} (i.e., you select exactly {N_minus_1} edges). +- The selected edges form a **spanning arborescence rooted at vertex {root}** — meaning: + - All vertices are reachable from vertex `{root}`. + - Each vertex other than `{root}` has exactly one incoming edge. + - The selected edges form no cycles. +- Your goal is to **minimize** the total weight of the selected edges: `w_1 + w_2 + ... + w_k`. + +**Output Format:** +Your final answer should be a single line containing the endpoints of the selected edges in order: `s_1 t_1 s_2 t_2 ... 
def _generate(self) -> None:
    """Sample a weighted directed graph and its minimum spanning arborescence.

    Reads ``self.parameter["N"]`` (at least 3) and
    ``self.parameter["edge_density"]`` (in ``[0, 1]``).

    Writes to ``self.parameter``:

    * ``"edges"``: list of ``(s, t, w)`` tuples, unique in ``(s, t)``.
    * ``"root"``: the vertex every other vertex is reachable from.
    * ``"reference_answer"``: endpoints ``s t`` of the arborescence edges,
      space-separated.
    * ``"gold_answer"``: total weight of that arborescence.

    The whole instance is resampled until networkx returns a solution.

    Raises:
        AssertionError: if a required parameter is missing or out of range.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    assert "edge_density" in self.parameter, "edge_density is required in parameter"
    edge_density = self.parameter["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    num_edges = int(edge_density * N * (N - 1))
    max_weight = max(1, num_edges)

    while True:
        edges = self.parameter["edges"] = []

        # Random arborescence skeleton: each vertex after the first in a
        # random order gets a parent among the earlier ones, so every vertex
        # is reachable from permutations[0].
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations):
            if index == 0:
                continue
            s = random.choice(permutations[:index])
            edges.append((s, vertex, random.randint(1, max_weight)))
        root = self.parameter["root"] = permutations[0]

        # Top up with random extra edges until the requested density is reached.
        if len(edges) < num_edges:
            used_pairs = set((s, t) for s, t, w in edges)
            remaining_edges = list(set((s, t) for s in range(N) for t in range(N) if s != t) - used_pairs)
            remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges)))
            for s, t in remaining_edges:
                edges.append((s, t, random.randint(1, max_weight)))
        random.shuffle(edges)

        for s, t, w in edges:
            assert 0 <= s < N and 0 <= t < N, "s and t should be in range [0, N)"
            assert s != t
        assert len(edges) == len(set((s, t) for s, t, w in edges)), "edges should be unique"

        try:
            # A virtual vertex N with a single zero-weight edge into `root`
            # forces the arborescence found by networkx to pass through `root`.
            G = networkx.DiGraph()
            G.add_weighted_edges_from(edges + [(N, root, 0)])
            msa = networkx.minimum_spanning_arborescence(G)
            self.parameter["reference_answer"] = " ".join(
                "{} {}".format(s, t) for s, t in msa.edges() if (s, t) != (N, root)
            )
            self.parameter["gold_answer"] = sum(msa[s][t]["weight"] for s, t in msa.edges())
            assert self.parameter["gold_answer"] > 0, "The gold answer should be greater than 0"
            break
        except Exception:
            # Deliberate best-effort retry: networkx.minimum_spanning_arborescence
            # has been seen to fail on some inputs, so resample the instance.
            # `Exception` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit able to stop this otherwise endless loop.
            continue
self.parameter["N"] and 0 <= t < self.parameter["N"], "s and t should be in range [0, N)" + if s == t : + return self.rewards["invalid_solution"] + adjacent_list[s].append(t) + + visited = [False] * self.parameter["N"] + def DFS(vertex : int) -> bool : + for neighbor in adjacent_list[vertex] : + if visited[neighbor] : + return False + visited[neighbor] = True + if not DFS(neighbor) : + return False + return True + visited[self.parameter["root"]] = True + if not DFS(self.parameter["root"]) : + return self.rewards["invalid_solution"] + if not all(visited) : + return self.rewards["invalid_solution"] + + G = networkx.DiGraph() + G.add_nodes_from(range(self.parameter["N"] + 1)) + G.add_edges_from(msa + [(self.parameter["N"], self.parameter["root"])]) + assert networkx.is_arborescence(G) + + edges = {(s, t) : w for s, t, w in self.parameter["edges"]} + answer_weight = 0 + for s, t in msa : + if (s, t) not in edges : + return self.rewards["invalid_solution"] + answer_weight += edges[(s, t)] + assert self.parameter["gold_answer"] <= answer_weight, "answer_weight should be greater than or equal to gold_answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((self.parameter["gold_answer"] / answer_weight) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == answer_weight) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_dominating_interval/__init__.py b/server/Gym/environments/minimum_dominating_interval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ec1ba053d4196f6bf6a92f9daf5e5de0f6576ef --- /dev/null +++ b/server/Gym/environments/minimum_dominating_interval/__init__.py @@ 
class Minimum_DominatingInterval_Environment(VerifiableEnvironment) :
    """
    Pick K distinct covered points on a line so that the total cost of all intervals touching a picked point is minimal.

    `_generate` builds the instance and solves it exactly with a dynamic programme;
    `scorer` validates a submitted point set and rewards it relative to the optimum.
    """
    prompt_template = \
r"""There are {N} points labeled 1 through {N} on a line. You are given {M} intervals [L[i], R[i]] (1 <= L[i] <= R[i] <= {N}), each with a cost C[i]:
{intervals}

Please select {K} distinct points such that each selected point is **covered by at least one** of the intervals.
The cost of a selection is the sum of the costs (C[i]) of all intervals that cover at least one of the selected points.
Try your best to minimize the total cost of the selection.

**Output Format:** Your final answer should be a single line containing the {K} selected points, separated by spaces. Example: {first_K_points} (do **NOT** include quotes or backticks)."""

    def __init__(self,
                 cost_range : int = 10,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the Minimum_DominatingInterval_Environment instance.

        `cost_range` is the inclusive upper bound of each random interval cost; the remaining
        keyword arguments configure the rewards returned by `scorer`.
        """
        super().__init__(**kwargs)

        self.cost_range = cost_range
        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Sample the intervals, derive K, and compute an optimal selection.

        Reads `N` (>= 3), `M` (>= 2) and `K_density` (in [0, 1]) from `self.parameter`.
        Writes `intervals`, `K`, `reference_answer`, `gold_answer`, and overwrites `M` with the
        number of intervals actually sampled (it is smaller than requested when M > N * (N + 1) / 2).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 2, "M should be greater than or equal to 2"

        all_intervals = [(l, r, random.randint(1, self.cost_range)) for l in range(1, N + 1) for r in range(l, N + 1)]
        assert len(all_intervals) == (N * (N + 1) // 2)
        intervals = self.parameter["intervals"] = random.sample(all_intervals, min(len(all_intervals), M))
        # Only N * (N + 1) / 2 distinct intervals exist; keep M in sync with what was really
        # sampled, otherwise every later `range(M)` loop would index past the end
        M = self.parameter["M"] = len(intervals)

        assert "K_density" in self.parameter, "K_density is required in parameter"
        K_density = self.parameter["K_density"]
        assert 0.0 <= K_density <= 1.0, "K_density should be between 0.0 and 1.0"

        L, R, C = zip(*intervals)

        # covered[p] is True when at least one interval contains point p
        covered = [False] * (N + 1)
        for Li, Ri in zip(L, R) :
            for point in range(Li, Ri + 1) :
                covered[point] = True
        K = self.parameter["K"] = max(1, int(K_density * sum(covered)))

        # After the suffix accumulation, Sum_Ci[l][r] is the total cost of the intervals
        # whose left endpoint is exactly l and whose right endpoint is at least r
        Sum_Ci = [[0] * (N + 1) for _ in range(N + 1)]
        for Li, Ri, Ci in intervals :
            Sum_Ci[Li][Ri] += Ci
        for l in range(1, N + 1) :
            for r in range(N - 1, 0, -1) :
                Sum_Ci[l][r] += Sum_Ci[l][r + 1]

        # dpF[k][i]: minimum cost of picking k points with i being the largest one (None = impossible)
        # dpG[k][i]: the previously picked point in that optimum (None for k == 1)
        dpF = [[None] * (N + 1) for _ in range(K + 1)]
        dpG = [[None] * (N + 1) for _ in range(K + 1)]
        for i in range(1, N + 1) :
            if covered[i] :
                dpF[1][i] = sum(Sum_Ci[l][i] for l in range(1, i + 1))
        for k in range(2, K + 1) :
            for i in range(1, N + 1) :
                if not covered[i] :
                    continue
                Sum = 0
                for j in range(i, 0, -1) :
                    # Sum: cost of intervals covering i whose left endpoint is >= j, i.e. those
                    # that are not already paid for by a previous point at j - 1
                    Sum += Sum_Ci[j][i]
                    previous = dpF[k - 1][j - 1]
                    if previous is not None and (dpF[k][i] is None or previous + Sum < dpF[k][i]) :
                        dpF[k][i] = previous + Sum
                        dpG[k][i] = j - 1

        # Cheapest end point (first one on ties), then walk the predecessor links backwards
        last = None
        for i in range(1, N + 1) :
            if dpF[K][i] is not None and (last is None or dpF[K][i] < dpF[K][last]) :
                last = i
        pickeds = []
        for k in range(K, 0, -1) :
            assert last is not None
            pickeds.append(last)
            last = dpG[k][last]
        assert last is None
        pickeds.reverse()

        self.parameter["reference_answer"] = " ".join(map(str, pickeds))
        self.parameter["gold_answer"] = sum(C[i] for i in range(M) if any(L[i] <= picked and picked <= R[i] for picked in pickeds))
        assert self.parameter["gold_answer"] > 0

    def _prompt_generate(self) -> str :
        """Render the prompt, listing the intervals with 0-based indices."""
        L, R, C = zip(*self.parameter["intervals"])
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            K = self.parameter["K"],
            intervals = "\n".join("L[{}]={}, R[{}]={}, C[{}]={}".format(i, L[i], i, R[i], i, C[i]) for i in range(self.parameter["M"])),
            first_K_points = " ".join(map(str, range(1, self.parameter["K"] + 1))),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None when the text is missing or malformed."""
        if answer is None :
            return None # Invalid answer format
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a raw model output.

        Returns the `wrong_format` reward when the output cannot be parsed, `invalid_solution`
        when the points are not K distinct covered points, and otherwise a reward derived from
        the gold cost and the cost of the submitted selection.
        Raises NotImplementedError for an unknown rewarding strategy.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        pickeds = processed_result
        if len(pickeds) != self.parameter["K"] :
            return self.rewards["invalid_solution"]
        if len(set(pickeds)) != self.parameter["K"] :
            return self.rewards["invalid_solution"]

        L, R, C = zip(*self.parameter["intervals"])
        # Out-of-range points are rejected here as well: no interval can contain them
        if not all(any(Li <= picked <= Ri for Li, Ri in zip(L, R)) for picked in pickeds) :
            return self.rewards["invalid_solution"]

        gold = self.parameter["gold_answer"]
        answer = sum(C[i] for i in range(self.parameter["M"]) if any(L[i] <= picked and picked <= R[i] for picked in pickeds))
        assert gold <= answer, "answer should be greater than or equal to gold"

        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold == answer)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None :
    """
    Generate a random undirected graph with vertex costs and solve minimum-cost dominating set exactly.

    Reads `N` (>= 2) and `edge_density` (in [0, 1]) from `self.parameter`, and `self.cost_range`.
    Writes `edges`, `C`, `covering` (per-vertex bitmask of the vertex and its neighbours),
    `reference_answer` (space-separated vertices) and `gold_answer` (the optimal total cost).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    assert "edge_density" in self.parameter, "edge_density is required in parameter"
    edge_density = self.parameter["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    edges = self.parameter["edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2))
    random.shuffle(edges)

    for u, v in edges :
        assert 0 <= u < v < N
    assert len(edges) == len(set(edges)), "edges should be unique"

    C = self.parameter["C"] = [random.randint(1, self.cost_range) for vertex in range(N)]

    # covering[u]: bit v is set when selecting u dominates v (u itself or a neighbour of u)
    covering = self.parameter["covering"] = [1 << u for u in range(N)]
    for u, v in edges :
        covering[u] |= 1 << v
        covering[v] |= 1 << u
    everything = (1 << N) - 1

    # Selecting every vertex is always valid; the search below only ever improves on it
    self.parameter["reference_answer"] = list(range(N))
    self.parameter["gold_answer"] = sum(C)

    selected = []
    def DFS(u : int, now_covering : int, sumC : int) -> None :
        # Branch and bound: a partial selection that already costs as much as the best
        # known solution cannot lead to a strictly better one
        if sumC >= self.parameter["gold_answer"] :
            return
        if u == N :
            if now_covering == everything :
                self.parameter["reference_answer"], self.parameter["gold_answer"] = selected.copy(), sumC
            return
        DFS(u + 1, now_covering, sumC)
        # Taking u is only worthwhile when it dominates something new
        if (now_covering | covering[u]) > now_covering :
            selected.append(u)
            DFS(u + 1, now_covering | covering[u], sumC + C[u])
            selected.pop()
    DFS(0, 0, 0)

    self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"]))
    assert self.parameter["gold_answer"] > 0, "gold_answer must be greater than 0"
self.parameter["covering"][u] + if all_covering != (1 << self.parameter["N"]) - 1 : + return self.rewards["invalid_solution"] + + answer = sum(self.parameter["C"][u] for u in selected_vertices) + gold = self.parameter["gold_answer"] + assert gold <= answer, "gold should be less than or equal to answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_dominating_set_grid/__init__.py b/server/Gym/environments/minimum_dominating_set_grid/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1b94ae392cb74bfd17a060734cf441e30f5765ad --- /dev/null +++ b/server/Gym/environments/minimum_dominating_set_grid/__init__.py @@ -0,0 +1 @@ +from .environment import Minimum_DominatingSet_Grid_Environment diff --git a/server/Gym/environments/minimum_dominating_set_grid/environment.py b/server/Gym/environments/minimum_dominating_set_grid/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9a30a959e8f4849027905fa08a84eaebfd94f7 --- /dev/null +++ b/server/Gym/environments/minimum_dominating_set_grid/environment.py @@ -0,0 +1,198 @@ +import random +from typing import Optional, List, Tuple +from ...environment import VerifiableEnvironment + + +class Minimum_DominatingSet_Grid_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3888 + prompt_template = \ +r"""We have a grid with {N} rows and {M} columns (1-based indices). 
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Set up the Minimum_DominatingSet_Grid_Environment instance.

    All named arguments are stored under the same names in `self.rewards`;
    everything else is forwarded to the parent constructor.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        unsuccessful_solution = unsuccessful_solution,
        rewarding_strategy = rewarding_strategy,
        rewarding_weight = rewarding_weight,
        rewarding_beta = rewarding_beta,
    )
DP) + # add a dummy row N+1 with all zero costs (to flush coverage of the last real row) + row_sums = [[0] * S for _ in range(N + 2)] # index 1..N, N+1 is zeros + + for i in range(1, N + 1): + costs = F[i - 1] + rs = row_sums[i] + for mask in range(S): + total = 0 + x = mask + while x: + t = x & -x + total += costs[bit_to_idx[t]] + x -= t + rs[mask] = total + # row_sums[N+1] already zero + + # supersets list: for each 'need' mask, all p where p is a superset of 'need' + supersets = [[] for _ in range(S)] + for need in range(S): + rem = ALL ^ need # bits we are free to choose + x = rem + while True: + supersets[need].append(need | x) + if x == 0: + break + x = (x - 1) & rem + + INF = float('inf') + + # DP arrays: f[p][j] and g[p][j] + # f: minimal cost; g: number of depots (tie-breaker) + f = [[INF] * S for _ in range(S)] + g = [[INF] * S for _ in range(S)] + + # Initialize for first row: previous row (k) is 0 + rs1 = row_sums[1] + for j in range(S): + f[j][0] = rs1[j] + g[j][0] = ones[j] + + # Transition rows 2..N+1 (N+1 is dummy zero-cost row) + for i in range(2, N + 2): + nf = [[INF] * S for _ in range(S)] + ng = [[INF] * S for _ in range(S)] + rsi = row_sums[i] + + for j in range(S): # mask for row i-1 + sj = shift_cov[j] + fj = f[j] + gj = g[j] + for k in range(S): # mask for row i-2 + base_cost = fj[k] + if base_cost == INF: + continue + base_cnt = gj[k] + need = ALL ^ (sj | k) # columns still needing coverage on row i-1 + for p in supersets[need]: # mask for row i + v = base_cost + rsi[p] + c = base_cnt + ones[p] + if v < nf[p][j]: + nf[p][j] = v + ng[p][j] = c + elif v == nf[p][j] and c < ng[p][j]: + ng[p][j] = c + + f, g = nf, ng + + # Finalize: last (dummy) row must be p=0; scan any j + best_cost = INF + best_cnt = INF + f0 = f[0] + g0 = g[0] + for j in range(S): + v = f0[j] + if v < best_cost: + best_cost = v + best_cnt = g0[j] + elif v == best_cost and g0[j] < best_cnt: + best_cnt = g0[j] + + assert best_cost > 0, "gold_answer must be greater than 0" + 
def _process(self, answer : Optional[str]) -> Optional[List[Tuple[int, int]]] :
    """
    Parse the answer into a list of `(i, j)` cells, one cell per non-empty line.

    Returns None when `answer` is None or when any non-empty line is not exactly two integers.
    """
    if answer is None :
        return None

    cells = []
    for line in answer.strip().splitlines() :
        line = line.strip()
        if not line :
            continue
        try :
            # ValueError covers both a non-integer token and a wrong number of tokens
            i, j = map(int, line.split())
        except ValueError :
            return None
        cells.append((i, j))
    return cells
class MinimumFibonacciRepresentation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3539
    """Ask for the fewest Fibonacci numbers (added or subtracted) needed to represent a random integer K."""
    prompt_template = \
r"""Define Fibonacci numbers as the sequence: 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, ... You can represent any positive integer by adding or subtracting Fibonacci numbers. For example:
- 10 = 5 + 5 → uses 2 Fibonacci numbers
- 19 = 21 - 2 → uses 2 Fibonacci numbers
- 17 = 13 + 5 - 1 → uses 3 Fibonacci numbers
- 1070 = 987 + 89 - 5 - 1 → uses 4 Fibonacci numbers

Please compute the minimum number of Fibonacci numbers needed (added or subtracted) to represent the number {K}. Output a single integer — the minimum number of Fibonacci numbers used."""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the MinimumFibonacciRepresentation_Environment instance.

        The three named arguments are the rewards returned by `scorer`.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Draw K uniformly from [4, MAX_K] and compute the reference answer.

        Reads `MAX_K` (>= 10) from `self.parameter`; writes `K` and `reference_answer`.
        """
        assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
        MAX_K = self.parameter["MAX_K"]
        assert MAX_K >= 10, "MAX_K should be greater than or equal to 10"

        K = self.parameter["K"] = random.randint(4, MAX_K)

        # Extend 1, 2, 3, 5, ... until the last term is strictly greater than K
        fib = [1, 2]
        while fib[-1] <= K :
            fib.append(fib[-2] + fib[-1])

        # Repeatedly replace the remainder by its distance to the nearer neighbouring term
        # (the term <= remainder or the term > remainder); each replacement uses one number
        used = 0
        remainder = K
        while remainder != 0 :
            used += 1
            above = bisect.bisect_right(fib, remainder) # index of the first term > remainder
            remainder = min(fib[above] - remainder, remainder - fib[above - 1])
        self.parameter["reference_answer"] = used


    def _prompt_generate(self) -> str :
        """Fill K into the prompt template."""
        return self.prompt_template.format(K = self.parameter["K"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as a single integer; return None when it is missing or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Return the `wrong_format`, `correct_answer` or `wrong_answer` reward for a raw model output."""
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class MinimumHarmoniousChromaticNumber_Environment(VerifiableEnvironment) :
    """
    Find a harmonious colouring of a random undirected graph that uses as few colours as possible.

    A colouring is harmonious when adjacent vertices differ in colour and every unordered
    pair of colours appears on at most one edge.
    """
    prompt_template = \
r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`.
The graph contains the following undirected edges:
{edges}

Your task is to assign a **non-negative integer color** to each vertex, represented as `c[0], c[1], ..., c[{N_minus_1}]`, such that:
- For every edge `(u, v)` in the graph, `c[u] ≠ c[v]` — adjacent vertices must have different colors.
- For every pair of two distinct used colors `x` and `y`, there exists **at most one edge** `(u, v)` such that `c[u] = x` and `c[v] = y`, i.e., this is a *harmonious coloring*.
- The total number of **distinct colors used** (i.e., the number of unique values among `c[0]` to `c[{N_minus_1}]`) is **minimized** - try your best to find a valid coloring using as few colors as possible.

**Output Format:**
Your final answer should be a single line containing the color of each vertex in order: `c[0], c[1], ..., c[{N_minus_1}]`, separated by **spaces**."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the MinimumHarmoniousChromaticNumber_Environment instance.

        The named arguments configure the rewards returned by `scorer`.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Generate a random graph and find an optimal harmonious colouring by exhaustive search.

        Reads `N` (>= 2) and `edge_density` (in [0, 1]) from `self.parameter`.
        Writes `edges`, `reference_answer` (space-separated colours) and `gold_answer`
        (the number of colours in that colouring).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "edge_density" in self.parameter, "edge_density is required in parameter"
        edge_density = self.parameter["edge_density"]
        assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

        edges = self.parameter["edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2))
        random.shuffle(edges)

        for u, v in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)), "edges should be unique"


        # Giving every vertex its own colour is always harmonious; the search only improves on it
        self.parameter["reference_answer"] = list(range(N))
        self.parameter["gold_answer"] = N

        adjacent = [0] * N                          # adjacent[u]: bitmask of the neighbours of u
        smaller_adjacents = [[] for u in range(N)]  # smaller_adjacents[u]: neighbours of u with a smaller label
        for u, v in edges :
            adjacent[u] |= 1 << v
            adjacent[v] |= 1 << u
            smaller_adjacents[max(u, v)].append(min(u, v))

        colors = [None] * N                                 # colour of each vertex assigned so far
        color2set = [0] * N                                 # color2set[c]: bitmask of the vertices coloured c
        color_adjacent = [[False] * N for _ in range(N)]    # color_adjacent[x][y], x < y: colour pair already on an edge

        def DFS(u : int, max_color : int) -> None :
            # Vertices are coloured in label order; `max_color` is the largest colour used so far
            if max_color + 1 >= self.parameter["gold_answer"] :
                return
            if u == N :
                self.parameter["reference_answer"], self.parameter["gold_answer"] = colors.copy(), max_color + 1
                return
            # Symmetry breaking: allow at most one colour that has not been used yet
            for color in range(max_color + 2) :
                if color2set[color] & adjacent[u] :
                    continue
                colors[u] = color

                # Mark the colour pairs of the edges towards already coloured neighbours in place,
                # remembering them so they can be undone instead of copying the whole matrix
                marked = []
                valid = True
                for v in smaller_adjacents[u] :
                    color_u, color_v = min(color, colors[v]), max(color, colors[v])
                    assert color_u != color_v, "Adjacent vertices should have different colors"
                    if color_adjacent[color_u][color_v] :
                        valid = False
                        break
                    color_adjacent[color_u][color_v] = True
                    marked.append((color_u, color_v))

                if valid :
                    color2set[color] |= 1 << u
                    DFS(u + 1, max(max_color, color))
                    color2set[color] ^= 1 << u

                for color_u, color_v in marked :
                    color_adjacent[color_u][color_v] = False
        DFS(0, -1)

        self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"]))


    def _prompt_generate(self) -> str :
        """Render the prompt with the vertex count and the edge list."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None when the text is missing or malformed."""
        if answer is None :
            return None
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a raw model output.

        Returns the `wrong_format` reward when the output cannot be parsed, `invalid_solution`
        when it is not a harmonious colouring of all N vertices, and otherwise a reward derived
        from the gold colour count and the number of distinct colours submitted.
        Raises NotImplementedError for an unknown rewarding strategy.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        colors = processed_result
        if len(colors) != self.parameter["N"] :
            return self.rewards["invalid_solution"]
        adjacent_color_pairs = set()
        for u, v in self.parameter["edges"] :
            if colors[u] == colors[v] :
                return self.rewards["invalid_solution"]
            color_u, color_v = min(colors[u], colors[v]), max(colors[u], colors[v])
            if (color_u, color_v) in adjacent_color_pairs :
                return self.rewards["invalid_solution"]
            adjacent_color_pairs.add((color_u, color_v))

        gold, answer = self.parameter["gold_answer"], len(set(colors))
        assert gold <= answer, "gold should be less than or equal to answer"

        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (answer == gold)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+ +**Output Format:** A single line containing {M} integers — the number of times you select each interval, in order, separated by spaces.""" + + def __init__(self, + cost_multiple : int = 3, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = +5.0, + **kwargs) : + """ + Initialize the MinimumIntervalCoverage_Environment instance. + """ + super().__init__(**kwargs) + + self.cost_multiple = cost_multiple + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N must be at least 1" + + assert "M" in self.parameter, "M is required in parameter" + M = self.parameter["M"] + assert M >= 1, "M must be at least 1" + + INTERVALS = self.parameter["intervals"] = [] + for i in range(M) : + L, R = random.randint(1, N), random.randint(1, N) + if L > R : + L, R = R, L + C = random.randint(1, self.cost_multiple * (R - L + 1)) + INTERVALS.append((L, R, C)) + + NEEDS = self.parameter["NEEDS"] = [] + for i in range(1, N + 1) : + NEEDS.append(random.randint(0, N) if any(L <= i <= R for L, R, C in INTERVALS) else 0) + + + # Pad NEED with zeros at both ends (for difference calculation) + NEED = [0] + NEEDS + [0] # length N+2 + + # Build the demand for each node 0..N: + # DEMANDS[k] = flow into node k minus flow out of node k + # We want net in-out = NEED[k+1] - NEED[k] + DEMANDS = [0] * (N + 1) + for i in range(1, N + 2): + DEMANDS[i - 1] = NEED[i] - NEED[i - 1] + + # Build the directed graph + G = nx.MultiDiGraph() + INF = sum(NEEDS) + + # Add all nodes with their 'demand' attribute + for node, d in enumerate(DEMANDS): + G.add_node(node, demand=d) + + # Add the 
"chain" edges i -> i+1 with infinite capacity and zero cost + for i in range(N): + G.add_edge(i, i + 1, capacity=INF, weight=0) + + # Add an edge for each volunteer type: + # selecting one volunteer of type (s, t, c) corresponds to sending + # one unit of flow along t -> (s-1) at cost c + for s, t, c in INTERVALS: + u = t # maps to node t (since t in [1..N], node range is 0..N) + v = s - 1 # maps to node s-1 + G.add_edge(u, v, capacity=INF, weight=c) + + # Compute the minimum-cost flow satisfying all node demands + cost, flow_dict = nx.network_simplex(G) + + # Output the total minimum cost + self.parameter["gold_answer"] = cost + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + intervals = "\n".join("L[{}]={} R[{}]={} C[{}]={}".format(i + 1, L, i + 1, R, i + 1, C) for i, (L, R, C) in enumerate(self.parameter["intervals"])), + NEED = " ".join("NEED[{}]={}".format(i + 1, need) for i, need in enumerate(self.parameter["NEEDS"])) + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + times = processed_result + if len(times) != self.parameter["M"] : + return self.rewards["wrong_format"] + if any(t < 0 for t in times) : + return self.rewards["invalid_solution"] + for i in range(1, self.parameter["N"] + 1) : + if sum(int(L <= i <= R) * times[j] for j, (L, R, C) in enumerate(self.parameter["intervals"])) < self.parameter["NEEDS"][i - 1] : + return self.rewards["invalid_solution"] + + answer, gold = sum(times[j] * C for j, (L, R, C) 
in enumerate(self.parameter["intervals"])), self.parameter["gold_answer"] + assert gold <= answer, "answer should be greater than or equal to gold" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + if answer == 0 : + assert gold == 0, "gold should be zero if answer is zero" + return self.rewards["rewarding_weight"] * 1.0 # Reward for zero answer + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_max_abs_slicer/__init__.py b/server/Gym/environments/minimum_max_abs_slicer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b435963380179be553fcc6f63dbaec3000b53bc3 --- /dev/null +++ b/server/Gym/environments/minimum_max_abs_slicer/__init__.py @@ -0,0 +1 @@ +from .environment import Minimum_MaxAbsSlicer_Environment diff --git a/server/Gym/environments/minimum_max_abs_slicer/environment.py b/server/Gym/environments/minimum_max_abs_slicer/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..ee929fcf76fb4cad77a985331c68e5b0ded7d743 --- /dev/null +++ b/server/Gym/environments/minimum_max_abs_slicer/environment.py @@ -0,0 +1,244 @@ +import random +from typing import Optional, List +from collections import defaultdict, deque +from ...environment import VerifiableEnvironment + + +class Minimum_MaxAbsSlicer_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3229 + prompt_template = \ +r"""You are given two arrays A and B, each of length {N} (0-indexed). A is a permutation of [1, 2, ..., {N}], and each element of B is either +1 or -1. 
The values are as follows: +{A_and_B} + +You must divide the indices [0, 1, ..., {N_minus_1}] into {M} **consecutive batches**. Let end[1], end[2], ..., end[{M}] (0 ≤ end[1] < end[2] < ... < end[{M}] = {N_minus_1}) represent the last index of each batch. This means: +- Batch 1 contains indices from 0 to end[1] +- Batch 2 contains indices from end[1] + 1 to end[2] +- ... +- Batch {M} contains indices from end[{M_minus_1}] + 1 to end[{M}] = {N_minus_1} + +For each batch i, let S[i] be the **sum of B values in that batch**. Your goal is to **minimize the maximum absolute value** among all batches, i.e., minimize max(|S[1]|, |S[2]|, ..., |S[{M}]|). +Among all such optimal partitions, choose the one with the **smallest lexicographical order** of the sequence A[end[1]], A[end[2]], ..., A[end[{M}]]. + +**Output Format:** Your final answer should be a single line containing A[end[1]], A[end[2]], ..., A[end[{M}]], separated by **spaces**.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, + rewarding_strategy_abs : str = "(gold/answer)^beta", rewarding_weight_abs : float = +0.5, rewarding_beta_abs : float = +5.0, + rewarding_strategy_lex : str = "mean([gold=answer])^beta", rewarding_weight_lex : float = +0.5, rewarding_beta_lex : float = +5.0, + **kwargs) : + """ + Initialize the Minimum_MaxAbsSlicer_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy_abs" : rewarding_strategy_abs, + "rewarding_weight_abs" : rewarding_weight_abs, + "rewarding_beta_abs" : rewarding_beta_abs, + "rewarding_strategy_lex" : rewarding_strategy_lex, + "rewarding_weight_lex" : rewarding_weight_lex, + "rewarding_beta_lex" : rewarding_beta_lex, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N must be at least 4" + + M = self.parameter["M"] = random.randint(3, N - 1) + + self.parameter["A"] = list(range(1, N + 1)) + random.shuffle(self.parameter["A"]) + positive_probability = random.random() + self.parameter["B"] = [+1 if random.random() < positive_probability else -1 for _ in range(N)] + + + A = [0] * (N + 2) # 1-based city ids + B = [0] * (N + 2) # +1 (sight) / –1 (no sight) + + for i in range(1, N + 1): + A[i] = self.parameter["A"][i - 1] + B[i] = self.parameter["B"][i - 1] + + # ---------- build suffix balance array ---------- + SUF = [0] * (N + 3) # SUF[i] = balance on [i … N] + for i in range(N, 0, -1): + SUF[i] = B[i] + SUF[i + 1] + + # count how many suffixes are perfectly balanced + tot_zero = sum(1 for i in range(1, N + 1) if SUF[i] == 0) + + OFFSET = N # shift to make indices non-negative + + # ---------- minimal possible maximal monthly imbalance d ---------- + if SUF[1] == 0: # whole trip already balanced + d = 1 if tot_zero < M else 0 + else: + d = (abs(SUF[1]) - 1) // M + 1 # same as ceil(|SUF[1]| / M) + self.parameter["gold_answer_max_abs"] = d + + # ---------- monotone queues keyed by balance value ---------- + queues = defaultdict(deque) # balance → deque[(city, pos)] + + def push(pos: int) -> None: + """Put position `pos` into queue of balance SUF[pos+1].""" + key = SUF[pos + 1] + OFFSET + dq = queues[key] + rec = (A[pos], pos) # ordered by city id + while dq and rec[0] < 
dq[-1][0]: + dq.pop() + dq.append(rec) + + def best_from_queue(now_pos: int, key: int, cur_best: tuple) -> tuple: + """Try improving cur_best using front of queue `key`.""" + dq = queues.get(key) + if not dq: + return cur_best + while dq and dq[0][1] < now_pos: # outdated endpoint + dq.popleft() + if dq and dq[0][0] < cur_best[0]: + return dq[0] + return cur_best + + # ---------- CASE 1 : perfectly balanced plan possible (d == 0) ---------- + if d == 0: + C = [i for i in range(1, N + 1) if SUF[i + 1] == 0] # candidate cuts + tot_c = len(C) + now = 1 + j = 0 + answer = [] + + # decide the first M-1 months + for month in range(1, M): + # keep at least (M - month) candidates unpushed + while tot_c - j > M - month: + push(C[j]) + j += 1 + best = (N + 1, -1) # (city id, pos) + best = best_from_queue(now, OFFSET, best) + answer.append(best[0]) + now = best[1] + 1 # next month starts here + else : + + # ---------- CASE 2 : need positive imbalance (d > 0) ---------- + now = 1 + r = 1 + # preload all positions that may finish the first month + while N - r >= M - 1: + push(r) + r += 1 + + answer = [] + months_left = M + + while months_left > 1: + best = (N + 1, -1) + center = SUF[now] + OFFSET + + low = max(0, center - d) + high = min(2 * N, center + d) + + for key in range(low, high + 1): + # |balance| must be small enough to finish the rest in (months_left-1) months + if abs(key - OFFSET) <= (months_left - 1) * d: + best = best_from_queue(now, key, best) + + answer.append(best[0]) + now = best[1] + 1 + months_left -= 1 + + # make one more position available for the next round + if r <= N: # guard, though algorithm ensures r ≤ N + push(r) + r += 1 + + answer.append(A[N]) # last month ends here + assert len(answer) == M, "The answer should have exactly M elements" + self.parameter["gold_answer"] = answer + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"])) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + M = 
self.parameter["M"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + M = M, + M_minus_1 = M - 1, + A_and_B = "\n".join("A[{}]={} B[{}]={}".format(i, Ai, i, Bi) for i, (Ai, Bi) in enumerate(zip(self.parameter["A"], self.parameter["B"]))), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + reward = 0.0 + + + N = self.parameter["N"] + if not all(1 <= Ai <= N for Ai in processed_result) : + return self.rewards["invalid_solution"] + Ai2i = [None] * (N + 1) + for i, Ai in enumerate(self.parameter["A"]) : + Ai2i[Ai] = i + ends = [Ai2i[Ai] for Ai in processed_result] + + if len(ends) != self.parameter["M"] : + return self.rewards["invalid_solution"] + for i in range(len(ends)) : + if not (0 <= ends[i] < N) : + return self.rewards["invalid_solution"] + if i and not (ends[i - 1] < ends[i]) : + return self.rewards["invalid_solution"] + if ends[-1] != N - 1 : + return self.rewards["invalid_solution"] + + answer = abs(sum(self.parameter["B"][index] for index in range(ends[0] + 1))) + for i in range(1, len(ends)) : + answer = max(answer, abs(sum(self.parameter["B"][index] for index in range(ends[i - 1] + 1, ends[i] + 1)))) + gold = self.parameter["gold_answer_max_abs"] + assert gold <= answer, "answer should be greater than or equal to gold" + if self.rewards["rewarding_strategy_abs"] == "(gold/answer)^beta" : + if answer == 0 : + assert gold == 0, "If answer is 0, gold should also be 0" + reward += self.rewards["rewarding_weight_abs"] * 1.0 + else : + reward += 
self.rewards["rewarding_weight_abs"] * ((gold / answer) ** self.rewards["rewarding_beta_abs"]) + elif self.rewards["rewarding_strategy_abs"] == "gold=answer" : + reward += self.rewards["rewarding_weight_abs"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_abs"])) + + + if gold == answer : + if self.rewards["rewarding_strategy_lex"] == "mean([gold=answer])^beta" : + for a, b in zip(self.parameter["gold_answer"], processed_result) : + if a != b : + assert a < b, "gold_answer should be less than or equal to processed_result" + break + reward += self.rewards["rewarding_weight_lex"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["M"]) ** self.rewards["rewarding_beta_lex"]) + elif self.rewards["rewarding_strategy_lex"] == "gold=answer" : + reward += self.rewards["rewarding_weight_lex"] * (self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_lex"])) + + return reward + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_max_slicer/__init__.py b/server/Gym/environments/minimum_max_slicer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ff8a770b83c87474f8d6d22fbef408b0ea94eafd --- /dev/null +++ b/server/Gym/environments/minimum_max_slicer/__init__.py @@ -0,0 +1 @@ +from .environment import Minimum_MaxSlicer_Environment diff --git a/server/Gym/environments/minimum_max_slicer/environment.py b/server/Gym/environments/minimum_max_slicer/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..ba66ea013b864715a4d9f8a662dd4b7d9970d0b4 --- /dev/null +++ b/server/Gym/environments/minimum_max_slicer/environment.py @@ -0,0 +1,156 @@ +import random +from typing import Optional, List +from ...environment import 
VerifiableEnvironment + + +class Minimum_MaxSlicer_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1182 + prompt_template = \ +r"""You are given an array A of length {N}. The values are as follows (indexing starts at 0): +{A} + +You may divide these items (in order) into {M} **consecutive batches**. Let end[1], end[2], ..., end[{M}] (0 <= end[1] < end[2] < ... < end[{M}] = {N} - 1 = {N_minus_1}) represent the last index of each batch. This means: +- Batch 1 contains items from index 0 to end[1] +- Batch 2 contains items from index end[1] + 1 to end[2] +- ... +- Batch {M} contains items from index end[{M_minus_1}] + 1 to end[{M}] (which is {N_minus_1}) + +Try your best to **minimize the maximum sum** among all batches. In other words, minimize: max(S[1], S[2], ..., S[{M}]), where each S[i] is the sum of A values in batch i. + +**Output Format:** +Your final answer should be a single line containing end[1], end[2], ..., end[{M}] (with end[{M}] always equal to {N_minus_1}), separated by **spaces**. +Example: `{first_M_minus_1_indices} {N_minus_1}` (do **NOT** include the backticks or quotes); this means: end[1] = 0, ..., end[{M_minus_1}] = {M_minus_2}, and end[{M}] = {N_minus_1}. So, the first {M_minus_1} batches each contain one item, and the last batch contains the remaining items. +""" + + def __init__(self, + M_range_coefficient : int = 2, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = +3.0, + **kwargs) : + """ + Initialize the Minimum_MaxSlicer_Environment instance. 
+ """ + super().__init__(**kwargs) + self.M_range_coefficient = M_range_coefficient + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N must be at least 4" + + M = self.parameter["M"] = random.randint(3, max(3, N // self.M_range_coefficient)) + assert M < N, "M must be less than N" + + A = self.parameter["A"] = [random.randint(1, N) for i in range(N)] + + + left, right = min(A), sum(A) + while left < right : + mid = (left + right) // 2 + def check(d) : + now_sum, index, counting = 0, 0, 1 + while True : + if now_sum + A[index] <= d : + now_sum += A[index] + else : + counting += 1 + if A[index] <= d : + now_sum = A[index] + else : + return False + index += 1 + if index == N : + break + return counting <= M + if check(mid) : + right = mid + else : + left = mid + 1 + self.parameter["gold_answer"] = left + assert self.parameter["gold_answer"] > 0, "gold_answer must be greater than 0" + + ends = [] + def get_ends(d) : + now_sum, index = 0, 0 + while True : + if now_sum + A[index] <= d : + now_sum += A[index] + else : + ends.append(index - 1) + now_sum = A[index] + index += 1 + if index == N : + ends.append(index - 1) + break + get_ends(left) + if len(ends) < M : + missing = sorted(set(range(N)) - set(ends)) + ends += missing[: M - len(ends)] + ends.sort() + assert len(ends) == M + self.parameter["reference_answer"] = " ".join(map(str, ends)) + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + M = self.parameter["M"] + return self.prompt_template.format( + N = N, + M = M, + N_minus_1 = N - 1, + M_minus_1 = M - 1, + M_minus_2 = M - 2, + A = "\n".join("A[{}]={}".format(i, self.parameter["A"][i]) for i in range(N)), + first_M_minus_1_indices = " 
".join(map(str, range(M - 1))), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + N = self.parameter["N"] + + ends = processed_result + if len(ends) != self.parameter["M"] : + return self.rewards["invalid_solution"] + for i in range(len(ends)) : + if not (0 <= ends[i] < N) : + return self.rewards["invalid_solution"] + if i and not (ends[i - 1] < ends[i]) : + return self.rewards["invalid_solution"] + if ends[-1] != N - 1 : + return self.rewards["invalid_solution"] + + answer = sum(self.parameter["A"][index] for index in range(ends[0] + 1)) + for i in range(1, len(ends)) : + answer = max(answer, sum(self.parameter["A"][index] for index in range(ends[i - 1] + 1, ends[i] + 1))) + gold = self.parameter["gold_answer"] + assert gold <= answer, "answer should be greater than or equal to gold" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_ratio_path/__init__.py b/server/Gym/environments/minimum_ratio_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a6ce42c3bca0e9c54c019248d2781a53b69ff5f --- /dev/null +++ 
b/server/Gym/environments/minimum_ratio_path/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumRatioPath_Environment diff --git a/server/Gym/environments/minimum_ratio_path/environment.py b/server/Gym/environments/minimum_ratio_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8ef985fa9cffb4cd420997c22b1a060bc91dabb9 --- /dev/null +++ b/server/Gym/environments/minimum_ratio_path/environment.py @@ -0,0 +1,170 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumRatioPath_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2502 + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning there is an undirected edge connecting vertex `u` and vertex `v` with weight `w`: +{edges} + +Your task is to find a path `p1, p2, ..., pk` such that: +- `p1 = 0` (the path starts at vertex `0`) +- `pk = {N_minus_1}` (the path ends at vertex `{N_minus_1}`) +- Try your best to **minimize** the ratio of the maximum edge weight to the minimum edge weight along the path (i.e., minimize `max(w) / min(w)`, where `w` are the edge weights on the path). + +**Output Format:** Your final answer should be a single line containing the path in order: `p1 p2 ... pk`, separated by spaces. Example: `0 1 {N_minus_1}` (do NOT include backticks or quotes).""" + + def __init__(self, + weight_range_multiple : int = 5, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinimumRatioPath_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.weight_range_multiple = weight_range_multiple + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter" + edge_ratio = self.parameter["edge_ratio"] + + edges = self.parameter["edges"] = [] + + constructed_path = list(range(1, (N - 2) + 1)) + random.shuffle(constructed_path) + constructed_path = [0] + constructed_path + [N - 1] + assert set(constructed_path) == set(range(N)), "constructed_path should contain all vertices from 0 to N-1" + for u, v in zip(constructed_path, constructed_path[1 :]) : + w = random.randint(1, max(1, int(N * edge_ratio) * self.weight_range_multiple)) + edges.append((min(u, v), max(u, v), w)) + + num_edges = int(N * edge_ratio) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N) if (u, v) != (0, N - 1)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(1, max(1, int(N * edge_ratio) * self.weight_range_multiple)))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + + edges = sorted([(w, u, v) for u, v, w in edges], key = lambda x : x[0]) + M = len(edges) + S, T = 0, N - 1 # Start and end vertices + + ans_num = 0 # numerator = max speed on the chosen path + ans_den = 1 # denominator = min speed on the chosen path + found_any = False + + def find(parent, x): + 
if parent[x] != x: + parent[x] = find(parent, parent[x]) + return parent[x] + + # Try every possible minimum-speed edge as the start of the path + for i in range(M): + parent = list(range(N)) + # Add edges in non-decreasing order of speed, starting from i, + # until s and t become connected + for j in range(i, M): + wj, uj, vj = edges[j] + fu = find(parent, uj) + fv = find(parent, vj) + if fu != fv: + parent[fu] = fv + if find(parent, S) == find(parent, T): + break + + # If even after adding all edges from i onward s and t aren't connected: + if find(parent, S) != find(parent, T): + if i == 0: + assert False + break + + wi = edges[i][0] # the minimum speed on this trial + # Update the best ratio if it's the first valid path, or if the new ratio is smaller: + # compare ans_num/ans_den >= wj/wi 〈⇒〉 ans_num * wi >= ans_den * wj + if not found_any or ans_num * wi >= ans_den * wj: + ans_num = wj + ans_den = wi + found_any = True + + self.parameter["gold_answer"] = (ans_num, ans_den) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + if not answer_array : + return None + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + path = processed_result + for vertex in path : + if not (0 <= vertex < self.parameter["N"]) : # check if vertex is in range + return self.rewards["invalid_solution"] + if not (path[0] == 0 and path[-1] == 
self.parameter["N"] - 1) : # check if start and end vertices are correct + return self.rewards["invalid_solution"] + + edge2weight = {(u, v) : w for u, v, w in self.parameter["edges"]} + answer_num, answer_den = min(edge2weight.values()), max(edge2weight.values()) + for s, t in zip(path, path[1 :]) : + u, v = min(s, t), max(s, t) + if (u, v) not in edge2weight : + return self.rewards["invalid_solution"] + w = edge2weight[(u, v)] + answer_num, answer_den = max(answer_num, w), min(answer_den, w) + gold_num, gold_den = self.parameter["gold_answer"] + # gold_num / gold_den <= answer_num / answer_den <=> gold_num * answer_den <= answer_num * gold_den + assert gold_num * answer_den <= answer_num * gold_den, "The answer should be better than the gold answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + # (gold_num / gold_den) / (answer_num / answer_den) = (gold_num * answer_den) / (answer_num * gold_den) + return self.rewards["rewarding_weight"] * (((gold_num * answer_den) / (answer_num * gold_den)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * ((gold_num * answer_den) == (answer_num * gold_den)) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_spanning_tree/__init__.py b/server/Gym/environments/minimum_spanning_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1d65e8f5e1b6e3aab511912bd498ca1733b2f49b --- /dev/null +++ b/server/Gym/environments/minimum_spanning_tree/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumSpanningTree_Environment diff --git a/server/Gym/environments/minimum_spanning_tree/environment.py b/server/Gym/environments/minimum_spanning_tree/environment.py new file mode 100644 index 
0000000000000000000000000000000000000000..a812799bf58a42e8a7abefd045ac03edd6f4b67f --- /dev/null +++ b/server/Gym/environments/minimum_spanning_tree/environment.py @@ -0,0 +1,136 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumSpanningTree_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Your task is to select a subset of edges `T = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that: +- k = {N} - 1 = {N_minus_1} (i.e., you select exactly {N_minus_1} edges). +- The selected edges form a **spanning tree** — that is, they connect all {N} vertices without forming any cycles. +- Your goal is to **minimize** the total weight of the selected edges: `w_1 + w_2 + ... + w_k`. + +**Output Format:** +Your final answer should be a single line containing the endpoints of the selected edges in order: `u_1 v_1 u_2 v_2 ... u_k v_k`, separated by **spaces**. +Example: `0 1 1 2 2 3` (do **NOT** include the backticks or quotes); this means the spanning tree includes the edges `(0, 1, w_1)`, `(1, 2, w_2)`, and `(2, 3, w_3)` (assuming 4 vertices in total).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinimumSpanningTree_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v, random.randint(1, max(1, int(edge_density * N * (N - 1) / 2))))) + + num_edges = int(edge_density * N * (N - 1) / 2) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(1, max(1, int(edge_density * N * (N - 1) / 2))))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + + G = networkx.Graph() + G.add_weighted_edges_from(edges) + mst = networkx.minimum_spanning_tree(G) + self.parameter["reference_answer"] = " ".join("{} {}".format(u, v) for u, v in mst.edges()) + self.parameter["gold_answer"] = sum(mst[u][v]["weight"] for u, v in mst.edges()) + + assert self.parameter["gold_answer"] > 0, "The gold answer should be greater than 0" + + def _prompt_generate(self) -> str : + 
N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + mst = processed_result + if len(mst) % 2 != 0 : + return self.rewards["wrong_format"] + mst = [(mst[i], mst[i + 1]) for i in range(0, len(mst), 2)] + + if len(mst) != self.parameter["N"] - 1 : + return self.rewards["invalid_solution"] + if not ((set(u for u, v in mst) | set(v for u, v in mst)) == set(range(self.parameter["N"]))) : + return self.rewards["invalid_solution"] + + subgraph = networkx.Graph() + edge2weight = {(u, v) : w for u, v, w in self.parameter["edges"]} + answer_weight = 0 + for u, v in mst : + u, v = min(u, v), max(u, v) + if (u, v) not in edge2weight : + return self.rewards["invalid_solution"] + answer_weight += edge2weight[(u, v)] + subgraph.add_edge(u, v) + if not networkx.is_connected(subgraph) : + return self.rewards["invalid_solution"] + assert networkx.is_tree(subgraph), "The answer should be a tree as it has N - 1 edges and is connected" + + assert self.parameter["gold_answer"] <= answer_weight, "answer_weight should be greater than or equal to gold_answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((self.parameter["gold_answer"] / answer_weight) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return 
self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == answer_weight) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_spanning_tree_counting/__init__.py b/server/Gym/environments/minimum_spanning_tree_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b1645feafd640b059463f3c6b6c62816bf63d72 --- /dev/null +++ b/server/Gym/environments/minimum_spanning_tree_counting/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumSpanningTreeCounting_Environment diff --git a/server/Gym/environments/minimum_spanning_tree_counting/environment.py b/server/Gym/environments/minimum_spanning_tree_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..77ad8d13fec10e76f1697d2e7cb48e2d33756c30 --- /dev/null +++ b/server/Gym/environments/minimum_spanning_tree_counting/environment.py @@ -0,0 +1,249 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MinimumSpanningTreeCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex u to vertex v with weight w**: +{edges} + +Consider a subset of edges `T = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that: +- k = {N_minus_1} (i.e., you select exactly {N_minus_1} edges), +- The selected edges form a **spanning tree** — that is, they connect all {N} vertices without forming any cycles, +- The total weight `w_1 + w_2 + ... + w_k` is **minimized** among all such spanning trees (so it is called a minimum spanning tree). 
+ +Please compute **the number of such minimum spanning trees** modulo {MOD}.""" + + + def __init__(self, + MAX_MOD : int = 10000, + weight_range_divisor : int = 10, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MinimumSpanningTreeCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.weight_range_divisor = weight_range_divisor + assert self.weight_range_divisor > 0, "weight_range_divisor should be greater than 0" + + self.MAX_MOD = MAX_MOD + assert self.MAX_MOD > 1, "MAX_MOD should be greater than 1" + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter" + edge_ratio = self.parameter["edge_ratio"] + + weight_range = max(1, int(edge_ratio * N / self.weight_range_divisor)) + 1 + + edges = self.parameter["edges"] = [] + + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v, random.randint(1, weight_range))) + + num_edges = int(edge_ratio * N) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(1, weight_range))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, v, 
w in edges)), "edges should be unique" + + P = self.parameter["MOD"] = random.randint(2, self.MAX_MOD) + + + def find(parent, x): + # path compression + if parent[x] != x: + parent[x] = find(parent, parent[x]) + return parent[x] + + def union(parent, a, b): + # simple union + ra = find(parent, a) + rb = find(parent, b) + if ra != rb: + parent[rb] = ra + + def det_mod(mat, mod): + """ + Compute determinant of mat (n x n) modulo mod using + a gcd-based elimination that avoids division by non-invertible elements. + """ + n = len(mat) + f = 1 + tp = 1 + # ensure entries are in [0, mod) + for i in range(n): + for j in range(n): + mat[i][j] %= mod + + for i in range(n): + # eliminate below mat[i][i] + for j in range(i+1, n): + a = mat[i][i] + b = mat[j][i] + while b: + t = a // b + a, b = b, a - t*b + # row_i = row_i - t * row_j (from column i onward) + for k in range(i, n): + mat[i][k] = (mat[i][k] - t * mat[j][k]) % mod + # swap row_i and row_j (from column i onward) + for k in range(i, n): + mat[i][k], mat[j][k] = mat[j][k], mat[i][k] + f = -f + if mat[i][i] % mod == 0: + return 0 + tp = tp * (mat[i][i] % mod) % mod + + res = f * tp % mod + return res if res >= 0 else res + mod + + def count_mst(): + edges = self.parameter["edges"].copy() + M = len(edges) + + # sort by weight + edges.sort(key=lambda x: x[2]) + + # initialize DSU + parent = list(range(N)) + ans = 1 + i = 0 + + # process groups of equal-weight edges + while i < M: + w = edges[i][2] + j = i + while j < M and edges[j][2] == w: + j += 1 + group = edges[i:j] + + # build the multigraph on current DSU components + adj_count = {} # (u, v) -> number of parallel edges + nodes = set() + for u, v, _ in group: + ru = find(parent, u) + rv = find(parent, v) + if ru != rv: + nodes.add(ru) + nodes.add(rv) + adj_count[(ru, rv)] = adj_count.get((ru, rv), 0) + 1 + adj_count[(rv, ru)] = adj_count.get((rv, ru), 0) + 1 + + # find connected components in this subgraph + visited = set() + for u in nodes: + if u in visited: + 
continue + # BFS/DFS to collect one component + stack = [u] + comp = [] + visited.add(u) + while stack: + x = stack.pop() + comp.append(x) + # look at neighbors of x + for (a, b), cnt in adj_count.items(): + if a == x and b not in visited: + visited.add(b) + stack.append(b) + + t = len(comp) + if t > 1: + m = t - 1 + mat = [[0] * m for _ in range(m)] + for xi in range(m): + ni = comp[xi] + # degree of ni within comp + deg = 0 + for nj in comp: + deg += adj_count.get((ni, nj), 0) + deg %= P + mat[xi][xi] = deg + # off-diagonals + for yj in range(m): + if xi != yj: + nj = comp[yj] + mat[xi][yj] = (- adj_count.get((ni, nj), 0)) % P + + # multiply in the number of spanning trees of this component + ans = ans * det_mod(mat, P) % P + + # unite the DSU by all useful edges in this group + for u, v, _ in group: + ru = find(parent, u) + rv = find(parent, v) + if ru != rv: + union(parent, ru, rv) + + i = j + + # check if the graph is connected + roots = {find(parent, x) for x in range(N)} + if len(roots) != 1: + return 0 + else: + return ans + + self.parameter["reference_answer"] = count_mst() + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + MOD = self.parameter["MOD"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return 
self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_steiner_tree/__init__.py b/server/Gym/environments/minimum_steiner_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..22a7185cec9a9336fffe4bebf240759c810f461f --- /dev/null +++ b/server/Gym/environments/minimum_steiner_tree/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumSteinerTree_Environment diff --git a/server/Gym/environments/minimum_steiner_tree/environment.py b/server/Gym/environments/minimum_steiner_tree/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..c099d035366dc9c1930f034c4d331dec9cf3333e --- /dev/null +++ b/server/Gym/environments/minimum_steiner_tree/environment.py @@ -0,0 +1,174 @@ +import random +import networkx +from collections import deque +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumSteinerTree_Environment(VerifiableEnvironment) : # Submitted to https://www.luogu.com.cn/problem/P6192 + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Your task is to select a subset of edges `T = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that: +- The selected edges form a **connected graph** that contains these {K} verticies: {to_be_connected} +- Your goal is to **minimize** the total weight of the selected edges: `w_1 + w_2 + ... + w_k`. + +**Output Format:** +Your final answer should be a single line containing the endpoints of the selected edges in order: `u_1 v_1 u_2 v_2 ... u_k v_k`, separated by **spaces**. 
Example: `0 1 1 2 2 3` (do **NOT** include the backticks or quotes); this means the it includes the edges `(0, 1, w_1)`, `(1, 2, w_2)`, and `(2, 3, w_3)`""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinimumSteinerTree_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v, random.randint(1, N))) + + num_edges = int(edge_density * N * (N - 1) / 2) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(1, N))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + K = self.parameter["K"] = random.randint(3, min(20, N - 
1)) + + to_be_connected = self.parameter["to_be_connected"] = random.sample(range(N), K) + + + adj = [[] for _ in range(N)] + for u, v, w in edges: + adj[u].append((v, w)) + adj[v].append((u, w)) + full_mask = (1 << K) - 1 + dp = [[None] * (full_mask + 1) for _ in range(N)] + for i in range(K): + dp[to_be_connected[i]][1 << i] = 0 + for s1 in range(1, full_mask + 1): + for i in range(N): + s2 = (s1 - 1) & s1 + while s2: + a = dp[i][s2] + b = dp[i][s1 ^ s2] + if a is not None and b is not None: + v = a + b + cur = dp[i][s1] + if cur is None or v < cur: + dp[i][s1] = v + s2 = (s2 - 1) & s1 + vis = [False] * N + q = deque() + for i in range(N): + if dp[i][s1] is not None: + q.append(i) + vis[i] = True + while q: + u = q.popleft() + vis[u] = False + du = dp[u][s1] + for v, w in adj[u]: + nd = du + w + cur = dp[v][s1] + if cur is None or nd < cur: + dp[v][s1] = nd + if not vis[v]: + q.append(v) + vis[v] = True + self.parameter["gold_answer"] = dp[to_be_connected[0]][full_mask] + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + K = self.parameter["K"], + to_be_connected = " ".join(map(str, self.parameter["to_be_connected"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + mst = processed_result + if len(mst) % 2 != 0 : + return self.rewards["wrong_format"] + mst = [(mst[i], mst[i + 1]) for i in range(0, len(mst), 2)] + + if not 
(set(range(self.parameter["N"])) >= (set(u for u, v in mst) | set(v for u, v in mst)) >= set(self.parameter["to_be_connected"])) : + return self.rewards["invalid_solution"] + + subgraph = networkx.Graph() + edge2weight = {(u, v) : w for u, v, w in self.parameter["edges"]} + answer_weight = 0 + for u, v in mst : + u, v = min(u, v), max(u, v) + if (u, v) not in edge2weight : + return self.rewards["invalid_solution"] + answer_weight += edge2weight[(u, v)] + subgraph.add_edge(u, v) + if not networkx.is_connected(subgraph) : + return self.rewards["invalid_solution"] + + assert self.parameter["gold_answer"] <= answer_weight, "answer_weight should be greater than or equal to gold_answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((self.parameter["gold_answer"] / answer_weight) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == answer_weight) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_sum_difference_submatrix/__init__.py b/server/Gym/environments/minimum_sum_difference_submatrix/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..80d24482535eb62dd7b39cad33e01686fa088071 --- /dev/null +++ b/server/Gym/environments/minimum_sum_difference_submatrix/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumSumDifferenceSubmatrix_Environment diff --git a/server/Gym/environments/minimum_sum_difference_submatrix/environment.py b/server/Gym/environments/minimum_sum_difference_submatrix/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf0f2c6afa10fa6e85fa1bd8b36d58c93d3e555 --- /dev/null +++ 
b/server/Gym/environments/minimum_sum_difference_submatrix/environment.py @@ -0,0 +1,167 @@ +import random +from typing import Optional, List +from itertools import combinations +from ...environment import VerifiableEnvironment + + +class MinimumSumDifferenceSubmatrix_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a {N} × {M} matrix of integers (with row indices from `0` to `{N_minus_1}` and column indices from `0` to `{M_minus_1}`). Please select {R} rows and {C} columns, denoted as `r[1], ..., r[{R}]` and `c[1], ..., c[{C}]`, respectively, such that: +- 0 ≤ r[1] < ... < r[{R}] ≤ {N_minus_1} +- 0 ≤ c[1] < ... < c[{C}] ≤ {M_minus_1} + +The matrix is given as below (each line represents a row): +{matrix} + +From these, you can extract a new {R} × {C} submatrix, where the value at position `(i, j)` is taken from row `r[i]` and column `c[j]` of the original matrix. Try your best to **minimize the sum of absolute differences** between all pairs of **adjacent** (horizontally or vertically) elements in the new submatrix. Two elements are considered adjacent if their manhattan distance is 1 (i.e., they are either in the same row and consecutive columns, or in the same column and consecutive rows). + +**Output Format:** Output two lines, +- The first line contains the selected row indices: `r[1], ..., r[{R}]` +- The second line contains the selected column indices: `c[1], ..., c[{C}]` +All integers in one line should be separated by a single space and should be **0-indexed** (i.e., the first row/column is `0`, the second is `1`, etc.).""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize MinimumSumDifferenceSubmatrix_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(3, MAX_N_M), random.randint(3, MAX_N_M) + R, C = self.parameter["R"], self.parameter["C"] = random.randint(2, N - 1), random.randint(2, M - 1) + matrix = self.parameter["matrix"] = [[random.randint(1, N * M) for _ in range(M)] for _ in range(N)] + + + # Compute an appropriate "infinite" value based on the input + max_val = max(max(row) for row in matrix) + # Maximum number of adjacent pairs in any R×C submatrix: + # vertical: (R-1)*C, horizontal: R*(C-1) + max_pairs = (R - 1) * C + R * (C - 1) + INF = max_val * max_pairs + 1 + + ans = INF + + # Enumerate all choices of R rows out of N + for rows in combinations(range(N), R): + # Precompute w[j][i]: the cost contribution when picking column j then column i + # (and w[i][i] is the vertical adjacencies within column i) + w = [[0] * M for _ in range(M)] + + for i in range(M): + # Vertical adjacencies in column i + for idx in range(1, R): + r0 = rows[idx - 1] + r1 = rows[idx] + w[i][i] += abs(matrix[r1][i] - matrix[r0][i]) + + # Cross-column differences between column j and column i + for j in range(i): + s = 0 + for r0 in rows: + s += abs(matrix[r0][i] - matrix[r0][j]) + w[j][i] = s + + # DP over columns: dp[i][k] = min cost to pick k columns ending at column i + dp = [[INF] * (C + 1) for _ in range(M)] + for i in range(M): + dp[i][1] = w[i][i] + + for k in range(2, C + 1): + for i in range(M): + best = INF + for j in range(i): + cost = dp[j][k - 1] + w[j][i] + w[i][i] + if cost < best: 
+ best = cost + dp[i][k] = best + + # Update global answer + for i in range(M): + if dp[i][C] < ans: + ans = dp[i][C] + + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + M = M, + M_minus_1 = M - 1, + R = self.parameter["R"], + C = self.parameter["C"], + matrix = "\n".join(" ".join(map(str, row)) for row in self.parameter["matrix"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + matrix.append(list(map(int, line.split()))) + return matrix + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != 2 : + return self.rewards["wrong_format"] + row_indices, col_indices = processed_result + if len(row_indices) != self.parameter["R"] or len(col_indices) != self.parameter["C"] : + return self.rewards["wrong_format"] + + if not all(0 <= row < self.parameter["N"] for row in row_indices) or not all(0 <= col < self.parameter["M"] for col in col_indices) : + return self.rewards["invalid_solution"] + if not all(row_indices[i] < row_indices[i + 1] for i in range(len(row_indices) - 1)) or not all(col_indices[i] < col_indices[i + 1] for i in range(len(col_indices) - 1)) : + return self.rewards["invalid_solution"] + + new_matrix = [[self.parameter["matrix"][row][col] for col in col_indices] for row in row_indices] + sum_diff = 0 + for i in range(self.parameter["R"]): + for j in range(self.parameter["C"]): + if i < self.parameter["R"] - 1: + sum_diff += abs(new_matrix[i + 1][j] - new_matrix[i][j]) + if j < self.parameter["C"] - 1: + 
sum_diff += abs(new_matrix[i][j + 1] - new_matrix[i][j]) + gold, answer = self.parameter["gold_answer"], sum_diff + assert gold <= answer + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + if answer == 0 : + assert gold == 0, "gold should be 0 if answer is 0" + return self.rewards["rewarding_weight"] + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_tree_weighted_dominating_ancestor/__init__.py b/server/Gym/environments/minimum_tree_weighted_dominating_ancestor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..152eb38358b51f837e5f6bc0e7797de323280b7b --- /dev/null +++ b/server/Gym/environments/minimum_tree_weighted_dominating_ancestor/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumTreeWeightedDominatingAncestor_Environment diff --git a/server/Gym/environments/minimum_tree_weighted_dominating_ancestor/environment.py b/server/Gym/environments/minimum_tree_weighted_dominating_ancestor/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..1dd4069315567299a776a4c65f03293e258a1186 --- /dev/null +++ b/server/Gym/environments/minimum_tree_weighted_dominating_ancestor/environment.py @@ -0,0 +1,196 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumTreeWeightedDominatingAncestor_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3354 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} + 1 = {N_plus_1} vertices, labeled from `0` to 
`{N}`. + +`0` is the root of the tree. Each non-root vertex has a parent, and the edge connecting it with its parent has a weight. The edges are given as follows: +{edges} + +Each non-root vertex also has a cost, given as a list `C` of length {N}, where `C[i]` is the cost of vertex `i`: +{C} + +The root (vertex `0`) is already selected. Your task is to select exactly {K} **additional** non-root vertices. The total cost of the selection is defined as follows: +- For every vertex `u`, let `D[u]` be the distance from `u` to its **nearest selected ancestor**, where a selected ancestor includes `0` or the vertex itself (if selected). The **distance** between two vertices is the sum of weights along the unique path between them. +- The cost contributed by vertex `u` is `C[u] × D[u]` for all non-root verticies `u`. +- Try your best to **minimize** the total cost. + +**Output Format:** Output a single line containing {K} integers — the selected vertices (excluding 0), separated by spaces.""" + + def __init__(self, + weight_range : int = 10, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinimumTreeWeightedDominatingAncestor_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.weight_range = weight_range + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 1" + + parents = self.parameter["parents"] = [None] * (N + 1) + permutations = list(range(1, N + 1)) + random.shuffle(permutations) + permutations = [0] + permutations + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + parent = random.choice(permutations[: index]) + parents[vertex] = (parent, random.randint(1, self.weight_range)) + + C = self.parameter["C"] = [0] + [random.randint(0, self.weight_range) for vertex in range(1, N + 1)] + + K = self.parameter["K"] = random.randint(1, N - 1) + + + graph = [[] for _ in range(N + 1)] + # depth[i] = distance from root (Bytetown, node 0) to i + depth = [0] * (N + 1) + + for i in range(1, N + 1): + parent, dist = parents[i] + graph[parent].append((i, dist)) + + # f[p][j][l]: minimum cost in subtree of p, for wood from all nodes + # that are descendants of p but no closer ancestor than j, + # using l new sawmills in that subtree + # g[p][j][l]: same but requiring that none of those l sawmills lies + # on the path from p up to j (i.e., the first mill is strictly below p) + f = [[[0] * (K + 1) for _ in range(N + 1)] for _ in range(N + 1)] + g = [[[0] * (K + 1) for _ in range(N + 1)] for _ in range(N + 1)] + + # st is the stack of ancestors of the current node in DFS + st = [] + + def dfs(p): + st.append(p) + # Process children + for to, dist in graph[p]: + depth[to] = depth[p] + dist + dfs(to) + # Merge DP from child 'to' into p + for j in st: + # We go from high to low l so we can use previous-state values safely + for l in range(K, -1, 
-1): + # First, take the case x = 0 (no new mills in 'to' subtree) + f[p][j][l] += f[to][j][0] + g[p][j][l] += f[to][p][0] + best_fpjl = f[p][j][l] + best_gpjl = g[p][j][l] + # Try allocating x new mills to subtree 'to' + for x in range(1, l + 1): + # put x mills in 'to' subtree for wood going up to j + cost_f = f[p][j][l - x] + f[to][j][x] + if cost_f < best_fpjl: + best_fpjl = cost_f + # put x mills in 'to' subtree for wood going up to p + cost_g = g[p][j][l - x] + f[to][p][x] + if cost_g < best_gpjl: + best_gpjl = cost_g + f[p][j][l] = best_fpjl + g[p][j][l] = best_gpjl + + # After merging all children, account for p's own wood + for j in st: + dist_up = depth[p] - depth[j] + # if no new mills in subtree of p, we pay full transport cost + f[p][j][0] += C[p] * dist_up + # if we have at least 1 mill, we can choose either to treat at p + # or let it be handled by one of the mills below p + for l in range(1, K + 1): + f[p][j][l] = min( + f[p][j][l] + C[p] * dist_up, + g[p][j][l - 1] + ) + + st.pop() + + # Run DFS from root (Bytetown = 0) + dfs(0) + # Answer: minimum cost when we place exactly K new mills in the whole tree + self.parameter["gold_answer"] = f[0][0][K] + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_plus_1 = N + 1, + edges = "\n".join("`{}`'s parent is `{}` with weight `{}`".format(i + 1, parent, weight) for i, (parent, weight) in enumerate(self.parameter["parents"][1 :])), + C = " ".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"]) if i > 0), + K = self.parameter["K"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + 
assert isinstance(processed_result, list), "processed_result should be a list" + + selected_vertices = processed_result + + if len(selected_vertices) != self.parameter["K"] : + return self.rewards["invalid_solution"] + + selected = [True] + [False] * self.parameter["N"] + for vertex in selected_vertices : + if not (1 <= vertex <= self.parameter["N"]) : + return self.rewards["invalid_solution"] + selected[vertex] = True + + graph = [[] for _ in range(self.parameter["N"] + 1)] + for i in range(1, self.parameter["N"] + 1): + parent, dist = self.parameter["parents"][i] + graph[parent].append((i, dist)) + + answer = 0 + def DFS(vertex, dist) : + nonlocal answer + if selected[vertex] : + dist = 0 + answer += self.parameter["C"][vertex] * dist + for neighbor, weight in graph[vertex] : + DFS(neighbor, dist + weight) + DFS(0, 0) + gold = self.parameter["gold_answer"] + assert gold <= answer, "gold should be less than or equal to answer, but got gold={} and answer={}".format(gold, answer) + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + if answer == 0 : + assert gold == 0, "If answer is 0, gold should also be 0" + return self.rewards["rewarding_weight"] * 1.0 + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/minimum_unconflicted_grid_kmax/__init__.py b/server/Gym/environments/minimum_unconflicted_grid_kmax/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cd1da1e7e1e9603565ba9efdc9ba4b5d831222d6 --- /dev/null +++ b/server/Gym/environments/minimum_unconflicted_grid_kmax/__init__.py @@ -0,0 +1 @@ +from .environment import 
MinimumUnconflictedGridKMax_Environment diff --git a/server/Gym/environments/minimum_unconflicted_grid_kmax/environment.py b/server/Gym/environments/minimum_unconflicted_grid_kmax/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..0bda89c4d0a4a820c1c77399a3d9fe46b7ba7db5 --- /dev/null +++ b/server/Gym/environments/minimum_unconflicted_grid_kmax/environment.py @@ -0,0 +1,124 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumUnconflictedGridKMax_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4251 + prompt_template = \ +r"""You are given an {N} × {M} grid of non-negative integers `A[i][j]` (1-indexed). The matrix A is: +{grid} + +Choose {N} **distinct** column indices `p[1], p[2], ..., p[{N}]` in the range `[1, {M}]`. For each row `i`, take the value `A[i][p[i]]`; among these {N} values, consider the **{K}-th largest** value; your goal is to **minimize** this {K}-th largest value. Output `p[1] p[2] ... p[{N}]` on a single line, separated by spaces.""" + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinimumUnconflictedGridKMax_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3" + + N = self.parameter["N"] = random.randint(3, MAX_N_M) + M = self.parameter["M"] = random.randint(N, MAX_N_M) + self.parameter["K"] = random.randint(1, N) + self.parameter["A"] = [[random.randint(1, N * M) for j in range(M)] for i in range(N)] + + + K = N - self.parameter["K"] + 1 # transform as in the original code + + A = [[0] * (M + 1) for _ in range(N + 1)] + LIM = -1 + for i in range(1, N + 1): + for j in range(1, M + 1): + A[i][j] = self.parameter["A"][i - 1][j - 1] + if A[i][j] > LIM: + LIM = A[i][j] + + def check(x): + vis = [0] * (M + 1) + lin = [0] * (M + 1) + tot = 1 + ans = 0 + + def dfs(u, lim): + for j in range(1, M + 1): + if A[u][j] <= lim and vis[j] != tot: + vis[j] = tot + if lin[j] == 0 or dfs(lin[j], lim): + lin[j] = u + return True + return False + + for i in range(1, N + 1): + if dfs(i, x): + ans += 1 + tot += 1 + return ans + + l, r = 1, LIM + while l < r: + mid = (l + r) // 2 + if check(mid) >= K: + r = mid + else: + l = mid + 1 + + self.parameter["gold_answer"] = l + assert self.parameter["gold_answer"] > 0, "gold_answer should be positive" + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + K = self.parameter["K"], + grid = "\n".join(", ".join("A[{}][{}]={}".format(i, j, Aij) for j, Aij in enumerate(row, start = 1)) for i, row in enumerate(self.parameter["A"], start = 1)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[int]] : + if answer is not None : + answer = 
def scorer(self, output : str) -> float :
    """
    Score a list of N distinct 1-based column indices, one per row.

    Returns `wrong_format` when the output cannot be parsed and
    `invalid_solution` when the list has the wrong length, repeats a column
    or leaves `[1, M]`. Otherwise the K-th largest picked value is compared
    with the stored optimum under the configured rewarding strategy.

    Raises:
        NotImplementedError: if the rewarding strategy is unknown.
    """
    columns = self.processor(output)
    if columns is None :
        return self.rewards["wrong_format"]
    assert isinstance(columns, list), "processed_result should be a list"

    if len(columns) != self.parameter["N"] :
        return self.rewards["invalid_solution"]
    if len(set(columns)) != len(columns) :
        return self.rewards["invalid_solution"]
    if any(not (1 <= x <= self.parameter["M"]) for x in columns) :
        return self.rewards["invalid_solution"]

    picked = [self.parameter["A"][row][col - 1] for row, col in enumerate(columns)]
    picked.sort(reverse = True)
    answer = picked[self.parameter["K"] - 1]
    gold = self.parameter["gold_answer"]
    assert 0 < gold <= answer, "gold should be less than or equal to answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
0000000000000000000000000000000000000000..24378279ba6cf87d87d1f20402e67dd5d2f1d4fb --- /dev/null +++ b/server/Gym/environments/minimum_vertex_cover/environment.py @@ -0,0 +1,138 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class Minimum_VertexCover_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices labeled from 0 to {N_minus_1}. The graph contains the following undirected edges: +{edges} + +Each vertex has a cost, given as a list `C` of length {N}, where `C[i]` is the cost of vertex i: +{C} + +Your task is to select a set of distinct vertices x_1, x_2, ..., x_k (you determine k), such that every edge in the graph has at least one endpoint in the selected set; that is, for every edge (u, v), at least one of u or v must be included. +Try your best to minimize the total cost: C[x_1] + C[x_2] + ... + C[x_k]. + +**Output Format:** Your final answer should be a single line containing the selected vertices in any order, separated by spaces. +Example: `0 1 {N_minus_1}` (do **NOT** include quotes or backticks).""" + + def __init__(self, + cost_range : int = 10, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the Minimum_DominatingSet_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.cost_range = cost_range + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 1" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 < edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + assert int(edge_density * N * (N - 1) / 2) > 0, "edge_density should be large enough to generate at least one edge" + + edges = self.parameter["edges"] = random.sample([(u, v) for u in range(N) for v in range(u + 1, N)], int(edge_density * N * (N - 1) / 2)) + random.shuffle(edges) + assert len(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + C = self.parameter["C"] = [random.randint(1, self.cost_range) for vertex in range(N)] + + + adjacent = [0] * N + for u, v in edges : + adjacent[u] |= 1 << v + adjacent[v] |= 1 << u + + self.parameter["reference_answer"] = list(range(N)) + self.parameter["gold_answer"] = sum(C) + + selected = [] + def DFS(u : int, not_selected : int, requiring : int, sumC : int) -> None : + assert (not_selected & requiring) == 0 + if sumC >= self.parameter["gold_answer"] : + return + if u == N : + assert sumC < self.parameter["gold_answer"] + self.parameter["reference_answer"], self.parameter["gold_answer"] = selected.copy(), sumC + return + + if not (requiring & (1 << u)) : + if not (not_selected & adjacent[u]) : + DFS(u + 1, not_selected | (1 << u), requiring | adjacent[u], sumC) + + selected.append(u) + DFS(u + 1, not_selected, requiring, sumC + C[u]) + selected.pop() + DFS(0, 0, 0, 0) + + 
self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"])) + assert self.parameter["gold_answer"] > 0 + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + C = "\n".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"])), + ) + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + selected_vertices = processed_result + + if not all(0 <= vertex < self.parameter["N"] for vertex in selected_vertices) : + return self.rewards["invalid_solution"] + if len(selected_vertices) != len(set(selected_vertices)) : + return self.rewards["invalid_solution"] + selected_vertices = set(selected_vertices) + + for u, v in self.parameter["edges"] : + if (u not in selected_vertices) and (v not in selected_vertices) : + return self.rewards["invalid_solution"] + + answer = sum(self.parameter["C"][u] for u in selected_vertices) + gold = self.parameter["gold_answer"] + assert gold <= answer, "gold should be less than or equal to answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] 
\ No newline at end of file diff --git a/server/Gym/environments/minimum_weighted_spanning_tree/__init__.py b/server/Gym/environments/minimum_weighted_spanning_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c83df2906a566f64375c80ea1c6bf894bea32128 --- /dev/null +++ b/server/Gym/environments/minimum_weighted_spanning_tree/__init__.py @@ -0,0 +1 @@ +from .environment import MinimumWeightedSpanningTree_Environment diff --git a/server/Gym/environments/minimum_weighted_spanning_tree/environment.py b/server/Gym/environments/minimum_weighted_spanning_tree/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab214a13e362953ec59186c4e9ca2cd80660c2d --- /dev/null +++ b/server/Gym/environments/minimum_weighted_spanning_tree/environment.py @@ -0,0 +1,237 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MinimumWeightedSpanningTree_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Your task is to select a subset of edges `T = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that: +- `k = {N} - 1 = {N_minus_1}` (i.e., you select exactly {N_minus_1} edges). +- The selected edges form a **spanning tree** — that is, they connect all {N} vertices without forming any cycles. +- You choose one vertex as the **root**. Then, every non-root vertex has exactly one incoming edge in the tree. 
+ +The cost of your scheme (the edge subset and chosen root) is defined as follows: +- For each vertex `t ≠ root`, suppose `(s, t, w)` is the single incoming edge on the path from the root to `t`, and the number of edges from the root to `t` is `K`. +- The cost of this edge is `w × K`. +- The total cost is the sum of such edge costs for all `t ≠ root`. + +Your goal is to **minimize the total cost** as defined above. + +**Output Format:** +Output a single line containing the root and the endpoints of the selected edges in order: `root u_1 v_1 u_2 v_2 ... u_k v_k`, separated by **spaces** . Example: `0 0 1 1 2 1 3` (do **NOT** include the backticks or quotes); this means the root is `0`, and the selected edges are `(0, 1)`, `(1, 2)`, and `(1, 3)` (assuming 4 vertices in total).""" + + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MinimumWeightedSpanningTree_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v, random.randint(0, N))) + + num_edges = int(edge_density * N * (N - 1) / 2) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(0, N))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + + total_length = sum(w for u, v, w in edges) + + # A safe INF larger than any possible total cost + INF = total_length * N + 1 + + # Build adjacency matrix A + A = [[INF] * N for _ in range(N)] + for x, y, v in edges: + if v < A[x][y]: + A[x][y] = A[y][x] = v + + S = (1 << N) - 1 + + # Precompute low‐bit index + lg = [0] * (S + 1) + for i in range(N): + lg[1 << i] = i + + # f[i][j] = min cost to attach subset j (disjoint from i) to i by exactly |j| edges + f = [dict() for _ in range(S 
+ 1)] + + # *** FIX: make f[0][j] = 0 for all j, just like the C++ static init *** + f[0] = {j: 0 for j in range(S + 1)} + + # Base case: attaching an empty set costs 0 + for i in range(1, S + 1): + f[i][0] = 0 + + ne = [0] * (S + 1) + # Build f table + for i in range(1, S + 1): + s = S ^ i + prev = 0 + j = s + # build reverse linked list of submasks of s + while j: + ne[j] = prev + prev = j + j = (j - 1) & s + + # traverse that linked list + j = prev + while j: + x = lg[j & -j] + # find cheapest edge from x into i + best = INF + tmp = i + while tmp: + yb = tmp & -tmp + y = lg[yb] + if A[x][y] < best: + best = A[x][y] + tmp ^= yb + + without_low = j ^ (j & -j) + f[i][j] = f[i][without_low] + best + j = ne[j] + + # g[l][i] = min cost to excavate exactly the set i using l roads + g = [[INF] * (S + 1) for _ in range(N + 1)] + # with 0 roads, only singletons are free + for i in range(N): + g[0][1 << i] = 0 + + # build g + for l in range(1, N + 1): + for i in range(1, S + 1): + j = i + while j: + prev_set = i ^ j + cost = g[l - 1][prev_set] + f[prev_set][j] * l + if cost < g[l][i]: + g[l][i] = cost + j = (j - 1) & i + + # answer is min over all l + ans = min(g[l][S] for l in range(N + 1)) + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + if not answer_array : + return None + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), 
"processed_result should be a list" + + root = processed_result[0] + if not (0 <= root < self.parameter["N"]) : + return self.rewards["invalid_solution"] + + mst = processed_result[1 :] + if len(mst) % 2 != 0 : + return self.rewards["wrong_format"] + mst = [(mst[i], mst[i + 1]) for i in range(0, len(mst), 2)] + + if len(mst) != self.parameter["N"] - 1 : + return self.rewards["invalid_solution"] + if not ((set(u for u, v in mst) | set(v for u, v in mst)) == set(range(self.parameter["N"]))) : + return self.rewards["invalid_solution"] + + subgraph = networkx.Graph() + edge2weight = {(u, v) : w for u, v, w in self.parameter["edges"]} + for u, v in mst : + u, v = min(u, v), max(u, v) + if (u, v) not in edge2weight : + return self.rewards["invalid_solution"] + subgraph.add_edge(u, v) + if not networkx.is_connected(subgraph) : + return self.rewards["invalid_solution"] + assert networkx.is_tree(subgraph), "The answer should be a tree as it has N - 1 edges and is connected" + + answer_weight = 0 + adjacent_list = [[] for _ in range(self.parameter["N"])] + for u, v in mst : + adjacent_list[u].append(v) + adjacent_list[v].append(u) + def DFS(vertex : int, parent : int, depth : int) -> None : + nonlocal answer_weight + for neighbor in adjacent_list[vertex] : + if neighbor == parent : + continue + edge_weight = edge2weight[(min(vertex, neighbor), max(vertex, neighbor))] + answer_weight += edge_weight * (depth + 1) + DFS(neighbor, vertex, depth + 1) + DFS(root, -1, 0) + assert self.parameter["gold_answer"] <= answer_weight, "answer_weight should be greater than or equal to gold_answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + if answer_weight == 0 : + assert self.parameter["gold_answer"] == 0, "If answer_weight is 0, gold_answer should also be 0" + return self.rewards["rewarding_weight"] * 1.0 + return self.rewards["rewarding_weight"] * ((self.parameter["gold_answer"] / answer_weight) ** self.rewards["rewarding_beta"]) + elif 
self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == answer_weight) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/mitter_transportation/__init__.py b/server/Gym/environments/mitter_transportation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..93264b797694e0fb8f402e8b372006cd8c60e0b1 --- /dev/null +++ b/server/Gym/environments/mitter_transportation/__init__.py @@ -0,0 +1 @@ +from .environment import MitterTransportation_Environment diff --git a/server/Gym/environments/mitter_transportation/environment.py b/server/Gym/environments/mitter_transportation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..1c014271b2cf387ab7f4c774eea584462c02b08e --- /dev/null +++ b/server/Gym/environments/mitter_transportation/environment.py @@ -0,0 +1,132 @@ +import math +import random +from typing import Optional +from collections import deque, Counter +from ...environment import VerifiableEnvironment + + +class MitterTransportation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3237 + prompt_template = \ +r"""You are given a tree with {N} vertices labeled from `0` to `{N_minus_1}`, where vertex `0` is the root. For each vertex `i` (i > 0), its parent is `p[i]`. The parent array is: {parent} +Each vertex `i` initially has a value `A[i]`. The array A is: {A} + +You are allowed to modify the values of any vertices. Your goal is to ensure that: +- For every vertex `i` with children, all of its children must have the **same** value; the value of `A[i]` must be equal to the **sum** of the values of its children. +- Every vertex's value should be a **positive real number**. 
+ +Please compute the **minimum number of vertices** whose `A[i]` value you must modify to satisfy these rules. Output a single integer — the minimum number of modified vertices.""" + + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MitterTransportation_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + parents = self.parameter["parents"] = [None] + adj = [[] for s in range(N)] + for i in range(1, N) : + parent = random.randint(0, i - 1) + parents.append(parent) + adj[parent].append(i) + + + # BFS to root the tree in vertex 0 (former city 1) + parent = [-1] * N + child_cnt = [0] * N + order = [] # parents appear before children + + q = deque([0]) + parent[0] = 0 + while q: + v = q.popleft() + order.append(v) + for nxt in adj[v]: + if nxt == parent[v]: + assert False, "Tree should not have cycles" + continue + parent[nxt] = v + child_cnt[v] += 1 + q.append(nxt) + + # step 2 – compute the multiplicative factors (triple-hashed) + k1 = [0] * N + k1[0] = 1 # factor(root) = 1 + + for v in order[1:]: # skip the root itself + p = parent[v] + k1[v] = child_cnt[p] * k1[p] + + A = self.parameter["A"] = [None] * self.parameter["N"] + + no_change_vertices = random.sample(range(N), random.randint(1, N - 1)) + lcm = 1 + for i in no_change_vertices : + assert k1[i] > 0, "k1[i] should be positive" + lcm = math.lcm(lcm, k1[i]) + maxA = 1 + for i in no_change_vertices : + A[i] = lcm // k1[i] + maxA = max(maxA, A[i]) + for i in range(N) : + if A[i] is None : + A[i] = random.randint(1, maxA) + + # step 3 – 
count identical triplets + counter = Counter( + k1[i] * A[i] + for i in range(N) + ) + + # step 4 – result + max_group = max(counter.values()) # largest unchanged set + assert max_group >= len(no_change_vertices), "max_group should be at least the size of no_change_vertices" + self.parameter["reference_answer"] = N - max_group + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + parent = " ".join("p[{}]={}".format(i, self.parameter["parents"][i]) for i in range(1, N)), + A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/mixed_graph_eulerian_circuit/__init__.py b/server/Gym/environments/mixed_graph_eulerian_circuit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1bbd6a02daa4e559a3c021ca1c1cf322e9367e1a --- /dev/null +++ b/server/Gym/environments/mixed_graph_eulerian_circuit/__init__.py @@ -0,0 +1 @@ +from .environment import MixedGraphEulerianCircuit_Environment diff --git a/server/Gym/environments/mixed_graph_eulerian_circuit/environment.py b/server/Gym/environments/mixed_graph_eulerian_circuit/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..bf352080ca51ed177d9e4dc472e79c1abe95e7f3 --- /dev/null +++ 
b/server/Gym/environments/mixed_graph_eulerian_circuit/environment.py @@ -0,0 +1,149 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MixedGraphEulerianCircuit_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **graph** with {N} vertices labeled from 0 to {N_minus_1}. + +The graph contains the following **undirected** edges: +{undirected_edges} + +It also contains the following **directed** edges (each `` represents a directed edge from vertex `u` to vertex `v`): +{directed_edges} + +It is guaranteed that if all directed edges are treated as undirected, the resulting graph is connected and has no repeated edges, and every vertex has an even degree. + +Please find an **Eulerian circuit** in this graph — a closed path that starts and ends at the same vertex and **visits each edge exactly once**. +Output a single line containing the sequence of vertex labels visited in order, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + while True : + degrees = [0] * N + edges = [] + for v in range(1, N - 1) : + neighbors = random.sample(range(v), random.randint(0, v)) + for u in neighbors : + assert u < v, "Undirected edges should be added in increasing order" + edges.append((u, v)) + degrees[u] += 1 + degrees[v] += 1 + for u in range(N - 1) : + if 
degrees[u] % 2 == 1 : + v = N - 1 + edges.append((u, v)) + degrees[u] += 1 + degrees[v] += 1 + assert all(degree % 2 == 0 for degree in degrees), "All vertices should have even degree in undirected edges" + + random.shuffle(edges) + assert len(edges) == len(set(edges)), "There should be no repeated undirected edges" + for u, v in edges : + assert 0 <= u < v < N, "Undirected edges should be within the range of vertex labels" + + # Check if the undirected graph is connected + undirected_graph = networkx.Graph() + undirected_graph.add_nodes_from(range(N)) + undirected_graph.add_edges_from(edges) + if networkx.is_connected(undirected_graph) : + assert networkx.is_eulerian(undirected_graph), "The undirected graph should be Eulerian" + break + + + eulerian_circuit = list(networkx.eulerian_circuit(undirected_graph)) + assert len(eulerian_circuit) == len(edges), "The Eulerian circuit should visit each edge exactly once" + directed_flags = [False] * len(eulerian_circuit) + for flagged in random.sample(range(len(eulerian_circuit)), random.randint(1, len(eulerian_circuit) - 1)) : + directed_flags[flagged] = True + + undirected_edges, directed_edges = self.parameter["undirected_edges"], self.parameter["directed_edges"] = [], [] + self.parameter["reference_answer"] = [] + for (u, v), directed_flag in zip(eulerian_circuit, directed_flags) : + self.parameter["reference_answer"].append(u) + if directed_flag : + directed_edges.append((u, v)) + else : + undirected_edges.append((min(u, v), max(u, v))) + self.parameter["reference_answer"].append(eulerian_circuit[-1][1]) + assert self.parameter["reference_answer"][0] == self.parameter["reference_answer"][-1], "The Eulerian circuit should start and end at the same vertex" + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"])) + assert len(undirected_edges) > 0 and len(directed_edges) > 0, "There should be at least one undirected edge and one directed edge" + random.shuffle(undirected_edges) + 
def scorer(self, output : str) -> float :
    """
    Score a vertex sequence as a closed walk over the mixed graph.

    Returns `wrong_format` for unparseable or empty output, and
    `invalid_solution` when a label is out of range, the walk is not closed,
    or a step uses no existing edge (directed edges only in their given
    direction). Otherwise the reward depends on how many edges are traversed
    exactly once.

    Raises:
        NotImplementedError: if the rewarding strategy is unknown.
    """
    walk = self.processor(output)
    if walk is None :
        return self.rewards["wrong_format"]
    assert isinstance(walk, list), "processed_result should be a list"

    if not walk :
        return self.rewards["wrong_format"]
    if any(not (0 <= u < self.parameter["N"]) for u in walk) :
        return self.rewards["invalid_solution"]

    # Traversal counters, keyed as the edges are stored.
    undirected_uses = {(u, v) : 0 for u, v in self.parameter["undirected_edges"]}
    directed_uses = {(u, v) : 0 for u, v in self.parameter["directed_edges"]}

    if walk[0] != walk[-1] :
        return self.rewards["invalid_solution"]

    for u, v in zip(walk, walk[1 :]) :
        arc = (u, v)
        edge = (min(u, v), max(u, v))
        is_directed = arc in directed_uses
        is_undirected = edge in undirected_uses
        assert int(is_directed) + int(is_undirected) <= 1
        if is_directed :
            directed_uses[arc] += 1
        elif is_undirected :
            undirected_uses[edge] += 1
        else :
            return self.rewards["invalid_solution"]

    total = len(self.parameter["undirected_edges"]) + len(self.parameter["directed_edges"])
    satisfied = sum(count == 1 for count in directed_uses.values()) + sum(count == 1 for count in undirected_uses.values())
    assert satisfied <= total, "satisfied should be less than or equal to the total number of edges"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((satisfied / total) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * (satisfied == total)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
Starting from an empty set, repeatedly select a node `i` with probability proportional to W[i], and add it to the set (duplicates are allowed). Continue until all nodes are in the set. +3. Let T[i] denote the first time node `i` is added to the set. + +You are also given a set of constraints (each of the form T[u] < T[v]) that correspond to the edges of an undirected tree: +{T_inequalities} +Please compute the total probability that all the above T[u] < T[v] conditions hold during the random process. Output the result modulo {MOD}.""" + MOD = 998244353 + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MoneyChargingGame_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + A, B, C = self.parameter["A"], self.parameter["B"], self.parameter["C"] = [random.randint(1, N) for u in range(N)], [random.randint(1, N) for u in range(N)], [random.randint(1, N) for u in range(N)] + + T_inequalities = self.parameter["T_inequalities"] = [] + permutation = list(range(N)) + swap_probability = random.random() + random.shuffle(permutation) + for i in range(1, N) : + u = permutation[random.randint(0, i - 1)] + v = permutation[i] + if random.random() < swap_probability : + u, v = v, u + T_inequalities.append((u, v)) + random.shuffle(T_inequalities) + + assert len(T_inequalities) == N - 1, "T_inequalities should have exactly N-1 elements" + assert len(T_inequalities) == len(set(T_inequalities)), "T_inequalities should not have duplicates" + for u, v in T_inequalities : + assert 0 <= u < N and 0 <= v < N, "T_inequalities 
should contain valid indices" + assert u != v, "T_inequalities should not contain self-loops" + tree = networkx.Graph() + tree.add_edges_from((T_inequalities)) + assert networkx.is_tree(tree) + + + S = [] + for a1, a2, a3 in zip(A, B, C): + total = a1 + a2 + a3 + S.append(pow(total, self.MOD - 2, self.MOD)) + + # 2) precompute inverses of 1..3N + invs = [0] * (3 * N + 1) + for k in range(1, 3 * N + 1): + invs[k] = pow(k, self.MOD - 2, self.MOD) + + # 3) build the tree (0-indexed) with flags + G = [[] for _ in range(N)] + for u, v in T_inequalities : + G[v].append((u, 1)) + G[u].append((v, 0)) + + # 4) DP arrays + f = [None] * N + size = [0] * N + + def dfs(x, parent): + size[x] = 1 + # fx[k] will hold the *unnormalized* convolution numerator + fx = [0] * (3 * size[x] + 1) + fx[1] = A[x] * S[x] % self.MOD + fx[2] = B[x] * S[x] % self.MOD * 2 % self.MOD + fx[3] = C[x] * S[x] % self.MOD * 3 % self.MOD + + # merge in each child + for (v, t) in G[x]: + if v == parent: + continue + dfs(v, x) + fy = f[v] + + new_size = size[x] + size[v] + tmp = [0] * (3 * new_size + 1) + + # convolution with the “subtract-and-redistribute” if t==1 + for i in range(1, size[x] * 3 + 1): + if fx[i] == 0: + continue + for j in range(1, size[v] * 3 + 1): + res = fx[i] * fy[j] % self.MOD + if t: + tmp[i + j] = (tmp[i + j] - res) % self.MOD + tmp[i] = (tmp[i] + res) % self.MOD + else: + tmp[i + j] = (tmp[i + j] + res) % self.MOD + + size[x] = new_size + fx = tmp + + # 5) **one** division pass, _after_ all children are merged + for k in range(1, size[x] * 3 + 1): + fx[k] = fx[k] * invs[k] % self.MOD + + f[x] = fx + + # 6) run and collect answer + dfs(0, -1) + self.parameter["reference_answer"] = sum(f[0][1 : 3 * size[0] + 1]) % self.MOD + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = "\n".join("A[{}][1, 2, 3] = [{}, {}, {}]".format(i, a, b, c) for i, (a, b, c) in enumerate(zip(self.parameter["A"], self.parameter["B"], 
self.parameter["C"]))), + T_inequalities = "\n".join("T[{}] < T[{}]".format(u, v) for u, v in self.parameter["T_inequalities"]), + MOD = self.MOD, + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.MOD) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/monochrome_block_counting/__init__.py b/server/Gym/environments/monochrome_block_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..555b1a123529702a67b974abb6b89d9ba63fd995 --- /dev/null +++ b/server/Gym/environments/monochrome_block_counting/__init__.py @@ -0,0 +1 @@ +from .environment import MonochromeBlockCounting_Environment diff --git a/server/Gym/environments/monochrome_block_counting/environment.py b/server/Gym/environments/monochrome_block_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..5b834aa81e724d0e4161639d2709c09d20341fd3 --- /dev/null +++ b/server/Gym/environments/monochrome_block_counting/environment.py @@ -0,0 +1,84 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MonochromeBlockCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are building a **tower of blocks** with the following rules: +- The i-th layer (from top to bottom) must contain exactly i blocks (i is from 1 to N if the tower has N layers). 
+- All blocks in the same layer must be of the **same color**: either black or white. +- You may use **at most {A} black blocks** and **at most {B} white blocks** in total. +- You should build a tower with the **maximum possible number of layers (N)** under these constraints. + +Please compute the total number of distinct ways to build such a tower with the **maximum number of layers**. + +**Output Format:** Your final answer should be a single integer — the total number of valid tower configurations that achieve the maximum number of layers.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the MonochromeBlockCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "MAX_A_B" in self.parameter, "MAX_A_B is required in parameter" + MAX_A_B = self.parameter["MAX_A_B"] + assert MAX_A_B >= 1, "A and B should be greater than or equal to 1" + + A = self.parameter["A"] = random.randint(1, MAX_A_B) + B = self.parameter["B"] = random.randint(1, MAX_A_B) + + + T = 0 + while ((T + 1) * (T + 2) // 2 <= A + B) : + T += 1 + + F = [0] * (A + 1) + F[0] = 1 + for i in range(1, T + 1) : + for j in range(A, i - 1, -1) : + F[j] += F[j - i] + + self.parameter["reference_answer"] = sum(F[i] for i in range(max(T * (T + 1) // 2 - B, 0), A + 1)) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(A = self.parameter["A"], B = self.parameter["B"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def 
scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/monotonic_stack/__init__.py b/server/Gym/environments/monotonic_stack/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55f9375ffda63c59ad8df8798a3823a8f16c1a23 --- /dev/null +++ b/server/Gym/environments/monotonic_stack/__init__.py @@ -0,0 +1 @@ +from .environment import MonotonicStack_Environment diff --git a/server/Gym/environments/monotonic_stack/environment.py b/server/Gym/environments/monotonic_stack/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8d0fa58940a9579248e13d80855e7bdf5e85ddc2 --- /dev/null +++ b/server/Gym/environments/monotonic_stack/environment.py @@ -0,0 +1,74 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MonotonicStack_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2866 + prompt_template = \ +r"""You are given an array A indexed from `1` to `{N}`: {A} + +For each 1 ≤ i ≤ {N}, define C[i] as the number of indices j such that: +- i + 1 ≤ j ≤ {N}, and +- For every index k such that i + 1 ≤ k ≤ j, we have A[i] > A[k]. + +Tell me the value of C[1] + C[2] + ... 
+ C[{N}].""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MonotonicStack_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> str : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N must be at least 3" + + self.parameter["A"] = A = [random.randint(1, N) for _ in range(N)] + + S = [] # monotonic decreasing stack of heights + ans = 0 + + for t in A: + while S and S[-1] <= t: + S.pop() + ans += len(S) + S.append(t) + + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], A = ", ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1))) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/most_component_tree_removing_two_paths/__init__.py b/server/Gym/environments/most_component_tree_removing_two_paths/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f504a76476f37dadf1ed266b00c10b31ffe8463 --- /dev/null +++ b/server/Gym/environments/most_component_tree_removing_two_paths/__init__.py @@ -0,0 +1 @@ +from .environment import 
MostComponentTreeRemovingTwoPaths_Environment diff --git a/server/Gym/environments/most_component_tree_removing_two_paths/environment.py b/server/Gym/environments/most_component_tree_removing_two_paths/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..7415efe72e5ed4e57efc102a38f3acbc7027c42e --- /dev/null +++ b/server/Gym/environments/most_component_tree_removing_two_paths/environment.py @@ -0,0 +1,163 @@ +import random +import networkx +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MostComponentTreeRemovingTwoPaths_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3748 + prompt_template = \ +r"""You are given a **tree** with {N} vertices labeled from 1 to {N}, where vertex 1 is the **root**. The tree contains the following {N_minus_1} undirected edges: +{edges} + +Your task is to choose two paths (each from any vertex to any vertex; a path could be just one single vertex) such that: +- The two paths do **NOT** share any edge (but they can share vertices). +- You remove all vertices on both paths, along with all their adjacent edges. +- After this removal, the remaining structure is a forest. Try your best to **maximize the number of connected components** in the resulting forest. + +**Output Format:** A single integer — the maximum number of connected components you can achieve.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MostComponentTreeRemovingTwoPaths instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v) + 1, max(u, v) + 1 + edges.append((u, v)) + random.shuffle(edges) + + for u, v in edges : + assert 1 <= u < v <= N + assert len(edges) == len(set(edges)) == N - 1 + + tree = networkx.Graph() + tree.add_edges_from(edges) + assert networkx.is_tree(tree) + + + adj = [[] for _ in range(N+1)] + for u, v in edges: + adj[u].append(v) + adj[v].append(u) + # build a child-only adjacency by rooting at 1 + visited = [False] * (N+1) + visited[1] = True + stack = [1] + children = [[] for _ in range(N+1)] + order = [] + while stack: + u = stack.pop() + order.append(u) + for v in adj[u]: + if not visited[v]: + visited[v] = True + children[u].append(v) + stack.append(v) + # we no longer need 'adj' + # do the DP in post-order + ans = 0 + f0 = [0]*(N+1) + f1 = [0]*(N+1) + f2 = [0]*(N+1) + f3 = [0]*(N+1) + for u in reversed(order): + deg_u = len(children[u]) + dp0 = deg_u + dp1 = 1 + dp2 = deg_u + dp3 = deg_u + ret = 0 + off = 1 if u == 1 else 0 + for q in children[u]: + c0, c1, c2, c3 = f0[q], f1[q], f2[q], f3[q] + # update global answer + val = dp3 + c0 - off + if val > ans: ans = val + val = dp0 + c3 - off + if val > ans: ans = val + val = dp1 + c2 + if val > ans: ans = val + val = dp1 + c1 - 1 + if val > ans: ans = val + val = dp2 + c1 - off + if val > ans: ans = val + val = dp2 + c2 - off + if val > ans: ans = val + # transitions for f1 + if c1 > dp1: dp1 = c1 + if c2 + 1 > dp1: dp1 = c2 + 1 
+ # transitions for f3 + val = dp0 + c2 - 1 + if val > dp3: dp3 = val + val = dp0 + c1 - 1 + if val > dp3: dp3 = val + val = dp2 + c0 - 1 + if val > dp3: dp3 = val + val = c3 + deg_u - 1 + if val > dp3: dp3 = val + val = c0 + deg_u + ret - 2 + if val > dp3: dp3 = val + # transitions for f2 + val = dp0 + c0 - 1 + if val > dp2: dp2 = val + # transitions for f0 + val = c0 + deg_u - 1 + if val > dp0: dp0 = val + # update ret for next child + if c1 > ret: ret = c1 + if c2 > ret: ret = c2 + f0[u], f1[u], f2[u], f3[u] = dp0, dp1, dp2, dp3 + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/most_num_edge_non_self_isomorphism/__init__.py b/server/Gym/environments/most_num_edge_non_self_isomorphism/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4754ad5f540b8ccae3b7a1cbcd1f9d08df2a7820 --- /dev/null +++ b/server/Gym/environments/most_num_edge_non_self_isomorphism/__init__.py @@ -0,0 +1 @@ +from .environment import MostNumEdge_NonSelfIsomorphism_Environment diff --git a/server/Gym/environments/most_num_edge_non_self_isomorphism/environment.py b/server/Gym/environments/most_num_edge_non_self_isomorphism/environment.py new 
file mode 100644 index 0000000000000000000000000000000000000000..e01cf2768c829bf2eae6a8d93c9c73cb00f104e2 --- /dev/null +++ b/server/Gym/environments/most_num_edge_non_self_isomorphism/environment.py @@ -0,0 +1,86 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MostNumEdge_NonSelfIsomorphism_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Consider a simple **undirected graph** G on {N} labeled vertices `1` to `{N}`. We say G is **asymmetric** if the only bijection (permutation) `p` of the vertices that preserves all edges (i.e., `(u, v)` is an edge iff `(p(u), p(v))` is an edge) is the identity permutation. What is the **maximum number of edges** an asymmetric graph G on {N} labeled vertices can have?""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MostNumEdge_NonSelfIsomorphism_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 6, "MAX_N should be greater than or equal to 6" + + N = self.parameter["N"] = random.randint(6, MAX_N) + + + def C(n, m) : + if 0 > m or m > n : + return 0 + ans = 1 + for i in range(m) : + ans = ans * (n - i) // (i + 1) + return ans + f = h = [0 for i in range(0, N + 1)] + g = [[0 for j in range(0, N + 1)] for i in range(0, N + 1)] + g[0][0] = 1 + for i in range(1, N + 1): + h[i] = g[i - 1][i - 1] + for j in range(0, N + 1): + for k in range(j // i + 1): + g[i][j] += C(h[i], k) * g[i - 1][j - i * k] + for i in range(1, N + 1): + f[i] = g[(i - 1) // 2][i - 1] + if i % 2 == 0: + f[i] += C(g[i // 2 - 1][i // 2 - 1], 2) + + res = N * (N - 1) // 2 - N + original_N = N + for i in 
range(1, original_N + 1): + cnt = min(N // i, f[i]) + res += cnt + N -= i * cnt + self.parameter["reference_answer"] = res + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/multidrink/__init__.py b/server/Gym/environments/multidrink/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b26de513ccefe5fa74859037f4d6e4fd79d021d --- /dev/null +++ b/server/Gym/environments/multidrink/__init__.py @@ -0,0 +1 @@ +from .environment import MultiDrink_Environment diff --git a/server/Gym/environments/multidrink/environment.py b/server/Gym/environments/multidrink/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..7037a60bc587286c716bf8faf263c96de6cc34b2 --- /dev/null +++ b/server/Gym/environments/multidrink/environment.py @@ -0,0 +1,118 @@ + +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class MultiDrink_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3549 + prompt_template = \ +r"""There is a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` to `{N_minus_1}`. 
Its edges are: +{edges} + +Please find a permutation of the vertices p[0], p[1], ..., p[{N_minus_1}] such that for every pair (p[i], p[i + 1]) with 0 ≤ i < {N_minus_1}, the distance between p[i] and p[i + 1] in the tree (measured in number of edges) is **at most 2**. Output the permutation as a single line of space-separated integers in order.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the MultiDrinkProblem. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N must be specified in parameters" + N = self.parameter["N"] + assert N >= 4, "N must be at least 4" + + edges = self.parameter["edges"] = [] + neighbors = [[] for _ in range(N)] + def add_edge(u, v) : + edges.append((min(u, v), max(u, v))) + neighbors[u].append(v) + neighbors[v].append(u) + + paths = [[u] for u in range(N)] + while len(paths) > 1 : + while True : + i, j = random.choices(range(len(paths)), k = 2, weights = [len(path) for path in paths]) + if i != j : + break + path_i, path_j = paths[i], paths[j] + + a, b = path_i[-1], path_j[0] + if random.random() < 0.5 : + add_edge(a, random.choice([b] + neighbors[b])) + else : + add_edge(b, random.choice([a] + neighbors[a])) + + paths = [path for index, path in enumerate(paths) if index not in (i, j)] + [path_i + path_j] + self.parameter["reference_answer"] = " ".join(map(str, paths[0])) + + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + tree = networkx.Graph() + tree.add_edges_from(edges) + assert 
networkx.is_tree(tree) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + P = processed_result + if len(P) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if set(P) != set(range(self.parameter["N"])) : + return self.rewards["invalid_solution"] + + neighbors = [set() for _ in range(self.parameter["N"])] + for u, v in self.parameter["edges"] : + neighbors[u].add(v) + neighbors[v].add(u) + + satisfied = sum(int((a in neighbors[b]) or (len(neighbors[a] & neighbors[b]) > 0)) for a, b in zip(P, P[1 :])) + assert satisfied <= self.parameter["N"] - 1, "satisfied should be at most N - 1" + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / (self.parameter["N"] - 1)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == self.parameter["N"] - 1) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/multiple_flipping_game/__init__.py b/server/Gym/environments/multiple_flipping_game/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..6ded5c5979f74fe72bcc6cb7c327e40bc5fc7a84 --- /dev/null +++ b/server/Gym/environments/multiple_flipping_game/__init__.py @@ -0,0 +1 @@ +from .environment import MultipleFlippingGame_Environment diff --git a/server/Gym/environments/multiple_flipping_game/environment.py b/server/Gym/environments/multiple_flipping_game/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..4b9c8b5c62ecb569c7180a8179cb8bedabe396b8 --- /dev/null +++ b/server/Gym/environments/multiple_flipping_game/environment.py @@ -0,0 +1,127 @@ +import math +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class MultipleFlippingGame_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3179 + prompt_template = \ +r"""You are given an array of length {N}, indexed from `1` to `{N}`. + +Two players, Alice and Bob, play the following game: ++ Initially, some positions in the array are **white**, and the rest are **black**. ++ The players take turns. On each turn, the current player selects a **white** cell with index `x`. ++ Then, they choose an integer `k` such that 1 <= k <= n / x, and **flip the color** of all cells at indices `x, 2×x, ..., k×x`. ++ A player **loses** if they have no valid move on their turn. + +Initially, the cells at indices {white_indices} are white (all others are black). Determine whether the **first player (Alice)** has a **winning strategy** if both players play optimally. + +**Output Format:** Your final answer should be either `Yes` or `No` (do **NOT** include quotes or backticks), indicating whether the first player has a forced win.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the MultipleFlippingGame_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 1, "N should be greater than or equal to 1" + + self.parameter["reference_answer"] = "Yes" if random.random() < 0.5 else "No" + + + sn = int(math.isqrt(N)) + p = [] + r_list = [] + l = 1 + while l <= N : + k = N // l + r = N // k + p.append(l) + r_list.append(r) + l = r + 1 + + m = len(p) + sg_small = [0] * (sn + 1) + sg_large = [0] * (sn + 1) + vis = [0] * (2 * sn + 5) + + for i in range(m - 1, -1, -1) : + li = p[i] + t = N // li + s = 0 + l2 = 2 + mark = i + 1 + while l2 <= t : + k2 = t // l2 + r2 = t // k2 + v = l2 * li + if v <= sn : + gv = sg_small[v] + else : + gv = sg_large[k2] + vis[s ^ gv] = mark + if ((r2 - l2 + 1) & 1) : + s ^= gv + l2 = r2 + 1 + g = 1 + while vis[g] == mark : + g += 1 + if li <= sn : + sg_small[li] = g + else: + sg_large[t] = g + + def SG(x) : + if x <= sn: + return sg_small[x] + return sg_large[N // x] + + + while True : + white_index_number = random.randint(1, N) + white_indices = random.sample(range(1, N + 1), white_index_number) + xo = 0 + for x in white_indices : + xo ^= SG(x) + if ("Yes" if xo else "No") == self.parameter["reference_answer"] : + self.parameter["white_indices"] = white_indices + break + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + white_indices = ", ".join(map(str, sorted(self.parameter["white_indices"]))), + ) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result not in ("Yes", "No") : + 
return self.rewards["invalid_answer"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/multiplication/__init__.py b/server/Gym/environments/multiplication/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..42cbede7325b68822bb48ae070e9dc7f19404a33 --- /dev/null +++ b/server/Gym/environments/multiplication/__init__.py @@ -0,0 +1 @@ +from .environment import Multiplication_Environment diff --git a/server/Gym/environments/multiplication/environment.py b/server/Gym/environments/multiplication/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..31e799e27027c6e206550e0f433e04e5436fa3c2 --- /dev/null +++ b/server/Gym/environments/multiplication/environment.py @@ -0,0 +1,70 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class Multiplication_Environment(VerifiableEnvironment) : + prompt_templates = ( + "Give me the answer of the following equation: {} * {} = ", # https://github.com/Jiayi-Pan/TinyZero/blob/main/examples/data_preprocess/multiply.py + "What is the result of {} times {}?", + "Calculate the product of {} and {}.", + "What do you get when you multiply {} by {}?", + "If you multiply {} and {}, what is the answer?", + "What is {} multiplied by {}?", + "Find the result of {} times {}.", + "What is the multiplication of {} and {}?", + "Compute the product of {} and {}.", + "What is the answer to {} times {}?", + ) # This is probably unnecessary, but just in case we need to diversify the prompt templates. + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the Multiplication_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "digit_num" in self.parameter, "digit_num is required in parameter" + digit_num = self.parameter["digit_num"] + assert digit_num >= 1, "digit_num should be greater than or equal to 1" + + self.parameter["a"] = random.randint(0, 10 ** digit_num - 1) + self.parameter["b"] = random.randint(0, 10 ** digit_num - 1) + self.parameter["reference_answer"] = self.parameter["a"] * self.parameter["b"] + + self.parameter["prompt_template"] = random.randrange(len(self.prompt_templates)) + + def _prompt_generate(self) -> str : + return self.prompt_templates[self.parameter["prompt_template"]].format(self.parameter["a"], self.parameter["b"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/myj/__init__.py b/server/Gym/environments/myj/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c649c14a8e4f954f39d852206f143124d7e7d02 --- /dev/null +++ b/server/Gym/environments/myj/__init__.py @@ -0,0 +1 @@ +from .environment import MYJ_Environment diff --git a/server/Gym/environments/myj/environment.py b/server/Gym/environments/myj/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..346796f77283dda804e628131358b0f57749490f --- /dev/null +++ 
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Set up the reward configuration of a MYJ_Environment.

    The named arguments are stored, under their own names, in ``self.rewards``.
    Every other keyword argument is passed unchanged to the parent class.
    """
    super().__init__(**kwargs)

    reward_settings = (
        ("wrong_format", wrong_format),
        ("invalid_solution", invalid_solution),
        ("rewarding_strategy", rewarding_strategy),
        ("rewarding_weight", rewarding_weight),
        ("rewarding_beta", rewarding_beta),
    )
    self.rewards = dict(reward_settings)
def scorer(self, output : str) -> float :
    """
    Score a proposed price assignment for the shops.

    The processed output must be a list of exactly ``parameter["N"]`` integer
    prices. Each customer ``(a, b, c)`` (1-based, inclusive shop range) pays
    the lowest price in the range when that price is at most ``c``. The sum
    over all customers is compared with ``parameter["gold_answer"]``.

    Returns:
        ``rewards["wrong_format"]`` when the processor yields ``None``;
        ``rewards["invalid_solution"]`` when the number of prices is wrong or
        any price is negative; otherwise the reward given by
        ``rewards["rewarding_strategy"]``.

    Raises:
        NotImplementedError: for an unknown rewarding strategy.
    """
    prices = self.processor(output)
    if prices is None :
        return self.rewards["wrong_format"]
    assert isinstance(prices, list), "processed_result should be a list"

    if len(prices) != self.parameter["N"] :
        return self.rewards["invalid_solution"]
    # A negative price makes the earned total negative. The ratio
    # (answer / gold) would then be an unbounded negative number, and a complex
    # one once raised to a non-integer beta. Reject such assignments.
    if any(price < 0 for price in prices) :
        return self.rewards["invalid_solution"]

    answer, gold = 0, self.parameter["gold_answer"]
    for a, b, c in self.parameter["customers"] :
        # Shops are 1-based, so shops a..b are list positions a-1..b-1.
        min_price = min(prices[a - 1 : b])
        if min_price <= c :
            answer += min_price
    assert answer <= gold, "The answer should not exceed the gold answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(answer/gold)^beta" :
        return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def _generate(self) -> None :
    """
    Build one problem instance and store it in ``self.parameter``.

    Reads ``N`` (how many numbers) and ``K`` (bit width) from
    ``self.parameter`` and writes ``A`` (the numbers), ``L`` and ``R`` (the
    query range, L <= R) and ``reference_answer``.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    assert "K" in self.parameter, "K is required in parameter"
    K = self.parameter["K"]
    assert K >= 1, "K should be greater than or equal to 1"

    # Split the K bit positions into `component_num` non-empty groups:
    # shuffle the positions, then cut the shuffled list at distinct sorted endpoints.
    component_num = random.randint(1, K)
    endpoints = random.sample(range(1, K), component_num - 1) if component_num > 1 else []
    endpoints.sort()
    endpoints = [0] + endpoints + [K]
    assert len(endpoints) == component_num + 1, "Endpoints should be of length component_num + 1"
    allbits = list(range(K))
    random.shuffle(allbits)
    assert all(0 <= endpoints[i] < endpoints[i + 1] <= K for i in range(component_num)), "Endpoints should be in the range [0, K] and strictly increasing"
    components = [allbits[endpoints[i] : endpoints[i + 1]] for i in range(component_num)]

    def generate_number() -> int :
        # Each number sets whole components at a time. A component is included
        # with a probability that is drawn once per number.
        number = 0
        existence_probability = random.random()
        for component in components :
            if random.random() < existence_probability :
                number |= sum(1 << bit for bit in component)
        return number
    self.parameter["A"] = A = [generate_number() for _ in range(N)]

    # Random inclusive query range, ordered so that L <= R.
    L, R = random.randint(0, (1 << K) - 1), random.randint(0, (1 << K) - 1)
    if L > R:
        L, R = R, L
    self.parameter["L"], self.parameter["R"] = L, R


    full = (1 << K) - 1  # all K bits set
    lk = [0] * K         # lk[i]: mask anchored at bit i, or 0 if bit i belongs to an earlier mask
    num = [0] * K        # 1 where a mask is anchored; turned into prefix counts below
    have = 0             # union of all masks found so far

    # Scan bits from high to low. For each bit i not yet covered, AND together
    # every a that has bit i set and the complement of every a that does not.
    # The result keeps exactly the positions whose bit equals bit i in every number of A.
    for i in range(K - 1, -1, -1):
        if ((have >> i) & 1) == 0:
            now_mask = full
            for a in A:
                if (a >> i) & 1:
                    now_mask &= a
                else:
                    # ~a is negative in Python; keep only the low K bits.
                    now_mask &= (~a) & full
            lk[i] = now_mask
            num[i] = 1
            have |= now_mask

    # After this loop num[i] is the number of masks anchored at bits 0..i.
    for i in range(1, K):
        num[i] += num[i - 1]

    def count_upto(x):
        # Counts the reachable values that are <= x.
        # NOTE(review): this presumes the reachable values are exactly the ORs
        # of subsets of the masks in lk (a property of the source problem that
        # is not established in this file) - confirm.
        if x < 0:
            return 0
        if x >= full:
            return 1 << num[K - 1]
        ans = 0
        for i in range(K - 1, -1, -1):
            if x < 0:
                break
            if (x >> i) & 1:
                if lk[i] != 0:
                    # Leave the mask anchored at i out: every choice of the
                    # lower-anchored masks is counted. Then take the mask and
                    # continue with what is left of x.
                    ans += 1 << (num[i] - 1)
                    x -= lk[i]
                else:
                    # Bit i is not an anchor: count every choice of the masks
                    # anchored at or below i and stop.
                    ans += 1 << num[i]
                    break
        if x == 0:
            # The masks taken so far add up to the bound itself; count that value too.
            ans += 1
        return ans

    # Reachable values in [L, R] as a difference of two prefix counts.
    self.parameter["reference_answer"] = count_upto(R) - count_upto(L - 1)
a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/negative_base/__init__.py b/server/Gym/environments/negative_base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..001c36b9cb75259aed59feb27faabdb83bada097 --- /dev/null +++ b/server/Gym/environments/negative_base/__init__.py @@ -0,0 +1 @@ +from .environment import NegativeBase_Environment diff --git a/server/Gym/environments/negative_base/environment.py b/server/Gym/environments/negative_base/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..97fcb9a3ce1e268ce5ad536953d77f0faf9ee9fa --- /dev/null +++ b/server/Gym/environments/negative_base/environment.py @@ -0,0 +1,102 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class NegativeBase_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1017 + prompt_template = \ +r"""We can represent integers using a **negative base** system with base `-R`, where `R` is a positive integer greater than 1. In this system, the digits used are from `0` to `R - 1` (in decimal). +For example, the decimal number `-15` can be represented as `110001` in base `-2`, since: +1×(-2)^5 + 1×(-2)^4 + 0×(-2)^3 + 0×(-2)^2 + 0×(-2)^1 + 1×(-2)^0 = (-15). + +Your task is to convert the decimal number `{N}` into base `-{R}`, and output its digits (in decimal) from most significant to least significant. 
+ +Output Format: +Your final answer should be a single line containing the digits (in decimal), separated by **spaces**. +Example: `{R_minus_1} 0 1` (do **NOT** include the backticks or quotes) means `{R_minus_1} * (-{R})^2 + 0 * (-{R})^1 + 1 * (-{R})^0` in decimal. +""" + + def __init__(self, + wrong_format : float = -1.0, wrong_length : float = 0.0, rewarding_strategy : str = "mean([gold=answer])", rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the NegativeBase_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_length" : wrong_length, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 1, "MAX_N should be greater than or equal to 1" + + assert "MAX_R" in self.parameter, "MAX_R is required in parameter" + MAX_R = self.parameter["MAX_R"] + assert MAX_R >= 2, "MAX_R should be greater than or equal to 2" + + N = 0 + while N == 0 : + N = self.parameter["N"] = random.randint(-MAX_N, MAX_N) + R = self.parameter["R"] = random.randint(2, MAX_R) + + # Convert N to base -R + def convert_to_negative_base(n, r) : + if n == 0 : + return [] + m = n % r + if m < 0 : + m -= r + n += r + return convert_to_negative_base(n // r, r) + [m] + self.parameter["gold_answer"] = convert_to_negative_base(N, -R) + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"])) + + # check the gold_answer + Sum = 0 + for digit in self.parameter["gold_answer"] : + Sum *= (-R) + Sum += digit + assert Sum == N, "Sum should be equal to N, but got {} != {}".format(Sum, N) + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + R = self.parameter["R"], + R_minus_1 = self.parameter["R"] - 1, + ) + + + def _process(self, answer : Optional[str]) -> 
def scorer(self, output : str) -> float :
    """
    Compare the submitted digit list with ``parameter["gold_answer"]``.

    Returns ``rewards["wrong_format"]`` when the processor yields ``None``
    and ``rewards["wrong_length"]`` when the digit count differs. Otherwise
    the result is the weighted fraction of matching positions (strategy
    ``"mean([gold=answer])"``) or the weighted exact-match indicator
    (strategy ``"gold=answer"``). Any other strategy raises
    NotImplementedError.
    """
    submitted = self.processor(output)
    if submitted is None :
        return self.rewards["wrong_format"]
    assert isinstance(submitted, list), "processed_result should be a list"

    expected = self.parameter["gold_answer"]
    if len(submitted) != len(expected) :
        return self.rewards["wrong_length"]

    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]
    position_matches = [a == b for a, b in zip(expected, submitted)]

    if strategy == "mean([gold=answer])" :
        return weight * (sum(map(float, position_matches)) / len(expected))
    if strategy == "gold=answer" :
        return weight * all(position_matches)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
https://www.luogu.com.cn/problem/P4301 + prompt_template = \ +r"""You are given a Nim-like game with heaps of matches. There are {N} heaps with the following sizes (1-indexed): {A} +Game rules: +- **First round** has two phases: + 1) **Your move (first player):** You may remove **any number of entire heaps** (possibly zero), but you are **not allowed** to remove **all** heaps. + 2) **Opponent's move (second player):** Then the opponent may remove **any number of entire heaps** (possibly zero), but likewise cannot remove **all remaining** heaps. +- **From the second round onward:** Standard Nim rules apply on the remaining heaps: players alternate; a move removes any positive number of matches from **exactly one** heap; the player who takes the last match **wins**. +- Both players play optimally. + +Your task: Choose which heaps to remove **in your first move** so that you **guarantee a win**; if multiple winning choices exist, choose one that **minimizes the total number of matches** you remove (i.e., the sum of sizes of the heaps you remove). Output the distinct *indices** (1-based) of the heaps you remove in your first move, in any order, separated by spaces; if you can guarantee victory without removing any heap, output an **empty line**.""" + + def __init__(self, + match_number_range_coefficient : int = 2, + wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = +3.0, + **kwargs) : + """ + Initialize the NewNimGame_Environment instance. 
def scorer(self, output : str) -> float :
    """
    Score a choice of heaps to remove in the first move.

    The processed output is a list of 1-based heap indices. It is rejected
    with ``rewards["invalid_solution"]`` when it has duplicates, an index
    outside 1..N, or names every heap. The heaps that remain are inserted one
    by one into an XOR basis; if one cannot be inserted the result is
    ``rewards["unsuccessful_solution"]``. Otherwise the total size of the
    removed heaps is compared with ``parameter["gold_answer"]`` using the
    configured rewarding strategy.
    """
    chosen = self.processor(output)
    if chosen is None :
        return self.rewards["wrong_format"]
    assert isinstance(chosen, list), "processed_result should be a list"

    heaps = self.parameter["A"]
    heap_count = self.parameter["N"]

    dropped = set(chosen)
    if len(dropped) != len(chosen) :
        return self.rewards["invalid_solution"] # Duplicate indices
    if any(index < 1 or index > heap_count for index in chosen) :
        return self.rewards["invalid_solution"] # Index out of range
    if len(chosen) == heap_count :
        return self.rewards["invalid_solution"] # Cannot remove all heaps

    # XOR basis with one slot per bit of the largest heap size.
    width = max(heaps).bit_length()
    basis = [0] * width
    for position, size in enumerate(heaps, start = 1) :
        if position in dropped :
            continue
        value, inserted = size, False
        for bit in range(width - 1, -1, -1) :
            if not (value >> bit) & 1 :
                continue
            if basis[bit] :
                value ^= basis[bit]
            else :
                basis[bit] = value
                inserted = True
                break
        if not inserted :
            # This heap is the XOR of heaps already kept.
            return self.rewards["unsuccessful_solution"] # Cannot guarantee victory

    answer = sum(heaps[index - 1] for index in chosen)
    gold = self.parameter["gold_answer"]
    assert 0 <= gold <= answer, "Gold answer should be non-negative and not exceed the provided answer"

    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]
    if strategy == "(gold/answer)^beta" :
        if answer == 0 :
            assert gold == 0, "If answer is 0, gold must also be 0"
            return weight * 1.0
        return weight * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return weight * (gold == answer)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class NextPalindromic_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1609
    """Environment that asks for the smallest palindromic number greater than N."""

    prompt_template = r"""Please find the **smallest palindromic number** that is greater than {N}."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """Store the reward settings; other keyword arguments go to the parent class."""
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    @staticmethod
    def _next_palindrome(digits : str) -> str :
        """Return the smallest palindrome strictly greater than the decimal string ``digits``."""
        size = len(digits)
        # Only nines: the next palindrome has one more digit (9 -> 11, 99 -> 101).
        if digits.count('9') == size :
            return '1' + '0' * (size - 1) + '1'

        half = size // 2
        chars = list(digits)

        def mirror_left_half() -> None :
            for k in range(half) :
                chars[size - 1 - k] = chars[k]

        # First candidate: copy the left half onto the right half.
        mirror_left_half()
        candidate = ''.join(chars)
        if candidate > digits :  # same length, so string order equals numeric order
            return candidate

        # Otherwise add one at the middle, carrying to the left through nines,
        # and mirror again.
        pos = (size - 1) // 2
        while pos >= 0 and chars[pos] == '9' :
            chars[pos] = '0'
            pos -= 1
        chars[pos] = str(int(chars[pos]) + 1)
        mirror_left_half()
        return ''.join(chars)

    def _generate(self) -> None :
        """Draw N with at most ``digit_num`` digits and store the reference answer as a string."""
        assert "digit_num" in self.parameter, "digit_num is required in parameter"
        digit_num = self.parameter["digit_num"]
        assert digit_num >= 1, "digit_num should be greater than or equal to 1"

        number = random.randint(1, 10 ** digit_num - 1)
        self.parameter["N"] = number
        self.parameter["reference_answer"] = self._next_palindrome(str(number))

    def _prompt_generate(self) -> str :
        """Insert N into the prompt template."""
        return self.prompt_template.format(N = self.parameter["N"])

    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; return None when that is impossible."""
        if answer is None :
            return None
        stripped = answer.strip()
        try :
            return int(stripped)
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score a submitted number.

        A number that is not greater than N, or that is not a palindrome,
        gets ``rewards["invalid_solution"]``. A valid palindrome is compared
        with the reference answer using the configured rewarding strategy.
        """
        candidate = self.processor(output)
        if candidate is None :
            return self.rewards["wrong_format"]

        text = str(candidate)
        if candidate <= self.parameter["N"] or text != text[::-1] :
            return self.rewards["invalid_solution"]

        gold, answer = int(self.parameter["reference_answer"]), candidate
        assert gold <= answer

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
0000000000000000000000000000000000000000..5eb9c22c19ab836a0d643aa3404a3bdb1d5e291b --- /dev/null +++ b/server/Gym/environments/nine_puzzle/__init__.py @@ -0,0 +1 @@ +from .environment import NinePuzzle_Environment diff --git a/server/Gym/environments/nine_puzzle/environment.py b/server/Gym/environments/nine_puzzle/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..1de0275b2b52b9f78fe58089fad897fd7369f9b9 --- /dev/null +++ b/server/Gym/environments/nine_puzzle/environment.py @@ -0,0 +1,159 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class NinePuzzle_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a {N} × {M} grid, where each cell contains a digit from `0` to `{NM_minus_1}`. + +At any time, you may perform one of the following actions: +- Pick a row i (0 ≤ i < {N}) and shift it left or right by **at most** {row_K} cells. +- Pick a column j (0 ≤ j < {M}) and shift it up or down by **at most** {col_K} cells. + +You start with the following grid: +{start_grid} + +Your goal is to transform it into the following grid: +{destination_grid} + +**Output Format:** Each action should be written on its own line in the following format: `[row_or_column] [index] [shifts]` +Where: +- `row_or_column` is either `row` or `column` +- `index` is the 0-based index of the row or column +- `shifts` is a signed integer: positive for right/down, negative for left/up +- Example: `row 0 2` or `column 1 -3` +Do **NOT** include backticks or quotes in your output. Output one action per line in the order they should be performed.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the NinePuzzle_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + row_K, col_K = self.parameter["row_K"], self.parameter["col_K"] = random.randint(1, M - 1), random.randint(1, N - 1) + + start_permutation = list(range(N * M)) + random.shuffle(start_permutation) + start_grid = self.parameter["start_grid"] = [[start_permutation[i * M + j] for j in range(M)] for i in range(N)] + + assert "steps" in self.parameter, "steps is required in parameter" + steps = self.parameter["steps"] + assert steps >= 1, "steps should be greater than or equal to 1" + + destination_grid = [row.copy() for row in start_grid] + self.parameter["reference_answer"] = "" + for step in range(steps) : + row_or_column = random.choice(["row", "column"]) + index = random.randint(0, N - 1) if row_or_column == "row" else random.randint(0, M - 1) + while True : + shifts = random.randint(-row_K, row_K) if row_or_column == "row" else random.randint(-col_K, col_K) + if shifts != 0 : + break + self.parameter["reference_answer"] += "{} {} {}\n".format(row_or_column, index, shifts) + + new_grid = [row.copy() for row in destination_grid] + if row_or_column == "row" : + assert abs(shifts) <= M - 1 + assert abs(shifts) <= row_K + for j in range(M) : + new_grid[index][j] = destination_grid[index][((j - shifts) % M + M) % M] + else : + assert row_or_column == "column" + assert abs(shifts) <= N - 1 + assert abs(shifts) <= col_K + for i in range(N) : + new_grid[i][index] = destination_grid[((i - shifts) % N 
+ N) % N][index] + destination_grid = new_grid + self.parameter["destination_grid"] = destination_grid + + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = N, + M = M, + NM_minus_1 = N * M - 1, + row_K = self.parameter["row_K"], + col_K = self.parameter["col_K"], + start_grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["start_grid"]), + destination_grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["destination_grid"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + actions = [] + for line in answer.splitlines() : + line = line.strip() + if line : + actions.append(line.split()) + action = actions[-1] + if len(action) != 3 : + return None + if action[0] not in ("row", "column") : + return None + try : + action[1] = int(action[1]) + action[2] = int(action[2]) + except ValueError : + return None + return actions + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + destination_grid = [row.copy() for row in self.parameter["start_grid"]] + + for action in processed_result : + new_grid = [row.copy() for row in destination_grid] + if action[0] == "row" : + index = action[1] + if not (0 <= index < self.parameter["N"]) : + return self.rewards["invalid_solution"] + shifts = action[2] + if not (-self.parameter["row_K"] <= shifts <= self.parameter["row_K"]) : + return self.rewards["invalid_solution"] + for j in range(self.parameter["M"]) : + new_grid[index][j] = destination_grid[index][((j - shifts) % self.parameter["M"] + self.parameter["M"]) % self.parameter["M"]] + else : + assert action[0] == "column" + index = action[1] + if not (0 <= index < self.parameter["M"]) : + return self.rewards["invalid_solution"] + shifts = action[2] + if not (-self.parameter["col_K"] <= shifts <= 
class NoAdjacentGirlCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3223
    """Counting task: line up boys, girls and two teachers so that no two girls and no two teachers are adjacent."""

    prompt_template = r"""Please count the number of ways to arrange {N} distinct boys, {M} distinct girls, and 2 distinct teachers in a line such that no two girls are adjacent and the two teachers are not adjacent."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the NoAdjacentGirlCounting_Environment instance.

        Args:
            wrong_format: score returned when the answer cannot be parsed or is negative.
            rewarding_strategy: "(min/max)^beta" for partial credit, or "gold=answer" for exact match.
            rewarding_weight: multiplier applied to the computed reward.
            rewarding_beta: exponent used by the "(min/max)^beta" strategy.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Draw N boys and M girls (each in 1..MAX_N_M) until the count is positive.

        Writes ``N``, ``M`` and ``reference_answer`` into ``self.parameter``.
        The count is A(N+3, M) * (N+2)! minus 2 * A(N+2, M) * (N+1)!, where each
        term is included only when its first argument is at least M.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        limit = self.parameter["MAX_N_M"]
        assert limit >= 2, "MAX_N_M should be greater than or equal to 2"

        def arrangements(pool, picks) :
            # Falling factorial: pool * (pool - 1) * ... over `picks` factors.
            product = 1
            for used in range(picks) :
                product *= pool - used
            return product

        count = 0
        while count <= 0 :
            boys, girls = random.randint(1, limit), random.randint(1, limit)
            self.parameter["N"], self.parameter["M"] = boys, girls

            count = 0
            if boys + 3 >= girls :
                count += arrangements(boys + 3, girls) * arrangements(boys + 2, boys + 2)
            if boys + 2 >= girls :
                count -= 2 * arrangements(boys + 2, girls) * arrangements(boys + 1, boys + 1)

        self.parameter["reference_answer"] = count


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the generated N and M."""
        values = self.parameter
        return self.prompt_template.format(N = values["N"], M = values["M"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; return None when it is missing or not an integer."""
        if answer is None :
            return None
        stripped = answer.strip()
        try :
            return int(stripped)
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a raw model output against the reference count.

        Unparsable or negative answers score ``wrong_format``. Otherwise the
        configured strategy is applied; an unknown strategy raises
        NotImplementedError.
        """
        guess = self.processor(output)
        if guess is None or guess < 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            gold = self.parameter["reference_answer"]
            ratio = min(gold, guess) / max(gold, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (guess == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None:
    """
    Pick N in [3, MAX_N] and count the valid subsets of {1..N}.

    A subset is valid when it never contains both x and 2x, nor both x and 3x.
    Writes ``N`` and ``reference_answer`` into ``self.parameter``.

    Numbers are grouped into independent components of the form
    root * 2^i * 3^j. Each component is counted with a bitmask DP over its
    "times 2" levels, where a level's bits are that level's "times 3" chain.
    The answer is the product of the component counts.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 3, "N should be greater than or equal to 3"

    N = self.parameter["N"] = random.randint(3, MAX_N)

    # seen[v] is True once value v has been assigned to a component.
    seen = [False] * (N + 1)

    def count_component(root):
        # Length of each level's 3-chain: base, 3*base, 9*base, ... <= N,
        # for base = root, 2*root, 4*root, ... <= N.
        chain_lengths = []
        base = root
        while base <= N:
            length = 0
            value = base
            while value <= N:
                seen[value] = True
                length += 1
                value *= 3
            chain_lengths.append(length)
            base *= 2

        # ways[mask] = number of valid selections so far whose last level is
        # `mask`. The table starts with a virtual empty level.
        ways = [1]
        for length in chain_lengths:
            size = 1 << length
            # A mask is legal on its own when no two adjacent bits are set
            # (that would pick both u and 3u).
            legal = [mask for mask in range(size) if (mask & (mask << 1)) == 0]
            following = [0] * size
            for previous_mask, count in enumerate(ways):
                if not count:
                    continue
                for mask in legal:
                    # Sharing a bit would pick both u and 2u.
                    if (previous_mask & mask) == 0:
                        following[mask] += count
            ways = following
        return sum(ways)

    answer = 1
    for x in range(1, N + 1):
        if not seen[x]:
            answer *= count_component(x)

    self.parameter["reference_answer"] = answer
class NotContainingStringCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3193
    """Counting task: binary strings of length N that avoid a given substring, modulo MOD."""

    prompt_template = \
r"""Please count the number of binary (0/1) strings of length {N} that do **NOT** contain the substring {pattern}

Output the result modulo {MOD}."""

    def __init__(self,
                 max_MOD : int = 10000,
                 wrong_format: float = -1.0, wrong_range: float = -0.5, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs) -> None:
        """
        Initialize the NotContainingStringCounting_Environment instance.

        Args:
            max_MOD: upper bound (inclusive) for the randomly drawn modulus.
            wrong_format: score for an answer that cannot be parsed.
            wrong_range: score for an integer outside [0, MOD).
            correct_answer: score for the exact reference value.
            wrong_answer: score for an in-range but incorrect value.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD

        self.rewards = dict(
            wrong_format = wrong_format,
            wrong_range = wrong_range,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Draw N, a random binary pattern shorter than N, and MOD, then count.

        Reads ``MAX_N`` (>= 3) and ``MAX_M`` (>= 2). Writes ``N``, ``pattern``,
        ``MOD`` and ``reference_answer`` into ``self.parameter``.

        The count uses the KMP automaton of the pattern: state k means the
        last k characters match the pattern's first k. The transition matrix
        is raised to the N-th power modulo MOD, and the entries reachable from
        state 0 that are not the "pattern found" state are summed.
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 3, "MAX_N should be greater than or equal to 3"
        N = self.parameter["N"] = random.randint(3, MAX_N)

        assert "MAX_M" in self.parameter, "MAX_M is required in parameter"
        MAX_M = self.parameter["MAX_M"]
        assert MAX_M >= 2, "MAX_M should be greater than or equal to 2"
        M = random.randint(2, min(N - 1, MAX_M))
        one_probability = random.random()
        bits = []
        for _ in range(M) :
            bits.append("1" if random.random() < one_probability else "0")
        pattern = self.parameter["pattern"] = "".join(bits)

        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

        # fallback[i]: length of the longest proper prefix of pattern[:i+1]
        # that is also a suffix of it (the KMP prefix function).
        fallback = [0] * M
        matched = 0
        for position in range(1, M) :
            while matched > 0 and pattern[position] != pattern[matched] :
                matched = fallback[matched - 1]
            if pattern[position] == pattern[matched] :
                matched += 1
            fallback[position] = matched

        # step[s][t]: number of digits taking state s to state t.
        # States 0..M-1 are partial matches; state M is the absorbing "found" state.
        size = M + 1
        step = [[0] * size for _ in range(size)]
        for state in range(M) :
            for digit in "01" :
                target = state
                while target > 0 and digit != pattern[target] :
                    target = fallback[target - 1]
                if digit == pattern[target] :
                    target += 1
                step[state][target] += 1
        step[M][M] = 2

        def product(left, right) :
            # Square-matrix product with every entry reduced modulo MOD.
            columns = list(zip(*right))
            return [[sum(a * b for a, b in zip(row, column)) % MOD for column in columns] for row in left]

        # Binary exponentiation, starting from the identity matrix.
        power = [[int(r == c) for c in range(size)] for r in range(size)]
        base = step
        exponent = N
        while exponent > 0 :
            if exponent & 1 :
                power = product(power, base)
            base = product(base, base)
            exponent >>= 1

        # Start in state 0 and exclude the forbidden state M.
        self.parameter["reference_answer"] = sum(power[0][:M]) % MOD


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N, the forbidden pattern and MOD."""
        values = self.parameter
        return self.prompt_template.format(N = values["N"], pattern = values["pattern"], MOD = values["MOD"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; return None when it is missing or not an integer."""
        if answer is None :
            return None
        stripped = answer.strip()
        try :
            return int(stripped)
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a raw model output.

        Returns ``wrong_format`` when nothing parses, ``wrong_range`` when the
        value is outside [0, MOD), and otherwise ``correct_answer`` or
        ``wrong_answer`` depending on equality with the reference.
        """
        guess = self.processor(output)
        if guess is None :
            return self.rewards["wrong_format"]
        if not (0 <= guess < self.parameter["MOD"]) :
            return self.rewards["wrong_range"]
        verdict = "correct_answer" if guess == self.parameter["reference_answer"] else "wrong_answer"
        return self.rewards[verdict]
def _generate(self) -> None:
    """
    Pick N in [1, MAX_N] and K in [1, N], then count partitions of N into K parts.

    Writes ``N``, ``K`` and ``reference_answer`` into ``self.parameter``.

    Uses the recurrence p(n, k) = p(n - 1, k - 1) + p(n - k, k). Either the
    partition contains a part equal to 1 (remove it), or every part is at
    least 2 (subtract 1 from each of the k parts). The second term only
    exists when n > k.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 1, "N should be greater than or equal to 1"

    N = self.parameter["N"] = random.randint(1, MAX_N)
    K = self.parameter["K"] = random.randint(1, N)

    # table[n][k] = number of partitions of n into exactly k positive parts.
    table = [[0] * (K + 1) for _ in range(N + 1)]
    for total in range(1, N + 1):
        table[total][1] = 1  # a single part: the number itself

    for total in range(2, N + 1):
        previous_row = table[total - 1]
        for parts in range(2, K + 1):
            count = previous_row[parts - 1]
            if total > parts:
                count += table[total - parts][parts]
            table[total][parts] = count

    self.parameter["reference_answer"] = table[N][K]
class Numbrix_Environment(VerifiableEnvironment) :
    """Numbrix puzzle: complete a grid so that consecutive numbers 0..N*M-1 form an orthogonally connected path."""

    prompt_template = \
r"""You are given a {N} × {M} matrix with some cells filled with numbers from `0` to `{NM_minus_1}`, and some cells empty (represented by `-1`). Please fill the empty cells with numbers from `0` to `{NM_minus_1}` such that:
1. Each number from `0` to `{NM_minus_1}` appears **exactly once** in the matrix.
2. Each number is **horizontally or vertically adjacent** to the next number (i.e., every number `x` is adjacent to `x + 1`).

The matrix is given as follows:
{matrix}

**Output Format:** Your final answer should contain {N} lines, each with {M} numbers, separated by spaces. The numbers should represent the completed matrix in **row-major order**, matching the format of the given input."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(1/path)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the Numbrix_Environment instance.

        Args:
            wrong_format: score for an unparsable answer or a grid of the wrong shape.
            invalid_solution: score for a grid that changes a given cell, uses a
                value outside 0..N*M-1, or repeats a value.
            rewarding_strategy: "(1/path)^beta" for partial credit, or "path=1" for all-or-nothing.
            rewarding_weight: multiplier applied to the computed reward.
            rewarding_beta: exponent used by the "(1/path)^beta" strategy.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Generate a random N x M Numbrix puzzle.

        Reads ``MAX_N_M`` (>= 2) and ``sparsity`` (strictly between 0 and 1).
        Writes ``N``, ``M``, ``matrix`` (the puzzle, with -1 for blanked cells)
        and ``reference_answer`` (the full solution as text).

        A random Hamiltonian path over the grid is found by backtracking.
        Candidates are shuffled and then tried in order of fewest unvisited
        neighbours first, and a branch is pruned as soon as the unvisited
        cells stop being connected.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, MAX_N_M)
        M = self.parameter["M"] = random.randint(2, MAX_N_M)

        dirs = [(0, 1), (0, -1), (1, 0), (-1, 0)]

        # Search state shared by the helpers below. It is kept in this call's
        # closure rather than in module globals, so nothing leaks out of
        # _generate and separate calls never share state.
        visited = []
        order = []

        def is_inside(x, y):
            return 0 <= x < N and 0 <= y < M

        def count_unvisited_degree(x, y):
            # Number of still-unvisited orthogonal neighbours of (x, y).
            cnt = 0
            for dx, dy in dirs:
                nx, ny = x + dx, y + dy
                if is_inside(nx, ny) and not visited[nx][ny]:
                    cnt += 1
            return cnt

        def check_connectivity(remain):
            # True when the `remain` unvisited cells form one connected region
            # (or when there are none left).
            start = None
            for i in range(N):
                for j in range(M):
                    if not visited[i][j]:
                        start = (i, j)
                        break
                if start:
                    break
            if not start:
                return True
            stack = [start]
            seen = {start}
            count = 1
            while stack:
                x, y = stack.pop()
                for dx, dy in dirs:
                    xx, yy = x + dx, y + dy
                    if is_inside(xx, yy) and not visited[xx][yy] and (xx, yy) not in seen:
                        seen.add((xx, yy))
                        stack.append((xx, yy))
                        count += 1
            return count == remain

        def DFS(step, x, y):
            # Try to place numbers step, step+1, ... starting next to (x, y).
            if step == N * M:
                return True
            cand = []
            for dx, dy in dirs:
                nx, ny = x + dx, y + dy
                if is_inside(nx, ny) and not visited[nx][ny]:
                    cand.append((nx, ny))
            if not cand:
                return False
            # Shuffle first so that the stable sort breaks degree ties randomly.
            random.shuffle(cand)
            cand_scores = [(count_unvisited_degree(nx, ny), nx, ny) for nx, ny in cand]
            cand_scores.sort(key=lambda t: t[0])
            for _, nx, ny in cand_scores:
                visited[nx][ny] = True
                order[nx][ny] = step
                remain = N * M - (step + 1)
                if check_connectivity(remain):
                    if DFS(step + 1, nx, ny):
                        return True
                visited[nx][ny] = False
                order[nx][ny] = -1
            return False

        def generate_random_hamiltonian_path():
            # Retry from fresh random start cells until the search succeeds.
            nonlocal visited, order
            while True:
                sx = random.randint(0, N - 1)
                sy = random.randint(0, M - 1)
                visited = [[False] * M for _ in range(N)]
                order = [[-1] * M for _ in range(N)]
                visited[sx][sy] = True
                order[sx][sy] = 0
                if DFS(1, sx, sy):
                    return order

        self.parameter["matrix"] = matrix = generate_random_hamiltonian_path()
        # Render the full solution before any cell is blanked out below.
        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in matrix)

        assert "sparsity" in self.parameter, "sparsity is required in parameter"
        sparsity = self.parameter["sparsity"]
        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
        empty_cells = random.sample(range(N * M), max(1, int(N * M * sparsity)))
        for cell in empty_cells :
            row, column = divmod(cell, M)
            matrix[row][column] = -1


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the grid size and the puzzle matrix."""
        N, M = self.parameter["N"], self.parameter["M"]
        return self.prompt_template.format(
            N = N,
            M = M,
            NM_minus_1 = N * M - 1,
            matrix = "\n".join(" ".join(map(str, row)) for row in self.parameter["matrix"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Parse the answer into a list of integer rows.

        Blank lines are skipped. Returns None when the answer is missing or
        any token is not an integer.
        """
        if answer is None :
            return None
        answer = answer.strip()
        try :
            matrix = []
            for line in answer.splitlines() :
                line = line.strip()
                if line :
                    matrix.append(list(map(int, line.split())))
            return matrix
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a raw model output.

        ``wrong_format`` is returned for unparsable output or a grid that is
        not N x M. ``invalid_solution`` is returned when a given cell was
        changed, a value is out of range, or a value repeats. Otherwise
        ``path`` is 1 plus the number of consecutive pairs (v, v+1) that are
        not orthogonally adjacent, and the configured strategy turns that into
        a reward. An unknown strategy raises NotImplementedError.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N, M = self.parameter["N"], self.parameter["M"]
        solution = processed_result

        if len(solution) != N or any(len(row) != M for row in solution) :
            return self.rewards["wrong_format"]

        # location[v] = (row, column) of value v in the submitted grid.
        location = [None] * (N * M)
        for i, (original_row, solution_row) in enumerate(zip(self.parameter["matrix"], solution)) :
            for j, (original_value, solution_value) in enumerate(zip(original_row, solution_row)) :
                if original_value != -1 and original_value != solution_value :
                    return self.rewards["invalid_solution"]
                if not (0 <= solution_value < N * M) :
                    return self.rewards["invalid_solution"]
                if location[solution_value] is not None :
                    return self.rewards["invalid_solution"]
                location[solution_value] = (i, j)

        path = 1
        for value in range(N * M - 1) :
            assert location[value] is not None, "location[{}] should not be None".format(value)
            assert location[value + 1] is not None, "location[{}] should not be None".format(value + 1)
            x1, y1 = location[value]
            x2, y2 = location[value + 1]
            path += int(abs(x1 - x2) + abs(y1 - y2) != 1)

        if self.rewards["rewarding_strategy"] == "(1/path)^beta" :
            return self.rewards["rewarding_weight"] * ((1 / path) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "path=1" :
            return self.rewards["rewarding_weight"] * (path == 1)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class OddVisitation_Environment(VerifiableEnvironment) :
    """Graph task: find a walk in a connected undirected graph that visits every vertex an odd number of times."""

    prompt_template = \
r"""You are given a **connected undirected graph** with {N} vertices labeled from 0 to {N_minus_1}. The graph contains the following undirected edges:
{edges}

Your task is to find a trajectory that visits each vertex odd numbers of times, and the starting and ending vertices can be arbitrary.
Formally, you should find a sequence of length $K$ (which is decided by you), $v_0, v_1, \\ldots, v_{{K-1}}$, such that:
(1) $v_i$ and $v_{{i+1}}$ are connected by an edge for all $0 \\leq i < K - 1$;
(2) for each vertex with label $v$ ($0 \\leq v < N$), the number of times it appears in the sequence is odd: \[\sum_{{i=0}}^{{K-1}} [v_i = v] \\equiv 1 \\pmod 2.\]

**Output Format:** Your output should be one single line of $K$ integers (you don't need to output $K$), separated by spaces, representing the sequence $v_0, v_1, \\ldots, v_{{K-1}}$."""

    def __init__(self,
                 wrong_format : float = -1.0,
                 invalid_solution : float = -0.5,
                 correct_solution : float = +1.0,
                 wrong_solution : float = 0.0,
                 **kwargs) :
        """
        Initialize the OddVisitation_Environment instance.

        Args:
            wrong_format: score for an answer that cannot be parsed.
            invalid_solution: score for a sequence with an out-of-range vertex
                or a step that is not an edge of the graph.
            correct_solution: score for a valid walk with every vertex visited an odd number of times.
            wrong_solution: score for a valid walk where some vertex count is even.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "correct_solution" : correct_solution,
            "wrong_solution" : wrong_solution,
        }


    def _generate(self) -> None:
        """
        Build a random connected graph on N vertices plus a reference walk.

        Reads ``N`` (>= 3) and ``edge_ratio``. Writes ``edges`` (a shuffled
        list of (u, v) pairs with u < v) and ``reference_answer``.

        A spanning tree is decoded from a random Prufer-style sequence, then
        random extra edges are added until there are about
        ``int(N * edge_ratio)`` edges. The reference walk is built on a DFS
        spanning tree rooted at vertex 0.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        # The message now states the bound that is actually enforced.
        assert N >= 3, "N should be greater than or equal to 3"

        assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter"
        edge_ratio = self.parameter["edge_ratio"]

        edges = self.parameter["edges"] = []

        # randomly generate a spanning tree using Prufer sequence
        prufer = [random.randint(0, N - 1) for _ in range(N - 2)]
        degree = [1] * N
        for v in prufer:
            degree[v] += 1
        leaves = [i for i in range(N) if degree[i] == 1]
        for v in prufer:
            u = leaves.pop(0)
            edges.append((min(u, v), max(u, v)))
            degree[u] -= 1
            degree[v] -= 1
            if degree[u] == 1:
                leaves.append(u)
            if degree[v] == 1 and v not in leaves:
                leaves.append(v)
        u = leaves.pop(0)
        v = leaves.pop(0)
        if u > v:
            u, v = v, u
        edges.append((u, v))

        num_edges = int(N * edge_ratio)
        if len(edges) < num_edges :
            remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set(edges))
            remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges)))
            edges += remaining_edges
        random.shuffle(edges)

        for u, v in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)), "edges should be unique"


        # generate reference answer
        adjacency = [[] for _ in range(N)]
        for u, v in self.parameter["edges"]:
            adjacency[u].append(v)
            adjacency[v].append(u)

        # sons[u] = children of u in a DFS spanning tree rooted at 0.
        sons = [[] for _ in range(N)]
        visited = [False] * N
        def dfs1(u, fa):
            visited[u] = True
            for v in adjacency[u]:
                if v != fa and not visited[v]:
                    sons[u].append(v)
                    dfs1(v, u)
        dfs1(0, -1)

        answer = []
        def dfs2(u):
            # Walk u's subtree and come back to u. Returns True when u has so
            # far been appended an odd number of times.
            u_visit = 1
            answer.append(u)
            for v in sons[u]:
                finished = dfs2(v)
                u_visit += 1
                answer.append(u)
                if not finished:
                    # v's count is even: step down to v and back to fix its parity.
                    answer.append(v)
                    u_visit += 1
                    answer.append(u)
            return u_visit % 2 == 1
        dfs2(0)
        if sum(1 for v in answer if v == 0) % 2 == 0:
            # The root's parity is fixed by dropping its final occurrence.
            assert answer[-1] == 0, "The last vertex should be 0 to ensure odd visitation."
            answer = answer[:-1]

        self.parameter["reference_answer"] = " ".join(map(str, answer))


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and the edge list, one "(u, v)" per line."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[list] :
        """
        Parse the answer into a list of vertex labels.

        Returns None when the answer is missing or any whitespace-separated
        token is not an integer.
        """
        if answer is None :
            return None
        answer = answer.strip()
        try :
            return list(map(int, answer.split()))
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a raw model output.

        ``wrong_format`` is returned for unparsable output and
        ``invalid_solution`` for an out-of-range vertex or a step that is not
        an edge. Otherwise the result is ``correct_solution`` when every
        vertex appears an odd number of times, and ``wrong_solution`` if not.
        """
        seq = self.processor(output)
        if seq is None :
            return self.rewards["wrong_format"]

        cnt = [0] * self.parameter["N"]
        for v in seq :
            if 0 <= v < self.parameter["N"] :
                cnt[v] += 1
            else :
                return self.rewards["invalid_solution"]

        # Edges are stored with the smaller endpoint first.
        edges = set(map(tuple, self.parameter["edges"]))
        for i in range(len(seq) - 1) :
            u, v = seq[i], seq[i + 1]
            if u > v:
                u, v = v, u
            if (u, v) not in edges:
                return self.rewards["invalid_solution"]

        if any(c % 2 == 0 for c in cnt) :
            return self.rewards["wrong_solution"]
        assert all(c % 2 == 1 for c in cnt), "All vertices should be visited odd times."
        return self.rewards["correct_solution"]
+ """ + super().__init__(**kwargs) + + self.weight_multiple = weight_multiple + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + A = self.parameter["A"] = random.sample(range(1, N * self.weight_multiple + 1), N) + + + U = max(A) + + # compute Omega(n): number of prime factors of n with multiplicity + num = [0] * (U + 1) + primes = [] + for i in range(2, U + 1): + if num[i] == 0: + primes.append(i) + num[i] = 1 + for p in primes: + x = p * i + if x > U: + break + num[x] = num[i] + 1 + if i % p == 0: + break + + # build linked lists of positions for each value + t = [-1] * (U + 1) + next_idx = [-1] * N + for i, v in enumerate(A): + next_idx[i] = t[v] + t[v] = i + + # initialize answers + INF = U + 1 + ans = [INF] * N + ansj = [-1] * N + + # for each possible divisor x + for x in range(1, U + 1): + # collect all indices i with A[i] divisible by x + q = [] + for m in range(x, U + 1, x): + j = t[m] + while j != -1: + q.append(j) + j = next_idx[j] + if not q: + continue + + # find index b in q with minimal num[A[b]] (tie-break on smaller index) + b = q[0] + for i in range(1, len(q)): + qi = q[i] + if num[A[qi]] < num[A[b]] or (num[A[qi]] == num[A[b]] and qi < b): + # swap b and q[i] + q[i], b = b, qi + + # update distances using this common divisor x + common = num[x] << 1 + for i in range(1, len(q)): + a_i = q[i] + d = num[A[a_i]] + num[A[b]] - common + + # update for a_i + if d < ans[a_i] or (d == ans[a_i] and b < ansj[a_i]): + ans[a_i] = d + ansj[a_i] = b + + # update for b + if d < ans[b] or (d == ans[b] and a_i < ansj[b]): + ans[b] = d + ansj[b] = a_i + + self.parameter["gold_answer"] = ansj + 
self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"])) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if not all(0 <= j < self.parameter["N"] and j != i for i, j in enumerate(processed_result)) : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/pair_more_one_counting/__init__.py b/server/Gym/environments/pair_more_one_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b08f52bfe023853bcb998b377526d3242d1b812 --- /dev/null +++ b/server/Gym/environments/pair_more_one_counting/__init__.py @@ -0,0 +1 @@ +from .environment 
class PairMoreOneCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3726
    # Task: count pairs of binary strings (S, T), |S| = M + delta, |T| = M,
    # where S has strictly more 1s than T; answer is reported modulo 10^K.
    prompt_template = \
r"""Please count the number of pairs of binary strings (S, T) such that:
- The length of S is {N} = {M} + {delta}, and the length of T is {M}.
- The number of 1s in S is strictly greater than the number of 1s in T.

Please output the result modulo 10^{K}."""

    def __init__(self,
                 max_K: int = 5,
                 wrong_format: float = -1.0,
                 wrong_range: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """
        Store the largest allowed modulus exponent and the reward table.

        ``max_K`` (>= 1) caps the ``K`` in "modulo 10^K"; the other keyword
        arguments are the rewards returned by ``scorer``.
        """
        super().__init__(**kwargs)

        self.max_K = max_K
        assert self.max_K >= 1, "max_K must be at least 1"

        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """
        Draw ``M``, ``delta`` and ``K`` and compute the reference count.

        Reads ``MAX_M`` (>= 1) and ``MAX_delta`` (>= 0) from
        ``self.parameter``; writes ``"M"``, ``"delta"``, ``"K"`` and
        ``"reference_answer"``.  All arithmetic is done modulo 2 * 10^K so the
        final halving is exact before reducing to 10^K.
        """
        assert "MAX_M" in self.parameter, "MAX_M must be set in the parameter"
        max_m = self.parameter["MAX_M"]
        assert max_m >= 1, "MAX_M must be at least 1"

        assert "MAX_delta" in self.parameter, "MAX_delta must be set in the parameter"
        max_delta = self.parameter["MAX_delta"]
        assert max_delta >= 0, "MAX_delta must be at least 0"

        M = self.parameter["M"] = random.randint(1, max_m)
        delta = self.parameter["delta"] = random.randint(0, max_delta)
        N = M + delta
        K = self.parameter["K"] = random.randint(1, self.max_K)

        mod_out = 10 ** K
        mod_work = mod_out * 2  # = 2^(K+1) * 5^K

        # Prime-power factorisation of the working modulus.
        exponent = {2: K + 1, 5: K}
        modulus = {p: p ** e for p, e in exponent.items()}

        def unit_prefix_products(p):
            # table[i] = product of all j <= i with p not dividing j, mod p^e
            mod = modulus[p]
            table = [1] * (mod + 1)
            for i in range(1, mod + 1):
                table[i] = table[i - 1] if i % p == 0 else (table[i - 1] * i) % mod
            return table

        tables = {p: unit_prefix_products(p) for p in (2, 5)}

        def factorial_unit_part(n, p):
            # n! with every factor p removed, mod p^e (iterative form of the
            # usual n -> n // p recursion).
            mod, table = modulus[p], tables[p]
            result = 1
            while n > 1:
                result = result * pow(table[mod], n // mod, mod) % mod * table[n % mod] % mod
                n //= p
            return result

        def legendre(n, p):
            # Exponent of p in n!
            total = 0
            while n:
                n //= p
                total += n
            return total

        def binom_mod_prime_power(n, m, p):
            # C(n, m) mod p^e
            mod = modulus[p]
            power = legendre(n, p) - legendre(m, p) - legendre(n - m, p)
            if power >= exponent[p]:
                return 0
            value = factorial_unit_part(n, p)
            value = value * pow(factorial_unit_part(m, p), -1, mod) % mod
            value = value * pow(factorial_unit_part(n - m, p), -1, mod) % mod
            return value * pow(p, power, mod) % mod

        def binom(n, m):
            # C(n, m) mod 2 * 10^K, gluing the two residues with CRT.
            r2 = binom_mod_prime_power(n, m, 2)
            r5 = binom_mod_prime_power(n, m, 5)
            lift = (r5 - r2) * pow(modulus[2], -1, modulus[5]) % modulus[5]
            return (r2 + modulus[2] * lift) % (modulus[2] * modulus[5])

        if delta == 0:
            # Equal lengths: (4^N - C(2N, N)) / 2
            doubled = (pow(2, 2 * N, mod_work) - binom(2 * N, N)) % mod_work
        else:
            # Unequal lengths: (2^(N+M) + sum_{i=1}^{delta-1} C(N+M, M+i)) / 2
            doubled = pow(2, N + M, mod_work)
            for i in range(1, delta):
                doubled = (doubled + binom(N + M, M + i)) % mod_work
        answer = (doubled // 2) % mod_out

        self.parameter["reference_answer"] = answer

    def _prompt_generate(self) -> str:
        """Fill the prompt template with ``N = M + delta``, ``M``, ``delta`` and ``K``."""
        M = self.parameter["M"]
        delta = self.parameter["delta"]
        return self.prompt_template.format(N=M + delta, M=M, delta=delta, K=self.parameter["K"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse a single integer; ``None`` when absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Compare the parsed answer with ``reference_answer``.

        ``self.processor`` is defined outside this class (presumably it ends up
        calling ``_process``).  Values outside ``[0, 10^K)`` earn
        ``wrong_range``.
        """
        value = self.processor(output)
        if value is None:
            return self.rewards["wrong_format"]
        if not (0 <= value < (10 ** self.parameter["K"])):
            return self.rewards["wrong_range"]
        if value == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class PalembangBridges_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3644
    # Task: place K (1 or 2) points P[j] minimising
    # sum_i min_j (|P[j] - S[i]| + |P[j] - T[i]|).
    prompt_template = \
r"""You are given two arrays S and T, each of length {N}, provided as: {S_and_T}

Your task is to choose {K} integers P[j] (1 <= j <= {K}) such that the following total cost is minimized: for each i from 1 to {N}, compute min(|P[j] - S[i]| + |P[j] - T[i]|) over all 1 ≤ j ≤ {K}, and take the sum over all i. Output the {K} integers P[j] (1 <= j <= {K}) in a single line, separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Store the reward table; ``kwargs`` are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """
        Sample ``K``, ``S`` and ``T`` and compute the minimum total cost.

        Reads ``self.parameter["N"]`` (must be >= 3); writes ``"K"``, ``"S"``,
        ``"T"`` and ``"gold_answer"`` (the optimal cost, not the positions).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = self.parameter["K"] = random.randint(1, 2)

        S = self.parameter["S"] = [random.randint(0, N) for _ in range(N)]
        T = self.parameter["T"] = [random.randint(0, N) for _ in range(N)]

        pairs = list(zip(S, T))

        def running_costs(sequence):
            # costs[k] = min over p of sum(|x - p|) across both endpoints of the
            # first k + 1 pairs, maintained with a two-heap running median.
            low, high = [], []       # low: max-heap (negated values); high: min-heap
            low_sum = high_sum = 0
            costs = []
            for pair in sequence:
                for value in pair:
                    if not low or value <= -low[0]:
                        heapq.heappush(low, -value)
                        low_sum += value
                    else:
                        heapq.heappush(high, value)
                        high_sum += value

                    # Keep ceil(total / 2) elements in the lower half.
                    want = (len(low) + len(high) + 1) // 2
                    while len(low) > want:
                        moved = -heapq.heappop(low)
                        low_sum -= moved
                        heapq.heappush(high, moved)
                        high_sum += moved
                    while len(low) < want:
                        moved = heapq.heappop(high)
                        high_sum -= moved
                        heapq.heappush(low, -moved)
                        low_sum += moved

                total = len(low) + len(high)
                half = (total + 1) // 2
                median = -low[0]
                # sum over low of (median - x) + sum over high of (x - median)
                costs.append(half * median - low_sum + high_sum - (total - half) * median)
            return costs

        if K == 1:
            # One point: the median of all endpoints is optimal.
            self.parameter["gold_answer"] = running_costs(pairs)[-1]
        else:
            # Two points: sort by s + t, then some prefix uses one point and
            # the remaining suffix uses the other.
            ordered = sorted(pairs, key=lambda st: st[0] + st[1])
            count = len(ordered)

            prefix = [0] + running_costs(ordered)                  # prefix[i]: first i pairs
            suffix = running_costs(reversed(ordered))[::-1] + [0]  # suffix[i]: pairs i..count-1

            self.parameter["gold_answer"] = min(prefix[i] + suffix[i] for i in range(count + 1))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with ``N``, ``K`` and the 1-based S/T listing."""
        listing = "; ".join(
            "S[{}]={}, T[{}]={}".format(i, Si, i, Ti)
            for i, (Si, Ti) in enumerate(zip(self.parameter["S"], self.parameter["T"]), start=1)
        )
        return self.prompt_template.format(N=self.parameter["N"], K=self.parameter["K"], S_and_T=listing)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated list of integers; ``None`` if impossible."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """
        Evaluate the cost of the proposed positions against the gold cost.

        ``self.processor`` is defined outside this class (presumably it ends up
        calling ``_process``).  A list whose length differs from ``K`` earns
        ``invalid_solution``.
        """
        positions = self.processor(output)
        if positions is None:
            return self.rewards["wrong_format"]
        assert isinstance(positions, list), "processed_result should be a list"

        if len(positions) != self.parameter["K"]:
            return self.rewards["invalid_solution"]

        answer = sum(
            min(abs(p - s) + abs(p - t) for p in positions)
            for s, t in zip(self.parameter["S"], self.parameter["T"])
        )
        gold = self.parameter["gold_answer"]
        assert 0 <= gold <= answer, "gold_answer should be non-negative and less than or equal to answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            if answer == 0:
                assert gold == 0, "If answer is 0, gold should also be 0"
                return self.rewards["rewarding_weight"] * 1.0
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class PalindromePartitionCounting_Environment(VerifiableEnvironment):
    # Task: count the ways to cut a random binary string into palindromic pieces.
    prompt_template = r"""Please count the number of ways to partition the string `{S}` into (non-empty) palindromic substrings, where the number of substrings is arbitrary."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Store the reward table; ``kwargs`` are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """
        Sample a binary string of length ``N`` and count its palindromic partitions.

        Reads ``self.parameter["N"]`` (must be >= 2); writes ``"S"`` and
        ``"reference_answer"``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        # Despite the original variable name, this is the chance of emitting
        # '1': a True comparison indexes "01" at position 1.
        threshold = random.randint(1, 9) / 10
        S = "".join("01"[random.random() < threshold] for _ in range(N))
        self.parameter["S"] = S

        # ways[i] = number of palindromic partitions of the prefix S[:i]
        ways = [1] + [0] * N
        for end in range(1, N + 1):
            ways[end] = sum(
                ways[start]
                for start in range(end)
                if S[start:end] == S[start:end][::-1]
            )
        self.parameter["reference_answer"] = ways[N]

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the generated string."""
        return self.prompt_template.format(S=self.parameter["S"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse a single integer; ``None`` when absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Score the parsed count against ``reference_answer``.

        ``self.processor`` is defined outside this class (presumably it ends up
        calling ``_process``).  Non-positive answers are treated like a format
        error.
        """
        guess = self.processor(output)
        if guess is None:
            return self.rewards["wrong_format"]
        if guess <= 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        reference = self.parameter["reference_answer"]
        if strategy == "(min/max)^beta":
            ratio = min(reference, guess) / max(reference, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (guess == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class PalindromicSubstringNumberCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3413
    # Task: count integers in [L, R] whose decimal form contains a palindromic
    # substring of length > 1.
    prompt_template = \
r"""We treat every positive integer as a string of digits (without leading zeros). A number is called a `good number` if it contains at least one palindromic substring of length **greater than 1**.

For example:
- 101 is a good number because it contains the substring "101",
- 110 is a good number because it contains the substring "11",
- But 102 and 1201 are not good numbers because they do not contain any palindromic substring of length greater than 1.

Please count how many good numbers exist in the range [{L}, {R}] (inclusive)."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Store the reward table; ``kwargs`` are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """
        Sample the range ``[L, R]`` and count the good numbers inside it.

        Reads ``self.parameter["MAX_R"]`` (must be >= 20); writes ``"R"``,
        ``"L"`` and ``"reference_answer"``.
        """
        assert "MAX_R" in self.parameter, "MAX_R is required in parameter"
        max_r = self.parameter["MAX_R"]
        assert max_r >= 20, "MAX_R should be greater than or equal to 20"

        upper = self.parameter["R"] = random.randint(20, max_r)
        lower = self.parameter["L"] = random.randint(1, upper - 1)

        def count_good_up_to(bound: int) -> int:
            # Digit DP over the decimal digits of `bound`, most significant first.
            digits = [int(ch) for ch in str(bound)]
            length = len(digits)
            memo = {}

            def walk(pos, prev, prev2, found, leading, leading_before, tight):
                # prev / prev2: the last and second-to-last digits placed.
                # leading: only leading zeros so far (prev is not a real digit).
                # leading_before: same flag one step earlier (prev2 not real).
                # tight: the prefix equals the bound's prefix.
                if pos == length:
                    return 1 if found else 0

                key = (pos, prev, prev2, found, leading, leading_before)
                if not tight and key in memo:
                    return memo[key]

                limit = digits[pos] if tight else 9
                total = 0
                for digit in range(limit + 1):
                    # A palindrome of length 2 or 3 ends at this digit.
                    hit = (
                        found
                        or ((not leading) and digit == prev)
                        or ((not leading_before) and digit == prev2)
                    )
                    total += walk(
                        pos + 1, digit, prev, hit,
                        leading and (digit == 0), leading,
                        tight and (digit == limit),
                    )

                # Only non-tight states are reusable across branches.
                if not tight:
                    memo[key] = total
                return total

            return walk(0, 0, 0, False, True, True, True)

        # L >= 1, so L - 1 is a non-negative integer and str() already gives
        # its canonical decimal form.
        self.parameter["reference_answer"] = count_good_up_to(upper) - count_good_up_to(lower - 1)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the sampled range."""
        return self.prompt_template.format(L=self.parameter["L"], R=self.parameter["R"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse a single integer; ``None`` when absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Score the parsed count against ``reference_answer``.

        ``self.processor`` is defined outside this class (presumably it ends up
        calling ``_process``).  Negative answers are treated like a format
        error; an answer of 0 is handled separately to avoid dividing by zero.
        """
        guess = self.processor(output)
        if guess is None:
            return self.rewards["wrong_format"]
        if guess < 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        reference = self.parameter["reference_answer"]
        if strategy == "(min/max)^beta":
            if guess == 0:
                return self.rewards["rewarding_weight"] * (reference == 0)
            ratio = min(reference, guess) / max(reference, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (guess == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class PanSolarPanels_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3579
    # Task: pick X in [A, B] and Y in [C, D] so that gcd(X, Y) is as large as possible.
    prompt_template = \
r"""Output two integers X and Y (separated by a space), such that:
- {A} ≤ X ≤ {B}
- {C} ≤ Y ≤ {D}
- gcd(X, Y) is maximized (where gcd stands for greatest common divisor)"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the PanSolarPanels_Environment instance.

        The keyword arguments are stored in ``self.rewards`` and read back by
        ``scorer``; ``kwargs`` are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Sample two disjoint intervals and compute the best achievable gcd.

        Reads ``self.parameter["MAX_A_B_C_D"]`` (must be >= 4); writes ``"A"``,
        ``"B"``, ``"C"``, ``"D"`` and ``"gold_answer"``.
        """
        assert "MAX_A_B_C_D" in self.parameter, "MAX_A_B_C_D is required in parameter"
        MAX_A_B_C_D = self.parameter["MAX_A_B_C_D"]
        assert MAX_A_B_C_D >= 4, "MAX_A_B_C_D should be greater than or equal to 4"

        # Rejection-sample four sorted values until the two intervals are disjoint (B < C).
        while True :
            numbers = [random.randint(1, MAX_A_B_C_D) for _ in range(4)]
            numbers.sort()
            A, B, C, D = numbers
            if A <= B < C <= D :
                break
        # Randomly decide which of the two intervals belongs to X.
        if random.random() < 0.5 :
            A, B, C, D = C, D, A, B
        self.parameter["A"], self.parameter["B"], self.parameter["C"], self.parameter["D"] = A, B, C, D


        def solve(A, B, C, D):
            # Walk the blocks of g on which both B // g and D // g are constant;
            # inside a block the largest g is the best candidate, so only the
            # block's right end r needs to be tested.
            res = 1
            m = min(B, D)
            p = 1
            while p <= m:
                # floor-divisions for current p
                t1 = B // p
                t2 = D // p
                # find the largest r such that B//x == t1 and D//x == t2 for all x in [p..r]
                r1 = B // t1
                r2 = D // t2
                r = min(r1, r2)
                # check if multiples of r lie within the intervals
                x = (B // r) * r
                y = (D // r) * r
                if x >= A and y >= C:
                    res = r
                # jump to the next segment
                p = r + 1
            return res
        self.parameter["gold_answer"] = solve(A, B, C, D)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the four interval bounds."""
        return self.prompt_template.format(A = self.parameter["A"], B = self.parameter["B"], C = self.parameter["C"], D = self.parameter["D"])


    def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] :
        """
        Parse exactly two whitespace-separated integers.

        Returns ``(X, Y)``, or ``None`` when ``answer`` is ``None``, contains a
        non-integer token, or does not contain exactly two tokens.
        """
        if answer is not None :
            answer = answer.strip()
            try :
                # Both a bad integer literal and a wrong token count raise
                # ValueError; catching only that keeps KeyboardInterrupt and
                # SystemExit from being swallowed, as a bare `except` would.
                X, Y = map(int, answer.split())
                return X, Y
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score gcd(X, Y) of the proposed pair against the gold gcd.

        ``self.processor`` is defined outside this class (presumably it ends up
        calling ``_process``).  Pairs outside the prescribed intervals earn
        ``invalid_solution``.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            X, Y = processed_result
            if not (self.parameter["A"] <= X <= self.parameter["B"] and self.parameter["C"] <= Y <= self.parameter["D"]) :
                return self.rewards["invalid_solution"]

            gold, answer = self.parameter["gold_answer"], math.gcd(X, Y)
            assert 0 < answer <= gold, "answer should be less than or equal to gold"
            if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
                return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (answer == gold)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
class Path_NoGoingBack_Counting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P2151
    # Task: count walks of exactly T edges from vertex 0 to vertex N-1 that
    # never immediately step back along the edge just used, modulo MOD.
    prompt_template = \
r"""You are given an **undirected graph** with {N} vertices labeled from `0` to `{N_minus_1}`. The graph contains the following undirected edges (no repeated edges):
{edges}

Please count the number of paths from vertex `0` to vertex `{N_minus_1}` that satisfy the following conditions:
- The path has exactly {T} edges.
- You may not immediately return to the previous vertex. That is, if you move along edge `(u, v)` from `u` to `v`, you cannot move back to `u` in the very next step.

**Output Format:** Your final answer should be a single integer — the number of valid paths, modulo {MOD}."""
    MOD = 10000

    def __init__(self,
                 wrong_format: float = -1.0,
                 wrong_range: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """
        Store the reward table; ``kwargs`` are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """
        Sample a connected graph and a walk length, then count the valid walks.

        Reads ``self.parameter["MAX_M"]`` (must be >= 3); writes ``"M"``,
        ``"N"``, ``"T"``, ``"edges"`` and ``"reference_answer"``.
        """
        assert "MAX_M" in self.parameter, "MAX_M must be set in the parameter"
        max_m = self.parameter["MAX_M"]
        assert max_m >= 3, "MAX_M must be at least 3"

        M = self.parameter["M"] = random.randint(3, max_m)

        # N must allow a spanning path (N - 1 <= M) and a simple graph (M <= N choose 2).
        feasible_sizes = [n for n in range(3, (M + 1) + 1) if M <= n * (n - 1) // 2]
        N = self.parameter["N"] = random.choice(feasible_sizes)
        assert N - 1 <= M <= N * (N - 1) // 2, "M must be at least N - 1 and at most N * (N - 1) / 2"

        T = self.parameter["T"] = random.randint(1, 2 ** N)

        # A random Hamiltonian path guarantees connectivity; the remaining
        # edges are drawn from the vertex pairs not used yet.
        edges = self.parameter["edges"] = []
        order = list(range(N))
        random.shuffle(order)
        for u, v in zip(order, order[1:]):
            edges.append((min(u, v), max(u, v)))
        if len(edges) < M:
            unused = set((u, v) for u in range(N) for v in range(u + 1, N)) - set(edges)
            edges += random.sample(list(unused), M - len(edges))
        random.shuffle(edges)

        for u, v in edges:
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)), "edges should be unique"

        start, end = 0, N - 1
        mod = self.MOD

        # Directed arcs as (tail, head).  Arc 0 is a virtual arc "arriving" at
        # the start vertex; each undirected edge contributes arcs 2k+1 / 2k+2,
        # which are each other's reversal.
        arcs = [(-1, start)]
        for u, v in edges:
            arcs.append((u, v))
            arcs.append((v, u))
        arc_count = len(arcs)
        twin = [-1] + [j + 1 if j % 2 == 1 else j - 1 for j in range(1, arc_count)]

        # step[i][j] = 1 when arc j may directly follow arc i.
        step = [
            [int(head == arcs[j][0] and i != j and i != twin[j]) for j in range(arc_count)]
            for i, (_, head) in enumerate(arcs)
        ]

        def multiply(left, right):
            # Modular matrix product, skipping zero coefficients of `left`.
            size = len(left)
            product = [[0] * size for _ in range(size)]
            for out_row, left_row in zip(product, left):
                for k, coeff in enumerate(left_row):
                    if not coeff:
                        continue
                    right_row = right[k]
                    for j in range(size):
                        out_row[j] = (out_row[j] + coeff * right_row[j]) % mod
            return product

        def power(matrix, exponent):
            # Binary exponentiation starting from the identity matrix.
            size = len(matrix)
            result = [[int(r == c) for c in range(size)] for r in range(size)]
            while exponent:
                if exponent & 1:
                    result = multiply(result, matrix)
                matrix = multiply(matrix, matrix)
                exponent >>= 1
            return result

        # Row 0 of step^T counts T-arc continuations of the virtual arc; keep
        # those whose final arc ends at the target vertex.
        reach = power(step, T)[0]
        answer = sum(reach[i] for i, (_, head) in enumerate(arcs) if head == end) % mod

        self.parameter["reference_answer"] = answer

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the graph, ``T`` and the modulus."""
        N = self.parameter["N"]
        edge_lines = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"])
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            edges=edge_lines,
            T=self.parameter["T"],
            MOD=self.MOD,
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse a single integer; ``None`` when absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Compare the parsed answer with ``reference_answer``.

        ``self.processor`` is defined outside this class (presumably it ends up
        calling ``_process``).  Values outside ``[0, MOD)`` earn
        ``wrong_range``.
        """
        value = self.processor(output)
        if value is None:
            return self.rewards["wrong_format"]
        if not (0 <= value < self.MOD):
            return self.rewards["wrong_range"]
        if value == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class Patrol_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3629
    """Tree patrol task: minimum closed walk from vertex 1 after adding K (1 or 2) extra edges."""

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `1` to `{N}`. It contains the following {N_minus_1} undirected edges:
{edges}

You are allowed to add {K} arbitrary edges to the tree. Each added edge can connect any two existing vertices (including possibly the same vertex); it is allowed to be a duplicate of an existing edge. After adding these {K} edges, you must start at vertex `1` (and also end at vertex `1`) and traverse a path that:
- Visits each **original edge at least once**, and
- Visits each **added edge exactly once**.

Please output the **minimum total number of edges traversed** (of course, edges that are traversed multiple times should be counted multiple times) in such a path."""

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the Patrol_Environment instance.

        The three floats are the rewards `scorer` returns for an unparsable
        answer, the exact reference answer, and any other integer.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """
        Build a random labelled tree on N vertices (1-based), pick K in {1, 2}
        and store the optimal walk length as `reference_answer`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        n = self.parameter["N"]
        assert n >= 4, "N should be greater than or equal to 4"

        # Random tree: every vertex after the first attaches to a uniformly
        # chosen earlier vertex of a random ordering.
        edges = self.parameter["edges"] = []
        ordering = list(range(n))
        random.shuffle(ordering)
        for position in range(1, n):
            child = ordering[position]
            anchor = random.choice(ordering[:position])
            edges.append((min(child, anchor) + 1, max(child, anchor) + 1))  # 1-based labels
        random.shuffle(edges)

        assert all(1 <= u < v <= n for u, v in edges)
        assert len(edges) == len(set(edges)) == n - 1

        extra = self.parameter["K"] = random.randint(1, 2)

        neighbours = [[] for _ in range(n + 1)]
        for u, v in edges:
            neighbours[u].append(v)
            neighbours[v].append(u)

        def sweep(source):
            """BFS from `source`; return (farthest vertex, its distance, parent table)."""
            distance = [-1] * (n + 1)
            came_from = [0] * (n + 1)  # 0 marks "no parent" (labels start at 1)
            distance[source] = 0
            farthest, reach = source, 0
            frontier = deque([source])
            while frontier:
                current = frontier.popleft()
                for nxt in neighbours[current]:
                    if distance[nxt] != -1:
                        continue
                    distance[nxt] = distance[current] + 1
                    came_from[nxt] = current
                    frontier.append(nxt)
                    if distance[nxt] > reach:
                        reach, farthest = distance[nxt], nxt
            return farthest, reach, came_from

        # Two sweeps give a diameter: its endpoints, length and parent chain.
        end_a = sweep(1)[0]
        end_b, diameter, came_from = sweep(end_a)

        if extra == 1:
            # One extra edge closes the diameter into a cycle.
            self.parameter["reference_answer"] = 2 * (n - 1) - diameter + 1
            return

        # K = 2: edges of the first diameter get weight -1, all others +1, and
        # the best path under these weights is the gain of the second edge.
        on_diameter = [False] * (n + 1)
        walker = end_b
        while walker != 0:
            on_diameter[walker] = True
            walker = came_from[walker]

        best_down = [0] * (n + 1)
        second_gain = 0

        def explore(vertex, parent):
            nonlocal second_gain
            for child in neighbours[vertex]:
                if child == parent:
                    continue
                explore(child, vertex)
                weight = -1 if on_diameter[vertex] and on_diameter[child] else 1
                through_child = best_down[child] + weight
                # Join the best earlier branch with this one, then extend.
                second_gain = max(second_gain, best_down[vertex] + through_child)
                best_down[vertex] = max(best_down[vertex], through_child)

        explore(1, 0)

        self.parameter["reference_answer"] = 2 * n - diameter - second_gain

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, K and one `u v` line per edge."""
        n = self.parameter["N"]
        edge_lines = ["{} {}".format(u, v) for u, v in self.parameter["edges"]]
        return self.prompt_template.format(
            N=n,
            N_minus_1=n - 1,
            K=self.parameter["K"],
            edges="\n".join(edge_lines),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; None when missing or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Reward an output by exact comparison with the reference answer."""
        parsed = self.processor(output)
        if parsed is None:
            return self.rewards["wrong_format"]
        if parsed == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class PCPPermutation_Environment(VerifiableEnvironment):
    """Find an index permutation under which the concatenations of A and B coincide."""

    prompt_template = \
r"""You are given two arrays of strings, `A` and `B`, each containing {N} strings:
{A_and_B}

Find a permutation p_0, ..., p_{N_minus_1} of the indices `0` to `{N_minus_1}` such that: `A[p_0] + ... + A[p_{N_minus_1}]` is equal to `B[p_0] + ... + B[p_{N_minus_1}]` (here, `+` denotes string concatenation).

**Output Format:** Your final answer should be a single line containing the permutation `p_0 ... p_{N_minus_1}`, separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5,
                 rewarding_strategy: str = "mean([a=b])^beta", rewarding_beta: float = 10.0, rewarding_weight: float = +1.0,
                 **kwargs):
        """
        Initialize the PCPPermutation_Environment instance.

        `rewarding_strategy` is either "mean([a=b])^beta" (partial credit by
        per-character agreement) or "a=b" (all-or-nothing).
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """
        Cut one random a/b string into N non-empty pieces twice (giving A and
        B), shuffle both arrays with the same permutation, and record the
        inverse permutation as `reference_answer`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        n = self.parameter["N"]
        assert n >= 2, "N should be greater than or equal to 2"

        assert "average_length" in self.parameter, "average_length is required in parameter"
        average_length = self.parameter["average_length"]
        assert average_length >= 1.0, "average_length should be greater than or equal to 1.0"

        total_chars = max(n + 1, random.randint(n, int(n * average_length)))
        b_bias = random.random()
        text = "".join("b" if random.random() < b_bias else "a" for _ in range(total_chars))

        for key in ("A", "B"):
            # N - 1 distinct interior cut points give N non-empty pieces.
            cuts = sorted(random.sample(range(1, total_chars), n - 1))
            bounds = [0, *cuts, total_chars]
            assert len(bounds) == n + 1, "endpoints should have length N + 1"
            self.parameter[key] = [text[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

        shuffle_order = list(range(n))
        random.shuffle(shuffle_order)
        for key in ("A", "B"):
            pieces = self.parameter[key]
            self.parameter[key] = [pieces[source] for source in shuffle_order]

        # Position `new` now holds original piece `old`; reading the new
        # positions in order of `old` restores the original string.
        inverse = [None] * n
        for new, old in enumerate(shuffle_order):
            inverse[old] = new
        self.parameter["reference_answer"] = " ".join(map(str, inverse))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and one `A[i]=... B[i]=...` line per index."""
        n = self.parameter["N"]
        strings_a, strings_b = self.parameter["A"], self.parameter["B"]
        rows = ["A[{}]={} B[{}]={}".format(i, strings_a[i], i, strings_b[i]) for i in range(n)]
        return self.prompt_template.format(
            N=n,
            N_minus_1=n - 1,
            A_and_B="\n".join(rows),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated list of integers; None on missing or invalid input."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return list(map(int, answer.strip().split()))
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a proposed permutation by how well the two concatenations agree."""
        candidate = self.processor(output)
        if candidate is None:
            return self.rewards["wrong_format"]
        assert isinstance(candidate, list), "processed_result should be a list"

        n = self.parameter["N"]
        is_permutation = (
            len(candidate) == n
            and len(set(candidate)) == n
            and all(0 <= i < n for i in candidate)
        )
        if not is_permutation:
            return self.rewards["invalid_solution"]

        joined_a = "".join(self.parameter["A"][i] for i in candidate)
        joined_b = "".join(self.parameter["B"][i] for i in candidate)
        assert len(joined_a) == len(joined_b), "concatenated_A and concatenated_B should have the same length"

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "mean([a=b])^beta":
            agreeing = sum(int(a == b) for a, b in zip(joined_a, joined_b))
            return weight * ((agreeing / len(joined_a)) ** self.rewards["rewarding_beta"])
        if strategy == "a=b":
            return weight * (joined_a == joined_b)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class PipelineArrangement_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1248
    """Two-machine flow-shop ordering task; the reference order follows Johnson's rule."""

    prompt_template = \
r"""You need to process {N} products labeled from `0` to `{N_minus_1}`. Each product must go through **two machines**, A and B, **in order**.

The processing times for each product on machines A and B are given as:
{A_and_B}

Please determine a permutation (i.e., an ordering) of all products. Each product is processed one by one in the chosen order:
- First on machine A.
- Then, after finishing on A, it waits (if needed) and is processed by machine B; meanwhile, machine A can continue processing subsequent products without any delay.
- Machine B processes one product at a time in the order they complete machine A.

Try your best to **minimize the time** when the **last product finishes** on machine B.

**Output Format:** Your final answer should be a single line containing the indices of the products in the chosen order (i.e., the permutation), separated by spaces."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the PipelineArrangement_Environment instance.

        `rewarding_strategy` is either "(gold/answer)^beta" (partial credit)
        or "gold=answer" (credit only for an optimal makespan).
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def get_finishing_time(self, order) -> int:
        """Return the time at which machine B finishes the last product of `order`."""
        times_a, times_b = self.parameter["A"], self.parameter["B"]
        clock_a = clock_b = 0
        for product in order:
            clock_a += times_a[product]
            # B can start only once A has released the product and B is free.
            clock_b = max(clock_b, clock_a) + times_b[product]
        return clock_b

    def _generate(self) -> None:
        """Sample processing times in [1, N] and store the Johnson's-rule order and its makespan."""
        assert "N" in self.parameter, "N is required in parameter"
        n = self.parameter["N"]
        assert n >= 2, "N should be greater than or equal to 2"

        times_a = self.parameter["A"] = [random.randint(1, n) for _ in range(n)]
        times_b = self.parameter["B"] = [random.randint(1, n) for _ in range(n)]

        # Key each product by its shorter stage; group 0 = faster on A.
        keyed = [
            (times_a[i], 0, i) if times_a[i] < times_b[i] else (times_b[i], 1, i)
            for i in range(n)
        ]
        keyed.sort(key=lambda entry: entry[0])

        # Group 0 goes first by increasing key; group 1 fills the schedule
        # from the back, i.e. appears by decreasing key.
        front = [product for _, group, product in keyed if group == 0]
        back = [product for _, group, product in keyed if group == 1]
        order = front + back[::-1]

        self.parameter["reference_answer"] = " ".join(map(str, order))
        self.parameter["gold_answer"] = self.get_finishing_time(order)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and one `A[i]=..., B[i]=...` line per product."""
        n = self.parameter["N"]
        times_a, times_b = self.parameter["A"], self.parameter["B"]
        rows = ["A[{}]={}, B[{}]={}".format(i, times_a[i], i, times_b[i]) for i in range(n)]
        return self.prompt_template.format(
            N=n,
            N_minus_1=n - 1,
            A_and_B="\n".join(rows),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated list of integers; None on missing or invalid input."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return list(map(int, answer.strip().split()))
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score a proposed order by its makespan relative to the stored optimum."""
        candidate = self.processor(output)
        if candidate is None:
            return self.rewards["wrong_format"]
        assert isinstance(candidate, list), "processed_result should be a list"

        n = self.parameter["N"]
        is_permutation = (
            len(candidate) == n
            and len(set(candidate)) == n
            and all(0 <= i < n for i in candidate)
        )
        if not is_permutation:
            return self.rewards["invalid_solution"]

        answer = self.get_finishing_time(candidate)
        gold = self.parameter["gold_answer"]
        assert gold <= answer

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "(gold/answer)^beta":
            return weight * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class POLPolarization_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3563
    """Orient the edges of a tree so that the number of reachable ordered pairs is maximal."""

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` to `{N_minus_1}`. The tree contains the following {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v)`, meaning there is an undirected edge **connecting vertex `u` and vertex `v`**:
{edges}

Your task is to assign a direction to each edge (i.e., for each edge `(u, v)`, you may direct it either from `u` to `v` or from `v` to `u`) to form a **directed tree**. Try your best to **maximize** the number of ordered pairs `(X, Y)` such that `X ≠ Y` and vertex `X` can **reach** vertex `Y` along directed edges (i.e., `Y` is reachable from `X` in the directed tree). Output a single integer — the maximum number of such ordered pairs `(X, Y)`."""

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the POLPolarization_Environment instance.

        The three floats are the rewards `scorer` returns for an unparsable
        answer, the exact reference answer, and any other integer.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    def _generate(self) -> None:
        """
        Build a random labelled tree on N vertices (0-based) and store the
        maximum number of reachable ordered pairs as `reference_answer`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        # Random tree: every vertex after the first attaches to a uniformly
        # chosen earlier vertex of a random ordering.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations):
            if index == 0:
                continue
            u, v = vertex, random.choice(permutations[:index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v))
        random.shuffle(edges)

        for u, v in edges:
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)) == N - 1

        adjacency = [[] for _ in range(N)]
        for u, v in edges:
            adjacency[u].append(v)
            adjacency[v].append(u)

        # First DFS: subtree sizes, used to locate the centroid (the vertex
        # whose removal leaves the smallest largest component).
        siz = [0] * N
        rt = 0
        best_ms = N

        def dfs(p, fa):
            nonlocal rt, best_ms
            siz[p] = 1
            max_sub = 0
            for v in adjacency[p]:
                if v == fa:
                    continue
                dfs(v, p)
                siz[p] += siz[v]
                if siz[v] > max_sub:
                    max_sub = siz[v]
            # The component "above" p also counts when p is removed.
            up = N - siz[p]
            if up > max_sub:
                max_sub = up
            if max_sub < best_ms:
                best_ms = max_sub
                rt = p

        dfs(0, -1)

        # Second DFS, rooted at the centroid: subtree sizes and parents.
        siz = [0] * N
        parent = [-1] * N

        def dfs2(p, fa):
            siz[p] = 1
            parent[p] = fa
            for v in adjacency[p]:
                if v == fa:
                    continue
                dfs2(v, p)
                siz[p] += siz[v]

        dfs2(rt, -1)

        # Base contribution: sum of all subtree sizes except the centroid's.
        ans = sum(siz[i] for i in range(N) if i != rt)

        # cnt[s] = number of centroid child subtrees of size s.
        cnt = [0] * (N + 1)
        for v in adjacency[rt]:
            if parent[v] == rt:
                cnt[siz[v]] += 1

        # Binary-splitting trick: while more than two items share a size,
        # merge two of them into one item of double size. This keeps the set
        # of reachable subset sums intact with far fewer items.
        for i in range(1, N // 2 + 1):
            while cnt[i] > 2:
                cnt[i] -= 2
                cnt[2 * i] += 1

        # Subset-sum over the child sizes, as a bitset held in one integer.
        dp = 1
        for i in range(1, N + 1):
            for _ in range(cnt[i]):
                dp |= dp << i

        # The most balanced reachable split i <= N // 2 maximises the
        # cross term i * (N - 1 - i).
        half = N // 2
        for i in range(half, -1, -1):
            if (dp >> i) & 1:
                ans += i * (N - i - 1)
                break

        self.parameter["reference_answer"] = ans

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and one `(u, v)` tuple per edge line."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            # The prompt text promises tuples `(u, v)`, so render them as such.
            edges="\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; None when missing or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Reward an output by exact comparison with the reference answer."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class PolyaModel_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P4204
    """Pólya urn task: probability that the listed draws all have the listed colours."""

    prompt_template = \
r"""You have a bag with balls of {T} colors. The initial counts are: {color2num}
Process:
- At each step (starting from step 1), draw one ball uniformly at random from the bag.
- Return the drawn ball to the bag, then add {D} additional balls of the **same color** to the bag.

Given the following event(s): {events}
What's the probability that **all** specified events occur? Output a single fraction `p/q` (without quotes), where `p` and `q` are coprime non-negative integers; if the probability is 0, output `0/1`; if it is 1, output `1/1`."""

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the PolyaModel_Environment instance.

        The three floats are the rewards `scorer` returns for an unparsable
        answer, the exact reduced fraction, and any other fraction.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    def _generate(self) -> None:
        """
        Sample the urn (T colours, initial counts, increment D) and a sorted
        list of (step, colour) events, then store the exact probability.

        Writes `T`, `color2num`, `D`, `events`, `reference_answer` ("p/q")
        and `gold_answer` (dict with `numerator` / `denominator`).
        """
        assert "MAX_T_N" in self.parameter, "MAX_T_N is required in parameter"
        MAX_T_N = self.parameter["MAX_T_N"]
        assert MAX_T_N >= 2, "MAX_T_N should be greater than or equal to 2"

        T = self.parameter["T"] = random.randint(2, MAX_T_N)

        color2num = self.parameter["color2num"] = [random.randint(1, MAX_T_N) for _ in range(T)]
        D = self.parameter["D"] = random.randint(1, MAX_T_N)

        N = random.randint(1, MAX_T_N)
        event_count = random.randint(1, N)
        steps = sorted(random.sample(range(1, N + 1), event_count))
        events = self.parameter["events"] = [(step, random.randint(1, T)) for step in steps]

        # The code treats the constrained draws as if they were consecutive;
        # the step numbers themselves are not used in the computation.
        counts = color2num.copy()
        total = sum(counts)
        ans = Fraction(1)
        for _, color in events:
            slot = color - 1  # colours are 1-based in the prompt
            ans *= Fraction(counts[slot], total)
            counts[slot] += D
            total += D

        # Spell out "p/q" explicitly: str(Fraction) omits "/1" for integers,
        # which would not match the format the prompt asks for.
        self.parameter["reference_answer"] = "{}/{}".format(ans.numerator, ans.denominator)
        self.parameter["gold_answer"] = dict(numerator=int(ans.numerator), denominator=int(ans.denominator))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the urn description and the event list."""
        return self.prompt_template.format(
            T=self.parameter["T"],
            color2num=", ".join("{} balls of color {}".format(num, color) for color, num in enumerate(self.parameter["color2num"], start=1)),
            D=self.parameter["D"],
            events=", ".join("at step {} the drawn ball is of color {}".format(step, color) for step, color in self.parameter["events"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[Dict[str, int]]:
        """
        Parse `p/q` into {"numerator": p, "denominator": q}.

        Returns None when the answer is missing, does not contain exactly one
        `/`, or either side is not an integer.
        """
        if answer is None:
            return None
        answer = answer.strip()
        try:
            # Both a wrong number of parts and a non-integer part raise
            # ValueError; nothing else is swallowed.
            a, b = map(int, answer.split('/'))
        except ValueError:
            return None
        return dict(numerator=a, denominator=b)

    def scorer(self, output: str) -> float:
        """Reward an output by exact comparison with the reduced reference fraction."""
        # NOTE(review): every sibling environment scores through
        # self.processor(...); it is assumed to be the base-class wrapper that
        # extracts the final answer and delegates to _process — confirm
        # against VerifiableEnvironment.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["gold_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
class PolynomialFactorization_Environment(VerifiableEnvironment):
    """Recover the integer roots of a monic polynomial given in expanded form."""

    prompt_template = \
r"""You are given a degree-{N} polynomial: (x - a_1)...(x - a_{N}) = {polynomial}

Your task is to find any valid set of integers `a_1, ..., a_{N}` (not necessarily distinct) such that the product of the linear factors on the left expands to match the given polynomial.

**Output Format:** Your final answer should be a single line containing `a_1, ..., a_{N}`, separated by **spaces**."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(satisfied/all)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the PolynomialFactorization instance.

        `rewarding_strategy` is either "(satisfied/all)^beta" (partial credit
        per matched root) or "satisfied=all" (all-or-nothing).
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample N integer roots in [-N, N] and expand the product of (x - root)."""
        assert "N" in self.parameter, "N is required in parameter"
        degree = self.parameter["N"]
        assert degree >= 3, "N should be greater than or equal to 3"

        roots = self.parameter["gold_answer"] = [random.randint(-degree, +degree) for _ in range(degree)]
        self.parameter["reference_answer"] = " ".join(map(str, roots))

        # expanded[i] is the coefficient of x^i; start from the constant 1 and
        # multiply by (x - root) once per root.
        expanded = [1] + [0] * degree
        for root in roots:
            shifted = [expanded[i - 1] - root * expanded[i] for i in range(1, degree + 1)]
            expanded = [expanded[0] * -root] + shifted
        self.parameter["coefficients"] = expanded

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the non-zero terms in increasing degree."""
        terms = [
            "({}) * x^{}".format(coefficient, exponent)
            for exponent, coefficient in enumerate(self.parameter["coefficients"])
            if coefficient != 0
        ]
        return self.prompt_template.format(
            N=self.parameter["N"],
            polynomial=" + ".join(terms),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated list of integers; None on missing or invalid input."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return list(map(int, answer.strip().split()))
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score proposed roots by multiset overlap with the generated roots."""
        proposed = self.processor(output)
        if proposed is None:
            return self.rewards["wrong_format"]
        assert isinstance(proposed, list), "processed_result should be a list"

        degree = self.parameter["N"]
        if len(proposed) != degree:
            return self.rewards["wrong_format"]

        # Remaining multiplicity of each true root.
        remaining = {}
        for root in self.parameter["gold_answer"]:
            remaining[root] = remaining.get(root, 0) + 1

        # Each proposed value can consume at most one unmatched true root.
        satisfied = 0
        for value in proposed:
            if remaining.get(value, 0) > 0:
                remaining[value] -= 1
                satisfied += 1

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "(satisfied/all)^beta":
            return weight * ((satisfied / degree) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all":
            return weight * (satisfied == degree)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
class PolynomialInterpolation_Environment(VerifiableEnvironment):
    """Recover the integer coefficients of a degree-N polynomial from N + 1 sample points."""

    prompt_template = \
r"""You are given a polynomial of degree {N} in the form: f(x) = a_0 * x^0 + a_1 * x^1 + ... + a_{N} * x^{N}, where the coefficients `a_0, a_1, ..., a_{N}` are integers.

It is known that the polynomial passes through the following {N_plus_1} points:
{points}

Please determine the coefficients a_0, a_1, ..., a_{N}.

**Output Format:** Your final answer should be a single line containing `a_0 a_1 ... a_{N}` (do **NOT** include backticks or quotes), separated by spaces."""

    def __init__(self,
                 max_weight: int = 5,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5,
                 rewarding_strategy: str = "mean([gold=answer])^beta", rewarding_beta: float = 10.0, rewarding_weight: float = +1.0,
                 **kwargs):
        """
        Initialize the PolynomialInterpolation_Environment instance.

        `max_weight` bounds the absolute value of every sampled coefficient.
        `rewarding_strategy` is either "mean([gold=answer])^beta" (partial
        credit per matching coefficient) or "gold=answer" (all-or-nothing).
        """
        super().__init__(**kwargs)

        self.max_weight = max_weight

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_beta=rewarding_beta,
            rewarding_weight=rewarding_weight,
        )

    def compute(self, x: int) -> int:
        """Evaluate the stored polynomial at `x`."""
        value, x_power = 0, 1
        for coeff in self.parameter["coeffs"]:
            value += coeff * x_power
            x_power *= x
        return value

    def _generate(self) -> None:
        """Sample coefficients (positive leading one) and N + 1 distinct sample points in [-N, N]."""
        assert "N" in self.parameter, "N is required in parameter"
        degree = self.parameter["N"]
        assert degree >= 2, "N should be greater than or equal to 2"

        bound = self.max_weight
        lower_terms = [random.randint(-bound, bound) for _ in range(degree)]
        # The leading coefficient is strictly positive so the degree is exactly N.
        self.parameter["coeffs"] = lower_terms + [random.randint(1, bound)]
        self.parameter["reference_answer"] = " ".join(map(str, self.parameter["coeffs"]))

        xs = self.parameter["X"] = random.sample(range(-degree, +degree + 1), degree + 1)
        self.parameter["Y"] = [self.compute(x) for x in xs]

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and one `f(x) = y` line per sample point."""
        degree = self.parameter["N"]
        point_lines = ["f({}) = {}".format(x, y) for x, y in zip(self.parameter["X"], self.parameter["Y"])]
        return self.prompt_template.format(
            N=degree,
            N_plus_1=degree + 1,
            points="\n".join(point_lines),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse a whitespace-separated list of integers; None on missing or invalid input."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return list(map(int, answer.strip().split()))
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """Score proposed coefficients position by position against the stored ones."""
        proposed = self.processor(output)
        if proposed is None:
            return self.rewards["wrong_format"]
        assert isinstance(proposed, list), "processed_result should be a list"

        expected = self.parameter["coeffs"]
        term_count = self.parameter["N"] + 1
        if len(proposed) != term_count:
            return self.rewards["invalid_solution"]

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        if strategy == "mean([gold=answer])^beta":
            matching = sum(int(a == b) for a, b in zip(expected, proposed))
            return weight * ((matching / term_count) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (expected == proposed)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["N"] + 1 : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["coeffs"], processed_result)) / (self.parameter["N"] + 1)) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["coeffs"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/polynomial_minimum/__init__.py b/server/Gym/environments/polynomial_minimum/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ccf1dfd531c6371b55a5ba9ba007adfe825f2d8 --- /dev/null +++ b/server/Gym/environments/polynomial_minimum/__init__.py @@ -0,0 +1 @@ +from .environment import PolynomialMinimum_Environment diff --git a/server/Gym/environments/polynomial_minimum/environment.py b/server/Gym/environments/polynomial_minimum/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd76df045dd667bfff722ccaa263283a608e178 --- /dev/null +++ b/server/Gym/environments/polynomial_minimum/environment.py @@ -0,0 +1,116 @@ +import math +import sympy +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class PolynomialMinimum_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Given f(x) = {polynomial}, find the value of x0 that minimizes f(x). 
def _generate(self) -> None :
    """
    Sample a random even-degree polynomial and locate (numerically) its minimizer.

    Reads ``self.parameter["N"]`` (must be even and >= 2) and ``self.max_weight``.
    The polynomial is a sum of terms ``a * (x - s) ** deg`` over the even degrees
    2, 4, ..., N with positive ``a``.

    Writes to ``self.parameter``:
        coeffs           -- integer coefficients, index i is the x**i coefficient
        reference_answer -- the candidate x with the smallest f(x)
        reference_value  -- f(reference_answer)
        worst_value      -- f(0.0), the value at the first (baseline) candidate
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2 and N % 2 == 0, "N should be greater than or equal to 2 and even"

    available_degrees = list(range(2, N, 2))
    random.shuffle(available_degrees)

    # Degree N always comes first so the leading coefficient is guaranteed positive.
    degrees = [N] + available_degrees

    x = sympy.Symbol("x")
    terms = []
    for deg in degrees :
        a = random.randint(1, self.max_weight)
        s = random.choice(range(-self.max_weight, +self.max_weight + 1))
        term = a * ((x - s) ** deg)
        terms.append(term)

    poly = sum(terms)
    poly_expanded = sympy.expand(poly)
    coeffs = [int(poly_expanded.coeff(x, i)) for i in range(N + 1)]

    assert len(coeffs) == N + 1, "coeffs should have length N + 1"
    assert coeffs[N] > 0.0, "leading coefficient should be positive"
    self.parameter["coeffs"] = coeffs


    f_expr = sum(c * (x ** i) for i, c in enumerate(coeffs))
    # Candidate minimizers: x = 0 (kept at index 0 as the baseline) plus a few random probes.
    real_roots = [0.0] + [random.uniform(-self.max_weight, self.max_weight) for _ in range(5)]
    try :
        # (Try to) find the minimum of the polynomial using sympy:
        # the real roots of f'(x) are added to the candidate list.
        d_expr = sympy.diff(f_expr, x)
        roots = sympy.nroots(d_expr)
        real_roots += [float(sympy.re(r)) for r in roots if abs(sympy.im(r)) < 1E-6]
    except Exception :
        # Best effort: if the numeric root search fails, fall back to the probes above.
        # Only ordinary errors are caught, so KeyboardInterrupt / SystemExit still propagate.
        pass
    f_vals = [float(f_expr.evalf(subs = {x : xr})) for xr in real_roots]
    min_idx = f_vals.index(min(f_vals))
    x0 = real_roots[min_idx]
    self.parameter["reference_answer"] = float(x0)
    self.parameter["reference_value"] = float(f_vals[min_idx])
    self.parameter["worst_value"] = f_vals[0]
return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/polynomial_remainder/__init__.py b/server/Gym/environments/polynomial_remainder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..087407e165584a5d25063ef3df30c7701c44ee9b --- /dev/null +++ b/server/Gym/environments/polynomial_remainder/__init__.py @@ -0,0 +1 @@ +from .environment import PolynomialRemainder_Environment diff --git a/server/Gym/environments/polynomial_remainder/environment.py b/server/Gym/environments/polynomial_remainder/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..b5bd4255b0022a2a7f7526e1e268e960c22a7486 --- /dev/null +++ b/server/Gym/environments/polynomial_remainder/environment.py @@ -0,0 +1,97 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class PolynomialRemainder_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given two polynomials: +- P(x) of degree {N}: P(x) = {P} +- Q(x) of degree {M}: Q(x) = {Q} + +There exists a unique polynomial R(x) such that: P(x) = Q(x) * R(x) + S(x), where S(x) is the **remainder polynomial** and its degree is **less than {M}**. Let the coefficients of S(x) be `s_0, ..., s_{M_minus_1}` (if the degree of S(x) is less than {M_minus_1}, pad the remaining coefficients with zeros); we know that the coefficients of S(x) are all integers. + +**Output Format:** Your final answer should be a single line containing `s_0 ... s_{M_minus_1}` (do **NOT** include backticks or quotes), separated by spaces. +""" + + def __init__(self, + max_weight : int = 5, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the PolynomialRemainder_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.max_weight = max_weight + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "M" in self.parameter, "M is required in parameter" + M = self.parameter["M"] + assert N >= M >= 2, "M should be less than or equal to N and greater than or equal to 2" + + self.parameter["Q_coeffs"] = [random.randint(-self.max_weight, self.max_weight) for degree in range(M)] + [random.randint(1, self.max_weight)] + self.parameter["R_coeffs"] = [random.randint(-self.max_weight, self.max_weight) for degree in range(N - M)] + [random.randint(1, self.max_weight)] + self.parameter["S_coeffs"] = [random.randint(-self.max_weight, self.max_weight) for degree in range(M)] + + self.parameter["P_coeffs"] = [0] * (N + 1) + for Qi in range(M + 1) : + for Ri in range(N - M + 1) : + self.parameter["P_coeffs"][Qi + Ri] += self.parameter["Q_coeffs"][Qi] * self.parameter["R_coeffs"][Ri] + for Si in range(M) : + self.parameter["P_coeffs"][Si] += self.parameter["S_coeffs"][Si] + + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["S_coeffs"])) + + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = N, + M = M, + M_minus_1 = M - 1, + P = " + ".join("({}) * x^{}".format(coefficient, i) for i, coefficient in enumerate(self.parameter["P_coeffs"]) if coefficient != 0), + Q = " + ".join("({}) * x^{}".format(coefficient, i) for i, coefficient in enumerate(self.parameter["Q_coeffs"]) if coefficient != 0), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = 
answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["M"] : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["S_coeffs"], processed_result)) / self.parameter["M"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["S_coeffs"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/power_cycle/__init__.py b/server/Gym/environments/power_cycle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eb8b91c2dabf0798801a2c297e3d2a7a461b9193 --- /dev/null +++ b/server/Gym/environments/power_cycle/__init__.py @@ -0,0 +1 @@ +from .environment import PowerCycle_Environment diff --git a/server/Gym/environments/power_cycle/environment.py b/server/Gym/environments/power_cycle/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..365c8d91ee9262aa13bc7bfdda76bd1d29bc8a1f --- /dev/null +++ b/server/Gym/environments/power_cycle/environment.py @@ -0,0 +1,124 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class PowerCycle_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1050 + prompt_template = \ 
def __init__(self,
             wrong_format : float = -1.0, invalid_answer : float = 0.0, rewarding_strategy : str = "gold/answer", rewarding_weight : float = 1.0,
             **kwargs) :
    """
    Set up the PowerCycle environment and record its reward configuration.

    The four reward-related arguments are stored verbatim in ``self.rewards``;
    all remaining keyword arguments are forwarded to the base-class initializer.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_answer = invalid_answer,
        rewarding_strategy = rewarding_strategy,
        rewarding_weight = rewarding_weight,
    )
considering the last i digits) + for i in range(1, K + 1) : + _last = 1 + flag = False + # Try multipliers j = 1 to 10. + for j in range(1, 11) : + # Update n_val and _last using multiplication mod 10^k. + n_val = (n_val * last) % mod + _last = (_last * last) % mod + # Compare the last i digits: + # This is done by comparing n_val mod 10^i with t mod 10^i. + if n_val % (10 ** i) == t % (10 ** i) : + # If j is less than 10, use j; otherwise, use 10. + multiplier = j if j < 10 else 10 + ans *= multiplier + flag = True + break + # If no valid multiplier was found in [1, 10], there is no cycle. + if not flag : + return -1 + # Reset n_val for the next outer iteration. + n_val = t + # Set last to _last so that the cycle for the next higher digit is built on + last = _last + + return ans + + while True : + self.parameter["N"] = random.randint(1, 10 ** digit_num - 1) + self.parameter["K"] = random.randint(1, digit_num) + self.parameter["reference_answer"] = solve(self.parameter["N"], self.parameter["K"]) + + if self.parameter["reference_answer"] != -1 : + break + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + assert self.parameter["reference_answer"] > 0, "reference_answer should be greater than 0" + + if self.rewards["rewarding_strategy"] == "gold/answer" : + if processed_result % self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] / processed_result) + else : + return self.rewards["invalid_answer"] + 
elif self.rewarding_strategy == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/power_shortcut/__init__.py b/server/Gym/environments/power_shortcut/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d35b28c122e5cf4141115507b00793ce5dd374ba --- /dev/null +++ b/server/Gym/environments/power_shortcut/__init__.py @@ -0,0 +1 @@ +from .environment import PowerShortcut_Environment diff --git a/server/Gym/environments/power_shortcut/environment.py b/server/Gym/environments/power_shortcut/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..db362ce9ce320923bcceb8bfbe765ba5a3e04c19 --- /dev/null +++ b/server/Gym/environments/power_shortcut/environment.py @@ -0,0 +1,148 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class PowerShortcut_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following directed edges. Each edge is represented as a tuple `(s, t)`, meaning there is a directed edge **from vertex `s` to vertex `t`**: +{edges} + +Your task is to find a sequence of vertices `p[1], p[2], ..., p[m]` such that: +- `p[1] = 0` (the sequence starts at vertex 0) and `p[m] = {N_minus_1}` (the sequence ends at vertex `{N_minus_1}`) +- For each consecutive pair `(p[i], p[i + 1])`, there exists a **path** from `p[i]` to `p[i + 1]` whose length (number of edges) is exactly 2^k for some integer k where 0 ≤ k ≤ {K}. + +Your goal is to **minimize** the length `m` of the sequence — that is, the number of steps in the sequence. 
def _generate(self) -> None :
    """
    Build a random directed graph and a shortest "power-of-two hop" sequence from 0 to N - 1.

    Reads ``self.parameter["N"]`` (>= 3), ``self.parameter["K"]`` (>= 0) and
    ``self.parameter["edge_density"]`` (in [0, 1]).

    Writes to ``self.parameter``:
        edges            -- shuffled list of distinct directed edges (s, t), s != t
        achievable       -- N x N booleans; [s][t] is True when some walk of exactly
                            2**k edges (0 <= k <= K) leads from s to t
        reference_answer -- a shortest vertex sequence from 0 to N - 1, space-separated
        gold_answer      -- the number of vertices in that sequence
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    assert "K" in self.parameter, "K is required in parameter"
    K = self.parameter["K"]
    assert K >= 0, "K should be greater than or equal to 0"

    assert "edge_density" in self.parameter, "edge_density is required in parameter"
    edge_density = self.parameter["edge_density"]
    assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

    # A random path 0 -> (all other vertices, shuffled) -> N - 1 guarantees
    # that the target is reachable from the start through single edges.
    constructed_path = list(range(1, N - 1))
    random.shuffle(constructed_path)
    constructed_path = [0] + constructed_path + [N - 1]

    edges = self.parameter["edges"] = []
    for s, t in zip(constructed_path, constructed_path[1 :]) :
        edges.append((s, t))

    # Top up with random extra edges until the requested density is reached.
    num_edges = int(edge_density * N * (N - 1))
    if len(edges) < num_edges :
        remaining_edges = list(set((s, t) for s in range(N) for t in range(N) if s != t) - set(edges))
        edges += random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges)))
    random.shuffle(edges)

    assert len(edges) == len(set(edges)), "Edges should be unique"
    for s, t in edges :
        assert 0 <= s < N, "s should be in range"
        assert 0 <= t < N, "t should be in range"
        assert s != t, "s should not be equal to t"


    # achievable[k][s][t]: a walk of exactly 2**k edges exists from s to t.
    # Level 0 is the edge set; level k joins two level-(k - 1) walks through a midpoint m.
    achievable = [[[False] * N for s in range(N)] for k in range(K + 1)]
    # path[s][t]: intermediate vertices of the best hop sequence found so far
    # (None = no sequence known, [] = reachable in a single hop).
    path = [[None] * N for s in range(N)]
    for s in range(N) :
        path[s][s] = []
    for s, t in edges :
        achievable[0][s][t] = True
        path[s][t] = []
    for k in range(1, K + 1) :
        for s in range(N) :
            for t in range(N) :
                for m in range(N) :
                    achievable[k][s][t] |= (achievable[k - 1][s][m] and achievable[k - 1][m][t])
                if achievable[k][s][t] :
                    path[s][t] = []
    # Collapse the levels: one hop s -> t is allowed if any power 2**k works.
    self.parameter["achievable"] = [[any(achievable[k][s][t] for k in range(K + 1)) for t in range(N)] for s in range(N)]

    # Floyd-Warshall over the single-hop relation, minimising the number of intermediate vertices.
    for m in range(N) :
        for s in range(N) :
            for t in range(N) :
                if path[s][m] is not None and path[m][t] is not None :
                    if path[s][t] is None or (len(path[s][t]) > len(path[s][m]) + 1 + len(path[m][t])) :
                        path[s][t] = path[s][m] + [m] + path[m][t]
    self.parameter["reference_answer"] = " ".join(map(str, [0] + path[0][N - 1] + [N - 1]))
    # Sequence length = start vertex + intermediates + end vertex.
    self.parameter["gold_answer"] = 1 + len(path[0][N - 1]) + 1
check if vertex is in range + return self.rewards["invalid_solution"] + if not (path[0] == 0 and path[-1] == self.parameter["N"] - 1) : # check if start and end vertices are correct + return self.rewards["invalid_solution"] + for s, t in zip(path, path[1 :]) : + if not self.parameter["achievable"][s][t] : + return self.rewards["invalid_solution"] + + gold, answer = self.parameter["gold_answer"], len(path) + assert gold <= answer, "gold_answer should be less than or equal to answer length" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/powernest/__init__.py b/server/Gym/environments/powernest/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..273abedda3f86bc5510d52fd5cdd4f7c7f2b7567 --- /dev/null +++ b/server/Gym/environments/powernest/__init__.py @@ -0,0 +1 @@ +from .environment import PowerNest_Environment diff --git a/server/Gym/environments/powernest/environment.py b/server/Gym/environments/powernest/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..eff81d051bb2abab7ac0d480c649686f74cd85c5 --- /dev/null +++ b/server/Gym/environments/powernest/environment.py @@ -0,0 +1,150 @@ +import random +from typing import Dict, Optional, Any +from ...environment import VerifiableEnvironment + + +class PowerNest_Environment(VerifiableEnvironment) : # Source: https://www.luogu.com.cn/problem/P1010 + prompt_template = \ +r"""You are given a **positive integer** `{number}`. + +Every positive integer can be represented as a **sum of powers of 2**. 
For example: +137 = 2^7 + 2^3 + 2^0 + +We adopt the following format: +- A power expression like a^b should be written as `a(b)` +- So, 137 can be written as: `2(7)+2(3)+2(0)` + +Now, each exponent (like `7`, `3`, `0`) can itself be expressed as a sum of powers of 2, recursively applying the same rule: +- 7 = 2^2 + 2 + 2^0 → 2(2)+2+2(0) +- 3 = 2 + 2^0 → 2+2(0) + +So the final expression for 137 becomes: +`2(2(2)+2+2(0))+2(2+2(0))+2(0)` + +Another example: +1315 = 2^10 + 2^8 + 2^5 + 2 + 1 +Final form: `2(2(2+2(0))+2)+2(2(2+2(0)))+2(2(2)+2(0))+2+2(0)` + +--- + +Your task is to write the given number `{number}` in this **power-of-two expression form**, following the rules above. + +Output Format: +Your final answer should be just the final expression, e.g. `2(2(2+2(0))+2)+2(2(2+2(0)))+2(2(2)+2(0))+2+2(0)` (do **NOT** include the backticks or quotes). +""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the PowerNest_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "max_number" in self.parameter, "max_number is required in parameter" + max_number = self.parameter["max_number"] + assert max_number >= 1, "max_number should be greater than or equal to 1" + + self.parameter["number"] = random.randint(1, max_number) + + n2expression = {} + def convert_to_powernest(n) : + assert n > 0, "n should be greater than 0" + if n in n2expression : + return n2expression[n] + power = 0 + result = [] + while n : + if n & 1 : + if power == 0 : + result.append("2(0)") + elif power == 1 : + result.append("2") + else : + result.append("2({})".format(convert_to_powernest(power))) + n //= 2 + power += 1 + result.reverse() + n2expression[n] = "+".join(result) + return n2expression[n] + self.parameter["reference_answer"] = convert_to_powernest(self.parameter["number"]) + + def _prompt_generate(self) -> str : + return self.prompt_template.format(number = self.parameter["number"]) + + + def _process(self, answer : Optional[str]) -> Dict[str, Any] : + if answer is not None : + answer = answer.strip() + if answer == self.parameter["reference_answer"] : + return {"format" : True, "validation" : True, "answer" : answer} + else : + def check_powernest(expression) : + if expression == "" : + return False + + intervals = [] + stack_count = 0 + for i, char in enumerate(expression) : + if char == "(" : + stack_count += 1 + elif char == ")" : + if stack_count > 0 : + stack_count -= 1 + else : + return False + elif char == "+" : + if stack_count == 0 : + if not intervals : + intervals.append((0, i)) + else : + intervals.append((intervals[-1][1] + 1, i)) + else : + pass + if stack_count != 0 : + return False + + if intervals : + intervals.append((intervals[-1][1] + 1, len(expression))) + for interval in intervals : + if 
interval[0] < interval[1] : + if not check_powernest(expression[interval[0] : interval[1]]) : + return False + else : + return False + return True + else : + if expression == "2" : + return True + elif expression.startswith("2(") and expression.endswith(")") : + if expression[2 : -1] == "0" : + return True + return check_powernest(expression[2 : -1]) + else : + return False + + return {"format" : True, "validation" : check_powernest(answer), "answer" : answer} + else : + return {"format" : False} + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result["format"] : + if processed_result["validation"] : + if processed_result["answer"] == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["invalid_solution"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/prefix_concatenation/__init__.py b/server/Gym/environments/prefix_concatenation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7656f43dea726e06d2d60c8bb72468c9a52712c2 --- /dev/null +++ b/server/Gym/environments/prefix_concatenation/__init__.py @@ -0,0 +1 @@ +from .environment import PrefixConcatenation_Environment diff --git a/server/Gym/environments/prefix_concatenation/environment.py b/server/Gym/environments/prefix_concatenation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..dfb365893cd8f8aaec5577f68694d6a6d17fb22f --- /dev/null +++ b/server/Gym/environments/prefix_concatenation/environment.py @@ -0,0 +1,110 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class PrefixConcatenation_Environment(VerifiableEnvironment) : # Source: https://www.luogu.com.cn/problem/P3216 + prompt_template = \ +r"""Define $Concatenate(n)$ as the number formed by concatenating all positive 
integers from $1$ to $n$ in order. For example, when $n = 12$, $Concatenate(12) = 123456789101112$ + +Your task is to compute $Concatenate({N}) \bmod {M}$. + +**Output Format:** Your final answer should be a **single integer** in the range $[0, {M})$, printed on a line by itself. +""" + + def __init__(self, + max_modulo : int = 1000000, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the PrefixConcatenation_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + self.max_modulo = max_modulo + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 1" + + N = self.parameter["N"] = random.randint(2, MAX_N) + M = self.parameter["M"] = random.randint(3, self.max_modulo) + + + def mat_mul(A, B) : + return [ + [(A[i][0] * B[0][j] + A[i][1] * B[1][j] + A[i][2] * B[2][j]) % M + for j in range(3)] + for i in range(3) + ] + + def mat_pow(base, exp) : + R = [[1 if i == j else 0 for j in range(3)] for i in range(3)] + while exp : + if exp & 1 : + R = mat_mul(R, base) + base = mat_mul(base, base) + exp >>= 1 + return R + + def mat_vec_mul(A, v) : + return [ + (A[0][0] * v[0] + A[0][1] * v[1] + A[0][2] * v[2]) % M, + (A[1][0] * v[0] + A[1][1] * v[1] + A[1][2] * v[2]) % M, + (A[2][0] * v[0] + A[2][1] * v[1] + A[2][2] * v[2]) % M, + ] + + state = [0, 1, 1] + start = 1 + power_of_10 = 10 + + while start <= N : + end = min(N, power_of_10 - 1) + block_size = end - start + 1 + + B = [ + [power_of_10 % M, 1, 0], + [0, 1, 1], + [0, 0, 1] + ] + + Bk = mat_pow(B, block_size) + state = mat_vec_mul(Bk, state) + + start = power_of_10 + power_of_10 *= 10 + + self.parameter["reference_answer"] = state[0] + + def _prompt_generate(self) -> str : + 
return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/prefix_product_mod_distinct_permutation/__init__.py b/server/Gym/environments/prefix_product_mod_distinct_permutation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..51c5a28d3b5aeb04ad7b47a1e020db5b430ce279 --- /dev/null +++ b/server/Gym/environments/prefix_product_mod_distinct_permutation/__init__.py @@ -0,0 +1 @@ +from .environment import PrefixProductMODDistinctPermutation_Environment diff --git a/server/Gym/environments/prefix_product_mod_distinct_permutation/environment.py b/server/Gym/environments/prefix_product_mod_distinct_permutation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e4246966a4c5401565a6aae3fe4b2e111045cae2 --- /dev/null +++ b/server/Gym/environments/prefix_product_mod_distinct_permutation/environment.py @@ -0,0 +1,104 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class PrefixProductMODDistinctPermutation_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Please find a permutation of the numbers from 1 to {N} such that all {N} prefix products (i.e., the product of the first i numbers for all i from 1 to {N}) are **distinct modulo {N}**. 
def _generate(self) -> None :
    """
    Sample an accepted N and record a valid permutation as the reference answer.

    N is drawn uniformly from [3, MAX_N] and re-drawn until it is either 4 or
    has no non-trivial divisor (other composite values are rejected,
    presumably because no valid permutation exists for them).

    Reads ``self.parameter["MAX_N"]`` (must be at least 3). Writes ``N`` and
    ``reference_answer`` (the permutation as a space-separated string).

    Raises:
        AssertionError: if ``MAX_N`` is missing or smaller than 3.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 3, "MAX_N should be greater than or equal to 3"

    def is_composite(x : int) -> bool :
        """Return True if x has a non-trivial divisor (exact integer trial division)."""
        d = 2
        while d * d <= x :
            if x % d == 0 :
                return True
            d += 1
        return False

    # Rejection sampling; it terminates because 3 is always in range and is accepted.
    while True :
        N = self.parameter["N"] = random.randint(3, MAX_N)

        if N == 4 :
            # Hard-coded answer for the only accepted composite value.
            self.parameter["reference_answer"] = "1 3 2 4"
            return
        if is_composite(N) :
            continue

        # Modular inverses of 1..N-1 via inv[i] = -(N // i) * inv[N % i] (mod N).
        # N has no non-trivial divisor here, so N % i is never 0 for 2 <= i < N.
        inv = [0] * N
        inv[1] = 1
        for i in range(2, N) :
            inv[i] = ((N - N // i) * inv[N % i]) % N

        # perm[i] = (i + 1) / i (mod N) makes the i-th prefix product equal to
        # i + 1; the trailing N turns the final prefix product into 0.
        perm = [1]
        perm.extend(((i + 1) * inv[i]) % N for i in range(1, N - 1))
        perm.append(N)
        self.parameter["reference_answer"] = " ".join(map(str, perm))
        return
def scorer(self, output : str) -> float :
    """
    Score a candidate permutation by how many distinct prefix products (mod N) it yields.

    Returns ``wrong_format`` when the output cannot be parsed,
    ``invalid_solution`` when it is not a permutation of 1..N, and otherwise
    a reward computed from the number of distinct residues according to
    ``rewarding_strategy``.

    Raises:
        NotImplementedError: if ``rewarding_strategy`` is not recognized.
    """
    candidate = self.processor(output)
    if candidate is None :
        return self.rewards["wrong_format"]
    assert isinstance(candidate, list), "processed_result should be a list"

    if len(candidate) != self.parameter["N"] :
        return self.rewards["invalid_solution"]
    if set(candidate) != set(range(1, self.parameter["N"] + 1)) :
        return self.rewards["invalid_solution"]

    modulus = self.parameter["N"]
    residues = set()
    running = 1
    for value in candidate :
        running = (running * value) % modulus
        assert 0 <= running < modulus, "prefix_product should be in the range [0, N)"
        residues.add(running)
    satisfied = len(residues)
    assert 1 <= satisfied <= modulus, "satisfied should be less than or equal to N"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((satisfied / modulus) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * (satisfied == modulus)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def _generate(self) -> None :
    """
    Sample an even N and record a zig-zag permutation as the reference answer.

    N is drawn uniformly from [3, MAX_N] and re-drawn until it is even; the
    reference permutation is N, 1, N-2, 3, N-4, ...

    Reads ``self.parameter["MAX_N"]`` (must be at least 4). Writes ``N`` and
    ``reference_answer`` (the permutation as a space-separated string).

    Raises:
        AssertionError: if ``MAX_N`` is missing or smaller than 4.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    # Only even draws are accepted and sampling starts at 3, so the smallest
    # usable N is 4. With MAX_N == 3 the loop below could never terminate.
    assert MAX_N >= 4, "MAX_N should be greater than or equal to 4"

    while True :
        N = self.parameter["N"] = random.randint(3, MAX_N)
        if N % 2 == 0 :
            break

    # Zig-zag construction for even N: odd steps take i, even steps take N - i.
    perm = [N]
    perm.extend(i if i % 2 == 1 else N - i for i in range(1, N))
    self.parameter["reference_answer"] = " ".join(map(str, perm))
def scorer(self, output : str) -> float :
    """
    Score a candidate permutation by how many distinct prefix sums (mod N) it yields.

    Returns ``wrong_format`` for unparseable output, ``invalid_solution`` when
    the answer is not a permutation of 1..N, and otherwise a reward computed
    from the count of distinct residues according to ``rewarding_strategy``.

    Raises:
        NotImplementedError: if ``rewarding_strategy`` is not recognized.
    """
    permutation = self.processor(output)
    if permutation is None :
        return self.rewards["wrong_format"]
    assert isinstance(permutation, list), "processed_result should be a list"

    size = self.parameter["N"]
    is_permutation = len(permutation) == size and set(permutation) == set(range(1, size + 1))
    if not is_permutation :
        return self.rewards["invalid_solution"]

    hit = [False] * size
    running = 0
    for value in permutation :
        running = (running + value) % size
        assert 0 <= running < size, "prefix_sum should be in the range [0, N)"
        hit[running] = True
    satisfied = hit.count(True)
    assert 1 <= satisfied <= size, "satisfied should be less than or equal to N"

    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]
    if strategy == "(satisfied/all)^beta" :
        return weight * ((satisfied / size) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all" :
        return weight * (satisfied == size)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def _generate(self) -> None :
    """
    Build a random a/b string with a planted solution and compute the reference answer.

    Reads ``self.parameter["N"]`` (must be at least 4). Writes ``S`` (the
    string shown in the prompt) and ``reference_answer`` (an integer) into
    ``self.parameter``.

    The string is laid out as ``(S1 + S2) + filler + (S2 + S1)`` with
    ``len(S1 + S2) == L``, so the L-prefix and the L-suffix are cyclic shifts
    of each other by construction. The final assertion relies on this: the
    computed answer must be at least L and at most N // 2.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 4, "N should be greater than or equal to 4"

    # One bias per instance: each character is "a" with this probability, else "b".
    a_probability = random.random()
    def generate_string(length : int) -> str :
        return "".join("a" if random.random() < a_probability else "b" for _ in range(length))

    # Plant a rotation pair of total length L (split as L1 + L2) at both ends of S.
    L = random.randint(1, N // 2)
    L1 = random.randint(0, L)
    L2 = L - L1
    S1, S2 = generate_string(L1), generate_string(L2)
    self.parameter["S"] = S = (S1 + S2) + generate_string(N - 2 * L) + (S2 + S1)


    # Build the interleaved string t[1..N]; t[0] is a sentinel that never matches.
    # Resulting layout: t[1] = S[0], t[2] = S[N-1], t[3] = S[1], t[4] = S[N-2], ...
    t = ['#'] * (N + 1)
    # Odd positions take S[0], S[1], ... until the positions run out.
    j = 1
    for i in range(N):
        if j <= N:
            t[j] = S[i]
            j += 2
    # Even positions take S[N-1], S[N-2], ... until the positions run out.
    j = 2
    for i in range(N - 1, -1, -1):
        if j <= N:
            t[j] = S[i]
            j += 2

    # p[i]: radius (number of matched pairs) of the even-length palindrome
    # centered between t[i] and t[i+1].
    p = [0] * (N + 1)
    # vis[k] = 1 iff some center i has p[i] == i, i.e. its palindrome starts
    # at t[1] and stops only at the sentinel; k = 2 * i is that palindrome's length.
    vis = [0] * (N + 2)

    mr = 0    # rightmost reach of any palindrome seen so far
    mid2 = 0  # 2 * i + 1 for the center i that produced mr

    # Manacher-style scan for even-length palindromes on t.
    for i in range(1, N):
        # Reuse the mirrored center's radius when i lies inside the rightmost palindrome.
        if mid2 - i - 1 > 0 and mr - i - 1 > 0:
            p[i] = min(p[mid2 - i - 1], mr - i - 1)
        else:
            p[i] = 0
        # Expand around the center between i and i+1; the sentinel t[0] stops the left side.
        while i - p[i] >= 0 and i + 1 + p[i] <= N and t[i - p[i]] == t[i + 1 + p[i]]:
            p[i] += 1
        # Track the palindrome reaching furthest to the right.
        if i + 1 + p[i] > mr:
            mr = i + 1 + p[i]
            mid2 = 2 * i + 1
        # The palindrome covers t[1..2i] exactly: record its length.
        if i == p[i]:
            vis[i + p[i]] = 1

    # Union-find used as a "next unassigned position" pointer, so each res[j]
    # is written once.
    f = list(range(N + 2))
    res = [0] * (N + 2)
    def find(x):
        # Path-halving find.
        while f[x] != x:
            f[x] = f[f[x]]
            x = f[x]
        return x

    # res[j] = largest center i whose left half [i - p[i] + 1 .. i] contains j
    # (centers are processed in decreasing order, so the first write wins).
    for i in range(N - 1, 0, -1):
        start = i - p[i] + 1
        j = find(start)
        while j <= i:
            res[j] = i
            f[j] = find(j + 1)
            j = f[j]

    # Combine both tables into the answer.
    # NOTE(review): the two formulas below were not re-derived here; the
    # assertion at the end only checks them against the planted lower bound L.
    ans = 0
    # Case 1: a palindrome of length i anchored at t[1], followed by a second
    # palindrome whose left half contains position i + 1.
    for i in range(1, N + 1):
        if vis[i] and res[i + 1] != 0:
            # solve (2*res + 1 - (i+1)) / 2
            val = (2 * res[i + 1] + 1 - (i + 1)) // 2
            if val > ans:
                ans = val
    # Case 2: the anchored palindrome alone.
    for i in range(1, N + 1):
        if vis[i]:
            val = i // 2
            if val > ans:
                ans = val

    assert L <= ans <= N // 2, "Computed answer is not within the expected range"
    self.parameter["reference_answer"] = ans
def _generate(self) -> None :
    """
    Draw a random binary tree on labels 0..N-1 and store its three traversals.

    Reads ``self.parameter["N"]`` (must be at least 3). Writes
    ``inorder_traversal``, ``postorder_traversal``, ``preorder_traversal``
    (lists of labels) and ``reference_answer`` (the pre-order sequence as a
    space-separated string).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    labels = list(range(N))
    random.shuffle(labels)

    # Each subtree owns a contiguous slice of `labels`, and its root splits the
    # slice into left and right parts, so `labels` itself is the in-order sequence.
    left_of = [-1] * N   # slice position of the left child, -1 when absent
    right_of = [-1] * N  # slice position of the right child, -1 when absent
    preorder = []
    top = -1

    # Explicit stack of (lo, hi, parent position, attach-as-left). Roots are
    # drawn in pre-order (node, whole left subtree, whole right subtree), the
    # same order in which a recursive build would consume random numbers.
    pending = [(0, N, -1, False)]
    while pending :
        lo, hi, parent, as_left = pending.pop()
        pos = lo + random.randint(0, hi - lo - 1)
        preorder.append(labels[pos])
        if parent < 0 :
            top = pos
        elif as_left :
            left_of[parent] = pos
        else :
            right_of[parent] = pos
        # Push right first so the left slice is expanded first; empty slices
        # draw nothing and are never pushed.
        if pos + 1 < hi :
            pending.append((pos + 1, hi, pos, False))
        if lo < pos :
            pending.append((lo, pos, pos, True))

    # Post-order is the reverse of a (root, right, left) walk.
    postorder = []
    walk = [top]
    while walk :
        pos = walk.pop()
        postorder.append(labels[pos])
        if left_of[pos] >= 0 :
            walk.append(left_of[pos])
        if right_of[pos] >= 0 :
            walk.append(right_of[pos])
    postorder.reverse()

    self.parameter["inorder_traversal"] = list(labels)
    self.parameter["postorder_traversal"] = postorder

    self.parameter["preorder_traversal"] = preorder
    self.parameter["reference_answer"] = " ".join(map(str, preorder))
PrimeGraph_MinimumChromaticNumber_Environment diff --git a/server/Gym/environments/prime_graph_minimum_chromatic_number/environment.py b/server/Gym/environments/prime_graph_minimum_chromatic_number/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..09b69f22aeef1eff125f9276d0644ffb5df0c3d9 --- /dev/null +++ b/server/Gym/environments/prime_graph_minimum_chromatic_number/environment.py @@ -0,0 +1,104 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class PrimeGraph_MinimumChromaticNumber_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an **undirected graph** with {N} vertices, labeled from `1` to `{N}`. Two vertices `u` and `v` are connected by an edge **if and only if** the absolute difference `|u - v|` is a prime number. + +Your task is to assign a **non-negative integer color** to each vertex, represented as `c[1], c[2], ..., c[{N}]`, such that: +- For every edge `(u, v)` in the graph, `c[u] ≠ c[v]` — adjacent vertices must have different colors. +- The total number of **distinct colors used** (i.e., the number of unique values among `c[1]` to `c[{N}]`) is **minimized** - try your best to find a valid coloring using as few colors as possible. + +**Output Format:** Your final answer should be a single line containing the color of each vertex in order: `c[1], c[2], ..., c[{N}]`, separated by **spaces**.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the PrimeGraph_MinimumChromaticNumber_Environment instance. 
def scorer(self, output : str) -> float :
    """
    Score a proposed coloring of the prime-difference graph.

    Returns ``wrong_format`` when the output cannot be parsed and
    ``invalid_solution`` when the coloring has the wrong length, contains a
    negative color (the task statement asks for non-negative colors), or
    gives the same color to two vertices whose labels differ by a prime.
    Otherwise the reward is computed from ``gold_answer`` and the number of
    distinct colors used, according to ``rewarding_strategy``.

    Raises:
        NotImplementedError: if ``rewarding_strategy`` is not recognized.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    N = self.parameter["N"]

    if len(processed_result) != N :
        return self.rewards["invalid_solution"]
    # Colors must be non-negative; reject anything else instead of scoring it.
    if any(color < 0 for color in processed_result) :
        return self.rewards["invalid_solution"]

    colors = [-1] + processed_result  # shift to 1-based vertex labels
    assert len(colors) == N + 1, "colors should be of length N + 1"

    # Sieve of Eratosthenes over 0..N; each prime p found is checked at once
    # against every edge (u, u + p).
    is_prime = [False, False] + [True] * (N - 1)
    for p in range(2, N + 1) :
        if not is_prime[p] :
            continue
        for multiple in range(p * p, N + 1, p) :
            is_prime[multiple] = False
        for u in range(1, N - p + 1) :
            if colors[u] == colors[u + p] :
                return self.rewards["invalid_solution"]

    gold, answer = self.parameter["gold_answer"], len(set(colors[1 :]))
    assert gold <= answer, "gold should be less than or equal to answer"

    if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    elif self.rewards["rewarding_strategy"] == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    else :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None :
    """
    Sample the arrays T and D and compute the optimal objective value.

    Reads ``self.parameter["N"]`` (must be at least 3). Writes ``T`` and ``D``
    (lists of N integers in [1, N]), ``gold_answer`` (the minimum of
    sum(S[i] * D[p[i]]) over all permutations p) and ``reference_answer``
    (one optimal permutation, 1-based, as a space-separated string).
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    T = self.parameter["T"] = [random.randint(1, N) for _ in range(N)]
    D = self.parameter["D"] = [random.randint(1, N) for _ in range(N)]

    def by_ratio(x : int, y : int) -> int :
        # Order items by T/D ascending; cross-multiplying keeps the
        # comparison in exact integer arithmetic.
        left = T[x] * D[y]
        right = D[x] * T[y]
        return (left > right) - (left < right)

    # Sorting indices rather than (t, d) pairs lets the optimal order itself
    # be reported as the reference answer.
    order = sorted(range(N), key = cmp_to_key(by_ratio))

    # Evaluate the objective exactly as the prompt defines it: `elapsed` is
    # S[i], the total T of everything placed before the current item.
    ans = 0
    elapsed = 0
    for idx in order :
        ans += elapsed * D[idx]
        elapsed += T[idx]

    assert ans > 0, "The answer should be positive"
    self.parameter["gold_answer"] = ans
    # NOTE(review): sibling environments store a reference_answer next to the
    # gold value; it is recorded here for consistency. Confirm no consumer
    # relied on it being absent.
    self.parameter["reference_answer"] = " ".join(str(idx + 1) for idx in order)
def scorer(self, output : str) -> float :
    """
    Score a candidate ordering by comparing its objective value with the stored optimum.

    Returns ``wrong_format`` for unparseable output, ``invalid_solution`` when
    the answer is not a permutation of 1..N, and otherwise a reward computed
    from ``gold_answer`` and the candidate's objective according to
    ``rewarding_strategy``.

    Raises:
        NotImplementedError: if ``rewarding_strategy`` is not recognized.
    """
    order = self.processor(output)
    if order is None :
        return self.rewards["wrong_format"]
    assert isinstance(order, list), "processed_result should be a list"

    size = self.parameter["N"]
    if len(order) != size :
        return self.rewards["invalid_solution"]
    if set(order) != set(range(1, size + 1)) :
        return self.rewards["invalid_solution"]

    gold = self.parameter["gold_answer"]
    times, weights = self.parameter["T"], self.parameter["D"]

    # `elapsed` is S[i]: the total T of the items placed before the current one.
    answer = 0
    elapsed = 0
    for item in order :
        answer += elapsed * weights[item - 1]
        elapsed += times[item - 1]

    assert 0 < gold <= answer, "gold should be less than or equal to answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta" :
        return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return self.rewards["rewarding_weight"] * (answer == gold)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
b/server/Gym/environments/pythagorean_graph_independent_set_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed12b55fcfba2e6643d76d994a7c3d469a7d98a --- /dev/null +++ b/server/Gym/environments/pythagorean_graph_independent_set_counting/environment.py @@ -0,0 +1,206 @@ +import math +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class PythagoreanGraph_IndependentSetCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3213 + prompt_template = \ +r"""You are given an array H of length {N}: {H} +Construct an undirected graph with vertices labeled from 0 to {N_minus_1}. There is an edge between vertex i and vertex j (i ≠ j) if and only if: +- There exists an integer C such that H[i]^2 + H[j]^2 = C^2 +- gcd(H[i], H[j]) = 1 (i.e., H[i] and H[j] are coprime) + +Your task is to count the number of **non-empty independent sets** in this graph — that is, subsets of vertices such that no two vertices in the subset are connected by an edge. + +**Output Format:** Output a single integer — the number of non-empty independent sets modulo {MOD}.""" + + def __init__(self, + max_MOD : int = 1000000, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the PythagoreanGraph_IndependentSetCounting_Environment instance. 
def _generate(self) -> None :
    """
    Sample MOD and an array H, then count the non-empty independent sets modulo MOD.

    Reads ``self.parameter["N"]`` (must be at least 3) and ``self.max_MOD``.
    Writes ``MOD``, ``H`` (N integers in [1, 2N]) and ``reference_answer``
    into ``self.parameter``.

    MOD is drawn once; H is re-drawn until the computed count (including the
    empty set) differs from 2^N modulo MOD.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

    while True :
        H = self.parameter["H"] = [random.randint(1, 2 * N) for _ in range(N)]

        hs = H
        # num[h] = how many entries of H equal h. The graph below is built on
        # distinct values, and multiplicities are folded in through PW2.
        maxH = max(hs)
        num = [0] * (maxH + 1)
        for h in hs:
            num[h] += 1

        # PW2[k] = 2^k mod MOD for k = 0..N
        PW2 = [1] * (N + 1)
        for i in range(1, N + 1):
            PW2[i] = (PW2[i-1] * 2) % MOD

        # Adjacency between values: x and y are linked when (x, y) = (j^2 - i^2, 2ij),
        # both occur in H, and gcd(x, y) == 1.
        to = [[] for _ in range(maxH + 1)]
        limit_i = int(math.isqrt(maxH))
        two_max = 2 * maxH
        for i in range(1, limit_i + 1):
            # j > i, 2*i*j <= maxH, j*j <= 2*maxH
            # so j_max = min(maxH//(2*i), int(sqrt(2*maxH)))
            j_max1 = maxH // (2*i)
            j_max2 = int(math.isqrt(two_max))
            j_max = min(j_max1, j_max2)
            for j in range(i+1, j_max+1):
                x = j*j - i*i
                y = 2*i*j
                # y <= maxH already follows from j_max1; x still has to be checked
                if x > maxH or y > maxH:
                    continue
                if num[x] == 0 or num[y] == 0:
                    continue
                if math.gcd(x, y) != 1:
                    continue
                to[x].append(y)
                to[y].append(x)

        # Per-value working arrays for the DFS and the DP
        vis = [False] * (maxH + 1)  # reached by dfs_init, or handled as an isolated value
        ins = [False] * (maxH + 1)  # already recorded in QE
        sat = [0] * (maxH + 1)      # forced status: 1 = must pick, -1 = must skip, 0 = free
        des = [0] * (maxH + 1)      # stamp of the last dfs_dp pass that visited the value
        dp0 = [0] * (maxH + 1)      # ways for the subtree when no copy of the value is picked
        dp1 = [0] * (maxH + 1)      # ways for the subtree when at least one copy is picked
        QE = []                     # endpoints of non-tree edges in the current component
        pnt = 0                     # stamp for dp traversal

        # DFS from the component root: whenever an already-visited neighbor
        # other than the DFS parent is met, both endpoints are recorded in QE.
        def dfs_init(u, parent):
            vis[u] = True
            for v in to[u]:
                if v == parent:
                    continue
                if not vis[v]:
                    dfs_init(v, u)
                else:
                    # found a back-edge u-v
                    if not ins[u]:
                        QE.append(u)
                    if not ins[v]:
                        QE.append(v)
                    ins[u] = ins[v] = True

        # Reject assignments in which two adjacent QE values are both forced to be picked.
        def check():
            for u in QE:
                if sat[u] == 1:
                    for v in to[u]:
                        if sat[v] == 1:
                            return False
            return True

        # Tree DP over the DFS tree of the current pass (the des stamps mark
        # visited values); returns the number of selections for u's subtree.
        def dfs_dp(u):
            nonlocal pnt
            dp0[u] = 1
            # any non-empty subset of the num[u] equal-valued entries
            dp1[u] = (PW2[num[u]] - 1) % MOD
            des[u] = pnt
            for v in to[u]:
                if des[v] != pnt:
                    dfs_dp(v)
                    dp0[u] = dp0[u] * (dp0[v] + dp1[v]) % MOD
                    dp1[u] = dp1[u] * dp0[v] % MOD
            # apply forced-status constraints
            if sat[u] == 1:
                dp0[u] = 0
            if sat[u] == -1:
                dp1[u] = 0
            return (dp0[u] + dp1[u]) % MOD

        # Count selections for the component containing root, including the empty one.
        def query(root):
            nonlocal pnt
            QE.clear()
            dfs_init(root, root)

            comp_ans = 0
            k = len(QE)
            # Enumerate all 2^k ways to force-pick or force-skip the recorded values.
            for mask in range(1 << k):
                for i in range(k):
                    u = QE[i]
                    sat[u] = 1 if (mask >> i) & 1 else -1
                if not check():
                    continue
                pnt += 1  # fresh stamp so dfs_dp revisits the whole component
                comp_ans = (comp_ans + dfs_dp(root)) % MOD

            # reset sat flags
            for u in QE:
                sat[u] = 0
            return comp_ans

        # Multiply the per-component counts over all values present in H.
        answer = 1
        for length in range(1, maxH + 1):
            if num[length] > 0 and not vis[length]:
                if not to[length]:
                    # isolated value: any subset of its copies
                    answer = answer * PW2[num[length]] % MOD
                    vis[length] = True
                else:
                    answer = answer * query(length) % MOD

        # With no edges every value is isolated and answer equals PW2[N], so
        # such draws are rejected; the stored result excludes the empty set.
        # NOTE(review): the comparison is between residues, so a draw is also
        # rejected when the count merely coincides with 2^N modulo MOD. For
        # small MOD this may reject most draws. Confirm that this is intended.
        if answer != PW2[N] :
            self.parameter["reference_answer"] = (answer - 1) % MOD
            break
class QuadMagicItems_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2119
    """Environment asking how often each item plays role A, B, C or D in a "magic formation".

    A formation is four values a < b < c < d with b - a = 2 * (d - c) and
    b - a < (c - b) / 3.  Writing t = d - c, this means b = a + 2t and
    c - b >= 6t + 1, which is the parametrisation used by `_generate`.
    """

    prompt_template = \
r"""You are given {N} items, each with a positive value. The values of the items are:
{X}

We say that four items with indices `a, b, c, d` form a **magic formation** if their values satisfy:
- X[a] < X[b] < X[c] < X[d]
- X[b] - X[a] = 2 × (X[d] - X[c])
- X[b] - X[a] < (X[c] - X[b]) / 3

In such a formation, items `a`, `b`, `c`, and `d` are called type `A`, `B`, `C`, and `D` respectively.

**Output Format:** Output {N} lines. The i-th line should contain four integers, representing the number of times the i-th item is used as an `A`, `B`, `C`, and `D` item in any valid magic formation. The four values should be separated by spaces."""

    def __init__(self,
                 weight_range_multiple : int = 1,
                 wrong_format : float = -1.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Store the value-range multiplier and the reward configuration.

        Item values are drawn from [1, N * weight_range_multiple]; the remaining
        keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.weight_range_multiple = weight_range_multiple
        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    def _generate(self) -> None :
        """Draw N random values and compute the per-item A/B/C/D usage counts."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 5, "N should be greater than or equal to 5"

        X = self.parameter["X"] = [random.randint(1, N * self.weight_range_multiple) for _ in range(N)]

        # Multiplicity of every value that occurs among the items.
        MAX = max(X)
        cnt = [0] * (MAX + 1)
        for value in X :
            cnt[value] += 1

        # Per-value usage counters, one array per role.
        as_a = [0] * (MAX + 1)
        as_b = [0] * (MAX + 1)
        as_c = [0] * (MAX + 1)
        as_d = [0] * (MAX + 1)

        # t = d - c.  The tightest formation spans 9*t + 1 values (a .. d),
        # so only t with 9*t + 2 <= MAX can occur.
        for t in range(1, (MAX - 2) // 9 + 1) :
            span = 9 * t + 1

            # Sweep d upwards: `pairs` accumulates every (a, b) pair that is far
            # enough to the left of the current (c, d).
            pairs = 0
            for d in range(span + 1, MAX + 1) :
                a = d - span
                c = d - t
                pairs += cnt[a] * cnt[a + 2 * t]
                as_c[c] += pairs * cnt[d]
                as_d[d] += pairs * cnt[c]

            # Sweep a downwards: `pairs` accumulates every (c, d) pair that is far
            # enough to the right of the current (a, b).
            pairs = 0
            for a in range(MAX - span, 0, -1) :
                b = a + 2 * t
                d = a + span
                pairs += cnt[d - t] * cnt[d]
                as_a[a] += pairs * cnt[b]
                as_b[b] += pairs * cnt[a]

        # Report the counters item by item, in input order.
        gold = [(as_a[value], as_b[value], as_c[value], as_d[value]) for value in X]
        self.parameter["gold_answer"] = gold
        self.parameter["reference_answer"] = "".join("{} {} {} {}\n".format(*row) for row in gold)


    def _prompt_generate(self) -> str :
        """Render the prompt, listing the values with 1-based indices."""
        listing = " ".join("X[{}]={}".format(index + 1, value) for index, value in enumerate(self.parameter["X"]))
        return self.prompt_template.format(N = self.parameter["N"], X = listing)


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse the answer into a list of 4-tuples of ints; return None on any malformed line."""
        if answer is None :
            return None

        rows = []
        for raw_line in answer.strip().splitlines() :
            fields = raw_line.split()
            if not fields :
                continue  # blank lines are ignored
            try :
                row = tuple(int(field) for field in fields)
            except ValueError :
                return None
            if len(row) != 4 :
                return None
            rows.append(row)
        return rows


    def scorer(self, output : str) -> float :
        """Score the output by per-line agreement with the gold counts."""
        rows = self.processor(output)
        if rows is None :
            return self.rewards["wrong_format"]
        assert isinstance(rows, list), "processed_result should be a list"

        if len(rows) != self.parameter["N"] :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta" :
            hits = sum(int(expected == given) for expected, given in zip(self.parameter["gold_answer"], rows))
            return self.rewards["rewarding_weight"] * ((hits / self.parameter["N"]) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == rows)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class QuadraticFunctionSegmentation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3628
    """Environment asking for a split of an array into consecutive batches of maximal total value.

    A batch with sum X is worth A_coef * X^2 + B_coef * X + C_coef, with A_coef < 0.
    The optimum is computed in `_generate` with a monotone convex-hull-trick DP.
    """

    prompt_template = \
r"""You are given {N} numbers A[1], A[2], ..., A[{N}]. The values are given as: {A}

You may divide these numbers (in order) into some **consecutive batches**. Let the total number of batches be k (1 ≤ k ≤ {N}), and let end[1], end[2], ..., end[k] (1 ≤ end[1] < end[2] < ... < end[k] = {N}) denote the last index in each batch. This means:
- Batch 1 contains elements A[1] to A[end[1]]
- Batch 2 contains elements A[end[1] + 1] to A[end[2]]
- ...
- Batch k contains elements A[end[k−1] + 1] to A[end[k]] (with end[k] = {N})

Define the value of a batch with sum X as: **{A_coef} × X² + {B_coef} × X + {C_coef}**. The total value of the division is the **sum of values of all batches**. I am asking you to find a batch division that **maximizes** this total value.

Output a single line containing `end[1] end[2] ... end[k]`, separated by spaces (with `end[k]` always equal to {N}`).
Example: `1 2 {N}` means:
- There are 3 batches,
- First batch ends at index 1,
- Second ends at index 2,
- Third ends at index {N} and includes the remaining elements."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = 1.0,
                 **kwargs) :
        """
        Initialize the QuadraticFunctionSegmentation_Environment instance.

        The keyword arguments configure the reward returned by `scorer`; everything
        else is forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    def compute_value(self, X) -> int :
        """Return the value A_coef * X^2 + B_coef * X + C_coef of a batch whose sum is X."""
        return self.parameter["A_coef"] * (X ** 2) + self.parameter["B_coef"] * X + self.parameter["C_coef"]


    def _generate(self) -> None :
        """Sample an instance whose optimum is positive and beats every trivial split.

        Instances are re-drawn until the DP optimum is strictly better than
        (a) one batch per element, (b) a single batch, and (c) every split into
        two batches, and is also strictly positive (the "(answer/gold)^beta"
        reward divides by it).
        """
        # Local import: slopes are compared exactly instead of through float
        # division, which can misorder nearly-equal slopes of large integers.
        from fractions import Fraction

        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        while True :
            xs = self.parameter["xs"] = [random.randint(1, N) for _ in range(N)]
            A = self.parameter["A_coef"] = -random.randint(1, N)
            B = self.parameter["B_coef"] = random.randint(1, random.randint(1, N) * random.randint(1, N) * random.randint(1, N))
            C = self.parameter["C_coef"] = random.randint(-random.randint(1, N) * random.randint(1, N) * random.randint(1, N) * random.randint(1, N) * random.randint(1, N),
                                                          +random.randint(1, N) * random.randint(1, N) * random.randint(1, N) * random.randint(1, N) * random.randint(1, N))

            # s[i] = xs[0] + ... + xs[i - 1]; strictly increasing because xs >= 1.
            s = [0] * (N + 1)
            for i in range(1, N + 1) :
                s[i] = s[i - 1] + xs[i - 1]

            # d[i] = best total value for the first i elements.
            d = [0] * (N + 1)

            # Array-backed monotone queue holding hull candidates q[head..tail].
            q = [0] * (N + 1)
            head = tail = 0
            q[0] = 0

            def K(i) :
                # Query slope for position i.
                return 2 * A * s[i]

            def Y(i) :
                # Hull ordinate of candidate i: d[i] + A*s[i]^2 - B*s[i].
                return d[i] + A * s[i] * s[i] - B * s[i]

            def slope(i, j) :
                # Exact slope between candidates i and j; s[i] != s[j] for i != j.
                return Fraction(Y(i) - Y(j), s[i] - s[j])

            for i in range(1, N + 1) :
                # Drop front candidates that the next candidate beats for this query.
                while head < tail and slope(q[head], q[head + 1]) > K(i) :
                    head += 1

                j = q[head]
                d[i] = Y(j) - K(i) * s[j] + A * s[i] * s[i] + B * s[i] + C

                # Keep the hull's slopes strictly decreasing before appending i.
                while head < tail and slope(q[tail - 1], q[tail]) <= slope(q[tail], i) :
                    tail -= 1

                tail += 1
                q[tail] = i

            self.parameter["gold_answer"] = d[N]

            trivial_best = max(sum(self.compute_value(x) for x in xs), self.compute_value(sum(xs)))
            prefix_sum, suffix_sum = 0, sum(xs)
            for x in xs :
                prefix_sum += x
                suffix_sum -= x
                if prefix_sum > 0 and suffix_sum > 0 :
                    trivial_best = max(trivial_best, self.compute_value(prefix_sum) + self.compute_value(suffix_sum))
            if self.parameter["gold_answer"] > trivial_best :
                if self.parameter["gold_answer"] > 0 :
                    break
            else :
                # The trivial splits are feasible, so the optimum can never be below them.
                assert self.parameter["gold_answer"] == trivial_best, "Gold answer should never be below the trivial best"


    def _prompt_generate(self) -> str :
        """Render the prompt with the numbers (1-based, one per line) and the coefficients."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            A = "\n".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["xs"], start = 1)),
            A_coef = self.parameter["A_coef"],
            B_coef = self.parameter["B_coef"],
            C_coef = self.parameter["C_coef"],
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of ints; return None if empty or malformed."""
        if answer is None :
            return None # Invalid answer format
        try :
            answer_array = list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format
        if not answer_array :
            return None
        return answer_array


    def scorer(self, output : str) -> float :
        """Validate the batch end indices and reward the achieved total value."""
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]

        # The ends must be strictly increasing, lie in [1, N] and finish at N.
        ends = processed_result
        if not (1 <= len(ends) <= N) :
            return self.rewards["invalid_solution"]
        for i, end in enumerate(ends) :
            if not (1 <= end <= N) :
                return self.rewards["invalid_solution"]
            if i and not (ends[i - 1] < end) :
                return self.rewards["invalid_solution"]
        if ends[-1] != N :
            return self.rewards["invalid_solution"]

        xs = self.parameter["xs"]
        answer = 0
        last = 0
        for end in ends :
            # 1-based batch (last + 1 .. end) is the 0-based slice xs[last:end].
            answer += self.compute_value(sum(xs[last : end]))
            last = end
        gold = self.parameter["gold_answer"]
        assert answer <= gold, "Answer should not be greater than gold answer"

        if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
            answer = max(answer, 0)
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold == answer)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class QuantumLockPuzzle_Environment(VerifiableEnvironment) :
    """Button-press puzzle: reach a target Y while a 0/1 state X toggles on every press.

    Every button carries two rules, one applied when X is 0 and one when X is 1.
    """

    prompt_template = \
r"""There is a 0/1 variable X, which is initially 0. You also have a variable Y, which starts at {Y_start}. You can press the buttons in any order, and you may press the same button multiple times. There are {N} buttons in total. Each time you press **any** button, X toggles: it becomes 1 - X.

When X is 0 and you press a button, Y changes according to the following rules:
{X0_rules}

When X is 1 and you press a button, Y changes according to the following rules:
{X1_rules}

Please find a sequence of button presses that will make Y equal to {Y_target}.

**Output Format:** Your final answer should be a single line containing the sequence of button presses in order, separated by spaces. For example, `0 1 0 2` means you pressed button 0, then button 1, then button 0 again, and finally button 2. Do **NOT** include backticks or quotes in your output."""

    def __init__(self,
                 operation_weights : Optional[List[float]] = [0.4, 0.4, 0.2],
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0,
                 **kwargs) :
        """
        Initialize the QuantumLockPuzzle_Environment instance.

        operation_weights gives the sampling weights of the operations "+", "-"
        and "*" (in that order); None makes `random.choices` sample uniformly.
        The remaining keyword arguments configure the rewards or are forwarded
        to the base class.
        """
        super().__init__(**kwargs)

        # Copy the weights: the default list object is shared by every call, so
        # storing it by reference would let one instance's mutation leak into others.
        self.operation_weights = None if operation_weights is None else list(operation_weights)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "wrong_solution" : wrong_solution,
            "correct_solution" : correct_solution,
        }


    def operate(self, Y : int, rule : List) -> int :
        """Apply one `[operation, value]` rule to Y and return the new value.

        Raises NotImplementedError for an operation other than "+", "-" or "*".
        """
        operation, value = rule
        if operation == "+" :
            return Y + value
        elif operation == "-" :
            return Y - value
        elif operation == "*" :
            return Y * value
        else :
            raise NotImplementedError(f"Unknown operation: {operation}")


    def _random_rule(self, N : int) -> List :
        """Draw one `[operation, value]` rule: "+"/"-" with value in [1, N], "*" with 2 or 3."""
        operation = random.choices(["+", "-", "*"], weights = self.operation_weights, k = 1)[0]
        if operation in ("+", "-") :
            value = random.randint(1, N)
        elif operation in ("*", ) :
            value = random.randint(2, 3)
        else :
            raise NotImplementedError
        return [operation, value]


    def _generate(self) -> None :
        """Sample the buttons, simulate a random press sequence and pick the target.

        The target is the last value of Y that had not been seen earlier in the
        walk, and the reference answer is the press prefix that first reaches it.
        If the walk never leaves the start value, the target is the start value
        and the reference answer is empty.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"
        assert "steps" in self.parameter, "steps is required in parameter"
        steps = self.parameter["steps"]
        assert steps >= 2, "steps should be greater than or equal to 2"

        Y = self.parameter["Y_start"] = random.randint(-N, +N)
        # buttons[i] = [rule used when X == 0, rule used when X == 1]
        buttons = self.parameter["buttons"] = []
        for _ in range(N) :
            buttons.append([self._random_rule(N) for _ in range(2)])

        steps += random.randint(0, 1)

        X = 0
        pressed_buttons = []
        existing_Y = {Y}
        # Length of the press prefix that reaches the chosen target, tracked in
        # locals so the outcome never depends on keys already present in
        # self.parameter.
        best_length = 0
        target = Y
        for _ in range(steps) :
            button = random.randint(0, N - 1)
            pressed_buttons.append(button)
            Y = self.operate(Y, buttons[button][X])
            X = 1 - X
            if Y not in existing_Y :
                existing_Y.add(Y)
                best_length = len(pressed_buttons)
                target = Y

        self.parameter["reference_answer"] = " ".join(map(str, pressed_buttons[: best_length]))
        self.parameter["Y_target"] = target


    def _prompt_generate(self) -> str :
        """Render the prompt with one rule line per button for each state of X."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            Y_start = self.parameter["Y_start"],
            Y_target = self.parameter["Y_target"],
            X0_rules = "\n".join("When you press button {}, Y becomes Y {} {}".format(i, button[0][0], button[0][1]) for i, button in enumerate(self.parameter["buttons"])),
            X1_rules = "\n".join("When you press button {}, Y becomes Y {} {}".format(i, button[1][0], button[1][1]) for i, button in enumerate(self.parameter["buttons"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of button indices; return None if malformed."""
        if answer is None :
            return None # Invalid answer format
        try :
            return list(map(int, answer.strip().split()))
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """Replay the presses from the start state and check that Y ends at the target."""
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        X, Y = 0, self.parameter["Y_start"]
        for button in processed_result :
            if not (0 <= button < self.parameter["N"]) :
                return self.rewards["invalid_solution"]
            Y = self.operate(Y, self.parameter["buttons"][button][X])
            X = 1 - X

        if Y == self.parameter["Y_target"] :
            return self.rewards["correct_solution"]
        else :
            return self.rewards["wrong_solution"]
class QueenPlacement_Environment(VerifiableEnvironment) :
    """Environment asking to add K queens to a partially filled board without any attack."""

    prompt_template = \
r"""You are given an {N} × {N} chessboard grid. Some cells already contain queens (denoted by 'Q'), and the rest are empty ('.').
{grid}

Please place **{K} additional queens** such that **no two queens threaten each other**. A queen threatens another if they share the same **row**, **column**, or **diagonal** (both main and anti-diagonals).

**Output Format:** Output {N} lines, each containing a string of length {N}. Each string represents a row of the grid using 'Q' for a queen and '.' for an empty cell."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0,
                 **kwargs) :
        """
        Store the reward values; all other keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "wrong_solution": wrong_solution,
            "correct_solution": correct_solution,
        }


    def _generate(self) -> None :
        """Greedily fill a board with non-attacking queens, then remove K of them."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        board = self.parameter["grid"] = [["."] * N for _ in range(N)]

        # Visit every cell once, in random order (row-major list before shuffling).
        cells = [(index // N, index % N) for index in range(N * N)]
        random.shuffle(cells)

        used_rows, used_cols, used_main, used_anti = set(), set(), set(), set()
        placed = []
        for r, c in cells :
            attacked = r in used_rows or c in used_cols or (r - c) in used_main or (r + c) in used_anti
            if attacked :
                continue
            board[r][c] = "Q"
            placed.append((r, c))
            used_rows.add(r)
            used_cols.add(c)
            used_main.add(r - c)
            used_anti.add(r + c)

        # The full greedy placement is one valid completion of the puzzle.
        self.parameter["reference_answer"] = "\n".join("".join(line) for line in board)

        K = self.parameter["K"] = random.randint(1, max(1, len(placed) // 2))

        # Erase K queens; these are the ones the solver has to put back (or replace).
        for r, c in random.sample(placed, K) :
            board[r][c] = "."


    def _prompt_generate(self) -> str :
        """Render the prompt with the partially filled board."""
        rendered = "\n".join("".join(line) for line in self.parameter["grid"])
        return self.prompt_template.format(
            N = self.parameter["N"],
            K = self.parameter["K"],
            grid = rendered,
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Return the non-empty, stripped lines of the answer (None if there is no answer)."""
        if answer is None :
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]


    def scorer(self, output : str) -> float :
        """Check shape, preserved queens, absence of attacks and the number of added queens."""
        board = self.processor(output)
        if board is None :
            return self.rewards["wrong_format"]
        assert isinstance(board, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(board) != N :
            return self.rewards["wrong_format"]
        if any(len(line) != N for line in board) :
            return self.rewards["wrong_format"]
        if any(cell not in ("Q", ".") for line in board for cell in line) :
            return self.rewards["wrong_format"]

        added = 0
        used_rows, used_cols, used_main, used_anti = set(), set(), set(), set()
        for r, (given_line, answer_line) in enumerate(zip(self.parameter["grid"], board)) :
            for c, (given, current) in enumerate(zip(given_line, answer_line)) :
                if given == "Q" :
                    # Pre-placed queens must stay where they are.
                    if current != "Q" :
                        return self.rewards["invalid_solution"]
                else :
                    assert given == ".", "original cell should be empty"
                    added += (current == "Q")
                if current != "Q" :
                    continue
                if r in used_rows or c in used_cols or (r - c) in used_main or (r + c) in used_anti :
                    return self.rewards["wrong_solution"]
                used_rows.add(r)
                used_cols.add(c)
                used_main.add(r - c)
                used_anti.add(r + c)

        if added == self.parameter["K"] :
            return self.rewards["correct_solution"]
        return self.rewards["wrong_solution"]
class RandomRangeMaxExpectation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3352
    """Environment asking for scaled expected values after Q random "set subarray to its max" operations."""

    prompt_template = \
r"""You are given an array of {N} integers: {array}

You will perform {Q} operations in order. In each operation, you uniformly select a subarray (a contiguous segment of the array) at random from all {N} × ({N} + 1) / 2 possible subarrays. Then, all elements in that subarray are changed to the **maximum** value within it.

Please compute the expected value of each position in the array after all {Q} operations. Since the expected value is a rational number with denominator ({N} × ({N} + 1) / 2)^{Q}, output the **numerator** (i.e., the expected value multiplied by ({N} × ({N} + 1) / 2)^{Q}), modulo {MOD}.

**Output Format:** A single line containing {N} integers — the scaled expected values (modulo {MOD}) for each position, separated by spaces."""
    MOD = 10000

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 3.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Store the reward configuration; all other keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_beta" : rewarding_beta,
            "rewarding_weight" : rewarding_weight,
        }


    def _generate(self) -> None :
        """Draw Q and the array, then run the interval DP to obtain the answers modulo MOD."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        Q = self.parameter["Q"] = random.randint(1, N)

        A = self.parameter["array"] = [random.randint(0, N) for _ in range(N)]

        MOD = self.MOD

        def subarray_count(x) :
            # Number of subarrays of a length-x segment, reduced modulo MOD.
            return x * (x + 1) // 2 % MOD

        # Sentinel strictly larger than every array value, used past both ends.
        INF = max(A) + 1

        # keep[l][r]: weight with which state (l, r) maps to itself in one step.
        # It sums the subarray counts of the interval [l, r], of the part to
        # its left and of the part to its right.
        keep = [[0] * N for _ in range(N)]
        for l in range(N) :
            for r in range(l, N) :
                keep[l][r] = (subarray_count(r - l + 1) + subarray_count(l) + subarray_count(N - 1 - r)) % MOD

        # Base table (before any operation); only entries with l <= r are used.
        state = [[0] * N for _ in range(N)]
        for l in range(N) :
            running_max = 0
            for r in range(l, N) :
                running_max = max(running_max, A[r])
                if l == 0 and r == N - 1 :
                    # Whole array.
                    state[l][r] = running_max % MOD
                    continue
                left_val = A[l - 1] if l > 0 else INF
                right_val = A[r + 1] if r < N - 1 else INF
                # Only intervals whose two neighbours are both strictly larger contribute.
                if left_val > running_max and right_val > running_max :
                    state[l][r] = (running_max - min(left_val, right_val)) % MOD

        # One round per operation.
        for _ in range(Q) :
            following = [[0] * N for _ in range(N)]

            # Self term plus the contribution of states (l', r) with l' < l,
            # each weighted by l'.
            for r in range(N) :
                acc = 0
                for l in range(r + 1) :
                    following[l][r] = state[l][r] * keep[l][r] + acc
                    acc = (acc + state[l][r] * l) % MOD

            # Contribution of states (l, r') with r' > r, each weighted by N - 1 - r'.
            for l in range(N) :
                acc = 0
                for r in range(N - 1, l - 1, -1) :
                    following[l][r] = (following[l][r] + acc) % MOD
                    acc = (acc + state[l][r] * (N - 1 - r)) % MOD

            state = following

        # Position i collects every interval [l, r] that contains it.
        result = [
            sum(state[l][r] for l in range(i + 1) for r in range(i, N)) % MOD
            for i in range(N)
        ]

        assert len(result) == N
        self.parameter["gold_answer"] = result
        self.parameter["reference_answer"] = " ".join(map(str, result))


    def _prompt_generate(self) -> str :
        """Render the prompt with the array, the number of operations and the modulus."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            array = " ".join(map(str, self.parameter["array"])),
            Q = self.parameter["Q"],
            MOD = self.MOD,
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of ints; return None if malformed."""
        if answer is None :
            return None # Invalid answer format
        try :
            return [int(token) for token in answer.strip().split()]
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """Score the output by per-position agreement with the gold answer."""
        values = self.processor(output)
        if values is None :
            return self.rewards["wrong_format"]
        assert isinstance(values, list), "processed_result should be a list"

        if len(values) != self.parameter["N"] :
            return self.rewards["invalid_solution"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta" :
            hits = sum(int(expected == given) for expected, given in zip(self.parameter["gold_answer"], values))
            return self.rewards["rewarding_weight"] * ((hits / self.parameter["N"]) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == values)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class RangeConstrained_IncreasingSequence_Counting_Environment(VerifiableEnvironment):
    """Environment counting sequences whose non-zero entries are range-bounded and strictly increasing."""

    prompt_template = \
r"""Count the number of integer sequences A[0], A[1], ..., A[{N_minus_1}] of length {N} such that:
- For each A[i], it is either 0 or an integer in [L[i], R[i]]
- At least one A[i] is greater than 0
- All non-zero A[i] form a strictly increasing sequence in order (i.e., if A[i] > 0 and A[j] > 0 with i < j, then A[i] < A[j])

The bounds L[i] and R[i] for each position are given as:
{L_and_R}

Output the number of such sequences modulo {MOD}.
"""
    MOD = 10 ** 9 + 7

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Store the reward values; all other keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """Draw the bounds and count the valid sequences with a segment-by-segment DP."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        L = self.parameter["L"] = [random.randint(1, N * N) for _ in range(N)]
        R = self.parameter["R"] = [random.randint(Li, N * N) for Li in L]

        MOD = self.MOD

        # Coordinate compression over the half-open ranges [L[i], R[i] + 1).
        coords = sorted({point for lo, hi in zip(L, R) for point in (lo, hi + 1)})
        first_seg = [bisect_left(coords, lo) for lo in L]
        end_seg = [bisect_left(coords, hi + 1) for hi in R]

        # Modular inverses of 1..N.
        inv = [0] * (N + 1)
        inv[1] = 1
        for i in range(2, N + 1) :
            inv[i] = (MOD - MOD // i) * inv[MOD % i] % MOD

        # choose[k] = C(width + k - 1, k) for the segment being processed.
        choose = [0] * (N + 1)
        # ways[i]: number of ways in which position i (1-based) is the last non-zero
        # entry so far; ways[0] is the empty start state.
        ways = [0] * (N + 1)
        ways[0] = 1

        for seg in range(len(coords) - 1) :
            width = coords[seg + 1] - coords[seg]
            choose[0] = 1
            for k in range(1, N + 1) :
                choose[k] = choose[k - 1] * (width + k - 1) % MOD * inv[k] % MOD

            # covers[i]: does the range of position i (0-based) contain this segment?
            covers = [first_seg[i] <= seg < end_seg[i] for i in range(N)]

            # Descending i so that ways[p] for p < i still holds the values from
            # before this segment.
            for i in range(N, 0, -1) :
                if not covers[i - 1] :
                    continue
                gained = 0
                eligible = 1      # positions in (p, i] that can take a value in this segment
                coef = width
                for p in range(i - 1, -1, -1) :
                    gained = (gained + coef * ways[p]) % MOD
                    if p > 0 and covers[p - 1] :
                        eligible += 1
                        coef = choose[eligible]
                ways[i] = (ways[i] + gained) % MOD

        # Every state except the empty one has at least one non-zero entry.
        self.parameter["reference_answer"] = sum(ways[1:]) % MOD


    def _prompt_generate(self) -> str :
        """Render the prompt with the per-position bounds (0-based indices)."""
        N = self.parameter["N"]
        bounds = "\n".join(
            "L[{}]={} R[{}]={}".format(i, Li, i, Ri)
            for i, (Li, Ri) in enumerate(zip(self.parameter["L"], self.parameter["R"]))
        )
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            L_and_R = bounds,
            MOD = self.MOD,
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as a single int; return None if that is not possible."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """Reward an in-range answer that equals the reference count."""
        value = self.processor(output)
        if value is None :
            return self.rewards["wrong_format"]
        if not (0 <= value < self.MOD) :
            return self.rewards["wrong_range"]
        if value == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
import random
from typing import Optional, List
from itertools import combinations, product
from ...environment import VerifiableEnvironment


class RangeFourSequenceConstruction_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3876
    prompt_template = \
r"""Find a sequence of {N} integers, each being 0, 1, 2, or 3, such that no two adjacent elements form any of the pairs: '00', '11', '22', '33', '02', '20', '23', '32', '13', '31'. The sequence must also satisfy the following additional conditions: each condition is given in the form `(p_1, ..., p_L)`, meaning that the elements at positions p_1, ..., p_L (positions are numbered from 1 to {N} from left to right) must all be different.
{conditions}

Output the {N} integers of the sequence in order, separated by spaces."""

    def __init__(self,
                 wrong_format : float = -1.0,
                 invalid_solution : float = -0.5,
                 rewarding_strategy : str = "(satisfied/all)^beta",
                 rewarding_weight : float = +1.0,
                 rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Build the environment and store the reward configuration.

        `wrong_format` is returned when the output cannot be parsed,
        `invalid_solution` when the sequence breaks the length, alphabet or
        adjacency rules. The three `rewarding_*` values control how the
        fraction of satisfied conditions is converted into a reward.
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Sample a hidden valid sequence and derive "all different" conditions from it.

        Reads `self.parameter["N"]` (must be >= 3) and writes
        `self.parameter["conditions"]` and `self.parameter["reference_answer"]`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Ordered pairs (previous, next) that may not appear side by side.
        forbidden = ((0, 0), (1, 1), (2, 2), (3, 3), (0, 2), (2, 0), (2, 3), (3, 2), (1, 3), (3, 1))

        raw_weights = [random.randint(1, N) for _ in range(4)]
        weight_total = sum(raw_weights)
        weights = [w / weight_total for w in raw_weights]

        # Rejection sampling: keep drawing until the new value is compatible with the last one.
        sequence = []
        while len(sequence) < N :
            candidate = random.choices([0, 1, 2, 3], weights)[0]
            if sequence and (sequence[-1], candidate) in forbidden :
                continue
            sequence.append(candidate)

        # positions[v] lists the 1-based indices at which value v occurs.
        positions = [[] for _ in range(4)]
        for index, value in enumerate(sequence, start = 1) :
            positions[value].append(index)

        # Every way of picking one position per value, for every set of 2..4 distinct values,
        # is a condition the hidden sequence satisfies by construction.
        candidates = []
        for size in (2, 3, 4) :
            for values in combinations(range(4), size) :
                assert len(values) == len(set(values)) == size, "As should be distinct"
                for chosen in product(*(positions[v] for v in values)) :
                    assert all(sequence[p - 1] == v for p, v in zip(chosen, values)), "A[p - 1] should equal Ap"
                    candidates.append(list(chosen))

        how_many = random.randint(1, min(2 * N, len(candidates)))
        conditions = random.sample(candidates, how_many)
        self.parameter["conditions"] = conditions
        for condition in conditions :
            random.shuffle(condition)

        self.parameter["reference_answer"] = " ".join(str(value) for value in sequence)


    def _prompt_generate(self) -> str :
        """
        Render the prompt, listing the conditions one per line.
        """
        rendered = []
        for number, condition in enumerate(self.parameter["conditions"], start = 1) :
            rendered.append("Condition {}: ({})".format(number, ", ".join(map(str, condition))))
        return self.prompt_template.format(
            N = self.parameter["N"],
            conditions = "\n".join(rendered),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Parse a whitespace-separated list of integers; return None if that fails.
        """
        if answer is None :
            return None # Invalid answer format
        try :
            return [int(token) for token in answer.strip().split()]
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score an output: format check, structural validity, then the share of satisfied conditions.
        """
        sequence = self.processor(output)
        if sequence is None :
            return self.rewards["wrong_format"]
        assert isinstance(sequence, list), "processed_result should be a list"

        if len(sequence) != self.parameter["N"] :
            return self.rewards["invalid_solution"]
        if any(x not in (0, 1, 2, 3) for x in sequence) :
            return self.rewards["invalid_solution"]

        forbidden = ((0, 0), (1, 1), (2, 2), (3, 3), (0, 2), (2, 0), (2, 3), (3, 2), (1, 3), (3, 1))
        if any(pair in forbidden for pair in zip(sequence, sequence[1 :])) :
            return self.rewards["invalid_solution"]

        conditions = self.parameter["conditions"]
        satisfied = 0
        for condition in conditions :
            picked = [sequence[p - 1] for p in condition]
            # Pairwise different is the same as "no repeated value".
            if len(set(picked)) == len(picked) :
                satisfied += 1
        assert satisfied <= len(conditions), "satisfied should not exceed the number of conditions"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta" :
            return self.rewards["rewarding_weight"] * ((satisfied / len(conditions)) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all" :
            return self.rewards["rewarding_weight"] * (satisfied == len(conditions))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
import random
from array import array
from typing import Optional
from ...environment import VerifiableEnvironment


# Counting task: the prompt gives per-position upper bounds R and asks for the number of
# sequences obeying the "shrinking range" rule, modulo a randomly drawn MOD.
class RangeShrinkingSequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4063
    prompt_template = \
r"""Count the number of sequences A[1], A[2], ..., A[{N}] such that:
- For each i (1 ≤ i ≤ {N}), 1 ≤ A[i] ≤ R[i], where R is given as: {R}
- For each i (3 ≤ i ≤ {N}):
  - Let r = the minimum value among A[1], ..., A[i−2] that is ≥ A[i−1] (if none exists, r = +∞).
  - Let l = the maximum value among A[1], ..., A[i−2] that is ≤ A[i−1] (if none exists, l = −∞).
  - Then A[i] must satisfy l ≤ A[i] ≤ r.

Can you let me know the number of valid sequences modulo {MOD}?"""

    def __init__(self,
                 max_MOD : int = 1000000,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the RangeShrinkingSequenceCounting_Environment instance.

        max_MOD is the inclusive upper bound for the modulus drawn in `_generate`.
        The four reward values are what `scorer` returns for an unparsable output,
        an integer outside [0, MOD), a correct answer, and an incorrect answer.
        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Sample an instance and compute its reference answer.

        Reads `self.parameter["N"]` (must be >= 3). Writes `self.parameter["R"]`,
        `self.parameter["MOD"]` and `self.parameter["reference_answer"]`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Draw a hidden sequence whose every new element stays inside a running window [l, r];
        # after each step the window edge on the side of the previous element moves to the new element.
        shrinking_sequence = [random.randint(1, N), random.randint(1, N)]
        l, r = 1, N
        if shrinking_sequence[0] >= shrinking_sequence[1] :
            r = shrinking_sequence[1]
        if shrinking_sequence[0] <= shrinking_sequence[1] :
            l = shrinking_sequence[1]
        for i in range(2, N) :
            shrinking_sequence.append(random.randint(l, r))
            if shrinking_sequence[i - 1] >= shrinking_sequence[i] :
                assert shrinking_sequence[i - 1] <= r
                r = shrinking_sequence[i]
            if shrinking_sequence[i - 1] <= shrinking_sequence[i] :
                assert shrinking_sequence[i - 1] >= l
                l = shrinking_sequence[i]
            assert 1 <= l <= r <= N
        # Each bound R[i] is at least the hidden element at that position.
        # NOTE(review): presumably this guarantees at least one valid sequence exists — confirm.
        self.parameter["R"] = R = [random.randint(a, N) for a in shrinking_sequence]

        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)


        MAXV = max(R) if R else 0
        SENT = MAXV + 1  # sentinel "+inf" equivalent used in the C++ code (151 there)
        SIZE = SENT + 2  # +1 for 1-based shift, +1 so we can safely index r+2 etc.
        TOT = SIZE * SIZE * SIZE

        # Helper to compute flattened 3D index with a 1-based shift on each axis.
        # We store f[L+1][R+1][x+1] at flat index ((L1*SIZE + R1) * SIZE + X1)
        def base_idx(L1, R1):
            return (L1 * SIZE + R1) * SIZE

        # Modular add/sub on array('I') cells (values kept in [0, MOD))
        def add_at(A, idx, val):
            s = A[idx] + val
            if s >= MOD:
                s -= MOD
            A[idx] = s

        def sub_at(A, idx, val):
            cur = A[idx]
            if cur >= val:
                A[idx] = cur - val
            else:
                # Add MOD before storing so the unsigned cell never receives a negative value.
                A[idx] = cur - val + MOD

        # DP arrays as flat typed arrays (memory efficient vs nested Python lists)
        # NOTE(review): state (L, RR, x) presumably means "lower bound L, upper bound RR
        # (0 / SENT standing for -inf / +inf), last element x" — confirm against the source solution.
        f = array('I', [0]) * TOT
        g = array('I', [0]) * TOT

        # Initialization: for i in 1..R[0], f[0][SENT][i] = 1 (shifted indices)
        L0 = 0
        Rinf = SENT
        L1 = L0 + 1
        R1 = Rinf + 1
        b = base_idx(L1, R1)
        for x in range(1, R[0] + 1):
            X1 = x + 1
            f[b + X1] = 1

        # Iterate positions 2..N
        for i in range(1, N):  # Python 0-based: position i corresponds to a[i], so start from index 1
            Ai = R[i]
            # reset g to zeros
            g = array('I', [0]) * TOT

            # transitions
            # Each transition writes a range update into g as a difference array:
            # +c at the first target x, -c one past the last; the prefix-sum pass below resolves them.
            for L in range(0, SENT + 1):
                L1 = L + 1
                for RR in range(L, SENT + 1):
                    R1 = RR + 1
                    bf = base_idx(L1, R1)
                    for x in range(L, RR + 1):
                        X1 = x + 1
                        c = f[bf + X1]
                        if c == 0:
                            continue

                        # 1) choose in (L, min(x-1, Ai))
                        l = L + 1
                        r = min(x - 1, Ai)
                        if l <= r:
                            # target pair (L, x)
                            tgtL1 = L1
                            tgtR1 = X1  # since new R becomes x
                            bg = base_idx(tgtL1, tgtR1)
                            add_at(g, bg + (l + 1), c)          # + at l
                            sub_at(g, bg + (r + 1 + 1), c)      # - at r+1

                        # 2) choose in (x+1, min(RR-1, Ai))
                        l = x + 1
                        r = min(RR - 1, Ai)
                        if l <= r:
                            # target pair (x, RR)
                            tgtL1 = X1
                            tgtR1 = R1
                            bg = base_idx(tgtL1, tgtR1)
                            add_at(g, bg + (l + 1), c)
                            sub_at(g, bg + (r + 1 + 1), c)

                        # 3) choose L exactly if valid (L > 0 and L <= Ai)
                        if L != 0 and L <= Ai:
                            tgtL1 = L1
                            tgtR1 = L1
                            bg = base_idx(tgtL1, tgtR1)
                            add_at(g, bg + (L + 1), c)          # at position L
                            sub_at(g, bg + (L + 1 + 1), c)      # at L+1

                        # 4) choose RR exactly if RR is a real bound (RR <= MAXV), RR <= Ai, and L != RR
                        if RR <= Ai and RR <= MAXV and L != RR:
                            tgtL1 = R1
                            tgtR1 = R1
                            bg = base_idx(tgtL1, tgtR1)
                            add_at(g, bg + (RR + 1), c)
                            sub_at(g, bg + (RR + 1 + 1), c)

                        # 5) choose x exactly if x <= Ai and it's not equal to L or RR
                        if x <= Ai and L != x and RR != x:
                            tgtL1 = X1
                            tgtR1 = X1
                            bg = base_idx(tgtL1, tgtR1)
                            add_at(g, bg + (x + 1), c)
                            sub_at(g, bg + (x + 1 + 1), c)

            # prefix sums along the 3rd dimension: g[L][R][x] += g[L][R][x-1]
            for L in range(0, SENT + 1):
                L1 = L + 1
                for RR in range(L, SENT + 1):
                    R1 = RR + 1
                    bg = base_idx(L1, R1)
                    pref = 0
                    # x runs from L..RR, we use shifted index (x+1)
                    for x in range(L, RR + 1):
                        X1 = x + 1
                        val = g[bg + X1]
                        s = val + pref
                        if s >= MOD:
                            s -= MOD
                        g[bg + X1] = s
                        pref = s

            # f = g for next iteration
            f = g

        # Sum all f[L][R][x] over 0<=L<=R<=SENT, L<=x<=R
        ans = 0
        for L in range(0, SENT + 1):
            L1 = L + 1
            for RR in range(L, SENT + 1):
                R1 = RR + 1
                bf = base_idx(L1, R1)
                for x in range(L, RR + 1):
                    X1 = x + 1
                    val = f[bf + X1]
                    ans += val
                    if ans >= MOD:
                        ans -= MOD

        self.parameter["reference_answer"] = ans


    def _prompt_generate(self) -> str :
        """
        Render the prompt; R is listed with 1-based indices as "R[i]=value".
        """
        return self.prompt_template.format(
            N = self.parameter["N"],
            R = ", ".join("R[{}]={}".format(i, Ri) for i, Ri in enumerate(self.parameter["R"], start = 1)),
            MOD = self.parameter["MOD"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """
        Parse the answer as a single integer; return None when it is missing or not an integer.
        """
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Return the configured reward for `output`.

        wrong_format if nothing parsable was produced, wrong_range if the integer
        is outside [0, MOD), otherwise correct_answer / wrong_answer depending on
        equality with the stored reference answer.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if not (0 <= processed_result < self.parameter["MOD"]) :
                return self.rewards["wrong_range"]
            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class RecursiveFunction_Environment(VerifiableEnvironment) :
    prompt_template = \
r"""Define a function f(m, n) as follows:
1. If m = 0, then f(m, n) = n + 1.
2. If m > 0 and n = 0, then f(m, n) = f(m - 1, 1).
3. If m > 0 and n > 0, then f(m, n) = f(m // 2, f(m // 2, n // 2)) + f(m // 2, f(m // 2, n - 1)). Here, `//` denotes integer division.

Please compute the value of f({M}, {N})
"""

    def __init__(self,
                 wrong_format : float = -1.0,
                 correct_answer : float = +1.0,
                 wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Build the environment and remember the three reward values.

        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Draw M and N from [1, MAX_M_N] and evaluate f(M, N) with memoisation.

        Reads `self.parameter["MAX_M_N"]` (must be >= 1); writes "M", "N" and
        "reference_answer".
        """
        assert "MAX_M_N" in self.parameter, "MAX_M_N is required in parameter"
        MAX_M_N = self.parameter["MAX_M_N"]
        assert MAX_M_N >= 1, "MAX_M_N should be greater than or equal to 1"

        M = random.randint(1, MAX_M_N)
        N = random.randint(1, MAX_M_N)
        self.parameter["M"], self.parameter["N"] = M, N

        memo = {}

        def evaluate(m, n) :
            # Rule 1 is cheap enough that it is never cached.
            if m == 0 :
                return n + 1
            key = (m, n)
            value = memo.get(key)
            if value is None :
                if n == 0 :
                    value = evaluate(m - 1, 1)
                else :
                    half = m // 2
                    value = evaluate(half, evaluate(half, n // 2)) + evaluate(half, evaluate(half, n - 1))
                memo[key] = value
            return value

        self.parameter["reference_answer"] = evaluate(M, N)


    def _prompt_generate(self) -> str :
        """
        Render the prompt for the sampled (M, N).
        """
        return self.prompt_template.format(
            M = self.parameter["M"],
            N = self.parameter["N"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """
        Parse the answer as one integer; None if it is missing or malformed.
        """
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Reward exact equality with the reference answer; penalise unparsable output.
        """
        value = self.processor(output)
        if value is None :
            return self.rewards["wrong_format"]
        if value == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class RecursiveSequenceSumConstruction_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3996
    prompt_template = \
r"""Define a sequence F by:
- F(0) = {F0}
- For every integer n ≥ 1, F(n) = {A} * F(n - 1) + {B}

Output any number of **distinct** positive (F(0) cannot be included) indices n1, n2, ..., nk (k ≥ 1), in one line separated by spaces, such that: F(n1) + F(n2) + ... + F(nk) = {S}."""


    def __init__(self,
                 A_is_1_probability : float = 0.3,
                 wrong_format : float = -1.0,
                 invalid_answer : float = -0.5,
                 correct_answer : float = +1.0,
                 wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Build the environment.

        `A_is_1_probability` is the chance that the multiplier A is fixed to 1
        instead of being drawn from [2, MAX_A]. The other four values are the
        rewards returned by `scorer`. Remaining keyword arguments are forwarded
        to the base class.
        """
        super().__init__(**kwargs)

        self.A_is_1_probability = A_is_1_probability
        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_answer = invalid_answer,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Sample F0, A, B, pick a random non-empty set of indices in [1, N] and publish their sum.

        Reads "MAX_F0", "MAX_A", "MAX_B" and "N" from `self.parameter`; writes
        "F0", "A", "B", "S" and "reference_answer".
        """
        assert "MAX_F0" in self.parameter, "MAX_F0 is required in parameter"
        MAX_F0 = self.parameter["MAX_F0"]
        assert MAX_F0 >= 1, "MAX_F0 should be greater than or equal to 1"

        assert "MAX_A" in self.parameter, "MAX_A is required in parameter"
        MAX_A = self.parameter["MAX_A"]
        assert MAX_A >= 2, "MAX_A should be greater than or equal to 2"

        assert "MAX_B" in self.parameter, "MAX_B is required in parameter"
        MAX_B = self.parameter["MAX_B"]
        assert MAX_B >= 1, "MAX_B should be greater than or equal to 1"

        F0 = random.randint(0, MAX_F0)
        self.parameter["F0"] = F0
        if random.random() < self.A_is_1_probability :
            A = 1
        else :
            A = random.randint(2, MAX_A)
        self.parameter["A"] = A
        B = random.randint(0, MAX_B)
        self.parameter["B"] = B

        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        # terms[n] == F(n) for n in 0..N
        terms = [F0]
        for _ in range(N) :
            terms.append(A * terms[-1] + B)

        subset_size = random.randint(1, N)
        chosen = random.sample(range(1, N + 1), k = subset_size)
        self.parameter["S"] = sum(terms[n] for n in chosen)
        self.parameter["reference_answer"] = " ".join(str(n) for n in chosen)


    def _prompt_generate(self) -> str :
        """
        Render the prompt from F0, A, B and the target sum S.
        """
        fields = {name : self.parameter[name] for name in ("F0", "A", "B", "S")}
        return self.prompt_template.format(**fields)


    def _process(self, answer : Optional[str]) -> Optional[List[int]] :
        """
        Parse a non-empty whitespace-separated list of integers; None otherwise.
        """
        if answer is None :
            return None # Invalid answer format
        try :
            indices = [int(token) for token in answer.strip().split()]
        except ValueError :
            return None # Invalid answer format
        return indices if indices else None


    def scorer(self, output : str) -> float :
        """
        Check that the listed indices are distinct, positive, and that their F-values sum to S.
        """
        indices = self.processor(output)
        if indices is None :
            return self.rewards["wrong_format"]
        assert isinstance(indices, list), "processed_result should be a list"

        chosen = set(indices)
        if len(chosen) != len(indices) :
            return self.rewards["invalid_answer"]
        if any(not (n >= 1) for n in indices) :
            return self.rewards["invalid_answer"]

        largest = max(indices)
        # Refuse to iterate far beyond the range the instance was generated from.
        reference_largest = max(map(int, self.parameter["reference_answer"].split()))
        if largest > reference_largest * 10 :
            return self.rewards["wrong_answer"]

        target = self.parameter["S"]
        multiplier, offset = self.parameter["A"], self.parameter["B"]
        total = 0
        term = self.parameter["F0"]
        for n in range(1, largest + 1) :
            term = multiplier * term + offset
            # Bail out as soon as taking the current term would overshoot the target.
            if total + term > target :
                return self.rewards["wrong_answer"]
            if n in chosen :
                total += term

        if total == target :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]


class RepeatSequenceLNDS_Environment(VerifiableEnvironment):
    prompt_template = \
r"""You are given an array that repeats every {n} elements. The initial pattern is: {a}. This pattern repeats {T} times, creating a total array length of {nT}.

For example, if the initial pattern is [1, 3, 2] and it repeats 2 times, the full array would be [1, 3, 2, 1, 3, 2].

Find the length of the longest non-decreasing subsequence (not necessarily contiguous) in this repeated array.

Your answer should be a single integer."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 incorrect_solution: float = 0.0,
                 correct_solution: float = 1.0,
                 **kwargs):
        """
        Build the environment and remember the three reward values.

        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            incorrect_solution=incorrect_solution,
            correct_solution=correct_solution,
        )

    def _generate(self) -> None:
        """
        Draw the repeat count T and a pattern of n values in [1, n], then solve the instance.

        Reads "n" (>= 2) and "MAX_T" (>= 2); writes "T", "a" and "reference_answer".
        """
        assert "n" in self.parameter, "n is required in parameter"
        n = self.parameter["n"]
        assert n >= 2, "n must be at least 2"

        assert "MAX_T" in self.parameter, "MAX_T is required in parameter"
        MAX_T = self.parameter["MAX_T"]
        assert MAX_T >= 2, "MAX_T must be at least 2"

        T = random.randint(2, MAX_T)
        self.parameter["T"] = T

        pattern = [random.randint(1, n) for _ in range(n)]
        self.parameter["a"] = pattern

        self.parameter["reference_answer"] = self._calculate_longest_nds(pattern, n, T)

    def _calculate_longest_nds(self, a, n, T):
        """
        Length of the longest non-decreasing subsequence of `a` repeated `T` times.

        Simulates at most 2 * n repetitions explicitly and credits every
        remaining repetition with the count of the most frequent value.
        Source: https://codeforces.com/contest/582/submission/282761264
        """
        table_size = max(a) + 1
        # best[v]: longest non-decreasing subsequence seen so far that ends with value v
        best = [0] * table_size
        occurrences = [0] * table_size
        for value in a:
            occurrences[value] += 1

        simulated_rounds = min(T, 2 * n)
        for _ in range(simulated_rounds):
            for value in a:
                best[value] = max(best[:value + 1]) + 1

        extra = (T - n * 2) * max(occurrences)
        return max(best) + max(extra, 0)

    def _prompt_generate(self) -> str:
        """
        Render the prompt with the pattern, its length, the repeat count and the total length.
        """
        n = self.parameter["n"]
        T = self.parameter["T"]
        return self.prompt_template.format(
            n=n,
            T=T,
            nT=n * T,
            a=str(self.parameter["a"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """
        Parse the answer as one integer; None if it is missing or malformed.
        """
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """
        Reward exact equality with the reference answer; penalise unparsable output.
        """
        value = self.processor(output)
        if value is None:
            return self.rewards["wrong_format"]
        if value == self.parameter["reference_answer"]:
            return self.rewards["correct_solution"]
        return self.rewards["incorrect_solution"]
import math
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class RootExtraction_Environment(VerifiableEnvironment) :
    prompt_template = \
r"""Your task is to compute the **{K}-th root of {N}**, that is, find the value of `{N}^(1/{K})`.

Since the result may not be an exact integer, output the value in **decimal form**, as accurate as possible, **up to 5 decimal places**.
If the result has fewer than 5 decimal digits, you may omit trailing zeros.

Output Format:
Your final answer should be a single decimal number.
Example: `2.24573` (do **NOT** include the backticks or quotes).
"""
    def __init__(self,
                 wrong_format : float = -1.0,
                 rewarding_strategy : str = "1/(1+|answer-gold|)^beta",
                 rewarding_weight : float = 1.0,
                 rewarding_beta : float = 2.0,
                 **kwargs) :
        """
        Build the environment, store the reward configuration and derive the passing threshold.

        Raises NotImplementedError when `rewarding_strategy` is not the one
        supported strategy. Remaining keyword arguments are forwarded to the
        base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

        if self.rewards["rewarding_strategy"] != "1/(1+|answer-gold|)^beta" :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        # Reward obtained by an answer that is exactly 1E-4 away from the reference.
        self.passing_reward_threshold = rewarding_weight * ((1 / (1 + 1E-4)) ** rewarding_beta)


    def _generate(self) -> None :
        """
        Draw N in [1, MAX_N] and K in [1, MAX_K]; store the K-th root of N rounded to 5 decimals.
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"

        assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
        MAX_K = self.parameter["MAX_K"]
        assert MAX_K >= 1, "MAX_K should be greater than or equal to 1"

        radicand = random.randint(1, MAX_N)
        degree = random.randint(1, MAX_K)
        self.parameter["N"] = radicand
        self.parameter["K"] = degree
        self.parameter["reference_answer"] = round(radicand ** (1 / degree), 5)


    def _prompt_generate(self) -> str :
        """
        Render the prompt for the sampled N and K.
        """
        return self.prompt_template.format(
            N = self.parameter["N"],
            K = self.parameter["K"],
        )


    def _process(self, answer : Optional[str]) -> Optional[float] :
        """
        Parse the answer as a finite float; None for missing, malformed, inf or nan input.
        """
        if answer is None :
            return None
        try :
            value = float(answer.strip())
        except ValueError :
            return None
        return value if math.isfinite(value) else None


    def scorer(self, output : str) -> float :
        """
        Reward decays with the absolute distance between the answer and the reference.
        """
        value = self.processor(output)
        if value is None :
            return self.rewards["wrong_format"]
        if self.rewards["rewarding_strategy"] != "1/(1+|answer-gold|)^beta" :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        distance = abs(value - self.parameter["reference_answer"])
        return self.rewards["rewarding_weight"] * ((1 / (1 + distance)) ** self.rewards["rewarding_beta"])


class RoundRobin_Environment(VerifiableEnvironment) :
    prompt_template = \
r"""Please construct an {N} × {N} matrix, where each element is either 0, 1, or 2. Denote the matrix as A (0-indexed), and it must satisfy the following conditions:
1. A[i][i] = 0 for all i.
2. For all i ≠ j (0 ≤ i, j < {N}), A[i][j] + A[j][i] = 2 (i.e., one of the following holds: A[i][j] = 0 and A[j][i] = 2; A[i][j] = 2 and A[j][i] = 0; or A[i][j] = A[j][i] = 1).
3. Define W[i] = 3 × (number of positions j where A[i][j] = 2) + 1 × (number of positions j where A[i][j] = 1). The final values of W[0], ..., W[{N_minus_1}] must be exactly: {W}

**Output Format:** Output {N} lines, each containing {N} digits (0, 1, or 2) with no separators. The i-th line should represent A[i][0], A[i][1], ..., A[i][{N_minus_1}]."""

    def __init__(self,
                 wrong_format : float = -1.0,
                 invalid_solution : float = -0.5,
                 rewarding_strategy : str = "mean([gold=answer])^beta",
                 rewarding_beta : float = 5.0,
                 rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Build the environment and store the reward configuration.

        Remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_beta = rewarding_beta,
            rewarding_weight = rewarding_weight,
        )


    def _generate(self) -> None :
        """
        Sample a result matrix and publish the score vector W it produces.

        Reads "N" (>= 3); writes "W" and "reference_answer".
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        tie_probability = random.random()
        A = [[0] * N for _ in range(N)]
        # Decide each pairing once (upper triangle, row by row) and mirror it below the diagonal.
        for i in range(N) :
            for j in range(i + 1, N) :
                if random.random() < tie_probability :
                    outcome = 1
                else :
                    outcome = random.choice([0, 2])
                A[i][j] = outcome
                A[j][i] = 2 - outcome

        self.parameter["W"] = [3 * row.count(2) + row.count(1) for row in A]
        self.parameter["reference_answer"] = "\n".join("".join(str(cell) for cell in row) for row in A)


    def _prompt_generate(self) -> str :
        """
        Render the prompt, listing the required scores as "W[i]=value".
        """
        N = self.parameter["N"]
        scores = " ".join("W[{}]={}".format(i, Wi) for i, Wi in enumerate(self.parameter["W"]))
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            W = scores,
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Split the answer into stripped, non-empty lines; None when there is no answer.
        """
        if answer is None :
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]


    def scorer(self, output : str) -> float :
        """
        Score an output: shape/alphabet check, matrix validity, then agreement of W with the target.
        """
        rows = self.processor(output)
        if rows is None :
            return self.rewards["wrong_format"]
        assert isinstance(rows, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(rows) != N :
            return self.rewards["wrong_format"]
        if any(len(row) != N for row in rows) :
            return self.rewards["wrong_format"]
        if any(ch not in "012" for row in rows for ch in row) :
            return self.rewards["wrong_format"]

        for i in range(N) :
            if rows[i][i] != "0" :
                return self.rewards["invalid_solution"]
            for j in range(N) :
                if i != j and int(rows[i][j]) + int(rows[j][i]) != 2 :
                    return self.rewards["invalid_solution"]

        # The diagonal is "0" at this point, so plain counting matches the per-cell tally.
        W = [3 * row.count("2") + row.count("1") for row in rows]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta" :
            matches = sum(int(a == b) for a, b in zip(self.parameter["W"], W))
            return self.rewards["rewarding_weight"] * ((matches / self.parameter["N"]) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (self.parameter["W"] == W)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
Array R: {R} +- The j-th table can seat up to C[j] people. Array C: {C} + +You need to assign each person to a table such that: +- No table contains more than one person from the same group. +- No table exceeds its total capacity. + +**Output Format:** Output {M} lines. The i-th line (0-indexed) should contain R[i] integers (separated by spaces), representing the table indices assigned to each person in the i-th group.""" + + def __init__(self, + wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(satisfied/all)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0, + **kwargs) : + """ + Initialize the RoundTableAssignment_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3" + + M = self.parameter["M"] = random.randint(2, MAX_N_M) + R = self.parameter["R"] = [] + tables = [[] for table_index in range(MAX_N_M)] + for group_index in range(M) : + R.append(random.randint(2, MAX_N_M)) + table_indices = random.sample(range(MAX_N_M), R[-1]) + for table_index in table_indices : + tables[table_index].append(group_index) + tables = [table for table in tables if len(table) > 0] + assert len(R) == M, "R should have length M" + + self.parameter["N"] = len(tables) + self.parameter["C"] = [len(table) for table in tables] + assert len(self.parameter["C"]) == self.parameter["N"], "C should have length N" + + reference_answer = [[] for group_index in range(M)] + for table_index, table in enumerate(tables) : + for group_index in table : + reference_answer[group_index].append(table_index) + assert 
all(len(answer) == R[group_index] for group_index, answer in enumerate(reference_answer)), "Reference answer does not match the group sizes" + self.parameter["reference_answer"] = "\n".join(" ".join(map(str, answer)) for answer in reference_answer) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + M = self.parameter["M"], + N = self.parameter["N"], + R = " ".join("R[{}]={}".format(i, Ri) for i, Ri in enumerate(self.parameter["R"])), + C = " ".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + matrix.append(list(map(int, line.split()))) + return matrix + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["M"] : + return self.rewards["invalid_solution"] + + countings = [0] * self.parameter["N"] + for answer, Ri in zip(processed_result, self.parameter["R"]) : + if len(answer) != Ri : + return self.rewards["invalid_solution"] + if not all(0 <= i < self.parameter["N"] for i in answer) : + return self.rewards["invalid_solution"] + if len(set(answer)) != Ri : + return self.rewards["invalid_solution"] + for table_index in answer : + countings[table_index] += 1 + + assert len(countings) == len(self.parameter["C"]) == self.parameter["N"], "countings should match the number of tables" + satisfied = sum(int(counting <= Ci) for counting, Ci in zip(countings, self.parameter["C"])) + assert satisfied <= self.parameter["N"], "satisfied should not exceed N" + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return 
self.rewards["rewarding_weight"] * ((satisfied / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == self.parameter["N"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/royal_lock_counting/__init__.py b/server/Gym/environments/royal_lock_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6bd0de2c258db8cbd9daec8597180248f8ae3ac2 --- /dev/null +++ b/server/Gym/environments/royal_lock_counting/__init__.py @@ -0,0 +1 @@ +from .environment import RoyalLockCounting_Environment diff --git a/server/Gym/environments/royal_lock_counting/environment.py b/server/Gym/environments/royal_lock_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..bf587030a21ef081649a327c190ba9ec9b9a4f16 --- /dev/null +++ b/server/Gym/environments/royal_lock_counting/environment.py @@ -0,0 +1,118 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class RoyalLockCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1896 + prompt_template = \ +r"""On a {N} × {N} chessboard, you are to place {K} kings such that **no two kings attack each other**. How many different valid placement configurations are there? (The internal order of the kings does NOT matter.) + +A king can attack up to 8 surrounding squares: the squares directly above, below, left, right, and all 4 diagonals (top-left, top-right, bottom-left, bottom-right). 
+ +**Output Format:** +Your final answer should be a single integer — the total number of valid placements.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the RoyalLockCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + K = self.parameter["K"] = random.randint(1, max(1, N * N // 4)) + + + num_states = 1 << N + + valid_states = [] + line_valid = [False] * num_states + king_count = [0] * num_states + for s in range(num_states) : + if s & (s << 1) : + continue + line_valid[s] = True + valid_states.append(s) + king_count[s] = s.bit_count() + + compat = {s : [] for s in valid_states} + for s in valid_states : + for t in valid_states : + if s & t: + continue + if (s << 1) & t : + continue + if (s >> 1) & t : + continue + compat[s].append(t) + + F_prev = [[0] * num_states for _ in range(K + 1)] + F_cur = [[0] * num_states for _ in range(K + 1)] + + F_prev[0][0] = 1 + + for _row in range(1, N + 1) : + for k in range(K + 1) : + for s in valid_states: + F_cur[k][s] = 0 + + for s in valid_states : + c = king_count[s] + for k in range(c, K + 1) : + prev_k = k - c + tot = 0 + for t in compat[s] : + tot += F_prev[prev_k][t] + F_cur[k][s] = tot + + F_prev, F_cur = F_cur, F_prev + + self.parameter["reference_answer"] = sum(F_prev[K][s] for s in valid_states) + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if 
answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/salad_bar/__init__.py b/server/Gym/environments/salad_bar/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa544f7aff7e8561ac59ba689261b5a2d34b83f0 --- /dev/null +++ b/server/Gym/environments/salad_bar/__init__.py @@ -0,0 +1 @@ +from .environment import SaladBar_Environment diff --git a/server/Gym/environments/salad_bar/environment.py b/server/Gym/environments/salad_bar/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..821c84e8fb7d8fea0ab31224190a6975bfd21421 --- /dev/null +++ b/server/Gym/environments/salad_bar/environment.py @@ -0,0 +1,133 @@ +import random +from typing import Optional, Tuple +from ...environment import VerifiableEnvironment + + +class SaladBar_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3564 + prompt_template = \ +r"""You are given a string S (0-indexed) of length {N}, 
consisting only of the characters `j` and `p`: {S} + +Please find a **contiguous** substring S[l : r] (using Python-style slicing: 0 ≤ l < r ≤ {N}, which includes S[l] through S[r - 1], but **NOT** S[r]) such that: +- In **every prefix** of the substring, the number of `p` characters is **not less than** the number of `j` characters. +- In **every suffix** of the substring, the number of `p` characters is **not less than** the number of `j` characters. + +Your goal is to **maximize the length** of such a substring (i.e., maximize r - l). Output two integers `l` and `r`, separated by a space.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the SaladBar_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + p_probability = random.uniform(0.0, 0.7) + while True : + S = self.parameter["S"] = "".join("p" if random.random() < p_probability else "j" for _ in range(N)) + if "p" in S and "j" in S : + break + + + # Compute prefix sums and track minimum and maximum + prefix = [0] * (N + 1) + minx = 0 + maxx = 0 + for i in range(1, N + 1): + prefix[i] = prefix[i - 1] + (1 if S[i - 1] == 'p' else -1) + if prefix[i] < minx: + minx = prefix[i] + if prefix[i] > maxx: + maxx = prefix[i] + + # Prepare linked lists for each adjusted prefix-sum value + range_x = maxx - minx + 1 + head = [-1] * range_x + nxt = [-1] * (N + 1) + to = [0] * (N + 1) + + # Build next pointers for equal adjusted-sum indices 
+ for i in range(N, -1, -1): + x = prefix[i] - minx + nxt[i] = head[x] + head[x] = i + to[i] = i + + # Scan backwards to find longest valid segment + ans = 0 + pre = N + for i in range(N, 0, -1): + if S[i - 1] == 'j': + # Can't start with an apple + pre = i - 1 + else: + idx = i - 1 + ni = nxt[idx] + # Potentially update end based on next equal-sum position + if ni >= 0 and prefix[to[ni]] >= prefix[pre]: + pre = to[ni] + to[idx] = pre + length = pre - i + 1 + if length > ans: + self.parameter["reference_answer"] = "{} {}".format(i - 1, pre) + ans = length + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], S = self.parameter["S"]) + + + def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] : + if answer is not None : + answer = answer.strip() + try : + l, r = map(int, answer.split()) + return l, r + except : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + l, r = processed_result + if not (0 <= l < r <= self.parameter["N"]) : + return self.rewards["invalid_solution"] + + T = self.parameter["S"][l : r] + def check(s) : + counting = 0 + for c in s : + counting += (+1 if c == 'p' else -1) + if counting < 0 : + return False + return True + if not (check(T) and check(T[::-1])) : + return self.rewards["invalid_solution"] + + gold, answer = self.parameter["gold_answer"], r - l + assert 0 < answer <= gold, "answer should be less than or equal to gold" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: 
{}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/salesman_fatigue/__init__.py b/server/Gym/environments/salesman_fatigue/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..42f99b559322021f2b654dfe7fa2a7038ae5102c --- /dev/null +++ b/server/Gym/environments/salesman_fatigue/__init__.py @@ -0,0 +1 @@ +from .environment import SalesmanFatigue_Environment diff --git a/server/Gym/environments/salesman_fatigue/environment.py b/server/Gym/environments/salesman_fatigue/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8c23dd6ab492d8eda8efef616c1c7701a68e37c8 --- /dev/null +++ b/server/Gym/environments/salesman_fatigue/environment.py @@ -0,0 +1,111 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class SalesmanFatigue_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2672 + prompt_template = \ +r"""You are given {N} pairs of integers `(S[i], A[i])` for `0 <= i < {N}`, provided as: +{S_and_A} + +**Note:** The array `S` is sorted in non-decreasing order: `S[0] <= S[1] <= ... <= S[{N_minus_1}]` + +Please select k distinct pairs `i_1, i_2, ..., i_k` and maximize the following expression: `max(S[i_1], S[i_2], ..., S[i_k]) * 2 + A[i_1] + A[i_2] + ... + A[i_k]` (i.e., the sum of the selected A[i] values plus the maximum S[i] value multiplied by 2). +Please compute the **maximum value of this expression** for each k = 1 to {N}. 
+ +**Output Format:** Your final answer should be a single line containing {N} integers — the maximum value for each k = 1 to {N} in order, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the SalesmanFatigueProblem instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + + S = self.parameter["S"] = [random.randint(1, max(1, N * N // 2)) for _ in range(N)] + S.sort() + A = self.parameter["A"] = [random.randint(1, N) for _ in range(N)] + + + v = list(zip(S, A)) + v.sort(key = lambda x : -x[1]) + + P = [0] * (N + 1) + for i in range(N) : + P[i + 1] = P[i] + v[i][1] + + q = [0] * N + max_q = 0 + for i in range(N) : + max_q = max(max_q, 2 * v[i][0]) + q[i] = max_q + + h = [0] * N + max_h = 0 + for i in range(N - 1, -1, -1) : + max_h = max(max_h, 2 * v[i][0] + v[i][1]) + h[i] = max_h + + answers = [] + for X in range(1, N + 1) : + idx = X - 1 + option1 = P[X] + q[idx] + option2 = P[X - 1] + h[idx] + answers.append(max(option1, option2)) + + self.parameter["gold_answer"] = answers + self.parameter["reference_answer"] = " ".join(map(str, answers)) + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + S_and_A = "\n".join("S[{}]={} A[{}]={}".format(i, self.parameter["S"][i], i, self.parameter["A"][i]) for i in range(N)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if 
answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["N"] : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/same_adjacency_counting/__init__.py b/server/Gym/environments/same_adjacency_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..77788298bdfdc2a3c3d08f7b08dd6f86bd7e7852 --- /dev/null +++ b/server/Gym/environments/same_adjacency_counting/__init__.py @@ -0,0 +1 @@ +from .environment import SameAdjacencyCounting_Environment diff --git a/server/Gym/environments/same_adjacency_counting/environment.py b/server/Gym/environments/same_adjacency_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8723d4c3e02ab5059c63de9e1147b4f16c2076c3 --- /dev/null +++ b/server/Gym/environments/same_adjacency_counting/environment.py @@ -0,0 +1,68 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class 
SameAdjacencyCounting_Environment(VerifiableEnvironment) : # Submitted to https://www.luogu.com.cn/problem/P3197 + prompt_template = \ +r"""Count the number of length-{N} sequences using integers from `1` to `{M}` such that **at least one pair of adjacent elements is equal**. Output the result modulo {MOD}.""" + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SameAdjacencyCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 2" + + assert "MAX_M" in self.parameter, "MAX_M is required in parameter" + MAX_M = self.parameter["MAX_M"] + assert MAX_M >= 2, "MAX_M should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N) + M = self.parameter["M"] = random.randint(2, MAX_M) + MOD = self.parameter["MOD"] = random.randint(M, 2 * M) + + self.parameter["reference_answer"] = (pow(M, N, MOD) - M * pow(M - 1, N - 1, MOD) + MOD) % MOD + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"], MOD = self.parameter["MOD"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if 
processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sat/__init__.py b/server/Gym/environments/sat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b6ff294ef968c3c55f3c9f467d2e39a6ff2396b --- /dev/null +++ b/server/Gym/environments/sat/__init__.py @@ -0,0 +1 @@ +from .environment import SAT_Environment diff --git a/server/Gym/environments/sat/environment.py b/server/Gym/environments/sat/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f1ad0f57a6714506db08be56af2db1f1be5b1a26 --- /dev/null +++ b/server/Gym/environments/sat/environment.py @@ -0,0 +1,101 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class SAT_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""There are {N} boolean (0/1) values x[0], x[1], ..., x[{N_minus_1}]. Each of the following {M} expressions (`|` means OR, `!` means NOT) must equal 1: +{expressions} + +Please find any solution x[0], x[1], ..., x[{N_minus_1}] that satisfies the conditions above. + +Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**. +Example: `{N_boolean}` (do **NOT** include quotes or backticks).""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SAT_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + assert "M" in self.parameter, "M is required in parameter" + M = self.parameter["M"] + assert M >= 1, "M should be greater than or equal to 1" + + assert "density" in self.parameter, "density is required in parameter" + density = self.parameter["density"] + assert 0 < density <= 1, "density should be in (0, 1]" + + x = [random.randint(0, 1) for i in range(N)] + self.parameter["reference_answer"] = " ".join(map(str, x)) + + clauses = self.parameter["clauses"] = [] + for m in range(M) : + while True : + clause = [] + all_or = False + for index in range(N) : + if random.random() < density : + clause.append((index, random.random() < 0.5)) + all_or |= (x[index] if clause[-1][-1] else not x[index]) + if len(clause) >= 2 and all_or : + break + clauses.append(clause) + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + N_minus_1 = self.parameter["N"] - 1, + M = self.parameter["M"], + expressions = "\n".join(" | ".join("({}x[{}])".format("" if is_positive else "!", index) for index, is_positive in clause) for clause in self.parameter["clauses"]), + N_boolean = " ".join(str(i % 2) for i in range(self.parameter["N"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if 
processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + x = processed_result + if len(x) != self.parameter["N"] : + return self.rewards["wrong_format"] + if not all(xi in (0, 1) for xi in x) : + return self.rewards["wrong_format"] + + satisfied = sum(int(any(x[index] if is_positive else not x[index] for index, is_positive in clause)) for clause in self.parameter["clauses"]) + + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / len(self.parameter["clauses"])) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == len(self.parameter["clauses"])) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/scc_sequence_counting/__init__.py b/server/Gym/environments/scc_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed90dca202e42da859226a5f946d4524ff75c16b --- /dev/null +++ b/server/Gym/environments/scc_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .environment import SCC_Sequence_Counting_Environment diff --git a/server/Gym/environments/scc_sequence_counting/environment.py b/server/Gym/environments/scc_sequence_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e8472c79a9a0666dbf00a533d489a89c81c6bd57 --- /dev/null +++ b/server/Gym/environments/scc_sequence_counting/environment.py @@ -0,0 +1,161 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class SCC_Sequence_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P5241 + prompt_template = \ +r"""Consider a directed graph with {N} 
vertices, initially with no edges. You may choose an arbitrary list of **E directed edges** to add to the graph, under the following constraints: +- Each edge connects two **distinct** vertices (i.e., no self-loops). +- No two edges in the list are the same. +- The edges are added **one by one** in the given order of the list. + +After adding each edge, compute the number of **strongly connected components (SCCs)** in the current graph (with the edges added so far) and record it; this produces a sequence of E integers — we call this an **SCC sequence**. Your task is to compute, for each possible value of E from 1 to {N} × ({N} - 1), how many **distinct SCC sequences** can be produced. + +Output {N} × ({N} - 1) integers in one line, separated by spaces. The i-th number (1 ≤ i ≤ {N} × ({N} - 1)) is the number of distinct SCC sequences that can be obtained when E = i, **modulo {MOD}**.""" + + def __init__(self, + max_MOD : int = 1000000, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the SCC_Sequence_Counting_Environment instance. 
def _generate(self) -> None :
    """
    Draw a modulus and precompute the reference answers for the instance.

    Reads ``self.parameter["N"]`` (must be >= 3) and ``self.max_MOD``.
    Writes:
      * ``self.parameter["MOD"]`` - a random modulus in ``[2, self.max_MOD]``;
      * ``self.parameter["gold_answer"]`` - the list ``ans[1 .. N*(N-1)]``,
        every entry already reduced modulo ``MOD``;
      * ``self.parameter["reference_answer"]`` - the same values joined by
        single spaces.

    The values are produced by a two-phase dynamic programme over the
    sequence length ``E``. Both phases keep only two layers (indexed by the
    parity of ``E``), so the order of the statements below matters.

    NOTE(review): the combinatorial meaning of the states ``f[j][k]`` and
    ``g[j]`` comes from the original problem statement, which is not visible
    in this chunk - confirm against the prompt template before changing
    the recurrences.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)


    # p_limit[j]: largest E for which state j may still be non-zero
    # (used as the guard `E <= p_limit[j]` in both phases).
    p_limit = [0] * (N + 1)
    for i in range(1, N + 1):
        # (N - i + 1) * (N - 1) + (i - 1) * (i - 2) / 2, in integer arithmetic
        p_limit[i] = (N - i + 1) * (N - 1) + (i - 1) * (i - 2) // 2

    # f / sf: two parity layers of (N+2) x (N+2) tables. The extra row and
    # column let the recurrence read [j + 1][k - 1] without bounds checks.
    f = [[[0] * (N + 2) for _ in range(N + 2)] for _ in range(2)]
    sf = [[[0] * (N + 2) for _ in range(N + 2)] for _ in range(2)]

    # g / sg: two parity layers of length N+2, used by the second phase.
    g = [[0] * (N + 2) for _ in range(2)]
    sg = [[0] * (N + 2) for _ in range(2)]

    # ans[E] holds the answer for sequence length E (index 0 is unused).
    ans = [0] * (N * (N - 1) + 2)

    # --- base case: E = 1 (odd, so it lives in layer 1) ---
    f[1][N][1] = 1
    ans[1] = 1
    for i in range(1, N + 1):
        # sf is the suffix sum of f over j, so the single 1 at j = N is
        # visible from every i <= N.
        sf[1][i][1] = 1

    # --- first phase: E = 2 .. min(N*(N-1), 2*N), full (j, k) state ---
    maxE = min(N * (N - 1), N << 1)
    for E in range(2, maxE + 1):
        op = E & 1       # layer being written
        prev = op ^ 1    # layer holding E - 1

        # Clear the layer being reused (it still holds the values of E - 2).
        for j in range(1, N + 1):
            for k in range(1, N + 1):
                f[op][j][k] = 0

        # DP recurrence
        for j in range(1, N + 1):
            if E <= p_limit[j]:
                for k in range(1, N + 1):
                    # state (j, k) is only allowed when E + j >= N + k - 1
                    if E + j >= N + k - 1:
                        f[op][j][k] = (f[prev][j][k] + sf[prev][j + 1][k - 1]) % MOD

        # Rebuild the suffix sums sf[op] (over j, descending) and total f[op].
        total = 0
        for j in range(N, 0, -1):
            for k in range(1, N + 1):
                sf[op][j][k] = (sf[op][j + 1][k] + f[op][j][k]) % MOD
                total = (total + f[op][j][k]) % MOD
        ans[E] = total

    # --- collapse the k dimension: g[0][j] = sum_k f[0][j][k] ---
    # Layer 0 is the even layer; for N >= 3 the first phase ends at the even
    # value E = 2*N, so f[0] holds that last step.
    for j in range(1, N + 1):
        s = 0
        for k in range(1, N + 1):
            s = (s + f[0][j][k]) % MOD
        g[0][j] = s

    for j in range(N, 0, -1):
        sg[0][j] = (sg[0][j + 1] + g[0][j]) % MOD

    # --- second phase: E = 2*N+1 .. N*(N-1), state reduced to j only ---
    for E in range((N << 1) + 1, N * (N - 1) + 1):
        op = E & 1
        prev = op ^ 1

        # Clear the layer being reused.
        for j in range(1, N + 1):
            g[op][j] = 0

        # recurrence for g: take the suffix sum of the previous layer
        # (already reduced modulo MOD)
        for j in range(1, N + 1):
            if E <= p_limit[j]:
                g[op][j] = sg[prev][j]

        # Rebuild the suffix sums sg[op] and total g[op].
        total = 0
        for j in range(N, 0, -1):
            sg[op][j] = (sg[op][j + 1] + g[op][j]) % MOD
            total = (total + g[op][j]) % MOD
        ans[E] = total

    # Publish ans[1 .. N*(N-1)] as a list and as a space-separated string.
    self.parameter["gold_answer"] = ans[1 : N * (N - 1) + 1]
    self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"]))
(self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/secret_cow_code/__init__.py b/server/Gym/environments/secret_cow_code/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f79d2cfaa96fced66ff46478edc408e8c7a500b --- /dev/null +++ b/server/Gym/environments/secret_cow_code/__init__.py @@ -0,0 +1 @@ +from .environment import SecretCowCode_Environment diff --git a/server/Gym/environments/secret_cow_code/environment.py b/server/Gym/environments/secret_cow_code/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9098f33d53105bf9d99a1fb5161c20c9ace067d5 --- /dev/null +++ b/server/Gym/environments/secret_cow_code/environment.py @@ -0,0 +1,81 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SecretCowCode_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3612 + prompt_template = \ +r"""You are given a string S consisting of lowercase English letters: {S} +Define F(s) as the string obtained by concatenating `s` with `right_shift(s)` (s + right_shift(s)), where `right_shift(s)` means moving the last character of `s` to the beginning. Let F⁽∞⁾(S) denote the result of applying F infinitely many times to S: F⁽∞⁾(S) = F(F(F(...(S)...))). Please output the {K}-th character (1-based index, from left to right) of the infinite string F⁽∞⁾(S).""" + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SecretCowCode_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 2" + + S = self.parameter["S"] = "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k = random.randint(2, MAX_N))) + + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K > MAX_N, "MAX_K should be greater than MAX_N" + K = self.parameter["K"] = random.randint(len(S) + 1, MAX_K) + + + N = K + + # Build list of string lengths until covering N + lengths = [len(S)] + while lengths[-1] < N: + lengths.append(lengths[-1] * 2) + + # Work backwards to map N into the original string + while len(lengths) > 1: + lengths.pop() + half = lengths[-1] # Length of the previous stage + if N > half: + if N == half + 1: + N = half + else: + N = N - (half + 1) + # if N <= half: it stays the same + + self.parameter["reference_answer"] = S[N-1] + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(S = self.parameter["S"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if len(processed_result) != 1 : + return self.rewards["wrong_format"] + if processed_result not in self.parameter["S"] : + return self.rewards["wrong_format"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git 
a/server/Gym/environments/segment_min_length_equal_counting/__init__.py b/server/Gym/environments/segment_min_length_equal_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..19c033867a880e199eace6bde6f32f934fd14c0d --- /dev/null +++ b/server/Gym/environments/segment_min_length_equal_counting/__init__.py @@ -0,0 +1 @@ +from .environment import SegmentMinLengthEqual_Counting_Environment diff --git a/server/Gym/environments/segment_min_length_equal_counting/environment.py b/server/Gym/environments/segment_min_length_equal_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..93c4b1f449100f3212421ef824178d39d82abf27 --- /dev/null +++ b/server/Gym/environments/segment_min_length_equal_counting/environment.py @@ -0,0 +1,104 @@ +import random +from typing import Optional +from bisect import bisect_left +from ...environment import VerifiableEnvironment + + +class SegmentMinLengthEqual_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/B3902 + prompt_template = \ +r"""An array x[1], x[2], ..., x[{N}] is called **valid** if and only if there exists a partition of it into intervals such that the minimum value in each interval is exactly equal to the interval’s length. Equivalently, there exist indices 0 = x_1 < x_2 < ... < x_m = {N}, such that for every 1 ≤ i < m, we have min_{j = x_i + 1}^{x_{i+1}} a_j = x_{i+1} - x_i. What is the number of such valid arrays x, where each element x[i] must belong to the set S = {S}? Output the answer modulo {MOD}.""" + def __init__(self, + max_MOD : int = 1000000, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SegmentMinLengthEqual_Counting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.max_MOD = max_MOD + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + S = self.parameter["S"] = sorted(random.sample(range(1, N + 1), random.randint(2, N))) + MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD) + + + def quick_power(a: int, e: int) -> int: + # fast power mod MOD (you can also use pow(a, e, MOD) directly) + res = 1 + a %= MOD + while e: + if e & 1: + res = (res * a) % MOD + a = (a * a) % MOD + e >>= 1 + return res + + def main(B): + M = len(B) + exist_set = set(B) + + # c[i] = count of elements in S >= i, for i in 1..N + C = [0] * (N + 1) + for i in range(1, N + 1): + # number of elements >= i = M - index of first >= i + C[i] = M - bisect_left(B, i) + + # DP + F = [0] * (N + 1) + F[0] = 1 + + for i in range(1, N + 1): + total = 0 + for j in range(i): + L = i - j # length of the last segment + if L in exist_set: + cL = C[L] + # ways to fill a segment of length L with min exactly L: + ways = (quick_power(cL, L) - quick_power(cL - 1, L) + MOD) % MOD + total = (total + F[j] * ways) % MOD + F[i] = total + + return F[N] + + self.parameter["reference_answer"] = main(S) + + + def _prompt_generate(self) -> str : + return self.prompt_template.replace(r"{N}", str(self.parameter["N"])).replace(r"{S}", "{" + ", ".join(map(str, self.parameter["S"])) + "}").replace(r"{MOD}", str(self.parameter["MOD"])) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if 
processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/segment_tree_sorting_counting/__init__.py b/server/Gym/environments/segment_tree_sorting_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bf188883a160423e5a9e12bde564a07a5054055e --- /dev/null +++ b/server/Gym/environments/segment_tree_sorting_counting/__init__.py @@ -0,0 +1 @@ +from .environment import SegmentTreeSortingCounting_Environment diff --git a/server/Gym/environments/segment_tree_sorting_counting/environment.py b/server/Gym/environments/segment_tree_sorting_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9dc9f58c45d2497d54c0dca61cefd56e3ed0cc99 --- /dev/null +++ b/server/Gym/environments/segment_tree_sorting_counting/environment.py @@ -0,0 +1,158 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SegmentTreeSortingCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3322 + prompt_template = \ +r"""You are given a permutation of integers from 1 to 2^{N} (A[1], A[2], ..., A[2^{N}]). The array is: {A} + +There are {N} types of operations. You may apply **each type at most once**, and you may choose to apply them in any order. The i-th type of operation (1 ≤ i ≤ {N}) is defined as follows: +- Divide the array into 2^({N} - i + 1) segments, each of length 2^(i - 1). (Each element belongs to exactly one segment.) +- You may swap **any two segments** (freely chosen by you). + +Please count the number of **distinct sequences of operations** that can sort the array into increasing order. 
def _generate(self) -> None :
    """
    Build a scrambled permutation and count the operation sequences that sort it.

    Reads ``self.parameter["N"]`` (must be >= 2). Stores:
      * ``A`` - the permutation of ``1 .. 2**N`` obtained from the sorted
        array by applying one random segment swap for each of a random,
        non-empty set of distinct operation types;
      * ``reference_answer`` - the count accumulated by the depth-first
        search below (asserted to be positive).

    The search mutates ``A`` in place but undoes every swap it tries, so the
    list stored in ``self.parameter["A"]`` is unchanged when it returns.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    A = self.parameter["A"] = list(range(1, 2 ** N + 1))

    # Scramble: each chosen type t swaps two distinct segments of length
    # 2**(t-1), so at least one sorting sequence always exists.
    operation_types = random.sample(range(1, N + 1), random.randint(1, N))
    for operation_type in operation_types :
        seg_num, seg_size = 2 ** (N - operation_type + 1), 2 ** (operation_type - 1)
        i, j = random.sample(range(seg_num), 2)
        i_start, j_start = i * seg_size, j * seg_size
        for k in range(seg_size) :
            A[i_start + k], A[j_start + k] = A[j_start + k], A[i_start + k]


    # po[m] = m!  (factorials 0! .. N!)
    po = [1] * (N + 1)
    for i in range(1, N + 1):
        po[i] = po[i - 1] * i

    ans = 0 # will hold the final count

    # check(k): for every aligned block of length 2**k, verify that the value
    # at the start of its second half is exactly 2**(k-1) larger than the
    # value at its start.
    def check(k):
        seg_size = 1 << k
        half = 1 << (k - 1)
        # number of segments: 2^(N-k)
        cnt = 1 << (N - k)
        for i in range(cnt):
            start = i * seg_size
            # Compare start of segment and middle of segment
            if A[start] + half != A[start + half]:
                return False
        return True

    # Swap two segments of length 'length', starting at indices i and j (0-based)
    def swap(i, j, length):
        for m in range(length):
            A[i + m], A[j + m] = A[j + m], A[i + m]

    # dfs(now, num): levels 0 .. now-1 (operation types 1 .. now) have been
    # decided and `num` of them performed a swap.
    def dfs(now, num):
        nonlocal ans
        # Prune as soon as the blocks of length 2**now are not consistent.
        if now > 0 and not check(now):
            return
        # All N types decided: add num! to the count.
        # NOTE(review): presumably because the `num` chosen swaps may be
        # applied in any order - confirm against the problem statement.
        if now == N:
            ans += po[num]
            return

        # Option 1: skip operation type now+1
        dfs(now + 1, num)

        # Option 2: apply an operation of this type by swapping two segments
        seg_size = 1 << now
        total_segments = 1 << (N - now)
        tmp = []
        # Collect every adjacent segment pair (1-based i, i+1) that is not
        # consecutive; both members are candidates for the swap.
        for i in range(1, total_segments, 2): # i = 1, 3, 5, ... (1-based segment index)
            # Convert to 0-based start indices
            s1 = (i - 1) * seg_size
            s2 = i * seg_size
            if A[s2] != A[s1] + seg_size:
                tmp.append(i)
                tmp.append(i + 1)
                # More than two broken pairs: this branch is abandoned.
                if len(tmp) > 4:
                    return
        # Nothing is broken at this level; Option 1 already covered that case.
        if not tmp:
            return
        # Try swapping any two segments among the identified ones
        for p in range(len(tmp)):
            for q in range(p + 1, len(tmp)):
                i_seg = tmp[p] - 1
                j_seg = tmp[q] - 1
                i_start = i_seg * seg_size
                j_start = j_seg * seg_size
                swap(i_start, j_start, seg_size)
                dfs(now + 1, num + 1)
                swap(i_start, j_start, seg_size) # undo, restoring A

    # 'now' runs from 0 to N; level `now` decides operation type now+1.
    # The pruning check is skipped at now == 0 because nothing has been
    # decided yet.

    dfs(0, 0)
    assert ans > 0
    self.parameter["reference_answer"] = ans
processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/self_power_sequence_mod/__init__.py b/server/Gym/environments/self_power_sequence_mod/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b89a70d5ba259ad643bea4bb08b20267839bdca8 --- /dev/null +++ b/server/Gym/environments/self_power_sequence_mod/__init__.py @@ -0,0 +1 @@ +from .environment import SelfPowerSequenceMOD_Environment diff --git a/server/Gym/environments/self_power_sequence_mod/environment.py b/server/Gym/environments/self_power_sequence_mod/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..dc64a36cf201264934f0ddbba0bc8c9f6b68eee8 --- /dev/null +++ b/server/Gym/environments/self_power_sequence_mod/environment.py @@ -0,0 +1,92 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SelfPowerSequenceMOD_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4139 + prompt_template = r"""Define $a[0] = 1$, and $a[n] = 2^(a[n-1])$. Let $b[n] = a[n] \bmod {MOD}$. It can be proven that $b[n]$ becomes constant after some point. 
Find this constant value.""" + + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SelfPowerSequenceMOD_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_MOD" in self.parameter, "MAX_MOD is required in parameter" + MAX_MOD = self.parameter["MAX_MOD"] + assert MAX_MOD >= 3, "MAX_MOD should be greater than or equal to 3" + + self.parameter["MOD"] = MOD = random.randint(3, MAX_MOD) + + + def phi(n): + ret = n + i = 2 + while i * i <= n: + if n % i == 0: + while n % i == 0: + n //= i + ret = ret // i * (i - 1) + i += 1 + if n > 1: + ret = ret // n * (n - 1) + return ret + + def pow_mod(x, p, mod): + ret = 1 + x %= mod + while p: + if p & 1: + ret = ret * x % mod + x = x * x % mod + p >>= 1 + return ret + + def solve(p): + if p == 1: + return 0 + t = phi(p) + return pow_mod(2, solve(t) + t, p) + + self.parameter["reference_answer"] = solve(MOD) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(MOD = self.parameter["MOD"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git 
a/server/Gym/environments/set_cover/__init__.py b/server/Gym/environments/set_cover/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ecb4f87e06c8553090f79aab57e9a1ae4ecd5770 --- /dev/null +++ b/server/Gym/environments/set_cover/__init__.py @@ -0,0 +1 @@ +from .environment import SetCover_Environment diff --git a/server/Gym/environments/set_cover/environment.py b/server/Gym/environments/set_cover/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..4d7a7a677d463f2b85c082c52523ee85eac0b249 --- /dev/null +++ b/server/Gym/environments/set_cover/environment.py @@ -0,0 +1,100 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class SetCover_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given {N} items labeled from 0 to {N_minus_1}, and {M} sets labeled from 0 to {M_minus_1}. Each set is a subset of the items: +{sets} + +Your task is to select a collection of sets such that every item is covered **by exactly one** of the selected sets. + +**Output Format:** Your final answer should be a single line containing the indices of the selected sets, separated by spaces. Example: `0 {M_minus_1}` (do **NOT** include quotes or backticks); this means you selected sets 0 and {M_minus_1} to cover all items exactly once.""" + + def __init__(self, + MAX_M_multiple : int = 2, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(covered/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SetCover_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.MAX_M_multiple = MAX_M_multiple + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + M = random.randint(3, N * self.MAX_M_multiple) + constructed_M = random.randint(2, M - 1) + + Sets = self.parameter["Sets"] = [[] for m in range(constructed_M)] + for item in range(N) : + Sets[random.randint(0, constructed_M - 1)].append(item) + for m in range(M - constructed_M) : + existence_probability = random.random() + Sets.append([item for item in range(N) if random.random() < existence_probability]) + Sets = [(Set, index < constructed_M) for index, Set in enumerate(Sets) if len(Set) > 0] + random.shuffle(Sets) + + self.parameter["reference_answer"] = " ".join(str(index) for index in range(len(Sets)) if Sets[index][-1]) + self.parameter["Sets"] = [Set for Set, _ in Sets] + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], len(self.parameter["Sets"]) + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + M = M, + M_minus_1 = M - 1, + sets = "\n".join("Set {}: ".format(index) + "{ " + ", ".join(map(str, Set)) + " }" for index, Set in enumerate(self.parameter["Sets"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), 
"processed_result should be a list" + + Set_indices = set(processed_result) + union = set() + for index in Set_indices : + if not (0 <= index < len(self.parameter["Sets"])) : + return self.rewards["invalid_solution"] + current = set(self.parameter["Sets"][index]) + if union & current : + return self.rewards["invalid_solution"] + union |= current + + assert len(union) <= self.parameter["N"], "union should be less than or equal to N" + + if self.rewards["rewarding_strategy"] == "(covered/all)^beta" : + return self.rewards["rewarding_weight"] * ((len(union) / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "covered=all" : + return self.rewards["rewarding_weight"] * (len(union) == self.parameter["N"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/set_splitting/__init__.py b/server/Gym/environments/set_splitting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..740ba81fcb49ca27244d6a57c35f832de53b73dd --- /dev/null +++ b/server/Gym/environments/set_splitting/__init__.py @@ -0,0 +1 @@ +from .environment import SetSplitting_Environment diff --git a/server/Gym/environments/set_splitting/environment.py b/server/Gym/environments/set_splitting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..5972f43923595e2a1301674c819df5434f493c5a --- /dev/null +++ b/server/Gym/environments/set_splitting/environment.py @@ -0,0 +1,102 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class SetSplitting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Define the full set `S` as all {N} integers from `0` to `{N_minus_1}`. 
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
             **kwargs) :
    """
    Initialize the SetSplitting_Environment instance.

    Args:
        wrong_format: reward returned by ``scorer`` when the output cannot
            be parsed (the processor yields ``None``).
        invalid_solution: reward returned when the parsed answer contains an
            element outside ``0..N-1`` or a repeated element.
        rewarding_strategy: either ``"(satisfied/all)^beta"`` (fraction of
            split subsets raised to ``rewarding_beta``) or
            ``"satisfied=all"`` (all-or-nothing); any other value makes
            ``scorer`` raise ``NotImplementedError``.
        rewarding_weight: multiplier applied to the strategy's score.
        rewarding_beta: exponent used by ``"(satisfied/all)^beta"``.
        **kwargs: forwarded unchanged to the base-class constructor.
    """
    super().__init__(**kwargs)

    # Reward configuration, looked up by key in `scorer`.
    self.rewards = {
        "wrong_format": wrong_format,
        "invalid_solution": invalid_solution,
        "rewarding_strategy": rewarding_strategy,
        "rewarding_weight": rewarding_weight,
        "rewarding_beta": rewarding_beta,
    }
def scorer(self, output : str) -> float :
    """
    Score a proposed ``S1`` for the set-splitting instance.

    The parsed answer is the list of elements placed in ``S1``; ``S2`` is
    the complement within ``0..N-1``. A listed subset is *satisfied* when it
    has at least one element in ``S1`` and at least one in ``S2``.

    Returns:
        * ``rewards["wrong_format"]`` if the output cannot be parsed;
        * ``rewards["invalid_solution"]`` if an element is out of range or
          repeated;
        * otherwise ``rewarding_weight`` times either
          ``(satisfied / M) ** rewarding_beta`` or ``satisfied == M``,
          depending on ``rewarding_strategy``.

    Raises:
        NotImplementedError: for an unknown ``rewarding_strategy``.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    N = self.parameter["N"]
    if not all(0 <= x < N for x in processed_result) :
        return self.rewards["invalid_solution"]
    S1 = set(processed_result)
    if len(S1) != len(processed_result) :
        return self.rewards["invalid_solution"]
    S2 = set(range(N)) - S1

    # Count split subsets in one pass, building each subset's set once.
    # (For subsets of 0..N-1, "meets both sides" is the same condition as
    # "contained in neither side".)
    satisfied = 0
    for subset in self.parameter["Sets"] :
        members = set(subset)
        if members & S1 and members & S2 :
            satisfied += 1

    M = self.parameter["M"]
    assert satisfied <= M, "satisfied should not exceed M"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta" :
        return self.rewards["rewarding_weight"] * ((satisfied / M) ** self.rewards["rewarding_beta"])
    elif strategy == "satisfied=all" :
        return self.rewards["rewarding_weight"] * (satisfied == M)
    else :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
index 0000000000000000000000000000000000000000..7f929f74496759581fce3fbfb384e637bd5826ae --- /dev/null +++ b/server/Gym/environments/shared_substring_counting/__init__.py @@ -0,0 +1 @@ +from .environment import SharedSubstringCounting_Environment diff --git a/server/Gym/environments/shared_substring_counting/environment.py b/server/Gym/environments/shared_substring_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..555ef200ad8098b1dbce880eae22c0c336853b59 --- /dev/null +++ b/server/Gym/environments/shared_substring_counting/environment.py @@ -0,0 +1,205 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SharedSubstringCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3181 + prompt_template = \ +r"""You are given two strings: +S = {S} +T = {T} + +Please compute the number of tuples (lS, rS, lT, rT) such that: +- 0 ≤ lS < rS ≤ len(S) +- 0 ≤ lT < rT ≤ len(T) +- The substring S[lS : rS] is equal to the substring T[lT : rT] (we are using Python-style slicing here)""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SharedSubstringCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_LEN" in self.parameter, "MAX_LEN is required in parameter" + MAX_LEN = self.parameter["MAX_LEN"] + assert MAX_LEN >= 2, "MAX_LEN should be greater than or equal to 2" + + for key in ("S", "T") : + a_probability = random.random() + LEN = random.randint(2, MAX_LEN) + self.parameter[key] = "".join("a" if random.random() < a_probability else "b" for _ in range(LEN)) + S, T = self.parameter["S"], self.parameter["T"] + + + def SA(arr): + """ + Given an integer array `arr` representing a string (each int is a “character” code), + build its suffix array and LCP, then return + sum_{0 <= i < j < n} LCP(suffix_i, suffix_j). + """ + n = len(arr) + if n <= 1: + return 0 + + # initial rank range + m = max(arr) + 1 + + sa = [0] * n + rk = arr[:] # rk[i] = rank of the suffix starting at i + tp = [0] * n # temporary array for sorting + # initial radix‐sort by single character + tax = [0] * m + for x in rk: + tax[x] += 1 + for i in range(1, m): + tax[i] += tax[i-1] + for i in range(n-1, -1, -1): + c = rk[i] + tax[c] -= 1 + sa[tax[c]] = i + + # doubling loop + w = 1 + while True: + # sort by second key: collect suffixes with i >= n-w first + p = 0 + for i in range(n-w, n): + tp[p] = i; p += 1 + for i in range(n): + j = sa[i] + if j >= w: + tp[p] = j - w + p += 1 + + # radix‐sort by first key + tax = [0] * m + for x in rk: + tax[x] += 1 + for i in range(1, m): + tax[i] += tax[i-1] + for i in range(n-1, -1, -1): + j = tp[i] + c = rk[j] + tax[c] -= 1 + sa[tax[c]] = j + + # re‐rank + old_rk = rk + rk = [0] * n + rk[sa[0]] = 0 + p = 1 + for i in range(1, n): + prev, curr = sa[i-1], sa[i] + # compare pairs (old_rk[curr], old_rk[curr+w]) vs (old_rk[prev], old_rk[prev+w]) + if (old_rk[curr] == old_rk[prev] and + (old_rk[curr+w] if 
curr+w < n else -1) == + (old_rk[prev+w] if prev+w < n else -1)): + rk[curr] = p-1 + else: + rk[curr] = p + p += 1 + + if p >= n: + break + m = p + w <<= 1 + + # build LCP array (het) via Kasai’s algorithm + het = [0] * n + k = 0 + for i in range(n): + r = rk[i] + if r == 0: + continue + j = sa[r-1] + while i + k < n and j + k < n and arr[i+k] == arr[j+k]: + k += 1 + het[r] = k + if k: + k -= 1 + + # now sum up all LCPs over i= h: + last_h = stack_h.pop() + last_cnt = stack_cnt.pop() + running -= last_h * last_cnt + cnt += last_cnt + stack_h.append(h) + stack_cnt.append(cnt) + running += h * cnt + total += running + + return total + + + def compute(): + # use a separator > 'z' + SEP = ord('z') + 1 + + # build concatenated array + concat = [ord(c) for c in S] + [SEP] + [ord(c) for c in T] + + # total cross‐sum = SA(S#T) - SA(S) - SA(T) + ans = SA(concat) + ans -= SA([ord(c) for c in S]) + ans -= SA([ord(c) for c in T]) + + return ans + + self.parameter["reference_answer"] = compute() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + S = self.parameter["S"], + T = self.parameter["T"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return 
self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/shortest_path/__init__.py b/server/Gym/environments/shortest_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..31cc89881563e1b1ce1971f79fde3ea03a9a1b5e --- /dev/null +++ b/server/Gym/environments/shortest_path/__init__.py @@ -0,0 +1 @@ +from .environment import ShortestPath_Environment diff --git a/server/Gym/environments/shortest_path/environment.py b/server/Gym/environments/shortest_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..acc6394854ad7c4984b917a4695288030c2ad3e9 --- /dev/null +++ b/server/Gym/environments/shortest_path/environment.py @@ -0,0 +1,141 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class ShortestPath_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following directed edges. Each edge is represented as a tuple `(s, t, w)`, meaning there is a directed edge **from vertex `s` to vertex `t` with weight `w`** : +{edges} + +Your task is to find a path `p1, p2, ..., pk` such that: +- `p1 = 0` (the path starts at vertex 0) and `pk = {N_minus_1}` (the path ends at vertex `{N_minus_1}`) +- Try your best to **minimize** the total weight of the path (i.e., the sum of all edge weights used). + +**Output Format:** +Your final answer should be a single line containing the path in order: `p1 p2 ... pk`, separated by **spaces**. +Example: `0 1 {N_minus_1}` (do **NOT** include the backticks or quotes); this means the path (k = 3) goes from `0` to `1` to `{N_minus_1}`. 
+""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the ShortestPath_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + constructed_path = list(range(1, N - 1)) + random.shuffle(constructed_path) + constructed_path = [0] + constructed_path + [N - 1] + for s, t in zip(constructed_path, constructed_path[1 :]) : + w = random.randint(1, max(1, N // 3)) + edges.append((s, t, w)) + + num_edges = int(edge_density * N * (N - 1)) + if len(edges) < num_edges : + remaining_edges = list(set((s, t) for s in range(N) for t in range(N) if s != t) - set((s, t) for s, t, w in edges) - {(0, N - 1)}) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for s, t in remaining_edges : + edges.append((s, t, random.randint(max(1, N // 2), N))) + random.shuffle(edges) + + starting = {t : (s, t, w) for s, t, w in edges if s == 0} + ending = {s : (s, t, w) for s, t, w in edges if t == N - 1} + for s, t, w in starting.values() : + if t in ending : + if t == constructed_path[-2] : + assert t != constructed_path[1] + edges.remove(starting[t]) + else : + edges.remove(ending[t]) + + + assert len(edges) == 
len(set((s, t) for s, t, w in edges)), "edges should be unique" + for s, t, w in edges : + assert 0 <= s < N, "s should be in range" + assert 0 <= t < N, "t should be in range" + assert s != t, "s should not be equal to t" + + + G = networkx.DiGraph() + G.add_weighted_edges_from(edges) + shortest_path_length, shortest_path = networkx.single_source_dijkstra(G, 0, N - 1) + self.parameter["reference_answer_weight"] = shortest_path_length + self.parameter["reference_answer"] = " ".join(map(str, shortest_path)) + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(s, t, w) for s, t, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + if not answer_array : + return None + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + path = processed_result + for vertex in path : + if not (0 <= vertex < self.parameter["N"]) : # check if vertex is in range + return self.rewards["invalid_solution"] + if not (path[0] == 0 and path[-1] == self.parameter["N"] - 1) : # check if start and end vertices are correct + return self.rewards["invalid_solution"] + + edge2weight = {(s, t) : w for s, t, w in self.parameter["edges"]} + answer_weight = 0 + for s, t in zip(path, path[1 :]) : + if (s, t) not in edge2weight : + return self.rewards["invalid_solution"] + answer_weight += edge2weight[(s, t)] + gold = self.parameter["reference_answer_weight"] + assert 0 < gold <= answer_weight, "answer weight should be greater than or equal to 
reference" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer_weight) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer_weight) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/shortest_path_count_construction/__init__.py b/server/Gym/environments/shortest_path_count_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..51c0b6b1c611c5d0ac2ce6dbbdcf1ff40f1cfdee --- /dev/null +++ b/server/Gym/environments/shortest_path_count_construction/__init__.py @@ -0,0 +1 @@ +from .environment import ShortestPathCountConstruction_Environment diff --git a/server/Gym/environments/shortest_path_count_construction/environment.py b/server/Gym/environments/shortest_path_count_construction/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..24015265fc786dd80a647b727d666b31ce8ec557 --- /dev/null +++ b/server/Gym/environments/shortest_path_count_construction/environment.py @@ -0,0 +1,114 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment +import numpy as np +from typing import Tuple + +class ShortestPathCountConstruction_Environment(VerifiableEnvironment) : # Source: https://codeforces.com/problemset/problem/388/B + prompt_template = \ +r"""Please construct a simple undirected graph with N vertices, such that the number of shortest paths between vertex 1 and vertex 2 is {K}. Since there are multiple valid graphs satisfying the condition, you can output any of them. +{N_constraint} + +Please strictly follow the output format without additional stuff: +1. The first line must contain an integer N. +2. 
The next N lines each contain a string of length N, representing the adjacency matrix G with N rows and N columns. Each element of the matrix must be 'N' or 'Y'. If Gij is 'Y', then graph G has a edge connecting vertex i and vertex j. Consider the graph vertexes are numbered from 1 to N. The graph must be undirected and simple: Gii = 'N' and Gij = Gji must hold. And there must be at least one path between vertex 1 and vertex 2. +""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, trivial_solution_penalty : float = -0.5, + **kwargs) : + """ + Initialize the ShortestPathCountConstruction instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + "trivial_solution_penalty" : trivial_solution_penalty, + } + + def _generate(self) -> None : + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K >= 3, "MAX_K should be greater than or equal to 3" + + K = self.parameter["K"] = random.randint(3, MAX_K) + + if K >= 12 : + self.parameter["N_constraint"] = min(((len(bin(K)[2 :]) * 3 + 1) + 1) * 2, K + 2) + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + K = self.parameter["K"], + N_constraint = "Please ensure that the number of verticies N is fewer than {}.".format(self.parameter["N_constraint"]) if self.parameter["K"] >= 12 else "Please try your best to avoid constructing a trivial solution with N = {K} + 2 (by just putting {K} intermediate vertices between vertex 1 and vertex 2).".format(K = self.parameter["K"]) + ) + + def _process(self, answer : Optional[str]) -> Optional[Tuple[int, np.ndarray]] : + if answer is not None : + try : + answer = answer.strip() + N = int(answer[:answer.find("\n")]) + answer = 
answer[answer.find("\n") + 1 :] + assert(sum([1 if c=='\n' else 0 for c in answer]) == N - 1) + answer = answer.splitlines() + adjacency_matrix = np.ndarray((N, N), dtype=int) + for i in range(N) : + assert(len(answer[i]) == N) + for j in range(N) : + # check if the adjacency matrix is valid: ('N' or 'Y') + assert answer[i][j] in ['N', 'Y'] + adjacency_matrix[i, j] = answer[i][j] == 'Y' + # check if the adjacency matrix is valid: (symmetric, no self-loops) + assert(np.all(adjacency_matrix == adjacency_matrix.T)) + assert(np.all(np.diag(adjacency_matrix) == 0)) + return N, adjacency_matrix + except (ValueError, AssertionError) : + return None + else : + return None + + def count_shortest_paths(self, N, adjacency_matrix): + """ + Assume the format is completely correct. + Count the number of shortest paths between vertex 1 and vertex 2 in the given graph. + Use matrix multiplication instead of BFS, since numpy is faster than python for loop. + """ + + start_node_idx = 0 + end_node_idx = 1 + + current_paths_vec = np.zeros(N, dtype=int) + current_paths_vec[start_node_idx] = 1 + + # enumerate the shortest path length + for k in range(1, N): + next_paths_vec = adjacency_matrix @ current_paths_vec + + # check if there is a path to the end node + if next_paths_vec[end_node_idx] > 0: + return next_paths_vec[end_node_idx] + + # update the vector for the next iteration + current_paths_vec = next_paths_vec + + # if the loop ends without finding a path, then there is no path from the start node to the end node + return 0 + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + N, adjacency_matrix = processed_result + if N < 2 : + return self.rewards["wrong_format"] + real_K = int(self.count_shortest_paths(N, adjacency_matrix)) + if self.parameter["K"] >= 12 and N >= self.parameter["N_constraint"] : # a trivial solution is N = K+2, and we try to avoid it by penalizing a big N (when k>=12, 3\lceil\log 
k\rceil+1 < k+2) + return self.rewards["trivial_solution_penalty"] + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + return self.rewards["rewarding_weight"] * ((min(real_K, self.parameter["K"]) / max(real_K, self.parameter["K"])) ** self.rewards["rewarding_beta"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/shortest_unicolor_substring/__init__.py b/server/Gym/environments/shortest_unicolor_substring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2341b5c911f1f246e2fc51558bf3c7bf02074f2c --- /dev/null +++ b/server/Gym/environments/shortest_unicolor_substring/__init__.py @@ -0,0 +1 @@ +from .environment import ShortestUnicolorSubstring_Environment diff --git a/server/Gym/environments/shortest_unicolor_substring/environment.py b/server/Gym/environments/shortest_unicolor_substring/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..71d108cd47a5e31802177358b9a508953d862747 --- /dev/null +++ b/server/Gym/environments/shortest_unicolor_substring/environment.py @@ -0,0 +1,136 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class ShortestUnicolorSubstring_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3718 + prompt_template = \ +r"""You are given a binary string (i.e., consisting of only 0s and 1s) S of length {N}: {S} + +Please construct a binary string T of length {N} such that: +- There are at most {K} positions where S[i] ≠ T[i]. +- You try your best to **minimize** the length of the **longest consecutive segment** of the same character in T. 
+ +**Output Format:** A single line containing the string T — a binary string of length {N}.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the ShortestUnicolorSubstring_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + one_probability = random.random() + self.parameter["S"] = S = "".join("1" if random.random() < one_probability else "0" for _ in range(N)) + self.parameter["K"] = K = random.randint(1, N // 2) + + + def compute(): + lamp = list(map(int, S)) + + # Count mismatches to the two possible alternating patterns + # Pattern A: positions 1,3,5... = 'N' (1), positions 2,4,6... = 'F' (0) + # In 0-based index: i%2==0 -> 1, else 0 + s1 = sum(1 for i, v in enumerate(lamp) if v == (1 if i % 2 == 0 else 0)) + # The other pattern requires flipping exactly the opposite set of positions + s2 = N - s1 + + # If we can flip into a perfect alternation, the answer is 1 + if min(s1, s2) <= K: + return 1 + + # Build the lengths of consecutive same-value segments + segments = [] + curr = lamp[0] + length = 1 + for v in lamp[1:]: + if v == curr: + length += 1 + else: + segments.append(length) + curr = v + length = 1 + segments.append(length) + + # Given a candidate maximum run-length x, how many flips are needed? 
+ # For each segment of length L, we need floor(L / (x+1)) flips + def flips_needed(x): + total = 0 + for L in segments: + total += L // (x + 1) + return total + + # Binary search the minimal x in [2..N] such that flips_needed(x) <= K + lo, hi = 2, N + ans = N + while lo <= hi: + mid = (lo + hi) // 2 + if flips_needed(mid) > K: + # too many flips needed, increase x + lo = mid + 1 + else: + # feasible, try smaller + ans = mid + hi = mid - 1 + + return ans + self.parameter["gold_answer"] = compute() + assert self.parameter["gold_answer"] >= 1, "The gold answer should be at least 1" + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], S = self.parameter["S"], K = self.parameter["K"]) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + T = processed_result + + if len(T) != self.parameter["N"] : + return self.rewards["wrong_format"] + if any(c not in "01" for c in T) : + return self.rewards["wrong_format"] + + if sum(int(s != t) for s, t in zip(self.parameter["S"], T)) > self.parameter["K"] : + return self.rewards["invalid_solution"] + + now_length, answer, gold = 1, 1, self.parameter["gold_answer"] + for i in range(1, len(T)) : + if T[i] == T[i - 1] : + now_length += 1 + answer = max(answer, now_length) + else : + now_length = 1 + assert gold <= answer, "The answer should not be less than the gold answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * int(answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: 
{}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/singing_girl_story/__init__.py b/server/Gym/environments/singing_girl_story/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97a1105e6b68c639f971bec9d356973fd7900c49 --- /dev/null +++ b/server/Gym/environments/singing_girl_story/__init__.py @@ -0,0 +1 @@ +from .environment import SingingGirlStory_Environment diff --git a/server/Gym/environments/singing_girl_story/environment.py b/server/Gym/environments/singing_girl_story/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..aa6ff9b46a20f97abc7207c951261981d81fa511 --- /dev/null +++ b/server/Gym/environments/singing_girl_story/environment.py @@ -0,0 +1,189 @@ +import random +from typing import Optional +from bisect import bisect_left +from ...environment import VerifiableEnvironment + + +class SingingGirlStory_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4229 + prompt_template = \ +r"""Consider an array H[1], H[2], ..., H[{N}], where each H[i] is an integer in [1, {A}]. We say max(H[l : r + 1]) denotes the maximum value in the subarray H[l], H[l+1], ..., H[r] (1 ≤ l ≤ r ≤ {N}). How many arrays H satisfy all of the following conditions? +{conditions} + +Output the number of valid arrays modulo {MOD}.""" + MODs = (666623333, 998244353, 10 ** 9 + 7) + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SingingGirlStory_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3" + + N = self.parameter["N"] = random.randint(3, MAX_N_M) + A = self.parameter["A"] = random.randint(2, N) + H = [random.randint(1, A) for i in range(N)] + M = random.randint(1, MAX_N_M) + + conditions = self.parameter["conditions"] = [] + for _ in range(M) : + length = random.randint(2, N) + start = random.randint(1, N - length + 1) + end = start + length - 1 + conditions.append((start, end, max(H[start - 1 : (end - 1) + 1]))) + assert 1 <= conditions[-1][0] <= conditions[-1][1] <= N, "1 <= l <= r <= N" + assert 1 <= conditions[-1][2] <= A, "max(H[l : r + 1]) should be in [1, A]" + + MOD = self.parameter["MOD"] = random.choice(self.MODs) + + + def calc(val, pts, eves, UNI, Q): + # pts: list of segment indices i (1-based) where mx[i] == val + # eves: list of event indices (1-based) where Q[id]['v'] == val + if not pts: + return 0 + L = len(pts) + # 1-based for convenience; Aindex[0] = 0 as in the C++ code + Aindex = [0] + pts[:] + + # Precompute powers + PPW = [1] * (L + 1) # PPW[0] = 1 is safe + for i in range(1, L + 1): + seg_len = UNI[Aindex[i] + 1] - UNI[Aindex[i]] + PPW[i] = pow(val - 1, seg_len, MOD) + + DP = [0] * (L + 1) + DP[0] = 1 + + for i in range(1, L + 1): + seg_len = UNI[Aindex[i] + 1] - UNI[Aindex[i]] + pw = (pow(val, seg_len, MOD) - pow(val - 1, seg_len, MOD) + MOD) % MOD + mxL = 0 + for eid in eves: + if Q[eid]['r'] <= Aindex[i]: + if Q[eid]['l'] > mxL: + mxL = Q[eid]['l'] + j = i - 1 + while j >= 0 and Aindex[j] >= mxL: + DP[i] = (DP[i] + DP[j] * pw) % MOD + pw = (pw * PPW[j]) % MOD + j -= 1 + + res = 0 + for i in range(0, L + 1): + ok = True + 
for eid in eves: + if Q[eid]['l'] > Aindex[i]: + ok = False + break + if ok: + pw = 1 + for j in range(i + 1, L + 1): + pw = (pw * PPW[j]) % MOD + res = (res + DP[i] * pw) % MOD + return res + + def solve_one(): + # Read queries + Q = [None] * (M + 1) # 1-based + KEY = [] + ST = set() + for i, (l, r, v) in enumerate(conditions, start = 1): + r += 1 + Q[i] = {'l': l, 'r': r, 'v': v} + KEY.append(l) + KEY.append(r) + ST.add(v) + + # Coordinate compression for boundaries + KEY.sort() + UNI = [None] # 1-based + prev = None + for x in KEY: + if x != prev: + UNI.append(x) + prev = x + NUM = len(UNI) - 1 # number of unique keys + UNI.append(N + 1) # uni[NUM+1] = N+1 + + # Map l, r to indices in UNI[1..NUM] + for i in range(1, M + 1): + lval = Q[i]['l'] + rval = Q[i]['r'] + li = bisect_left(UNI, lval, 1, NUM + 1) + ri = bisect_left(UNI, rval, 1, NUM + 1) + Q[i]['l'] = li + Q[i]['r'] = ri + + # Compute per-segment minimal mx (INF if unconstrained) + INF = A + 1 # computed based on input + MX = [INF] * (NUM + 2) # 1-based up to NUM + for i in range(1, M + 1): + for j in range(Q[i]['l'], Q[i]['r']): + if Q[i]['v'] < MX[j]: + MX[j] = Q[i]['v'] + + # Sum of constrained lengths + total_constrained = 0 + for i in range(1, NUM + 1): + if MX[i] != INF: + total_constrained += (UNI[i + 1] - UNI[i]) + + prd = pow(A, N - total_constrained, MOD) + + # Multiply contributions for each distinct maximum value + for val in ST: + pts = [i for i in range(1, NUM + 1) if MX[i] == val] + eves = [i for i in range(1, M + 1) if Q[i]['v'] == val] + prd = (prd * calc(val, pts, eves, UNI, Q)) % MOD + + return prd + + self.parameter["reference_answer"] = solve_one() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = self.parameter["A"], + conditions = "\n".join("- max(H[{} : {} + 1]) = {}".format(l, r, v) for (l, r, v) in self.parameter["conditions"]), + MOD = self.parameter["MOD"], + ) + + + def _process(self, answer : Optional[str]) -> 
Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/single_stack_sorting/__init__.py b/server/Gym/environments/single_stack_sorting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7c0cae9a27d3f36ba94544859d594e03ffe0581e --- /dev/null +++ b/server/Gym/environments/single_stack_sorting/__init__.py @@ -0,0 +1 @@ +from .environment import SingleStackSorting_Environment diff --git a/server/Gym/environments/single_stack_sorting/environment.py b/server/Gym/environments/single_stack_sorting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..327b56ec91eaf86adac895417f3906d66659b641 --- /dev/null +++ b/server/Gym/environments/single_stack_sorting/environment.py @@ -0,0 +1,107 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SingleStackSorting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a queue of integers containing `{N}` elements in increasing order from `0` (at the front) to `{N_minus_1}` (at the back). You also have an empty stack `S` and an initially empty output sequence. You may perform the following operations: +- `a`: Pop the front element of the queue and push it onto the stack `S`. +- `b`: Pop the top element from the stack `S` and append it to the output sequence. 
+ +Please produce the following target output sequence: +{sequence} + +Please output a valid sequence of operations (a string consisting of the characters `a` and `b` only) that transforms the initial queue into the given output sequence using the rules above. + +**Output Format:** A single line containing the sequence of operations (`a` and `b` only), with no spaces or extra characters.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the SingleStackSorting_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + operation_distribution = [random.randint(1, N) for _ in range(2)] + operation_distribution = [weight / sum(operation_distribution) for weight in operation_distribution] + + self.parameter["reference_answer"] = "" + + S = [] + output_sequence = self.parameter["output_sequence"] = [] + queue_front = 0 + while len(output_sequence) < N : + operation = random.choices(["a", "b"], weights = operation_distribution, k = 1)[0] + if operation == "a" and queue_front < N : + self.parameter["reference_answer"] += "a" + S.append(queue_front) + queue_front += 1 + elif operation == "b" and S : + self.parameter["reference_answer"] += "b" + output_sequence.append(S.pop()) + assert len(self.parameter["reference_answer"]) == N * 2, "reference_answer should have length 2 * N" + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, 
+ N_minus_1 = N - 1, + sequence = " ".join(map(str, self.parameter["output_sequence"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + S = [] + output_sequence = [] + queue_front = 0 + + for operation in processed_result : + if operation == "a" : + if queue_front >= self.parameter["N"] : + return self.rewards["invalid_solution"] + S.append(queue_front) + queue_front += 1 + elif operation == "b" : + if not S : + return self.rewards["invalid_solution"] + output_sequence.append(S.pop()) + else : + return self.rewards["wrong_format"] + + if len(output_sequence) != self.parameter["N"] : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["output_sequence"], output_sequence)) / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["output_sequence"] == output_sequence) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/ska_rock_garden/__init__.py b/server/Gym/environments/ska_rock_garden/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..87bd064344d24c0729f30e73ed4b5a747a12b000 --- /dev/null +++ b/server/Gym/environments/ska_rock_garden/__init__.py @@ -0,0 +1 @@ +from .environment import SkaRockGarden_Environment diff --git a/server/Gym/environments/ska_rock_garden/environment.py b/server/Gym/environments/ska_rock_garden/environment.py new file mode 100644 index 
class SkaRockGarden_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3458
    """Coordinate-swap puzzle: minimise the bounding-box perimeter, then the swap cost.

    Each of N points may have its coordinates swapped at a per-point cost.
    The reward has two halves: one for the perimeter reached by the answer,
    and one for the swap cost, which is only granted when the answer reaches
    the optimal perimeter.
    """

    prompt_template = \
r"""There are {N} points in a 2D plane, where the i-th point is (X[i], Y[i]) for 0 ≤ i < {N}. Each point has a cost M[i] to swap its coordinates (i.e., swapping (x, y) becomes (y, x)). Your goal is as follows:
- First, minimize the total perimeter of the smallest axis-aligned rectangle that can enclose all points after some of them are optionally swapped. The perimeter is obviously 2 × ((max_x - min_x) + (max_y - min_y)), where max_x and min_x are the maximum and minimum x-coordinates after your swaps (similarly for y).
- If multiple swap strategies result in the same minimum perimeter, choose the one with the smallest total swap cost (i.e., sum of M[i] for all swapped points).

X, Y, and M are given as follows:
{X_Y_M}

**Output Format:** Output a single line of {N} characters (no spaces or any other kinds of separators). The i-th character should be:
- `'0'` if you do **NOT** swap point i,
- `'1'` if you **do** swap point i."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy_perimeter: str = "(gold/answer)^beta", rewarding_weight_perimeter: float = +0.5, rewarding_beta_perimeter: float = 5.0,
                 rewarding_strategy_cost: str = "(gold/answer)^beta", rewarding_weight_cost: float = +0.5, rewarding_beta_cost: float = 5.0,
                 **kwargs):
        """
        Initialize the SkaRockGarden_Environment instance.

        Args:
            wrong_format: reward returned when the answer is missing, has the
                wrong length, or contains characters other than '0'/'1'.
            rewarding_strategy_perimeter: "(gold/answer)^beta" or "gold=answer".
            rewarding_weight_perimeter: weight of the perimeter part of the reward.
            rewarding_beta_perimeter: exponent used by "(gold/answer)^beta".
            rewarding_strategy_cost: "(gold/answer)^beta" or "gold=answer".
            rewarding_weight_cost: weight of the swap-cost part of the reward.
            rewarding_beta_cost: exponent used by "(gold/answer)^beta".
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy_perimeter": rewarding_strategy_perimeter,
            "rewarding_weight_perimeter": rewarding_weight_perimeter,
            "rewarding_beta_perimeter": rewarding_beta_perimeter,
            "rewarding_strategy_cost": rewarding_strategy_cost,
            "rewarding_weight_cost": rewarding_weight_cost,
            "rewarding_beta_cost": rewarding_beta_cost,
        }

    def _generate(self) -> None:
        """Sample X, Y, M and store the gold perimeter, gold cost and a reference answer."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Draw order (all of X, then Y, then M) matches the original single
        # tuple assignment, so seeded instances stay identical.
        X = self.parameter["X"] = [random.randint(0, 2 * N) for _ in range(N)]
        Y = self.parameter["Y"] = [random.randint(0, 2 * N) for _ in range(N)]
        M = self.parameter["M"] = [random.randint(1, N) for _ in range(N)]

        # Bounding box obtained when every point is oriented so that its
        # smaller coordinate comes first: [lx, rx] spans the smaller
        # coordinates and [ly, ry] spans the larger ones.
        smaller = [min(x, y) for x, y in zip(X, Y)]
        larger = [max(x, y) for x, y in zip(X, Y)]
        lx, rx = min(smaller), max(smaller)
        ly, ry = min(larger), max(larger)

        # The minimal fence length (perimeter of the axis-aligned rectangle)
        fence_length = 2 * ((rx - lx) + (ry - ly))

        def try_bounds(lx0, rx0, ly0, ry0):
            """Fit every point into [lx0, rx0] x [ly0, ry0], swapping only when needed.

            Returns (total swap cost, 0/1 assignment), or (None, None) when
            some point fits in neither orientation.
            """
            total = 0
            assign = [0] * N
            for i in range(N):
                x, y = X[i], Y[i]
                if lx0 <= x <= rx0 and ly0 <= y <= ry0:
                    continue  # fits as given, no swap needed
                if lx0 <= y <= rx0 and ly0 <= x <= ry0:
                    assign[i] = 1
                    total += M[i]
                else:
                    return None, None  # this point can't fit even if swapped
            return total, assign

        # Try the 4 possible ways of interpreting the bounding box and keep
        # the cheapest feasible one. The first tuple is always feasible by
        # construction, so best_assign is never left as None.
        best_weight, best_assign = None, None
        for bounds in (
            (lx, rx, ly, ry),
            (lx, ry, ly, rx),
            (ly, rx, lx, ry),
            (ly, ry, lx, rx),
        ):
            weight, assign = try_bounds(*bounds)
            if weight is not None and (best_weight is None or weight < best_weight):
                best_weight, best_assign = weight, assign

        self.parameter["gold_answer_perimeter"] = fence_length
        self.parameter["gold_answer_cost"] = best_weight
        self.parameter["reference_answer"] = "".join(map(str, best_assign))

    def _prompt_generate(self) -> str:
        """Render the prompt with one `X[i]=.. Y[i]=.. M[i]=..` line per point."""
        rows = zip(self.parameter["X"], self.parameter["Y"], self.parameter["M"])
        return self.prompt_template.format(
            N = self.parameter["N"],
            X_Y_M = "\n".join("X[{}]={} Y[{}]={} M[{}]={}".format(i, Xi, i, Yi, i, Mi) for i, (Xi, Yi, Mi) in enumerate(rows)),
        )

    def _process(self, answer: Optional[str]) -> Optional[str]:
        """Return the answer stripped of surrounding whitespace, or None if absent."""
        if answer is None:
            return None
        return answer.strip()

    def scorer(self, output: str) -> float:
        """Score a 0/1 swap string against the stored gold perimeter and cost.

        Returns `wrong_format` for a missing or malformed answer. Otherwise
        returns the perimeter reward, plus the cost reward when the answer
        reaches the gold perimeter.

        Raises:
            NotImplementedError: if a configured rewarding strategy is unknown.
        """
        # NOTE(review): `self.processor` is not defined in this class; it is
        # presumably supplied by VerifiableEnvironment - confirm.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if len(processed_result) != self.parameter["N"]:
            return self.rewards["wrong_format"]
        if not all(c in "01" for c in processed_result):
            return self.rewards["wrong_format"]

        # Apply the requested swaps on copies and accumulate their cost.
        X, Y = self.parameter["X"].copy(), self.parameter["Y"].copy()
        answer_cost, gold_cost = 0, self.parameter["gold_answer_cost"]
        for i, swap in enumerate(processed_result):
            if swap == "1":
                X[i], Y[i] = Y[i], X[i]
                answer_cost += self.parameter["M"][i]
        answer_perimeter = 2 * ((max(X) - min(X)) + (max(Y) - min(Y)))
        gold_perimeter = self.parameter["gold_answer_perimeter"]

        reward = 0.0

        assert gold_perimeter <= answer_perimeter, "answer_perimeter should be greater than or equal to gold_perimeter"
        strategy_perimeter = self.rewards["rewarding_strategy_perimeter"]
        if strategy_perimeter == "(gold/answer)^beta":
            if answer_perimeter == 0:
                assert gold_perimeter == 0, "If answer_perimeter is zero, gold_perimeter should also be zero"
                reward += self.rewards["rewarding_weight_perimeter"] * 1.0
            else:
                reward += self.rewards["rewarding_weight_perimeter"] * ((gold_perimeter / answer_perimeter) ** self.rewards["rewarding_beta_perimeter"])
        elif strategy_perimeter == "gold=answer":
            # Fixed: this branch used to test the *weight* entry against the
            # strategy name and read a key that is never stored.
            reward += self.rewards["rewarding_weight_perimeter"] * (gold_perimeter == answer_perimeter)
        else:
            raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy_perimeter']}")

        # The cost only counts once the primary objective is met.
        if gold_perimeter == answer_perimeter:
            assert gold_cost <= answer_cost, "answer_cost should be greater than or equal to gold_cost"
            strategy_cost = self.rewards["rewarding_strategy_cost"]
            if strategy_cost == "(gold/answer)^beta":
                if answer_cost == 0:
                    assert gold_cost == 0, "If answer_cost is zero, gold_cost should also be zero"
                    reward += self.rewards["rewarding_weight_cost"] * 1.0
                else:
                    reward += self.rewards["rewarding_weight_cost"] * ((gold_cost / answer_cost) ** self.rewards["rewarding_beta_cost"])
            elif strategy_cost == "gold=answer":
                reward += self.rewards["rewarding_weight_cost"] * (gold_cost == answer_cost)
            else:
                raise NotImplementedError(f"Unknown rewarding strategy: {self.rewards['rewarding_strategy_cost']}")

        return reward
class SkyscraperPuzzle_Environment(VerifiableEnvironment):
    """Skyscraper puzzle: fill an N x N Latin square from visible-building counts."""

    prompt_template = \
r"""You are given a {N} × {N} grid. Your task is to place a building of height in the range [0, {N_minus_1}] in each cell such that:
- Each **row** and each **column** contains all integer heights from `0` to `{N_minus_1}` **exactly once**.
- A building is **visible from a direction** if there are no taller buildings before it in that direction.

The number of visible buildings is specified as follows:
- From the **left** of each row: {left}
- From the **right** of each row: {right}
- From the **top** of each column: {top}
- From the **bottom** of each column: {bottom}

**Output Format:** Your final answer should contain {N} lines, each with {N} integers (heights), separated by spaces. Each line represents a row of the grid."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
                 **kwargs):
        """
        Set up the reward table for the skyscraper (visible count) puzzle.

        `wrong_format` is returned for unparsable or mis-shaped grids and
        `invalid_solution` for grids that are not Latin squares. The remaining
        three values configure the positive reward. Extra keyword arguments
        go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    @staticmethod
    def _visible(line) -> int:
        """Count entries of `line` that are not exceeded by any earlier entry."""
        count, tallest = 0, None
        for height in line:
            if tallest is None or height >= tallest:
                count += 1
                tallest = height
        return count

    @classmethod
    def _clues(cls, grid):
        """Return the (left, right, top, bottom) visible counts of a square grid."""
        columns = [list(column) for column in zip(*grid)]
        left = [cls._visible(row) for row in grid]
        right = [cls._visible(row[::-1]) for row in grid]
        top = [cls._visible(column) for column in columns]
        bottom = [cls._visible(column[::-1]) for column in columns]
        return left, right, top, bottom

    def _generate(self) -> None:
        """Build a random Latin square and store its four clue lists and the reference grid."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        row_offsets = list(range(N))
        col_offsets = list(range(N))
        random.shuffle(row_offsets)
        random.shuffle(col_offsets)

        # (row offset + column offset) mod N always yields a Latin square.
        grid = [[(r + c) % N for c in col_offsets] for r in row_offsets]

        left, right, top, bottom = self._clues(grid)
        self.parameter["left"] = left
        self.parameter["right"] = right
        self.parameter["top"] = top
        self.parameter["bottom"] = bottom

        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in grid)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the space-separated clue lists."""
        N = self.parameter["N"]
        clues = {side: " ".join(map(str, self.parameter[side])) for side in ("left", "right", "top", "bottom")}
        return self.prompt_template.format(N = N, N_minus_1 = N - 1, **clues)

    def _process(self, answer : Optional[str]) -> Optional[List]:
        """Parse the answer into a list of integer rows; None if absent or non-numeric."""
        if answer is None:
            return None
        try:
            return [[int(token) for token in line.split()] for line in answer.strip().splitlines() if line.strip()]
        except ValueError:
            return None

    def scorer(self, output : str) -> float:
        """Reward the fraction of the 4N clues that the submitted Latin square satisfies."""
        # NOTE(review): `self.processor` is not defined here; presumably inherited - confirm.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        solution = processed_result

        # Shape checks first: N rows of N entries each.
        if len(solution) != N or any(len(row) != N for row in solution):
            return self.rewards["wrong_format"]

        # Then the Latin-square property, rows before columns.
        heights = set(range(N))
        if any(set(row) != heights for row in solution):
            return self.rewards["invalid_solution"]
        if any(set(column) != heights for column in zip(*solution)):
            return self.rewards["invalid_solution"]

        satisfied = 0
        for side, observed in zip(("left", "right", "top", "bottom"), self._clues(solution)):
            satisfied += sum(int(answer == gold) for answer, gold in zip(observed, self.parameter[side]))
        assert satisfied <= 4 * N, "satisfied should not exceed 4 * N"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta":
            return self.rewards["rewarding_weight"] * ((satisfied / (4 * N)) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (satisfied == (4 * N))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class SkyscraperSumPuzzle_Environment(VerifiableEnvironment):
    """Skyscraper puzzle variant whose clues are the summed heights of visible buildings."""

    prompt_template = \
r"""You are given a {N} × {N} grid. Your task is to place a building of height in the range [0, {N_minus_1}] in each cell such that:
- Each **row** and each **column** contains all integer heights from `0` to `{N_minus_1}` **exactly once**.
- A building is **visible from a direction** if there are no taller buildings before it in that direction.

The **sum height** of visible buildings is specified as follows:
- From the **left** of each row: {left}
- From the **right** of each row: {right}
- From the **top** of each column: {top}
- From the **bottom** of each column: {bottom}

**Output Format:** Your final answer should contain {N} lines, each with {N} integers (heights), separated by spaces. Each line represents a row of the grid."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
                 **kwargs):
        """
        Set up the reward table for the skyscraper (visible height sum) puzzle.

        `wrong_format` is returned for unparsable or mis-shaped grids and
        `invalid_solution` for grids that are not Latin squares. The remaining
        three values configure the positive reward. Extra keyword arguments
        go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    @staticmethod
    def _visible_sum(line) -> int:
        """Sum the entries of `line` that are not exceeded by any earlier entry."""
        total, tallest = 0, None
        for height in line:
            if tallest is None or height >= tallest:
                total += height
                tallest = height
        return total

    @classmethod
    def _clues(cls, grid):
        """Return the (left, right, top, bottom) visible-height sums of a square grid."""
        columns = [list(column) for column in zip(*grid)]
        left = [cls._visible_sum(row) for row in grid]
        right = [cls._visible_sum(row[::-1]) for row in grid]
        top = [cls._visible_sum(column) for column in columns]
        bottom = [cls._visible_sum(column[::-1]) for column in columns]
        return left, right, top, bottom

    def _generate(self) -> None:
        """Build a random Latin square and store its four clue lists and the reference grid."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        row_offsets = list(range(N))
        col_offsets = list(range(N))
        random.shuffle(row_offsets)
        random.shuffle(col_offsets)

        # (row offset + column offset) mod N always yields a Latin square.
        grid = [[(r + c) % N for c in col_offsets] for r in row_offsets]

        left, right, top, bottom = self._clues(grid)
        self.parameter["left"] = left
        self.parameter["right"] = right
        self.parameter["top"] = top
        self.parameter["bottom"] = bottom

        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in grid)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the space-separated clue lists."""
        N = self.parameter["N"]
        clues = {side: " ".join(map(str, self.parameter[side])) for side in ("left", "right", "top", "bottom")}
        return self.prompt_template.format(N = N, N_minus_1 = N - 1, **clues)

    def _process(self, answer : Optional[str]) -> Optional[List]:
        """Parse the answer into a list of integer rows; None if absent or non-numeric."""
        if answer is None:
            return None
        try:
            return [[int(token) for token in line.split()] for line in answer.strip().splitlines() if line.strip()]
        except ValueError:
            return None

    def scorer(self, output : str) -> float:
        """Reward the fraction of the 4N clues that the submitted Latin square satisfies."""
        # NOTE(review): `self.processor` is not defined here; presumably inherited - confirm.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        solution = processed_result

        # Shape checks first: N rows of N entries each.
        if len(solution) != N or any(len(row) != N for row in solution):
            return self.rewards["wrong_format"]

        # Then the Latin-square property, rows before columns.
        heights = set(range(N))
        if any(set(row) != heights for row in solution):
            return self.rewards["invalid_solution"]
        if any(set(column) != heights for column in zip(*solution)):
            return self.rewards["invalid_solution"]

        satisfied = 0
        for side, observed in zip(("left", "right", "top", "bottom"), self._clues(solution)):
            satisfied += sum(int(answer == gold) for answer, gold in zip(observed, self.parameter[side]))
        assert satisfied <= 4 * N, "satisfied should not exceed 4 * N"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta":
            return self.rewards["rewarding_weight"] * ((satisfied / (4 * N)) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (satisfied == (4 * N))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class SlidingWindow_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1886
    """Sliding-window minimum: report the minimum of every length-K window of A."""

    prompt_template = \
r"""You are given the following list of {N} numbers: {A}
Please find the minimum value in each contiguous subarray of size {K} (there are {N_minus_K_plus_1} such subarrays in total).

Your final answer should be a single line containing the minimum values (from the leftmost subarray to the rightmost), separated by **spaces**."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0,
                 **kwargs):
        """
        Set up the reward table for the sliding-window task.

        `wrong_format` is returned for unparsable answers and
        `invalid_solution` for answers of the wrong length. The remaining
        values configure the positive reward. Extra keyword arguments go to
        the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_beta = rewarding_beta,
            rewarding_weight = rewarding_weight,
        )

    def _generate(self) -> None:
        """Sample K and A, then store the window minima as gold and reference answers."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = self.parameter["K"] = random.randint(2, N - 1)
        A = self.parameter["A"] = [random.randint(-(N // 20), +N) for _ in range(N)]

        # `candidates` holds indices whose values are non-decreasing from
        # front to back, so the front is always the current window minimum.
        candidates = deque()
        minima = self.parameter["gold_answer"] = []
        for right, value in enumerate(A):
            # Drop the front index once it has slid out of the window.
            if candidates and candidates[0] <= right - K:
                candidates.popleft()
            # Strictly larger values can never be a minimum again.
            while candidates and A[candidates[-1]] > value:
                candidates.pop()
            candidates.append(right)
            if right >= K - 1:
                minima.append(A[candidates[0]])

        assert len(minima) == N - K + 1, "The length of gold_answer should be N - K + 1"
        self.parameter["reference_answer"] = " ".join(map(str, minima))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, K, the window count and the array."""
        N, K = self.parameter["N"], self.parameter["K"]
        numbers = " ".join(map(str, self.parameter["A"]))
        return self.prompt_template.format(N = N, K = K, N_minus_K_plus_1 = N - K + 1, A = numbers)

    def _process(self, answer : Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; None if absent or non-numeric."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output : str) -> float:
        """Reward the answer by position-wise agreement with the gold minima."""
        # NOTE(review): `self.processor` is not defined here; presumably inherited - confirm.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        window_count = self.parameter["N"] - self.parameter["K"] + 1
        if len(processed_result) != window_count:
            return self.rewards["invalid_solution"]

        gold = self.parameter["gold_answer"]
        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta":
            hits = sum(int(a == b) for a, b in zip(gold, processed_result))
            return self.rewards["rewarding_weight"] * ((hits / window_count) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == processed_result)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class SLOElephants_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3482
    """Minimum-cost rearrangement by swaps, where a swap costs the sum of both items' costs."""

    prompt_template = \
r"""There are {N} items labeled from 0 to {N_minus_1}. Each item labeled `i` has an associated cost C[i]. The array C is: {C}
Initially, the items are arranged in the order A (this means the item at position 0 has label A[0], at position 1 has label A[1], etc): {A}
You are required to rearrange the items into the target order B: {B}

You may perform any number of swaps. Swapping the items labeled `i` and `j` incurs a cost of C[i] + C[j]. Please minimize the total cost of all swaps.
Output multiple lines. Each line should contain two integers `i` and `j`, indicating that you swap the items labeled `i` and `j`. The swaps should be listed in the order they are applied."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, unsuccessful_solution : float = -0.2, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs):
        """
        Initialize the SLOElephants_Environment instance.

        Args:
            wrong_format: reward for an answer that cannot be parsed.
            invalid_solution: reward when a swap names an out-of-range label
                or swaps a label with itself.
            unsuccessful_solution: reward when the swaps do not produce B.
            rewarding_strategy: "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: multiplier of the positive reward.
            rewarding_beta: exponent used by "(gold/answer)^beta".
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "unsuccessful_solution" : unsuccessful_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample A, B (distinct orders) and C, then store the optimal total cost."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        A, B = self.parameter["A"], self.parameter["B"] = list(range(N)), list(range(N))
        # Reshuffle until at least one swap is required.
        while True:
            random.shuffle(A)
            random.shuffle(B)
            if A != B:
                break
        C = self.parameter["C"] = [random.randint(1, N) for _ in range(N)]

        # dest_pos[e] = index in B where item e must finally stand
        dest_pos = [0] * N
        for idx, e in enumerate(B):
            dest_pos[e] = idx

        # next_id[e] = item that currently occupies e's final place
        next_id = [A[dest_pos[e]] for e in range(N)]

        visited = [False] * N
        overall_min = min(C)  # cheapest item overall
        answer = 0

        for start in range(N):
            if visited[start]:
                continue

            # Collect the costs along the permutation cycle through `start`.
            cycle_costs = []
            x = start
            while not visited[x]:
                visited[x] = True
                cycle_costs.append(C[x])
                x = next_id[x]

            length = len(cycle_costs)
            if length <= 1:  # already in place, no swaps
                continue

            cycle_sum, cycle_min = sum(cycle_costs), min(cycle_costs)
            # Either rotate the cycle using its own cheapest item, or borrow
            # the globally cheapest item for the rotation.
            cost_within = cycle_sum + cycle_min * (length - 2)
            cost_global = cycle_sum + cycle_min + overall_min * (length + 1)
            answer += min(cost_within, cost_global)

        assert answer > 0, "The answer should be greater than 0"
        self.parameter["gold_answer"] = answer

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the labelled C, A and B arrays."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            C = " ".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"])),
            A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
            B = " ".join("B[{}]={}".format(i, Bi) for i, Bi in enumerate(self.parameter["B"])),
        )

    def _process(self, answer : Optional[str]) -> Optional[list]:
        """Parse one `i j` pair per non-empty line into a list of (i, j) tuples.

        Returns None when the answer is absent, a token is not an integer,
        or a line does not hold exactly two tokens.
        """
        if answer is None:
            return None
        swaps = []
        try:
            for line in answer.strip().splitlines():
                line = line.strip()
                if line:
                    # int() failures and wrong token counts both raise
                    # ValueError; nothing else is intentionally swallowed.
                    i, j = map(int, line.split())
                    swaps.append((i, j))
        except ValueError:
            return None
        return swaps

    def scorer(self, output : str) -> float:
        """Replay the swaps on A and reward their total cost relative to the gold cost.

        Raises:
            NotImplementedError: if the configured rewarding strategy is unknown.
        """
        # NOTE(review): `self.processor` is not defined here; presumably inherited - confirm.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        A = self.parameter["A"].copy()
        # pos[label] = current index of that label in A
        pos = [None] * N
        for i, Ai in enumerate(A):
            pos[Ai] = i

        answer, gold = 0, self.parameter["gold_answer"]
        for i, j in processed_result:
            if not (0 <= i < N and 0 <= j < N and i != j):
                return self.rewards["invalid_solution"]
            answer += self.parameter["C"][i] + self.parameter["C"][j]
            A[pos[i]], A[pos[j]] = A[pos[j]], A[pos[i]]
            pos[i], pos[j] = pos[j], pos[i]
            assert A[pos[i]] == i and A[pos[j]] == j, "After swap, A[{}] should be {} and A[{}] should be {}".format(pos[i], i, pos[j], j)
        if A != self.parameter["B"]:
            return self.rewards["unsuccessful_solution"]

        assert 0 < gold <= answer, "gold should be less than or equal to answer, but got gold={}, answer={}".format(gold, answer)
        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        else:
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class SmallestBinaryMultiple_Environment(VerifiableEnvironment):  # https://www.luogu.com.cn/problem/P2841
    """Find the smallest positive B such that A * B is written with digits 0 and 1 only."""

    prompt_template = r"""Find the **smallest positive integer** B such that the product {A} × B contains **only digits `0` and `1`** in its decimal representation. Output the value of B."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_answer : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = 1.0,
                 **kwargs):
        """
        Initialize the SmallestBinaryMultiple_Environment instance.

        Args:
            wrong_format: reward for a missing, non-integer or non-positive answer.
            invalid_answer: reward when A * B contains a digit other than 0 or 1.
            rewarding_strategy: "(gold/answer)^beta" or "gold=answer".
            rewarding_beta: exponent used by "(gold/answer)^beta".
            rewarding_weight: multiplier of the positive reward.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_answer": invalid_answer,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_beta": rewarding_beta,
            "rewarding_weight": rewarding_weight,
        }

    @staticmethod
    def _smallest_multiplier(A : int) -> int:
        """Return the smallest positive B for which A * B uses only digits 0 and 1.

        `best` maps a remainder mod A to the first 0/1-digit number found
        with that remainder. Digit positions are processed from least to
        most significant; each pass tries setting the current digit to 1 on
        every number found so far.
        """
        best = {0: 0}

        cur_value = 1    # 10^k (a single '1' at the current digit position)
        cur_mod = 1 % A  # (10^k) mod A

        while True:
            # Collected separately so `best` is not modified while iterating.
            new_states = []

            for remainder, value in best.items():
                candidate = value + cur_value  # turn the current digit from 0 to 1
                new_remainder = (remainder + cur_mod) % A

                if new_remainder == 0:
                    return candidate // A

                if new_remainder not in best:  # first time we see this remainder
                    new_states.append((new_remainder, candidate))

            for r, v in new_states:
                best[r] = v

            # move to the next more-significant digit
            cur_value *= 10
            cur_mod = (cur_mod * 10) % A

    def _generate(self) -> None:
        """Sample A in [2, MAX_A] and store the minimal multiplier as the reference answer."""
        assert "MAX_A" in self.parameter, "MAX_A is required in parameter"
        MAX_A = self.parameter["MAX_A"]
        assert MAX_A >= 2, "MAX_A should be greater than or equal to 2"

        A = self.parameter["A"] = random.randint(2, MAX_A)
        self.parameter["reference_answer"] = self._smallest_multiplier(A)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with A."""
        return self.prompt_template.format(A = self.parameter["A"])

    def _process(self, answer : Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; None if absent or not an integer."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output : str) -> float:
        """Check that A * B is a 0/1-digit number and reward how close B is to the minimum.

        Raises:
            NotImplementedError: if the configured rewarding strategy is unknown.
        """
        # NOTE(review): `self.processor` is not defined here; presumably inherited - confirm.
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result <= 0:
            return self.rewards["wrong_format"]

        # Inspect the decimal digits of the product one by one.
        AB = self.parameter["A"] * processed_result
        while AB:
            if AB % 10 not in (0, 1):
                return self.rewards["invalid_answer"]
            AB //= 10

        reference = self.parameter["reference_answer"]
        assert reference <= processed_result, "Reference answer should be less than or equal to the processed result"
        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((reference / processed_result) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer":
            # Fixed: an assertion here used to require the answer to differ
            # from the reference, so a correct answer raised AssertionError.
            return self.rewards["rewarding_weight"] * (reference == processed_result)
        else:
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# ---------------------------------------------------------------------------
# server/Gym/environments/smallest_circle/environment.py
# ---------------------------------------------------------------------------
import random
from math import gcd, isfinite, sqrt
from typing import List, Optional, Tuple

from ...environment import VerifiableEnvironment


def distance(p1, p2):
    """Return the Euclidean distance between the 2D points ``p1`` and ``p2``."""
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return sqrt(dx ** 2 + dy ** 2)


def circle_from_two_points(p1, p2):
    """Return ``(center, radius)`` of the circle having segment ``p1``-``p2`` as a diameter."""
    center = ((p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2)
    return center, distance(p1, p2) / 2


def circle_from_three_points(p1, p2, p3):
    """
    Return ``(center, radius)`` of the circle through ``p1``, ``p2`` and ``p3``.

    The centre is found by solving the two perpendicular-bisector equations
    relative to ``p1``.  Raises ZeroDivisionError when the determinant ``d``
    is zero, i.e. when the three points are collinear.
    """
    a1, b1 = p2[0] - p1[0], p2[1] - p1[1]
    a2, b2 = p3[0] - p1[0], p3[1] - p1[1]
    c1 = (a1 * a1 + b1 * b1) / 2
    c2 = (a2 * a2 + b2 * b2) / 2
    d = a1 * b2 - a2 * b1
    center = (p1[0] + (c1 * b2 - c2 * b1) / d, p1[1] + (a1 * c2 - a2 * c1) / d)
    return center, distance(center, p1)


class SmallestCircle_Environment(VerifiableEnvironment):
    """Environment asking for the minimum enclosing circle of a set of integer points."""

    prompt_template = \
r"""You are given a set of {N} points on a 2D plane.
It is guaranteed that:
(1) all the coordinates are integers;
(2) no two points have the same coordinates;
(3) no three points are on the same line.
Below is the set of points:
{points}

Your task is to find the **smallest circle** covering these points, measured by the radius of the circle.
Your score will be based on the feasibility of your output and the optimality of the radius.
The precision tolerance is 0.001.

**Output Format:** Your output should be three **floats** in a single line, $x$, $y$, and $r$, separated by spaces.
$x$ and $y$ represent the center of the circle, and $r$ represents the radius of the circle."""
    # tolerance used by both the solver and the feasibility check
    epsilon = 1E-3

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_solution: float = 0.0,
                 rewarding_strategy: str = "(gold/answer)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the Smallest Circle problem.

        Reward settings are stored in ``self.rewards``; remaining keyword
        arguments go to the parent constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """
        Sample N distinct integer points in [0, 2N]^2 with no three collinear
        and compute their smallest enclosing circle.

        Writes ``points``, ``reference_answer`` ("x y r") and ``gold_answer``
        (the radius) into ``self.parameter``; for the "(gold/answer)^beta"
        strategy it also sets ``self.passing_reward_threshold``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        chosen = set()
        used_lines = set()  # normalised (a, b, c) of every line through two chosen points
        while len(chosen) < N:
            x = random.randint(0, 2 * N)
            y = random.randint(0, 2 * N)
            if (x, y) in chosen:
                continue

            fresh_lines = set()
            for px, py in chosen:
                # line a*X + b*Y + c = 0 through (px, py) and (x, y)
                if px == x:
                    a, b, c = 1, 0, -x
                else:
                    a, b = py - y, x - px
                    c = -(a * x + b * y)

                g = gcd(abs(a), gcd(abs(b), abs(c)))
                a, b, c = a // g, b // g, c // g

                # canonical sign so that equal lines compare equal
                if a < 0:
                    a, b, c = -a, -b, -c
                elif a == 0 and b < 0:
                    b, c = -b, -c

                if (a, b, c) in used_lines:
                    break  # the candidate would lie on a line through two chosen points
                fresh_lines.add((a, b, c))
            else:
                chosen.add((x, y))
                used_lines |= fresh_lines

        points = self.parameter["points"] = list(chosen)

        # use the randomized incremental algorithm to find the smallest circle
        random.shuffle(points)
        eps = self.epsilon
        center, radius = points[0], 0.0
        for i in range(1, N):
            p_i = points[i]
            if distance(p_i, center) < radius + eps:
                continue
            center, radius = p_i, 0.0
            for j in range(i):
                p_j = points[j]
                if distance(p_j, center) < radius + eps:
                    continue
                center, radius = circle_from_two_points(p_i, p_j)
                for k in range(j):
                    p_k = points[k]
                    if distance(p_k, center) < radius + eps:
                        continue
                    center, radius = circle_from_three_points(p_i, p_j, p_k)

        self.parameter["reference_answer"] = "{} {} {}".format(center[0], center[1], radius)
        self.parameter["gold_answer"] = radius

        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta":
            self.passing_reward_threshold = self.rewards["rewarding_weight"] * ((radius / (radius + eps)) ** self.rewards["rewarding_beta"])

    def _prompt_generate(self) -> str:
        """Return the prompt listing N and one "(x, y)" point per line."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            points="\n".join("({}, {})".format(x, y) for x, y in self.parameter["points"]),
        )

    def _process(self, answer: Optional[str]) -> Optional[Tuple[float, float, float]]:
        """
        Parse "x y r" into a tuple of three finite floats.

        Returns None when the answer is missing, does not consist of exactly
        three float tokens, or contains nan/inf.  Non-finite values must be
        rejected: every comparison with NaN is false, so a NaN centre would
        pass the feasibility test in ``scorer`` for any radius.
        """
        if answer is None:
            return None
        try:
            x, y, r = map(float, answer.strip().split())
        except ValueError:
            return None
        if not (isfinite(x) and isfinite(y) and isfinite(r)):
            return None
        return (x, y, r)

    def scorer(self, output: str) -> float:
        """
        Score a model output.

        ``wrong_format`` for unparsable output or a non-positive radius,
        ``invalid_solution`` when some point lies farther than r + epsilon
        from the centre, otherwise the reward of the configured strategy.

        Raises:
            NotImplementedError: if the rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]

        x, y, r = processed_result
        if r <= 0:
            return self.rewards["wrong_format"]

        limit = r + self.epsilon
        if any(distance((x, y), p) > limit for p in self.parameter["points"]):
            return self.rewards["invalid_solution"]

        opt_r = self.parameter["gold_answer"]
        assert r >= opt_r - 2 * self.epsilon, "The radius of the output circle should be at least as large as the optimal radius."

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * min(((opt_r / r) ** self.rewards["rewarding_beta"]), 1.0)
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (abs(r - opt_r) < self.epsilon)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))


# ---------------------------------------------------------------------------
# server/Gym/environments/sorting/environment.py
# (sorting/__init__.py re-exports Sorting_Environment from .environment)
# ---------------------------------------------------------------------------
class Sorting_Environment(VerifiableEnvironment):
    """Environment asking to sort a list of random non-negative integers."""

    prompt_template = \
r"""You are given the following list of numbers:
{}
Please sort them in **ascending order**.

Your final answer should be a single line containing the sorted numbers, separated by **spaces**.
For example: `1 2 3 4 5` (do **NOT** include the backticks or quotes)."""

    def __init__(self,
                 weight_multiple: int = 5,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "mean([gold=answer])^beta",
                 rewarding_beta: float = 10.0,
                 rewarding_weight: float = +1.0,
                 **kwargs):
        """
        Initialize the Sorting_Environment instance.

        ``weight_multiple`` scales the value range: elements are drawn from
        [0, N * weight_multiple].
        """
        super().__init__(**kwargs)

        self.weight_multiple = weight_multiple
        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_beta": rewarding_beta,
            "rewarding_weight": rewarding_weight,
        }

    def _generate(self) -> None:
        """Draw N random integers and store the array, its sorted copy and the reference string."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        upper = N * self.weight_multiple
        array = self.parameter["array"] = [random.randint(0, upper) for _ in range(N)]
        assert len(array) == self.parameter["N"], "array should have the same length as N"
        gold = self.parameter["gold_answer"] = sorted(array)
        assert len(gold) == self.parameter["N"], "gold_answer should have the same length as N"
        self.parameter["reference_answer"] = " ".join(map(str, gold))

    def _prompt_generate(self) -> str:
        """Return the prompt with the unsorted array written space-separated."""
        return self.prompt_template.format(" ".join(map(str, self.parameter["array"])))

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None on a missing or malformed answer."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """
        Score a model output.

        ``wrong_format`` when parsing fails, ``invalid_solution`` when the
        number of values differs from N, otherwise either the fraction of
        positions matching the sorted array raised to beta, or exact match.

        Raises:
            NotImplementedError: if the rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(processed_result) != N:
            return self.rewards["invalid_solution"]

        gold = self.parameter["gold_answer"]
        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta":
            matches = sum(int(a == b) for a, b in zip(gold, processed_result))
            return self.rewards["rewarding_weight"] * ((matches / N) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == processed_result)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))


# ---------------------------------------------------------------------------
# server/Gym/environments/spiral_matrix/environment.py
# (spiral_matrix/__init__.py re-exports SpiralMatrix_Environment from .environment)
# ---------------------------------------------------------------------------
class SpiralMatrix_Environment(VerifiableEnvironment):
    """Environment asking for the clockwise spiral traversal of a random matrix."""

    # NOTE(review): the fourth bullet below says "top-right corner" although the
    # upward leg runs along the left edge; looks like a typo for "top-left".
    # Left untouched because the text is part of the emitted prompt - confirm.
    prompt_template = \
r"""You are given a 2D integer matrix of size {M} x {N}:
{matrix}

Return all elements of the matrix in a clockwise spiral order, starting from the top-left corner. More precisely:
- Start from the top-left corner and move right until you reach the right edge.
- Then, move down until you reach the bottom-right corner.
- Then, move left until you reach the bottom-left corner.
- Then, move up until you reach the top-right corner.
- Continue this inward spiral traversal until all elements have been visited exactly once.

**Output Format:**
Your final answer should be a single line of {MN} integers separated by **spaces**.

---

**Example 1**

You are given an integer matrix of size 3 x 3:
1 2 3
4 5 6
7 8 9

The output is (do **NOT** include backticks or quotes — use the format below exactly):
```
1 2 3 6 9 8 7 4 5
```

**Example 2**

You are given an integer matrix of size 3 x 4:
1 2 3 4
5 6 7 8
9 10 11 12

The output is (do **NOT** include backticks or quotes — use the format below exactly):
```
1 2 3 4 8 12 11 10 9 5 6 7
```
---
"""

    def __init__(
        self,
        wrong_format: float = -1.0,
        invalid_solution: float = -0.5,
        rewarding_strategy: str = "mean([gold=answer])^beta",
        rewarding_beta: float = 5.0,
        rewarding_weight: float = +1.0,
        **kwargs
    ):
        """
        Initialize the SpiralMatrixProblem instance.

        Reward settings are stored in ``self.rewards``; remaining keyword
        arguments go to the parent constructor.
        """
        super().__init__(**kwargs)
        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Draw M and N from [2, MAX_M_N], fill an M x N matrix and store its spiral order."""
        assert "MAX_M_N" in self.parameter, "MAX_M_N is required in parameter"
        MAX_M_N = self.parameter["MAX_M_N"]
        M = random.randint(2, MAX_M_N)
        N = random.randint(2, MAX_M_N)
        self.parameter["M"] = M
        self.parameter["N"] = N

        self.matrix = [[random.randint(1, M * N) for _ in range(N)] for _ in range(M)]
        self.parameter["matrix"] = self.matrix
        gold = self.parameter["gold_answer"] = self._compute_spiral(self.matrix)
        self.parameter["reference_answer"] = " ".join(map(str, gold))

    def _prompt_generate(self) -> str:
        """Return the prompt with the sizes and the matrix rows filled in."""
        M, N = self.parameter["M"], self.parameter["N"]
        rows = (" ".join(map(str, row)) for row in self.parameter["matrix"])
        return self.prompt_template.format(M=M, N=N, MN=M * N, matrix="\n".join(rows))

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None on a missing or malformed answer."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def _compute_spiral(self, matrix: List[List[int]]) -> List[int]:
        """Return the elements of ``matrix`` in clockwise spiral order (empty list for an empty matrix)."""
        order = []
        if not matrix:
            return order

        top, bottom = 0, len(matrix) - 1
        left, right = 0, len(matrix[0]) - 1

        while top <= bottom and left <= right:
            # top row, left to right
            order.extend(matrix[top][col] for col in range(left, right + 1))
            top += 1

            # right column, top to bottom
            order.extend(matrix[row][right] for row in range(top, bottom + 1))
            right -= 1

            # bottom row, right to left (only if a row is left)
            if top <= bottom:
                order.extend(matrix[bottom][col] for col in range(right, left - 1, -1))
                bottom -= 1

            # left column, bottom to top (only if a column is left)
            if left <= right:
                order.extend(matrix[row][left] for row in range(bottom, top - 1, -1))
                left += 1

        return order

    def scorer(self, output: str) -> float:
        """
        Score a model output.

        ``wrong_format`` when parsing fails, ``invalid_solution`` when the
        number of values differs from M * N, otherwise either the fraction of
        positions matching the spiral order raised to beta, or exact match.

        Raises:
            NotImplementedError: if the rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        total = self.parameter["M"] * self.parameter["N"]
        if len(processed_result) != total:
            return self.rewards["invalid_solution"]

        gold = self.parameter["gold_answer"]
        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta":
            matches = sum(int(a == b) for a, b in zip(gold, processed_result))
            return self.rewards["rewarding_weight"] * ((matches / total) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == processed_result)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
# ---------------------------------------------------------------------------
# server/Gym/environments/splitting_game/environment.py
# (splitting_game/__init__.py re-exports SplittingGame_Environment from .environment)
# ---------------------------------------------------------------------------
import random
from typing import List, Optional

from ...environment import VerifiableEnvironment


class SplittingGame_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3185
    """Two-player bean-splitting game; the task is to name the winner under optimal play."""

    prompt_template = \
r"""There are {N} bottles of beans, indexed from 0 to {N_minus_1}. Initially, the i-th bottle contains P[i] beans. The array P is given as:
{P}

Alice and Bob play a game with the following rules:
- Alice goes first. They take turns alternately.
- On each turn, a player must choose three indices i, j, k (0 ≤ i < j ≤ k < {N}) such that the i-th bottle contains at least one bean. The player then removes one bean from bottle i, adds one bean to bottle j, and adds one bean to bottle k. (If j = k, it means adding two beans to bottle j.)
- The game ends when a player cannot make a move. The player who cannot move loses the game.

Assuming both players play optimally, who will win the game? Output a single line containing either `Alice` or `Bob` (do NOT include quotes or backticks)."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_answer: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the SplittingGame_Environment instance.

        The four reward values are stored in ``self.rewards``; remaining
        keyword arguments go to the parent constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_answer": invalid_answer,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    def _generate(self) -> None:
        """
        Pick the winner at random, then resample the bean counts P until the
        computed winner of the position equals that pick.

        Writes ``reference_answer`` and ``P`` into ``self.parameter``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        target = "Alice" if random.random() < 0.5 else "Bob"
        self.parameter["reference_answer"] = target

        # Sprague-Grundy value of a single bean, indexed by the reversed
        # position r = N - 1 - i (r = 0 is the last bottle).
        grundy = [0] * N
        for r in range(1, N):
            options = {grundy[j] ^ grundy[k] for j in range(r) for k in range(j + 1)}
            value = 0
            while value in options:  # mex of the reachable values
                value += 1
            grundy[r] = value

        def winner(beans) -> str:
            # only bottles with an odd bean count contribute to the nim-sum
            nim_sum = 0
            for i, count in enumerate(beans):
                if count & 1:
                    nim_sum ^= grundy[N - 1 - i]

            # zero nim-sum: the player to move (Alice) loses
            if nim_sum == 0:
                return "Bob"

            # look for a legal move i < j <= k that leaves a zero nim-sum
            for i, count in enumerate(beans):
                if count == 0:
                    continue
                g_i = grundy[N - 1 - i]
                for j in range(i + 1, N):
                    g_j = grundy[N - 1 - j]
                    for k in range(j, N):
                        if nim_sum ^ g_i ^ g_j ^ grundy[N - 1 - k] == 0:
                            return "Alice"
            return "Bob"

        while True:
            beans = self.parameter["P"] = [random.randint(0, 2 * N) for _ in range(N)]
            if winner(beans) == target:
                break

    def _prompt_generate(self) -> str:
        """Return the prompt with N and the entries "P[i]=value" filled in."""
        N = self.parameter["N"]
        listing = " ".join("P[{}]={}".format(i, Pi) for i, Pi in enumerate(self.parameter["P"]))
        return self.prompt_template.format(N=N, N_minus_1=N - 1, P=listing)

    def _process(self, answer: Optional[str]) -> Optional[str]:
        """Return the answer with surrounding whitespace removed, or None when it is missing."""
        return None if answer is None else answer.strip()

    def scorer(self, output: str) -> float:
        """
        Score a model output: ``wrong_format`` when nothing could be extracted,
        ``invalid_answer`` for anything other than "Alice"/"Bob", otherwise
        ``correct_answer`` or ``wrong_answer``.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result not in ("Alice", "Bob"):
            return self.rewards["invalid_answer"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]


# ---------------------------------------------------------------------------
# server/Gym/environments/spy_network/environment.py
# (spy_network/__init__.py re-exports SpyNetwork_Environment from .environment)
# ---------------------------------------------------------------------------
class SpyNetwork_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1262
    """Choose a minimum-cost vertex set from which every vertex of a directed graph is reachable."""

    prompt_template = \
r"""You are given a **directed graph** with {N} vertices, labeled from 0 to {N_minus_1}.

The graph contains the following directed edges. Each edge is represented as a tuple (s, t), meaning there is a directed edge **from vertex s to vertex t**:
{edges}

Each vertex i has an associated cost c[i], given as follows:
{costs}

Your task is to select a subset of vertices s_1, s_2, ..., s_k such that:
- Every vertex in the graph is reachable (i.e., there exists a path ending at that vertex) starting from at least one of the selected vertices.
- Your goal is to **minimize** the total cost of the selected vertices: c[s_1] + c[s_2] + ... + c[s_k].

**Output Format:**
Your final answer should be a single line containing the selected vertices: s_1, s_2, ..., s_k, separated by **spaces**.
Example: `0 1 {N_minus_1}` (do **NOT** include the backticks or quotes); this means the selected vertices are 0, 1, and {N_minus_1}, and the total cost is c[0] + c[1] + c[{N_minus_1}] = {c_0} + {c_1} + {c_N_minus_1} = {example_cost}.
"""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 unsuccessful_solution: float = -0.3,
                 rewarding_strategy: str = "(gold/answer)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 3.0,
                 **kwargs):
        """
        Initialize the SpyNetwork_Environment instance.

        Reward settings are stored in ``self.rewards``; remaining keyword
        arguments go to the parent constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "unsuccessful_solution": unsuccessful_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """
        Build a random directed graph with vertex costs and solve it.

        The optimum takes the cheapest vertex of every strongly connected
        component that has no edge entering it from another component.
        Writes ``edges``, ``costs``, ``reference_answer`` (space-separated
        vertices) and ``gold_answer`` (their total cost).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        assert "edge_density" in self.parameter, "edge_density is required in parameter"
        edge_density = self.parameter["edge_density"]
        assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0"

        assert "dominated_probability" in self.parameter, "dominated_probability is required in parameter"
        dominated_probability = self.parameter["dominated_probability"]

        dominated = [random.random() < dominated_probability for _ in range(N)]
        # an edge s -> t is a candidate unless s is flagged and t is not
        candidates = [
            (s, t)
            for s in range(N)
            for t in range(N)
            if s != t and (not dominated[s] or dominated[t])
        ]
        wanted = min(len(candidates), int(edge_density * N * (N - 1)))
        edges = self.parameter["edges"] = random.sample(candidates, wanted)
        random.shuffle(edges)

        assert len(edges) == len(set(edges)), "edges should be unique"
        for s, t in edges:
            assert 0 <= s < N, "s should be in range"
            assert 0 <= t < N, "t should be in range"
            assert s != t, "s should not be equal to t"

        costs = self.parameter["costs"] = [random.randint(1, N) for _ in range(N)]

        successors = [[] for _ in range(N)]
        for s, t in edges:
            successors[s].append(t)

        # Tarjan's strongly-connected-components algorithm (recursive)
        component = [0] * N
        index = [0] * N       # discovery time, 0 = not visited yet
        lowlink = [0] * N
        path = []
        on_path = [False] * N
        component_count = 0
        clock = 0

        def visit(u):
            nonlocal clock, component_count
            clock += 1
            index[u] = lowlink[u] = clock
            path.append(u)
            on_path[u] = True

            for v in successors[u]:
                if index[v] == 0:
                    visit(v)
                    lowlink[u] = min(lowlink[u], lowlink[v])
                elif on_path[v]:
                    lowlink[u] = min(lowlink[u], index[v])

            if lowlink[u] == index[u]:
                # u is the root of a component: pop it off the path
                while True:
                    member = path.pop()
                    on_path[member] = False
                    component[member] = component_count
                    if member == u:
                        break
                component_count += 1

        for start in range(N):
            if index[start] == 0:
                visit(start)

        # components entered by an edge coming from a different component
        entered = [False] * component_count
        for u in range(N):
            for v in successors[u]:
                if component[u] != component[v]:
                    entered[component[v]] = True

        # cheapest vertex of each component (lowest index wins ties)
        cheapest_cost = [None] * component_count
        cheapest_vertex = [None] * component_count
        for vertex, cost in enumerate(costs):
            if cost is None:
                continue
            cid = component[vertex]
            if cheapest_cost[cid] is None or cost < cheapest_cost[cid]:
                cheapest_cost[cid] = cost
                cheapest_vertex[cid] = vertex

        sources = [cid for cid in range(component_count) if not entered[cid]]
        picked = [cheapest_vertex[cid] for cid in sources]
        self.parameter["reference_answer"] = picked
        self.parameter["gold_answer"] = sum(costs[vertex] for vertex in picked)
        assert self.parameter["gold_answer"] == sum(cheapest_cost[cid] for cid in sources)
        assert self.parameter["gold_answer"] > 0, "gold_answer should be greater than 0"
        self.parameter["reference_answer"] = " ".join(map(str, picked))

    def _prompt_generate(self) -> str:
        """Return the prompt with the edge list, the costs and a worked cost example filled in."""
        N = self.parameter["N"]
        costs = self.parameter["costs"]
        edge_lines = "\n".join("({}, {})".format(s, t) for s, t in self.parameter["edges"])
        cost_lines = "\n".join("c[{}]={}".format(i, costs[i]) for i in range(N))
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            edges=edge_lines,
            costs=cost_lines,
            c_0=costs[0],
            c_1=costs[1],
            c_N_minus_1=costs[N - 1],
            example_cost=costs[0] + costs[1] + costs[N - 1],
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None on a missing or malformed answer."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """
        Score a model output.

        ``wrong_format`` when parsing fails, ``invalid_solution`` for
        duplicate or out-of-range vertices, ``unsuccessful_solution`` when
        some vertex is not reachable from the selection, otherwise the reward
        of the configured strategy on the total selected cost.

        Raises:
            NotImplementedError: if the rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        selected_vertices = processed_result
        N = self.parameter["N"]

        successors = [[] for _ in range(N)]
        for s, t in self.parameter["edges"]:
            successors[s].append(t)

        reached = [False] * N

        def mark_reachable(start):
            # depth-first search with an explicit stack
            pending = [start]
            while pending:
                vertex = pending.pop()
                if reached[vertex]:
                    continue
                reached[vertex] = True
                pending.extend(successors[vertex])

        if len(selected_vertices) != len(set(selected_vertices)):
            return self.rewards["invalid_solution"]

        answer = 0
        for vertex in selected_vertices:
            if not (0 <= vertex < N):
                return self.rewards["invalid_solution"]
            mark_reachable(vertex)
            answer += self.parameter["costs"][vertex]

        if not all(reached):
            return self.rewards["unsuccessful_solution"]

        gold = self.parameter["gold_answer"]
        assert gold <= answer

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
# ---------------------------------------------------------------------------
# server/Gym/environments/squ_squarks/environment.py
# (squ_squarks/__init__.py re-exports SquSquarks_Environment from .environment)
# ---------------------------------------------------------------------------
import random
from typing import Optional, List

from ...environment import VerifiableEnvironment


class SquSquarks_Environment(VerifiableEnvironment):  # Source: https://www.luogu.com.cn/problem/P3194
    """Recover N distinct positive integers from the multiset of their pairwise sums."""

    prompt_template = \
r"""Please find {N} **distinct positive integers** such that the sums of all {N} * ({N} - 1) / 2 distinct pairs among them (in any order) are exactly: {sums}
Output these {N} integers, separated by spaces."""

    def __init__(self,
                 number_multiple: int = 2,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(intersection/union)^beta",
                 rewarding_beta: float = 5.0,
                 rewarding_weight: float = +1.0,
                 **kwargs):
        """
        Initialize the SquSquarks_Environment instance.

        ``number_multiple`` scales the value range: the hidden numbers are
        sampled from [1, N * number_multiple].
        """
        super().__init__(**kwargs)

        self.number_multiple = number_multiple
        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_beta": rewarding_beta,
            "rewarding_weight": rewarding_weight,
        }

    def _generate(self) -> None:
        """Sample N distinct numbers and store their shuffled pairwise sums plus the reference string."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        numbers = random.sample(range(1, N * self.number_multiple + 1), N)
        self.parameter["reference_answer"] = " ".join(map(str, numbers))

        sums = [first + second
                for position, first in enumerate(numbers)
                for second in numbers[position + 1:]]
        self.parameter["sums"] = sums
        assert len(sums) == N * (N - 1) // 2, "sums should have exactly N * (N - 1) / 2 elements"
        random.shuffle(sums)

    def _prompt_generate(self) -> str:
        """Return the prompt with N and the comma-separated sums filled in."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            sums=", ".join(map(str, self.parameter["sums"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None on a missing or malformed answer."""
        if answer is None:
            return None  # Invalid answer format
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None  # Invalid answer format

    def scorer(self, output: str) -> float:
        """
        Score a model output.

        ``wrong_format`` when parsing fails; ``invalid_solution`` unless the
        output has exactly N distinct integers that are all >= 1; otherwise
        the multiset of pairwise sums is compared with the given sums and
        rewarded by intersection/union (raised to beta) or by exact match.

        Raises:
            NotImplementedError: if the rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        N = self.parameter["N"]
        if len(processed_result) != N:  # N integers
            return self.rewards["invalid_solution"]
        if len(set(processed_result)) != N:  # distinct
            return self.rewards["invalid_solution"]
        if any(x < 1 for x in processed_result):  # positive integers
            return self.rewards["invalid_solution"]

        # multiset of expected sums still waiting to be matched
        unmatched = {}
        for expected in self.parameter["sums"]:
            unmatched[expected] = unmatched.get(expected, 0) + 1
        union = len(self.parameter["sums"])
        intersection = 0

        for position, first in enumerate(processed_result):
            for second in processed_result[position + 1:]:
                pair_sum = first + second
                if unmatched.get(pair_sum, 0) > 0:
                    unmatched[pair_sum] -= 1
                    intersection += 1
                else:
                    union += 1  # a produced sum that is not expected enlarges the union
        assert intersection <= union, "intersection should not exceed union"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(intersection/union)^beta":
            return ((intersection / union) ** self.rewards["rewarding_beta"]) * self.rewards["rewarding_weight"]
        if strategy == "intersection=union":
            return self.rewards["rewarding_weight"] * (intersection == union)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def _generate(self) -> None :
    """
    Sample a grid with damaged lattice points and compute the reference answer.

    Reads ``self.parameter["MAX_N_M"]`` (must be >= 2) and writes ``N``, ``M``,
    ``damaged_points`` and ``reference_answer`` into ``self.parameter``.

    The reference answer is assembled by inclusion-exclusion as
    ``cnt0 - cnt1 + cnt2 - cnt3 + cnt4``; each ``cntK`` is accumulated from
    squares that have K damaged points among their vertices (see the section
    comments below). Per the original comments this is a port of a C++ solution.
    """
    assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
    MAX_N_M = self.parameter["MAX_N_M"]
    assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

    # Grid has (N + 1) x (M + 1) lattice points; at least one and at most
    # min(N * M, MAX_N_M) of them are marked as damaged.
    N, M = self.parameter["N"], self.parameter["M"] = random.randint(1, MAX_N_M), random.randint(1, MAX_N_M)
    damaged_points = self.parameter["damaged_points"] = random.sample([(x, y) for x in range(N + 1) for y in range(M + 1)], random.randint(1, min(N * M, MAX_N_M)))


    pts = damaged_points.copy()  # work on a copy so the stored (prompted) order is untouched
    pts.sort()                   # same ordering as the C++ reference

    # Encode each (x, y) as the single integer x*(M+1)+y for O(1) membership tests.
    deleted = {x * (M + 1) + y for (x, y) in pts}
    get_id = lambda x, y: x * (M + 1) + y

    # ---------- cnt0 : total number of squares in a complete grid ----------
    # cnt0 = sum over size = 1..min(N, M) of (N - size + 1) * (M - size + 1) * size
    limit = min(N, M)
    cnt0 = 0
    for s in range(1, limit + 1):
        cnt0 += (N - s + 1) * (M - s + 1) * s

    # ---------- cnt1 : squares counted once per damaged vertex ----------
    def add_lgh(lim: int, len1: int, len2: int) -> int:
        """
        Closed-form term used for cnt1 (mirrors the ``lgh`` lambda of the C++ code).

        Returns lim*(lim+3)//2, reduced by a triangular number d*(d+1)//2 for
        each of len1 / len2 that lim exceeds by d.
        """
        res = lim * (lim + 3) // 2
        if lim > len1:
            d = lim - len1
            res -= d * (d + 1) // 2
        if lim > len2:
            d = lim - len2
            res -= d * (d + 1) // 2
        return res

    cnt1 = 0
    for x, y in pts:
        u, d = x, N - x          # up / down steps we can take
        l, r = y, M - y          # left / right steps we can take
        cnt1 += add_lgh(min(M, u), l, r)
        cnt1 += add_lgh(min(M, d), l, r)
        cnt1 += add_lgh(min(N, l), u, d)
        cnt1 += add_lgh(min(N, r), u, d)
        # NOTE(review): presumably removes squares counted by two of the four
        # directional terms above (one per quadrant) -- confirm against the C++ source.
        cnt1 -= min(l, u)
        cnt1 -= min(u, r)
        cnt1 -= min(r, d)
        cnt1 -= min(d, l)

    # ---------- cnt2 / cnt3 / cnt4 : inclusion-exclusion on pairs ----------
    cnt2 = cnt3 = cnt4 = 0
    Klen = len(pts)

    def inside(x: int, y: int) -> bool:
        """Return True when (x, y) is a lattice point of the grid."""
        return 0 <= x <= N and 0 <= y <= M

    def process(x3: int, y3: int, x4: int, y4: int) -> None:
        """Account for one candidate square given its two remaining vertices."""
        nonlocal cnt2, cnt3, cnt4
        # Squares sticking out of the grid do not exist.
        if not (inside(x3, y3) and inside(x4, y4)):
            return
        t1 = get_id(x3, y3) in deleted
        t2 = get_id(x4, y4) in deleted
        cnt2 += 1
        if t1: cnt3 += 1
        if t2: cnt3 += 1
        if t1 and t2: cnt4 += 1

    for i in range(Klen):
        x1, y1 = pts[i]
        for j in range(i + 1, Klen):
            x2, y2 = pts[j]

            # the two orientations where (x1,y1)-(x2,y2) is a side
            process(x1 - (y2 - y1), y1 + (x2 - x1),
                    x2 - (y2 - y1), y2 + (x2 - x1))
            process(x1 + (y2 - y1), y1 - (x2 - x1),
                    x2 + (y2 - y1), y2 - (x2 - x1))

            # orientation where they are the diagonal
            a = (x2 - x1) + (y2 - y1)
            b = (x2 - x1) - (y2 - y1)
            if (a & 1) or (b & 1):          # both must be even for lattice vertices
                continue
            a //= 2
            b //= 2
            process(x1 + b, y1 + a, x2 - b, y2 - a)

    # A square with three damaged vertices was reached from each of its 3 damaged
    # pairs, one with four from each of its 6 pairs: divide to count it once.
    cnt3 //= 3
    cnt4 //= 6

    # ---------- final inclusion-exclusion ----------
    self.parameter["reference_answer"] = cnt0 - cnt1 + cnt2 - cnt3 + cnt4
def scorer(self, output : str) -> float :
    """
    Score a model output against the stored reference count.

    Returns ``rewards["wrong_format"]`` when the output cannot be parsed or is
    negative. Otherwise the reward follows ``rewards["rewarding_strategy"]``:
    ``"(min/max)^beta"`` gives partial credit from the ratio of the smaller to
    the larger value, ``"gold=answer"`` gives credit only on exact equality.

    Raises:
        NotImplementedError: for any other rewarding strategy.
    """
    guess = self.processor(output)
    if guess is None or guess < 0 :
        return self.rewards["wrong_format"]

    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]
    gold = self.parameter["reference_answer"]

    if strategy == "gold=answer" :
        return weight * (guess == gold)
    if strategy != "(min/max)^beta" :
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))

    # A zero guess would make the ratio degenerate, so it is settled by equality.
    if guess == 0 :
        return weight * (gold == 0)
    smaller, larger = min(gold, guess), max(gold, guess)
    return weight * ((smaller / larger) ** self.rewards["rewarding_beta"])
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0,
             **kwargs) :
    """
    Set up the StarBattle environment and its reward table.

    The four reward arguments are stored in ``self.rewards`` under their own
    names; every other keyword argument is forwarded to the base class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format = wrong_format,
        invalid_solution = invalid_solution,
        wrong_solution = wrong_solution,
        correct_solution = correct_solution,
    )
def scorer(self, output : str) -> float :
    """
    Score a submitted StarBattle grid.

    Returns, in order of checks:
      * ``wrong_format``     - unparsable output, wrong dimensions, or a character outside ``X.*``;
      * ``invalid_solution`` - an ``X`` cell was changed, or a ``.`` cell became ``X``;
      * ``wrong_solution``   - a row without exactly one star, a column with more
        than one star, or two stars touching (8-neighbourhood);
      * ``correct_solution`` - otherwise.
    """
    solution = self.processor(output)
    if solution is None :
        return self.rewards["wrong_format"]
    assert isinstance(solution, list), "processed_result should be a list"

    N, M = self.parameter["N"], self.parameter["M"]

    # Shape first, then alphabet.
    if len(solution) != N :
        return self.rewards["wrong_format"]
    for row in solution :
        if len(row) != M :
            return self.rewards["wrong_format"]
    for row in solution :
        for c in row :
            if c not in "X.*" :
                return self.rewards["wrong_format"]

    # The submission must keep the puzzle's blocked cells exactly where they were.
    for row, original_row in zip(solution, self.parameter["grid"]) :
        for cell, original_cell in zip(row, original_row) :
            if (original_cell == "X" and cell != "X") or (original_cell == "." and cell not in ".*") :
                return self.rewards["invalid_solution"]

    # Exactly one star per row; remember its column.
    star_columns = []
    for row in solution :
        if row.count("*") != 1 :
            return self.rewards["wrong_solution"]
        star_columns.append(row.index("*"))

    # One star per row, so a column holds two stars iff a column index repeats.
    if len(set(star_columns)) != len(star_columns) :
        return self.rewards["wrong_solution"]

    # With one star per row, only stars in consecutive rows can touch.
    for upper, lower in zip(star_columns, star_columns[1 :]) :
        if abs(upper - lower) <= 1 :
            return self.rewards["wrong_solution"]

    return self.rewards["correct_solution"]
Output the result modulo {MOD}.""" + MOD = 10**9 + 7 + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the StirlingSecond_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 2" + + assert "MAX_R" in self.parameter, "MAX_R is required in parameter" + MAX_R = self.parameter["MAX_R"] + assert MAX_R >= 2, "MAX_R should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N) + R = self.parameter["R"] = random.randint(2, min(N, MAX_R)) + MOD = self.MOD + + ans = 0 + c = 1 + for k in range(R) : + term = c * pow(R - k, N, MOD) % MOD + if k & 1 : + ans = ((ans - term) % MOD + MOD) % MOD + else : + ans = (ans + term) % MOD + c = c * (R - k) // (k + 1) + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], R = self.parameter["R"], MOD = self.MOD) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.MOD) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] 
\ No newline at end of file diff --git a/server/Gym/environments/stone_game/__init__.py b/server/Gym/environments/stone_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2125929860a2e0dab10c3f225c49ee6d13640623 --- /dev/null +++ b/server/Gym/environments/stone_game/__init__.py @@ -0,0 +1 @@ +from .environment import StoneGame_Environment diff --git a/server/Gym/environments/stone_game/environment.py b/server/Gym/environments/stone_game/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..130ed2f25baa9f3c9a75f22efacf9872bcfba849 --- /dev/null +++ b/server/Gym/environments/stone_game/environment.py @@ -0,0 +1,105 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class StoneGame_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3235 + prompt_template = \ +r"""Stan and Ollie are playing a game. The game rules are as follows: ++ There are **{N}** heaps of stones: {Stones}. ++ Stan and Ollie take turns playing, and **Stan** goes first. ++ On a player's turn, they must select a heap that contains at least **{F}** stones. ++ Then, they choose an integer **M** (at least 2) and split the selected heap into **M** smaller heaps such that the sizes of the smaller heaps differ by at most 1 (i.e., as evenly as possible). ++ After splitting, the game continues with the updated heap configuration. ++ If a player cannot make a move (i.e., no heap contains at least **{F}** stones), they lose. + +If both players always play optimally, who will win — Stan or Ollie? + +**Output Format:** Your final answer should be a single word: either `Stan` or `Ollie` (do **NOT** include quotes or backticks), indicating the winner.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_answer : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the StoneGame_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_answer" : invalid_answer, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + def _generate(self) -> None : + assert "MAX_SUM" in self.parameter, "MAX_SUM is required in parameter" + MAX_SUM = self.parameter["MAX_SUM"] + assert MAX_SUM >= 2, "MAX_SUM should be greater than or equal to 2" + + self.parameter["reference_answer"] = "Stan" if random.random() < 0.5 else "Ollie" + + while True : + SUM = random.randint(2, MAX_SUM) + N = self.parameter["N"] = random.randint(1, min(SUM // 2, 100)) + if N == 1: + Stones = [SUM] + else: + cuts = sorted(random.sample(range(1, SUM), N - 1)) + Stones = [cuts[0]] + [cuts[i] - cuts[i - 1] for i in range(1, N - 1)] + [SUM - cuts[-1]] + self.parameter["Stones"] = Stones + F = self.parameter["F"] = random.randint(1, max(Stones) + 1) + + def check(n : int, f : int, stones : List[int]) -> bool : + sg = [-1] * (max(stones) + 5) + exist = [0] * (max(stones) + 5) + for i in range(0, min(max(stones)+1, f)): + sg[i] = 0 + + def get_sg(x): + if sg[x] != -1: return sg[x] + i = 2 + while i <= x : + k = x//(x//i) + for j in range(i, min(i+1, k)+1): + s = 0 + if (x%j) % 2 == 1: s ^= get_sg(x//j+1) + if (j-(x%j)) % 2 == 1: s ^= get_sg(x//j) + exist[s] = x + i = k + 1 + i = 0 + while True: + if exist[i] != x: + sg[x] = i + return i + i += 1 + + nim_sum = 0 + for pile_size in stones: + nim_sum ^= get_sg(pile_size) + return nim_sum != 0 + + if ("Stan" if check(N, F, Stones) else "Ollie") == self.parameter["reference_answer"] : + break + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], F = self.parameter["F"], Stones = ", ".join(map(str, self.parameter["Stones"]))) + + + def _process(self, answer : Optional[str]) -> Optional[str] : + if answer is not None : + return answer.strip() + else : + return None + + def scorer(self, output : str) -> float : + processed_result = 
self.processor(output) + if processed_result is not None : + if processed_result not in ("Stan", "Ollie") : + return self.rewards["invalid_answer"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/stone_intervals_game/__init__.py b/server/Gym/environments/stone_intervals_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b27d0b4d21475ae6b2aeeb2dfb26ed7799372be --- /dev/null +++ b/server/Gym/environments/stone_intervals_game/__init__.py @@ -0,0 +1 @@ +from .environment import StoneIntervalsGame_Environment diff --git a/server/Gym/environments/stone_intervals_game/environment.py b/server/Gym/environments/stone_intervals_game/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d1b429395b08c7c3d572b35786316e12a1fdfc26 --- /dev/null +++ b/server/Gym/environments/stone_intervals_game/environment.py @@ -0,0 +1,149 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class StoneIntervalsGame_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3235 + prompt_template = \ +r"""There are {N} piles of stones. Initially, the i-th pile contains A[i] stones, given as: {A} +Alice and Bob play a game with the following rules: +- Alice goes first. They alternate turns. +- On each turn, a player selects a pile `i` such that **at least one of its adjacent piles** (`i - 1` or `i + 1`, if within bounds) contains **0 stones** (noting that the first/last pile has ONLY ONE adjacent pile). The player then collects **all stones** from pile `i` (pile `i` becomes 0). +- The game ends when there are no piles with any stones remaining. 
def _generate(self) -> None :
    """
    Sample the pile array ``A`` and compute Alice's total as the reference answer.

    Reads ``self.parameter["N"]`` (must be >= 3). Every pile gets a size in
    [1, 2N]; then between 1 and N - 2 distinct piles are emptied, so at least
    one empty and at least two non-empty piles exist.

    The solver works on a copy ``v`` threaded as a doubly-linked list:
    (1) collapse "peak" triples, (2) peel pairs off both ends into ``S``,
    (3)-(4) sort what remains in descending order, append ``S`` and take an
    alternating sum ``score``, (5) store ``(sum(A) + score) // 2``.
    NOTE(review): ``score`` is presumably Alice's lead over Bob under optimal
    play -- confirm against the original problem's editorial.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    A = self.parameter["A"] = [random.randint(1, N * 2) for _ in range(N)]
    for zero_index in random.sample(range(N), random.randint(1, N - 2)) :
        A[zero_index] = 0


    v = A.copy()        # working values; A itself is what the prompt shows
    SumVal = sum(v)     # total number of stones on the table

    # mark which piles are non-zero
    tag = [x != 0 for x in v]

    # doubly-linked list over 0..N-1 (None marks the ends)
    prev_ = [i - 1 for i in range(N)]
    next_ = [i + 1 for i in range(N)]
    prev_[0] = None
    next_[N - 1] = None

    head = 0
    tail = N - 1

    # 1) Triple-compression: whenever three consecutive non-zero piles
    #    form a "peak" (middle >= both neighbours), merge them into the rightmost.
    i = head
    while i is not None:
        while (
            prev_[i] is not None
            and prev_[prev_[i]] is not None
            and tag[i]
            and tag[prev_[i]]
            and tag[prev_[prev_[i]]]
            and v[prev_[i]] >= v[prev_[prev_[i]]]
            and v[prev_[i]] >= v[i]
        ):
            p = prev_[i]
            pp = prev_[p]
            new_prev = prev_[pp]
            # merge: the surviving pile holds v[pp] + v[i] - v[p]
            v[i] = v[pp] + v[i] - v[p]
            # unlink pp and p by connecting new_prev <-> i
            prev_[i] = new_prev
            if new_prev is not None:
                next_[new_prev] = i
            else:
                # i has become the first node of the list
                head = i
        i = next_[i]

    # 2) Edge-peeling: greedily remove matching monotonic pairs at the ends,
    #    accumulating their difference into S
    L, R = head, tail
    S = 0
    # left side: peel while the two outermost piles are non-zero and non-increasing inward
    while True:
        nl = next_[L]
        if nl is None or not (tag[L] and tag[nl]) or v[L] < v[nl]:
            break
        S += v[nl] - v[L]
        L = next_[nl]
    # right side: mirror image of the left peel
    while True:
        pr = prev_[R]
        if pr is None or not (tag[R] and tag[pr]) or v[R] < v[pr]:
            break
        S += v[pr] - v[R]
        R = prev_[pr]

    # 3) Collect the remaining non-zero values between L and R (inclusive)
    segments = []
    i = L
    while True:
        if tag[i]:
            segments.append(v[i])
        if i == R:
            break
        i = next_[i]

    # 4) Sort descending, append the peeled sum S, then do an alternating sum
    segments.sort(reverse=True)
    segments.append(S)
    score = 0
    for idx, val in enumerate(segments):
        score += val if idx % 2 == 0 else -val

    # 5) Recover the first player's total from the stone total and the alternating sum
    self.parameter["reference_answer"] = (SumVal + score) // 2
None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/string_partition_shuffle/__init__.py b/server/Gym/environments/string_partition_shuffle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a643a5f0ce3902d3fef0e3a2f620d9fabf06d47 --- /dev/null +++ b/server/Gym/environments/string_partition_shuffle/__init__.py @@ -0,0 +1 @@ +from .environment import StringPartitionShuffle_Environment diff --git a/server/Gym/environments/string_partition_shuffle/environment.py b/server/Gym/environments/string_partition_shuffle/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..17dc4fd71daa51b1c8b51dbed59f23d114b82877 --- /dev/null +++ b/server/Gym/environments/string_partition_shuffle/environment.py @@ -0,0 +1,110 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class StringPartitionShuffle_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3785 + prompt_template = \ +r"""You are given a string S of length {N} (0-indexed): {S} + +Please find {K} intervals [L[1], R[1]), ..., [L[{K}], R[{K}]) such that: +- Each interval [L[i], R[i]) is non-empty and disjoint. +- The intervals together cover the entire string S (each index appears in exactly one interval). +- Concatenating all substrings S[L[i]: R[i]] (= S[L[i]] + S[L[i] + 1] + ... + S[R[i] - 1]) (in order) yields a new string T: {T} + +**Output Format:** Output {K} lines. 
The i-th line should contain two integers L[i] and R[i], separated by a space.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([a=b])^beta", rewarding_beta : float = 10.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the StringPartitionShuffle_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + if N >= 4 and random.random() < 0.5 : + K = self.parameter["K"] = 3 + else : + K = self.parameter["K"] = random.randint(2, N - 1) + + one_probability = random.uniform(0.1, 0.9) + S = self.parameter["S"] = "".join("1" if random.random() < one_probability else "0" for _ in range(N)) + + endpoints = random.sample(range(1, N), K - 1) + endpoints.sort() + endpoints = [0] + endpoints + [N] + assert len(endpoints) == K + 1, "endpoints should have length K + 1" + intervals = [(endpoints[i], endpoints[i + 1]) for i in range(K)] + assert len(intervals) == K, "intervals should have length K" + random.shuffle(intervals) + self.parameter["T"] = "".join(S[L : R] for L, R in intervals) + self.parameter["reference_answer"] = "\n".join("{} {}".format(L, R) for L, R in intervals) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + K = self.parameter["K"], + S = self.parameter["S"], + T = self.parameter["T"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + matrix = [] + for line in answer.splitlines() : + line = line.strip() + if line : + 
def scorer(self, output : str) -> float :
    """
    Score a proposed partition of S into K intervals.

    Returns ``wrong_format`` for unparsable output, a wrong number of lines or a
    line that is not a pair; ``invalid_solution`` when the pairs are not
    non-empty in-range intervals covering every index exactly once. Otherwise
    the concatenation of the chosen substrings is compared with T according to
    ``rewards["rewarding_strategy"]``.

    Raises:
        NotImplementedError: for an unknown rewarding strategy.
    """
    intervals = self.processor(output)
    if intervals is None :
        return self.rewards["wrong_format"]
    assert isinstance(intervals, list), "processed_result should be a list"

    N = self.parameter["N"]

    if len(intervals) != self.parameter["K"] :
        return self.rewards["wrong_format"]
    for interval in intervals :
        if len(interval) != 2 :
            return self.rewards["wrong_format"]

    for L, R in intervals :
        if not (0 <= L < R <= N) :
            return self.rewards["invalid_solution"]
    # Disjoint and covering: the covered indices, as a multiset, are exactly 0..N-1.
    covered = sorted(i for L, R in intervals for i in range(L, R))
    if covered != list(range(N)) :
        return self.rewards["invalid_solution"]

    target = self.parameter["T"]
    T = "".join(self.parameter["S"][L : R] for L, R in intervals)
    assert len(T) == N == len(target), "Length of T should match N"

    strategy = self.rewards["rewarding_strategy"]
    weight = self.rewards["rewarding_weight"]
    if strategy == "mean([a=b])^beta" :
        matches = sum(int(a == b) for a, b in zip(target, T))
        return weight * ((matches / N) ** self.rewards["rewarding_beta"])
    if strategy == "a=b" :
        return weight * (target == T)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
StringReversalConstruction_Environment \ No newline at end of file diff --git a/server/Gym/environments/string_reversal_construction/environment.py b/server/Gym/environments/string_reversal_construction/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..ef25f3e12cd74bb06d383aa373346c10ac28ada4 --- /dev/null +++ b/server/Gym/environments/string_reversal_construction/environment.py @@ -0,0 +1,115 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class StringReversalConstruction_Environment(VerifiableEnvironment): + prompt_template = \ +r"""A code lock is installed on a safe. The lock has a screen that displays a string of {n} lowercase Latin letters. Initially, the screen displays string "{s}". The safe will open when string "{t}" is displayed on the screen. + +The string on the screen can be changed using the operation "shift x". To apply this operation, you choose an integer x from 0 to {n} (including 0 and {n}). After that, the current string p = α + β changes to β^R + α, where the length of β is x, and the length of α is {n} - x. In other words, the suffix of length x of string p is reversed and moved to the beginning of the string (+ means string concatenation and β^R means the reverse of β). For example, after the operation "shift 4" the string "abcacb" will be changed to "bcacab", since α = "ab", β = "cacb", β^R = "bcac". + +Find a way to open the safe, using no more than {max_k} operations. 
+ +Your response should only contain the solution in the following format: a single line containing k numbers x_i corresponding to the operations "shift x_i" (0 ≤ x_i ≤ {n}) in the order in which they should be applied (separated by spaces), where k is the number of operations.""" + + def __init__(self, + wrong_format: float = -1.0, + invalid_solution: float = -0.5, + incorrect_solution: float = 0.0, + correct_solution: float = 1.0, + **kwargs): + """ + Initialize the StringReversalConstruction_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "incorrect_solution": incorrect_solution, + "correct_solution": correct_solution, + } + + def _apply_shift_operation(self, s: str, x: int) -> str: + """Apply shift x operation to string s""" + n = len(s) + assert 0 <= x <= n, "x must be in the range [0, n]" + if x == 0: + return s + if x == n: + return s[::-1] + + alpha = s[:-x] # first n-x characters + beta = s[-x:] # last x characters + beta_reversed = beta[::-1] + + return beta_reversed + alpha + + def _generate(self) -> None: + assert "n" in self.parameter, "n is required in parameter" + + n = self.parameter["n"] + + # Generate initial string s + s = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(n)) + + # Generate target string t by applying random operations to s + # This ensures there's always a valid solution + t = s + num_operations = random.randint(1, max(1, n // 2)) + operations = [] + + for _ in range(num_operations): + x = random.randint(1, n) + t = self._apply_shift_operation(t, x) + operations.append(x) + + self.parameter["n"] = n + self.parameter["s"] = s + self.parameter["t"] = t + self.parameter["reference_answer"] = " ".join(map(str, operations)) + self.parameter["max_k"] = 3 * n + + def _prompt_generate(self) -> str: + return self.prompt_template.format( + n=self.parameter["n"], + s=self.parameter["s"], + t=self.parameter["t"], + 
def scorer(self, output: str) -> float:
    """
    Score a sequence of shift operations.

    Returns ``wrong_format`` when the output cannot be parsed,
    ``invalid_solution`` when it uses more than ``max_k`` operations or an
    operand outside ``[0, n]``, and otherwise ``correct_solution`` or
    ``incorrect_solution`` depending on whether replaying the operations on
    ``s`` produces ``t``.
    """
    operations = self.processor(output)
    if operations is None:
        return self.rewards["wrong_format"]

    n = self.parameter["n"]
    too_many = len(operations) > self.parameter["max_k"]
    if too_many or any(op < 0 or op > n for op in operations):
        return self.rewards["invalid_solution"]

    # Replay the shifts from the initial string.
    state = self.parameter["s"]
    for op in operations:
        state = self._apply_shift_operation(state, op)

    verdict = "correct_solution" if state == self.parameter["t"] else "incorrect_solution"
    return self.rewards[verdict]
def __init__(self,
             weight_multiple : int = 4,
             wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
             **kwargs) :
    """
    Set up the STUWell environment.

    ``weight_multiple`` is stored on the instance (the generator uses it to
    bound the sampled heights); the three reward arguments go into
    ``self.rewards`` under their own names. Remaining keyword arguments are
    forwarded to the base class.
    """
    super().__init__(**kwargs)

    self.weight_multiple = weight_multiple
    self.rewards = dict(
        wrong_format = wrong_format,
        correct_answer = correct_answer,
        wrong_answer = wrong_answer,
    )
+ """ + # Remaining digging power + rem = M + + # 1) First, smooth the terrain so that |a[i] - a[i+1]| <= z at minimal cost + # We work on a copy so as not to overwrite X + a = X[:] + for i in range(1, N): + # if slope from a[i-1] up to a[i] exceeds z, shave off the excess + excess = a[i] - (a[i-1] + z) + if excess > 0: + rem -= excess + a[i] = a[i-1] + z + for i in range(N-2, -1, -1): + excess = a[i] - (a[i+1] + z) + if excess > 0: + rem -= excess + a[i] = a[i+1] + z + + # If we've already used more than M shovels, fail + if rem < 0: + return False + + # 2) Build prefix sums so we can query any interval sum in O(1) + prefix = [0] * N + prefix[0] = a[0] + for i in range(1, N): + prefix[i] = prefix[i-1] + a[i] + + # 3) For each candidate digging spot i, we need to compute the cost to + # shave the terrain down to the "tent" shape that slopes up at rate z + # from height 0 at i. Outside a certain window [L..R], the original + # a[j] is already below the tent, so no digging needed there. + L = [0] * N + j = 0 + for i in range(N): + # advance j until a[j] >= z*(i-j) + while j < N and z * (i - j) > a[j]: + j += 1 + L[i] = j + + R = [0] * N + j = N - 1 + for i in range(N-1, -1, -1): + # decrease j until a[j] >= z*(j-i) + while j >= 0 and z * (j - i) > a[j]: + j -= 1 + R[i] = j + + # 4) Test each position i as the digging spot + for i in range(N): + li, ri = L[i], R[i] + # sum of a[li..ri] + segment_sum = prefix[ri] - (prefix[li-1] if li > 0 else 0) + # cost to carve the left half of the tent (from li up to i) + left_len = i - li + cost_left = z * left_len * (left_len + 1) // 2 + # cost to carve the right half of the tent (from i up to ri) + right_len = ri - i + cost_right = z * right_len * (right_len + 1) // 2 + # total additional digs needed to form the tent + needed = segment_sum - cost_left - cost_right + if needed <= rem: + return True + + return False + + # 5) Binary search on z = the maximum allowed adjacent slope + lo, hi = 0, max(X) + best_z = 0 + while lo <= hi: 
+ mid = (lo + hi) // 2 + if check(mid): + best_z = mid + hi = mid - 1 + else: + lo = mid + 1 + self.parameter["reference_answer"] = best_z + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + X = " ".join("X[{}]={}".format(i, Xi) for i, Xi in enumerate(self.parameter["X"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/stunt_flying/__init__.py b/server/Gym/environments/stunt_flying/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dbf2850ab00772f7cee0c497598fc4152641e5d8 --- /dev/null +++ b/server/Gym/environments/stunt_flying/__init__.py @@ -0,0 +1 @@ +from .environment import StuntFlying_Environment diff --git a/server/Gym/environments/stunt_flying/environment.py b/server/Gym/environments/stunt_flying/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4793a8da2902294fb69e12789d9af18aa35a82 --- /dev/null +++ b/server/Gym/environments/stunt_flying/environment.py @@ -0,0 +1,103 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class StuntFlying_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3918 + prompt_template = \ +r"""There are {K} elements labeled from 0 to {K_minus_1}, and each element `x` has an associated value C[x]. 
C is: {C} +You need to build an array A of length {N}, where each A[i] is one of these elements (i.e., 0 ≤ A[i] < {K} for all 1 ≤ i ≤ {N}). Each position i in A has a value defined as **C[A[i]] × T[i]**, where T[i] is determined as follows: +- If there is no previous index j (0 ≤ j < i) such that A[j] = A[i], then T[i] = 0. +- Otherwise, let j be the largest index (basically, closest to i) such that A[j] = A[i] (0 ≤ j < i), and set T[i] = i - j. + +Can you maximize the sum of all values **C[A[i]] × T[i]**? Output A[1], A[2], ..., A[{N}] in order, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the StuntFlying_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + K = self.parameter["K"] = random.randint(2, N) + C = self.parameter["C"] = [random.randint(1, K) for _ in range(K)] + + + A = C.copy() + A.sort(reverse=True) + + ans = 0 + N -= 1 + i = 0 + while N > 0 and i < K: + ans += N * A[i] + i += 1 + N -= 2 + + assert ans > 0, "ans should be greater than 0" + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + K = self.parameter["K"] + return self.prompt_template.format( + K = K, + K_minus_1 = K - 1, + C = "; ".join("C[{}] = {}".format(x, Cx) for x, Cx in enumerate(self.parameter["C"])), + N = self.parameter["N"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = 
answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["N"] : + return self.rewards["invalid_solution"] + if not all(0 <= Ai < self.parameter["K"] for Ai in processed_result) : + return self.rewards["invalid_solution"] + + last = [None] * self.parameter["K"] + gold, answer = self.parameter["gold_answer"], 0 + for i, Ai, in enumerate(processed_result) : + T = 0 if last[Ai] is None else i - last[Ai] + answer += self.parameter["C"][Ai] * T + last[Ai] = i + + assert answer <= gold, "answer should be less than or equal to gold_answer" + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/subarray_sum_xor/__init__.py b/server/Gym/environments/subarray_sum_xor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..338a788c646f0d405fc99f531f6ae7ac5523b21e --- /dev/null +++ b/server/Gym/environments/subarray_sum_xor/__init__.py @@ -0,0 +1 @@ +from .environment import SubarraySumXor_Environment diff --git a/server/Gym/environments/subarray_sum_xor/environment.py b/server/Gym/environments/subarray_sum_xor/environment.py new file mode 100644 index 
0000000000000000000000000000000000000000..bf0b456bedd082af1ddfbf9a0f901a46e352612c --- /dev/null +++ b/server/Gym/environments/subarray_sum_xor/environment.py @@ -0,0 +1,125 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SubarraySumXor_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3760 + prompt_template = \ +r"""You are given an array A of {N} integers: {A} +This array has {N} × ({N} + 1) / 2 contiguous subarrays. For each subarray, compute its sum; then, output the **bitwise XOR** of all these subarray sums.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SubarraySumXor_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + A = self.parameter["A"] = [random.randint(0, N) for _ in range(N)] + + + # build prefix sums S[0..N] + S = [0] * (N + 1) + for i in range(1, N + 1): + S[i] = S[i - 1] + A[i - 1] + mx = S[N] + + # count how many times each prefix‐sum value appears (excluding S[0]) + cnt = [0] * (mx + 1) + for i in range(1, N + 1): + cnt[S[i]] += 1 + + # scnt[v] = sum of cnt[0..v] + scnt = [0] * (mx + 1) + scnt[0] = cnt[0] + for v in range(1, mx + 1): + scnt[v] = scnt[v - 1] + cnt[v] + + ans = 0 + # for each bit j, count how many subarray‐sums have that bit = 1 + for j in range(mx.bit_length()): + K = 1 << j + M = 1 << (j + 1) + + # f[v] = number of earlier prefix‐sums s' with (v - s') in [K, M-1] + f = [0] * (mx + 1) + for v in range(mx + 1): + # f[v - M] or 0 if out of range + prev = f[v - M] if v >= M else 0 + # scnt[v - K] counts s' ≤ v-K + add1 = 
scnt[v - K] if v >= K else 0 + # subtract those with s' ≤ v-M + sub1 = scnt[v - M] if v >= M else 0 + f[v] = prev + add1 - sub1 + + # g[v] = number of later prefix‐sums s' with (s' - v) in [K, M-1] + g = [0] * (mx + 1) + for v in range(mx, -1, -1): + # g[v + M] or 0 if out of range + prev = g[v + M] if v + M <= mx else 0 + # scnt[min(mx, v+M-1)] - scnt[min(mx, v+K-1)] + hi = v + M - 1 + lo = v + K - 1 + add2 = scnt[hi] if hi <= mx else scnt[mx] + sub2 = scnt[lo] if lo <= mx else scnt[mx] + g[v] = prev + add2 - sub2 + + # sum up f[S[i]] + g[S[i]] for i=1..N, then divide by 2 to get the # of subarrays + res = 0 + for i in range(1, N + 1): + sv = S[i] + res += f[sv] + g[sv] + res //= 2 + + # if that count is odd, set bit j in ans + if res & 1: + ans |= K + + # finally, include the subarrays that start from index 1 (i.e. S[i] - S[0] = S[i]) + for i in range(1, N + 1): + ans ^= S[i] + + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = ", ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/subarray_xor_sum/__init__.py b/server/Gym/environments/subarray_xor_sum/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4dafcd277eadfa8ccd3de9c3631179738ac7240a --- /dev/null +++ 
b/server/Gym/environments/subarray_xor_sum/__init__.py @@ -0,0 +1 @@ +from .environment import SubarrayXorSum_Environment diff --git a/server/Gym/environments/subarray_xor_sum/environment.py b/server/Gym/environments/subarray_xor_sum/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..1eb70223e37e15c926098e7b4d90fcb2090236a4 --- /dev/null +++ b/server/Gym/environments/subarray_xor_sum/environment.py @@ -0,0 +1,93 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SubarrayXorSum_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3760 + prompt_template = \ +r"""You are given an array A of {N} integers: {A} +This array has {N} × ({N} + 1) / 2 contiguous subarrays. For each subarray, compute the bitwise XOR of its elements, then output the **sum** of all these subarray XOR values.""" + + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SubarrayXorSum_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + A = self.parameter["A"] = [random.randint(0, N) for _ in range(N)] + + + # Use only as many bits as needed + or_all = 0 + for x in A: + or_all |= x + B = or_all.bit_length() + + def compute() -> int : + # If all zeros, the answer is zero + if B == 0: + return 0 + + cnt_zero = [1] * B # counts of previous prefixes with bit j == 0 (include s[0]=0) + cnt_one = [0] * B # counts of previous prefixes with bit j == 1 + prefix = 0 + ans = 0 + + for x in A: + prefix ^= x + for j in range(B - 1, -1, -1): + bit = (prefix >> j) & 1 + if bit: + ans += (1 << j) * cnt_zero[j] + cnt_one[j] += 1 + else: + ans += (1 << j) * cnt_one[j] + cnt_zero[j] += 1 + + return ans + self.parameter["reference_answer"] = compute() + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = ", ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1)), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/subgraph_isomorphism/__init__.py b/server/Gym/environments/subgraph_isomorphism/__init__.py new file mode 
100644 index 0000000000000000000000000000000000000000..cccc49dd826ee41a567827e4fcea28a256e9fb34 --- /dev/null +++ b/server/Gym/environments/subgraph_isomorphism/__init__.py @@ -0,0 +1 @@ +from .environment import SubgraphIsomorphism_Environment diff --git a/server/Gym/environments/subgraph_isomorphism/environment.py b/server/Gym/environments/subgraph_isomorphism/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e0f3ab3c5fd432be339bff54acf1d72a17047e65 --- /dev/null +++ b/server/Gym/environments/subgraph_isomorphism/environment.py @@ -0,0 +1,130 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class SubgraphIsomorphism_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given two **undirected graphs**, G1 and G2. + +- G1 has `{N1}` vertices labeled from `0` to `{N1_minus_1}`. It has the following edge set E1: +{G1_edges} + +- G2 has `{N2}` vertices labeled from `0` to `{N2_minus_1}`. It has the following edge set E2: +{G2_edges} + +Please find an **injection** `p` (an injection means each vertex in G1 maps to a **unique** vertex in G2) from the vertices of G1 to the vertices of G2. This mapping `p` must satisfy the following condition: for every pair `(u, v)`, the edge `(u, v)` exists in E1 **if and only if** the edge `(p(u), p(v))` exists in E2. + +**Output Format:** Your final answer should be a single line containing `p(0), p(1), ..., p({N1_minus_1})`, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SubgraphIsomorphism_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "N2" in self.parameter, "N2 is required in parameter" + N2 = self.parameter["N2"] + assert N2 >= 3, "N2 should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 < edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + assert int(edge_density * N2 * (N2 - 1) / 2) > 0 + + G2_edges = self.parameter["G2_edges"] = random.sample([(u, v) for u in range(N2) for v in range(u + 1, N2)], int(edge_density * N2 * (N2 - 1) / 2)) + random.shuffle(G2_edges) + + N1 = self.parameter["N1"] = random.randint(3, N2) + mapping = random.sample(range(N2), N1) + random.shuffle(mapping) + + G1_edges = self.parameter["G1_edges"] = [] + G2_edges_set = set(G2_edges) + for u in range(N1) : + for v in range(u + 1, N1) : + G2_u, G2_v = mapping[u], mapping[v] + if G2_u > G2_v : + G2_u, G2_v = G2_v, G2_u + if (G2_u, G2_v) in G2_edges_set : + G1_edges.append((u, v)) + random.shuffle(G1_edges) + + for edges, N in zip((G1_edges, G2_edges), (N1, N2)) : + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)), "edges should be unique" + + self.parameter["reference_answer"] = " ".join(map(str, mapping)) + + + def _prompt_generate(self) -> str : + N1, N2 = self.parameter["N1"], self.parameter["N2"] + N1_minus_1, N2_minus_1 = N1 - 1, N2 - 1 + G1_edges, G2_edges = self.parameter["G1_edges"], self.parameter["G2_edges"] + return self.prompt_template.format( + N1 = N1, + N1_minus_1 = N1_minus_1, + G1_edges = "\n".join("({}, {})".format(u, v) for u, v in G1_edges), + N2 = N2, + N2_minus_1 = N2_minus_1, + G2_edges = "\n".join("({}, {})".format(u, v) for u, 
v in G2_edges), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + mapping = processed_result + if len(mapping) != self.parameter["N1"] : + return self.rewards["invalid_solution"] + if len(set(mapping)) != self.parameter["N1"] : + return self.rewards["invalid_solution"] + if not all(0 <= i < self.parameter["N2"] for i in mapping) : + return self.rewards["invalid_solution"] + + G1_edge_set, G2_edges_set = set(map(tuple, self.parameter["G1_edges"])), set(map(tuple, self.parameter["G2_edges"])) + satisfied = 0 + for u in range(self.parameter["N1"]) : + for v in range(u + 1, self.parameter["N1"]) : + G2_u, G2_v = mapping[u], mapping[v] + if G2_u > G2_v : + G2_u, G2_v = G2_v, G2_u + satisfied += int(((u, v) in G1_edge_set) == ((G2_u, G2_v) in G2_edges_set)) + all_edges = self.parameter["N1"] * (self.parameter["N1"] - 1) // 2 + assert satisfied <= all_edges, "satisfied edges should not exceed all edges" + + if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" : + return self.rewards["rewarding_weight"] * ((satisfied / all_edges) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (satisfied == all_edges) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/submatrix_sum_divisible_counting/__init__.py 
b/server/Gym/environments/submatrix_sum_divisible_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02c13986b168b1c364326ab4f4773bc3ceb8ca52 --- /dev/null +++ b/server/Gym/environments/submatrix_sum_divisible_counting/__init__.py @@ -0,0 +1 @@ +from .environment import SubmatrixSumDivisibleCounting_Environment diff --git a/server/Gym/environments/submatrix_sum_divisible_counting/environment.py b/server/Gym/environments/submatrix_sum_divisible_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..271196f691f9ab8121ccd34f1b37198aabfc474c --- /dev/null +++ b/server/Gym/environments/submatrix_sum_divisible_counting/environment.py @@ -0,0 +1,114 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + +class SubmatrixSumDivisibleCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a matrix of size {N} × {M}, where each element is an integer. Count the number of **contiguous, non-empty submatrices** whose sum is divisible by {K}. The matrix is: +{matrix} + +Notes: +- Two submatrices are considered different if they differ in position, even if they contain identical elements. +- The entire matrix itself is also considered a submatrix. +- Output a single non-negative integer, which is the total number of submatrices whose sum is divisible by {K}.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SubmatrixSumDivisibleCounting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + K = self.parameter["K"] = random.randint(2, N * M) + matrix = self.parameter["matrix"] = [[random.randint(0, K - 1) for _ in range(M)] for _ in range(N)] + + + # 2D prefix sums modulo K, 1-indexed + a = [[0] * (M + 1) for _ in range(N + 1)] + + for i in range(1, N + 1): + row = matrix[i - 1] + ai = a[i] + ai_1 = a[i - 1] + for j in range(1, M + 1): + v = row[j - 1] # each a[i][j] <= K per problem statement + # a[i][j] = (v + a[i-1][j] + a[i][j-1] + K - a[i-1][j-1]) % K + ai[j] = (v + ai_1[j] + ai[j - 1] + K - ai_1[j - 1]) % K + + ans = 0 + b = [0] * (M + 1) # reuse across pairs of rows + cnt = [0] * K # frequency array modulo K (size depends on K) + + # Enumerate pairs of rows (top=i+1 .. 
bottom=j) + for i in range(0, N): + ai = a[i] + for j in range(i + 1, N + 1): + aj = a[j] + cnt[0] = 1 # empty prefix + # Sweep columns, counting subarrays with sum % K == 0 + for k in range(1, M + 1): + v = aj[k] - ai[k] # both already modulo K + if v < 0: + v += K # avoid Python modulo in inner loop + b[k] = v + ans += cnt[v] + cnt[v] += 1 + # reset only the touched buckets (like the C++ code) + for k in range(1, M + 1): + cnt[b[k]] = 0 + + self.parameter["reference_answer"] = ans + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + M = self.parameter["M"], + K = self.parameter["K"], + matrix = "[\n" + "\n".join(", ".join(map(str, row)) for row in self.parameter["matrix"]) + "\n]", + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + if processed_result == 0 : + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == 0) + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/subsequence_reversal_lnds/__init__.py 
b/server/Gym/environments/subsequence_reversal_lnds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cfbdfc79ace37d31d41d219ae3f6d8b3c3c1c454 --- /dev/null +++ b/server/Gym/environments/subsequence_reversal_lnds/__init__.py @@ -0,0 +1 @@ +from .environment import SubsequenceReversalLNDS_Environment diff --git a/server/Gym/environments/subsequence_reversal_lnds/environment.py b/server/Gym/environments/subsequence_reversal_lnds/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..31c4cf9d4503e898b858dfa11932511046e31f8b --- /dev/null +++ b/server/Gym/environments/subsequence_reversal_lnds/environment.py @@ -0,0 +1,114 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SubsequenceReversalLNDS_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3607 + prompt_template = \ +r"""You are given a sequence A of {N} integers: {A} +You may choose a subsequence of A, defined by a strictly increasing sequence of indices i₁, ..., iₖ (1 ≤ i₁ < ... < iₖ ≤ {N}, k >= 1), and **reverse the order of the elements at those indices** (i.e., A[i₁] becomes A[iₖ], ..., A[iₖ] becomes A[i₁]). Please **maximize the length of the longest non-decreasing subsequence** (not necessarily contiguous) in the resulting array. Output a single integer — the maximum achievable length.""" + + def __init__(self, + wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SubsequenceReversalLNDS_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + self.parameter["A"] = [random.randint(1, N) for _ in range(N)] + + + # Read heights, 1-indexed + A = [0] + self.parameter["A"] + M = max(A) + + # dp[l][r][L][R]: max LIS length in A[l..r] after reversing at most one subsequence, + # considering only values in [L..R] + # Dimensions: (N+2) x (N+2) x (M+2) x (M+2) + dp = [[[[0] * (M+2) for _ in range(M+2)] for _ in range(N+2)] for _ in range(N+2)] + + # Base case: intervals of length 1 + for i in range(1, N+1): + for L in range(1, A[i] + 1): + for R in range(A[i], M + 1): + dp[i][i][L][R] = 1 + + # Build up for intervals of length = 2..N + for length in range(2, N+1): + for l in range(1, N - length + 2): + r = l + length - 1 + for span in range(1, M+1): + for L in range(1, M - span + 2): + R = L + span - 1 + + # 1) shrink the allowed value range + val = dp[l][r][L+1][R] + if dp[l][r][L][R-1] > val: + val = dp[l][r][L][R-1] + + # 2) extend by taking A[l] at the left (if it matches L) + tmp = dp[l+1][r][L][R] + (1 if A[l] == L else 0) + if tmp > val: + val = tmp + + # 3) extend by taking A[r] at the right (if it matches R) + tmp = dp[l][r-1][L][R] + (1 if A[r] == R else 0) + if tmp > val: + val = tmp + + # 4) reverse a subsequence spanning the ends + tmp = dp[l+1][r-1][L][R] + if A[l] == R: + tmp += 1 + if A[r] == L: + tmp += 1 + if tmp > val: + val = tmp + + dp[l][r][L][R] = val + + # The answer is dp[1][N][1][M] + self.parameter["reference_answer"] = dp[1][N][1][M] + + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + A = ", ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1)), + ) + + + 
def _process(self, answer : Optional[str]) -> Optional[int] :
    """
    Parse the extracted answer text into an integer.

    Returns the integer value of the whitespace-stripped text, or None when
    there is no answer or the text is not a valid integer literal.
    """
    if answer is None :
        return None
    try :
        return int(answer.strip())
    except ValueError :
        return None
def scorer(self, output : str) -> float :
    """
    Score a model output for the subset-sum task.

    Returns the "wrong_format" reward when the output cannot be parsed,
    "invalid_solution" when an index is out of range or repeated,
    "correct_answer" when the selected elements sum to the target and
    "wrong_answer" otherwise.
    """
    indices = self.processor(output)
    if indices is None :
        return self.rewards["wrong_format"]
    assert isinstance(indices, list), "processed_result should be a list"

    N = self.parameter["N"]
    if any(i < 0 or i >= N for i in indices) :
        return self.rewards["invalid_solution"]
    if len(set(indices)) != len(indices) :
        return self.rewards["invalid_solution"]

    A = self.parameter["A"]
    chosen_sum = sum(A[i] for i in indices)
    outcome = "correct_answer" if chosen_sum == self.parameter["target"] else "wrong_answer"
    return self.rewards[outcome]
def _generate(self) -> None :
    """
    Sample N and K and compute the N-th term of the sequence.

    Requires ``MAX_K`` (at least 2) and ``MAX_N`` (at least 1) in
    ``self.parameter``; writes ``N``, ``K`` and ``reference_answer`` back.
    """
    assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
    MAX_K = self.parameter["MAX_K"]
    assert MAX_K >= 2, "MAX_K should be greater than or equal to 2"

    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"

    N = self.parameter["N"] = random.randint(1, MAX_N)
    K = self.parameter["K"] = random.randint(2, MAX_K)

    # Read the binary digits of N as digits in base K: bit i contributes K**i
    self.parameter["reference_answer"] = sum(
        K ** bit for bit in range(N.bit_length()) if (N >> bit) & 1
    )
+ while num : + if num % K not in (0, 1) : + return False + num //= K + return True + if not check(processed_result) : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sudoku/__init__.py b/server/Gym/environments/sudoku/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb383b99bcbfff80d8bd8a1e6121840b0258504c --- /dev/null +++ b/server/Gym/environments/sudoku/__init__.py @@ -0,0 +1 @@ +from .environment import Sudoku_Environment diff --git a/server/Gym/environments/sudoku/environment.py b/server/Gym/environments/sudoku/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..25c1f9b1ce8554481811cd370bef35f8f4edb2e0 --- /dev/null +++ b/server/Gym/environments/sudoku/environment.py @@ -0,0 +1,149 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class Sudoku_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Solve a Sudoku puzzle of size ({N} × {M}) × ({M} × {N}) = {NM} × {NM}. Each number is in the range from 1 to {NM}, and empty cells are represented by 0. Here is the input grid: +{sudoku} + +Rules of Sudoku: +1. Each **row** must contain all digits from 1 to {NM}, without repetition. +2. Each **column** must contain all digits from 1 to {NM}, without repetition. +3. 
The grid is divided into {M} × {N} **subgrids**, where each subgrid is of size {N} × {M} (i.e., each subgrid has {N} rows and {M} columns). Each subgrid must also contain all digits from 1 to {NM}, without repetition. + +**Output Format:** +Your final answer should contain {NM} lines, each with {NM} numbers, separated by spaces. The numbers should represent the completed Sudoku grid in **row-major order**, matching the format of the given input — that is, the first number on the first line is the top-left cell of the Sudoku. Example (do **NOT** include the backticks or quotes, and this is NOT a valid Sudoku): +``` +{output_example} +```""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0, + **kwargs) : + """ + Initialize the Sudoku_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "wrong_solution" : wrong_solution, + "correct_solution" : correct_solution, + } + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N_M) + M = self.parameter["M"] = random.randint(2, MAX_N_M) + NM = self.parameter["NM"] = N * M + + + base = [[(M * (row % N) + row // N + column) % NM + 1 for column in range(NM)] for row in range(NM)] + + perm = list(range(1, NM + 1)) + random.shuffle(perm) + grid = [[perm[base[row][column] - 1] for column in range(NM)] for row in range(NM)] + + def shuffle_groups(data, group_size) : + G = len(data) // group_size + for g in range(G) : + start = g * group_size + slice_ = data[start : start + group_size] + random.shuffle(slice_) + data[start : start+group_size] = slice_ + groups = [data[g * group_size:(g + 1) * group_size] for g in range(G)] + 
def scorer(self, output : str) -> float :
    """
    Score a candidate Sudoku grid.

    Returns the "wrong_format" reward when the output cannot be parsed or is
    not an NM x NM grid, "invalid_solution" when a cell is outside 1..NM or
    contradicts a given clue, "wrong_solution" when a row, column or subgrid
    contains a repeated value, and "correct_solution" otherwise.
    """
    grid = self.processor(output)
    if grid is None :
        return self.rewards["wrong_format"]
    assert isinstance(grid, list), "processed_result should be a list"

    N, M = self.parameter["N"], self.parameter["M"]
    side = N * M

    # Shape check: exactly `side` rows of `side` cells each
    if len(grid) != side or any(len(line) != side for line in grid) :
        return self.rewards["wrong_format"]

    # Every cell must be in range and agree with the clue (0 marks an empty clue cell)
    for answer_row, clue_row in zip(grid, self.parameter["sudoku"]) :
        for value, clue in zip(answer_row, clue_row) :
            if value < 1 or value > side :
                return self.rewards["invalid_solution"]
            if clue != 0 and value != clue :
                return self.rewards["invalid_solution"]

    def all_distinct(cells) -> bool :
        # `side` in-range cells with `side` distinct values form a permutation of 1..side
        return len(set(cells)) == side

    if not all(all_distinct(line) for line in grid) :
        return self.rewards["wrong_solution"]
    if not all(all_distinct(column) for column in zip(*grid)) :
        return self.rewards["wrong_solution"]

    # Subgrids have N rows and M columns, so there are M bands of rows and N stacks of columns
    for band in range(M) :
        for stack in range(N) :
            box = [
                grid[x][y]
                for x in range(band * N, band * N + N)
                for y in range(stack * M, stack * M + M)
            ]
            if not all_distinct(box) :
                return self.rewards["wrong_solution"]

    return self.rewards["correct_solution"]
def _generate(self) -> None :
    """
    Sample the range [L, R] and compute the sum of divisor counts over it.

    Requires ``MAX_R`` (at least 2) in ``self.parameter``; writes ``R``, ``L``
    and ``reference_answer`` back.
    """
    assert "MAX_R" in self.parameter, "MAX_R is required in parameter"
    MAX_R = self.parameter["MAX_R"]
    assert MAX_R >= 2, "MAX_R should be greater than or equal to 2"

    R = self.parameter["R"] = random.randint(2, MAX_R)
    L = self.parameter["L"] = random.randint(1, R)
    assert 1 <= L <= R, "L should be less than or equal to R"

    def divisor_count_prefix(n : int) -> int :
        # sum of floor(n / i) for i in 1..n, grouping the i that share a quotient
        acc = 0
        lo = 1
        while lo <= n :
            quotient = n // lo
            hi = n // quotient          # last i with the same quotient
            acc += quotient * (hi - lo + 1)
            lo = hi + 1
        return acc

    upper_part = divisor_count_prefix(R)
    lower_part = divisor_count_prefix(L - 1)
    self.parameter["reference_answer"] = upper_part - lower_part
"gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_gcd/__init__.py b/server/Gym/environments/sum_gcd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f2e9a66bcaf74d5214f0db07fbfe52905afeb2fc --- /dev/null +++ b/server/Gym/environments/sum_gcd/__init__.py @@ -0,0 +1 @@ +from .environment import SumGCD_Environment diff --git a/server/Gym/environments/sum_gcd/environment.py b/server/Gym/environments/sum_gcd/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d12aacae9258c4e6f7f411cd75a6c960cba3dae2 --- /dev/null +++ b/server/Gym/environments/sum_gcd/environment.py @@ -0,0 +1,108 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumGCD_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4449 + prompt_template = \ +r"""Please compute sum(GCD(i, j)^{K}) for all pairs (i, j) such that 1 ≤ i ≤ {N} and 1 ≤ j ≤ {M}. Here, GCD(i, j) denotes the **greatest common divisor** of integers i and j, and x^{K} denotes x raised to the power of K. + +**Output Format:** Your final answer should be a single integer — the sum of GCD(i, j)^{K} over all such pairs.""" + + def __init__(self, + max_K : int = 5, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SumGCD_Environment instance. 
def _generate(self) -> None :
    """
    Sample N, M, K and compute the sum of GCD(i, j)^K over the N x M grid.

    Requires ``MAX_N_M`` (at least 2) in ``self.parameter`` and reads
    ``self.max_K``; writes ``N``, ``M``, ``K`` and ``reference_answer`` back.
    """
    assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
    MAX_N_M = self.parameter["MAX_N_M"]
    assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

    N = self.parameter["N"] = random.randint(2, MAX_N_M)
    M = self.parameter["M"] = random.randint(2, MAX_N_M)
    K = self.parameter["K"] = random.randint(1, self.max_K)

    # Only divisors up to the smaller side can be a common divisor
    limit = min(N, M)

    # Linear sieve for a multiplicative f with f(p) = p^K - 1 and f(p^(e+1)) = f(p^e) * p^K
    composite = [False] * (limit + 1)
    f = [0] * (limit + 1)
    primes = []
    prime_powers = []       # prime_powers[j] == primes[j] ** K

    f[1] = 1
    for i in range(2, limit + 1) :
        if not composite[i] :
            power = i ** K
            primes.append(i)
            prime_powers.append(power)
            f[i] = power - 1

        for p, power in zip(primes, prime_powers) :
            multiple = i * p
            if multiple > limit :
                break
            composite[multiple] = True
            if i % p != 0 :
                f[multiple] = f[i] * f[p]
                continue
            # p already divides i: raise the exponent of p by one and stop
            f[multiple] = f[i] * power
            break

    # Turn f into its prefix sums in place
    for i in range(1, limit + 1) :
        f[i] += f[i - 1]

    # Sum f(d) * floor(N/d) * floor(M/d), grouping d by the pair of quotients
    total = 0
    lo = 1
    while lo <= limit :
        qn, qm = N // lo, M // lo
        hi = min(N // qn, M // qm)
        total += (f[hi] - f[lo - 1]) * qn * qm
        lo = hi + 1

    self.parameter["reference_answer"] = total
    assert total > 0, "ans should be greater than 0"
"(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_gcd_with_individual/__init__.py b/server/Gym/environments/sum_gcd_with_individual/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b399a459fc68b330571c62362d64e5066c549790 --- /dev/null +++ b/server/Gym/environments/sum_gcd_with_individual/__init__.py @@ -0,0 +1 @@ +from .environment import SumGCDWithIndividual_Environment diff --git a/server/Gym/environments/sum_gcd_with_individual/environment.py b/server/Gym/environments/sum_gcd_with_individual/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..410bbbba75f559222e7ee0a52747b01d8a7feff3 --- /dev/null +++ b/server/Gym/environments/sum_gcd_with_individual/environment.py @@ -0,0 +1,90 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumGCDWithIndividual_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4449 + prompt_template = \ +r"""Please compute the sum of GCD(i, {N}) for all i such that 1 ≤ i ≤ {N}. Here, GCD(i, j) denotes the **greatest common divisor** of integers i and j. 
def _generate(self) -> None :
    """
    Sample N and compute the sum of GCD(i, N) for i in 1..N.

    Requires ``MAX_N`` (at least 4) in ``self.parameter``; writes ``N`` and
    ``reference_answer`` back.
    """
    assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
    MAX_N = self.parameter["MAX_N"]
    assert MAX_N >= 4, "MAX_N should be greater than or equal to 4"

    N = self.parameter["N"] = random.randint(4, MAX_N)

    def gcd_sum(n) :
        # Built up as a product over the prime factorisation of n:
        # a prime power p^b contributes p^(b-1) * (b*p - b + p)
        remaining = n
        product = 1
        p = 2
        while p * p <= remaining :
            exponent = 0
            while remaining % p == 0 :
                remaining //= p
                exponent += 1
            if exponent :
                product *= p ** (exponent - 1) * (exponent * p - exponent + p)
            p += 1

        # At most one prime factor larger than the square root is left, with exponent 1
        if remaining > 1 :
            product *= 2 * remaining - 1

        return product

    self.parameter["reference_answer"] = gcd_sum(N)
(((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_lcm/__init__.py b/server/Gym/environments/sum_lcm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8f34d7ce8c448abdeb5e0354ae5b15af215fdaa7 --- /dev/null +++ b/server/Gym/environments/sum_lcm/__init__.py @@ -0,0 +1 @@ +from .environment import SumLCM_Environment diff --git a/server/Gym/environments/sum_lcm/environment.py b/server/Gym/environments/sum_lcm/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d057e2f8ef808f18919a966d9dc5d63fe8c86542 --- /dev/null +++ b/server/Gym/environments/sum_lcm/environment.py @@ -0,0 +1,110 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumLCM_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1829 + prompt_template = \ +r"""Please compute sum(LCM(i, j)) for all pairs (i, j) such that 1 ≤ i ≤ {N} and 1 ≤ j ≤ {M}. Here, LCM(i, j) denotes the **least common multiple** of integers i and j. + +**Output Format:** Your final answer should be a single integer — the sum of LCM(i, j) over all such pairs.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SumLCM_Environment instance. 
def _generate(self) -> None :
    """
    Sample N and M and compute the sum of LCM(i, j) over the N x M grid.

    Requires ``MAX_N_M`` (at least 2) in ``self.parameter``; writes ``N``,
    ``M`` and ``reference_answer`` back.
    """
    assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
    MAX_N_M = self.parameter["MAX_N_M"]
    assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

    N = self.parameter["N"] = random.randint(2, MAX_N_M)
    M = self.parameter["M"] = random.randint(2, MAX_N_M)

    bound = max(N, M)

    # Linear sieve for the Moebius function on 1..bound
    mobius = [0] * (bound + 1)
    mobius[1] = 1
    primes = []
    crossed = bytearray(bound + 1)
    for i in range(2, bound + 1) :
        if not crossed[i] :
            primes.append(i)
            mobius[i] = -1
        for p in primes :
            multiple = i * p
            if multiple > bound :
                break
            crossed[multiple] = 1
            if i % p == 0 :
                mobius[multiple] = 0
                break
            mobius[multiple] = -mobius[i]

    # weighted[i] = sum of mobius[t] * t^2 for t in 1..i
    weighted = [0] * (bound + 1)
    for i in range(1, bound + 1) :
        weighted[i] = weighted[i - 1] + mobius[i] * i * i

    def triangular(t : int) -> int :
        return t * (t + 1) // 2

    def coprime_product_sum(nx : int, ny : int) -> int :
        # sum over t of mobius[t] * t^2 * T(nx // t) * T(ny // t), grouping t by quotient pair
        smaller = min(nx, ny)
        acc = 0
        lo = 1
        while lo <= smaller :
            qx, qy = nx // lo, ny // lo
            hi = min(nx // qx, ny // qy)
            acc += (weighted[hi] - weighted[lo - 1]) * triangular(qx) * triangular(qy)
            lo = hi + 1
        return acc

    # d plays the role of the common divisor; terms with d > min(N, M) contribute 0
    self.parameter["reference_answer"] = sum(
        d * coprime_product_sum(N // d, M // d) for d in range(1, bound + 1)
    )
self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_manhattan_curved_surface/__init__.py b/server/Gym/environments/sum_manhattan_curved_surface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a89c57b34a47f88892b5a72d2492ee526f809ba3 --- /dev/null +++ b/server/Gym/environments/sum_manhattan_curved_surface/__init__.py @@ -0,0 +1 @@ +from .environment import SumManhattan_CurvedSurface_Environment diff --git a/server/Gym/environments/sum_manhattan_curved_surface/environment.py b/server/Gym/environments/sum_manhattan_curved_surface/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..10f7f7a16ee8322d1ccae6dcbe4b3fe5357a0580 --- /dev/null +++ b/server/Gym/environments/sum_manhattan_curved_surface/environment.py @@ -0,0 +1,127 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumManhattan_CurvedSurface_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3636 + prompt_template = r"""Define P(k) as the sum of (|x| + |y| + |z|)^2 over all integer triples (x, y, z) such that x × y × z = k. 
def _generate(self) -> None :
    """
    Sample the range [A, B] and compute the sum of P(k) over it.

    Requires ``MAX_A_B`` (at least 1) in ``self.parameter``; writes ``A``,
    ``B`` and ``reference_answer`` back. All arithmetic is exact integer
    arithmetic; no modulus is applied.
    """
    assert "MAX_A_B" in self.parameter, "MAX_A_B is required in parameter"
    MAX_A_B = self.parameter["MAX_A_B"]
    assert MAX_A_B >= 1, "MAX_A_B should be greater than or equal to 1"

    A = self.parameter["A"] = random.randint(1, MAX_A_B)
    B = self.parameter["B"] = random.randint(A, MAX_A_B)

    def linear_sum(lo : int, hi : int) -> int :
        """Sum of i for i in [lo..hi]."""
        return (lo + hi) * (hi - lo + 1) // 2

    def square_prefix(x : int) -> int :
        """Sum of i^2 for i in [1..x]."""
        return x * (x + 1) * (2 * x + 1) // 6

    def square_sum(lo : int, hi : int) -> int :
        """Sum of i^2 for i in [lo..hi]."""
        return square_prefix(hi) - square_prefix(lo - 1)

    def pair_moments(n : int) :
        """
        Over all positive pairs (i, j) with i * j <= n, return
        (number of pairs, sum of (i + j), sum of (i + j)^2).
        Values of i sharing the same floor(n / i) are handled together.
        """
        count = first = second = 0
        lo = 1
        while lo <= n :
            q = n // lo
            hi = n // q
            width = hi - lo + 1

            i_sum, j_sum = linear_sum(lo, hi), linear_sum(1, q)
            count += width * q
            first += i_sum * q + width * j_sum
            second += square_sum(lo, hi) * q + width * square_sum(1, q) + 2 * i_sum * j_sum

            lo = hi + 1

        return count, first, second

    def triple_total(n : int) -> int :
        """
        Sum of (x + y + z)^2 over all positive triples with x * y * z <= n,
        expanded as x^2 + 2 * x * (y + z) + (y + z)^2 per value of x.
        """
        acc = 0
        lo = 1
        while lo <= n :
            q = n // lo
            hi = n // q
            width = hi - lo + 1

            pairs, first, second = pair_moments(q)
            acc += square_sum(lo, hi) * pairs + linear_sum(lo, hi) * 2 * first + width * second

            lo = hi + 1

        return acc

    # The positive-triple total is multiplied by 4 to give the answer over signed triples
    result = 4 * (triple_total(B) - triple_total(A - 1))
    assert result > 0, "Result should be positive"
    self.parameter["reference_answer"] = result
end of file diff --git a/server/Gym/environments/sum_mod/__init__.py b/server/Gym/environments/sum_mod/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..da1aa509de00569392496dc304333cf0ab5b2c8b --- /dev/null +++ b/server/Gym/environments/sum_mod/__init__.py @@ -0,0 +1 @@ +from .environment import SumMOD_Environment diff --git a/server/Gym/environments/sum_mod/environment.py b/server/Gym/environments/sum_mod/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb4aa241f986165d912f8dd0e4e450a7a65ccc7 --- /dev/null +++ b/server/Gym/environments/sum_mod/environment.py @@ -0,0 +1,108 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumMOD_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2260 + prompt_template = \ +r"""Please compute the sum of ({N} mod i) × ({M} mod j) over all pairs of integers (i, j) such that: +- 1 ≤ i ≤ {N} +- 1 ≤ j ≤ {M} +- i ≠ j + +**Output Format:** Your final answer should be a single integer — the sum of all computed values.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SumMOD_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3" + + N = self.parameter["N"] = random.randint(3, MAX_N_M) + M = self.parameter["M"] = random.randint(3, MAX_N_M) + + + def sum1(l : int, r : int) -> int : + return (l + r) * (r - l + 1) // 2 + + def sum2(x : int) -> int : + return x * (x + 1) * (2 * x + 1) // 6 + + def calc(n : int) -> int : + res, l = 0, 1 + while l <= n : + q = n // l + r = n // q + res += n * (r - l + 1) - sum1(l, r) * q + l = r + 1 + return res + + def solve(n : int, m : int) -> int : + if n > m : + n, m = m, n + + ans = calc(n) * calc(m) + + l = 1 + while l <= n : + nd, md = n // l, m // l + r = min(n // nd, m // md) + + cnt = r - l + 1 + SUM = n * m * cnt + Sum = nd * md * (sum2(r) - sum2(l - 1)) + SUMK = (nd * m + md * n) * sum1(l, r) + ans -= (SUM + Sum - SUMK) + l = r + 1 + + return ans + + self.parameter["reference_answer"] = solve(N, M) + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.parameter["reference_answer"] == 0 : + return self.rewards["rewarding_weight"] * (processed_result == 0) + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = 
self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_phi_interval/__init__.py b/server/Gym/environments/sum_phi_interval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2b2c401c66cc4707d4e9d2a38b5a84883b43e58b --- /dev/null +++ b/server/Gym/environments/sum_phi_interval/__init__.py @@ -0,0 +1 @@ +from .environment import SumPHIInterval_Environment diff --git a/server/Gym/environments/sum_phi_interval/environment.py b/server/Gym/environments/sum_phi_interval/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f5316c6f93eabaf738d53a35c502044d0f72d1cd --- /dev/null +++ b/server/Gym/environments/sum_phi_interval/environment.py @@ -0,0 +1,110 @@ +import math +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumPHIInterval_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3601 + prompt_template = r"""Define F(x) as the number of integers in the range [1, x] that are **not coprime** to x. Please output the sum of F(i) for all integers i in the range [{L}, {R}] (inclusive).""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SumPHIInterval_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_DELTA" in self.parameter, "MAX_DELTA is required in parameter" + MAX_DELTA = self.parameter["MAX_DELTA"] + assert MAX_DELTA >= 1, "MAX_DELTA should be greater than or equal to 1" + + L = self.parameter["L"] = random.randint(1, MAX_DELTA ** 2) + R = self.parameter["R"] = L + random.randint(1, MAX_DELTA) + + + # 1. generate all primes up to sqrt(R) + limit = math.isqrt(R) + is_prime = [True] * (limit + 1) + primes = [] + for i in range(2, limit + 1): + if is_prime[i]: + primes.append(i) + if i * i <= limit: + for j in range(i * i, limit + 1, i): + is_prime[j] = False + + # 2. prepare A and B arrays for [L..R] + size = R - L + 1 + A = [L + i for i in range(size)] # will become φ(L+i) + B = [L + i for i in range(size)] # copy to strip prime factors + + # 3. for each small prime p, apply the φ‐factor and strip p from B + for p in primes: + if p * p > R: + break + # first multiple of p in [L..R] + start = ((L + p - 1) // p) * p + for x in range(start, R + 1, p): + idx = x - L + # multiply φ‐part: φ(n) *= (1 - 1/p) + A[idx] //= p + A[idx] *= (p - 1) + # remove ALL factors of p from B[idx] + while B[idx] % p == 0: + B[idx] //= p + + # 4. 
any B[idx] > 1 is a leftover prime > sqrt(R) + ans = 0 + for i in range(size): + if B[i] > 1: + # apply its φ‐factor + A[i] //= B[i] + A[i] *= (B[i] - 1) + # qiandao(L+i) = (L+i) - φ(L+i) + ans += (L + i) - A[i] + + self.parameter["reference_answer"] = ans + assert ans > 0, "The reference answer should be greater than 0" + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(L = self.parameter["L"], R = self.parameter["R"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_product_divisor_num/__init__.py b/server/Gym/environments/sum_product_divisor_num/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a8e01f8c7b9a384f0bd772ea1e6e1f5c3256aa8 --- /dev/null +++ b/server/Gym/environments/sum_product_divisor_num/__init__.py @@ -0,0 +1 @@ +from .environment import SumProductDivisorNum_Environment diff --git a/server/Gym/environments/sum_product_divisor_num/environment.py 
b/server/Gym/environments/sum_product_divisor_num/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..9c544975dddff4308a386e7bb25fcd63ed551d70 --- /dev/null +++ b/server/Gym/environments/sum_product_divisor_num/environment.py @@ -0,0 +1,134 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumProductDivisorNum_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3327 + prompt_template = \ +r"""Please compute sum(d(i * j)) for all pairs (i, j) such that 1 ≤ i ≤ {N} and 1 ≤ j ≤ {M}. Here, d(x) denotes the **number of distinct divisors** of integer x, and d(i * j) is the number of divisors of the product of i and j. + +**Output Format:** Your final answer should be a single integer — the sum of d(i * j) over all such pairs.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SumProductDivisorNum_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N_M) + M = self.parameter["M"] = random.randint(2, MAX_N_M) + + + def precompute(max_val: int): + """ + Pre-computes + • mu_pref[x] – Σ_{k=1..x} μ(k) (Möbius prefix sum, 0-indexed) + • s[x] – Σ_{k=1..x} ⌊x/k⌋ (harmonic-sum helper), 0-indexed + Both lists have length max_val + 1 so that index == argument. 
+ """ + # -------- linear sieve for Möbius ----------------- + mu = [0] * (max_val + 1) # μ itself; will turn into prefix sum + mu[1] = 1 + is_composite = [False] * (max_val + 1) + primes = [] + + for i in range(2, max_val + 1): + if not is_composite[i]: # i is prime + primes.append(i) + mu[i] = -1 + for p in primes: + ip = i * p + if ip > max_val: + break + is_composite[ip] = True + if i % p == 0: # p divides i → μ(ip) = 0 + mu[ip] = 0 + break + mu[ip] = -mu[i] + + # turn μ into its prefix sum in-place + for i in range(1, max_val + 1): + mu[i] += mu[i - 1] + + # -------- pre-compute s[x] = Σ_{k=1..x} ⌊x/k⌋ ----- + s = [0] * (max_val + 1) + for x in range(1, max_val + 1): + res = 0 + i = 1 + # harmonic-series blocking: next j s.t. ⌊x/i⌋ is constant on [i,j] + while i <= x: + j = x // (x // i) # largest j with ⌊x/i⌋ constant + res += (j - i + 1) * (x // i) + i = j + 1 + s[x] = res + + return mu, s + + + def solve_case(N: int, M: int, mu_pref, s): + """ + Computes Σ_{i=1..N} Σ_{j=1..M} d(i j) in O(√(min(N,M))) using the + Möbius inversion trick exactly as in the reference C++. + N ≤ M must hold when called. 
+ """ + ans = 0 + i = 1 + while i <= N: + j = min(N // (N // i), M // (M // i)) + ans += (mu_pref[j] - mu_pref[i - 1]) * s[N // i] * s[M // i] + i = j + 1 + return ans + + # one-shot pre-computation up to the largest N, M + mu_pref, s = precompute(max(N, M)) + + if N > M: # ensure N ≤ M as in the C++ optimisation + N, M = M, N + self.parameter["reference_answer"] = solve_case(N, M, mu_pref, s) + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], M = self.parameter["M"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result <= 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_pseudo_euclidean/__init__.py b/server/Gym/environments/sum_pseudo_euclidean/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f50de1515edf4cdbd32d99faf7cd4ea73bf2690c --- /dev/null +++ b/server/Gym/environments/sum_pseudo_euclidean/__init__.py @@ -0,0 +1 @@ +from .environment import SumPseudoEuclidean_Environment diff --git a/server/Gym/environments/sum_pseudo_euclidean/environment.py 
b/server/Gym/environments/sum_pseudo_euclidean/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..d87f8c57fff70aa47169eabda1d1f7f932f2fa93 --- /dev/null +++ b/server/Gym/environments/sum_pseudo_euclidean/environment.py @@ -0,0 +1,112 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + +class SumPseudoEuclidean_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3764 + prompt_template = \ +r"""Consider the function `f(a, b)` defined in Python as follows: +```python +def f(a: int, b: int) -> int: + if a == b: + return 0 + if a > b: + return f(a - b, b + b) + 1 + else: + return f(a + a, b - a) + 1 +``` + +If the function enters an infinite loop, we treat its return value as `0`. Tell me the sum of `f(i, j)` over all pairs (i, j) such that 1 ≤ i ≤ {N} and 1 ≤ j ≤ {N}.""" + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the SumPseudoEuclidean_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 5, "MAX_N should be greater than or equal to 5" + + N = self.parameter["N"] = random.randint(5, MAX_N) + + + def solve(N): + # Count of odd numbers in [x, y] + def count_odds(x, y): + length = y - x + 1 + # If the interval length is odd and starts with an odd number, we get one extra odd + if (length & 1) and (x & 1): + return (length >> 1) + 1 + else: + return length >> 1 + + # “Logarithmic” number‐theory block over [l..k] + def block_sum(l, k, N): + total = 0 + while l <= k: + # floor(log2(l)) + lg = l.bit_length() - 1 + # r = min((2^(lg+1) - 1), k) + r = min((1 << (lg + 1)) - 1, k) + # contribution: lg * (N//l) times number of odds in [l..r] + total += lg * (N // l) * count_odds(l, r) + l = r + 1 + return total + + ans = 0 + l = 1 + # Standard divisor‐block decomposition over 1..N + while l <= N: + v = N // l + r = N // v + ans += block_sum(l, r, N) + l = r + 1 + + # multiply by 2 as in the original C++ (ans << 1) + return ans * 2 + + self.parameter["reference_answer"] = solve(N) + assert self.parameter["reference_answer"] > 0, "Reference answer should be greater than 0" + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + 
if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/sum_set_multiplication/__init__.py b/server/Gym/environments/sum_set_multiplication/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..badae8d77db3273c10a74644c7dca0342c316a22 --- /dev/null +++ b/server/Gym/environments/sum_set_multiplication/__init__.py @@ -0,0 +1 @@ +from .environment import SumSetMultiplication_Environment diff --git a/server/Gym/environments/sum_set_multiplication/environment.py b/server/Gym/environments/sum_set_multiplication/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..135522f22ecc0884656d310a0c8d694081759888 --- /dev/null +++ b/server/Gym/environments/sum_set_multiplication/environment.py @@ -0,0 +1,109 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class SumSetMultiplication_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4463 + prompt_template = r"""Consider all sequences A[1..{N}] of **distinct integers** chosen from [1, {K}]. Compute the sum of (A[1] × A[2] × ... × A[{N}]) over all such sequences, modulo {MOD}.""" + MODs = (666623333, 998244353, 10 ** 9 + 7) + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SumSetMultiplication_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 3, "MAX_N should be greater than or equal to 3" + + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K > MAX_N, "MAX_K should be greater than MAX_N" + + N = self.parameter["N"] = random.randint(3, MAX_N) + K = self.parameter["K"] = random.randint(N + 1, MAX_K) + MOD = self.parameter["MOD"] = random.choice(self.MODs) + + + # dynamic sizing based on N + size = 2 * N + 3 # to safely index up to 2N+1 and use i+1 at i=2N + F = [0] * size + C = [0] * size + + def mod_pow(a, b): + a %= MOD + res = 1 + while b: + if b & 1: + res = (res * a) % MOD + a = (a * a) % MOD + b >>= 1 + return res + + INX = K if (2 * N + 1) > K else (2 * N + 1) + C[INX] = 1 + F[0] = 1 + + for i in range(1, N + 1): + for j in range(2 * i, 1, -1): + F[j] = (F[j - 1] * j + F[j - 2] * (2 * i - j)) % MOD + F[1] = F[0] + F[0] = 0 + + if INX == 2 * N + 1: + for i in range(1, 2 * N + 1): + C[INX] = (C[INX] * ((K - i) % MOD)) % MOD + C[INX] = (C[INX] * mod_pow(i % MOD, MOD - 2)) % MOD + + for i in range(INX - 1, -1, -1): + numerator = (K + 2 * N - i) % MOD + denom = (K - i) % MOD + C[i] = C[i + 1] * numerator % MOD * mod_pow(denom, MOD - 2) % MOD + + ans = 0 + for i in range(0, 2 * N + 1): + ans = (ans + C[i] * F[i]) % MOD + for i in range(1, N + 1): + ans = ans * i % MOD + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + return self.prompt_template.format(N = self.parameter["N"], K = self.parameter["K"], MOD = self.parameter["MOD"]) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + 
def scorer(self, output: str) -> float:
    """
    Score a raw model output against the stored reference answer.

    The output is parsed with ``self.processor`` and mapped to one of four
    configured rewards:

    * ``wrong_format``   -- the output could not be parsed (``None``);
    * ``wrong_range``    -- the value is outside ``[0, MOD)``;
    * ``correct_answer`` -- the value equals the reference answer;
    * ``wrong_answer``   -- any other in-range value.
    """
    parsed = self.processor(output)
    if parsed is None:
        return self.rewards["wrong_format"]
    if parsed < 0 or parsed >= self.parameter["MOD"]:
        return self.rewards["wrong_range"]

    matches = parsed == self.parameter["reference_answer"]
    return self.rewards["correct_answer"] if matches else self.rewards["wrong_answer"]
Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex u to vertex v with weight w**: +{edges} + +Consider a subset of edges `T = [(u_1, v_1, w_1), (u_2, v_2, w_2), ..., (u_k, v_k, w_k)]` such that: +- k = {N_minus_1} (i.e., you select exactly {N_minus_1} edges), +- The selected edges form a **spanning tree** — that is, they connect all {N} vertices without forming any cycles, +- The value of this spanning tree is defined as the **greatest common divisor (GCD)** of the weights of the edges in `T`, i.e., `gcd(w_1, w_2, ..., w_k)`. + +What is **the sum value** of all such spanning trees modulo {MOD}?""" + MODs = (666623333, 998244353, 10 ** 9 + 7) + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the SumSpanningTreeGCD_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + common_d = random.randint(1, N) + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u + 1, v + 1, common_d * random.randint(1, N))) + + num_edges = int(edge_density * N * (N - 1) / 2) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in 
range(1, N + 1) for v in range(u + 1, N + 1)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(1, N * N))) + random.shuffle(edges) + + for u, v, w in edges : + assert 1 <= u < v <= N + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + MOD = self.parameter["MOD"] = random.choice(self.MODs) + + + weight_counts = Counter() + edges = [] + for u, v, w in self.parameter["edges"] : + edges.append((u-1, v-1, w)) + weight_counts[w] += 1 + + # 2) Precompute small primes for trial division up to sqrt(max_w) + max_w = max(weight_counts) if weight_counts else 0 + limit = int(math.isqrt(max_w)) + 1 + sieve = [True] * (limit+1) + primes = [] + for i in range(2, limit+1): + if sieve[i]: + primes.append(i) + for j in range(i*i, limit+1, i): + sieve[j] = False + + # 3) Build S[d] = number of edges whose weight is divisible by d, + # and phi_map[d] = φ(d) for all divisors d that appear. 
+ S = defaultdict(int) + phi_map = {} + + def gen_divisors(idx, cur_d, cur_phi, factors, cnt): + """Recursively generate all divisors of a weight w and accumulate S, phi_map.""" + if idx == len(factors): + S[cur_d] += cnt + if cur_d not in phi_map: + phi_map[cur_d] = cur_phi + return + p, e = factors[idx] + # exponent = 0 + gen_divisors(idx+1, cur_d, cur_phi, factors, cnt) + # exponents 1..e + p_pow = 1 + for k in range(1, e+1): + p_pow *= p + # φ(p^k) = p^k - p^(k-1) + factor = p_pow - (p_pow // p) + gen_divisors(idx+1, cur_d * p_pow, cur_phi * factor, factors, cnt) + + for w, cnt in weight_counts.items(): + # factor w into primes + x = w + factors = [] + for p in primes: + if p*p > x: + break + if x % p == 0: + e = 0 + while x % p == 0: + x //= p + e += 1 + factors.append((p, e)) + if x > 1: + factors.append((x, 1)) + # generate its divisors + gen_divisors(0, 1, 1, factors, cnt) + + # 4) Collect all d for which we have at least N-1 edges divisible by d + candidates = [d for d, cnt in S.items() if cnt >= N-1] + candidates.sort() + + # 5) Define a function to compute the number of spanning trees + # in the subgraph of edges whose weight divides d, via Kirchhoff + Gauss. 
+ def solve_for_d(d): + dim = N - 1 + # build the (N-1)x(N-1) Laplacian minor + G = [[0]*dim for _ in range(dim)] + for u, v, w in edges: + if w % d != 0 or u == v: + continue + # only update if endpoint != the excluded node (index N-1) + if u < dim and v < dim: + G[u][u] += 1 + G[v][v] += 1 + G[u][v] -= 1 + G[v][u] -= 1 + elif u < dim: + G[u][u] += 1 + elif v < dim: + G[v][v] += 1 + # reduce modulo + for i in range(dim): + for j in range(dim): + G[i][j] %= MOD + + # Gaussian elimination to compute determinant MOD + det = 1 + for i in range(dim): + # pivot if needed + if G[i][i] == 0: + for j in range(i+1, dim): + if G[j][i]: + G[i], G[j] = G[j], G[i] + det = -det % MOD + break + else: + return 0 + ai = G[i][i] + det = det * ai % MOD + inv = pow(ai, MOD-2, MOD) + # eliminate below + for j in range(i+1, dim): + if G[j][i]: + factor = G[j][i] * inv % MOD + row_i = G[i] + row_j = G[j] + for k in range(i, dim): + row_j[k] = (row_j[k] - factor * row_i[k]) % MOD + return det + + # 6) Sum up φ(d) * (# of trees using only edges ≡ 0 MOD d) + ans = 0 + for d in candidates: + ans = (ans + phi_map[d] * solve_for_d(d)) % MOD + + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + MOD = self.parameter["MOD"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + 
import random
import functools
from typing import Optional, List
from ...environment import VerifiableEnvironment


# ---- server/Gym/environments/sum_triangle_area/environment.py ----

class SumTriangleArea_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3476
    prompt_template = \
r"""There are {N} points in a 2D plane, each represented by its coordinates (x, y). The points are given as follows:
{points}

Please compute the **sum of the areas of all triangles** that can be formed by any three distinct points in this set. If a triangle is degenerate (i.e., the three points are collinear), its area is considered 0. **Output the total area multiplied by 2** (i.e., twice the sum of all triangle areas), which will always be an integer (think about why this is the case)."""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta", rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the SumTriangleArea_Environment instance.

        The four reward settings are stored in ``self.rewards``; every other
        keyword argument is forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Sample N distinct grid points and store twice the total triangle area.

        Reads ``parameter["N"]`` (must be >= 3); writes ``parameter["points"]``
        and ``parameter["reference_answer"]``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        grid = [(x, y) for x in range(N + 1) for y in range(N + 1)]
        points = self.parameter["points"] = random.sample(grid, N)

        def by_polar_angle(a, b):
            # Cross-product comparison: a sorts before b when cross(a, b) > 0.
            lhs, rhs = a[1] * b[0], a[0] * b[1]
            return (lhs > rhs) - (lhs < rhs)

        # Lexicographic order guarantees that every later point lies in the
        # right half-plane of the current origin, so the comparator is a
        # consistent ordering of the rays.
        ordered = sorted(points)

        doubled_area = 0
        for idx, (ox, oy) in enumerate(ordered):
            rays = [(px - ox, py - oy) for px, py in ordered[idx + 1:]]
            rays.sort(key=functools.cmp_to_key(by_polar_angle))

            # Walk the rays from last to first while keeping the component
            # sums of everything already visited; each ray contributes its
            # cross product with that running sum.
            tail_x = tail_y = 0
            for dx, dy in reversed(rays):
                doubled_area += dx * tail_y - dy * tail_x
                tail_x += dx
                tail_y += dy

        self.parameter["reference_answer"] = doubled_area

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and one "(x, y)" line per point."""
        point_lines = ["({}, {})".format(x, y) for x, y in self.parameter["points"]]
        return self.prompt_template.format(
            N=self.parameter["N"],
            points="\n".join(point_lines),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score ``output`` against the stored reference answer.

        Missing, unparsable or negative answers earn the ``wrong_format``
        reward; otherwise the configured rewarding strategy is applied.
        """
        value = self.processor(output)
        if value is None or value < 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            if value == 0:
                # Avoids 0/0 when both the answer and the reference are zero.
                return self.rewards["rewarding_weight"] * int(self.parameter["reference_answer"] == 0)
            reference = self.parameter["reference_answer"]
            ratio = min(reference, value) / max(reference, value)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (value == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))


# ---- server/Gym/environments/sum_xor_divisor_num/environment.py ----

class SumXorDivisorNum_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3791
    prompt_template = r"""Let d(n) denote the number of positive divisors of n (with d(0) = 0). What is the sum of d(i XOR j XOR {X}) (XOR means bitwise XOR) over all integer pairs (i, j) such that 0 ≤ i ≤ {N} and 0 ≤ j ≤ {M}?"""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta", rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the SumXorDivisorNum_Environment instance.

        The four reward settings are stored in ``self.rewards``; every other
        keyword argument is forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Draw N, M, X and store the exact (unreduced) sum as the reference answer.

        Reads ``parameter["MAX_N_M"]`` (must be >= 3); writes ``N``, ``M``,
        ``X`` and ``reference_answer``.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 3, "MAX_N_M should be greater than or equal to 3"

        N = self.parameter["N"] = random.randint(3, MAX_N_M)
        M = self.parameter["M"] = random.randint(3, MAX_N_M)
        X = self.parameter["X"] = random.randint(0, MAX_N_M)

        def bits_lsb_first(value):
            return [(value >> pos) & 1 for pos in range(value.bit_length())]

        # Work with the exclusive upper bounds N + 1 and M + 1.
        a_bits = bits_lsb_first(N + 1)
        b_bits = bits_lsb_first(M + 1)
        x_bits = bits_lsb_first(X)

        # Pad the three bit lists to a common width.
        width = max(len(a_bits), len(b_bits), len(x_bits))
        for bits in (a_bits, b_bits, x_bits):
            bits.extend([0] * (width - len(bits)))

        # high_part[p] = value of bits p..width-1 of (N + 1) ^ (M + 1) ^ X
        high_part = [0] * (width + 1)
        for p in reversed(range(width)):
            high_part[p] = high_part[p + 1] + ((a_bits[p] ^ b_bits[p] ^ x_bits[p]) << p)

        divisor_prefix_cache = {}

        def divisor_prefix(limit):
            """Return sum_{k=1}^{limit} d(k), where d(k) = #divisors of k (0 for limit <= 0)."""
            if limit <= 0:
                return 0
            if limit in divisor_prefix_cache:
                return divisor_prefix_cache[limit]
            total = 0
            left = 1
            # Group the indices that share the same quotient limit // left.
            while left <= limit:
                quotient = limit // left
                right = limit // quotient
                total += (right - left + 1) * quotient
                left = right + 1
            divisor_prefix_cache[limit] = total
            return total

        set_in_a = [pos for pos, bit in enumerate(a_bits) if bit]
        set_in_b = [pos for pos, bit in enumerate(b_bits) if bit]

        answer = 0
        for i in set_in_a:
            for j in set_in_b:
                high, low = max(i, j), min(i, j)

                # Bit `high` of the XOR is flipped relative to high_part
                # unless both bounds branch at the same bit (i == j).
                base = high_part[high]
                if i != j:
                    base ^= 1 << high

                # Sum of d(v) for v in [base, base + 2^high - 1]; each such v
                # is counted 2^low times.
                block_sum = divisor_prefix(base + (1 << high) - 1) - divisor_prefix(base - 1)
                answer += block_sum * (1 << low)

        assert answer > 0, "The answer should be greater than 0"
        self.parameter["reference_answer"] = answer

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, M and X."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            X=self.parameter["X"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score ``output`` against the stored reference answer.

        Missing, unparsable or negative answers earn the ``wrong_format``
        reward; otherwise the configured rewarding strategy is applied.
        """
        value = self.processor(output)
        if value is None or value < 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            reference = self.parameter["reference_answer"]
            ratio = min(reference, value) / max(reference, value)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (value == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))


# ---- server/Gym/environments/survo_puzzle/environment.py ----

class SurvoPuzzle_Environment(VerifiableEnvironment):
    prompt_template = \
r"""You are given a {N} × {M} matrix with some cells filled with numbers from `0` to `{NM_minus_1}`, and some cells empty (represented by `-1`). Please fill the empty cells with numbers from `0` to `{NM_minus_1}` such that:
1. Each number from `0` to `{NM_minus_1}` appears **exactly once** in the matrix.
2. The sum of each row (from top to bottom) is: {row_sums}
3. The sum of each column (from left to right) is: {col_sums}

The matrix is given as follows:
{matrix}

**Output Format:** Your final answer should contain {N} lines, each with {M} numbers, separated by spaces. The numbers should represent the completed matrix in **row-major order**, matching the format of the given input."""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(satisfied/all)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the SurvoPuzzle_Environment instance.

        The five reward settings are stored in ``self.rewards``; every other
        keyword argument is forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Build a random N x M permutation grid, record its sums, then blank cells.

        Reads ``parameter["MAX_N_M"]`` (>= 2) and ``parameter["sparsity"]``
        (strictly between 0 and 1); writes ``N``, ``M``, ``matrix``,
        ``reference_answer``, ``row_sums`` and ``col_sums``.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, MAX_N_M)
        M = self.parameter["M"] = random.randint(2, MAX_N_M)

        values = list(range(N * M))
        random.shuffle(values)

        grid = self.parameter["matrix"] = [values[r * M:(r + 1) * M] for r in range(N)]
        # The reference answer and the sums are taken before any cell is blanked.
        self.parameter["reference_answer"] = "\n".join(" ".join(map(str, row)) for row in grid)
        self.parameter["row_sums"] = [sum(row) for row in grid]
        self.parameter["col_sums"] = [sum(column) for column in zip(*grid)]

        assert "sparsity" in self.parameter, "sparsity is required in parameter"
        sparsity = self.parameter["sparsity"]
        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
        blanks = random.sample(range(N * M), max(1, int(N * M * sparsity)))
        for cell in blanks:
            r, c = divmod(cell, M)
            grid[r][c] = -1

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the dimensions, the grid and both sum lists."""
        N, M = self.parameter["N"], self.parameter["M"]
        grid_text = "\n".join(" ".join(map(str, row)) for row in self.parameter["matrix"])
        return self.prompt_template.format(
            N=N,
            M=M,
            NM_minus_1=N * M - 1,
            matrix=grid_text,
            row_sums=" ".join(map(str, self.parameter["row_sums"])),
            col_sums=" ".join(map(str, self.parameter["col_sums"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse the answer into a list of integer rows, skipping blank lines.

        Returns None when the answer is absent or any token is not an integer.
        """
        if answer is None:
            return None
        try:
            return [
                list(map(int, line.split()))
                for line in answer.strip().splitlines()
                if line.strip()
            ]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score a filled grid.

        A wrong shape earns ``wrong_format``; a grid that is not a permutation
        of 0..N*M-1 or that overwrites a given cell earns ``invalid_solution``;
        otherwise the reward depends on how many row/column sums match.
        """
        solution = self.processor(output)
        if solution is None:
            return self.rewards["wrong_format"]
        assert isinstance(solution, list), "processed_result should be a list"

        N, M = self.parameter["N"], self.parameter["M"]

        if len(solution) != N or any(len(row) != M for row in solution):
            return self.rewards["wrong_format"]

        if {value for row in solution for value in row} != set(range(N * M)):
            return self.rewards["invalid_solution"]

        for given_row, filled_row in zip(self.parameter["matrix"], solution):
            for given, filled in zip(given_row, filled_row):
                if given != -1 and given != filled:
                    return self.rewards["invalid_solution"]

        row_sums = [sum(row) for row in solution]
        col_sums = [sum(column) for column in zip(*solution)]

        satisfied = sum(got == want for got, want in zip(row_sums, self.parameter["row_sums"])) + \
                    sum(got == want for got, want in zip(col_sums, self.parameter["col_sums"]))
        assert satisfied <= N + M, "satisfied should not exceed N + M"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta":
            return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all":
            return self.rewards["rewarding_weight"] * (satisfied == (N + M))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


# ---- server/Gym/environments/taking_prime_game/environment.py ----

class TakingPrimeGame_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1857
    prompt_template = \
r"""There are {N} stones in a pile and two players: Stan and his opponent. On each turn, a player may remove any **prime number** of stones from the pile. A player who cannot make a move **loses** the game.

Stan goes first. Both players play **optimally**:
- If a player is guaranteed to win, they will try to win in the **minimum number of moves** possible.
- If a player is guaranteed to lose, they will try to **delay the loss** as much as possible.

**Output Format:**
Your final answer should be a single integer:
- The **total number of moves** (both players’) until Stan wins (if he must win), or
- `-1` (if he must lose).
Do **NOT** include quotes or backticks."""

    def __init__(self,
                 lose_probability: float = 0.2,
                 wrong_format: float = -1.0, correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the TakingPrimeGame_Environment instance.

        ``lose_probability`` is the chance that ``_generate`` looks for a pile
        size where the first player loses. The three reward settings are
        stored in ``self.rewards``; other keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.lose_probability = lose_probability
        self.rewards = {
            "wrong_format": wrong_format,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    def _generate(self) -> None:
        """Pick a pile size N in [1, MAX_N] and store the game's outcome.

        Reads ``parameter["MAX_N"]`` (must be >= 1); writes ``parameter["N"]``
        and ``parameter["reference_answer"]`` (the move count when the first
        player wins, ``-1`` otherwise).
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"

        # Sieve of Eratosthenes over 0..MAX_N.
        is_prime = [True] * (MAX_N + 1)
        for small in range(min(2, MAX_N + 1)):
            is_prime[small] = False
        primes = []
        for i in range(2, MAX_N + 1):
            if is_prime[i]:
                primes.append(i)
                for multiple in range(i * i, MAX_N + 1, i):
                    is_prime[multiple] = False

        # win[s]: the player to move with s stones wins under optimal play.
        # dp_moves[s]: total moves until the game ends from s stones, where a
        # winner minimizes and a loser maximizes the game length.
        win = [False] * (MAX_N + 1)
        dp_moves = [0] * (MAX_N + 1)

        for stones in range(2, MAX_N + 1):
            fastest_win = (MAX_N + 1) * 100  # sentinel larger than any game length
            slowest_loss = 0
            can_win = False
            for p in primes:
                if p > stones:
                    break
                rest = stones - p
                if win[rest]:
                    slowest_loss = max(slowest_loss, dp_moves[rest] + 1)
                else:
                    can_win = True
                    fastest_win = min(fastest_win, dp_moves[rest] + 1)
            win[stones] = can_win
            dp_moves[stones] = fastest_win if can_win else slowest_loss

        lose = random.random() < self.lose_probability
        # Fix: the rejection loop below never terminates when no pile size in
        # [1, MAX_N] has the requested outcome (e.g. MAX_N == 1 offers no
        # winning start). Fall back to the other outcome in that case; the
        # random stream is unchanged whenever the original loop terminated.
        if (not lose) not in win[1:]:
            lose = not lose
        while True:
            N = self.parameter["N"] = random.randint(1, MAX_N)
            if win[N] != lose:
                break
        self.parameter["reference_answer"] = dp_moves[N] if win[N] else -1

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the pile size."""
        return self.prompt_template.format(N=self.parameter["N"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``: exact match, mismatch, or bad format."""
        value = self.processor(output)
        if value is None:
            return self.rewards["wrong_format"]
        if value == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]


# ---- server/Gym/environments/task_arrangement/environment.py ----

class TaskArrangement_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P2365
    # NOTE(review): the indentation of the "+ Batch ..." sub-bullets was
    # reconstructed from whitespace-collapsed source; confirm it against the
    # original file before relying on byte-exact prompts.
    prompt_template = \
r"""You are given {N} tasks, numbered from 1 to {N}. Each task i (1 <= i <= {N}) takes T[i] units of time to complete individually and has a cost coefficient F[i]. The values are given as:
{T_and_F}

You may divide these tasks (in order) into any number of **consecutive batches**. Let the total number of batches be k (k >= 1), and let end[1], end[2], ..., end[k] (1 <= end[1] < end[2] < ... < end[k] = {N}) denote the last task index in each batch.
- This means:
  + Batch 1 contains tasks 1 to end[1]
  + Batch 2 contains tasks end[1] + 1 to end[2]
  + ...
  + Batch k contains tasks end[k - 1] + 1 to end[k] (with end[k] = {N})

- Before starting each batch, the machine must spend an additional {S} units of startup time.
- The time to **complete** a batch is the sum of T[i] for all tasks in that batch.
- Therefore, the **total completion time** of each task in a batch is the sum of the batch's startup time ({S}) and the total time of all tasks in that batch.
- All tasks in a batch are considered to finish **simultaneously**, at the end of that batch.

- Tasks are completed in the order defined by the batch division.
- The cost of each task is equal to **the time when its batch finishes (after all previous batches, if any, have completed and the current batch has been processed), multiplied by F[i]**.
- The **total cost** is the sum of the costs of all tasks.

Try your best to find a batch division (end[1], end[2], ..., end[k]) that **minimizes the total cost**.

**Output Format:**
Your final answer should be a single line containing end[1], end[2], ..., end[k] (with end[k] always equal to {N}), separated by **spaces**.
Example: `1 2 {N}` (do **NOT** include the backticks or quotes); this means:
- There are 3 batches,
- The first batch ends at task 1,
- The second batch ends at task 2,
- The last batch ends at task {N} and includes the remaining tasks.
"""

    def __init__(self,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = +3.0,
                 **kwargs):
        """
        Initialize the TaskArrangement_Environment instance.

        The five reward settings are stored in ``self.rewards``; every other
        keyword argument is forwarded to the base class.
        """

        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _total_cost(self, ends: List[int]) -> int:
        """Return the total cost of the batch division ``ends``.

        ``ends`` holds the 1-based index of the last task of each batch, in
        increasing order. Reads ``S``, ``T`` and ``F`` from ``self.parameter``.
        """
        S = self.parameter["S"]
        T, F = self.parameter["T"], self.parameter["F"]

        cost, finish_time, start = 0, 0, 0
        for end in ends:
            # Tasks start + 1 .. end (1-based) are T[start:end] (0-based).
            finish_time += S + sum(T[start:end])
            cost += finish_time * sum(F[start:end])
            start = end
        return cost

    def _generate(self) -> None:
        """Draw S, T and F, then solve for an optimal batch division.

        Reads ``parameter["N"]`` (must be >= 3); writes ``S``, ``T``, ``F``,
        ``reference_answer`` (space-separated batch ends) and
        ``reference_answer_cost``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        S = self.parameter["S"] = random.randint(0, N * 3)
        # All of T is drawn before any of F, matching the original draw order.
        T = [None] + [random.randint(1, N) for _ in range(N)]
        F = [None] + [random.randint(1, N) for _ in range(N)]
        self.parameter["T"], self.parameter["F"] = T[1:], F[1:]
        assert len(self.parameter["T"]) == N, "T should have length N"
        assert len(self.parameter["F"]) == N, "F should have length N"

        prefix_T = [0] * (N + 1)
        for i in range(1, N + 1):
            prefix_T[i] = prefix_T[i - 1] + T[i]

        suffix_F = [0] * (N + 2)
        for i in range(N, 0, -1):
            suffix_F[i] = suffix_F[i + 1] + F[i]

        # best[i]: minimal cost attributed to the first i tasks, where a batch
        # j..i charges its duration (S + its task time) to every task from j
        # onward, since it delays all of them. batch_start[i] records the
        # first j that achieves best[i].
        best = [None] * (N + 1)
        batch_start = [None] * (N + 1)
        best[0] = 0
        for i in range(1, N + 1):
            for j in range(1, i + 1):
                candidate = best[j - 1] + (S + prefix_T[i] - prefix_T[j - 1]) * suffix_F[j]
                if best[i] is None or best[i] > candidate:
                    best[i] = candidate
                    batch_start[i] = j

        # Walk the recorded choices backwards to recover the batch ends.
        ends = []
        now = N
        while now:
            ends.append(now)
            now = batch_start[now] - 1
        ends.reverse()

        answer = self._total_cost(ends)
        assert answer == best[N]

        self.parameter["reference_answer"] = " ".join(map(str, ends))
        self.parameter["reference_answer_cost"] = answer
        assert answer > 0, "answer should be greater than 0"

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, S and one "T[i]=.. F[i]=.." line per task."""
        N = self.parameter["N"]
        T, F = self.parameter["T"], self.parameter["F"]
        return self.prompt_template.format(
            N=N,
            S=self.parameter["S"],
            T_and_F="\n".join("T[{}]={} F[{}]={}".format(i, T[i - 1], i, F[i - 1]) for i in range(1, N + 1)),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse the answer into a non-empty list of integers.

        Returns None when the answer is absent, empty, or has a non-integer token.
        """
        if answer is None:
            return None  # Invalid answer format
        try:
            answer_array = list(map(int, answer.strip().split()))
        except ValueError:
            return None  # Invalid answer format
        return answer_array or None

    def scorer(self, output: str) -> float:
        """Score a batch division.

        An unparsable answer earns ``wrong_format``; ends that are out of
        range, not strictly increasing, or not finishing at N earn
        ``invalid_solution``; otherwise the division's cost is compared with
        the reference cost using the configured strategy.
        """
        ends = self.processor(output)
        if ends is None:
            return self.rewards["wrong_format"]
        assert isinstance(ends, list), "processed_result should be a list"

        N = self.parameter["N"]

        previous = 0
        for end in ends:
            if not (1 <= end <= N) or not (previous < end):
                return self.rewards["invalid_solution"]
            previous = end
        if ends[-1] != N:
            return self.rewards["invalid_solution"]

        answer = self._total_cost(ends)
        gold = self.parameter["reference_answer_cost"]
        assert gold <= answer, "answer should be greater than or equal to gold"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))


# ---- server/Gym/environments/tetris_attack/environment.py ----

class TetrisAttack_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3460
    prompt_template = \
r"""There is an array A (initially it is of length 2 × {N}, containing each integer from 0 to {N_minus_1} exactly twice). Initially, the array A is: {A}

The array follows this rule:
- If there are two adjacent equal elements A[i] == A[i + 1], they are both removed from the array.
- After each removal, the array is compacted (i.e., elements are re-indexed from 0 to the new length), and the process continues as long as such adjacent pairs exist.

Once the array becomes stable (i.e., no adjacent equal pairs remain), you may perform a **swap** between any two adjacent elements A[i] and A[i + 1] (0 ≤ i < current array length - 1). After a swap, the same removal process restarts and continues until stable again. Please **remove all elements from the array**, using the **minimum number of swaps**. Output a single line containing the indices of the swaps (space-separated), where each index `i` indicates a swap between A[i] and A[i + 1]."""

    def __init__(self,
                 cost_range: int = 10,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5, unsuccessful_solution: float = -0.2, rewarding_strategy: str = "(gold/answer)^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the TetrisAttack_Environment instance.

        ``cost_range`` is stored on the instance; the six reward settings are
        stored in ``self.rewards``; other keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.cost_range = cost_range
        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "unsuccessful_solution": unsuccessful_solution,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    @staticmethod
    def _collapse(values: List[int]) -> List[int]:
        """Return ``values`` with adjacent equal pairs removed until none remain.

        The fully reduced array does not depend on the order in which pairs
        are removed, so one stack pass gives the same result as rescanning.
        """
        stack = []
        for value in values:
            if stack and stack[-1] == value:
                stack.pop()
            else:
                stack.append(value)
        return stack

    def _generate(self) -> None:
        """Shuffle a stable starting array and record a reference swap sequence.

        Reads ``parameter["N"]`` (must be >= 2); writes ``A``, ``gold_answer``
        (number of swaps) and ``reference_answer`` (space-separated indices).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        A = self.parameter["A"] = list(range(N)) + list(range(N))
        # Reshuffle until the starting array has no adjacent equal pair.
        while True:
            random.shuffle(A)
            if all(a != b for a, b in zip(A, A[1:])):
                break

        seen = [False] * N
        stack = []  # elements of the processed prefix that are still present
        swaps = []
        for x in A:
            if seen[x]:
                # The incoming x sits right after the stack; swapping at
                # indices len(stack) - 1 down to pos + 1 moves it next to its
                # partner at pos, after which both vanish.
                pos = stack.index(x)
                swaps.extend(range(len(stack) - 1, pos, -1))
                del stack[pos]
            else:
                stack.append(x)
                seen[x] = True
        assert swaps, "There should be at least one swap to remove all elements from the array"
        self.parameter["gold_answer"] = len(swaps)
        self.parameter["reference_answer"] = " ".join(map(str, swaps))

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, N - 1 and the "A[i]=v" listing."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            A=" ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse the answer into a (possibly empty) list of integers.

        Returns None when the answer is absent or has a non-integer token.
        """
        if answer is None:
            return None
        try:
            return list(map(int, answer.strip().split()))
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Replay the submitted swaps and score them.

        An unparsable answer earns ``wrong_format``; an out-of-range swap index
        earns ``invalid_solution``; leaving elements behind earns
        ``unsuccessful_solution``; otherwise the swap count is compared with
        the reference count using the configured strategy.
        """
        swaps = self.processor(output)
        if swaps is None:
            return self.rewards["wrong_format"]
        assert isinstance(swaps, list), "processed_result should be a list"

        A = self._collapse(self.parameter["A"])
        assert len(A) == len(self.parameter["A"]), "The input should not remove any elements from the array"

        for i in swaps:
            if not (0 <= i < len(A) - 1):
                return self.rewards["invalid_solution"]
            A[i], A[i + 1] = A[i + 1], A[i]
            A = self._collapse(A)

        if A:
            return self.rewards["unsuccessful_solution"]

        gold, answer = self.parameter["gold_answer"], len(swaps)
        assert 0 < gold <= answer, "The number of swaps in the answer should be greater than or equal to the gold answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
import random
import functools
from typing import Optional
from ...environment import VerifiableEnvironment


# ---- server/Gym/environments/three_string_common_subsequence_counting/environment.py ----

class ThreeStringCommonSubsequenceCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3856
    prompt_template = \
r"""There are three strings A, B, and C:
A: {A}
B: {B}
C: {C}

A string T is called a **subsequence** of another string S if T can be obtained from S by deleting zero or more characters without changing the order of the remaining characters. What is the number of **non-empty strings** that are subsequences of **A, B, and C simultaneously**?"""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta", rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the ThreeStringCommonSubsequenceCounting_Environment instance.

        The four reward settings are stored in ``self.rewards``; every other
        keyword argument is forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    def _generate(self) -> None:
        """Draw three random strings over {a, b} and count their common subsequences.

        Reads ``parameter["MAX_N"]`` (must be >= 3); writes ``parameter["S"]``
        (the three strings) and ``parameter["reference_answer"]``.
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 3, "MAX_N should be greater than or equal to 3"

        strings = self.parameter["S"] = []
        # One shared bias decides how often 'a' is chosen in all three strings.
        a_probability = random.random()
        for _ in range(3):
            length = random.randint(3, MAX_N)
            letters = ["a" if random.random() < a_probability else "b" for _ in range(length)]
            strings.append("".join(letters))

        def next_occurrence(text):
            # table[p][c] = smallest 1-based position > p holding letter c,
            # or 0 when there is none; position 0 means "before the start".
            size = len(text)
            table = [[0] * 2 for _ in range(size + 1)]
            for p in range(size - 1, -1, -1):
                row = table[p + 1].copy()
                row[ord(text[p]) - ord('a')] = p + 1
                table[p] = row
            return table

        nextA, nextB, nextC = (next_occurrence(text) for text in strings)

        # Memoized DFS: number of distinct common subsequences (including the
        # empty one) that can still be appended after positions (u, v, w).
        @functools.lru_cache(None)
        def dfs(u, v, w):
            total = 1  # the empty extension; removed once at the very end
            for ch in range(2):
                nu = nextA[u][ch]
                nv = nextB[v][ch]
                nw = nextC[w][ch]
                if nu and nv and nw:
                    total += dfs(nu, nv, nw)
            return total

        # Subtract 1 to exclude the empty subsequence.
        self.parameter["reference_answer"] = dfs(0, 0, 0) - 1

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the three generated strings."""
        first, second, third = self.parameter["S"][0], self.parameter["S"][1], self.parameter["S"][2]
        return self.prompt_template.format(A=first, B=second, C=third)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score ``output`` against the stored reference answer.

        Missing, unparsable or negative answers earn the ``wrong_format``
        reward; otherwise the configured rewarding strategy is applied.
        """
        value = self.processor(output)
        if value is None or value < 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            if self.parameter["reference_answer"] == 0:
                # Avoids 0/0 when both the reference and the answer are zero.
                return self.rewards["rewarding_weight"] * int(value == 0)
            reference = self.parameter["reference_answer"]
            ratio = min(reference, value) / max(reference, value)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (value == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class ThreeVertexCycleCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P1989
    """Task: count the three-vertex cycles (triangles) of a random undirected graph.

    ``_generate`` samples the edge list and stores the triangle count as the
    reference answer; ``scorer`` grades a single non-negative integer.
    """

    prompt_template = r"""You are given an **undirected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`.
The graph contains the following undirected edges:
{edges}

Please count the number of distinct **three‐vertex cycles** in the graph (the order of vertices in the cycle does not matter, and cycles are considered distinct if they have different sets of vertices)."""

    def __init__(
        self,
        wrong_format: float = -1.0,
        rewarding_strategy: str = "(min/max)^beta",
        rewarding_weight: float = 1.0,
        rewarding_beta: float = 10.0,
        **kwargs,
    ):
        """Store the reward configuration; remaining kwargs go to the base class."""
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Sample the graph and compute its triangle count.

        Reads ``N`` and ``edge_ratio`` from ``self.parameter``; writes
        ``edges`` (pairs ``(u, v)`` with ``u < v``) and ``reference_answer``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter"
        edge_ratio = self.parameter["edge_ratio"]

        # Draw the edge set from all unordered pairs, then shuffle its order.
        candidate_pairs = [(u, v) for u in range(N) for v in range(u + 1, N)]
        edge_count = max(1, min(N * (N - 1) // 2, int(edge_ratio * N)))
        edges = random.sample(candidate_pairs, edge_count)
        self.parameter["edges"] = edges
        random.shuffle(edges)

        assert all(0 <= u < v < N for u, v in edges)
        assert len(set(edges)) == len(edges), "edges should be unique"

        degree = [0] * N
        for edge in edges:
            for endpoint in edge:
                degree[endpoint] += 1

        # Orient every edge from the endpoint with the smaller (degree, label)
        # key to the larger one, so each triangle is seen from exactly one apex.
        def rank(vertex):
            return (degree[vertex], vertex)

        successors = [set() for _ in range(N)]
        for u, v in edges:
            tail, head = sorted((u, v), key=rank)
            successors[tail].add(head)

        # A triangle is a two-hop path i -> j -> k where k is also a direct
        # successor of i.
        triangle_count = sum(
            len(successors[i] & successors[j])
            for i in range(N)
            for j in successors[i]
        )

        self.parameter["reference_answer"] = triangle_count

    def _prompt_generate(self) -> str:
        """Render the prompt with one ``(u, v)`` edge per line."""
        N = self.parameter["N"]
        edge_lines = ["({}, {})".format(u, v) for u, v in self.parameter["edges"]]
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            edges="\n".join(edge_lines),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return ``None`` if missing or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score ``output`` against the reference triangle count.

        Unparseable or negative answers get the ``wrong_format`` reward.
        Otherwise the configured strategy decides the reward.
        """
        guess = self.processor(output)
        if guess is None or guess < 0:
            return self.rewards["wrong_format"]

        gold = self.parameter["reference_answer"]
        strategy = self.rewards["rewarding_strategy"]

        if strategy == "(min/max)^beta":
            if guess == 0:
                # Avoid 0/0: a zero guess is right only when the reference is zero.
                return self.rewards["rewarding_weight"] * (gold == 0)
            ratio = min(gold, guess) / max(gold, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (guess == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
# File: server/Gym/environments/topological_sort/environment.py
class TopologicalSort_Environment(VerifiableEnvironment) :
    """Task: output any permutation that satisfies a set of "j must be before i" constraints.

    The constraints are derived from a hidden random permutation, so at least
    one valid answer (the stored reference answer) always exists.
    """
    prompt_template = \
r"""Please find a permutation of `0` to `{N_minus_1}` ({N} integers in total) such that the following conditions are satisfied:
{before_conditions}

**Output Format:** Your final answer should be a single line containing the permutation `p(0), p(1), ..., p({N_minus_1})`, separated by spaces."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the TopologicalSort_Environment instance.

        The reward settings are stored in ``self.rewards``; all other keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Sample a hidden permutation and the ordering constraints it satisfies.

        Reads ``N`` from ``self.parameter``; writes ``reference_answer`` (the
        hidden permutation as a space-separated string) and
        ``before_conditions`` (a shuffled list of ``(earlier, later)`` pairs).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        # The message used to say "2" although the check requires at least 3.
        assert N >= 3, "N should be greater than or equal to 3"

        permutation = list(range(N))
        random.shuffle(permutation)
        self.parameter["reference_answer"] = " ".join(map(str, permutation))

        before_conditions = self.parameter["before_conditions"] = []
        for i in range(N) :
            if i == 0 :
                continue
            # Every non-first element gets at least one predecessor constraint,
            # so before_conditions is never empty (scorer divides by its length).
            for j in random.sample(range(i), random.randint(1, i)) :
                before_conditions.append((permutation[j], permutation[i]))
        random.shuffle(before_conditions)


    def _prompt_generate(self) -> str :
        """Render the prompt with one "j must be before i" line per constraint."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            before_conditions = "\n".join("{} must be before {}".format(j, i) for j, i in self.parameter["before_conditions"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None if missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score ``output`` by the number of satisfied ordering constraints.

        Returns ``wrong_format`` when the answer cannot be parsed,
        ``invalid_solution`` when it is not a permutation of ``0..N-1``, and
        otherwise the reward chosen by ``rewarding_strategy``.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            permutation = processed_result
            if len(permutation) != self.parameter["N"] :
                return self.rewards["invalid_solution"]
            if len(set(permutation)) != self.parameter["N"] :
                return self.rewards["invalid_solution"]
            if not all(0 <= i < self.parameter["N"] for i in permutation) :
                return self.rewards["invalid_solution"]

            # positions[v] = index at which value v appears in the answer
            positions = [None] * self.parameter["N"]
            for i, p in enumerate(permutation) :
                positions[p] = i

            satisfied = sum(positions[j] < positions[i] for j, i in self.parameter["before_conditions"])
            assert satisfied <= len(self.parameter["before_conditions"]), "satisfied should not exceed the number of conditions"

            if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" :
                return self.rewards["rewarding_weight"] * ((satisfied / len(self.parameter["before_conditions"])) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "satisfied/all" :
                # NOTE(review): despite its name this strategy is all-or-nothing
                # (full weight only if every condition holds) - confirm intended.
                return self.rewards["rewarding_weight"] * (satisfied == len(self.parameter["before_conditions"]))
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/topological_sort_minimal_lexicographical_order/environment.py
class TopologicalSort_MinimalLexicographicalOrder_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3243
    """Task: among all valid topological orders, place 0 as early as possible, then 1, and so on.

    The reward has two parts: one for satisfying the ordering constraints and
    one (only when all constraints hold) for matching the gold permutation.
    """
    prompt_template = \
r"""Please find a permutation of `0` to `{N_minus_1}` ({N} integers in total) such that the following conditions are satisfied:
{before_conditions}

If multiple permutations satisfy the conditions, choose the one where:
(1) `0` should appear as early as possible;
(2) Subject to that, `1` should appear as early as possible;
(3) Subject to that, `2` should appear as early as possible;
(4) And so on...

**Output Format:** Your final answer should be a single line containing the permutation `p(0), p(1), ..., p({N_minus_1})`, separated by spaces."""

    def __init__(self,
                 max_indeg : int = 3, # Maximum in-degree of each vertex
                 wrong_format : float = -1.0, invalid_solution : float = -0.5,
                 rewarding_strategy_toposort : str = "(satisfied/all)^beta", rewarding_weight_toposort : float = +0.5, rewarding_beta_toposort : float = 5.0,
                 rewarding_strategy_lexicographical : str = "mean([gold=answer])^beta", rewarding_weight_lexicographical : float = +0.5, rewarding_beta_lexicographical : float = 5.0,
                 **kwargs) :
        """
        Initialize the TopologicalSort_MinimalLexicographicalOrder_Environment instance.

        ``max_indeg`` caps how many predecessor constraints each element can
        receive in ``_generate``. The reward settings are stored in
        ``self.rewards``; other keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.max_indeg = max_indeg
        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy_toposort" : rewarding_strategy_toposort,
            "rewarding_weight_toposort" : rewarding_weight_toposort,
            "rewarding_beta_toposort" : rewarding_beta_toposort,
            "rewarding_strategy_lexicographical" : rewarding_strategy_lexicographical,
            "rewarding_weight_lexicographical" : rewarding_weight_lexicographical,
            "rewarding_beta_lexicographical" : rewarding_beta_lexicographical,
        }


    def _generate(self) -> None :
        """
        Sample acyclic ordering constraints and compute the gold permutation.

        Reads ``N`` from ``self.parameter``; writes ``before_conditions``,
        ``gold_answer`` (list of ints) and ``reference_answer`` (the same
        permutation as a space-separated string).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        # The message used to say "2" although the check requires at least 3.
        assert N >= 3, "N should be greater than or equal to 3"

        permutation = list(range(N))
        random.shuffle(permutation)

        before_conditions = self.parameter["before_conditions"] = []
        # An element may draw zero predecessors here, so retry until at least
        # one constraint exists (scorer divides by the number of constraints).
        while True :
            for i in range(N) :
                if i == 0 :
                    continue
                for j in random.sample(range(i), random.randint(0, min(i, self.max_indeg))) :
                    before_conditions.append((permutation[j], permutation[i]))
            if before_conditions :
                break
        random.shuffle(before_conditions)


        # --- build the reverse graph (after -> before) -----------------------
        adjacency = [[] for _ in range(N)]  # adjacency[u] holds every v with a reversed edge u -> v
        indeg = [0] * N                     # in-degree of each vertex in the reversed graph

        for before, after in before_conditions :
            adjacency[after].append(before)
            indeg[before] += 1

        # --- Kahn's algorithm with a *max*-heap ------------------------------
        # Always extracting the largest available label on the reversed graph
        # and reversing the result is the usual technique for this problem.
        pq = []
        for i in range(N) :
            if indeg[i] == 0 :
                heapq.heappush(pq, -i)  # negate to turn the min-heap into a max-heap

        order = []  # extraction order on the reversed graph
        while pq :
            u = -heapq.heappop(pq)  # restore the original index
            order.append(u)
            for v in adjacency[u] :
                indeg[v] -= 1
                if indeg[v] == 0 :
                    heapq.heappush(pq, -v)

        # --- output ----------------------------------------------------------
        # Constraints come from a hidden permutation, so a cycle means a bug.
        assert len(order) == N, "before_conditions should not contain a cycle"
        self.parameter["gold_answer"] = list(reversed(order))  # store the gold answer as a list of integers
        self.parameter["reference_answer"] = " ".join(map(str, self.parameter["gold_answer"]))


    def _prompt_generate(self) -> str :
        """Render the prompt with one "j must be before i" line per constraint."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            before_conditions = "\n".join("{} must be before {}".format(j, i) for j, i in self.parameter["before_conditions"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None if missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score ``output`` on constraint satisfaction plus closeness to the gold order.

        Returns ``wrong_format`` when the answer cannot be parsed and
        ``invalid_solution`` when it is not a permutation of ``0..N-1``.
        Otherwise it returns the toposort reward, plus the lexicographical
        reward when every constraint is satisfied.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            permutation = processed_result
            if len(permutation) != self.parameter["N"] :
                return self.rewards["invalid_solution"]
            if len(set(permutation)) != self.parameter["N"] :
                return self.rewards["invalid_solution"]
            if not all(0 <= i < self.parameter["N"] for i in permutation) :
                return self.rewards["invalid_solution"]

            # positions[v] = index at which value v appears in the answer
            positions = [None] * self.parameter["N"]
            for i, p in enumerate(permutation) :
                positions[p] = i


            reward = 0.0

            satisfied = sum(positions[j] < positions[i] for j, i in self.parameter["before_conditions"])
            assert satisfied <= len(self.parameter["before_conditions"]), "satisfied should not exceed the number of conditions"
            if self.rewards["rewarding_strategy_toposort"] == "(satisfied/all)^beta" :
                reward += self.rewards["rewarding_weight_toposort"] * ((satisfied / len(self.parameter["before_conditions"])) ** self.rewards["rewarding_beta_toposort"])
            elif self.rewards["rewarding_strategy_toposort"] == "satisfied/all" :
                # NOTE(review): all-or-nothing despite the name - confirm intended.
                reward += self.rewards["rewarding_weight_toposort"] * (satisfied == len(self.parameter["before_conditions"]))
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_toposort"]))

            # The lexicographical part only counts for fully valid topological orders.
            if satisfied == len(self.parameter["before_conditions"]) :
                if self.rewards["rewarding_strategy_lexicographical"] == "mean([gold=answer])^beta" :
                    reward += self.rewards["rewarding_weight_lexicographical"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], permutation)) / self.parameter["N"]) ** self.rewards["rewarding_beta_lexicographical"])
                elif self.rewards["rewarding_strategy_lexicographical"] == "gold=answer" :
                    reward += self.rewards["rewarding_weight_lexicographical"] * (self.parameter["gold_answer"] == permutation)
                else :
                    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_lexicographical"]))

            return reward
        else :
            return self.rewards["wrong_format"]


# File: server/Gym/environments/tournament_longest_path/environment.py
class Tournament_LongestPath_Environment(VerifiableEnvironment) :
    """Task: find a longest simple path from a given start vertex in a random tournament graph."""
    prompt_template = \
r"""You are given a **directed graph** with {N} vertices labeled from `0` to `{N_minus_1}`. The graph contains the following directed edges. Each edge is represented as a tuple `(s, t)`, meaning there is a directed edge **from vertex `s` to vertex `t`**:
{edges}

It is guaranteed that there is **exactly one directed edge** between every pair of two distinct vertices.
Please find the **longest path** starting from vertex `{S}`, such that no vertex is visited more than once. Output the path as a sequence of vertex labels, starting from `{S}`, separated by spaces, in the order they are visited."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the Tournament_LongestPath_Environment instance.

        The reward settings are stored in ``self.rewards``; all other keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Sample a tournament, a start vertex, and a longest path from it.

        Reads ``N`` from ``self.parameter``; writes ``TO`` (adjacency matrix,
        ``TO[s][t]`` is True for an edge s -> t), ``S`` (start vertex),
        ``gold_answer`` (length of the reference path) and
        ``reference_answer`` (the path as a space-separated string).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # One direction per unordered pair; keep_probability biases i -> j (i < j).
        keep_probability = random.random()
        self.parameter["TO"] = TO = [[False] * N for _ in range(N)]
        for i in range(N) :
            for j in range(i + 1, N) :
                if random.random() < keep_probability :
                    TO[i][j] = True
                else :
                    TO[j][i] = True


        # Tarjan's algorithm for SCC
        dfn = [0] * N            # discovery time, 0 = not visited yet
        low = [0] * N
        on_stack = [False] * N
        stack = []
        scc = [0] * N            # scc[v] = component id of v
        comp_nodes = []          # comp_nodes[cid] = vertices of component cid
        time_counter = 0
        scc_count = 0

        # NOTE: tarjan() calls itself once per vertex along a DFS path, so the
        # usable N is bounded by Python's recursion limit.
        def tarjan(u) :
            nonlocal time_counter, scc_count
            time_counter += 1
            dfn[u] = low[u] = time_counter
            stack.append(u)
            on_stack[u] = True
            for v in range(N) :
                if TO[u][v] :
                    if dfn[v] == 0 :
                        tarjan(v)
                        low[u] = min(low[u], low[v])
                    elif on_stack[v] :
                        low[u] = min(low[u], dfn[v])
            if dfn[u] == low[u] :
                comp_nodes.append([])
                cid = scc_count
                scc_count += 1
                while True :
                    w = stack.pop()
                    on_stack[w] = False
                    scc[w] = cid
                    comp_nodes[cid].append(w)
                    if w == u :
                        break

        for i in range(N) :
            if dfn[i] == 0 :
                tarjan(i)

        # Build a Hamiltonian cycle in each non-trivial SCC
        nxt = [None] * N         # nxt[v] = successor of v on its component's cycle
        def solve(cid) :
            nodes = comp_nodes[cid]
            if len(nodes) <= 1 :
                return
            # Grow a path s -> ... -> t one vertex at a time: append at the
            # tail, prepend at the head, or splice between two neighbours.
            s = t = nodes[0]
            for x in nodes[1:] :
                if TO[t][x] :
                    nxt[t] = x
                    t = x
                elif TO[x][s] :
                    nxt[x] = s
                    s = x
                else :
                    j = s
                    while j != t :
                        nj = nxt[j]
                        if TO[j][x] and TO[x][nj] :
                            nxt[x] = nj
                            nxt[j] = x
                            break
                        j = nj
            # close the cycle
            t2 = None            # last path vertex currently closing a cycle back to s
            i = nxt[s]
            while i is not None :
                if TO[i][s] :
                    t2 = i
                elif t2 is not None :
                    # i has no edge back to s: find an entry point inside the
                    # current cycle and re-root the cycle so that i ends it.
                    j = s
                    while j != t2 :
                        nj = nxt[j]
                        if TO[i][nj] :
                            x = nj
                            nxt[j] = nxt[t2]
                            nxt[t2] = s
                            s = x
                            t2 = i
                            break
                        j = nj
                i = nxt[i]
            nxt[t2] = s

        for cid in range(scc_count) :
            solve(cid)

        # Build answers for each starting vertex: walk the start component's
        # cycle once, then continue through components cid-1, cid-2, ..., 0.
        ans = [[] for _ in range(N)]
        for i in range(N) :
            x = i
            cid = scc[i]
            while True :
                ans[i].append(x)
                nodes = comp_nodes[cid]
                if len(nodes) == 1 :
                    if cid == 0 :
                        break
                    cid -= 1
                    x = comp_nodes[cid][0]
                    continue
                j = nxt[x]
                while j != x :
                    ans[i].append(j)
                    j = nxt[j]
                if cid == 0 :
                    break
                cid -= 1
                x = comp_nodes[cid][0]

        S = self.parameter["S"] = random.randint(0, N - 1)
        path = ans[S]
        self.parameter["gold_answer"] = len(path)
        self.parameter["reference_answer"] = " ".join(map(str, path))


    def _prompt_generate(self) -> str :
        """Render the prompt with one ``(s, t)`` line per directed edge and the start vertex."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(s, t) for s in range(N) for t in range(N) if self.parameter["TO"][s][t]),
            S = self.parameter["S"],
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a non-empty whitespace-separated list of integers; return None otherwise."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                if not answer_array :
                    return None
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score ``output`` by the length of the submitted path relative to the gold length.

        Returns ``wrong_format`` when the answer cannot be parsed, and
        ``invalid_solution`` when the path does not start at ``S``, leaves the
        vertex range, repeats a vertex, or uses a missing edge.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            path = processed_result
            if len(path) == 0 :
                return self.rewards["wrong_format"]
            if path[0] != self.parameter["S"] :
                return self.rewards["invalid_solution"]
            if not all(0 <= vertex < self.parameter["N"] for vertex in path) :
                return self.rewards["invalid_solution"]
            if len(set(path)) != len(path) :
                return self.rewards["invalid_solution"]
            if not all(self.parameter["TO"][s][t] for s, t in zip(path, path[1 :])) :
                return self.rewards["invalid_solution"]

            answer, gold = len(path), self.parameter["gold_answer"]
            assert 0 < answer <= gold, "Answer length should be positive and not exceed gold length"
            if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
                return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (gold == answer)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
class TransmissionDelay_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2929
    """Task: count the distinct binary arrays reachable when every element moves at most D positions."""
    prompt_template = \
r"""You are given a binary (0/1) array A of length {N} (1-indexed): {A}

You can generate a new array A′ by the following operation:
1) Choose a permutation P of 1, 2, ..., {N} such that for every i (1 ≤ i ≤ {N}), |i − P[i]| ≤ {D}.
2) For every i (1 ≤ i ≤ {N}), set A′[i] = A[P[i]].

Can you tell me the number of **distinct** arrays A′ that can be obtained by such operations?"""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the TransmissionDelay_Environment instance.

        The reward settings are stored in ``self.rewards``; all other keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Sample the binary array ``A`` and the delay bound ``D``, then count the answers.

        Reads ``N`` from ``self.parameter``; writes ``A``, ``D`` and
        ``reference_answer`` (an exact, unbounded integer count).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        while True :
            zero_probability = random.random()
            A = self.parameter["A"] = [0 if random.random() < zero_probability else 1 for _ in range(N)]
            # Require at least two 0s and two 1s.
            if not (2 <= sum(A) <= N - 2) :
                continue

            # Upper bound for D derived from the leading/trailing offsets and
            # the gaps between equal values; resample A until it is at least 1.
            max_D = 0
            for c in (0, 1) :
                indices = [i for i, x in enumerate(A, start = 1) if x == c]
                max_D = max(max_D, max(indices[0] - 2, N - 1 - indices[-1]))
                if len(indices) > 1 :
                    max_D = max(max_D, max((indices[i] - indices[i - 1] - 2) // 2 for i in range(1, len(indices))))
            if max_D >= 1 :
                break
        D = self.parameter["D"] = random.randint(1, max_D)

        self.parameter["reference_answer"] = self._count_reachable_arrays(A, D)
        assert self.parameter["reference_answer"] > 0, "Reference answer should be positive"


    @staticmethod
    def _count_reachable_arrays(A, D) -> int :
        """
        Count the arrays obtainable from ``A`` when each element moves at most ``D`` positions.

        The DP fills positions N down to 1. A state is the number ``j`` of
        zeros still to be placed in positions ``i..N``. The zero (or one)
        placed at position ``i`` is always the next unplaced one in original
        order, and the move is allowed only if it is at most ``D`` away.

        Counts are plain Python ints. The previous implementation stored them
        in ``array('I')`` without any modulo, which raises ``OverflowError``
        once a count exceeds the C unsigned-int range.
        """
        N = len(A)
        # 1-based positions of the 0s and 1s; index 0 is a dummy entry.
        p0 = [0] + [i for i, x in enumerate(A, start = 1) if x == 0]
        p1 = [0] + [i for i, x in enumerate(A, start = 1) if x != 0]
        cnt0 = len(p0) - 1
        cnt1 = len(p1) - 1

        # Row for i = N + 1: nothing left to place, exactly one way.
        next_row = [0] * (cnt0 + 1)
        next_row[0] = 1

        for i in range(N, 0, -1) :
            L = N - i + 1                     # number of positions in i..N
            row = [0] * (cnt0 + 1)
            # Valid states: j zeros and L - j ones, with j <= cnt0 and L - j <= cnt1.
            for j in range(max(0, L - cnt1), min(L, cnt0) + 1) :
                k_ones = L - j
                total = 0

                # Try placing a '0' at position i
                if j > 0 and abs(p0[cnt0 - j + 1] - i) <= D :
                    total += next_row[j - 1]

                # Try placing a '1' at position i
                if k_ones > 0 and abs(p1[cnt1 - k_ones + 1] - i) <= D :
                    total += next_row[j]

                row[j] = total
            next_row = row

        return next_row[cnt0]


    def _prompt_generate(self) -> str :
        """Render the prompt with ``A`` written as ``A[i]=value`` items joined by ';'."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            A = ";".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"], start = 1)),
            D = self.parameter["D"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Score ``output`` against the reference count.

        Unparseable or non-positive answers get the ``wrong_format`` reward
        (the reference answer is always positive). Otherwise the configured
        strategy decides the reward.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result <= 0 :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
b/server/Gym/environments/tree_add_one_edge_diameter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f9dcfbe54deb06609daa18d9c7c796d9a732949a --- /dev/null +++ b/server/Gym/environments/tree_add_one_edge_diameter/__init__.py @@ -0,0 +1 @@ +from .environment import TreeAddOneEdgeDiameter_Environment diff --git a/server/Gym/environments/tree_add_one_edge_diameter/environment.py b/server/Gym/environments/tree_add_one_edge_diameter/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..883d4e35efe25d876c419d691bacbebe0f645d64 --- /dev/null +++ b/server/Gym/environments/tree_add_one_edge_diameter/environment.py @@ -0,0 +1,249 @@ +import random +import networkx +from typing import Optional, Tuple +from ...environment import VerifiableEnvironment + + +class TreeAddOneEdgeDiameter_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3771 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `1` to `{N}`. The tree contains the following {N_minus_1} undirected edges, where each tuple `(u, v, w)` represents an edge between vertices `u` and `v` with weight `w`: +{edges} + +Let's add **exactly one undirected edge** with weight {L} to the tree. Our goal is to minimize the **longest distance** between any two vertices in the resulting graph. The distance between two vertices is defined as the sum of edge weights along the shortest path connecting them. Output two integers `x y` (do NOT include quotes), separated by a space, indicating the two vertices to which the new edge of weight {L} is added.""" + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, + rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the TreeAddOneEdgeDiameter_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format": wrong_format, + "invalid_solution": invalid_solution, + "rewarding_strategy": rewarding_strategy, + "rewarding_weight": rewarding_weight, + "rewarding_beta": rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u + 1, v + 1, random.randint(0, N))) # Convert to 1-based indexing + random.shuffle(edges) + + for u, v, w in edges : + assert 1 <= u < v <= N + assert len(edges) == len(set((u, v) for u, v, w in edges)) == N - 1 + + L = self.parameter["L"] = random.randint(0, N) + + + NEG_INF = 0 + + # Build adjacency list + e = [[] for _ in range(N+1)] + for u, v, w in edges: + e[u].append((v, w)) + e[v].append((u, w)) + NEG_INF -= w + 1 + + # 1) Find S: the farthest node from node 1 + dis1 = [0] * (N+1) + stack = [(1, 0)] + while stack: + u, p = stack.pop() + for v, w in e[u]: + if v == p: + continue + dis1[v] = dis1[u] + w + stack.append((v, u)) + S = max(range(1, N+1), key=lambda i: dis1[i]) + + # 2) DFS from S to compute distances (dis) and subtree max-distance (mx), plus parent pointers + dis = [0] * (N+1) + mx = [0] * (N+1) + parent = [0] * (N+1) + stack2 = [(S, 0, 0)] # (node, parent, state) state=0: pre, state=1: post + while stack2: + u, p, st = stack2.pop() + if st == 0: + parent[u] = p + stack2.append((u, p, 1)) + for v, w in e[u]: + if v == p: + continue + dis[v] = dis[u] + w + stack2.append((v, u, 0)) + else: + mxd = dis[u] + for v, _ in e[u]: + if v == p: + continue + if mx[v] > mxd: + mxd = mx[v] + mx[u] = mxd + + # 3) Find T: the farthest node from S, and record 
the original diameter + T = max(range(1, N+1), key=lambda i: dis[i]) + diam = dis[T] + + # 4) Extract the diameter path from S to T + p_nodes = [] + u = T + while True: + p_nodes.append(u) + if u == S: + break + u = parent[u] + p_nodes.reverse() + cnt = len(p_nodes) + + # 5) Compute prefix distances along the path (pre) and branch depths (val) + pre = [0] * (cnt+2) + val = [0] * (cnt+2) + for i in range(1, cnt+1): + pre[i] = dis[p_nodes[i-1]] + for i in range(1, cnt+1): + node = p_nodes[i-1] + prev_node = p_nodes[i-2] if i > 1 else None + next_node = p_nodes[i] if i < cnt else None + best = 0 + for v, _ in e[node]: + if v == prev_node or v == next_node: + continue + depth = mx[v] - dis[node] + if depth > best: + best = depth + val[i] = best + + # 6) Prepare sorted index lists for the two-pointer checks + p1 = [0] + sorted(range(1, cnt+1), key=lambda i: val[i] + pre[i]) + p2 = [0] + sorted(range(1, cnt+1), key=lambda i: val[i] - pre[i], reverse=True) + + # 7) Feasibility check: can we achieve diameter <= x after adding the new edge? 
+ def check(x): + A = B = C = D = NEG_INF + mx1 = mx2 = NEG_INF + j = 0 + + # First pass: accumulate constraints from violating pairs + for idx in range(1, cnt+1): + i_idx = p1[idx] + while j+1 <= cnt and (val[i_idx] + pre[i_idx] + + val[p2[j+1]] - pre[p2[j+1]] > x): + j += 1 + k = p2[j] + c1 = val[k] + pre[k] + if c1 > mx1: mx1 = c1 + c2 = val[k] - pre[k] + if c2 > mx2: mx2 = c2 + + # Update A, B, C, D + t = val[i_idx] + pre[i_idx] + mx1 + if t > A: A = t + t = val[i_idx] - pre[i_idx] + mx1 + if t > B: B = t + t = val[i_idx] + pre[i_idx] + mx2 + if t > C: C = t + t = val[i_idx] - pre[i_idx] + mx2 + if t > D: D = t + + # If no pairs violated for all i, it's already feasible + if idx == cnt and j == 0: + return True + + # Adjust constraints by (L - x) + delta = L - x + A += delta; B += delta; C += delta; D += delta + + # Second pass: sliding-window ranges + a, b, c, d = cnt+1, 1, 0, cnt + for i_idx in range(1, cnt+1): + while a > 1 and pre[i_idx] + pre[a-1] >= A: + a -= 1 + while b <= cnt and -pre[i_idx] + pre[b] < B: + b += 1 + while c < cnt and pre[i_idx] - pre[c+1] >= C: + c += 1 + while d >= 1 and -pre[i_idx] - pre[d] < D: + d -= 1 + + left = a if a > b else b + r1 = c if c < d else d + right = i_idx-1 if i_idx-1 < r1 else r1 + if left <= right: + return True + + return False + + # 8) Binary search for the minimal achievable diameter + left, right, ans = 0, diam, diam + while left <= right: + mid = (left + right) // 2 + if check(mid): + ans = mid + right = mid - 1 + else: + left = mid + 1 + + self.parameter["gold_answer"] = ans + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]), + L = self.parameter["L"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] : + if answer is not None : + answer = answer.strip() + try : + x, y = map(int, answer.split()) + return 
x, y + except : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + x, y = processed_result + if not (1 <= x <= self.parameter["N"] and 1 <= y <= self.parameter["N"]) : + return self.rewards["invalid_solution"] + + G = networkx.MultiGraph() + G.add_weighted_edges_from(self.parameter["edges"]) + G.add_edge(x, y, weight = self.parameter["L"]) + answer, gold = max(max(networkx.single_source_dijkstra_path_length(G, u, weight = "weight").values()) for u in G.nodes()), self.parameter["gold_answer"] + assert 0 <= gold <= answer, "The answer should be at least as large as the gold answer" + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + if answer == 0 : + assert gold == 0, "gold should be zero if answer is zero" + return self.rewards["rewarding_weight"] * 1.0 # Reward for zero answer + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_center/__init__.py b/server/Gym/environments/tree_center/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..60b5b9017ae0fb7880338ccc310b46ce80cc2684 --- /dev/null +++ b/server/Gym/environments/tree_center/__init__.py @@ -0,0 +1 @@ +from .environment import TreeCenter_Environment diff --git a/server/Gym/environments/tree_center/environment.py b/server/Gym/environments/tree_center/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..06bccc7f0ba2cc10da9affe56f53f52123e8c33a --- /dev/null +++ b/server/Gym/environments/tree_center/environment.py @@ -0,0 +1,146 @@ 
import random
import networkx
from typing import Optional
from ...environment import VerifiableEnvironment


def _build_adjacency(N, edges) :
    """Return the adjacency lists `[(neighbor, weight), ...]` of an undirected weighted tree on vertices 0..N-1."""
    adjacent = [[] for _ in range(N)]
    for u, v, w in edges :
        adjacent[u].append((v, w))
        adjacent[v].append((u, w))
    return adjacent


def _traverse(adjacent, root) :
    """
    Iterative depth-first traversal of a tree from `root`.

    Returns `(order, parent, parent_weight, dist)` where `order` is the preorder
    (identical to the order a recursive DFS over `adjacent` would visit),
    `parent[v]` is the DFS parent (-1 for the root), `parent_weight[v]` is the
    weight of the edge to the parent, and `dist[v]` is the weighted distance
    from `root`. An explicit stack is used so deep (path-like) trees cannot hit
    Python's recursion limit.
    """
    n = len(adjacent)
    parent = [-1] * n
    parent_weight = [0] * n
    dist = [0] * n
    order = []
    stack = [root]
    while stack :
        u = stack.pop()
        order.append(u)
        # Push children in reverse so they are popped in adjacency order (recursive preorder).
        for v, w in reversed(adjacent[u]) :
            if v == parent[u] :
                continue
            parent[v] = u
            parent_weight[v] = w
            dist[v] = dist[u] + w
            stack.append(v)
    return order, parent, parent_weight, dist


class TreeCenter_Environment(VerifiableEnvironment) :
    """Weighted tree task: pick the vertex `r` minimizing sum_i dist(i, r) * C[i]."""

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices, labeled from `0` to `{N_minus_1}`.

Each vertex has a cost, given as a list `C` of length {N}, where `C[i]` is the cost of vertex i:
{C}

The tree contains the following {N} - 1 = {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**:
{edges}

Your task is to select a single vertex `r` (where `r` is in the range 0 to {N_minus_1}).
Try your best to **minimize** dist(0, r) * C[0] + dist(1, r) * C[1] + ... + dist({N_minus_1}, r) * C[{N_minus_1}], where `dist(i, j)` is the distance between vertices i and j in the tree. The distance between two vertices is defined as the sum of the weights of the edges on the unique path connecting them (since the graph is a tree, there is exactly one unique path between any two vertices).

**Output Format:** Your final answer should be a single integer `r` (the index of the selected vertex). Example: `0` (do **NOT** include the backticks or quotes)."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 8.0,
                 **kwargs) :
        """
        Initialize the TreeCenter_Environment instance.

        Args:
            wrong_format: reward returned when the output is not an integer or the vertex is out of range.
            rewarding_strategy: either "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the computed reward.
            rewarding_beta: exponent used by the "(gold/answer)^beta" strategy.
            **kwargs: forwarded to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Sample vertex costs and a random weighted tree, then store the optimal
        vertex (`reference_answer`) and its cost (`gold_answer`) in `self.parameter`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        C = self.parameter["C"] = [random.randint(1, N) for _ in range(N)]

        # Random tree: attach every vertex of a shuffled order to a random earlier vertex.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations) :
            if index == 0 :
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v, random.randint(1, N)))
        random.shuffle(edges)

        for u, v, w in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set((u, v) for u, v, w in edges)) == N - 1

        tree = networkx.Graph()
        tree.add_weighted_edges_from(edges)
        assert networkx.is_tree(tree)

        adjacent = _build_adjacency(N, edges)
        order, parent, parent_weight, dist = _traverse(adjacent, 0)

        # subtree_sumC[u] = total cost of the subtree of u when the tree is rooted at 0.
        # Reverse preorder guarantees children are folded into parents first.
        subtree_sumC = list(C)
        for u in reversed(order) :
            if parent[u] != -1 :
                subtree_sumC[parent[u]] += subtree_sumC[u]
        total_C = subtree_sumC[0]

        # cost[u] = objective value when u is selected; start from the root and re-root along edges.
        cost = [0] * N
        cost[0] = sum(dist[u] * C[u] for u in range(N))
        self.parameter["reference_answer"] = 0
        self.parameter["gold_answer"] = cost[0]
        for u in order[1 :] :
            w = parent_weight[u]
            # Moving the root across edge (parent, u): the subtree of u gets closer by w, everything else farther by w.
            cost[u] = cost[parent[u]] + (total_C - subtree_sumC[u]) * w - subtree_sumC[u] * w
            # Strict comparison in preorder keeps the first optimal vertex on ties.
            if cost[u] < self.parameter["gold_answer"] :
                self.parameter["reference_answer"] = u
                self.parameter["gold_answer"] = cost[u]
        assert self.parameter["gold_answer"] > 0

    def _prompt_generate(self) -> str :
        """Render the prompt from the stored `N`, `C` and `edges`."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            C = "\n".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"])),
            edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as a single integer; return None when it is missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns `wrong_format` when the output is unparsable or the vertex is out
        of range; otherwise the reward defined by `rewarding_strategy`, comparing
        the cost of the chosen vertex with the stored optimum.

        Raises:
            NotImplementedError: if `rewarding_strategy` is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        root = processed_result
        N = self.parameter["N"]
        if not (0 <= root < N) :
            return self.rewards["wrong_format"]

        adjacent = _build_adjacency(N, self.parameter["edges"])
        _, _, _, dist = _traverse(adjacent, root)
        C = self.parameter["C"]
        gold, answer = self.parameter["gold_answer"], sum(dist[u] * C[u] for u in range(N))

        assert gold <= answer, "gold <= answer"
        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (answer == gold)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))

# Patch metadata carried over verbatim from the enclosing diff:
# \ No newline at end of file
# diff --git a/server/Gym/environments/tree_change_one_edge_diameter/__init__.py b/server/Gym/environments/tree_change_one_edge_diameter/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..027a97218b48f2f1f7361519e4d7b65ccbbaf80f
# --- /dev/null
# +++
# Patch metadata carried over verbatim from the enclosing diff:
# b/server/Gym/environments/tree_change_one_edge_diameter/__init__.py
# @@ -0,0 +1 @@
# +from .environment import TreeChangeOneEdgeDiameter_Environment
# diff --git a/server/Gym/environments/tree_change_one_edge_diameter/environment.py b/server/Gym/environments/tree_change_one_edge_diameter/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..37fc91aec29ce2ccc5887ffbf69ca33ff0d63046
# --- /dev/null
# +++ b/server/Gym/environments/tree_change_one_edge_diameter/environment.py
# @@ -0,0 +1,280 @@
import random
import networkx
from collections import deque
from typing import Optional, Tuple
from ...environment import VerifiableEnvironment


class TreeChangeOneEdgeDiameter_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3596
    """Unweighted tree task: remove one edge and add one edge (keeping a tree) to minimize or maximize the diameter."""

    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `1` to `{N}`. The tree contains the following edges:
{edges}

You may remove one edge from the tree and add a new edge (possibly the same edge) such that the resulting graph is still a tree. Your goal is to {maximize_or_minimize} the diameter of the resulting tree; the **diameter** of a tree is defined as the number of edges on the longest path between any two vertices.

**Output Format:** Output four integers `u1 v1 u2 v2` (do NOT include the backticks or quotes), separated by spaces, where:
- `(u1, v1)` is the edge to be removed
- `(u2, v2)` is the edge to be added"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5,
                 rewarding_strategy_min : str = "(gold/answer)^beta", rewarding_weight_min : float = +1.0, rewarding_beta_min : float = 5.0,
                 rewarding_strategy_max : str = "(answer/gold)^beta", rewarding_weight_max : float = +1.0, rewarding_beta_max : float = 5.0,
                 **kwargs) :
        """
        Initialize the TreeChangeOneEdgeDiameter_Environment instance.

        Args:
            wrong_format: reward when the output cannot be parsed as four integers.
            invalid_solution: reward when the removed/added edges do not yield a valid tree.
            rewarding_strategy_min / rewarding_weight_min / rewarding_beta_min: reward settings for "minimize" instances.
            rewarding_strategy_max / rewarding_weight_max / rewarding_beta_max: reward settings for "maximize" instances.
            **kwargs: forwarded to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_solution": invalid_solution,
            "rewarding_strategy_min": rewarding_strategy_min,
            "rewarding_weight_min": rewarding_weight_min,
            "rewarding_beta_min": rewarding_beta_min,
            "rewarding_strategy_max": rewarding_strategy_max,
            "rewarding_weight_max": rewarding_weight_max,
            "rewarding_beta_max": rewarding_beta_max
        }


    def _generate(self) -> None :
        """
        Sample a random tree and an objective ("minimize"/"maximize"), then store
        the optimal diameter (`gold_answer`) and one optimal move
        (`reference_answer`, formatted "u1 v1 u2 v2") in `self.parameter`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 4, "N should be greater than or equal to 4"

        # Random tree: attach every vertex of a shuffled order to a random earlier vertex.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations) :
            if index == 0 :
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u + 1, v + 1)) # Convert to 1-based indexing
        random.shuffle(edges)

        for u, v in edges :
            assert 1 <= u < v <= N
        assert len(edges) == len(set(edges)) == N - 1

        self.parameter["minimize_or_maximize"] = random.choice(["minimize", "maximize"])


        A = [[] for _ in range(N + 1)]
        for u, v in edges :
            A[u].append(v)
            A[v].append(u)

        def bfs(start, skip_u = None, skip_v = None) :
            """BFS from `start`, ignoring edge (skip_u, skip_v) if given; return (first farthest vertex, BFS parent array)."""
            dist = [-1] * (N + 1)
            parent = [-1] * (N + 1)
            dist[start] = 0
            queue = deque([start])
            far = start
            while queue :
                u = queue.popleft()
                for v in A[u] :
                    if skip_u is not None and ((u == skip_u and v == skip_v) or (u == skip_v and v == skip_u)) :
                        continue
                    if dist[v] == -1 :
                        dist[v] = dist[u] + 1
                        parent[v] = u
                        queue.append(v)
                        if dist[v] > dist[far] :
                            far = v
            return far, parent

        def get_diameter(start, skip_u = None, skip_v = None) :
            """Return the vertices of a longest path in the component of `start` (double BFS)."""
            far, _ = bfs(start, skip_u, skip_v)
            far2, parent = bfs(far, skip_u, skip_v)
            path = []
            u = far2
            while u != -1 :
                path.append(u)
                u = parent[u]
            return path

        def get_farthest(start, skip_u = None, skip_v = None) :
            """Return a vertex farthest from `start` within its component."""
            return bfs(start, skip_u, skip_v)[0]

        # Original diameter
        D = get_diameter(1)
        InDiameter = [False] * (N + 1)
        for u in D :
            InDiameter[u] = True

        # f[u]: longest chain from u into a subtree off the diameter
        # g[u]: diameter within u's off-diameter subtree
        f = [0] * (N + 1)
        g = [0] * (N + 1)

        # Iterative replacement for the recursive tree DP: compute a preorder from D[0],
        # then fold children into parents in reverse preorder (children always come first).
        dp_parent = [0] * (N + 1) # 0 is a sentinel: vertices are 1-based
        order = []
        stack = [D[0]]
        while stack :
            u = stack.pop()
            order.append(u)
            for v in A[u] :
                if v != dp_parent[u] :
                    dp_parent[v] = u
                    stack.append(v)
        for u in reversed(order) :
            for v in A[u] :
                if v == dp_parent[u] or InDiameter[v] :
                    continue
                # g[u] must use f[u] from before this child is merged (two distinct chains).
                g[u] = max(g[u], g[v], f[v] + 1 + f[u])
                f[u] = max(f[u], f[v] + 1)

        L = len(D)
        # prefix DP: pref[i] = diameter of the part formed by D[0..i] and their off-diameter subtrees
        pref = [0] * L
        cur = 0
        for i in range(L) :
            u = D[i]
            if i == 0 :
                pref[i] = max(0, g[u], cur + f[u])
            else :
                pref[i] = max(pref[i - 1], g[u], cur + f[u])
            cur = max(cur + 1, f[u] + 1)

        INF = N + 5
        kmin = INF
        kmax = -INF
        x1min = y1min = None
        x1max = y1max = None

        # suffix DP + find best removal for min/max among diameter edges (D[i], D[i - 1])
        R = 0
        cur = 0
        for i in range(L - 1, 0, -1) :
            u = D[i]
            R = max(R, g[u], cur + f[u])
            cur = max(cur + 1, f[u] + 1)
            left = pref[i - 1]
            # candidate for minimal new diameter: reconnect the two halves at their centers
            cand_min = max(left, R, (R + 1) // 2 + (left + 1) // 2 + 1)
            if cand_min < kmin :
                kmin = cand_min
                x1min, y1min = u, D[i - 1]
            # candidate for maximal new diameter: chain the two halves' diameters end to end
            if R + 1 + left > kmax :
                kmax = R + 1 + left
                x1max, y1max = u, D[i - 1]

        # also consider removing a single off-diameter branch edge for max
        for u in D :
            for v in A[u] :
                if not InDiameter[v] :
                    if L + g[v] > kmax :
                        kmax = L + g[v]
                        x1max, y1max = u, v

        # find the new-edge endpoints for the minimal case (centers of both components)
        D1 = get_diameter(x1min, x1min, y1min)
        x2min = D1[(len(D1) - 1) // 2]
        D2 = get_diameter(y1min, x1min, y1min)
        y2min = D2[(len(D2) - 1) // 2]

        # and for the maximal case (a farthest vertex in each component)
        x2max = get_farthest(x1max, x1max, y1max)
        y2max = get_farthest(y1max, x1max, y1max)

        # output
        if self.parameter["minimize_or_maximize"] == "minimize" :
            self.parameter["gold_answer"] = kmin
            self.parameter["reference_answer"] = "{} {} {} {}".format(x1min, y1min, x2min, y2min)
        elif self.parameter["minimize_or_maximize"] == "maximize" :
            self.parameter["gold_answer"] = kmax
            self.parameter["reference_answer"] = "{} {} {} {}".format(x1max, y1max, x2max, y2max)
        else :
            # Explicit raise so the check survives `python -O`.
            raise AssertionError("minimize_or_maximize should be either 'minimize' or 'maximize'")


    def _prompt_generate(self) -> str :
        """Render the prompt from the stored `N`, `edges` and objective."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
            maximize_or_minimize = self.parameter["minimize_or_maximize"],
        )


    def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int, int, int]] :
        """Parse "u1 v1 u2 v2" into four integers; return None when missing or malformed."""
        if answer is None :
            return None # Invalid answer format
        try :
            # ValueError covers both non-integer tokens and a wrong token count.
            u1, v1, u2, v2 = map(int, answer.strip().split())
        except ValueError :
            return None # Invalid answer format
        return u1, v1, u2, v2


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns `wrong_format` for unparsable output, `invalid_solution` when the
        removed edge is not in the tree, the added edge is out of range / a loop /
        already present, or the result is not a tree; otherwise the reward for the
        instance's objective.

        Raises:
            NotImplementedError: if the configured rewarding strategy is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        u1, v1, u2, v2 = processed_result
        N = self.parameter["N"]

        removed = (min(u1, v1), max(u1, v1))
        edges = [(u, v) for u, v in self.parameter["edges"] if (u, v) != removed]
        if len(edges) != N - 2 :
            assert len(edges) == N - 1, "There should be exactly N-1 edges in the tree"
            return self.rewards["invalid_solution"]
        if not (1 <= u2 <= N and 1 <= v2 <= N and u2 != v2 and (min(u2, v2), max(u2, v2)) not in edges) :
            return self.rewards["invalid_solution"]
        edges.append((u2, v2))

        G = networkx.Graph()
        G.add_edges_from(edges)
        if not networkx.is_tree(G) :
            return self.rewards["invalid_solution"]
        assert set([u for u, v in edges] + [v for u, v in edges]) == set(range(1, N + 1)), "All vertices should be present in the tree"

        answer, gold = networkx.diameter(G), self.parameter["gold_answer"]
        if self.parameter["minimize_or_maximize"] == "minimize" :
            assert 0 < gold <= answer, "For minimization, answer should be greater than 0 and at least as large as the gold answer"
            if self.rewards["rewarding_strategy_min"] == "(gold/answer)^beta" :
                return self.rewards["rewarding_weight_min"] * ((gold / answer) ** self.rewards["rewarding_beta_min"])
            elif self.rewards["rewarding_strategy_min"] == "gold=answer" :
                return self.rewards["rewarding_weight_min"] * (gold == answer)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_min"]))
        elif self.parameter["minimize_or_maximize"] == "maximize" :
            assert 0 < answer <= gold, "For maximization, answer should be greater than 0 and at most as large as the gold answer"
            if self.rewards["rewarding_strategy_max"] == "(answer/gold)^beta" :
                return self.rewards["rewarding_weight_max"] * ((answer / gold) ** self.rewards["rewarding_beta_max"])
            elif self.rewards["rewarding_strategy_max"] == "gold=answer" :
                return self.rewards["rewarding_weight_max"] * (gold == answer)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_max"]))
        else :
            # Explicit raise so the check survives `python -O`.
            raise AssertionError("minimize_or_maximize should be either 'minimize' or 'maximize'")

# Patch metadata carried over verbatim from the enclosing diff:
# \ No newline at end of file
# Patch metadata carried over verbatim from the enclosing diff:
# diff --git a/server/Gym/environments/tree_coloring/__init__.py b/server/Gym/environments/tree_coloring/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..76155e4d68e9e49a7c6178477b2faf0901a289df
# --- /dev/null
# +++ b/server/Gym/environments/tree_coloring/__init__.py
# @@ -0,0 +1 @@
# +from .environment import TreeColoring_Environment
# diff --git a/server/Gym/environments/tree_coloring/environment.py b/server/Gym/environments/tree_coloring/environment.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..47a85b71ac4baa88b7f176f9759e30beb22bde82
# --- /dev/null
# +++ b/server/Gym/environments/tree_coloring/environment.py
# @@ -0,0 +1,187 @@
import random
import networkx
from typing import Optional, List
from ...environment import VerifiableEnvironment


class TreeColoring_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3177
    # Task: choose exactly K vertices of a weighted tree so that the sum of pairwise
    # distances inside the chosen set plus inside its complement is maximal.
    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices, labeled from `0` to `{N_minus_1}`.

The tree contains the following {N} - 1 = {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning there is an undirected edge **connecting vertex `u` to vertex `v` with weight `w`:
{edges}

Your task is to **select exactly {K} distinct vertices**. These selected vertices are called **colored**, and the remaining {N} - {K} = {N_minus_K} vertices are called **uncolored**. Try your best to **maximize the total distance**, defined as:
- The sum of all pairwise distances **between colored vertices**,
- Plus the sum of all pairwise distances **between uncolored vertices**.

(Note: Since the graph is a tree, there is exactly one unique path between any two vertices.)

**Output Format:**
Your final answer should be a single line containing the {K} selected (colored) vertices in any order, separated by **spaces**.
Example: `{first_K_vertices}` (do **NOT** include the backticks or quotes)."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 2.0,
                 **kwargs) :
        """
        Initialize the TreeColoring_Environment instance.

        Args:
            wrong_format: reward when the output cannot be parsed as integers.
            invalid_solution: reward when the vertex list has the wrong length, duplicates, or out-of-range ids.
            rewarding_strategy: either "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the computed reward.
            rewarding_beta: exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: forwarded to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Sample K and a random weighted tree, then run a tree knapsack DP to store
        the optimal total distance (`reference_answer_distance`) and one optimal
        vertex set (`reference_answer`, space-separated) in `self.parameter`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = self.parameter["K"] = random.randint(1, N - 1)

        # Random tree: attach every vertex of a shuffled order to a random earlier vertex.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations) :
            if index == 0 :
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v, random.randint(1, N)))
        random.shuffle(edges)

        for u, v, w in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set((u, v) for u, v, w in edges)) == N - 1

        tree = networkx.Graph()
        tree.add_weighted_edges_from(edges)
        assert networkx.is_tree(tree)


        adjacency_list = [[] for s in range(N)]
        for u, v, w in edges :
            adjacency_list[u].append((v, w))
            adjacency_list[v].append((u, w))

        # dpF[u][k]: best total edge contribution inside the (so far merged) subtree of u
        # with exactly k colored vertices in it; None means "not reachable".
        dpF = [[None] * (K + 1) for u in range(N)]
        # decisions[u]: one (child, weight, table) entry per merged child, where table[k]
        # is the number of colored vertices given to that child for state k after the merge.
        decisions = [[] for u in range(N)]
        Size = [0] * N
        def DP(u, parent) :
            Size[u] = 1
            dpF[u][0] = 0
            if K :
                dpF[u][1] = 0
            for v, w in adjacency_list[u] :
                if v == parent :
                    continue
                DP(v, u)
                decision = decisions[u]
                decision.append((v, w, [None] * (min(Size[u] + Size[v], K) + 1)))
                decision = decision[-1][-1]
                # In-place knapsack merge. uk runs downward and writes go to uk + vk >= uk,
                # with vk == 0 handled last, so dpF[u][uk] is still the pre-merge value when read.
                for uk in range(min(Size[u], K), -1, -1) :
                    for vk in range(min(Size[v], K - uk), -1, -1) :
                        assert uk + vk <= K
                        if dpF[u][uk] is None or dpF[v][vk] is None :
                            continue
                        # The child's subtree cannot hold more uncolored vertices than exist overall.
                        if (N - K) < (Size[v] - vk) :
                            continue
                        # Edge (u, v) is crossed by every colored pair and every uncolored pair it separates.
                        val = dpF[u][uk] + dpF[v][vk] + w * (vk * (K - vk) + (Size[v] - vk) * ((N - K) - (Size[v] - vk)))
                        if dpF[u][uk + vk] is None or dpF[u][uk + vk] <= val :
                            dpF[u][uk + vk] = val
                            decision[uk + vk] = vk
                Size[u] += Size[v]
        DP(0, -1)
        assert dpF[0][K]
        self.parameter["reference_answer_distance"] = dpF[0][K]

        # Backtrack through the recorded decisions to recover one optimal vertex set.
        self.parameter["reference_answer"] = []
        def DFS(u, k) :
            if Size[u] == 1 :
                assert len(decisions[u]) == 0
            # Undo merges last-to-first: each table refers to the state right after its own merge.
            decisions[u].reverse()
            for decision in decisions[u] :
                v, vk = decision[0], decision[-1][k]
                k -= vk
                DFS(v, vk)
            # What remains is whether u itself is colored.
            assert k in (0, 1)
            if k == 1 :
                self.parameter["reference_answer"].append(u)
        DFS(0, K)
        self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"]))

    def _prompt_generate(self) -> str :
        """Render the prompt from the stored `N`, `K` and `edges`."""
        N = self.parameter["N"]
        K = self.parameter["K"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            K = K,
            N_minus_K = N - K,
            first_K_vertices = " ".join(map(str, range(K))),
            edges = "\n".join("({}, {}, {})".format(u, v, w) for u, v, w in self.parameter["edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse a whitespace-separated list of integers; return None when missing or malformed."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns `wrong_format` for unparsable output, `invalid_solution` when the
        list does not contain exactly K distinct in-range vertices, otherwise the
        reward comparing the achieved total distance with the stored optimum.

        Raises:
            ValueError: if `rewarding_strategy` is unknown.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            colored_vertices = processed_result
            if len(colored_vertices) != self.parameter["K"] :
                return self.rewards["invalid_solution"]
            if len(set(colored_vertices)) != self.parameter["K"] :
                return self.rewards["invalid_solution"]
            if not all((0 <= vertex < self.parameter["N"]) for vertex in colored_vertices) :
                return self.rewards["invalid_solution"]

            adjacency_list = [[] for s in range(self.parameter["N"])]
            for u, v, w in self.parameter["edges"] :
                adjacency_list[u].append((v, w))
                adjacency_list[v].append((u, w))

            # colored[u] starts as a 0/1 flag and is accumulated into a subtree count by DFS.
            colored = [0] * self.parameter["N"]
            for colored_vertex in colored_vertices :
                colored[colored_vertex] = 1
            Size = [0] * self.parameter["N"]
            answer = 0
            def DFS(u, parent) :
                nonlocal answer
                Size[u] = 1
                for v, w in adjacency_list[u] :
                    if v == parent :
                        continue
                    DFS(v, u)
                    # Edge (u, v) contributes w once per same-class pair it separates.
                    answer += w * (colored[v] * (self.parameter["K"] - colored[v]) + (Size[v] - colored[v]) * ((self.parameter["N"] - self.parameter["K"]) - (Size[v] - colored[v])))
                    Size[u] += Size[v]
                    colored[u] += colored[v]
            DFS(0, -1)
            gold = self.parameter["reference_answer_distance"]
            assert answer <= gold

            if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
                return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (gold == answer)
            else :
                raise ValueError("Invalid rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]

# Patch metadata carried over verbatim from the enclosing diff:
# \ No newline at end of file
# diff --git a/server/Gym/environments/tree_distance_equal_triad_counting/__init__.py b/server/Gym/environments/tree_distance_equal_triad_counting/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..414165f27343961d82469bac87dd519bafe92c35
# --- /dev/null
# +++ b/server/Gym/environments/tree_distance_equal_triad_counting/__init__.py
# @@ -0,0 +1 @@
# +from .environment import Tree_DistanceEqualTriad_Counting_Environment
# diff --git a/server/Gym/environments/tree_distance_equal_triad_counting/environment.py b/server/Gym/environments/tree_distance_equal_triad_counting/environment.py
# new file mode 100644
# index
0000000000000000000000000000000000000000..e71d12ea26974103b33c15484ff730ad03482a4a --- /dev/null +++ b/server/Gym/environments/tree_distance_equal_triad_counting/environment.py @@ -0,0 +1,153 @@ +import random +from collections import deque +from typing import Optional +from ...environment import VerifiableEnvironment + + +class Tree_DistanceEqualTriad_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3565 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices, labeled from `1` to `{N}`. It contains the following {N_minus_1} undirected edges: +{edges} + +Please compute the number of three-vertex sets (a triad of vertices A, B, and C such that 1 ≤ A < B < C ≤ {N}) for which the **pairwise distances** are all equal — that is, the distance between A and B, between A and C, and between B and C are all the same. The distance between two vertices is the number of edges on the shortest path connecting them.""" + + def __init__(self, + wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, + **kwargs) : + """ + Initialize the Tree_DistanceEqualTriad_Counting_Environment instance. 
+ """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 4, "N should be greater than or equal to 4" + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u + 1, v + 1)) # Convert to 1-based indexing + random.shuffle(edges) + + for u, v in edges : + assert 1 <= u < v <= N + assert len(edges) == len(set(edges)) == N - 1 + + + adjacency = [[] for _ in range(N+1)] + for a, b in edges: + adjacency[a].append(b) + adjacency[b].append(a) + + ans = 0 + + # For each candidate center c, we look at its branches (one per neighbor). + # In each branch we BFS to record how many nodes lie at each distance d from c. 
+ # Then for each distance d we have counts [c1, c2, ..., ck] across branches, + # and the number of ways to pick one node in three distinct branches all at that + # same distance is the 3rd elementary symmetric sum: + # e3 = sum_{i= len(local): + local.extend([0] * (d - len(local) + 1)) + local[d] += 1 + if d > max_depth: + max_depth = d + for w in adjacency[u]: + if not visited[w]: + visited[w] = True + q.append((w, d+1)) + branch_counts.append(local) + + b = len(branch_counts) + if b < 3: + continue + + # for each possible distance t, compute the 3‐way product sum + for t in range(1, max_depth+1): + S1 = S2 = S3 = 0 + for f in branch_counts: + cnt = f[t] if t < len(f) else 0 + S1 += cnt + S2 += cnt*cnt + S3 += cnt*cnt*cnt + # elementary symmetric sum of order 3 + e3 = (S1*S1*S1 - 3*S1*S2 + 2*S3) // 6 + ans += e3 + + self.parameter["reference_answer"] = ans + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("{} {}".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if processed_result < 0 : + return self.rewards["wrong_format"] + + if self.rewards["rewarding_strategy"] == "(min/max)^beta" : + a, b = self.parameter["reference_answer"], processed_result + if processed_result == 0 : + return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == 0) + return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (processed_result == 
self.parameter["reference_answer"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_dynamic_xor_zero_path/__init__.py b/server/Gym/environments/tree_dynamic_xor_zero_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c57d10a5f49bc86e287b39550a5ca3580a79c64 --- /dev/null +++ b/server/Gym/environments/tree_dynamic_xor_zero_path/__init__.py @@ -0,0 +1 @@ +from .environment import TreeDynamic_XORZeroPath_Environment diff --git a/server/Gym/environments/tree_dynamic_xor_zero_path/environment.py b/server/Gym/environments/tree_dynamic_xor_zero_path/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..60281f7f1d7685aeb146d1d12f7b3bc506e79ce6 --- /dev/null +++ b/server/Gym/environments/tree_dynamic_xor_zero_path/environment.py @@ -0,0 +1,138 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class TreeDynamic_XORZeroPath_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3359 + prompt_template = \ +r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` to `{N_minus_1}`. + +The tree has the following {N_minus_1} undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning there is an undirected edge between vertex `u` and vertex `v` with weight `w`: +{edges} + +You will remove edges one by one in the following order: {removes} +After removing the first 0, 1, ..., {N_minus_1} edges (in the given order above), please compute the number of **paths** such that the **XOR** of the weights along the path is equal to 0. There are C({N}, 2) paths in total, where C is the binomial coefficient. 
+ +**Output Format:** A single line containing {N} integers — the number of such paths at the beginning and after each removal, separated by spaces.""" + + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the TreeDynamic_XORZeroPath_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v, random.randint(0, N))) + random.shuffle(edges) + + for u, v, w in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, v, w in edges)) == N - 1 + + tree = networkx.Graph() + tree.add_weighted_edges_from(edges) + assert networkx.is_tree(tree) + + self.parameter["removes"] = removes = list(range(N - 1)) + random.shuffle(removes) + + + adjacent_lists = [[] for u in range(N)] + for u, v, w in edges : + adjacent_lists[u].append((v, w)) + adjacent_lists[v].append((u, w)) + + xor_from_0 = [0] * N + def DFS(u, parent) : + for v, w in adjacent_lists[u] : + if v != parent : + xor_from_0[v] = xor_from_0[u] ^ w + DFS(v, u) + xor_from_0[0] = 0 + DFS(0, -1) + + parent, xor2num, nodes_list = list(range(N)), [{xor : 1} for xor in xor_from_0], [[u] for u in range(N)] + + removes = reversed(removes) + answer = [0] 
+ for remove in removes : + answer.append(answer[-1]) + u, v = edges[remove][0], edges[remove][1] + u, v = parent[u], parent[v] + if len(nodes_list[u]) < len(nodes_list[v]) : + u, v = v, u + nodes_list[u].extend(nodes_list[v]) + for node in nodes_list[v] : + answer[-1] += xor2num[u].get(xor_from_0[node], 0) + parent[node] = u + for node in nodes_list[v] : + xor2num[u][xor_from_0[node]] = xor2num[u].get(xor_from_0[node], 0) + 1 + answer.reverse() + + self.parameter["gold_answer"] = answer + self.parameter["reference_answer"] = " ".join(map(str, answer)) + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("edge {} : ({} {} {})".format(i, u, v, w) for i, (u, v, w) in enumerate(self.parameter["edges"])), + removes = " ".join(map(str, self.parameter["removes"])), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + if len(processed_result) != self.parameter["N"] : + return self.rewards["invalid_solution"] + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(int(a == b) for a, b in zip(self.parameter["gold_answer"], processed_result)) / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == processed_result) + else : + raise NotImplementedError("Unknown rewarding strategy: 
{}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_elimination_expectation/__init__.py b/server/Gym/environments/tree_elimination_expectation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cf6c4a7c9f2634d5f502560fe77c9cd6234f6ee4 --- /dev/null +++ b/server/Gym/environments/tree_elimination_expectation/__init__.py @@ -0,0 +1 @@ +from .environment import TreeElimination_Expectation_Environment diff --git a/server/Gym/environments/tree_elimination_expectation/environment.py b/server/Gym/environments/tree_elimination_expectation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..7b83d7272d9c8611a94e3243ade386222f7dac9e --- /dev/null +++ b/server/Gym/environments/tree_elimination_expectation/environment.py @@ -0,0 +1,106 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class TreeElimination_Expectation_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given a **tree** with {N} vertices labeled from `1` to `{N}`, where vertex `1` is the **root** of the tree. Each vertex (except the root `1`) has a parent, specified as follows: +{parents} + +Initially, **all vertices are uncolored**. In each step, you randomly select an **uncolored vertex** (with equal probability) and color all vertices on the entire path from the selected vertex to the root. + +Please compute the **expected number of steps** required until **all vertices are colored**. Please give the expectation **modulo 10^9 + 7**. 
+ +**Output Format:** Your final answer should be a single integer — the expected number of steps modulo 10^9 + 7.""" + MOD = 10 ** 9 + 7 + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the TreeElimination_Expectation_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 2, "N should be greater than or equal to 2" + + P = list(range(2, N + 1)) + random.shuffle(P) + P = [1] + P + + parents = self.parameter["parents"] = [] + for i in range(1, N) : + parent, u = P[random.randint(0, i - 1)], P[i] + parents.append((parent, u)) + + + def mod_inverse(a : int) -> int : + return pow(a, self.MOD - 2, self.MOD) + + def dfs(u : int, children : list[list[int]], size : list[int], fac : list[int], inv : list[int]) -> int : + total = 0 + size[u] = 1 + for v in children[u] : + total += dfs(v, children, size, fac, inv) + size[u] += size[v] + total += fac[size[u] - 1] * inv[size[u]] % self.MOD + return total % self.MOD + + children : list[list[int]] = [[] for _ in range(N + 1)] + for parent, u in parents : + children[parent].append(u) + + fac = [1] * (N + 1) + for i in range(1, N + 1) : + fac[i] = fac[i - 1] * i % self.MOD + inv = [1] * (N + 1) + inv[N] = mod_inverse(fac[N]) + for i in range(N, 0, -1) : + inv[i - 1] = inv[i] * i % self.MOD + + size = [0] * (N + 1) + self.parameter["reference_answer"] = dfs(1, children, size, fac, inv) + assert size[1] == N, "size[1] should be equal to N" + + def _prompt_generate(self) -> str : + return self.prompt_template.format( + N = self.parameter["N"], + parents = "\n".join("parent[{}]={}".format(u, parent) for parent, u in 
self.parameter["parents"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.MOD) : + return self.rewards["wrong_range"] + + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_even_partitioning/__init__.py b/server/Gym/environments/tree_even_partitioning/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1872383e80e741088c6e096c3b2b2a3f88b9837a --- /dev/null +++ b/server/Gym/environments/tree_even_partitioning/__init__.py @@ -0,0 +1 @@ +from .environment import TreeEvenPartitioning_Environment diff --git a/server/Gym/environments/tree_even_partitioning/environment.py b/server/Gym/environments/tree_even_partitioning/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..bc5d534f7c6b9d2d4977c25c3237a49a679a9b30 --- /dev/null +++ b/server/Gym/environments/tree_even_partitioning/environment.py @@ -0,0 +1,134 @@ +import random +import networkx +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class TreeEvenPartitioning_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3915 + prompt_template = \ +r"""You have a **tree** (i.e., a connected undirected graph with no cycles) with {NK} vertices labeled from `1` to `{NK}`. The tree contains the following {NK} - 1 undirected edges. 
Each edge is represented as a tuple `(u, v)`, meaning there is an undirected edge connecting vertex `u` to vertex `v`: +{edges} + +Partition all vertices into {N} **disjoint** sets such that: (1) each set contains exactly {K} vertices ({K} = {NK} / {N}), AND (2) each set forms a connected subgraph of the tree. Output {N} lines - each line should contain the {K} vertices of one set, separated by spaces; the vertices within a set and the sets themselves may be in any order.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(connected/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the TreeEvenPartitioning_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "MAX_N" in self.parameter, "MAX_N is required in parameter" + MAX_N = self.parameter["MAX_N"] + assert MAX_N >= 2, "MAX_N should be greater than or equal to 2" + + assert "MAX_K" in self.parameter, "MAX_K is required in parameter" + MAX_K = self.parameter["MAX_K"] + assert MAX_K >= 2, "MAX_K should be greater than or equal to 2" + + N = self.parameter["N"] = random.randint(2, MAX_N) + K = self.parameter["K"] = random.randint(2, MAX_K) + + groups = list(range(1, N * K + 1)) + random.shuffle(groups) + groups = [groups[i * K : (i + 1) * K] for i in range(N)] + + edges = self.parameter["edges"] = [] + + for i, group in enumerate(groups) : + assert len(group) == K, f"Group {i} should have exactly {K} vertices" + for index, vertex in enumerate(group) : + if index == 0 : + continue + u, v = vertex, group[random.randint(0, index - 1)] + u, v = min(u, v), max(u, v) + edges.append((u, v)) + if i == 0 : + continue + u, v = 
random.choice(group), random.choice(groups[random.randint(0, i - 1)]) + u, v = min(u, v), max(u, v) + edges.append((u, v)) + + random.shuffle(edges) + + for u, v in edges : + assert 1 <= u < v <= N * K + assert len(edges) == len(set(edges)) == N * K - 1 + + tree = networkx.Graph() + tree.add_edges_from(edges) + assert networkx.is_tree(tree) + + self.parameter["reference_answer"] = "\n".join(" ".join(map(str, group)) for group in groups) + + + def _prompt_generate(self) -> str : + N, K = self.parameter["N"], self.parameter["K"] + return self.prompt_template.format( + NK = N * K, + N = N, + K = K, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[List[int]]] : + if answer is not None : + answer = answer.strip() + try : + groups = [] + for line in answer.splitlines() : + line = line.strip() + if line : + groups.append(list(map(int, line.split()))) + if len(groups[-1]) != self.parameter["K"] : + return None + if len(groups) != self.parameter["N"] : + return None + return groups + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if set(vertex for group in processed_result for vertex in group) != set(range(1, self.parameter["N"] * self.parameter["K"] + 1)) : + return self.rewards["invalid_solution"] + + labels = [None] * (self.parameter["N"] * self.parameter["K"] + 1) + for label, group in enumerate(processed_result) : + assert 0 <= label < self.parameter["N"], f"Label {label} is out of range" + assert len(group) == self.parameter["K"], f"Group {group} should have exactly {self.parameter['K']} vertices" + for vertex in group : + assert labels[vertex] is None, f"Vertex {vertex} is already labeled" + labels[vertex] = label + edge_numbers = [0] * self.parameter["N"] + for u, v in self.parameter["edges"] : + if labels[u] == labels[v] : + 
edge_numbers[labels[u]] += 1 + + assert all(0 <= edge_number <= self.parameter["K"] - 1 for edge_number in edge_numbers), "Edge numbers are out of range" + connected = sum(int(edge_number == self.parameter["K"] - 1) for edge_number in edge_numbers) + assert connected <= self.parameter["N"], "Connected components exceed N" + if self.rewards["rewarding_strategy"] == "(connected/all)^beta" : + return self.rewards["rewarding_weight"] * ((connected / self.parameter["N"]) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "satisfied=all" : + return self.rewards["rewarding_weight"] * (connected == self.parameter["N"]) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_maximum_visited_vertex/__init__.py b/server/Gym/environments/tree_maximum_visited_vertex/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..46c6ce2b81c9a10c059482ed8bfabbe93b54beed --- /dev/null +++ b/server/Gym/environments/tree_maximum_visited_vertex/__init__.py @@ -0,0 +1 @@ +from .environment import TreeMaximumVisitedVertex_Environment diff --git a/server/Gym/environments/tree_maximum_visited_vertex/environment.py b/server/Gym/environments/tree_maximum_visited_vertex/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..e6036244476d9b053819fdd46b966ca3da1293b6 --- /dev/null +++ b/server/Gym/environments/tree_maximum_visited_vertex/environment.py @@ -0,0 +1,143 @@ +import random +from typing import Optional, List +from ...environment import VerifiableEnvironment + + +class TreeMaximumVisitedVertex_Environment(VerifiableEnvironment) : # https://www.luogu.com.cn/problem/P3412 + prompt_template = \ +r"""You are given a **tree** with {N} vertices labeled from 0 to {N_minus_1}. 
The tree has the following {N_minus_1} undirected edges: +{edges} + +Starting from vertex 0, find a path of length {M} (i.e., consisting of exactly {M} edges) that **maximizes the number of distinct vertices visited at least once**. At each step, you can move to any adjacent vertex; you may revisit vertices in the path. Output {M} + 1 integers (space-separated) representing the labels of the vertices visited along the path, starting from vertex 0.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 3.0, + **kwargs) : + """ + Initialize the TreeMaximumVisitedVertex_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + M = self.parameter["M"] = random.randint(2, 2 * (N - 1) - 1) + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v)) + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)) == N - 1 + + + # Adjacency list of size N + graph = [[] for _ in range(N)] + for a, b in edges: + graph[a].append(b) + graph[b].append(a) + + # Compute the maximum depth (in nodes) from root 0 + visited = [False] * N + max_depth = 0 + + def dfs(u, depth): + nonlocal max_depth + visited[u] = True + # Update global max_depth + max_depth = max(max_depth, 
depth) + for v in graph[u]: + if not visited[v]: + dfs(v, depth + 1) + + # Perform DFS from node 0, initial depth = 1 + # Use a mutable container to allow assignment in nested scope + # (Alternatively, declare max_depth as global) + max_depth = 0 + dfs(0, 1) + + # mx - 1 is the length of the longest path (in edges) from 0 + longest_path_edges = max_depth - 1 + if M <= longest_path_edges: + # Can only move down the main path + result = M + 1 + else: + # Extra moves allow visiting off-path nodes, two steps per new node + extra = M - longest_path_edges + result = max_depth + extra // 2 + # Cannot exceed total nodes N + result = min(N, result) + + self.parameter["gold_answer"] = result + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + M = self.parameter["M"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[List[int]] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + if not answer_array : + return None + return answer_array + except ValueError : + return None # Invalid answer format + else : + return None # Invalid answer format + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + path = processed_result + if len(path) != self.parameter["M"] + 1 : + return self.rewards["invalid_solution"] + if not all(0 <= vertex < self.parameter["N"] for vertex in path) : + return self.rewards["invalid_solution"] + if path[0] != 0 : + return self.rewards["invalid_solution"] + + edges = {(u, v) for u, v in self.parameter["edges"]} + if not all((min(s, t), max(s, t)) in edges for s, t in zip(path, path[1 :])) : + return self.rewards["invalid_solution"] + + answer, gold = len(set(path)), 
self.parameter["gold_answer"] + assert 0 < answer <= gold + if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" : + return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_random_walk_expectation/__init__.py b/server/Gym/environments/tree_random_walk_expectation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..af01387304a387e2d1cfd8809acd054614b53f3b --- /dev/null +++ b/server/Gym/environments/tree_random_walk_expectation/__init__.py @@ -0,0 +1 @@ +from .environment import TreeRandomWalkExpectation_Environment diff --git a/server/Gym/environments/tree_random_walk_expectation/environment.py b/server/Gym/environments/tree_random_walk_expectation/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..1cce33734dca7f23284c680b5ddda71f53b07fa8 --- /dev/null +++ b/server/Gym/environments/tree_random_walk_expectation/environment.py @@ -0,0 +1,131 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class TreeRandomWalkExpectation_Environment(VerifiableEnvironment) : # https://www.luogu.com.cn/problem/P3412 + prompt_template = \ +r"""You are given a **tree** with {N} vertices labeled from 0 to {N_minus_1}. The tree has the following {N_minus_1} undirected edges: +{edges} + +A random walk on the tree is defined as follows: from the current vertex, you move to one of its neighbors uniformly at random at each step. Define E(S, T) as the expected number of steps to reach vertex T starting from vertex S (the walk stops immediately upon reaching T). 
+ +Please compute the sum of all E(S, T) over all ordered pairs (S, T), divided by {N}². Output this value modulo {MOD}. + +**Output Format:** A single integer — the value of (∑ E(S, T)) / {N}² modulo {MOD}.""" + MOD = 998244353 + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the TreeRandomWalkExpectation_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 2" + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v)) + random.shuffle(edges) + + for u, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set(edges)) == N - 1 + + + adj = [[] for _ in range(N)] + d = [0] * N + + # Read edges, build adjacency and initial degree array + for u, v in edges: + adj[u].append(v) + adj[v].append(u) + d[u] += 1 + d[v] += 1 + + totd = sum(d) + + sz = [0] * N + parent = [-1] * N + + # DFS to compute subtree sizes and accumulate degree-sums + def dfs(u, p): + parent[u] = p + sz[u] = 1 + for v in adj[u]: + if v == p: + continue + dfs(v, u) + sz[u] += sz[v] + d[u] += d[v] + + dfs(0, -1) + + # modular inverse of n^2 + rev = pow(N * N % self.MOD, self.MOD - 2, self.MOD) + + ans = 0 + for u in range(N): + for v in adj[u]: + if v == parent[u]: + # edge from u up to its parent + ans = (ans + d[u] * sz[u] * (N - sz[u])) % self.MOD + else: + # edge from u down to child v + ans = (ans + (totd - 
d[v]) * sz[v] * (N - sz[v])) % self.MOD + + self.parameter["reference_answer"] = ans * rev % self.MOD + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]), + MOD = self.MOD, + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.MOD) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/tree_topological_sequence_counting/__init__.py b/server/Gym/environments/tree_topological_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..87dbad21c1adba609cc3a18d55918b4aeb4f28d6 --- /dev/null +++ b/server/Gym/environments/tree_topological_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .environment import TreeTopologicalSequenceCounting_Environment diff --git a/server/Gym/environments/tree_topological_sequence_counting/environment.py b/server/Gym/environments/tree_topological_sequence_counting/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..226d7f909c63e313e65e8d54a07267d29003c7eb --- /dev/null +++ b/server/Gym/environments/tree_topological_sequence_counting/environment.py @@ -0,0 +1,177 @@ +import random +import networkx +from typing import Optional +from ...environment import VerifiableEnvironment + + +class 
TreeTopologicalSequenceCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""Please count the number of permutations of the integers from 0 to {N_minus_1}, denoted as p[0], p[1], ..., p[{N_minus_1}], such that the following {N_minus_1} constraints are satisfied: {constraints} +Note that each constraint above is of the form `p[i] < p[j]` or `p[i] > p[j]`, and collectively, these constraints correspond to a tree — that is, a connected undirected graph with no cycles — on {N} vertices labeled from 0 to {N_minus_1}. +You should output the number of valid permutations modulo {MOD}.""" + def __init__(self, + max_MOD : int = 1000000, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the TreeTopologicalSequenceCounting_Environment instance. + """ + super().__init__(**kwargs) + + self.max_MOD = max_MOD + self.rewards = { + "wrong_format" : wrong_format, + "wrong_range" : wrong_range, + "correct_answer" : correct_answer, + "wrong_answer" : wrong_answer, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD) + + p = list(range(N)) + random.shuffle(p) + + edges = self.parameter["edges"] = [] + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, "<" if p[u] < p[v] else ">", v)) + random.shuffle(edges) + + for u, w, v in edges : + assert 0 <= u < v < N + assert len(edges) == len(set((u, v) for u, w, v in edges)) == N - 1 + + tree = networkx.Graph() + tree.add_edges_from((u, v) for u, w, v in edges) + assert networkx.is_tree(tree) + + + # Precompute binomial coefficients up to maxN + 
C = [[0] * (N + 1) for _ in range(N + 1)] + for i in range(N + 1): + C[i][0] = 1 + for j in range(1, i + 1): + C[i][j] = (C[i-1][j-1] + C[i-1][j]) % MOD + + def dfs(u, parent, h1, h2): + # f_raw[k]: number of ways (raw) to have exactly k nodes before u + f_raw = [0, 1] # only u itself => 1 way with k=1 + sz = 1 # size of subtree rooted at u + + # First, merge all children v where u < v (v must come after u) + for v in h1[u]: + if v == parent: + continue + f_v, sz_v = dfs(v, u, h1, h2) + g = f_raw[:] # copy old + new_sz = sz + sz_v + new_f = [0] * (new_sz + 1) + for j in range(1, sz + 1): + gj = g[j] + if gj == 0: + continue + for i_count in range(j, sz_v + j): + # Combine with child-subtree counts that place at least (i_count-j+1) before v + diff = f_v[sz_v] - f_v[i_count - j] + if diff < 0: + diff += MOD + term = gj + term = term * C[i_count - 1][j - 1] % MOD + term = term * C[sz + sz_v - i_count][sz - j] % MOD + term = term * diff % MOD + new_f[i_count] = (new_f[i_count] + term) % MOD + f_raw = new_f + sz = new_sz + + # Then, merge all children v where u > v (v must come before u) + for v in h2[u]: + if v == parent: + continue + f_v, sz_v = dfs(v, u, h1, h2) + g = f_raw[:] + new_sz = sz + sz_v + new_f = [0] * (new_sz + 1) + for j in range(1, sz + 1): + gj = g[j] + if gj == 0: + continue + for i_count in range(j + 1, sz_v + j + 1): + # Combine with child-subtree counts that place exactly (i_count-j) before v + term = gj + term = term * C[i_count - 1][j - 1] % MOD + term = term * C[sz + sz_v - i_count][sz - j] % MOD + term = term * f_v[i_count - j] % MOD + new_f[i_count] = (new_f[i_count] + term) % MOD + f_raw = new_f + sz = new_sz + + # Turn raw counts into prefix-sums: f_pref[k] = sum_{t=1..k} f_raw[t] + f_pref = [0] * (sz + 1) + for i_count in range(1, sz + 1): + s = f_pref[i_count - 1] + f_raw[i_count] + if s >= MOD: + s -= MOD + f_pref[i_count] = s + + return f_pref, sz + + # Build directed adjacency lists + h1 = [[] for _ in range(N + 1)] + h2 = [[] for _ in 
range(N + 1)] + for a, sign, b in edges: + x, y = a + 1, b + 1 + if sign == '<': + h1[x].append(y) + h2[y].append(x) + else: + h1[y].append(x) + h2[x].append(y) + + f_root, _ = dfs(1, 0, h1, h2) + # The answer is the number of ways to have all N nodes before root (i.e. full ordering) + self.parameter["reference_answer"] = f_root[N] % MOD + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + constraints = "; ".join("p[{}] {} p[{}]".format(u, w, v) for u, w, v in self.parameter["edges"]), + MOD = self.parameter["MOD"], + ) + + + def _process(self, answer : Optional[str]) -> Optional[int] : + if answer is not None : + answer = answer.strip() + try : + int_answer = int(answer) + return int_answer + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + if not (0 <= processed_result < self.parameter["MOD"]) : + return self.rewards["wrong_range"] + if processed_result == self.parameter["reference_answer"] : + return self.rewards["correct_answer"] + else : + return self.rewards["wrong_answer"] + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/triumphal_arch/__init__.py b/server/Gym/environments/triumphal_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fe6a5eef14f6e2b2745b67edebbcce15df9ff708 --- /dev/null +++ b/server/Gym/environments/triumphal_arch/__init__.py @@ -0,0 +1 @@ +from .environment import TriumphalArch_Environment diff --git a/server/Gym/environments/triumphal_arch/environment.py b/server/Gym/environments/triumphal_arch/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..28d11e35009d9142ea51008c8900bcdadc98d4ca --- /dev/null +++ b/server/Gym/environments/triumphal_arch/environment.py @@ -0,0 +1,133 @@ +import random 
class TriumphalArch_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3554
    """
    Tree game environment: find the minimum K with which Alice always wins.

    `_generate` builds a random labeled tree on N vertices and stores its
    edge list and the integer reference answer in `self.parameter`;
    `scorer` compares a parsed integer answer with that reference.
    """
    prompt_template = \
r"""You are given a **tree** (i.e., a connected undirected graph with no cycles) with {N} vertices labeled from `0` to `{N_minus_1}`. The edges of the tree are given as follows:
{edges}

Alice and Bob are playing a game on this tree:
- Initially, Bob is standing at vertex `0`. The vertex `0` is already marked as **(permanently) black**, and all other vertices are **white**.
- On each turn:
  - Alice first chooses any K vertices and marks them as "(permanently) black".
  - Then, Bob may move to any vertex adjacent to his current position.
- If Bob ever reaches a **non-black** vertex on any turn, he wins. If eventually **all vertices become black**, then Alice wins.

Assuming both players play optimally, what is the **minimum value of K** such that Alice is guaranteed to win?"""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the TriumphalArch_Environment instance.

        Args:
            wrong_format: reward returned when the output cannot be parsed.
            correct_answer: reward returned when the parsed answer equals the reference.
            wrong_answer: reward returned for any other parsed answer.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Create a random tree and compute the reference answer.

        Reads `self.parameter["N"]` (must be >= 3) and writes
        `self.parameter["edges"]` (N - 1 pairs `(u, v)` with u < v) and
        `self.parameter["reference_answer"]` (the minimum winning K).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Attach every vertex of a random permutation to a random earlier one:
        # this always yields a connected, acyclic graph.
        edges = self.parameter["edges"] = []
        permutations = list(range(N))
        random.shuffle(permutations)
        for index, vertex in enumerate(permutations) :
            if index == 0 :
                continue
            u, v = vertex, random.choice(permutations[: index])
            u, v = min(u, v), max(u, v)
            edges.append((u, v))
        random.shuffle(edges)

        for u, v in edges :
            assert 0 <= u < v < N
        assert len(edges) == len(set(edges)) == N - 1

        self.parameter["reference_answer"] = self._minimum_k(N, edges)


    @staticmethod
    def _minimum_k(N, edges) :
        """
        Return the smallest K for which the subtree DP leaves no deficit at vertex 0.

        For a fixed K, `need[u] = children(u) - K + sum(max(need[child], 0))`;
        K is feasible when `need[0] <= 0`. The search range is
        `[children(0), max children]`, the same bounds the recursive version used.

        The tree is walked once with an explicit stack and the resulting order
        is reused for every probe, so no recursion depth limit applies.
        """
        adjacency = [[] for _ in range(N)]
        for u, v in edges :
            adjacency[u].append(v)
            adjacency[v].append(u)

        # Preorder from the root: every vertex appears after its parent.
        parent = [-1] * N
        order = []
        stack = [0]
        while stack :
            u = stack.pop()
            order.append(u)
            for v in adjacency[u] :
                if v != parent[u] :
                    parent[v] = u
                    stack.append(v)

        child_count = [0] * N
        for v in order[1 :] :
            child_count[parent[v]] += 1

        def feasible(k) :
            need = [0] * N
            # Reverse preorder visits all children before their parent.
            for u in reversed(order) :
                need[u] += child_count[u] - k
                if parent[u] >= 0 and need[u] > 0 :
                    need[parent[u]] += need[u]
            return need[0] <= 0

        low, high = child_count[0], max(child_count)
        best = high  # K = max child count makes every term non-positive
        while low <= high :
            mid = (low + high) // 2
            if feasible(mid) :
                best = mid
                high = mid - 1
            else :
                low = mid + 1
        return best


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and one `(u, v)` line per edge."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; return None when absent or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a raw output by exact match with the reference answer.

        `self.processor` is not defined in this class (presumably supplied by
        the base class); a `None` result from it is scored as "wrong_format".
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
Output one action per line in the order they should be performed.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0, + **kwargs) : + """ + Initialize the TwiddlePuzzle_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_beta" : rewarding_beta, + "rewarding_weight" : rewarding_weight, + } + + + def _generate(self) -> None : + assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter" + MAX_N_M = self.parameter["MAX_N_M"] + assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2" + + N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M) + K = self.parameter["K"] = random.randint(2, min(N, M)) + + start_permutation = list(range(N * M)) + random.shuffle(start_permutation) + start_grid = self.parameter["start_grid"] = [[start_permutation[i * M + j] for j in range(M)] for i in range(N)] + + assert "steps" in self.parameter, "steps is required in parameter" + steps = self.parameter["steps"] + assert steps >= 1, "steps should be greater than or equal to 1" + + destination_grid = [row.copy() for row in start_grid] + self.parameter["reference_answer"] = "" + for step in range(steps) : + i = random.randint(0, N - K) + j = random.randint(0, M - K) + self.parameter["reference_answer"] += "{} {}\n".format(i, j) + + new_grid = [row.copy() for row in destination_grid] + for x in range(K) : + for y in range(K) : + new_grid[i + K - 1 - y][j + x] = destination_grid[i + x][j + y] + destination_grid = new_grid + self.parameter["destination_grid"] = destination_grid + + + def _prompt_generate(self) -> str : + N, M = self.parameter["N"], self.parameter["M"] + return self.prompt_template.format( + N = N, + 
M = M, + NM_minus_1 = N * M - 1, + K = self.parameter["K"], + start_grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["start_grid"]), + destination_grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["destination_grid"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + actions = [] + for line in answer.splitlines() : + line = line.strip() + if line : + actions.append(line.split()) + action = actions[-1] + if len(action) != 2 : + return None + try : + action[0] = int(action[0]) + action[1] = int(action[1]) + except ValueError : + return None + return actions + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + destination_grid = [row.copy() for row in self.parameter["start_grid"]] + + for i, j in processed_result : + if not (0 <= i <= self.parameter["N"] - self.parameter["K"] and 0 <= j <= self.parameter["M"] - self.parameter["K"]) : + return self.rewards["invalid_solution"] + new_grid = [destination_grid[row].copy() for row in range(self.parameter["N"])] + for x in range(self.parameter["K"]) : + for y in range(self.parameter["K"]) : + new_grid[i + self.parameter["K"] - 1 - y][j + x] = destination_grid[i + x][j + y] + destination_grid = new_grid + + if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" : + return self.rewards["rewarding_weight"] * ((sum(sum(int(a == b) for a, b in zip(gold_row, answer_row)) for gold_row, answer_row in zip(self.parameter["destination_grid"], destination_grid)) / (self.parameter["N"] * self.parameter["M"])) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * all(all(a == b for a, b in zip(gold_row, answer_row)) for gold_row, answer_row in zip(self.parameter["destination_grid"], destination_grid)) + else : + raise 
class TwoSAT_Environment(VerifiableEnvironment) :
    """
    2-SAT environment: find a 0/1 assignment satisfying every two-literal OR clause.

    Instances are built around a hidden random assignment, so at least one
    solution always exists; any satisfying assignment scores full marks.
    """
    prompt_template = \
r"""There are {N} boolean (0/1) values x[0], x[1], ..., x[{N_minus_1}]. Each of the following {M} expressions (`|` means OR, `!` means NOT) must equal 1:
{expressions}

Please find any solution x[0], x[1], ..., x[{N_minus_1}] that satisfies the conditions above.

Output Format: Your final answer should be a single line containing x[0], x[1], ..., x[{N_minus_1}], separated by **spaces**.
Example: `{N_boolean}` (do **NOT** include quotes or backticks)."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the TwoSAT_Environment instance.

        The four reward settings are stored in `self.rewards`; remaining
        keyword arguments are forwarded to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    def _generate(self) -> None :
        """
        Draw a hidden assignment and M random clauses that it satisfies.

        Reads `N` (>= 2) and `M` (>= 1) from `self.parameter`; writes `x`,
        `reference_answer` (the hidden assignment, space separated) and
        `clauses` (lists of two `(index, is_positive)` literals).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        hidden = self.parameter["x"] = [random.randint(0, 1) for _ in range(N)]
        self.parameter["reference_answer"] = " ".join(str(bit) for bit in hidden)

        clauses = self.parameter["clauses"] = []
        while len(clauses) < M :
            # Two distinct variables, each with an independently chosen polarity.
            first, second = random.sample(range(N), 2)
            candidate = [(first, random.random() < 0.5), (second, random.random() < 0.5)]
            # Rejection sampling: keep only clauses the hidden assignment satisfies.
            if any(bool(hidden[index]) == is_positive for index, is_positive in candidate) :
                clauses.append(candidate)

    def _prompt_generate(self) -> str :
        """Render the clauses as `(x[i]) | (!x[j])` lines and fill the prompt template."""
        N = self.parameter["N"]

        def render(clause) :
            return " | ".join("({}x[{}])".format("" if is_positive else "!", index) for index, is_positive in clause)

        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            M = self.parameter["M"],
            expressions = "\n".join(render(clause) for clause in self.parameter["clauses"]),
            N_boolean = " ".join(str(i % 2) for i in range(N)),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Split the answer on whitespace into integers; None if absent or any token is not an integer."""
        if answer is None :
            return None # Invalid answer format
        try :
            return [int(token) for token in answer.split()]
        except ValueError :
            return None # Invalid answer format

    def scorer(self, output : str) -> float :
        """
        Score an assignment by the number of clauses it satisfies.

        Unparseable output, a wrong number of values, or a value other than
        0/1 earns "wrong_format". Otherwise the reward depends on
        `rewarding_strategy`: "(satisfied/all)^beta" gives partial credit,
        "satisfied=all" is all-or-nothing.

        Raises:
            NotImplementedError: for any other rewarding strategy.
        """
        assignment = self.processor(output)
        if assignment is None :
            return self.rewards["wrong_format"]
        assert isinstance(assignment, list), "processed_result should be a list"

        if len(assignment) != self.parameter["N"] or any(value not in (0, 1) for value in assignment) :
            return self.rewards["wrong_format"]

        satisfied = 0
        for clause in self.parameter["clauses"] :
            for index, is_positive in clause :
                # A literal holds when the variable's truth value matches its polarity.
                if bool(assignment[index]) == bool(is_positive) :
                    satisfied += 1
                    break

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(satisfied/all)^beta" :
            return self.rewards["rewarding_weight"] * ((satisfied / len(self.parameter["clauses"])) ** self.rewards["rewarding_beta"])
        if strategy == "satisfied=all" :
            return self.rewards["rewarding_weight"] * (satisfied == len(self.parameter["clauses"]))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
def _generate(self) -> None :
    # Build a random set of distinct integers from 2..N and count the ordered
    # pairs (S, T) of disjoint subsets with gcd(x, y) = 1 for all x in S, y in T.
    # Reads self.parameter["N"]; writes self.parameter["set"] and
    # self.parameter["reference_answer"].
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    # Between 2 and N - 1 distinct values drawn from 2..N.
    set_size = random.randint(2, N - 1)
    A = self.parameter["set"] = random.sample(range(2, N + 1), set_size)

    assert len(A) == len(set(A)) == set_size, "The set must contain unique integers"


    MAX = max(A)

    # Sieve: primes are visited in increasing order, so the last prime written
    # into max_prime_factor[j] is the largest prime factor of j.
    is_prime = [True] * (MAX + 1)
    is_prime[0] = is_prime[1] = False
    max_prime_factor = [None] * (MAX + 1)
    for i in range(2, MAX + 1) :
        if is_prime[i] :
            max_prime_factor[i] = i
            for j in range(2 * i, MAX + 1, i) :
                is_prime[j] = False
                max_prime_factor[j] = i

    # group2numbers: group key -> list of prime-factor lists, one per number.
    # small_primes: prime -> bit index used in the masks below.
    group2numbers = {}
    small_primes = dict()
    for a in A :
        # Peel off the largest remaining prime factor until nothing is left.
        prime_factors = []
        x = a
        while x > 1 :
            prime = max_prime_factor[x]
            prime_factors.append(prime)
            x //= prime

        assert max(prime_factors) == prime_factors[0], "The largest prime factor must be the first one"
        if prime_factors[0] * prime_factors[0] > MAX :
            # A prime p with p * p > MAX can divide a number <= MAX at most once;
            # numbers sharing such a prime are grouped under it and the prime
            # itself is dropped from their factor list.
            group = prime_factors[0]
            prime_factors = [prime for prime in prime_factors if prime != group]
            if group not in group2numbers :
                group2numbers[group] = []
            group2numbers[group].append(prime_factors)
        else :
            # No such large prime: the number forms its own group (negative key
            # so it cannot collide with a prime-valued key).
            group2numbers[-a] = [prime_factors]

        for prime in prime_factors :
            if prime not in small_primes :
                small_primes[prime] = len(small_primes)
    # F[S][T]: number of ways to distribute the groups handled so far such that
    # the numbers placed in the first subset use small-prime mask S and those
    # placed in the second subset use mask T.
    F = [[0] * (1 << len(small_primes)) for S in range(1 << len(small_primes))]
    F[0][0] = 1
    for group, prime_factors_list in group2numbers.items() :
        # G0: this group's numbers may only be added to the first subset (or skipped).
        # G1: this group's numbers may only be added to the second subset (or skipped).
        G0 = [[F[S][T] for T in range(1 << len(small_primes))] for S in range(1 << len(small_primes))]
        G1 = [[F[S][T] for T in range(1 << len(small_primes))] for S in range(1 << len(small_primes))]
        for prime_factors in prime_factors_list :
            mask = 0
            for prime in prime_factors :
                mask |= (1 << small_primes[prime])

            # Start from "number skipped" and add the "number taken" transitions.
            new_G0 = [[G0[S][T] for T in range(1 << len(small_primes))] for S in range(1 << len(small_primes))]
            new_G1 = [[G1[S][T] for T in range(1 << len(small_primes))] for S in range(1 << len(small_primes))]
            for S in range(1 << len(small_primes)) :
                # Enumerate every submask T of the complement of S, from the
                # full complement down to 0.
                T = (1 << len(small_primes)) - 1 - S
                while True :
                    assert (T & S) == 0, "S and T must be disjoint"
                    if (mask & T) == 0 :
                        new_G0[S | mask][T] += G0[S][T]
                    if (mask & S) == 0 :
                        new_G1[S][T | mask] += G1[S][T]
                    if T == 0 :
                        break
                    T = (T - 1) & ((1 << len(small_primes)) - 1 - S)
            G0 = new_G0
            G1 = new_G1
        # G0 and G1 both contain the case where no number of this group is
        # taken (the old F), so it is subtracted once.
        for S in range(1 << len(small_primes)) :
            for T in range(1 << len(small_primes)) :
                F[S][T] = G0[S][T] + G1[S][T] - F[S][T]

    self.parameter["reference_answer"] = sum(F[S][T] for S in range(1 << len(small_primes)) for T in range(1 << len(small_primes)))
    assert self.parameter["reference_answer"] > 0
class UndamagedSubmatrixCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3400
    """
    Counting environment: number of contiguous all-one submatrices of a 0/1 matrix.

    `_generate` draws the matrix and computes the count; `scorer` rewards an
    integer answer by closeness to, or equality with, that count.
    """
    prompt_template = \
r"""You are given a matrix of size {N} × {M}, where each element is either `0` or `1`. Please count the number of **contiguous non-empty submatrices** that consist entirely of `1`s. The matrix is:
{matrix}

Note:
- Two submatrices are considered different if they differ in position, even if they contain the identical elements.
- The whole matrix itself is also considered a submatrix.
- **Output Format:** A single non-negative integer — the total number of all-one submatrices."""


    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the UndamagedSubmatrixCounting_Environment instance.

        The reward settings are kept in `self.rewards`; other keyword
        arguments go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Draw a random 0/1 matrix and count its all-one submatrices.

        Reads `MAX_N_M` (>= 2); writes `N`, `M`, `matrix` and
        `reference_answer` into `self.parameter`.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
        one_probability = random.random()
        grid = self.parameter["matrix"] = [[int(random.random() < one_probability) for _ in range(M)] for _ in range(N)]

        # last_zero_row[j]: most recent row in which column j held a 0 (-1 if none yet).
        last_zero_row = [-1] * M
        count = 0

        for i, row in enumerate(grid) :
            # Monotonic stack of (column, height, rectangles whose bottom-right
            # corner is (i, column)); heights are non-decreasing bottom to top.
            pending = []
            for j, cell in enumerate(row) :
                if cell == 0 :
                    last_zero_row[j] = i
                # Run of consecutive ones ending at (i, j), going upwards.
                height = i - last_zero_row[j]

                while pending and pending[-1][1] > height :
                    pending.pop()

                if pending :
                    # Columns right of the nearest not-taller column are limited
                    # by the current height; the rest reuse that column's total.
                    left_column, _, left_total = pending[-1]
                    total = left_total + height * (j - left_column)
                else :
                    # Every column to the left is at least this tall.
                    total = height * (j + 1)

                pending.append((j, height, total))
                count += total

        self.parameter["reference_answer"] = count


    def _prompt_generate(self) -> str :
        """Fill the prompt template; each matrix row is written as a digit string."""
        rows = ["".join(str(cell) for cell in row) for row in self.parameter["matrix"]]
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            matrix = "\n".join(rows),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as an integer; None when absent or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score an integer answer against the reference count.

        Unparseable or negative answers earn "wrong_format". With
        "(min/max)^beta" the reward is `weight * (min/max) ** beta`, where an
        answer of 0 is only rewarded if the reference is 0 too; with
        "gold=answer" the reward is `weight` times exact equality.

        Raises:
            NotImplementedError: for any other rewarding strategy.
        """
        claimed = self.processor(output)
        if claimed is None or claimed < 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            reference = self.parameter["reference_answer"]
            if claimed == 0 :
                # Avoids 0 / 0 when both values are zero.
                return self.rewards["rewarding_weight"] * (self.parameter["reference_answer"] == 0)
            smaller, larger = min(reference, claimed), max(reference, claimed)
            return self.rewards["rewarding_weight"] * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (claimed == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class ValueDiminishingSelection_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2647
    """
    Ordered selection environment with diminishing returns.

    Each selected item gains its base value W minus the sum of the R values
    of all items selected before it. `_generate` draws W and R and computes
    the best achievable total; `scorer` evaluates a proposed sequence.
    """
    prompt_template = \
r"""You are given {N} items labeled from `0` to `{N_minus_1}`. Each item has a base value W[i] and a diminishing factor R[i]. The list of values and diminishing factors is given as:
{W_and_R}

You must select a sequence of **distinct items** (the order matters). When selecting the i-th item:
- Its effective value is W[i] minus the total of R[j] for all previously selected items j.
- In other words, each item selected **after** i will lose R[i] from their gain due to the diminishing effect.

Your goal is to select a sequence of items to **maximize the total gain**.

**Output Format:** Output a single line containing the indices of the selected items in order, separated by spaces."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the ValueDiminishingSelection_Environment instance.

        The reward settings are kept in `self.rewards`; other keyword
        arguments go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    def _generate(self) -> None :
        """
        Draw W and R and compute the maximum total gain.

        Reads `N` (>= 2); writes `W`, `R` and `gold_answer` into
        `self.parameter`. The gold value is never negative because
        selecting nothing yields 0.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        values = self.parameter["W"] = [random.randint(0, N * N // 2) for _ in range(N)]
        factors = self.parameter["R"] = [random.randint(1, N) for _ in range(N)]

        # Items with larger R are considered first.
        items = sorted(zip(values, factors), key = lambda item : item[1], reverse = True)

        # totals[j]: best gain with exactly j items picked (None = not reachable yet).
        totals = [0] + [None] * N
        for reachable, (value, factor) in enumerate(items, start = 1) :
            # Walk j downwards so totals[j - 1] still belongs to the previous item.
            for j in range(reachable, 0, -1) :
                if totals[j - 1] is None :
                    continue
                # As the j-th pick in this order, the item is charged its own R
                # once for each of the j - 1 picks made before it.
                candidate = totals[j - 1] + value - factor * (j - 1)
                if totals[j] is None or candidate > totals[j] :
                    totals[j] = candidate

        self.parameter["gold_answer"] = max(total for total in totals if total is not None)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with one `W[i]=.. R[i]=..` line per item."""
        N = self.parameter["N"]
        W, R = self.parameter["W"], self.parameter["R"]
        lines = ["W[{}]={} R[{}]={}".format(i, W[i], i, R[i]) for i in range(N)]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            W_and_R = "\n".join(lines),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Split the answer on whitespace into integers; None if absent or any token is not an integer."""
        if answer is None :
            return None # Invalid answer format
        try :
            return [int(token) for token in answer.split()]
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a proposed selection order by its total gain relative to the gold value.

        Unparseable output earns "wrong_format"; repeated or out-of-range
        indices earn "invalid_solution". The gain is clamped below at 0 and
        then rewarded according to `rewarding_strategy`.

        Raises:
            NotImplementedError: for an unknown rewarding strategy.
        """
        sequence = self.processor(output)
        if sequence is None :
            return self.rewards["wrong_format"]
        assert isinstance(sequence, list), "processed_result should be a list"

        if len(set(sequence)) != len(sequence) :
            return self.rewards["invalid_solution"]
        if any(i < 0 or i >= self.parameter["N"] for i in sequence) :
            return self.rewards["invalid_solution"]

        gold = self.parameter["gold_answer"]
        answer = 0
        penalty = 0  # sum of R over the items already taken
        for i in sequence :
            answer += self.parameter["W"][i] - penalty
            penalty += self.parameter["R"][i]
        answer = max(0, answer)
        assert answer <= gold, "answer should be less than or equal to gold"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta" :
            if gold == 0 :
                assert answer == 0, "If gold is 0, answer should also be 0"
                return self.rewards["rewarding_weight"] * 1.0
            return self.rewards["rewarding_weight"] * ((answer / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (answer == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
an **undirected connected graph** with {N} vertices, labeled from `0` to `{N_minus_1}`. + +The graph contains the following undirected edges. Each edge is represented as a tuple `(u, v, w)`, meaning an undirected edge **connecting vertex `u` to vertex `v` with weight `w`**: +{edges} + +Please select a set of {K} distinct vertices. Try your best to minimize the largest distance of any vertex in the graph to its closest vertex in the selected set; the distance between two vertices `u` and `v` is defined as the sum of the weights of the edges in the **shortest path** connecting them. + +**Output Format:** Your final answer should be a single line containing the selected {K} vertices in any order, separated by spaces.""" + + def __init__(self, + wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0, + **kwargs) : + """ + Initialize the Vertex_KCenter_Environment instance. + """ + super().__init__(**kwargs) + + self.rewards = { + "wrong_format" : wrong_format, + "invalid_solution" : invalid_solution, + "rewarding_strategy" : rewarding_strategy, + "rewarding_weight" : rewarding_weight, + "rewarding_beta" : rewarding_beta, + } + + + def _generate(self) -> None : + assert "N" in self.parameter, "N is required in parameter" + N = self.parameter["N"] + assert N >= 3, "N should be greater than or equal to 3" + + K = self.parameter["K"] = random.randint(1, N - 1) + + assert "edge_density" in self.parameter, "edge_density is required in parameter" + edge_density = self.parameter["edge_density"] + assert 0.0 <= edge_density <= 1.0, "edge_density should be between 0.0 and 1.0" + + edges = self.parameter["edges"] = [] + + permutations = list(range(N)) + random.shuffle(permutations) + for index, vertex in enumerate(permutations) : + if index == 0 : + continue + u, v = vertex, random.choice(permutations[: index]) + u, v = min(u, v), max(u, v) + edges.append((u, v, 
random.randint(1, N))) + + num_edges = int(edge_density * N * (N - 1) / 2) + if len(edges) < num_edges : + remaining_edges = list(set((u, v) for u in range(N) for v in range(u + 1, N)) - set((u, v) for u, v, w in edges)) + remaining_edges = random.sample(remaining_edges, min(len(remaining_edges), num_edges - len(edges))) + for u, v in remaining_edges : + edges.append((u, v, random.randint(1, N))) + random.shuffle(edges) + + Floyd = self.parameter["Floyd"] = [[N * N] * N for _ in range(N)] + for i in range(N) : + Floyd[i][i] = 0 + + for u, v, w in edges : + assert 0 <= u < v < N + Floyd[u][v] = Floyd[v][u] = w + assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique" + + for k in range(N) : + for i in range(N) : + for j in range(N) : + val = Floyd[i][k] + Floyd[k][j] + if val < Floyd[i][j] : + Floyd[i][j] = val + + + self.parameter["reference_answer"], self.parameter["gold_answer"] = None, N * N + solution, solution_dist = [], [N * N] * N + def DFS(u : int) -> None : + nonlocal solution, solution_dist + + if len(solution) + (N - u) < K : + return + if N == u : + assert len(solution) == K, "solution should have exactly K elements" + current_answer = max(solution_dist) + if current_answer < self.parameter["gold_answer"] : + self.parameter["reference_answer"], self.parameter["gold_answer"] = solution.copy(), current_answer + return + + DFS(u + 1) + if len(solution) < K : + solution.append(u) + cache_solution_dist = solution_dist.copy() + for v in range(N) : + solution_dist[v] = min(solution_dist[v], Floyd[u][v]) + DFS(u + 1) + solution_dist = cache_solution_dist + solution.pop() + DFS(0) + self.parameter["reference_answer"] = " ".join(map(str, self.parameter["reference_answer"])) + assert self.parameter["gold_answer"] > 0 + + + def _prompt_generate(self) -> str : + N = self.parameter["N"] + return self.prompt_template.format( + N = N, + N_minus_1 = N - 1, + K = self.parameter["K"], + edges = "\n".join("({}, {}, {})".format(u, v, w) for u, 
v, w in self.parameter["edges"]), + ) + + + def _process(self, answer : Optional[str]) -> Optional[List] : + if answer is not None : + answer = answer.strip() + try : + answer_array = list(map(int, answer.split())) + return answer_array + except ValueError : + return None + else : + return None + + + def scorer(self, output : str) -> float : + processed_result = self.processor(output) + if processed_result is not None : + assert isinstance(processed_result, list), "processed_result should be a list" + + selected_vertices = processed_result + + if len(selected_vertices) != len(set(selected_vertices)) : + return self.rewards["invalid_solution"] + if len(selected_vertices) != self.parameter["K"] : + return self.rewards["invalid_solution"] + if not all(0 <= u < self.parameter["N"] for u in selected_vertices) : + return self.rewards["invalid_solution"] + + answer = 0 + for u in range(self.parameter["N"]) : + dist = self.parameter["Floyd"][u][selected_vertices[0]] + for selected_vertex in selected_vertices[1 :] : + dist = min(dist, self.parameter["Floyd"][u][selected_vertex]) + answer = max(answer, dist) + gold = self.parameter["gold_answer"] + assert gold <= answer, "gold should be less than or equal to answer" + + if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" : + return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"]) + elif self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * (gold == answer) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/virus_synthesis/__init__.py b/server/Gym/environments/virus_synthesis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fecf02b4108f9d28b1134ce3a1803eea44df9f25 --- /dev/null +++ 
class VirusSynthesis_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4762
    # Prompt shown to the solver; {S} is replaced by the generated binary target string.
    prompt_template = \
r"""Starting from an empty string, you can perform the following operations:
1. Add a single character to either the beginning or the end of the string.
2. Let the current string be S and its reverse be S'. You can append S' to either the beginning or the end of S (i.e., form S' + S or S + S', where + denotes string concatenation).

Your task is to obtain the target string by performing the minimum number of operations: {S}
**Output Format:** Output a single integer — the minimum number of operations required to construct the string given above."""

    def __init__(self,
                 wrong_format : float = -1.0, wrong_answer : float = 0.0, correct_answer : float = +1.0,
                 **kwargs) :
        """
        Initialize the VirusSynthesis_Environment instance.

        Args:
            wrong_format: reward returned by `scorer` when the output cannot be parsed as an integer.
            wrong_answer: reward returned when the parsed integer differs from the reference answer.
            correct_answer: reward returned when the parsed integer equals the reference answer.
            **kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_answer" : wrong_answer,
            "correct_answer" : correct_answer,
        }


    def _generate(self) -> None :
        """
        Build a random binary target string and compute its minimum operation count.

        Reads `self.parameter["loose_MAX_N"]` (must be >= 4) and writes
        `self.parameter["S"]` and `self.parameter["reference_answer"]`.
        The string is grown with the same two operations the prompt describes and
        generation stops as soon as its length reaches `loose_MAX_N`; because a
        mirroring step can double the string, the final length may exceed that bound.
        """
        assert "loose_MAX_N" in self.parameter, "loose_MAX_N is required in parameter"
        loose_MAX_N = self.parameter["loose_MAX_N"]
        assert loose_MAX_N >= 4, "loose_MAX_N should be greater than or equal to 4"

        # Draw four random positive weights and normalise them into a distribution over
        # the four moves (prepend char, append char, mirror in front, mirror behind).
        operation_probabilities = [random.randint(1, loose_MAX_N) for _ in range(4)]
        operation_probabilities = [p / sum(operation_probabilities) for p in operation_probabilities]
        S = ""
        while True :
            operation = random.choices(population = ["1_beginning", "1_end", "2_beginning", "2_end"], weights = operation_probabilities)[0]
            if operation.startswith("1_") :
                # Operation 1: add one random binary character at either end.
                char = random.choice("01")
                if operation == "1_beginning" :
                    S = char + S
                elif operation == "1_end" :
                    S = S + char
                else :
                    assert False
            elif operation.startswith("2_") :
                # Operation 2: attach the reverse of S at either end (a no-op while S is empty).
                S_rev = S[:: -1]
                if operation == "2_beginning" :
                    S = S_rev + S
                elif operation == "2_end" :
                    S = S + S_rev
                else :
                    assert False
            else :
                assert False
            if len(S) >= loose_MAX_N :
                break
        self.parameter["S"] = S


        def min_operations(S):
            """
            Return the minimum number of operations needed to build S.

            Builds a palindromic tree of S and runs a DP over its even-length
            palindromes; every candidate answer is "cost of one palindromic
            substring + one single-character operation per remaining character".
            """
            n = len(S)
            # Map the two binary characters to child-slot indices
            char2idx = {'0': 0, '1': 1}
            # Palindromic tree structures (node 0 has length 0, node 1 has length -1)
            ch = [[-1] * 4 for _ in range(2)] # child pointers, -1 means absent; only slots 0 and 1 are ever used
            fail = [1, 1] # fail links
            len_list = [0, -1] # palindrome lengths
            tran = [0, 0] # "half" links: a palindromic suffix whose length is at most half of the node's length

            tot = 1 # current largest node index
            cur = 0 # node of the longest palindromic suffix of the prefix processed so far

            def get_fail(x, pos):
                # Follow fail links from x until the palindrome can be extended on both
                # sides by S[pos] (the bounds test rejects extensions running off the front)
                while pos - len_list[x] - 1 < 0 or S[pos - len_list[x] - 1] != S[pos]:
                    x = fail[x]
                return x

            # Build the palindromic tree, one character at a time
            for pos in range(n):
                c = char2idx[S[pos]]
                posx = get_fail(cur, pos)
                if ch[posx][c] == -1:
                    tot += 1
                    ch.append([-1] * 4)
                    len_list.append(len_list[posx] + 2)
                    # Compute fail link for the new node; an absent child means the
                    # only proper palindromic suffix is the empty one (node 0)
                    f = get_fail(fail[posx], pos)
                    f2 = ch[f][c]
                    if f2 == -1:
                        f2 = 0
                    fail.append(f2)
                    # Compute the half link (tran)
                    if len_list[tot] <= 2:
                        tran.append(f2)
                    else:
                        # Start from the parent's half link and keep following fail links
                        # until the extended palindrome fits in half of the new node
                        now = tran[posx]
                        while (pos - len_list[now] - 1 < 0 or
                               S[pos - len_list[now] - 1] != S[pos] or
                               (len_list[now] + 2) * 2 > len_list[tot]):
                            now = fail[now]
                        tran.append(ch[now][c])
                    # Link the new node only after its fail/tran links were computed
                    ch[posx][c] = tot
                cur = ch[posx][c]

            # DP over the palindromic tree to compute minimal operations.
            # Every non-root node starts at its own length (built character by character).
            dp = [0] * (tot + 1)
            for i in range(2, tot + 1):
                dp[i] = len_list[i]
            # NOTE(review): the length-0 node is seeded with cost 1, mirroring the reference
            # solution of the source problem — confirm the intended interpretation there.
            dp[0] = 1

            # BFS from node 0 only, so only even-length palindromes (the ones a
            # mirroring operation can produce) are relaxed here
            q = deque([0])
            ans = n
            while q:
                now = q.popleft()
                for c in range(4):
                    son = ch[now][c]
                    if son == -1:
                        continue
                    # Option 1: derive from the parent palindrome at one extra cost
                    dp[son] = dp[now] + 1
                    # Option 2: take the half-link palindrome, pad it with single characters
                    # up to half of `son`, then mirror once
                    alt = dp[tran[son]] + 1 + len_list[son] // 2 - len_list[tran[son]]
                    if alt < dp[son]:
                        dp[son] = alt
                    # Combine with the characters of S outside this palindrome, one operation each
                    cost = dp[son] + n - len_list[son]
                    if cost < ans:
                        ans = cost
                    q.append(son)
            return ans
        self.parameter["reference_answer"] = min_operations(S)


    def _prompt_generate(self) -> str :
        """Return the prompt with the generated target string substituted for {S}."""
        return self.prompt_template.format(S = self.parameter["S"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as a single integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns the "correct_answer" reward when the processed integer equals the
        stored reference answer, "wrong_answer" when it differs, and
        "wrong_format" when `self.processor` yields None.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
class VisibleLine_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3194
    prompt_template = \
r"""You are given {N} lines on the 2D plane:
{lines}

We say a line is **visible** if any portion of it can be seen when viewed from y = +∞ (i.e., looking vertically downward). That is, a line is visible if there exists at least one x-coordinate such that this line lies on top (i.e., has the maximum y-value) at that x among all lines.

**Output Format:** A single line containing the indices of all visible lines, in any order, separated by spaces."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(intersection/union)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0,
                 **kwargs) :
        """
        Initialize the VisibleLine_Environment instance.

        Args:
            wrong_format: reward for unparsable output or out-of-range indices.
            rewarding_strategy: "(intersection/union)^beta" or "gold=answer".
            rewarding_beta: exponent applied to the intersection-over-union ratio.
            rewarding_weight: multiplier applied to the final reward.
            **kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_beta = rewarding_beta,
            rewarding_weight = rewarding_weight,
        )


    def _generate(self) -> None :
        """
        Sample N distinct lines y = A*x + B and record which of them are visible from above.

        Writes `lines` (shuffled list of (A, B) pairs), `gold_answer` (ascending
        0-based indices of the visible lines) and `reference_answer` (the same
        indices joined by spaces) into `self.parameter`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Keep drawing (slope, intercept) pairs until N distinct ones were collected;
        # adding a duplicate to the set simply leaves it unchanged.
        distinct = set()
        while len(distinct) < N :
            slope = random.randint(-N, +N)
            intercept = random.randint(-N, +N)
            distinct.add((slope, intercept))
        lines = list(distinct)
        self.parameter["lines"] = lines
        random.shuffle(lines)

        # Visit line indices by ascending slope; among equal slopes the highest
        # intercept comes first, so every later line of that slope is dominated.
        by_slope = sorted(range(N), key = lambda i : (lines[i][0], -lines[i][1]))

        hull = []  # indices of the lines currently forming the upper envelope
        last_slope = None
        for i in by_slope :
            a, b = lines[i]
            if a == last_slope :
                continue
            last_slope = a

            # Treat lines as points (A, B): the top of the stack survives only if
            # (second-to-top, top, new) is a strict right turn, i.e. the cross
            # product is negative; otherwise the new line covers it.
            while len(hull) >= 2 :
                a1, b1 = lines[hull[-2]]
                a2, b2 = lines[hull[-1]]
                if (a2 - a1) * (b - b2) < (b2 - b1) * (a - a2) :
                    break
                hull.pop()
            hull.append(i)

        visible = sorted(hull)
        self.parameter["gold_answer"] = visible
        self.parameter["reference_answer"] = " ".join(map(str, visible))


    def _prompt_generate(self) -> str :
        """Return the prompt listing every line as `Line i: y = Ax + B`."""
        described = [
            "Line {}: y = {}x + {}".format(index, slope, intercept)
            for index, (slope, intercept) in enumerate(self.parameter["lines"])
        ]
        return self.prompt_template.format(N = self.parameter["N"], lines = "\n".join(described))


    def _process(self, answer : Optional[str]) -> Optional[set] :
        """Parse whitespace-separated integers into a set; return None on missing or malformed input."""
        if answer is None :
            return None # Invalid answer format
        tokens = answer.strip().split()
        try :
            return {int(token) for token in tokens}
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a model output against the set of visible lines.

        Returns "wrong_format" when parsing fails or any index lies outside
        [0, N); otherwise applies the configured rewarding strategy to the
        submitted index set and the gold set.
        """
        picked = self.processor(output)
        if picked is None :
            return self.rewards["wrong_format"]
        assert isinstance(picked, set), "processed_result should be a list"

        line_count = self.parameter["N"]
        if any(not (0 <= index < line_count) for index in picked) :
            return self.rewards["wrong_format"]
        gold = set(self.parameter["gold_answer"])

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(intersection/union)^beta" :
            shared = len(picked & gold)
            combined = len(picked | gold)
            return ((shared / combined) ** self.rewards["rewarding_beta"]) * self.rewards["rewarding_weight"]
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (picked == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class WarehouseConstruction_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2120
    # Prompt shown to the solver; {D}, {P} and {C} are rendered as "X[i]=value" lists.
    prompt_template = \
r"""You are given {N} factories arranged from top to bottom along a mountain, indexed from 0 to {N_minus_1}. Factory 0 is at the top and factory {N_minus_1} is at the bottom.

Each factory has
- Distance from factory 0: {D}
- Number of products: {P}
- Cost to build a warehouse at that factory: {C}

You can choose to build warehouses at any subset of factories.
- A warehouse can store any number of products.
- If a factory does not build a warehouse, all its products must be sent **downhill** to a factory with a warehouse (i.e., to a factory with a higher index). Transporting one product over one unit of distance costs 1.
- The total cost is the sum of warehouse construction costs and product transportation costs. Try your best to minimize the total cost.

**Output Format:** Output a single line containing the indices of the factories where warehouses should be built, separated by spaces (in any order)."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the WarehouseConstruction_Environment instance.

        Args:
            wrong_format: reward when the output cannot be parsed as integers.
            invalid_solution: reward when an index is out of range or some
                factory with products has no warehouse at or below it.
            rewarding_strategy: "(gold/answer)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the final reward.
            rewarding_beta: exponent used by the "(gold/answer)^beta" strategy.
            **kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Sample a random instance and compute its minimum total cost.

        Reads `self.parameter["N"]` (must be >= 2) and writes `D` (strictly
        increasing distances starting at 0), `P` (product counts, may be 0),
        `C` (positive building costs) and `gold_answer` (the optimal cost).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        # N - 1 distinct positive distances, sorted, with factory 0 fixed at distance 0
        D = random.sample(range(1, 2 * N + 1), N - 1)
        D.sort()
        self.parameter["D"] = D = [0] + D
        assert len(D) == N, "X should have length N"
        assert all(di < di1 for di, di1 in zip(D, D[1 :])), "D should be strictly increasing"

        self.parameter["P"] = P = [random.randint(0, N) for _ in range(N)]
        self.parameter["C"] = C = [random.randint(1, N * 2) for _ in range(N)]


        # Prefix sums over the first i factories: Q[i] = sum of P, R[i] = sum of D * P
        Q = [0] * (N+1)
        R = [0] * (N+1)
        for i in range(1, N+1):
            Q[i] = Q[i-1] + P[i-1]
            R[i] = R[i-1] + D[i-1] * P[i-1]

        # f[i] is the DP value for "a warehouse is built at factory i-1 and the first
        # i factories are served"; f[0] = 0 is the empty prefix
        f = [0] * (N+1)

        # Helpers of the slope-optimisation: candidate u is the point (decx(u), decy(u))
        def decx(idx):
            return Q[idx]
        def decy(idx):
            return f[idx] + R[idx]
        def maked(i, u):
            # f[u], plus the cost of moving the products of factories u..i-1 down to
            # factory i-1, plus the building cost C[i-1]
            return f[u] + D[i-1] * (Q[i] - Q[u]) - (R[i] - R[u]) + C[i-1]

        # Monotone deque of candidate u-indices: left end = oldest, right end = newest
        dq = deque([0])

        for i in range(1, N+1):
            # 1) Drop the oldest candidate while the next one is at least as good for
            #    the query slope D[i-1] (slopes are compared by cross-multiplication)
            while len(dq) >= 2:
                u1, u2 = dq[0], dq[1]
                if decy(u2) - decy(u1) <= D[i-1] * (decx(u2) - decx(u1)):
                    dq.popleft()
                else:
                    break

            # 2) The front candidate now gives f[i]
            u = dq[0]
            f[i] = maked(i, u)

            # 3) f[i] must already be set here: decy(i) reads it. Drop the newest
            #    candidates that the new point i makes obsolete
            while len(dq) >= 2:
                u1, u2 = dq[-1], dq[-2]
                if (decy(u1) - decy(u2)) * (decx(i) - decx(u1)) \
                   >= (decy(i) - decy(u1)) * (decx(u1) - decx(u2)):
                    dq.pop()
                else:
                    break

            # 4) Add the new candidate i
            dq.append(i)

        # Trailing factories without products need no warehouse, so the last warehouse
        # may sit above them: take the best f[x] over every x with P[x..N-1] all zero
        ans = f[N]
        x = N
        while x > 0 and P[x-1] == 0:
            x -= 1
            ans = min(ans, f[x])

        self.parameter["gold_answer"] = ans


    def _prompt_generate(self) -> str :
        """Return the prompt with N and the D / P / C arrays rendered as `X[i]=value` lists."""
        N = self.parameter["N"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            D = " ".join("D[{}]={}".format(i, Di) for i, Di in enumerate(self.parameter["D"])),
            P = " ".join("P[{}]={}".format(i, Pi) for i, Pi in enumerate(self.parameter["P"])),
            C = " ".join("C[{}]={}".format(i, Ci) for i, Ci in enumerate(self.parameter["C"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse whitespace-separated integers into a list; return None on missing or malformed input."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None # Invalid answer format
        else :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a list of warehouse indices by the total cost it induces.

        Returns "wrong_format" when parsing fails and "invalid_solution" when
        an index is out of range or a factory holding products has no
        warehouse at its own or a higher index. Otherwise the reward is
        derived from the optimal cost and the submitted plan's cost according
        to the configured strategy.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            answer = 0
            built = [False] * self.parameter["N"]
            # Building costs: note that an index listed more than once is charged each time
            for idx in processed_result :
                if 0 <= idx < self.parameter["N"] :
                    built[idx] = True
                    answer += self.parameter["C"][idx]
                else :
                    return self.rewards["invalid_solution"]
            # Walk bottom-up so nearest_warehouse is always the closest warehouse at
            # index >= i, which is where factory i's products must go
            nearest_warehouse = None
            for i in range(self.parameter["N"] - 1, -1, -1) :
                if built[i] :
                    nearest_warehouse = i
                if self.parameter["P"][i] :
                    if nearest_warehouse is None :
                        return self.rewards["invalid_solution"]
                    answer += self.parameter["P"][i] * (self.parameter["D"][nearest_warehouse] - self.parameter["D"][i])

            gold = self.parameter["gold_answer"]
            assert gold <= answer, "gold_answer should be less than or equal to answer"

            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
                # A zero-cost plan is only possible when the optimum is zero too; avoid 0 / 0
                if answer == 0 :
                    assert gold == 0, "If answer is 0, gold should also be 0"
                    return self.rewards["rewarding_weight"]
                return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (gold == answer)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
class WeightedBinaryTree_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1040
    prompt_template = \
r"""You are given a binary tree with {N} nodes, labeled from 0 to {N_minus_1}.
The **in-order traversal** of the tree is: `0, 1, ..., {N_minus_1}` — that is, the in-order sequence is fixed in increasing order of node labels.

Each node `i` has an associated score `d_i` (where `0 ≤ i < {N}`), given as:
{scores}

The **score of a binary tree** is defined recursively as follows:
- `score(tree) = score(left_subtree) × score(right_subtree) + d_i`, where `i` is the root of the current subtree.
- If a subtree is **empty**, its score is defined to be `1`.
- If a node is a **leaf**, its score is simply `d_i` (ignore its empty subtrees).

Your task is to construct the binary tree that satisfies the above rules and has the **maximum possible score**, and then give its **pre-order traversal**.

Output Format:
Your final answer should be a single line containing the node labels in **pre-order traversal**, separated by **spaces**.
Example: `{all_node_sequence}` (do **NOT** include the backticks or quotes).
"""

    def __init__(self,
                 wrong_format : float = -1.0, not_permutation : float = -0.5, invalid_solution : float = 0.0, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the WeightedBinaryTree_Environment instance.

        Args:
            wrong_format: reward when the output cannot be parsed as integers.
            not_permutation: reward when the output is not a permutation of 0..N-1.
            invalid_solution: reward when the permutation is not a valid
                pre-order of any tree whose in-order is 0..N-1.
            rewarding_strategy: "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the final reward.
            rewarding_beta: exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "not_permutation" : not_permutation,
            "invalid_solution" : invalid_solution,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }


    def _generate(self) -> None :
        """
        Sample node scores and solve the instance with an interval DP.

        Reads `N` (>= 3) and `MAX_SCORE` (>= 1) from `self.parameter` and
        writes `scores`, `gold` (the maximum tree score) and
        `reference_answer` (pre-order of one optimal tree, space separated).
        """
        # These two checks were bare expressions before (the `assert` keyword was
        # missing), so a missing key surfaced as an unexplained KeyError.
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        assert "MAX_SCORE" in self.parameter, "MAX_SCORE is required in parameter"
        MAX_SCORE = self.parameter["MAX_SCORE"]
        assert MAX_SCORE >= 1, "MAX_SCORE should be greater than or equal to 1"

        scores = self.parameter["scores"] = [random.randint(1, MAX_SCORE) for _ in range(N)]

        # dpF[i][j]: best score of a tree whose in-order is i..j; roots[i][j]: its root
        dpF = [[0] * N for _ in range(N)]
        roots = [[None] * N for _ in range(N)]
        for i, score in enumerate(scores) :
            # A single node is a leaf and scores exactly d_i
            dpF[i][i] = score
            roots[i][i] = i
        for length in range(2, N + 1) :
            for i in range(N - length + 1) :
                j = i + length - 1
                for root in range(i, j + 1) :
                    # An empty side contributes the neutral factor 1
                    left = dpF[i][root - 1] if i <= root - 1 else 1
                    right = dpF[root + 1][j] if root + 1 <= j else 1
                    candidate = left * right + scores[root]
                    # `<=` keeps the last root among ties, as the reference answer expects
                    if dpF[i][j] <= candidate :
                        dpF[i][j] = candidate
                        roots[i][j] = root
        self.parameter["gold"] = dpF[0][N - 1]

        def preorder(i, j) :
            # Rebuild the pre-order of the optimal tree from the recorded roots
            if i > j :
                return []
            root = roots[i][j]
            return [root] + preorder(i, root - 1) + preorder(root + 1, j)
        self.parameter["reference_answer"] = " ".join(map(str, preorder(0, N - 1)))

    def _prompt_generate(self) -> str :
        """Return the prompt with N, the score list and an example node sequence filled in."""
        N = self.parameter["N"]
        scores = self.parameter["scores"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            scores="\n".join("d_{}={}".format(i, score) for i, score in enumerate(scores)),
            all_node_sequence = " ".join(map(str, range(N))),
        )


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse whitespace-separated integers into a list; return None on missing or malformed input."""
        if answer is not None :
            answer = answer.strip()
            try :
                answer_array = list(map(int, answer.split()))
                return answer_array
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a submitted pre-order traversal.

        Returns "wrong_format" when parsing fails, "not_permutation" when the
        list is not a permutation of 0..N-1 and "invalid_solution" when it is
        not the pre-order of a tree with in-order 0..N-1. Otherwise the tree
        score is compared with the optimum using the configured strategy.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            assert isinstance(processed_result, list), "processed_result should be a list"

            if len(processed_result) != self.parameter["N"] :
                return self.rewards["not_permutation"]
            if len(set(processed_result)) != self.parameter["N"] :
                return self.rewards["not_permutation"]
            for i in processed_result :
                if not (0 <= i < self.parameter["N"]) :
                    return self.rewards["not_permutation"]

            def get_score(inorder_l : int, inorder_r : int, preorder : List[int]) -> Optional[int] :
                # Score of the subtree whose in-order is [inorder_l, inorder_r] and whose
                # pre-order is `preorder`; None when the two traversals are inconsistent.
                assert len(preorder) == inorder_r - inorder_l + 1, "preorder should have the same length as inorder"

                root = preorder[0]
                if not (inorder_l <= root <= inorder_r) :
                    return None
                if inorder_l == inorder_r :
                    return self.parameter["scores"][root]
                # With a fixed in-order, the left subtree holds exactly the labels below root
                left_size = root - inorder_l
                left = get_score(inorder_l, root - 1, preorder[1 : 1 + left_size]) if left_size > 0 else 1
                right = get_score(root + 1, inorder_r, preorder[1 + left_size :]) if root + 1 <= inorder_r else 1
                if left is None or right is None :
                    return None
                return left * right + self.parameter["scores"][root]
            answer = get_score(0, self.parameter["N"] - 1, processed_result)
            if answer is None :
                return self.rewards["invalid_solution"]

            assert answer <= self.parameter["gold"], "answer should be less than or equal to gold"
            if self.rewards["rewarding_strategy"] == "(answer/gold)^beta" :
                return self.rewards["rewarding_weight"] * ((answer / self.parameter["gold"]) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (answer == self.parameter["gold"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
class WeightedLIS_Environment(VerifiableEnvironment) :
    prompt_template = \
r"""You are given two arrays `A` and `B`, each of length {N}. Their values are (indexing starts at 0):
{A}
{B}

Your task is to select a strictly increasing sequence of indices `i1, i2, ..., ik` such that:
- 0 ≤ i1 < i2 < ... < ik < {N}
- A[i1] ≤ A[i2] ≤ ... ≤ A[ik]
- Try your best to **maximize** the sum: B[i1] + B[i2] + ... + B[ik].

Output Format:
Your final answer should be a single line containing the selected indices i1, i2, ..., ik, separated by **spaces**.
Example: `0 2 3` (do **NOT** include the backticks or quotes); this means k = 3, with i1 = 0, i2 = 2, and i3 = 3.
"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Initialize the WeightedLIS_Environment instance.

        Args:
            wrong_format: reward when the output cannot be parsed as integers.
            invalid_solution: reward when the indices are out of range, not
                strictly increasing, or select a decreasing step in A.
            rewarding_strategy: "(answer/gold)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the final reward.
            rewarding_beta: exponent used by the "(answer/gold)^beta" strategy.
            **kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Sample arrays A and B of length N and compute the best achievable sum.

        Writes `arrayA` (values in 0..N), `arrayB` (values in 1..N) and
        `gold_answer` (maximum B-sum over index sequences along which A is
        non-decreasing) into `self.parameter`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        values = self.parameter["arrayA"] = [random.randint(0, N) for _ in range(N)]
        assert len(values) == self.parameter["N"], "A should have the same length as N"
        weights = self.parameter["arrayB"] = [random.randint(1, N) for _ in range(N)]
        assert len(weights) == self.parameter["N"], "B should have the same length as N"

        # best[i]: largest sum of a valid selection that ends at index i.
        # Every weight is positive, so starting fresh corresponds to a predecessor sum of 0.
        best = []
        for i in range(N) :
            predecessor = max((best[j] for j in range(i) if values[j] <= values[i]), default = 0)
            best.append(predecessor + weights[i])
        self.parameter["gold_answer"] = max(best)
        assert self.parameter["gold_answer"] > 0, "gold_answer should be greater than 0"

    def _prompt_generate(self) -> str :
        """Return the prompt with N and both arrays rendered as `X[i]=value` lists."""
        rendered_a = " ".join("A[{}]={}".format(index, value) for index, value in enumerate(self.parameter["arrayA"]))
        rendered_b = " ".join("B[{}]={}".format(index, value) for index, value in enumerate(self.parameter["arrayB"]))
        return self.prompt_template.format(N = self.parameter["N"], A = rendered_a, B = rendered_b)


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Parse whitespace-separated integers into a list; return None on missing or malformed input."""
        if answer is None :
            return None # Invalid answer format
        tokens = answer.strip().split()
        try :
            return [int(token) for token in tokens]
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a submitted index sequence by the sum of B it collects.

        Returns "wrong_format" when parsing fails and "invalid_solution" when
        any index is out of range, the indices are not strictly increasing, or
        A decreases between two consecutive picks.
        """
        picks = self.processor(output)
        if picks is None :
            return self.rewards["wrong_format"]
        assert isinstance(picks, list), "processed_result should be a list"

        size = self.parameter["N"]
        values = self.parameter["arrayA"]
        weights = self.parameter["arrayB"]

        # Range check first so the pairwise lookups below can never index out of bounds
        if not all(0 <= index < size for index in picks) :
            return self.rewards["invalid_solution"]
        for previous, current in zip(picks, picks[1 :]) :
            if previous >= current or values[previous] > values[current] :
                return self.rewards["invalid_solution"]

        SumB = sum(weights[index] for index in picks)
        gold = self.parameter["gold_answer"]
        assert SumB <= gold, "SumB should be less than or equal to gold_answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(answer/gold)^beta" :
            return self.rewards["rewarding_weight"] * ((SumB / gold) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * int(SumB == gold)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
class WhackAMole_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P2484
    prompt_template = \
r"""You are given an {N} × {M} grid, where each cell contains a non-negative integer representing the number of moles in that hole:
{grid}

You are allowed to define a **fixed** hammer size of r × c (1 ≤ r ≤ {N}, 1 ≤ c ≤ {M}) before starting. Each time you swing the hammer:
- You choose an r × c subrectangle in the grid (without rotation).
- This subrectangle must be fully within the grid.
- Each cell in the subrectangle must contain at least 1 mole.
- Each cell in the subrectangle has exactly 1 mole removed (so r × c moles are removed per swing).

You may swing the hammer multiple times, but you cannot change its size after choosing r and c. Your goal is to remove all the moles from the grid with the **minimum number of swings**.

**Output Format:** Your final answer should be a single integer — the **minimum number of hammer swings** required to remove all moles from the grid.
"""

    def __init__(self,
                 max_beat : int = 3,
                 wrong_format : float = -1.0, wrong_answer : float = 0.0, correct_answer : float = +1.0,
                 **kwargs) :
        """
        Initialize the WhackAMole_Environment instance.

        Args:
            max_beat: upper bound (inclusive) on the number of swings the
                generator places at each hammer position; must be >= 1.
            wrong_format: reward when the output cannot be parsed as an integer.
            wrong_answer: reward when the parsed integer is not the reference answer.
            correct_answer: reward when the parsed integer is the reference answer.
            **kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)

        self.max_beat = max_beat
        assert max_beat >= 1, "max_beat should be greater than or equal to 1"

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_answer" : wrong_answer,
            "correct_answer" : correct_answer,
        }


    def _generate(self) -> None :
        """
        Build a solvable grid and compute the minimum number of swings.

        Reads `MAX_N_M` (>= 2) from `self.parameter` and writes `N`, `M`,
        `grid` and `reference_answer`. The grid is produced by stacking a
        random number of swings of a hidden R × C hammer at every position, so
        at least that hammer size clears it.
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)

        R, C = random.randint(1, N), random.randint(1, M)
        grid = self.parameter["grid"] = [[0] * M for _ in range(N)]
        # Record every batch of swings as a 2D difference update on the R × C rectangle
        for i in range(N - R + 1) :
            for j in range(M - C + 1) :
                num_moles = random.randint(0, self.max_beat)
                grid[i][j] += num_moles
                if i + R < N :
                    grid[i + R][j] -= num_moles
                if j + C < M :
                    grid[i][j + C] -= num_moles
                if i + R < N and j + C < M :
                    grid[i + R][j + C] += num_moles
        # In-place 2D prefix sum turns the difference array into actual mole counts
        for i in range(N) :
            for j in range(M) :
                if i > 0 :
                    grid[i][j] += grid[i - 1][j]
                if j > 0 :
                    grid[i][j] += grid[i][j - 1]
                if i > 0 and j > 0 :
                    grid[i][j] -= grid[i - 1][j - 1]


        total = sum(sum(row) for row in grid)
        if total == 0 :
            self.parameter["reference_answer"] = 0
            return

        def clears_grid(r : int, c : int) -> bool :
            # Greedy check: scanning in row-major order, the top-left uncovered cell can
            # only be fixed by hammers anchored there, so the number of swings is forced.
            diff = [[0] * (M + 1) for _ in range(N + 1)]
            for i in range(N) :
                for j in range(M) :
                    # Accumulate the 2D prefix sum: diff[i][j] becomes the hits on (i, j) so far
                    if i > 0 :
                        diff[i][j] += diff[i - 1][j]
                    if j > 0 :
                        diff[i][j] += diff[i][j - 1]
                    if i > 0 and j > 0 :
                        diff[i][j] -= diff[i - 1][j - 1]

                    missing = grid[i][j] - diff[i][j]
                    if missing < 0 :
                        # Earlier swings already hit this cell more often than it has moles
                        return False
                    if missing > 0 :
                        if i + r > N or j + c > M :
                            # A hammer anchored here would leave the grid
                            return False
                        # Add `missing` swings on the rectangle [i..i+r-1] × [j..j+c-1]
                        diff[i][j] += missing
                        diff[i + r][j] -= missing
                        diff[i][j + c] -= missing
                        diff[i + r][j + c] += missing
            return True

        # Each swing removes r * c moles, so the swing count is total / area and the
        # largest feasible area is optimal. Scan areas from the largest possible one
        # (N * M) downwards and stop at the first that works.
        best_area = 0
        for area in range(N * M, 0, -1) :
            if total % area != 0 :
                continue
            feasible = any(
                area % r == 0 and area // r <= M and clears_grid(r, area // r)
                for r in range(1, min(N, area) + 1)
            )
            if feasible :
                best_area = area
                break

        # The generating hammer always clears the grid, so the optimum is at least its area
        assert best_area >= R * C, "best_area should be at least R * C"
        self.parameter["reference_answer"] = total // best_area


    def _prompt_generate(self) -> str :
        """Return the prompt with the grid dimensions and the grid rows filled in."""
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            grid = "\n".join(" ".join(map(str, row)) for row in self.parameter["grid"]),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer text as a single integer; return None if it is missing or not an integer."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns "correct_answer" when the processed integer equals the
        reference answer, "wrong_answer" when it differs and "wrong_format"
        when `self.processor` yields None.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result == self.parameter["reference_answer"] :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(answer/gold)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Initialize the WIL_Environment instance.

    Stores the reward configuration in `self.rewards`; every other keyword argument is forwarded unchanged to the parent initializer.

    Args:
        wrong_format: Reward returned by `scorer` when the output cannot be parsed into four integers.
        invalid_solution: Reward returned by `scorer` when the intervals are out of range, the zeroed interval is longer than D, or the chosen interval's sum exceeds P.
        rewarding_strategy: Either "(answer/gold)^beta" or "gold=answer"; `scorer` raises NotImplementedError for any other value.
        rewarding_weight: Multiplier applied to the reward of a valid solution.
        rewarding_beta: Exponent used by the "(answer/gold)^beta" strategy.
        **kwargs: Passed through to `super().__init__`.
    """
    super().__init__(**kwargs)

    # Keyed by the names `scorer` looks up.
    self.rewards = {
        "wrong_format" : wrong_format,
        "invalid_solution" : invalid_solution,
        "rewarding_strategy" : rewarding_strategy,
        "rewarding_weight" : rewarding_weight,
        "rewarding_beta" : rewarding_beta,
    }
def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int, int, int]] :
    """
    Parse a raw answer into the tuple (l1, r1, l2, r2).

    Args:
        answer: The extracted answer text, or None when nothing was extracted.

    Returns:
        Four integers in the order they appear, or None when `answer` is None,
        does not contain exactly four whitespace-separated tokens, or any token
        is not an integer literal.
    """
    if answer is None :
        return None # Invalid answer format

    answer = answer.strip()
    try :
        # Both a non-integer token and a wrong token count surface as ValueError;
        # catching only that keeps KeyboardInterrupt/SystemExit and real bugs visible.
        l1, r1, l2, r2 = map(int, answer.split())
    except ValueError :
        return None # Invalid answer format
    return l1, r1, l2, r2
self.rewards["rewarding_strategy"] == "gold=answer" : + return self.rewards["rewarding_weight"] * int(answer == gold) + else : + raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) + else : + return self.rewards["wrong_format"] \ No newline at end of file diff --git a/server/Gym/environments/wyc/__init__.py b/server/Gym/environments/wyc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15948496ac73979289c3af22d2ed743d9ddb30dd --- /dev/null +++ b/server/Gym/environments/wyc/__init__.py @@ -0,0 +1 @@ +from .environment import WYC_Environment diff --git a/server/Gym/environments/wyc/environment.py b/server/Gym/environments/wyc/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..f54a5807d7d7e4aa29d3bedae7ded1b0bd7bc10e --- /dev/null +++ b/server/Gym/environments/wyc/environment.py @@ -0,0 +1,182 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class WYC_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3597 + prompt_template = \ +r"""You are given a **directed graph** with {N} vertices (labeled from 1 to {N}). Each edge is represented as a tuple (s, t, w), meaning there is a directed edge from vertex `s` to vertex `t` with weight `w`. It is guaranteed that each weight `w` is either 1, 2, or 3. The list of edges is: +{edges} + +Considering **all possible paths** in this graph that consist of at least one edge (a path may start and end at any vertex, and may visit vertices or edges multiple times), sort all such paths by their total edge weight in **non-decreasing order**. 
def _generate(self) -> None :
    """
    Sample a random weighted digraph and K, then store the K-th smallest path weight.

    Reads ``self.parameter["N"]`` (asserted >= 2) and ``self.parameter["MAX_K"]`` (asserted >= 1).
    Writes ``self.parameter["edges"]``, ``["K"]`` and ``["reference_answer"]``.
    Instances whose solver returns -1 (fewer than K paths found within the doubling
    limit) are discarded and re-sampled.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
    MAX_K = self.parameter["MAX_K"]
    assert MAX_K >= 1, "MAX_K should be greater than or equal to 1"

    def capped_matmul(left, right, limit) :
        """Square matrix product with every entry saturated at `limit`."""
        size = len(left)
        product = [[0] * size for _ in range(size)]
        for out_row, left_row in zip(product, left) :
            for mid, coeff in enumerate(left_row) :
                if not coeff :
                    continue
                right_row = right[mid]
                for col in range(size) :
                    out_row[col] += coeff * right_row[col]
                    if out_row[col] > limit :
                        out_row[col] = limit
        return product

    def capped_vecmat(vec, matrix, limit) :
        """Row vector times matrix with every entry saturated at `limit`; returns a new list."""
        size = len(vec)
        out = [0] * size
        for mid, coeff in enumerate(vec) :
            if not coeff :
                continue
            row = matrix[mid]
            for col in range(size) :
                out[col] += coeff * row[col]
                if out[col] > limit :
                    out[col] = limit
        return out

    def kth_path_weight(edge_list, k) :
        """Return the weight of the k-th lightest path, or -1 if the doubling limit is hit first."""
        # Expanded state space: state 0 collects finished paths, and each vertex
        # owns three states (arrived, one unit of waiting left, two units left).
        size = 3 * N + 1
        # counts never need to exceed k + N, so saturate there
        limit = k + N

        step = [[0] * size for _ in range(size)]
        step[0][0] = 1 # finished paths stay finished

        start = [0] * size
        for vertex in range(N) :
            arrived = 3 * vertex + 1
            start[arrived] = 1              # a path may start at any vertex
            step[arrived][0] = 1            # from "just arrived" to finish
            step[arrived + 1][arrived] = 1  # wait one unit
            step[arrived + 2][arrived + 1] = 1  # wait two units

        # an edge of weight w enters the target's chain w - 1 waits before arrival
        for u, v, w in edge_list :
            step[(u - 1) * 3 + 1][(v - 1) * 3 + w] += 1

        # powers[d] = step^(2^d)
        powers = [step]
        max_bits = max(1, k.bit_length()) * 2

        # find the first exponent whose finished-path count (minus the N trivial finishes) reaches k
        level = 0
        while True :
            if level >= max_bits :
                return -1
            powers.append(capped_matmul(powers[level], powers[level], limit))
            level += 1
            if capped_vecmat(start, powers[level], limit)[0] - N >= k :
                break

        # binary lifting: greedily take every power that still leaves fewer than k paths
        total = 0
        state = start
        for bit in range(level, -1, -1) :
            advanced = capped_vecmat(state, powers[bit], limit)
            if advanced[0] - N < k :
                state = advanced
                total += 1 << bit
        return total

    while True :
        edges = []
        self.parameter["edges"] = edges
        edge_count = random.randint(1, N * (N - 1))
        for _ in range(edge_count) :
            src, dst = random.sample(range(1, N + 1), 2)
            weight = random.randint(1, 3)
            edges.append((src, dst, weight))
        random.shuffle(edges)
        assert all(1 <= s <= N and 1 <= t <= N and s != t for s, t, _ in edges)

        K = random.randint(1, MAX_K)
        self.parameter["K"] = K

        answer = kth_path_weight(edges, K)
        self.parameter["reference_answer"] = answer
        if answer != -1 :
            break
def scorer(self, output : str) -> float :
    """
    Score a model output against the stored reference answer.

    Returns the "wrong_format" reward when the output cannot be processed or the
    parsed value is not positive.  Otherwise the reward is
    ``rewarding_weight * (min/max)^rewarding_beta`` or
    ``rewarding_weight * (answer == reference)``, depending on the configured strategy.

    Raises:
        NotImplementedError: if the configured rewarding strategy is unknown.
    """
    rewards = self.rewards
    guess = self.processor(output)

    if guess is None or guess <= 0 :
        return rewards["wrong_format"]

    reference = self.parameter["reference_answer"]
    strategy = rewards["rewarding_strategy"]

    if strategy == "(min/max)^beta" :
        low, high = min(reference, guess), max(reference, guess)
        return rewards["rewarding_weight"] * ((low / high) ** rewards["rewarding_beta"])
    if strategy == "gold=answer" :
        return rewards["rewarding_weight"] * (guess == reference)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
def _generate(self) -> None :
    """
    Sample step sizes A != B and an array H, then store the minimum operation count.

    Reads `self.parameter["N"]` (asserted >= 2) and `self.A_B_multiple`.
    Writes `self.parameter["A"]`, `["B"]`, `["H"]` and `["reference_answer"]`.

    Every H[i] is built as a_coeff * A + b_coeff * B, so each value (and each
    difference of values) is divisible by gcd(A, B), which `solve` relies on.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    # Re-draw until the two step sizes differ.
    while True :
        A, B = self.parameter["A"], self.parameter["B"] = random.randint(1, N * self.A_B_multiple), random.randint(1, N * self.A_B_multiple)
        if A != B :
            break

    # NOTE: despite the names, a coefficient is *negated* when the draw falls below these thresholds.
    positive_A_probability, positive_B_probability = random.random(), random.random()
    H = self.parameter["H"] = []
    for _ in range(N) :
        a_coeff, b_coeff = random.randint(0, N * self.A_B_multiple), random.randint(0, N * self.A_B_multiple)
        if random.random() < positive_A_probability :
            a_coeff = -a_coeff
        if random.random() < positive_B_probability :
            b_coeff = -b_coeff
        H.append(a_coeff * A + b_coeff * B)


    def extended_gcd(a, b):
        """
        Returns (x, y, g) such that a*x + b*y = g = gcd(a,b).
        """
        if b == 0:
            return 1, 0, a
        x1, y1, g = extended_gcd(b, a % b)
        # back-substitute
        return y1, x1 - (a // b) * y1, g

    def solve():
        """Return the minimum number of operations for the sampled H, A and B."""
        # Build the difference array C of length N+1:
        # C[0] = H[0]; C[i] = H[i] - H[i-1] for i=1..N-1; C[N] = -H[N-1]
        new_N = N + 1
        C = [0] * new_N
        C[0] = H[0]
        for i in range(1, N):
            C[i] = H[i] - H[i-1]
        C[N] = -H[N-1]

        # Compute gcd and Bézout coefficients
        d = math.gcd(A, B)
        u, v, g = extended_gcd(A, B)
        # g == d
        ad = A // d
        bd = B // d

        # Prepare x[i], y[i] so that A*x[i] + B*y[i] = C[i], minimizing |x|+|y|
        x = [0] * new_N
        y = [0] * new_N
        dx = 0  # running sum of the chosen x[i]
        ans = 0  # running sum of |x[i]| + |y[i]|
        sgn = lambda z: -1 if z < 0 else 1

        for i in range(new_N):
            ci = C[i]
            if ci % d != 0:
                assert False, "C[i] should be divisible by d"

            # Scale the Bézout pair to one particular solution of A*p0 + B*q0 = ci.
            factor = ci // d
            p0 = u * factor
            q0 = v * factor

            # Try the two shifts from the p0-based solution:
            # smallest non-negative x in the solution family (x moves in steps of bd)
            best_x = p0 % bd
            best_y = (ci - A * best_x) // B
            best_cost = abs(best_x) + abs(best_y)

            # shift by one period in the x-direction
            cand_x = best_x - bd
            cand_y = best_y + ad
            cand_cost = abs(cand_x) + abs(cand_y)
            if cand_cost < best_cost:
                best_x, best_y, best_cost = cand_x, cand_y, cand_cost

            # Now try the two shifts from the q0-based solution:
            # smallest non-negative y in the solution family (y moves in steps of ad)
            alt_y = q0 % ad
            alt_x = (ci - B * alt_y) // A
            alt_cost = abs(alt_x) + abs(alt_y)
            if alt_cost < best_cost:
                best_x, best_y, best_cost = alt_x, alt_y, alt_cost

            # one more shift
            cand_y2 = alt_y - ad
            cand_x2 = alt_x + bd
            cand_cost2 = abs(cand_x2) + abs(cand_y2)
            if cand_cost2 < best_cost:
                best_x, best_y, best_cost = cand_x2, cand_y2, cand_cost2

            x[i] = best_x
            y[i] = best_y
            dx += best_x
            ans += best_cost

        # Build a min-heap of how much extra cost it costs to shift one unit of x (and compensate y)
        # Shifts move every x[i] against the sign of dx, so each one brings the sum of x closer to zero.
        sign = sgn(dx)
        heap = []
        for i in range(new_N):
            nx = x[i] - sign * bd
            ny = y[i] + sign * ad
            delta = (abs(nx) + abs(ny)) - (abs(x[i]) + abs(y[i]))
            heapq.heappush(heap, (delta, i))

        # We need to do abs(dx)//bd such adjustments
        adjust_count = abs(dx) // bd
        for _ in range(adjust_count):
            # always take the currently cheapest shift
            delta, i = heapq.heappop(heap)
            ans += delta
            # apply the shift
            x[i] -= sign * bd
            y[i] += sign * ad
            # re-compute this index's next delta and re-push
            nx = x[i] - sign * bd
            ny = y[i] + sign * ad
            new_delta = (abs(nx) + abs(ny)) - (abs(x[i]) + abs(y[i]))
            heapq.heappush(heap, (new_delta, i))

        # Each boundary operation is counted twice, so divide by 2
        return ans // 2

    self.parameter["reference_answer"] = solve()
0000000000000000000000000000000000000000..7355baed48df8fecb8a29e60b42bff9281aaf245 --- /dev/null +++ b/server/Gym/environments/xor_equation_counting/environment.py @@ -0,0 +1,189 @@ +import random +from typing import Optional +from ...environment import VerifiableEnvironment + + +class XorEquationCounting_Environment(VerifiableEnvironment) : + prompt_template = \ +r"""You are given an equation: X[1] XOR ... XOR X[{N}] = {K} +That is, the bitwise XOR of all variables X[1] through X[{N}] must equal the integer {K}. Each variable X[i] must satisfy the constraint: {L} <= X[i] <= {R} for all i = 1, ..., {N}. Please compute how many such combinations of values satisfy the equation. Give the result **modulo {MOD}**. + +**Output Format:** Your final answer should be a single integer — the number of valid combinations modulo `{MOD}`.""" + MOD = 10000 + + def __init__(self, + wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0, + **kwargs) : + """ + Initialize the XorEquationCounting_Environment instance. 
def _generate(self) -> None :
    """
    Sample the bounds L <= R and a target K, then store the solution count modulo `self.MOD`.

    Reads `self.parameter["N"]` (asserted >= 2) and `self.parameter["RANGE"]` (asserted >= 1).
    Writes `self.parameter["R"]`, `["L"]`, `["K"]` and `["reference_answer"]`.

    K is the XOR of N values drawn from [L, R], so at least one assignment satisfies the equation.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    assert "RANGE" in self.parameter, "RANGE is required in parameter"
    RANGE = self.parameter["RANGE"]
    assert RANGE >= 1, "RANGE should be greater than or equal to 1"

    R = self.parameter["R"] = random.randint(0, RANGE)
    L = self.parameter["L"] = random.randint(0, R)

    # XOR together N in-range samples to obtain a reachable target.
    K = 0
    for i in range(1, N + 1) :
        K ^= random.randint(L, R)
    self.parameter["K"] = K


    def mult(a: int, b: int) -> int:
        """Product of a and b reduced modulo MOD."""
        return a * b % self.MOD

    def add(a: int, b: int) -> int:
        """Sum with a single conditional subtraction of MOD (fully reduced only if a + b < 2 * MOD)."""
        s = a + b
        return s - self.MOD if s >= self.MOD else s

    def sub(a: int, b: int) -> int:
        """Difference with a single conditional addition of MOD (fully reduced only if a - b > -MOD)."""
        d = a - b
        return d + self.MOD if d < 0 else d

    def power(a: int, n: int) -> int:
        """a**n modulo MOD by square-and-multiply."""
        result = 1
        while n > 0:
            if n & 1:
                result = mult(result, a)
            a = mult(a, a)
            n >>= 1
        return result

    def idx3(v0: int, v1: int, v2: int) -> int:
        """Pack three 0/1 flags into a state index in 0..7."""
        return v0 + (v1 << 1) + (v2 << 2)

    def idx2(v0: int, v1: int) -> int:
        """Pack two 0/1 flags into a state index in 0..3."""
        return v0 + (v1 << 1)

    class Matrix:
        """8x8 integer matrix whose products are reduced modulo MOD."""
        # captured from the enclosing environment instance at class-creation time
        MOD = self.MOD
        def __init__(self):
            self.v = [[0]*8 for _ in range(8)]
        def __mul__(self, other):
            # accumulate exact integer sums first, reduce once per entry afterwards
            temp = [[0]*8 for _ in range(8)]
            for k in range(8):
                for i in range(8):
                    aik = self.v[i][k]
                    if aik:
                        for j in range(8):
                            temp[i][j] += aik * other.v[k][j]
            c = Matrix()
            for i in range(8):
                for j in range(8):
                    c.v[i][j] = temp[i][j] % self.MOD
            return c
        def __pow__(self, n):
            # binary exponentiation starting from the identity matrix
            result = Matrix()
            for i in range(8):
                result.v[i][i] = 1
            base = self
            while n > 0:
                if n & 1:
                    result = result * base
                base = base * base
                n >>= 1
            return result

    def work4(c: int, a: int, b: int, k: int, N: int) -> int:
        # NOTE(review): judging from the call site below (work4 with c=0 minus work4 with c=1),
        # this presumably counts assignments where each variable is bounded by either a or b,
        # with c the required parity of the a-bounded ones — confirm against the original derivation.
        # Keep a <= b; swapping the roles flips the parity flag when N is odd.
        if a > b:
            a, b = b, a
            c ^= (N & 1)
        if b == 0:
            return power(2, N-1) if k == 0 else 0
        # w is the highest set bit of b
        w = 1 << (b.bit_length() - 1)
        # k has a bit above w, which no value <= b can supply
        if (w << 1) - 1 < k:
            return 0

        # One-variable transition matrix over the packed (v0, v1, v2) states.
        # NOTE(review): v0 looks like the running XOR of the top bit, v1 like a
        # "some variable is below the top bit" flag, v2 like the parity of a-bounded picks — confirm.
        zy = Matrix()
        for v0 in (0,1):
            for v1 in (0,1):
                for v2 in (0,1):
                    row = idx3(v0, v1, v2)
                    zy.v[row][idx3(v0^1, v1, v2)] = add(zy.v[row][idx3(v0^1, v1, v2)], b - w + 1)
                    zy.v[row][idx3(v0, 1, v2)] = add(zy.v[row][idx3(v0, 1, v2)], w if v1 else 1)
                    if a & w:
                        zy.v[row][idx3(v0^1, v1, v2^1)] = add(zy.v[row][idx3(v0^1, v1, v2^1)], a - w + 1)
                        zy.v[row][idx3(v0, 1, v2^1)] = add(zy.v[row][idx3(v0, 1, v2^1)], w if v1 else 1)
                    else:
                        zy.v[row][idx3(v0, v1, v2^1)] = add(zy.v[row][idx3(v0, v1, v2^1)], a + 1)

        # apply the transition once per variable
        zy = zy ** N
        bit = 1 if (k & w) else 0
        base_count = zy.v[idx3(0,0,0)][idx3(bit,1,c)]

        # Recurse with the top bit stripped from both bounds and k adjusted accordingly.
        next_a = (a ^ w) if (a & w) else a
        next_b = b ^ w
        next_k = k ^ ((a & w) * c) ^ (w * (c ^ (N & 1)))

        return add(base_count, work4(c, next_a, next_b, next_k, N))

    def work2(b: int, k: int, N: int) -> int:
        # Single-bound variant used when L == 0; only states 0..3 of the 8x8 matrix are populated.
        if b == 0:
            return 1 if k == 0 else 0
        # w is the highest set bit of b
        w = 1 << (b.bit_length() - 1)
        if (w << 1) - 1 < k:
            return 0
        zy = Matrix()
        for v0 in (0,1):
            for v1 in (0,1):
                row = idx2(v0, v1)
                zy.v[row][idx2(v0^1, v1)] = add(zy.v[row][idx2(v0^1, v1)], b - w + 1)
                zy.v[row][idx2(v0, 1)] = add(zy.v[row][idx2(v0, 1)], w if v1 else 1)
        zy = zy ** N
        bit = 1 if (k & w) else 0
        base_count = zy.v[idx2(0,0)][idx2(bit,1)]
        # Recurse on the bound with its top bit cleared; k's top bit flips when N is odd.
        next_b = b ^ w
        next_k = k ^ (w * (N & 1))
        return add(base_count, work2(next_b, next_k, N))

    # With L == 0 the single-bound count suffices; otherwise take the difference of the two parity classes.
    self.parameter["reference_answer"] = work2(R, K, N) if L == 0 else sub(work4(0, L-1, R, K, N), work4(1, L-1, R, K, N))
def scorer(self, output : str) -> float :
    """
    Score a model output against the stored reference count.

    Returns the "wrong_format" reward when the output cannot be processed, the
    "wrong_range" reward when the parsed value lies outside [0, MOD), and
    otherwise the "correct_answer" or "wrong_answer" reward.
    """
    guess = self.processor(output)

    if guess is None :
        return self.rewards["wrong_format"]
    if guess < 0 or guess >= self.MOD :
        return self.rewards["wrong_range"]

    verdict = "correct_answer" if guess == self.parameter["reference_answer"] else "wrong_answer"
    return self.rewards[verdict]
def _generate(self) -> None :
    """
    Sample N distinct strings over {a, b} and count the prefix-free non-empty subsets.

    Reads ``self.parameter["N"]`` (asserted >= 3).
    Writes ``self.parameter["array"]`` (the shuffled strings) and
    ``self.parameter["reference_answer"]``.

    Some strings are drawn at random; the rest are proper prefixes of those, so
    prefix conflicts actually occur.  Samples containing duplicates are discarded.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be greater than or equal to 3"

    while True :
        ratio = random.uniform(0.1, 0.9)
        base_count = N - int(N * ratio)
        if base_count < 1 :
            continue

        strings = []
        self.parameter["array"] = strings

        # distinct random base strings of length 2..N
        while len(strings) < base_count :
            size = random.randint(2, N)
            candidate = "".join(random.choices("ab", k = size))
            if candidate not in strings :
                strings.append(candidate)

        # the remaining strings are non-empty proper prefixes of random base strings
        for _ in range(N - base_count) :
            base = random.choice(strings[: base_count])
            cut = random.randint(1, len(base) - 1)
            strings.append(base[: cut])

        assert len(strings) == N
        if len(set(strings)) == len(strings) :
            random.shuffle(strings)
            break

    ordered = sorted(strings)

    def independent(first, second) :
        """True iff the shorter (or equal-length) string is NOT a prefix of the other."""
        if len(first) > len(second) :
            first, second = second, first
        return not second.startswith(first)

    # ways[j] = number of prefix-free subsets whose largest element (in sorted order) is ordered[j].
    # In sorted order it is enough to check consecutive chosen elements against each other.
    ways = [1] * N
    for i in range(N) :
        for j in range(i + 1, N) :
            if independent(ordered[i], ordered[j]) :
                ways[j] += ways[i]

    self.parameter["reference_answer"] = sum(ways)
class ParameterController(ABC) :
    """
    Abstract interface for objects that decide which `parameter` dicts are handed to a VerifiableEnvironment.generator(seed, parameter) call.

    Concrete controllers must implement `update` and `get_parameter_list`.
    """

    def __init__(self) :
        """Base initializer; keeps no state of its own."""
        return None


    @abstractmethod
    def update(self) -> None :
        """
        Move on to the next parameter setting and remember it.
        """


    @abstractmethod
    def get_parameter_list(self) -> List[Dict] :
        """
        Return every parameter dict this controller currently manages.
        """
BlockImage_ParameterController +from .bounded_adjacency_difference_permutation_counting import BoundedAdjacencyDifference_Permutation_Counting_ParameterController +from .bounded_interval_intersection import BoundedIntervalIntersection_ParameterController +from .bounded_mean_subarray_counting import BoundedMeanSubarrayCounting_ParameterController +from .bounded_subarray_counting import BoundedSubarrayCounting_ParameterController +from .box_scheduling import BoxScheduling_ParameterController +from .bridge import Bridge_ParameterController +from .bubble_swap_lower_bound_permutation_counting import BubbleSwapLowerBound_PermutationCounting_ParameterController +from .bucket_sorting import BucketSorting_ParameterController +from .campfire_party import CampfireParty_ParameterController +from .campsite_puzzle import CampsitePuzzle_ParameterController +from .canon import Canon_ParameterController +from .cantor_expansion import CantorExpansion_ParameterController +from .capital_city_effect import CapitalCityEffect_ParameterController +from .card_coloring_counting import CardColoringCounting_ParameterController +from .catalan_number_mod import CatalanNumberMod_ParameterController +from .check_all_cycle_xor_zero import CheckAllCycleXorZero_ParameterController +from .cho_hamsters import ChoHamsters_ParameterController +from .cinema import Cinema_ParameterController +from .circuit import Circuit_ParameterController +from .circulating_decimal_counting import CirculatingDecimalCounting_ParameterController +from .circulating_grid import CirculatingGrid_ParameterController +from .cleaning_up import CleaningUp_ParameterController +from .clear_symmetry import ClearSymmetry_ParameterController +from .clique_independent_set_partitioning_counting import Clique_IndependentSet_Partitioning_Counting_ParameterController +from .coin_square_game import CoinSquareGame_ParameterController +from .coloring_counting import ColoringCounting_ParameterController +from 
.combination_odd_subsequence_counting import CombinationOddSubsequenceCounting_ParameterController +from .concatenation_partition_counting_sum import ConcatenationPartitionCountingSum_ParameterController +from .congruent_equation import CongruentEquation_ParameterController +from .construct_hack_interval import ConstructHackInterval_ParameterController +from .convex_hull import ConvexHull_ParameterController +from .cornfield import Cornfield_ParameterController +from .countdown import Countdown_ParameterController +from .cow_dance_show import CowDanceShow_ParameterController +from .crt import CRT_ParameterController +from .cryptarithmetic import Cryptarithmetic_ParameterController +from .cube_fixed_local_maximum_counting import Cube_FixedLocalMaximumCounting_ParameterController +from .cycle_counting import CycleCounting_ParameterController +from .decreasing_digit_counting import DecreasingDigitCounting_ParameterController +from .degree_fixed_spanning_tree import DegreeFixed_SpanningTree_ParameterController +from .delta_min_popcount import DeltaMinPopcount_ParameterController +from .delta_nim_game import DeltaNimGame_ParameterController +from .derangement_extension import DerangementExtension_ParameterController +from .difference_constraint_system import DifferenceConstraintSystem_ParameterController +from .difference_constraint_system_dag import DifferenceConstraintSystemDAG_ParameterController +from .different_color_pairing import DifferentColorPairing_ParameterController +from .differentiate import Differentiate_ParameterController +from .digit_lis_counting import DigitLISCounting_ParameterController +from .discrete_logarithm import DiscreteLogarithm_ParameterController +from .disinfection import Disinfection_ParameterController +from .distinct_array_permutation import DistinctArrayPermutation_ParameterController +from .distinct_edge_colored_complete_graph_counting import DistinctEdgeColoredCompleteGraphCounting_ParameterController +from .division import 
Division_ParameterController +from .divisor_flip_expectation import DivisorFlipExpectation_ParameterController +from .double_cross_counting import DoubleCrossCounting_ParameterController +from .double_palindromic_string_counting import DoublePalindromicStringCounting_ParameterController +from .double_stack_sorting import DoubleStackSorting_ParameterController +from .dyn_dynamite import DynDynamite_ParameterController +from .eight_digit_puzzle import EightDigitPuzzle_ParameterController +from .emperor_worries import EmperorWorries_ParameterController +from .energy_storage_meter import EnergyStorageMeter_ParameterController +from .euclid_game import EuclidGame_ParameterController +from .even_degree_graph_partitioning import EvenDegreeGraphPartitioning_ParameterController +from .expression_adding_parenthese_counting import Expression_AddingParenthese_Counting_ParameterController +from .face_right_way import FaceRightWay_ParameterController +from .factorial_trailing_zero_count import FactorialTrailingZeroCount_ParameterController +from .fbi_binary_tree import FBI_BinaryTree_ParameterController +from .fibonacci import Fibonacci_ParameterController +from .fibonacci_containing_counting import FibonacciContainingCounting_ParameterController +from .fibtrain import Fibtrain_ParameterController +from .firework_show import FireworkShow_ParameterController +from .fixed_mod_k_selection_counting import FixedModK_Selection_Counting_ParameterController +from .fixed_one_edge_num_spanning_tree import FixedOneEdgeNum_SpanningTree_ParameterController +from .fractional_programming import FractionalProgramming_ParameterController +from .fractional_programming_bipartite_graph_matching import FractionalProgramming_BipartiteGraphMatching_ParameterController +from .futoshiki_puzzle import FutoshikiPuzzle_ParameterController +from .gas_fire_extinguishers import GasFireExtinguishers_ParameterController +from .gaussian_elimination import GaussianElimination_ParameterController +from 
.gcd_fibonacci_product import GCDFibonacciProduct_ParameterController +from .gcd_lcm_counting import GcdLcmCounting_ParameterController +from .gcd_one_counting import GCDOne_Counting_ParameterController +from .gcd_prime_counting import GCDPrime_Counting_ParameterController +from .gold_washing import GoldWashing_ParameterController +from .gra_minima_game import GraMinimaGame_ParameterController +from .grade_ranking_counting import GradeRankingCounting_ParameterController +from .graph_contain_tree_counting import GraphContainTreeCounting_ParameterController +from .graph_isomorphism import GraphIsomorphism_ParameterController +from .grid_bfs import GridBFS_ParameterController +from .grid_coloring_counting import GridColoringCounting_ParameterController +from .grid_component import GridComponent_ParameterController +from .grid_local_minimum_counting import GridLocalMinimumCounting_ParameterController +from .grid_parity_construction import GridParityConstruction_ParameterController +from .grid_triangle_counting import GridTriangleCounting_ParameterController +from .halving_chain_counting import HalvingChainCounting_ParameterController +from .hamiltonian_path import HamiltonianPath_ParameterController +from .hamiltonian_path_existence import HamiltonianPathExistence_ParameterController +from .heap_counting import HeapCounting_ParameterController +from .hitori_puzzle import HitoriPuzzle_ParameterController +from .hungry_rabbit import HungryRabbit_ParameterController +from .hur_warehouse_store import HURWarehouseStore_ParameterController +from .imp_party import ImpParty_ParameterController +from .individual_sum_bounded_sequence_counting import IndividualSumBounded_SequenceCounting_ParameterController +from .integer_factorization_counting import IntegerFactorizationCounting_ParameterController +from .integer_programming import IntegerProgramming_ParameterController +from .integral import Integral_ParameterController +from .inversion_pair import 
InversionPair_ParameterController +from .inversion_pair_k_counting import InversionPairK_Counting_ParameterController +from .josephus import Josephus_ParameterController +from .jug_puzzle import JugPuzzle_ParameterController +from .k_partition import KPartition_ParameterController +from .kakurasu import Kakurasu_ParameterController +from .kidding_me import KiddingMe_ParameterController +from .king_sorting import KingSorting_ParameterController +from .klo_blocks import KloBlocks_ParameterController +from .knapsack import Knapsack_ParameterController +from .knights_and_knaves import KnightsAndKnaves_ParameterController +from .kos_dicing import KosDicing_ParameterController +from .kth_binary_tree import Kth_BinaryTree_ParameterController +from .kth_semi_balanced_bracket_sequence import Kth_SemiBalancedBracketSequence_ParameterController +from .kth_subsequence import KthSubsequence_ParameterController +from .kur import KUR_ParameterController +from .lamp_changing import LampChanging_ParameterController +from .land_acquisition import LandAcquisition_ParameterController +from .landform_generation_counting import LandformGenerationCounting_ParameterController +from .largest_convex_polygon import LargestConvexPolygon_ParameterController +from .largest_rectangle_among_points import LargestRectangle_AmongPoints_ParameterController +from .las import LAS_ParameterController +from .las_laser import LASLaser_ParameterController +from .lcm import LCM_ParameterController +from .lds_two_counting import LDSTwo_Counting_ParameterController +from .light_up_puzzle import LightUpPuzzle_ParameterController +from .link_beads import LinkBeads_ParameterController +from .lis_lds_concatenation import LIS_LDS_Concatenation_ParameterController +from .liz_lollipop import LIZ_Lollipop_ParameterController +from .longest_double_palindrome import Longest_DoublePalindrome_ParameterController +from .longest_matching_subsequence import Longest_MatchingSubsequence_ParameterController +from 
.longest_maxdiff_bounded_interval.parameter_controller import LongestMaxDiffBoundedInterval_ParameterController +from .longest_path import LongestPath_ParameterController +from .longest_repeated_palindrome import Longest_RepeatedPalindrome_ParameterController +from .maf_mafia import MafMafia_ParameterController +from .magic_square_puzzle import MagicSquarePuzzle_ParameterController +from .making_grade import MakingGrade_ParameterController +from .matrix_binary_exponentiation import Matrix_BinaryExponentiation_ParameterController +from .matrix_permutation_both_diagonal_one import MatrixPermutation_BothDiagonalOne_ParameterController +from .matrix_permutation_equivalence import MatrixPermutationEquivalence_ParameterController +from .matrix_permutation_main_diagonal_one import MatrixPermutation_MainDiagonalOne_ParameterController +from .matrix_pooling import MatrixPooling_ParameterController +from .matrix_rmq_counting import MatrixRMQCounting_ParameterController +from .max_different_group_pair_division import MaxDifferentGroupPairDivision_ParameterController +from .max_grid_path_intersection import MaxGridPathIntersection_ParameterController +from .max_minimum_after_interval_addition import MaxMinimum_AfterIntervalAddition_ParameterController +from .max_mult_split import MaxMultSplit_ParameterController +from .max_multiplication_fixed_sum import MaxMultiplicationFixedSum_ParameterController +from .max_no_conflicting_bombs import MaxNoConflictingBombs_ParameterController +from .max_nonadjacent_k_element_sum import Max_NonAdjacent_KElementSum_ParameterController +from .max_permutation import MaxPermutation_ParameterController +from .max_rmq_expectation import MaxRMQExpectation_ParameterController +from .max_segment_coverage_constraint import MaxSegmentCoverageConstraint_ParameterController +from .max_sum_lds import MaxSumLDS_ParameterController +from .max_three_square_sum import MaxThreeSquareSum_ParameterController +from .max_tree_constrained_permutation_weight import 
Max_TreeConstrainedPermutation_Weight_ParameterController +from .max_tree_k_path_coverage import MaxTree_KPathCoverahe_ParameterController +from .max_tree_xor_path import MaxTreeXorPath_ParameterController +from .max_weight_palindromic_substring import MaxWeightPalindromicSubstring_ParameterController +from .max_xor_path import MaxXorPath_ParameterController +from .max_xor_set import MaxXorSet_ParameterController +from .maximum_achromatic_number import MaximumAchromaticNumber_ParameterController +from .maximum_clique import MaximumClique_ParameterController +from .maximum_divisor import MaximumDivisor_ParameterController +from .maximum_independent_set_grid import MaximumIndependentSetGrid_ParameterController +from .maximum_independent_set_tree import Maximum_IndependentSet_Tree_ParameterController +from .maximum_lexicographical_order_subsequence import MaximumLexicographicalOrderSubsequence_ParameterController +from .maximum_point_segment_matching import MaximumPointSegmentMatching_ParameterController +from .maximum_subsequence_num import Maximum_SubsequenceNum_ParameterController +from .maximum_weight_matching import MaximumWeightMatching_ParameterController +from .maze import Maze_ParameterController +from .min_conversion_to_cycle_cost import MinConversionToCycleCost_ParameterController +from .min_cost_reducing_lnds import MinCostReducingLNDS_ParameterController +from .min_cost_tree_coverage import MinCostTreeCoverage_ParameterController +from .min_cube_assignment import MinCubeAssignment_ParameterController +from .min_division_sum_xor import MinDivisionSumXor_ParameterController +from .min_inorder_binary_tree import MinInorderBinaryTree_ParameterController +from .min_kdivisor_number import MinKDivisorNumber_ParameterController +from .min_no_solution_linear_diophantine_equation import MinNoSolutionLinearDiophantineEquation_ParameterController +from .min_nonsubstring import MinNonsubstring_ParameterController +from .min_pairsum_multiplication_permutation import 
MinPairSumMultiplicationPermutation_ParameterController +from .min_path_cover_dag import MinPathCover_DAG_ParameterController +from .min_sum_chebyshev_distance import MinSumChebyshevDistance_ParameterController +from .min_sum_distance_square import MinSumDistanceSquare_ParameterController +from .min_sum_pre_xor import MinSumPreXor_ParameterController +from .min_swap_two_permutations import MinSwapTwoPermutations_ParameterController +from .min_xor_pair import MinXorPair_ParameterController +from .minesweeping import Minesweeping_ParameterController +from .minimal_cyclic_shift import MinimalCyclicShift_ParameterController +from .minimum_chromatic_number import MinimumChromaticNumber_ParameterController +from .minimum_chromatic_number_segment_overlap import MinimumChromaticNumber_SegmentOverlap_ParameterController +from .minimum_cost_maximum_flow import MinimumCost_MaximumFlow_ParameterController +from .minimum_crossing_edges_graph_partition import Minimum_CrossingEdges_GraphPartition_ParameterController +from .minimum_directed_spanning_tree import MinimumDirectedSpanningTree_ParameterController +from .minimum_dominating_interval import Minimum_DominatingInterval_ParameterController +from .minimum_dominating_set import Minimum_DominatingSet_ParameterController +from .minimum_dominating_set_grid import Minimum_DominatingSet_Grid_ParameterController +from .minimum_fibonacci_representation import MinimumFibonacciRepresentation_ParameterController +from .minimum_harmonious_chromatic_number import MinimumHarmoniousChromaticNumber_ParameterController +from .minimum_interval_coverage import MinimumIntervalCoverage_ParameterController +from .minimum_max_abs_slicer import Minimum_MaxAbsSlicer_ParameterController +from .minimum_max_slicer import Minimum_MaxSlicer_ParameterController +from .minimum_ratio_path import MinimumRatioPath_ParameterController +from .minimum_spanning_tree import MinimumSpanningTree_ParameterController +from .minimum_spanning_tree_counting import 
MinimumSpanningTreeCounting_ParameterController +from .minimum_steiner_tree import MinimumSteinerTree_ParameterController +from .minimum_sum_difference_submatrix import MinimumSumDifferenceSubmatrix_ParameterController +from .minimum_tree_weighted_dominating_ancestor import MinimumTreeWeightedDominatingAncestor_ParameterController +from .minimum_unconflicted_grid_kmax import MinimumUnconflictedGridKMax_ParameterController +from .minimum_vertex_cover import Minimum_VertexCover_ParameterController +from .minimum_weighted_spanning_tree import MinimumWeightedSpanningTree_ParameterController +from .mitter_transportation import MitterTransportation_ParameterController +from .mixed_graph_eulerian_circuit import MixedGraphEulerianCircuit_ParameterController +from .money_charging_game import MoneyChargingGame_ParameterController +from .monochrome_block_counting import MonochromeBlockCounting_ParameterController +from .monotonic_stack import MonotonicStack_ParameterController +from .most_component_tree_removing_two_paths import MostComponentTreeRemovingTwoPaths_ParameterController +from .most_num_edge_non_self_isomorphism import MostNumEdge_NonSelfIsomorphism_ParameterController +from .multidrink import MultiDrink_ParameterController +from .multiple_flipping_game import MultipleFlippingGame_ParameterController +from .multiplication import Multiplication_ParameterController +from .myj import MYJ_ParameterController +from .nand_result_counting import NANDResultCounting_ParameterController +from .negative_base import NegativeBase_ParameterController +from .new_nim_game import NewNimGame_ParameterController +from .next_palindromic import NextPalindromic_ParameterController +from .nine_puzzle import NinePuzzle_ParameterController +from .no_adjacent_girl_counting import NoAdjacentGirlCounting_ParameterController +from .no_double_triple_counting import NoDoubleTripleCounting_ParameterController +from .not_containing_string_counting import 
NotContainingStringCounting_ParameterController +from .number_partition_counting import NumberPartitionCounting_ParameterController +from .numbrix import Numbrix_ParameterController +from .odd_visitation import OddVisitation_ParameterController +from .odl_distance import ODLDistance_ParameterController +from .pair_more_one_counting import PairMoreOneCounting_ParameterController +from .palembang_bridges import PalembangBridges_ParameterController +from .palindrome_partition_counting import PalindromePartitionCounting_ParameterController +from .palindromic_substring_number_counting import PalindromicSubstringNumberCounting_ParameterController +from .pan_solar_panels import PanSolarPanels_ParameterController +from .path_no_going_back_counting import Path_NoGoingBack_Counting_ParameterController +from .patrol import Patrol_ParameterController +from .pcp_permutation import PCPPermutation_ParameterController +from .pipeline_arrangement import PipelineArrangement_ParameterController +from .pol_polarization import POLPolarization_ParameterController +from .polya_model import PolyaModel_ParameterController +from .polynomial_factorization import PolynomialFactorization_ParameterController +from .polynomial_interpolation import PolynomialInterpolation_ParameterController +from .polynomial_minimum import PolynomialMinimum_ParameterController +from .polynomial_remainder import PolynomialRemainder_ParameterController +from .power_cycle import PowerCycle_ParameterController +from .power_shortcut import PowerShortcut_ParameterController +from .powernest import PowerNest_ParameterController +from .prefix_concatenation import PrefixConcatenation_ParameterController +from .prefix_product_mod_distinct_permutation import PrefixProductMODDistinctPermutation_ParameterController +from .prefix_sum_mod_distinct_permutation import PrefixSumMODDistinctPermutation_ParameterController +from .prefixuffix import Prefixuffix_ParameterController +from .preorder_traversal import 
PreorderTraversal_ParameterController +from .prime_graph_minimum_chromatic_number import PrimeGraph_MinimumChromaticNumber_ParameterController +from .protecting_flowers import ProtectingFlowers_ParameterController +from .pythagorean_graph_independent_set_counting import PythagoreanGraph_IndependentSetCounting_ParameterController +from .quad_magic_items import QuadMagicItems_ParameterController +from .quadratic_function_segmentation import QuadraticFunctionSegmentation_ParameterController +from .quantum_lock_puzzle import QuantumLockPuzzle_ParameterController +from .queen_placement import QueenPlacement_ParameterController +from .random_range_max_expectation import RandomRangeMaxExpectation_ParameterController +from .range_constrained_increasing_sequence_counting import RangeConstrained_IncreasingSequence_Counting_ParameterController +from .range_four_sequence_construction import RangeFourSequenceConstruction_ParameterController +from .range_shrinking_sequence_counting import RangeShrinkingSequenceCounting_ParameterController +from .recursive_function import RecursiveFunction_ParameterController +from .recursive_sequence_sum_construction import RecursiveSequenceSumConstruction_ParameterController +from .repeat_sequence_lnds import RepeatSequenceLNDS_ParameterController +from .root_extraction import RootExtraction_ParameterController +from .round_robin import RoundRobin_ParameterController +from .roundtable_assignment import RoundTableAssignment_ParameterController +from .royal_lock_counting import RoyalLockCounting_ParameterController +from .salad_bar import SaladBar_ParameterController +from .salesman_fatigue import SalesmanFatigue_ParameterController +from .same_adjacency_counting import SameAdjacencyCounting_ParameterController +from .sat import SAT_ParameterController +from .scc_sequence_counting import SCC_Sequence_Counting_ParameterController +from .secret_cow_code import SecretCowCode_ParameterController +from .segment_min_length_equal_counting import 
SegmentMinLengthEqual_Counting_ParameterController +from .segment_tree_sorting_counting import SegmentTreeSortingCounting_ParameterController +from .self_power_sequence_mod import SelfPowerSequenceMOD_ParameterController +from .set_cover import SetCover_ParameterController +from .set_splitting import SetSplitting_ParameterController +from .shared_substring_counting import SharedSubstringCounting_ParameterController +from .shortest_path import ShortestPath_ParameterController +from .shortest_path_count_construction import ShortestPathCountConstruction_ParameterController +from .shortest_unicolor_substring import ShortestUnicolorSubstring_ParameterController +from .singing_girl_story import SingingGirlStory_ParameterController +from .single_stack_sorting import SingleStackSorting_ParameterController +from .ska_rock_garden import SkaRockGarden_ParameterController +from .skyscraper_puzzle import SkyscraperPuzzle_ParameterController +from .skyscraper_sum_puzzle import SkyscraperSumPuzzle_ParameterController +from .sliding_window import SlidingWindow_ParameterController +from .slo_elephants import SLOElephants_ParameterController +from .smallest_binary_multiple import SmallestBinaryMultiple_ParameterController +from .smallest_circle import SmallestCircle_ParameterController +from .sorting import Sorting_ParameterController +from .spiral_matrix import SpiralMatrix_ParameterController +from .splitting_game import SplittingGame_ParameterController +from .spy_network import SpyNetwork_ParameterController +from .squ_squarks import SquSquarks_ParameterController +from .square_undamaged_point_counting import SquareUndamagedPointCounting_ParameterController +from .star_battle import StarBattle_ParameterController +from .stirling_second import StirlingSecond_ParameterController +from .stone_game import StoneGame_ParameterController +from .stone_intervals_game import StoneIntervalsGame_ParameterController +from .string_partition_shuffle import 
StringPartitionShuffle_ParameterController +from .string_reversal_construction import StringReversalConstruction_ParameterController +from .stu_well import STUWell_ParameterController +from .stunt_flying import StuntFlying_ParameterController +from .subarray_sum_xor import SubarraySumXor_ParameterController +from .subarray_xor_sum import SubarrayXorSum_ParameterController +from .subgraph_isomorphism import SubgraphIsomorphism_ParameterController +from .submatrix_sum_divisible_counting import SubmatrixSumDivisibleCounting_ParameterController +from .subsequence_reversal_lnds import SubsequenceReversalLNDS_ParameterController +from .subset_sum import SubsetSum_ParameterController +from .subset_sum_sequence import SubsetSumSequence_ParameterController +from .sudoku import Sudoku_ParameterController +from .sum_divisor_num import Sum_DivisorNum_ParameterController +from .sum_gcd import SumGCD_ParameterController +from .sum_gcd_with_individual import SumGCDWithIndividual_ParameterController +from .sum_lcm import SumLCM_ParameterController +from .sum_manhattan_curved_surface import SumManhattan_CurvedSurface_ParameterController +from .sum_mod import SumMOD_ParameterController +from .sum_phi_interval import SumPHIInterval_ParameterController +from .sum_product_divisor_num import SumProductDivisorNum_ParameterController +from .sum_pseudo_euclidean import SumPseudoEuclidean_ParameterController +from .sum_set_multiplication import SumSetMultiplication_ParameterController +from .sum_spanning_tree_gcd import SumSpanningTreeGCD_ParameterController +from .sum_triangle_area import SumTriangleArea_ParameterController +from .sum_xor_divisor_num import SumXorDivisorNum_ParameterController +from .survo_puzzle import SurvoPuzzle_ParameterController +from .taking_prime_game import TakingPrimeGame_ParameterController +from .task_arrangement import TaskArrangement_ParameterController +from .tetris_attack import TetrisAttack_ParameterController +from 
.three_string_common_subsequence_counting import ThreeStringCommonSubsequenceCounting_ParameterController +from .three_vertex_cycle_counting import ThreeVertexCycleCounting_ParameterController +from .topological_sort import TopologicalSort_ParameterController +from .topological_sort_minimal_lexicographical_order import TopologicalSort_MinimalLexicographicalOrder_ParameterController +from .tournament_longest_path import Tournament_LongestPath_ParameterController +from .transmission_delay import TransmissionDelay_ParameterController +from .tree_add_one_edge_diameter import TreeAddOneEdgeDiameter_ParameterController +from .tree_center import TreeCenter_ParameterController +from .tree_change_one_edge_diameter import TreeChangeOneEdgeDiameter_ParameterController +from .tree_coloring import TreeColoring_ParameterController +from .tree_distance_equal_triad_counting import Tree_DistanceEqualTriad_Counting_ParameterController +from .tree_dynamic_xor_zero_path import TreeDynamic_XORZeroPath_ParameterController +from .tree_elimination_expectation import TreeElimination_Expectation_ParameterController +from .tree_even_partitioning import TreeEvenPartitioning_ParameterController +from .tree_maximum_visited_vertex import TreeMaximumVisitedVertex_ParameterController +from .tree_random_walk_expectation import TreeRandomWalkExpectation_ParameterController +from .tree_topological_sequence_counting import TreeTopologicalSequenceCounting_ParameterController +from .triumphal_arch import TriumphalArch_ParameterController +from .twiddle_puzzle import TwiddlePuzzle_ParameterController +from .two_sat import TwoSAT_ParameterController +from .two_set_all_coprime_counting import TwoSet_AllCoprime_Counting_ParameterController +from .undamaged_submatrix_counting import UndamagedSubmatrixCounting_ParameterController +from .value_diminishing_selection import ValueDiminishingSelection_ParameterController +from .vertex_k_center import Vertex_KCenter_ParameterController +from .virus_synthesis import 
VirusSynthesis_ParameterController +from .visible_line import VisibleLine_ParameterController +from .warehouse_construction import WarehouseConstruction_ParameterController +from .weighted_binarytree import WeightedBinaryTree_ParameterController +from .weighted_lis import WeightedLIS_ParameterController +from .whack_a_mole import WhackAMole_ParameterController +from .wil import WIL_ParameterController +from .wyc import WYC_ParameterController +from .wyr_leveling_ground import WYRLevelingGround_ParameterController +from .xor_equation_counting import XorEquationCounting_ParameterController +from .zero_prefix_subset_counting import ZeroPrefixSubsetCounting_ParameterController + + +identifier2controller = { + "ABProgramSimulation" : ABProgramSimulation_ParameterController, + "AddMultiple_Divisible_Counting" : AddMultiple_Divisible_Counting_ParameterController, + "AdditionTable" : AdditionTable_ParameterController, + "AlmostCompleteGraphCycleCounting" : AlmostCompleteGraphCycleCounting_ParameterController, + "AndOr_Sequence_Counting" : AndOr_Sequence_Counting_ParameterController, + "AntiPalindromicSubstringCounting" : AntiPalindromicSubstringCounting_ParameterController, + "Axis_KCenter" : Axis_KCenter_ParameterController, + "BAJBytecomputer" : BAJBytecomputer_ParameterController, + "BannedPointSupersetPathCounting" : BannedPointSupersetPathCounting_ParameterController, + "BanyanHeart" : BanyanHeart_ParameterController, + "BEZMinimalistSecurity" : BEZMinimalistSecurity_ParameterController, + "BezoutIdentity" : BezoutIdentity_ParameterController, + "Binario" : Binario_ParameterController, + "Binario_NoAdjacencyRequirement" : Binario_NoAdjacencyRequirement_ParameterController, + "BinaryAlternation" : BinaryAlternation_ParameterController, + "BinaryLinearEquation_SolutionCounting" : BinaryLinearEquation_SolutionCounting_ParameterController, + "BinaryTreeLeafNumExpectation" : BinaryTreeLeafNumExpectation_ParameterController, + "BitEquationCounting" : 
BitEquationCounting_ParameterController, + "BitAndZero_PathCounting" : BitAndZero_PathCounting_ParameterController, + "BitwiseOperationSequenceCounting" : BitwiseOperationSequenceCounting_ParameterController, + "BlockImage" : BlockImage_ParameterController, + "BoundedAdjacencyDifference_Permutation_Counting" : BoundedAdjacencyDifference_Permutation_Counting_ParameterController, + "BoundedIntervalIntersection" : BoundedIntervalIntersection_ParameterController, + "BoundedMeanSubarrayCounting" : BoundedMeanSubarrayCounting_ParameterController, + "BoundedSubarrayCounting" : BoundedSubarrayCounting_ParameterController, + "BoxScheduling" : BoxScheduling_ParameterController, + "Bridge" : Bridge_ParameterController, + "BubbleSwapLowerBound_PermutationCounting" : BubbleSwapLowerBound_PermutationCounting_ParameterController, + "BucketSorting" : BucketSorting_ParameterController, + "CampfireParty" : CampfireParty_ParameterController, + "CampsitePuzzle" : CampsitePuzzle_ParameterController, + "Canon" : Canon_ParameterController, + "CantorExpansion" : CantorExpansion_ParameterController, + "CapitalCityEffect" : CapitalCityEffect_ParameterController, + "CardColoringCounting" : CardColoringCounting_ParameterController, + "CatalanNumberMod" : CatalanNumberMod_ParameterController, + "CheckAllCycleXorZero" : CheckAllCycleXorZero_ParameterController, + "ChoHamsters" : ChoHamsters_ParameterController, + "Cinema" : Cinema_ParameterController, + "Circuit" : Circuit_ParameterController, + "CirculatingDecimalCounting" : CirculatingDecimalCounting_ParameterController, + "CirculatingGrid" : CirculatingGrid_ParameterController, + "CleaningUp" : CleaningUp_ParameterController, + "ClearSymmetry" : ClearSymmetry_ParameterController, + "Clique_IndependentSet_Partitioning_Counting" : Clique_IndependentSet_Partitioning_Counting_ParameterController, + "CoinSquareGame" : CoinSquareGame_ParameterController, + "ColoringCounting" : ColoringCounting_ParameterController, + 
"CombinationOddSubsequenceCounting" : CombinationOddSubsequenceCounting_ParameterController, + "ConcatenationPartitionCountingSum" : ConcatenationPartitionCountingSum_ParameterController, + "CongruentEquation" : CongruentEquation_ParameterController, + "ConstructHackInterval" : ConstructHackInterval_ParameterController, + "ConvexHull" : ConvexHull_ParameterController, + "Cornfield" : Cornfield_ParameterController, + "CountdownEqual": Countdown_ParameterController, "CountdownClose": Countdown_ParameterController, + "CowDanceShow" : CowDanceShow_ParameterController, + "CRT" : CRT_ParameterController, + "Cryptarithmetic" : Cryptarithmetic_ParameterController, + "Cube_FixedLocalMaximumCounting" : Cube_FixedLocalMaximumCounting_ParameterController, + "CycleCounting" : CycleCounting_ParameterController, + "DecreasingDigitCounting" : DecreasingDigitCounting_ParameterController, + "DegreeFixed_SpanningTree" : DegreeFixed_SpanningTree_ParameterController, + "DeltaMinPopcount" : DeltaMinPopcount_ParameterController, + "DeltaNimGame" : DeltaNimGame_ParameterController, + "DerangementExtension" : DerangementExtension_ParameterController, + "DifferenceConstraintSystem" : DifferenceConstraintSystem_ParameterController, + "DifferenceConstraintSystemDAG" : DifferenceConstraintSystemDAG_ParameterController, + "DifferentColorPairing" : DifferentColorPairing_ParameterController, + "Differentiate" : Differentiate_ParameterController, + "DigitLISCounting" : DigitLISCounting_ParameterController, + "DiscreteLogarithm" : DiscreteLogarithm_ParameterController, + "Disinfection" : Disinfection_ParameterController, + "DistinctArrayPermutation" : DistinctArrayPermutation_ParameterController, + "DistinctEdgeColoredCompleteGraphCounting" : DistinctEdgeColoredCompleteGraphCounting_ParameterController, + "Division" : Division_ParameterController, + "DivisorFlipExpectation" : DivisorFlipExpectation_ParameterController, + "DoubleCrossCounting" : DoubleCrossCounting_ParameterController, + 
"DoublePalindromicStringCounting" : DoublePalindromicStringCounting_ParameterController, + "DoubleStackSorting" : DoubleStackSorting_ParameterController, + "DynDynamite" : DynDynamite_ParameterController, + "EightDigitPuzzle" : EightDigitPuzzle_ParameterController, + "EmperorWorries" : EmperorWorries_ParameterController, + "EnergyStorageMeter" : EnergyStorageMeter_ParameterController, + "EuclidGame" : EuclidGame_ParameterController, + "EvenDegreeGraphPartitioning" : EvenDegreeGraphPartitioning_ParameterController, + "Expression_AddingParenthese_Counting" : Expression_AddingParenthese_Counting_ParameterController, + "FaceRightWay" : FaceRightWay_ParameterController, + "FactorialTrailingZeroCount" : FactorialTrailingZeroCount_ParameterController, + "FBI_BinaryTree" : FBI_BinaryTree_ParameterController, + "Fibonacci" : Fibonacci_ParameterController, + "FibonacciContainingCounting" : FibonacciContainingCounting_ParameterController, + "Fibtrain" : Fibtrain_ParameterController, + "FireworkShow" : FireworkShow_ParameterController, + "FixedModK_Selection_Counting" : FixedModK_Selection_Counting_ParameterController, + "FixedOneEdgeNum_SpanningTree" : FixedOneEdgeNum_SpanningTree_ParameterController, + "FractionalProgramming" : FractionalProgramming_ParameterController, + "FractionalProgramming_BipartiteGraphMatching" : FractionalProgramming_BipartiteGraphMatching_ParameterController, + "FutoshikiPuzzle" : FutoshikiPuzzle_ParameterController, + "GasFireExtinguishers" : GasFireExtinguishers_ParameterController, + "GaussianElimination" : GaussianElimination_ParameterController, + "GCDFibonacciProduct" : GCDFibonacciProduct_ParameterController, + "GcdLcmCounting" : GcdLcmCounting_ParameterController, + "GCDOne_Counting" : GCDOne_Counting_ParameterController, + "GCDPrime_Counting" : GCDPrime_Counting_ParameterController, + "GoldWashing" : GoldWashing_ParameterController, + "GraMinimaGame" : GraMinimaGame_ParameterController, + "GradeRankingCounting" : 
GradeRankingCounting_ParameterController, + "GraphContainTreeCounting" : GraphContainTreeCounting_ParameterController, + "GraphIsomorphism" : GraphIsomorphism_ParameterController, + "GridBFS" : GridBFS_ParameterController, + "GridColoringCounting" : GridColoringCounting_ParameterController, + "GridComponent" : GridComponent_ParameterController, + "GridLocalMinimumCounting" : GridLocalMinimumCounting_ParameterController, + "GridParityConstruction" : GridParityConstruction_ParameterController, + "GridTriangleCounting" : GridTriangleCounting_ParameterController, + "HalvingChainCounting" : HalvingChainCounting_ParameterController, + "HamiltonianPath" : HamiltonianPath_ParameterController, + "HamiltonianPathExistence" : HamiltonianPathExistence_ParameterController, + "HeapCounting" : HeapCounting_ParameterController, + "HitoriPuzzle" : HitoriPuzzle_ParameterController, + "HungryRabbit" : HungryRabbit_ParameterController, + "HURWarehouseStore" : HURWarehouseStore_ParameterController, + "ImpParty" : ImpParty_ParameterController, + "IndividualSumBounded_SequenceCounting" : IndividualSumBounded_SequenceCounting_ParameterController, + "IntegerFactorizationCounting" : IntegerFactorizationCounting_ParameterController, + "IntegerProgramming" : IntegerProgramming_ParameterController, + "Integral" : Integral_ParameterController, + "InversionPair" : InversionPair_ParameterController, + "InversionPairK_Counting" : InversionPairK_Counting_ParameterController, + "Josephus" : Josephus_ParameterController, + "JugPuzzle" : JugPuzzle_ParameterController, + "KPartition" : KPartition_ParameterController, + "Kakurasu" : Kakurasu_ParameterController, + "KiddingMe" : KiddingMe_ParameterController, + "KingSorting" : KingSorting_ParameterController, + "KloBlocks" : KloBlocks_ParameterController, + "Knapsack" : Knapsack_ParameterController, + "KnightsAndKnaves" : KnightsAndKnaves_ParameterController, + "KosDicing" : KosDicing_ParameterController, + "Kth_BinaryTree" : 
Kth_BinaryTree_ParameterController, + "Kth_SemiBalancedBracketSequence" : Kth_SemiBalancedBracketSequence_ParameterController, + "KthSubsequence" : KthSubsequence_ParameterController, + "KUR" : KUR_ParameterController, + "LampChanging" : LampChanging_ParameterController, + "LandAcquisition" : LandAcquisition_ParameterController, + "LandformGenerationCounting" : LandformGenerationCounting_ParameterController, + "LargestConvexPolygon" : LargestConvexPolygon_ParameterController, + "LargestRectangle_AmongPoints" : LargestRectangle_AmongPoints_ParameterController, + "LAS" : LAS_ParameterController, + "LASLaser" : LASLaser_ParameterController, + "LCM" : LCM_ParameterController, + "LDSTwo_Counting" : LDSTwo_Counting_ParameterController, + "LightUpPuzzle" : LightUpPuzzle_ParameterController, + "LinkBeads" : LinkBeads_ParameterController, + "LIS_LDS_Concatenation" : LIS_LDS_Concatenation_ParameterController, + "LIZ_Lollipop" : LIZ_Lollipop_ParameterController, + "Longest_DoublePalindrome" : Longest_DoublePalindrome_ParameterController, + "Longest_MatchingSubsequence" : Longest_MatchingSubsequence_ParameterController, + "LongestMaxDiffBoundedInterval" : LongestMaxDiffBoundedInterval_ParameterController, + "LongestPath" : LongestPath_ParameterController, + "Longest_RepeatedPalindrome" : Longest_RepeatedPalindrome_ParameterController, + "MafMafia" : MafMafia_ParameterController, + "MagicSquarePuzzle" : MagicSquarePuzzle_ParameterController, + "MakingGrade" : MakingGrade_ParameterController, + "Matrix_BinaryExponentiation" : Matrix_BinaryExponentiation_ParameterController, + "MatrixPermutation_BothDiagonalOne" : MatrixPermutation_BothDiagonalOne_ParameterController, + "MatrixPermutationEquivalence" : MatrixPermutationEquivalence_ParameterController, + "MatrixPermutation_MainDiagonalOne" : MatrixPermutation_MainDiagonalOne_ParameterController, + "MatrixPooling" : MatrixPooling_ParameterController, + "MatrixRMQCounting" : MatrixRMQCounting_ParameterController, + 
"MaxDifferentGroupPairDivision" : MaxDifferentGroupPairDivision_ParameterController, + "MaxGridPathIntersection" : MaxGridPathIntersection_ParameterController, + "MaxMinimum_AfterIntervalAddition" : MaxMinimum_AfterIntervalAddition_ParameterController, + "MaxMultSplit" : MaxMultSplit_ParameterController, + "MaxMultiplicationFixedSum" : MaxMultiplicationFixedSum_ParameterController, + "MaxNoConflictingBombs" : MaxNoConflictingBombs_ParameterController, + "Max_NonAdjacent_KElementSum" : Max_NonAdjacent_KElementSum_ParameterController, + "MaxPermutation" : MaxPermutation_ParameterController, + "MaxRMQExpectation" : MaxRMQExpectation_ParameterController, + "MaxSegmentCoverageConstraint" : MaxSegmentCoverageConstraint_ParameterController, + "MaxSumLDS" : MaxSumLDS_ParameterController, + "MaxThreeSquareSum" : MaxThreeSquareSum_ParameterController, + "Max_TreeConstrainedPermutation_Weight" : Max_TreeConstrainedPermutation_Weight_ParameterController, + "MaxTree_KPathCoverage" : MaxTree_KPathCoverahe_ParameterController, + "MaxTreeXorPath" : MaxTreeXorPath_ParameterController, + "MaxWeightPalindromicSubstring" : MaxWeightPalindromicSubstring_ParameterController, + "MaxXorPath" : MaxXorPath_ParameterController, + "MaxXorSet" : MaxXorSet_ParameterController, + "MaximumAchromaticNumber" : MaximumAchromaticNumber_ParameterController, + "MaximumClique" : MaximumClique_ParameterController, + "MaximumDivisor" : MaximumDivisor_ParameterController, + "MaximumIndependentSetGrid" : MaximumIndependentSetGrid_ParameterController, + "Maximum_IndependentSet_Tree" : Maximum_IndependentSet_Tree_ParameterController, + "MaximumLexicographicalOrderSubsequence" : MaximumLexicographicalOrderSubsequence_ParameterController, + "MaximumPointSegmentMatching" : MaximumPointSegmentMatching_ParameterController, + "Maximum_SubsequenceNum" : Maximum_SubsequenceNum_ParameterController, + "MaximumWeightMatching" : MaximumWeightMatching_ParameterController, + "Maze" : Maze_ParameterController, + 
"MinConversionToCycleCost" : MinConversionToCycleCost_ParameterController, + "MinCostReducingLNDS" : MinCostReducingLNDS_ParameterController, + "MinCostTreeCoverage" : MinCostTreeCoverage_ParameterController, + "MinCubeAssignment" : MinCubeAssignment_ParameterController, + "MinDivisionSumXor" : MinDivisionSumXor_ParameterController, + "MinInorderBinaryTree" : MinInorderBinaryTree_ParameterController, + "MinKDivisorNumber" : MinKDivisorNumber_ParameterController, + "MinNoSolutionLinearDiophantineEquation" : MinNoSolutionLinearDiophantineEquation_ParameterController, + "MinNonsubstring" : MinNonsubstring_ParameterController, + "MinPairSumMultiplicationPermutation" : MinPairSumMultiplicationPermutation_ParameterController, + "MinPathCover_DAG" : MinPathCover_DAG_ParameterController, + "MinSumChebyshevDistance" : MinSumChebyshevDistance_ParameterController, + "MinSumDistanceSquare" : MinSumDistanceSquare_ParameterController, + "MinSumPreXor" : MinSumPreXor_ParameterController, + "MinSwapTwoPermutations" : MinSwapTwoPermutations_ParameterController, + "MinXorPair" : MinXorPair_ParameterController, + "Minesweeping" : Minesweeping_ParameterController, + "MinimalCyclicShift" : MinimalCyclicShift_ParameterController, + "MinimumChromaticNumber" : MinimumChromaticNumber_ParameterController, + "MinimumChromaticNumber_SegmentOverlap" : MinimumChromaticNumber_SegmentOverlap_ParameterController, + "MinimumCost_MaximumFlow" : MinimumCost_MaximumFlow_ParameterController, + "Minimum_CrossingEdges_GraphPartition" : Minimum_CrossingEdges_GraphPartition_ParameterController, + "MinimumDirectedSpanningTree" : MinimumDirectedSpanningTree_ParameterController, + "Minimum_DominatingInterval" : Minimum_DominatingInterval_ParameterController, + "Minimum_DominatingSet" : Minimum_DominatingSet_ParameterController, + "Minimum_DominatingSet_Grid" : Minimum_DominatingSet_Grid_ParameterController, + "MinimumFibonacciRepresentation" : MinimumFibonacciRepresentation_ParameterController, + 
"MinimumHarmoniousChromaticNumber" : MinimumHarmoniousChromaticNumber_ParameterController, + "MinimumIntervalCoverage" : MinimumIntervalCoverage_ParameterController, + "Minimum_MaxAbsSlicer" : Minimum_MaxAbsSlicer_ParameterController, + "Minimum_MaxSlicer" : Minimum_MaxSlicer_ParameterController, + "MinimumRatioPath" : MinimumRatioPath_ParameterController, + "MinimumSpanningTree" : MinimumSpanningTree_ParameterController, + "MinimumSpanningTreeCounting" : MinimumSpanningTreeCounting_ParameterController, + "MinimumSteinerTree" : MinimumSteinerTree_ParameterController, + "MinimumSumDifferenceSubmatrix" : MinimumSumDifferenceSubmatrix_ParameterController, + "MinimumTreeWeightedDominatingAncestor" : MinimumTreeWeightedDominatingAncestor_ParameterController, + "MinimumUnconflictedGridKMax" : MinimumUnconflictedGridKMax_ParameterController, + "Minimum_VertexCover" : Minimum_VertexCover_ParameterController, + "MinimumWeightedSpanningTree" : MinimumWeightedSpanningTree_ParameterController, + "MitterTransportation" : MitterTransportation_ParameterController, + "MixedGraphEulerianCircuit" : MixedGraphEulerianCircuit_ParameterController, + "MoneyChargingGame" : MoneyChargingGame_ParameterController, + "MonochromeBlockCounting" : MonochromeBlockCounting_ParameterController, + "MonotonicStack" : MonotonicStack_ParameterController, + "MostComponentTreeRemovingTwoPaths" : MostComponentTreeRemovingTwoPaths_ParameterController, + "MostNumEdge_NonSelfIsomorphism" : MostNumEdge_NonSelfIsomorphism_ParameterController, + "MultiDrink" : MultiDrink_ParameterController, + "MultipleFlippingGame" : MultipleFlippingGame_ParameterController, + "Multiplication" : Multiplication_ParameterController, + "MYJ" : MYJ_ParameterController, + "NANDResultCounting" : NANDResultCounting_ParameterController, + "NegativeBase" : NegativeBase_ParameterController, + "NewNimGame" : NewNimGame_ParameterController, + "NextPalindromic" : NextPalindromic_ParameterController, + "NinePuzzle" : 
NinePuzzle_ParameterController, + "NoAdjacentGirlCounting" : NoAdjacentGirlCounting_ParameterController, + "NoDoubleTripleCounting" : NoDoubleTripleCounting_ParameterController, + "NotContainingStringCounting" : NotContainingStringCounting_ParameterController, + "NumberPartitionCounting" : NumberPartitionCounting_ParameterController, + "Numbrix" : Numbrix_ParameterController, + "OddVisitation" : OddVisitation_ParameterController, + "ODLDistance" : ODLDistance_ParameterController, + "PairMoreOneCounting" : PairMoreOneCounting_ParameterController, + "PalembangBridges" : PalembangBridges_ParameterController, + "PalindromePartitionCounting" : PalindromePartitionCounting_ParameterController, + "PalindromicSubstringNumberCounting" : PalindromicSubstringNumberCounting_ParameterController, + "PanSolarPanels" : PanSolarPanels_ParameterController, + "Path_NoGoingBack_Counting" : Path_NoGoingBack_Counting_ParameterController, + "Patrol" : Patrol_ParameterController, + "PCPPermutation" : PCPPermutation_ParameterController, + "PipelineArrangement" : PipelineArrangement_ParameterController, + "POLPolarization" : POLPolarization_ParameterController, + "PolyaModel" : PolyaModel_ParameterController, + "PolynomialFactorization" : PolynomialFactorization_ParameterController, + "PolynomialInterpolation" : PolynomialInterpolation_ParameterController, + "PolynomialMinimum" : PolynomialMinimum_ParameterController, + "PolynomialRemainder" : PolynomialRemainder_ParameterController, + "PowerCycle" : PowerCycle_ParameterController, + "PowerShortcut" : PowerShortcut_ParameterController, + "PowerNest" : PowerNest_ParameterController, + "PrefixConcatenation" : PrefixConcatenation_ParameterController, + "PrefixProductMODDistinctPermutation" : PrefixProductMODDistinctPermutation_ParameterController, + "PrefixSumMODDistinctPermutation" : PrefixSumMODDistinctPermutation_ParameterController, + "Prefixuffix" : Prefixuffix_ParameterController, + "PreorderTraversal" : 
PreorderTraversal_ParameterController, + "PrimeGraph_MinimumChromaticNumber" : PrimeGraph_MinimumChromaticNumber_ParameterController, + "ProtectingFlowers" : ProtectingFlowers_ParameterController, + "PythagoreanGraph_IndependentSetCounting" : PythagoreanGraph_IndependentSetCounting_ParameterController, + "QuadMagicItems" : QuadMagicItems_ParameterController, + "QuadraticFunctionSegmentation" : QuadraticFunctionSegmentation_ParameterController, + "QuantumLockPuzzle" : QuantumLockPuzzle_ParameterController, + "QueenPlacement" : QueenPlacement_ParameterController, + "RandomRangeMaxExpectation" : RandomRangeMaxExpectation_ParameterController, + "RangeConstrained_IncreasingSequence_Counting" : RangeConstrained_IncreasingSequence_Counting_ParameterController, + "RangeFourSequenceConstruction" : RangeFourSequenceConstruction_ParameterController, + "RangeShrinkingSequenceCounting" : RangeShrinkingSequenceCounting_ParameterController, + "RecursiveFunction" : RecursiveFunction_ParameterController, + "RecursiveSequenceSumConstruction" : RecursiveSequenceSumConstruction_ParameterController, + "RepeatSequenceLNDS" : RepeatSequenceLNDS_ParameterController, + "RootExtraction" : RootExtraction_ParameterController, + "RoundRobin" : RoundRobin_ParameterController, + "RoundTableAssignment" : RoundTableAssignment_ParameterController, + "RoyalLockCounting" : RoyalLockCounting_ParameterController, + "SaladBar" : SaladBar_ParameterController, + "SalesmanFatigue" : SalesmanFatigue_ParameterController, + "SameAdjacencyCounting" : SameAdjacencyCounting_ParameterController, + "SAT" : SAT_ParameterController, + "SCC_Sequence_Counting" : SCC_Sequence_Counting_ParameterController, + "SecretCowCode" : SecretCowCode_ParameterController, + "SegmentMinLengthEqual_Counting" : SegmentMinLengthEqual_Counting_ParameterController, + "SegmentTreeSortingCounting" : SegmentTreeSortingCounting_ParameterController, + "SelfPowerSequenceMOD" : SelfPowerSequenceMOD_ParameterController, + "SetCover" : 
SetCover_ParameterController, + "SetSplitting" : SetSplitting_ParameterController, + "SharedSubstringCounting" : SharedSubstringCounting_ParameterController, + "ShortestPath" : ShortestPath_ParameterController, + "ShortestPathCountConstruction" : ShortestPathCountConstruction_ParameterController, + "ShortestUnicolorSubstring" : ShortestUnicolorSubstring_ParameterController, + "SingingGirlStory" : SingingGirlStory_ParameterController, + "SingleStackSorting" : SingleStackSorting_ParameterController, + "SkaRockGarden" : SkaRockGarden_ParameterController, + "SkyscraperPuzzle" : SkyscraperPuzzle_ParameterController, + "SkyscraperSumPuzzle" : SkyscraperSumPuzzle_ParameterController, + "SlidingWindow" : SlidingWindow_ParameterController, + "SLOElephants" : SLOElephants_ParameterController, + "SmallestBinaryMultiple" : SmallestBinaryMultiple_ParameterController, + "SmallestCircle" : SmallestCircle_ParameterController, + "Sorting" : Sorting_ParameterController, + "SpiralMatrix" : SpiralMatrix_ParameterController, + "SplittingGame" : SplittingGame_ParameterController, + "SpyNetwork" : SpyNetwork_ParameterController, + "SquSquarks" : SquSquarks_ParameterController, + "SquareUndamagedPointCounting" : SquareUndamagedPointCounting_ParameterController, + "StarBattle" : StarBattle_ParameterController, + "StirlingSecond" : StirlingSecond_ParameterController, + "StoneGame" : StoneGame_ParameterController, + "StoneIntervalsGame" : StoneIntervalsGame_ParameterController, + "StringPartitionShuffle" : StringPartitionShuffle_ParameterController, + "StringReversalConstruction" : StringReversalConstruction_ParameterController, + "STUWell" : STUWell_ParameterController, + "StuntFlying" : StuntFlying_ParameterController, + "SubarraySumXor" : SubarraySumXor_ParameterController, + "SubarrayXorSum" : SubarrayXorSum_ParameterController, + "SubgraphIsomorphism" : SubgraphIsomorphism_ParameterController, + "SubmatrixSumDivisibleCounting" : SubmatrixSumDivisibleCounting_ParameterController, + 
"SubsequenceReversalLNDS" : SubsequenceReversalLNDS_ParameterController, + "SubsetSum" : SubsetSum_ParameterController, + "SubsetSumSequence" : SubsetSumSequence_ParameterController, + "Sudoku" : Sudoku_ParameterController, + "Sum_DivisorNum" : Sum_DivisorNum_ParameterController, + "SumGCD" : SumGCD_ParameterController, + "SumGCDWithIndividual" : SumGCDWithIndividual_ParameterController, + "SumLCM" : SumLCM_ParameterController, + "SumManhattan_CurvedSurface" : SumManhattan_CurvedSurface_ParameterController, + "SumMOD" : SumMOD_ParameterController, + "SumPHIInterval" : SumPHIInterval_ParameterController, + "SumProductDivisorNum" : SumProductDivisorNum_ParameterController, + "SumPseudoEuclidean" : SumPseudoEuclidean_ParameterController, + "SumSetMultiplication" : SumSetMultiplication_ParameterController, + "SumSpanningTreeGCD" : SumSpanningTreeGCD_ParameterController, + "SumTriangleArea" : SumTriangleArea_ParameterController, + "SumXorDivisorNum" : SumXorDivisorNum_ParameterController, + "SurvoPuzzle" : SurvoPuzzle_ParameterController, + "TakingPrimeGame" : TakingPrimeGame_ParameterController, + "TaskArrangement" : TaskArrangement_ParameterController, + "TetrisAttack" : TetrisAttack_ParameterController, + "ThreeStringCommonSubsequenceCounting" : ThreeStringCommonSubsequenceCounting_ParameterController, + "ThreeVertexCycleCounting" : ThreeVertexCycleCounting_ParameterController, + "TopologicalSort" : TopologicalSort_ParameterController, + "TopologicalSort_MinimalLexicographicalOrder" : TopologicalSort_MinimalLexicographicalOrder_ParameterController, + "Tournament_LongestPath" : Tournament_LongestPath_ParameterController, + "TransmissionDelay" : TransmissionDelay_ParameterController, + "TreeAddOneEdgeDiameter" : TreeAddOneEdgeDiameter_ParameterController, + "TreeCenter" : TreeCenter_ParameterController, + "TreeChangeOneEdgeDiameter" : TreeChangeOneEdgeDiameter_ParameterController, + "TreeColoring" : TreeColoring_ParameterController, + "Tree_DistanceEqualTriad_Counting" 
: Tree_DistanceEqualTriad_Counting_ParameterController, + "TreeDynamic_XORZeroPath" : TreeDynamic_XORZeroPath_ParameterController, + "TreeElimination_Expectation" : TreeElimination_Expectation_ParameterController, + "TreeEvenPartitioning" : TreeEvenPartitioning_ParameterController, + "TreeMaximumVisitedVertex" : TreeMaximumVisitedVertex_ParameterController, + "TreeRandomWalkExpectation" : TreeRandomWalkExpectation_ParameterController, + "TreeTopologicalSequenceCounting" : TreeTopologicalSequenceCounting_ParameterController, + "TriumphalArch" : TriumphalArch_ParameterController, + "TwiddlePuzzle" : TwiddlePuzzle_ParameterController, + "TwoSAT" : TwoSAT_ParameterController, + "TwoSet_AllCoprime_Counting" : TwoSet_AllCoprime_Counting_ParameterController, + "UndamagedSubmatrixCounting" : UndamagedSubmatrixCounting_ParameterController, + "ValueDiminishingSelection" : ValueDiminishingSelection_ParameterController, + "Vertex_KCenter" : Vertex_KCenter_ParameterController, + "VirusSynthesis" : VirusSynthesis_ParameterController, + "VisibleLine" : VisibleLine_ParameterController, + "WarehouseConstruction" : WarehouseConstruction_ParameterController, + "WeightedBinaryTree" : WeightedBinaryTree_ParameterController, + "WeightedLIS" : WeightedLIS_ParameterController, + "WhackAMole" : WhackAMole_ParameterController, + "WIL" : WIL_ParameterController, + "WYC" : WYC_ParameterController, + "WYRLevelingGround" : WYRLevelingGround_ParameterController, + "XorEquationCounting" : XorEquationCounting_ParameterController, + "ZeroPrefixSubsetCounting" : ZeroPrefixSubsetCounting_ParameterController, +} \ No newline at end of file diff --git a/server/Gym/parameter_controllers/ab_program_simulation/__init__.py b/server/Gym/parameter_controllers/ab_program_simulation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7962927d70f332b2f9de64469b4cf74446eae05f --- /dev/null +++ b/server/Gym/parameter_controllers/ab_program_simulation/__init__.py @@ -0,0 +1 @@ +from 
class ABProgramSimulation_ParameterController(ParameterController):
    """Hold the ``N`` and ``max_steps`` parameters and grow them on demand.

    Both values start at fixed integers (``N = 4``, ``max_steps = 10``).
    Each call to :meth:`update` replaces a value ``v`` with
    ``int(v * 1.1 + 1)``, so both strictly increase for these starting values.
    The base ``ParameterController`` is defined elsewhere; nothing about its
    behavior is assumed here beyond accepting ``**kwargs``.
    """

    def __init__(self, **kwargs):
        # Base initializer runs first, exactly as before; kwargs pass through untouched.
        super().__init__(**kwargs)
        self.N, self.max_steps = 4, 10

    def update(self) -> None:
        """Scale ``N`` and ``max_steps`` by 1.1, add one, truncate to ``int``."""
        scaled_n = self.N * 1.1 + 1
        self.N = int(scaled_n)
        scaled_steps = self.max_steps * 1.1 + 1
        self.max_steps = int(scaled_steps)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N`` and ``max_steps``."""
        current = {"N": self.N, "max_steps": self.max_steps}
        return [current]
class AdditionTable_ParameterController(ParameterController):
    """Hold a single integer parameter ``N`` that rises by one, capped at 26.

    ``N`` starts at 3. The base ``ParameterController`` lives elsewhere and is
    only relied on to accept ``**kwargs``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Increment ``N`` by one without ever exceeding 26."""
        # NOTE(review): the cap of 26 presumably matches an alphabet size used
        # by the environment -- confirm against the environment code.
        candidate = self.N + 1
        self.N = candidate if candidate < 26 else 26

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": <current N>}``."""
        return [{"N": self.N}]
class AlmostCompleteGraphCycleCounting_ParameterController(ParameterController):
    """Hold the ``MAX_N`` parameter and enlarge it by a factor of 1.5 per update.

    ``MAX_N`` starts at 5. The base ``ParameterController`` is defined outside
    this block; only its ``**kwargs`` constructor is used here.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.MAX_N = 5

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)`` (fraction truncated)."""
        enlarged = self.MAX_N * 1.5
        self.MAX_N = int(enlarged)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_N": <current MAX_N>}``."""
        snapshot = {"MAX_N": self.MAX_N}
        return [snapshot]
class AntiPalindromicSubstringCounting_ParameterController(ParameterController):
    """Hold a single integer parameter ``N`` that grows by about 10% plus one.

    ``N`` starts at 3. The base ``ParameterController`` is defined elsewhere;
    this class only forwards ``**kwargs`` to it.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        # The "+ 1" makes the value strictly increase even when N * 1.1
        # would truncate back to N.
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": <current N>}``."""
        return [{"N": self.N}]
class Axis_KCenter_ParameterController(ParameterController):
    """Hold a single integer parameter ``N`` that grows by about 10% plus one.

    ``N`` starts at 4. Nothing is assumed about the base
    ``ParameterController`` other than that it accepts ``**kwargs``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)`` (fraction truncated)."""
        next_n = int(self.N * 1.1 + 1)
        self.N = next_n

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": <current N>}``."""
        params = {"N": self.N}
        return [params]
class BannedPointSupersetPathCounting_ParameterController(ParameterController):
    """Hold the ``MAX_N_M_R`` and ``MAX_O`` parameters and grow both per update.

    Starting values are ``MAX_N_M_R = 1`` and ``MAX_O = 10``. The base
    ``ParameterController`` is defined elsewhere and only receives ``**kwargs``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.MAX_N_M_R, self.MAX_O = 1, 10

    def update(self) -> None:
        """Replace each parameter ``v`` with ``int(v * 1.1 + 1)``."""
        # Same order as the attributes were originally updated:
        # MAX_N_M_R first, then MAX_O.
        for attr in ("MAX_N_M_R", "MAX_O"):
            setattr(self, attr, int(getattr(self, attr) * 1.1 + 1))

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``MAX_N_M_R`` and ``MAX_O``."""
        return [{"MAX_N_M_R": self.MAX_N_M_R, "MAX_O": self.MAX_O}]
# File: server/Gym/parameter_controllers/banyan_heart/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BanyanHeart_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 4)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/bez_minimalist_security/__init__.py
from .parameter_controller import BEZMinimalistSecurity_ParameterController


# File: server/Gym/parameter_controllers/bez_minimalist_security/parameter_controller.py
from typing import Dict, List, Optional

from ...parameter_controller import ParameterController


class BEZMinimalistSecurity_ParameterController(ParameterController):
    """Controller tracking ``N`` (initially 3) combined with a list of edge ratios.

    Args:
        edge_ratio_list: Ratios to pair with ``N``. When ``None``, the values
            1.0, 1.1, ..., 3.0 are used.
        **kwargs: Forwarded untouched to the base class.
    """

    def __init__(self, edge_ratio_list : Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        self.N = 3

        if edge_ratio_list is None:
            edge_ratio_list = [
                1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
                2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9,
                3.0,
            ]
        self.edge_ratio_list = edge_ratio_list

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one dict per edge ratio that passes the pair-count bound.

        A ratio is kept only when ``int(N * edge_ratio)`` does not exceed
        ``N * (N - 1) // 2``.
        """
        pair_limit = self.N * (self.N - 1) // 2
        kept = []
        for ratio in self.edge_ratio_list:
            if int(self.N * ratio) <= pair_limit:
                kept.append({"N": self.N, "edge_ratio": ratio})
        return kept


# File: server/Gym/parameter_controllers/bezout_identity/__init__.py
from .parameter_controller import BezoutIdentity_ParameterController


# File: server/Gym/parameter_controllers/bezout_identity/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BezoutIdentity_ParameterController(ParameterController):
    """Controller tracking ``N`` (initially 2) and ``MAX_A`` (initially 32)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 2
        self.MAX_A = 32

    def update(self) -> None:
        """Increase ``N`` by one and double ``MAX_A``."""
        self.N = self.N + 1
        self.MAX_A = self.MAX_A * 2

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N`` and ``MAX_A``."""
        current = {"N": self.N, "MAX_A": self.MAX_A}
        return [current]


# File: server/Gym/parameter_controllers/binario/__init__.py
from .parameter_controller import Binario_ParameterController
# File: server/Gym/parameter_controllers/binario/parameter_controller.py
from typing import Dict, List, Optional

from ...parameter_controller import ParameterController


class Binario_ParameterController(ParameterController):
    """Controller tracking ``MAX_N_M`` (initially 2) combined with sparsity values.

    Args:
        sparsity_list: Sparsity values to pair with ``MAX_N_M``. When ``None``,
            the values 0.1, 0.2, ..., 0.9 are used.
        **kwargs: Forwarded untouched to the base class.
    """

    def __init__(self, sparsity_list : Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        self.MAX_N_M = 2

        if sparsity_list is None:
            sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        self.sparsity_list = sparsity_list

    def update(self) -> None:
        """Increase ``MAX_N_M`` by one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one dict per sparsity value, each carrying the current ``MAX_N_M``."""
        combos = []
        for value in self.sparsity_list:
            combos.append({"MAX_N_M": self.MAX_N_M, "sparsity": value})
        return combos


# File: server/Gym/parameter_controllers/binario_no_adjacency_requirement/__init__.py
from .parameter_controller import Binario_NoAdjacencyRequirement_ParameterController


# File: server/Gym/parameter_controllers/binario_no_adjacency_requirement/parameter_controller.py
from typing import Dict, List, Optional

from ...parameter_controller import ParameterController


class Binario_NoAdjacencyRequirement_ParameterController(ParameterController):
    """Controller tracking ``MAX_N_M`` (initially 2) combined with sparsity values.

    Args:
        sparsity_list: Sparsity values to pair with ``MAX_N_M``. When ``None``,
            the values 0.1, 0.2, ..., 0.9 are used.
        **kwargs: Forwarded untouched to the base class.
    """

    def __init__(self, sparsity_list : Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        self.MAX_N_M = 2

        if sparsity_list is None:
            sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        self.sparsity_list = sparsity_list

    def update(self) -> None:
        """Replace ``MAX_N_M`` with ``int(MAX_N_M * 1.1 + 1)``."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one dict per sparsity value, each carrying the current ``MAX_N_M``."""
        combos = []
        for value in self.sparsity_list:
            combos.append({"MAX_N_M": self.MAX_N_M, "sparsity": value})
        return combos


# File: server/Gym/parameter_controllers/binary_alternation/__init__.py
from .parameter_controller import BinaryAlternation_ParameterController


# File: server/Gym/parameter_controllers/binary_alternation/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BinaryAlternation_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``zero_count`` (initially 2)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.zero_count = 2

    def update(self) -> None:
        """Replace ``zero_count`` with ``int(zero_count * 1.1 + 1)``."""
        grown = self.zero_count * 1.1 + 1
        self.zero_count = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"zero_count": zero_count}``."""
        return [{"zero_count": self.zero_count}]
# File: server/Gym/parameter_controllers/binary_linear_equation_solution_counting/__init__.py
from .parameter_controller import BinaryLinearEquation_SolutionCounting_ParameterController


# File: server/Gym/parameter_controllers/binary_linear_equation_solution_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BinaryLinearEquation_SolutionCounting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_RANGE`` (initially 8)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_RANGE = 8

    def update(self) -> None:
        """Double ``MAX_RANGE``."""
        self.MAX_RANGE = self.MAX_RANGE * 2

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_RANGE": MAX_RANGE}``."""
        return [{"MAX_RANGE": self.MAX_RANGE}]


# File: server/Gym/parameter_controllers/binary_tree_leaf_num_expectation/__init__.py
from .parameter_controller import BinaryTreeLeafNumExpectation_ParameterController
# File: server/Gym/parameter_controllers/binary_tree_leaf_num_expectation/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BinaryTreeLeafNumExpectation_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_N`` (initially 5)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_N = 5

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)``."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_N": MAX_N}``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/bit_equation_counting/__init__.py
from .parameter_controller import BitEquationCounting_ParameterController


# File: server/Gym/parameter_controllers/bit_equation_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BitEquationCounting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 2)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 2

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]
# File: server/Gym/parameter_controllers/bitand_zero_path_counting/__init__.py
from .parameter_controller import BitAndZero_PathCounting_ParameterController


# File: server/Gym/parameter_controllers/bitand_zero_path_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BitAndZero_PathCounting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``max_length`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.max_length = 3

    def update(self) -> None:
        """Replace ``max_length`` with ``int(max_length * 1.1 + 1)``."""
        grown = self.max_length * 1.1 + 1
        self.max_length = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"max_length": max_length}``."""
        return [{"max_length": self.max_length}]


# File: server/Gym/parameter_controllers/bitwise_operation_sequence_counting/__init__.py
from .parameter_controller import BitwiseOperationSequenceCounting_ParameterController
# File: server/Gym/parameter_controllers/bitwise_operation_sequence_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BitwiseOperationSequenceCounting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_N_M`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Replace ``MAX_N_M`` with ``int(MAX_N_M * 1.1 + 1)``."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_N_M": MAX_N_M}``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/block_image/__init__.py
from .parameter_controller import BlockImage_ParameterController


# File: server/Gym/parameter_controllers/block_image/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BlockImage_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_M_N`` (initially 2)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_M_N = 2

    def update(self) -> None:
        """Increase ``MAX_M_N`` by one."""
        self.MAX_M_N = self.MAX_M_N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_M_N": MAX_M_N}``."""
        return [{"MAX_M_N": self.MAX_M_N}]
# File: server/Gym/parameter_controllers/bounded_adjacency_difference_permutation_counting/__init__.py
from .parameter_controller import BoundedAdjacencyDifference_Permutation_Counting_ParameterController


# File: server/Gym/parameter_controllers/bounded_adjacency_difference_permutation_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BoundedAdjacencyDifference_Permutation_Counting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 4)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/bounded_interval_intersection/__init__.py
from .parameter_controller import BoundedIntervalIntersection_ParameterController
# File: server/Gym/parameter_controllers/bounded_interval_intersection/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BoundedIntervalIntersection_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/bounded_mean_subarray_counting/__init__.py
from .parameter_controller import BoundedMeanSubarrayCounting_ParameterController


# File: server/Gym/parameter_controllers/bounded_mean_subarray_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BoundedMeanSubarrayCounting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]
# File: server/Gym/parameter_controllers/bounded_subarray_counting/__init__.py
from .parameter_controller import BoundedSubarrayCounting_ParameterController


# File: server/Gym/parameter_controllers/bounded_subarray_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BoundedSubarrayCounting_ParameterController(ParameterController):
    """Controller tracking ``N`` (initially 3) and ``M`` (initially 2)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 3
        self.M = 2

    def update(self) -> None:
        """Replace each parameter ``x`` with ``int(x * 1.1 + 1)``."""
        next_n = int(self.N * 1.1 + 1)
        next_m = int(self.M * 1.1 + 1)
        self.N = next_n
        self.M = next_m

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N`` and ``M``."""
        current = {"N": self.N, "M": self.M}
        return [current]


# File: server/Gym/parameter_controllers/box_scheduling/__init__.py
from .parameter_controller import BoxScheduling_ParameterController
# File: server/Gym/parameter_controllers/box_scheduling/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BoxScheduling_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_N`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_N = 3

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.1 + 1)``."""
        grown = self.MAX_N * 1.1 + 1
        self.MAX_N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_N": MAX_N}``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/bridge/__init__.py
from .parameter_controller import Bridge_ParameterController


# File: server/Gym/parameter_controllers/bridge/parameter_controller.py
from typing import Dict, List, Optional

from ...parameter_controller import ParameterController


class Bridge_ParameterController(ParameterController):
    """Controller tracking ``N`` (initially 4) with edge-density and component-count options.

    Args:
        edge_density_list: Edge densities to enumerate. When ``None``, a
            built-in list ranging from 0.02 to 0.9 is used.
        component_num_density_list: Fractions of ``N`` from which component
            counts are derived. When ``None``, a built-in list ranging from
            0.05 to 0.8 is used.
        **kwargs: Forwarded untouched to the base class.
    """

    def __init__(self, edge_density_list : Optional[List] = None, component_num_density_list : Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        self.N = 4

        if edge_density_list is None:
            edge_density_list = [
                0.02, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
                0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9,
            ]
        self.edge_density_list = edge_density_list

        if component_num_density_list is None:
            component_num_density_list = [0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.8]
        self.component_num_density_list = component_num_density_list

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return every (edge density, component count) combination for the current ``N``.

        Component counts are ``int(density * N)`` for each configured density;
        duplicates collapse and counts below 2 are discarded. Edge density is
        the outer loop, component count the inner one.
        """
        # Elements are inserted in the order of component_num_density_list.
        counts = {
            candidate
            for candidate in (int(density * self.N) for density in self.component_num_density_list)
            if candidate >= 2
        }

        combos = []
        for density in self.edge_density_list:
            for count in counts:
                combos.append({"N": self.N, "edge_density": density, "component_num": count})
        return combos


# File: server/Gym/parameter_controllers/bubble_swap_lower_bound_permutation_counting/__init__.py
from .parameter_controller import BubbleSwapLowerBound_PermutationCounting_ParameterController


# File: server/Gym/parameter_controllers/bubble_swap_lower_bound_permutation_counting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BubbleSwapLowerBound_PermutationCounting_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]
# File: server/Gym/parameter_controllers/bucket_sorting/__init__.py
from .parameter_controller import BucketSorting_ParameterController


# File: server/Gym/parameter_controllers/bucket_sorting/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class BucketSorting_ParameterController(ParameterController):
    """Controller tracking ``N`` (initially 5) and ``MAX`` (initially 2)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 5
        self.MAX = 2

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)`` and increase ``MAX`` by one."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)
        self.MAX = self.MAX + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N`` and ``MAX``."""
        current = {"N": self.N, "MAX": self.MAX}
        return [current]


# File: server/Gym/parameter_controllers/campfire_party/__init__.py
from .parameter_controller import CampfireParty_ParameterController


# File: server/Gym/parameter_controllers/campfire_party/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class CampfireParty_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 4)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/campsite_puzzle/__init__.py
from .parameter_controller import CampsitePuzzle_ParameterController


# File: server/Gym/parameter_controllers/campsite_puzzle/parameter_controller.py
from typing import Dict, List, Optional

from ...parameter_controller import ParameterController


class CampsitePuzzle_ParameterController(ParameterController):
    """Controller tracking ``MAX_N_M`` (initially 3) combined with sparsity values.

    Args:
        sparsity_list: Sparsity values to pair with ``MAX_N_M``. When ``None``,
            the values 0.1, 0.2, ..., 0.9 are used.
        **kwargs: Forwarded untouched to the base class.
    """

    def __init__(self, sparsity_list : Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        self.MAX_N_M = 3

        if sparsity_list is None:
            sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        self.sparsity_list = sparsity_list

    def update(self) -> None:
        """Increase ``MAX_N_M`` by one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one dict per sparsity value, each carrying the current ``MAX_N_M``."""
        combos = []
        for value in self.sparsity_list:
            combos.append({"MAX_N_M": self.MAX_N_M, "sparsity": value})
        return combos
# File: server/Gym/parameter_controllers/canon/__init__.py
from .parameter_controller import Canon_ParameterController


# File: server/Gym/parameter_controllers/canon/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class Canon_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_N_M`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Replace ``MAX_N_M`` with ``int(MAX_N_M * 1.1 + 1)``."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_N_M": MAX_N_M}``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/cantor_expansion/__init__.py
from .parameter_controller import CantorExpansion_ParameterController


# File: server/Gym/parameter_controllers/cantor_expansion/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class CantorExpansion_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``N`` (initially 3)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"N": N}``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/capital_city_effect/__init__.py
from .parameter_controller import CapitalCityEffect_ParameterController


# File: server/Gym/parameter_controllers/capital_city_effect/parameter_controller.py
from typing import Dict, List

from ...parameter_controller import ParameterController


class CapitalCityEffect_ParameterController(ParameterController):
    """Controller that tracks one integer parameter, ``MAX_R`` (initially 20)."""

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        self.MAX_R = 20

    def update(self) -> None:
        """Replace ``MAX_R`` with ``int(MAX_R * 1.5)``."""
        scaled = self.MAX_R * 1.5
        self.MAX_R = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list containing ``{"MAX_R": MAX_R}``."""
        return [{"MAX_R": self.MAX_R}]


# File: server/Gym/parameter_controllers/card_coloring_counting/__init__.py
from .parameter_controller import CardColoringCounting_ParameterController
index 0000000000000000000000000000000000000000..1cafe412b1ca8ac90f2db74f6f95e84e1edc6f11
--- /dev/null
+++ b/server/Gym/parameter_controllers/card_coloring_counting/parameter_controller.py
@@ -0,0 +1,18 @@
+from typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+import math
+
+class CardColoringCounting_ParameterController(ParameterController) :  # tracks N plus a list of candidate K values
+    def __init__(self, Ks : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)  # remaining keyword arguments go to the base class
+        self.N = 3  # starting value
+        if Ks is None :  # identity test: `== None` would call Ks.__eq__ and misbehaves for array-like Ks
+            self.Ks = [0, 1, 2, 3]  # default candidates when the caller supplies none
+        else :
+            self.Ks = Ks  # caller's list is stored by reference, not copied
+
+    def update(self) -> None :
+        self.N += 1  # N grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [dict(N = self.N, K = K) for K in self.Ks if K < math.factorial(self.N)]  # keeps only K below N!; may be empty for large custom Ks
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/catalan_number_mod/__init__.py b/server/Gym/parameter_controllers/catalan_number_mod/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b7fd313eec3c2dae6d026af7b8f3665bfb259bd
--- /dev/null
+++ b/server/Gym/parameter_controllers/catalan_number_mod/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CatalanNumberMod_ParameterController
diff --git a/server/Gym/parameter_controllers/catalan_number_mod/parameter_controller.py b/server/Gym/parameter_controllers/catalan_number_mod/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffad482f2d2ca21d336d47ebe6d26652325d4552
--- /dev/null
+++ b/server/Gym/parameter_controllers/catalan_number_mod/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CatalanNumberMod_ParameterController(ParameterController) :  # tracks a single value, MAX_N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_N = 3  # starting value
+
+    def update(self) -> None :
+        self.MAX_N = int(self.MAX_N * 1.1 + 1)  # the +1 guarantees growth even when 1.1x truncates back
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [dict(MAX_N = self.MAX_N)]  # one-element list with the current MAX_N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/check_all_cycle_xor_zero/__init__.py b/server/Gym/parameter_controllers/check_all_cycle_xor_zero/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d535ae1dbf1a56cc5674fe5ba9e9e7092512bf46
--- /dev/null
+++ b/server/Gym/parameter_controllers/check_all_cycle_xor_zero/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CheckAllCycleXorZero_ParameterController
diff --git a/server/Gym/parameter_controllers/check_all_cycle_xor_zero/parameter_controller.py b/server/Gym/parameter_controllers/check_all_cycle_xor_zero/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..e31d825e5f9d3cb572b70218b8ff5adcb55f629b
--- /dev/null
+++ b/server/Gym/parameter_controllers/check_all_cycle_xor_zero/parameter_controller.py
@@ -0,0 +1,17 @@
+from typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+
+class CheckAllCycleXorZero_ParameterController(ParameterController) :  # tracks N plus a list of edge ratios
+    def __init__(self, edge_ratio_list : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 4  # starting value
+
+        fallback = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0]
+        chosen = fallback if edge_ratio_list is None else edge_ratio_list  # caller's list wins when given
+        self.edge_ratio_list = chosen
+
+    def update(self) -> None :
+        self.N = int(1 + 1.1 * self.N)  # 1.1x plus one, truncated to int
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "edge_ratio" : ratio} for ratio in self.edge_ratio_list if int(self.N * ratio) <= self.N * (self.N - 1) // 2]  # drop ratios whose int(N * ratio) exceeds N*(N-1)//2
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cho_hamsters/__init__.py b/server/Gym/parameter_controllers/cho_hamsters/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e2d6691db1fcfa1e2534947ddca4ac2fa3cfc8b
--- /dev/null
+++ b/server/Gym/parameter_controllers/cho_hamsters/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import
ChoHamsters_ParameterController
diff --git a/server/Gym/parameter_controllers/cho_hamsters/parameter_controller.py b/server/Gym/parameter_controllers/cho_hamsters/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5ef23728707c8beb888440a04dfcbce1364328a
--- /dev/null
+++ b/server/Gym/parameter_controllers/cho_hamsters/parameter_controller.py
@@ -0,0 +1,15 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class ChoHamsters_ParameterController(ParameterController) :  # tracks two values, N and MAX_M
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 1  # starting value
+        self.MAX_M = 4  # starting value
+
+    def update(self) -> None :
+        self.N = self.N + 1  # N grows linearly
+        self.MAX_M = int(1 + 1.5 * self.MAX_M)  # MAX_M grows by 1.5x plus one, truncated
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "MAX_M" : self.MAX_M}]  # one-element list with both values
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cinema/__init__.py b/server/Gym/parameter_controllers/cinema/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..20c445959c9af200f73e090db30365ed2170471a
--- /dev/null
+++ b/server/Gym/parameter_controllers/cinema/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Cinema_ParameterController
diff --git a/server/Gym/parameter_controllers/cinema/parameter_controller.py b/server/Gym/parameter_controllers/cinema/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..41d536e8218367a1c531c8af7e8207cbc1c730e2
--- /dev/null
+++ b/server/Gym/parameter_controllers/cinema/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class Cinema_ParameterController(ParameterController) :  # tracks a single value, MAX_N_K
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_N_K = 4  # starting value
+
+    def update(self) -> None :
+        self.MAX_N_K = int(1.5 * self.MAX_N_K)  # multiply by 1.5 and truncate to int
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_N_K" : self.MAX_N_K}]  # one-element list with the current MAX_N_K
\ No newline at end
of file
diff --git a/server/Gym/parameter_controllers/circuit/__init__.py b/server/Gym/parameter_controllers/circuit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..74b67cb3060a9d4d3e55ec2ee6dacf9b8045d760
--- /dev/null
+++ b/server/Gym/parameter_controllers/circuit/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Circuit_ParameterController
diff --git a/server/Gym/parameter_controllers/circuit/parameter_controller.py b/server/Gym/parameter_controllers/circuit/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec940e0c6ed31bc648831af06f0a5fbc43aa870c
--- /dev/null
+++ b/server/Gym/parameter_controllers/circuit/parameter_controller.py
@@ -0,0 +1,16 @@
+from typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+
+class Circuit_ParameterController(ParameterController) :  # tracks N plus a list of multipliers used to derive M
+    def __init__(self, M_multiple_list : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 2  # starting value
+        fallback = [2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0]
+        chosen = fallback if M_multiple_list is None else M_multiple_list  # caller's list wins when given
+        self.M_multiple_list = chosen
+
+    def update(self) -> None :
+        self.N = self.N + 1  # N grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "M" : int(factor * self.N)} for factor in self.M_multiple_list]  # M is the truncated product; neighbouring factors can yield the same M
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/circulating_decimal_counting/__init__.py b/server/Gym/parameter_controllers/circulating_decimal_counting/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e49a906993c891c58059e18714c7138516ce714
--- /dev/null
+++ b/server/Gym/parameter_controllers/circulating_decimal_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CirculatingDecimalCounting_ParameterController
diff --git
a/server/Gym/parameter_controllers/circulating_decimal_counting/parameter_controller.py b/server/Gym/parameter_controllers/circulating_decimal_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ccbb247b78fc85cf9d2bf71fe74277fbd8b11f6
--- /dev/null
+++ b/server/Gym/parameter_controllers/circulating_decimal_counting/parameter_controller.py
@@ -0,0 +1,17 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CirculatingDecimalCounting_ParameterController(ParameterController) :  # tracks MAX_N, MAX_M and MAX_K
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_N = 5  # starting value
+        self.MAX_M = 5  # starting value
+        self.MAX_K = 3  # starting value
+
+    def update(self) -> None :
+        self.MAX_N = int(1.5 * self.MAX_N)  # 1.5x, truncated
+        self.MAX_M = int(1.5 * self.MAX_M)  # 1.5x, truncated
+        self.MAX_K = int(1 + 1.1 * self.MAX_K)  # slower schedule: 1.1x plus one, truncated
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_N" : self.MAX_N, "MAX_M" : self.MAX_M, "MAX_K" : self.MAX_K}]  # one-element list with all three values
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/circulating_grid/__init__.py b/server/Gym/parameter_controllers/circulating_grid/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4de10b61bef22aee2227836be3913519589f47f
--- /dev/null
+++ b/server/Gym/parameter_controllers/circulating_grid/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CirculatingGrid_ParameterController
diff --git a/server/Gym/parameter_controllers/circulating_grid/parameter_controller.py b/server/Gym/parameter_controllers/circulating_grid/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd31274cd56f78d5ccf6070f2f1b5b58cb6a5f32
--- /dev/null
+++ b/server/Gym/parameter_controllers/circulating_grid/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CirculatingGrid_ParameterController(ParameterController) :
+    def __init__(self, **kwargs) :
+
super().__init__(**kwargs)
+        self.MAX_R_C = 3  # starting value
+
+    def update(self) -> None :
+        self.MAX_R_C = self.MAX_R_C + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_R_C" : self.MAX_R_C}]  # one-element list with the current MAX_R_C
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cleaning_up/__init__.py b/server/Gym/parameter_controllers/cleaning_up/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f61fe784d7d7dd8159fe6ecc01f5816768ebe414
--- /dev/null
+++ b/server/Gym/parameter_controllers/cleaning_up/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CleaningUp_ParameterController
diff --git a/server/Gym/parameter_controllers/cleaning_up/parameter_controller.py b/server/Gym/parameter_controllers/cleaning_up/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..244b227c9ead6183fec9b33bd510f995409fad43
--- /dev/null
+++ b/server/Gym/parameter_controllers/cleaning_up/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CleaningUp_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 4  # starting value
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N}]  # one-element list with the current N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/clear_symmetry/__init__.py b/server/Gym/parameter_controllers/clear_symmetry/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a941c3e1337d75cf25e0d3312ee67e391e164e84
--- /dev/null
+++ b/server/Gym/parameter_controllers/clear_symmetry/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import ClearSymmetry_ParameterController
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/clear_symmetry/parameter_controller.py b/server/Gym/parameter_controllers/clear_symmetry/parameter_controller.py
new file mode 100644
index
0000000000000000000000000000000000000000..ce0e0cbd83305e3db20c9999c969d4eb698780dd
--- /dev/null
+++ b/server/Gym/parameter_controllers/clear_symmetry/parameter_controller.py
@@ -0,0 +1,14 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+
+class ClearSymmetry_ParameterController(ParameterController):  # tracks a single value, MAX_X
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.MAX_X = 5  # starting value
+
+    def update(self) -> None:
+        self.MAX_X = int(1.5 * self.MAX_X)  # multiply by 1.5 and truncate to int
+
+    def get_parameter_list(self) -> List[Dict]:
+        return [{"MAX_X": self.MAX_X}]  # one-element list with the current MAX_X
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/clique_independent_set_partitioning_counting/__init__.py b/server/Gym/parameter_controllers/clique_independent_set_partitioning_counting/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..88520a207e6ab3f94076bd2428fe0e336a6bb058
--- /dev/null
+++ b/server/Gym/parameter_controllers/clique_independent_set_partitioning_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Clique_IndependentSet_Partitioning_Counting_ParameterController
diff --git a/server/Gym/parameter_controllers/clique_independent_set_partitioning_counting/parameter_controller.py b/server/Gym/parameter_controllers/clique_independent_set_partitioning_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..845a96db54d08e32b621755dfc0ad9e16e358665
--- /dev/null
+++ b/server/Gym/parameter_controllers/clique_independent_set_partitioning_counting/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class Clique_IndependentSet_Partitioning_Counting_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 3  # starting value
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N}]  # one-element list with the current N
\ No newline at end
of file
diff --git a/server/Gym/parameter_controllers/coin_square_game/__init__.py b/server/Gym/parameter_controllers/coin_square_game/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c38a94e691ebbb536644c27368243e93f8c02aa
--- /dev/null
+++ b/server/Gym/parameter_controllers/coin_square_game/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CoinSquareGame_ParameterController
diff --git a/server/Gym/parameter_controllers/coin_square_game/parameter_controller.py b/server/Gym/parameter_controllers/coin_square_game/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aa39dd9480ce7d3a040872abc3fb49664b40c8d
--- /dev/null
+++ b/server/Gym/parameter_controllers/coin_square_game/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CoinSquareGame_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 5  # starting value
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N}]  # one-element list with the current N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/coloring_counting/__init__.py b/server/Gym/parameter_controllers/coloring_counting/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..10473f0dd2534044eb263936438e01f8406a83f5
--- /dev/null
+++ b/server/Gym/parameter_controllers/coloring_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import ColoringCounting_ParameterController
diff --git a/server/Gym/parameter_controllers/coloring_counting/parameter_controller.py b/server/Gym/parameter_controllers/coloring_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..503148962476dc606728cc617f1e603664079314
--- /dev/null
+++ b/server/Gym/parameter_controllers/coloring_counting/parameter_controller.py
@@ -0,0 +1,17 @@
+from
typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+
+class ColoringCounting_ParameterController(ParameterController) :  # tracks N plus a list of edge densities
+    def __init__(self, edge_density_list : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 4  # starting value
+
+        fallback = [0.02, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95]
+        chosen = fallback if edge_density_list is None else edge_density_list  # caller's list wins when given
+        self.edge_density_list = chosen
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "edge_density" : density} for density in self.edge_density_list if int(density * self.N * (self.N - 1) / 2) >= 1]  # keep densities whose truncated product with N*(N-1)/2 is at least 1
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/combination_odd_subsequence_counting/__init__.py b/server/Gym/parameter_controllers/combination_odd_subsequence_counting/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca8177a835e80c43a15c77af9de1711b40ee1f2a
--- /dev/null
+++ b/server/Gym/parameter_controllers/combination_odd_subsequence_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CombinationOddSubsequenceCounting_ParameterController
diff --git a/server/Gym/parameter_controllers/combination_odd_subsequence_counting/parameter_controller.py b/server/Gym/parameter_controllers/combination_odd_subsequence_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..c82df2d54deeeaf691fd2449fc6845b4a4934781
--- /dev/null
+++ b/server/Gym/parameter_controllers/combination_odd_subsequence_counting/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CombinationOddSubsequenceCounting_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 3  # starting value
+
+    def update(self) -> None :
+        self.N = int(1 + 1.1 * self.N)  # 1.1x plus one, truncated to int
+
+    def
get_parameter_list(self) -> List[Dict] :
+        return [dict(N = self.N)]
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/concatenation_partition_counting_sum/__init__.py b/server/Gym/parameter_controllers/concatenation_partition_counting_sum/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..aaa6c3ec303ba395d4ea5dd13ad55055f585a6e7
--- /dev/null
+++ b/server/Gym/parameter_controllers/concatenation_partition_counting_sum/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import ConcatenationPartitionCountingSum_ParameterController
diff --git a/server/Gym/parameter_controllers/concatenation_partition_counting_sum/parameter_controller.py b/server/Gym/parameter_controllers/concatenation_partition_counting_sum/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..21c4d2b564cb034038ef96d4557a4f3a2895fe5b
--- /dev/null
+++ b/server/Gym/parameter_controllers/concatenation_partition_counting_sum/parameter_controller.py
@@ -0,0 +1,14 @@
+from typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+
+class ConcatenationPartitionCountingSum_ParameterController(ParameterController) :  # tracks N plus a list of M values
+    def __init__(self, M_list : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 2  # starting value
+        self.M_list = [1, 2, 3, 4, 5] if M_list is None else M_list  # default list only when the caller passes nothing
+
+    def update(self) -> None :
+        self.N = int(1 + 1.1 * self.N)  # 1.1x plus one, truncated to int
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "M" : m_value} for m_value in self.M_list]  # one dict per M, all sharing the current N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/congruent_equation/__init__.py b/server/Gym/parameter_controllers/congruent_equation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c11f0d913ce5a392b1c78d8974ff07b1d728443
--- /dev/null
+++ b/server/Gym/parameter_controllers/congruent_equation/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import
CongruentEquation_ParameterController
diff --git a/server/Gym/parameter_controllers/congruent_equation/parameter_controller.py b/server/Gym/parameter_controllers/congruent_equation/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..e357601cc5674753a2e7580a082c223f2f9c6a4f
--- /dev/null
+++ b/server/Gym/parameter_controllers/congruent_equation/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CongruentEquation_ParameterController(ParameterController) :  # tracks a single value, MAX_A_B
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_A_B = 5  # starting value
+
+    def update(self) -> None :
+        self.MAX_A_B = int(2 * self.MAX_A_B)  # doubles on every update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_A_B" : self.MAX_A_B}]  # one-element list with the current MAX_A_B
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/construct_hack_interval/__init__.py b/server/Gym/parameter_controllers/construct_hack_interval/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fbe1fcdfd9303871c18d0ddea27af63e764f9ab
--- /dev/null
+++ b/server/Gym/parameter_controllers/construct_hack_interval/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import ConstructHackInterval_ParameterController
diff --git a/server/Gym/parameter_controllers/construct_hack_interval/parameter_controller.py b/server/Gym/parameter_controllers/construct_hack_interval/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..2dd6485b36b5abb6d51ff76625da3965957520ee
--- /dev/null
+++ b/server/Gym/parameter_controllers/construct_hack_interval/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class ConstructHackInterval_ParameterController(ParameterController) :  # tracks a single value, MAX_MOD
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_MOD = 10  # starting value
+
+    def update(self) -> None :
+        self.MAX_MOD =
int(self.MAX_MOD * 2)
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_MOD" : self.MAX_MOD}]  # one-element list with the current MAX_MOD
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/convex_hull/__init__.py b/server/Gym/parameter_controllers/convex_hull/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fbe1544f9765ff8a91c8277f4518af7b8bcf8ac
--- /dev/null
+++ b/server/Gym/parameter_controllers/convex_hull/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import ConvexHull_ParameterController
diff --git a/server/Gym/parameter_controllers/convex_hull/parameter_controller.py b/server/Gym/parameter_controllers/convex_hull/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..396c15eb65a569b7faaf5a58c2226c6c0a3bee91
--- /dev/null
+++ b/server/Gym/parameter_controllers/convex_hull/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class ConvexHull_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 5  # starting value
+
+    def update(self) -> None :
+        self.N = int(1 + 1.1 * self.N)  # 1.1x plus one, truncated to int
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N}]  # one-element list with the current N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cornfield/__init__.py b/server/Gym/parameter_controllers/cornfield/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4f7874f92275ed46635eff773e9079c13ee0a67
--- /dev/null
+++ b/server/Gym/parameter_controllers/cornfield/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Cornfield_ParameterController
diff --git a/server/Gym/parameter_controllers/cornfield/parameter_controller.py b/server/Gym/parameter_controllers/cornfield/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..c682a5b7dcab990df31e8c1ace40805baa09f1b9
--- /dev/null
+++
b/server/Gym/parameter_controllers/cornfield/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class Cornfield_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 3  # starting value
+
+    def update(self) -> None :
+        self.N = int(1 + 1.1 * self.N)  # 1.1x plus one, truncated to int
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N}]  # one-element list with the current N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/countdown/__init__.py b/server/Gym/parameter_controllers/countdown/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2579561bc79ea31efdb18d553d875426ac750b8
--- /dev/null
+++ b/server/Gym/parameter_controllers/countdown/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Countdown_ParameterController
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/countdown/parameter_controller.py b/server/Gym/parameter_controllers/countdown/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..eabfd471fb66c4df49e075fb4d77317bd48c87e7
--- /dev/null
+++ b/server/Gym/parameter_controllers/countdown/parameter_controller.py
@@ -0,0 +1,18 @@
+from typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+
+class Countdown_ParameterController(ParameterController) :  # tracks num_operands plus a list of operand caps
+    def __init__(self, max_operands : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)
+
+        self.num_operands = 3  # starting value
+
+        fallback = [8, 16, 24, 32, 40, 48]
+        chosen = fallback if max_operands is None else max_operands  # caller's list wins when given
+        self.max_operands = chosen
+
+    def update(self) -> None :
+        self.num_operands = self.num_operands + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"num_operands" : self.num_operands, "max_operand" : cap, "max_target" : 10 * cap} for cap in self.max_operands]  # max_target is always ten times the cap
\ No newline at end of file
diff --git
a/server/Gym/parameter_controllers/cow_dance_show/__init__.py b/server/Gym/parameter_controllers/cow_dance_show/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9b94b784b241811d6322154bf46925d1a4076e8
--- /dev/null
+++ b/server/Gym/parameter_controllers/cow_dance_show/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CowDanceShow_ParameterController
diff --git a/server/Gym/parameter_controllers/cow_dance_show/parameter_controller.py b/server/Gym/parameter_controllers/cow_dance_show/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..4af8c2ee10486798f6efb533a70956ce40290a3b
--- /dev/null
+++ b/server/Gym/parameter_controllers/cow_dance_show/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class CowDanceShow_ParameterController(ParameterController) :  # tracks a single value, N
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 4  # starting value
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N}]  # one-element list with the current N
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/crt/__init__.py b/server/Gym/parameter_controllers/crt/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fb2a2e6345c8fec82b4e09755d7205670bba176
--- /dev/null
+++ b/server/Gym/parameter_controllers/crt/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CRT_ParameterController
diff --git a/server/Gym/parameter_controllers/crt/parameter_controller.py b/server/Gym/parameter_controllers/crt/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..e163da8eaa7fb7ee9e08fb3a41591b91ad6428e6
--- /dev/null
+++ b/server/Gym/parameter_controllers/crt/parameter_controller.py
@@ -0,0 +1,20 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class
CRT_ParameterController(ParameterController) :
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_X = 5  # starting value
+        self.MAX_M = 2  # starting value; also the smallest M ever emitted
+
+        self.current_stage = 0  # counts update() calls
+
+    def update(self) -> None :
+        self.current_stage = self.current_stage + 1
+        if self.current_stage % 3 != 0 :  # two updates out of every three scale MAX_X
+            self.MAX_X = int(1.5 * self.MAX_X)
+        else :  # every third update widens the M range instead
+            self.MAX_M = self.MAX_M + 1
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_X" : self.MAX_X, "M" : m_value} for m_value in range(2, self.MAX_M + 1)]  # one dict per M in 2..MAX_M inclusive
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cryptarithmetic/__init__.py b/server/Gym/parameter_controllers/cryptarithmetic/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8ccf797f080fdd5dd08b5d724abe2a58068c729
--- /dev/null
+++ b/server/Gym/parameter_controllers/cryptarithmetic/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Cryptarithmetic_ParameterController
diff --git a/server/Gym/parameter_controllers/cryptarithmetic/parameter_controller.py b/server/Gym/parameter_controllers/cryptarithmetic/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1c8d1142f6a4f660011673be8d827a7ff9f0af8
--- /dev/null
+++ b/server/Gym/parameter_controllers/cryptarithmetic/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class Cryptarithmetic_ParameterController(ParameterController) :  # tracks N and derives a range of addend lengths
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 3  # starting value
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "addend_length" : length} for length in range(self.N + 3, max(self.N + 3, 2 * self.N) + 1)]  # lengths N+3 .. max(N+3, 2N) inclusive, so never empty
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cube_fixed_local_maximum_counting/__init__.py b/server/Gym/parameter_controllers/cube_fixed_local_maximum_counting/__init__.py
new file mode 100644
index
0000000000000000000000000000000000000000..fec0b549e45afde002af36c2cfbca7905b0c34be
--- /dev/null
+++ b/server/Gym/parameter_controllers/cube_fixed_local_maximum_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import Cube_FixedLocalMaximumCounting_ParameterController
diff --git a/server/Gym/parameter_controllers/cube_fixed_local_maximum_counting/parameter_controller.py b/server/Gym/parameter_controllers/cube_fixed_local_maximum_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..37d29d6adf36473c9b85db6211bfe6e32d34c448
--- /dev/null
+++ b/server/Gym/parameter_controllers/cube_fixed_local_maximum_counting/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class Cube_FixedLocalMaximumCounting_ParameterController(ParameterController) :  # tracks a single value, MAX_N_M_L
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_N_M_L = 3  # starting value
+
+    def update(self) -> None :
+        self.MAX_N_M_L = int(1 + 1.1 * self.MAX_N_M_L)  # 1.1x plus one, truncated to int
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_N_M_L" : self.MAX_N_M_L}]  # one-element list with the current MAX_N_M_L
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/cycle_counting/__init__.py b/server/Gym/parameter_controllers/cycle_counting/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cefe2e750a99b6212808fd918e474a89952311e
--- /dev/null
+++ b/server/Gym/parameter_controllers/cycle_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import CycleCounting_ParameterController
diff --git a/server/Gym/parameter_controllers/cycle_counting/parameter_controller.py b/server/Gym/parameter_controllers/cycle_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..21a49014121eeedfaf075617ace26ee8aad2fea5
--- /dev/null
+++ b/server/Gym/parameter_controllers/cycle_counting/parameter_controller.py
@@ -0,0 +1,17 @@
+from typing import Dict, List, Optional
+from ...parameter_controller import ParameterController
+
+class CycleCounting_ParameterController(ParameterController) :  # tracks N plus a list of edge densities
+    def __init__(self, edge_density_list : Optional[List] = None, **kwargs) :
+        super().__init__(**kwargs)
+        self.N = 4  # starting value
+
+        fallback = [0.25, 0.45, 0.65, 0.85]
+        chosen = fallback if edge_density_list is None else edge_density_list  # caller's list wins when given
+        self.edge_density_list = chosen
+
+    def update(self) -> None :
+        self.N = self.N + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"N" : self.N, "edge_density" : density} for density in self.edge_density_list if int(density * self.N * (self.N - 1) / 2) >= 1]  # keep densities whose truncated product with N*(N-1)/2 is at least 1
\ No newline at end of file
diff --git a/server/Gym/parameter_controllers/decreasing_digit_counting/__init__.py b/server/Gym/parameter_controllers/decreasing_digit_counting/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9128c1a7dfe6db5c927fcac1708459d244daa4fc
--- /dev/null
+++ b/server/Gym/parameter_controllers/decreasing_digit_counting/__init__.py
@@ -0,0 +1 @@
+from .parameter_controller import DecreasingDigitCounting_ParameterController
diff --git a/server/Gym/parameter_controllers/decreasing_digit_counting/parameter_controller.py b/server/Gym/parameter_controllers/decreasing_digit_counting/parameter_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..c09a400826058be98a8e918bb0707e5d9c5a9fe7
--- /dev/null
+++ b/server/Gym/parameter_controllers/decreasing_digit_counting/parameter_controller.py
@@ -0,0 +1,13 @@
+from typing import Dict, List
+from ...parameter_controller import ParameterController
+
+class DecreasingDigitCounting_ParameterController(ParameterController) :  # tracks MAX_K and derives MAX_W from it
+    def __init__(self, **kwargs) :
+        super().__init__(**kwargs)
+        self.MAX_K = 2  # starting value
+
+    def update(self) -> None :
+        self.MAX_K = self.MAX_K + 1  # grows by one per update
+
+    def get_parameter_list(self) -> List[Dict] :
+        return [{"MAX_K" : self.MAX_K, "MAX_W" : (1 << self.MAX_K) * self.MAX_K}]  # MAX_W = MAX_K * 2**MAX_K
\ No newline at end of file
diff --git
a/server/Gym/parameter_controllers/degree_fixed_spanning_tree/__init__.py b/server/Gym/parameter_controllers/degree_fixed_spanning_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..279875645f0a58635de7412b09122b4e5d731ecd --- /dev/null +++ b/server/Gym/parameter_controllers/degree_fixed_spanning_tree/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DegreeFixed_SpanningTree_ParameterController diff --git a/server/Gym/parameter_controllers/degree_fixed_spanning_tree/parameter_controller.py b/server/Gym/parameter_controllers/degree_fixed_spanning_tree/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7dcef1e6bb775a9ab7532fcd0f624d52defa613c --- /dev/null +++ b/server/Gym/parameter_controllers/degree_fixed_spanning_tree/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class DegreeFixed_SpanningTree_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.1, 0.3, 0.5, 0.7, 0.9] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1) / 2) > self.N - 1] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/delta_min_popcount/__init__.py b/server/Gym/parameter_controllers/delta_min_popcount/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..65753b06a625b43deb611fdb072b10f1d182108e --- /dev/null +++ b/server/Gym/parameter_controllers/delta_min_popcount/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DeltaMinPopcount_ParameterController diff --git 
a/server/Gym/parameter_controllers/delta_min_popcount/parameter_controller.py b/server/Gym/parameter_controllers/delta_min_popcount/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3e47b29bbb25bd9a42b6ea430927d5d602561243 --- /dev/null +++ b/server/Gym/parameter_controllers/delta_min_popcount/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DeltaMinPopcount_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.digit_num = 4 + + def update(self) -> None : + self.digit_num = int(self.digit_num * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(digit_num = self.digit_num)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/delta_nim_game/__init__.py b/server/Gym/parameter_controllers/delta_nim_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c88525d5fc9c709fcb7022b862e51d7922f90d40 --- /dev/null +++ b/server/Gym/parameter_controllers/delta_nim_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DeltaNimGame_ParameterController diff --git a/server/Gym/parameter_controllers/delta_nim_game/parameter_controller.py b/server/Gym/parameter_controllers/delta_nim_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..4fcabb93f10d06759eb099367cf4d5fb52415ec2 --- /dev/null +++ b/server/Gym/parameter_controllers/delta_nim_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DeltaNimGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/derangement_extension/__init__.py b/server/Gym/parameter_controllers/derangement_extension/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5174acc41acadf1e73d2100ffb76d0e7cd26db --- /dev/null +++ b/server/Gym/parameter_controllers/derangement_extension/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DerangementExtension_ParameterController diff --git a/server/Gym/parameter_controllers/derangement_extension/parameter_controller.py b/server/Gym/parameter_controllers/derangement_extension/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a9bedb7a3e4ef73edd57d24bfe322902fc0e5ab3 --- /dev/null +++ b/server/Gym/parameter_controllers/derangement_extension/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DerangementExtension_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/difference_constraint_system/__init__.py b/server/Gym/parameter_controllers/difference_constraint_system/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1d0b48bbfbff137207fd07fba83a74a684626fd5 --- /dev/null +++ b/server/Gym/parameter_controllers/difference_constraint_system/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DifferenceConstraintSystem_ParameterController diff --git a/server/Gym/parameter_controllers/difference_constraint_system/parameter_controller.py b/server/Gym/parameter_controllers/difference_constraint_system/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..57b02132f8a9b5a708f0e3e16528c1fd58d4cf64 --- /dev/null +++ 
b/server/Gym/parameter_controllers/difference_constraint_system/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class DifferenceConstraintSystem_ParameterController(ParameterController) : + def __init__(self, M_multiple_list : Optional[List[float]] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + self.M_multiple = M_multiple_list if M_multiple_list is not None else [1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0] + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, M = int(M_multiple * self.N)) for M_multiple in self.M_multiple] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/difference_constraint_system_dag/__init__.py b/server/Gym/parameter_controllers/difference_constraint_system_dag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..605a734d51e7ab435638609dede105c0982aa5f5 --- /dev/null +++ b/server/Gym/parameter_controllers/difference_constraint_system_dag/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DifferenceConstraintSystemDAG_ParameterController diff --git a/server/Gym/parameter_controllers/difference_constraint_system_dag/parameter_controller.py b/server/Gym/parameter_controllers/difference_constraint_system_dag/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..df348730d6551a4b3886e1b79bce5d838528147e --- /dev/null +++ b/server/Gym/parameter_controllers/difference_constraint_system_dag/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class DifferenceConstraintSystemDAG_ParameterController(ParameterController) : + def __init__(self, M_multiple_list : Optional[List[float]] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + self.M_multiple = 
M_multiple_list if M_multiple_list is not None else [1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0] + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, M = int(M_multiple * self.N)) for M_multiple in self.M_multiple] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/different_color_pairing/__init__.py b/server/Gym/parameter_controllers/different_color_pairing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..26492a4fa4c2ec49b01e1ceea94fd5fe059a826f --- /dev/null +++ b/server/Gym/parameter_controllers/different_color_pairing/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DifferentColorPairing_ParameterController diff --git a/server/Gym/parameter_controllers/different_color_pairing/parameter_controller.py b/server/Gym/parameter_controllers/different_color_pairing/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f57c286ca47d6d37018f3756044a4e3b95c33172 --- /dev/null +++ b/server/Gym/parameter_controllers/different_color_pairing/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DifferentColorPairing_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 6 + + def update(self) -> None : + self.N += 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/differentiate/__init__.py b/server/Gym/parameter_controllers/differentiate/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..da246cea74c4c1a93e825eda7e1ce2159e52ecda --- /dev/null +++ b/server/Gym/parameter_controllers/differentiate/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Differentiate_ParameterController diff --git 
a/server/Gym/parameter_controllers/differentiate/parameter_controller.py b/server/Gym/parameter_controllers/differentiate/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..485d90aedfd5d382b7e397ac8f06c90c918d48e9 --- /dev/null +++ b/server/Gym/parameter_controllers/differentiate/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + + +class Differentiate_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.node_num = 2 + + def update(self) -> None : + self.node_num += 1 + + def get_parameter_list(self) -> List[Dict] : + return [{"node_num" : self.node_num}] diff --git a/server/Gym/parameter_controllers/digit_lis_counting/__init__.py b/server/Gym/parameter_controllers/digit_lis_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..00cfa31ca7b2d374116dbbb42f9c7212883470d7 --- /dev/null +++ b/server/Gym/parameter_controllers/digit_lis_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DigitLISCounting_ParameterController \ No newline at end of file diff --git a/server/Gym/parameter_controllers/digit_lis_counting/parameter_controller.py b/server/Gym/parameter_controllers/digit_lis_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..895141a8f11cebd4bb1ab32add1b9cadcc501193 --- /dev/null +++ b/server/Gym/parameter_controllers/digit_lis_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class DigitLISCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/discrete_logarithm/__init__.py b/server/Gym/parameter_controllers/discrete_logarithm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..98be7a2b88a7cc9416d639661afb06f60bb955c5 --- /dev/null +++ b/server/Gym/parameter_controllers/discrete_logarithm/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DiscreteLogarithm_ParameterController \ No newline at end of file diff --git a/server/Gym/parameter_controllers/discrete_logarithm/parameter_controller.py b/server/Gym/parameter_controllers/discrete_logarithm/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9f0ad762f6bbed506a9dc33279e7464cf84531e0 --- /dev/null +++ b/server/Gym/parameter_controllers/discrete_logarithm/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DiscreteLogarithm_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_Z = 10 + + def update(self) -> None : + self.MAX_Z = int(self.MAX_Z * 1.2 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_Z = self.MAX_Z)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/disinfection/__init__.py b/server/Gym/parameter_controllers/disinfection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b93b5eda9bb291d13d96f24569f840be5ffd8d35 --- /dev/null +++ b/server/Gym/parameter_controllers/disinfection/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Disinfection_ParameterController diff --git a/server/Gym/parameter_controllers/disinfection/parameter_controller.py b/server/Gym/parameter_controllers/disinfection/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f7a5ec316f8258f6e2d5bf26e29fe8af5ab5f4d3 --- /dev/null +++ 
b/server/Gym/parameter_controllers/disinfection/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Disinfection_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_A_B_C = 2 + + def update(self) -> None : + self.MAX_A_B_C += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_A_B_C = self.MAX_A_B_C)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/distinct_array_permutation/__init__.py b/server/Gym/parameter_controllers/distinct_array_permutation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..152411d21e3e0640b5f1641d5e3148cc9c817428 --- /dev/null +++ b/server/Gym/parameter_controllers/distinct_array_permutation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DistinctArrayPermutation_ParameterController \ No newline at end of file diff --git a/server/Gym/parameter_controllers/distinct_array_permutation/parameter_controller.py b/server/Gym/parameter_controllers/distinct_array_permutation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1aba5bbc925ac07ff166715054bdafdc61e00125 --- /dev/null +++ b/server/Gym/parameter_controllers/distinct_array_permutation/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DistinctArrayPermutation_ParameterController(ParameterController): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None: + self.N += 1 + self.N = min(self.N, 22) + + def get_parameter_list(self) -> List[Dict]: + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/distinct_edge_colored_complete_graph_counting/__init__.py 
b/server/Gym/parameter_controllers/distinct_edge_colored_complete_graph_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b33ace87e4635d82bdb63bbf1e5b6ab830eae5d --- /dev/null +++ b/server/Gym/parameter_controllers/distinct_edge_colored_complete_graph_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DistinctEdgeColoredCompleteGraphCounting_ParameterController diff --git a/server/Gym/parameter_controllers/distinct_edge_colored_complete_graph_counting/parameter_controller.py b/server/Gym/parameter_controllers/distinct_edge_colored_complete_graph_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2a1d6ce8ec84f06c7fd579e5c8f3bd50eb1c51c5 --- /dev/null +++ b/server/Gym/parameter_controllers/distinct_edge_colored_complete_graph_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DistinctEdgeColoredCompleteGraphCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/division/__init__.py b/server/Gym/parameter_controllers/division/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4e6755699f48d262f89cb52fe68fa5a4c501278 --- /dev/null +++ b/server/Gym/parameter_controllers/division/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Division_ParameterController diff --git a/server/Gym/parameter_controllers/division/parameter_controller.py b/server/Gym/parameter_controllers/division/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8ee1381f9f733f8cdb036d5e053e76d7f0a5cf60 --- /dev/null +++ 
b/server/Gym/parameter_controllers/division/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Division_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.digit_num = 1 + + def update(self) -> None : + self.digit_num += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(divisor_digit_num = self.digit_num, answer_digit_num = self.digit_num)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/divisor_flip_expectation/__init__.py b/server/Gym/parameter_controllers/divisor_flip_expectation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b346028acf7511669efc5fea347838270d616e46 --- /dev/null +++ b/server/Gym/parameter_controllers/divisor_flip_expectation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DivisorFlipExpectation_ParameterController diff --git a/server/Gym/parameter_controllers/divisor_flip_expectation/parameter_controller.py b/server/Gym/parameter_controllers/divisor_flip_expectation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3e4367ac5c37948d71b6e0e35b164d7847f93e53 --- /dev/null +++ b/server/Gym/parameter_controllers/divisor_flip_expectation/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DivisorFlipExpectation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/double_cross_counting/__init__.py b/server/Gym/parameter_controllers/double_cross_counting/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..98071c8e2618db4e170c115e8f15cb3311a5ff25 --- /dev/null +++ b/server/Gym/parameter_controllers/double_cross_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DoubleCrossCounting_ParameterController diff --git a/server/Gym/parameter_controllers/double_cross_counting/parameter_controller.py b/server/Gym/parameter_controllers/double_cross_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..417ed1e04b3316a4632ffd0b802a62e9054045e4 --- /dev/null +++ b/server/Gym/parameter_controllers/double_cross_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DoubleCrossCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/double_palindromic_string_counting/__init__.py b/server/Gym/parameter_controllers/double_palindromic_string_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6d97c6505ebd368ca5a35b22ad69993fc32e1798 --- /dev/null +++ b/server/Gym/parameter_controllers/double_palindromic_string_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DoublePalindromicStringCounting_ParameterController diff --git a/server/Gym/parameter_controllers/double_palindromic_string_counting/parameter_controller.py b/server/Gym/parameter_controllers/double_palindromic_string_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1c7383c7b1f771dc71e6c6d5816e72f122eace77 --- /dev/null +++ b/server/Gym/parameter_controllers/double_palindromic_string_counting/parameter_controller.py 
@@ -0,0 +1,14 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class DoublePalindromicStringCounting_ParameterController(ParameterController) : + def __init__(self, C_List : Optional[List[int]] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + self.C_List = C_List if C_List is not None else [2, 3, 4, 5] + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, C = C) for C in self.C_List] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/double_stack_sorting/__init__.py b/server/Gym/parameter_controllers/double_stack_sorting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..012d35031383f152427e8ca3c6d11e889dbc3df7 --- /dev/null +++ b/server/Gym/parameter_controllers/double_stack_sorting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DoubleStackSorting_ParameterController diff --git a/server/Gym/parameter_controllers/double_stack_sorting/parameter_controller.py b/server/Gym/parameter_controllers/double_stack_sorting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9296cb3df20e886bf5de744bb268554a8b672044 --- /dev/null +++ b/server/Gym/parameter_controllers/double_stack_sorting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DoubleStackSorting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/dyn_dynamite/__init__.py b/server/Gym/parameter_controllers/dyn_dynamite/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..095773d05c653f9cf36bcc6bcf9b7fa8b4cd1f08 --- /dev/null +++ b/server/Gym/parameter_controllers/dyn_dynamite/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import DynDynamite_ParameterController diff --git a/server/Gym/parameter_controllers/dyn_dynamite/parameter_controller.py b/server/Gym/parameter_controllers/dyn_dynamite/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7eb4d8ebbd83eb92403a6023a8bfd2857c7e29f2 --- /dev/null +++ b/server/Gym/parameter_controllers/dyn_dynamite/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class DynDynamite_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/eight_digit_puzzle/__init__.py b/server/Gym/parameter_controllers/eight_digit_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eba4991dcd3d76f2ad812f8b3efb747462ab9dda --- /dev/null +++ b/server/Gym/parameter_controllers/eight_digit_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import EightDigitPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/eight_digit_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/eight_digit_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..188a60e19b6f0c5a82ea4ec837ca2b83af5d1fd8 --- /dev/null +++ b/server/Gym/parameter_controllers/eight_digit_puzzle/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class EightDigitPuzzle_ParameterController(ParameterController) : + def __init__(self, 
steps_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + self.steps_list = [2, 3, 5, 10, 15, 20, 25, 30] if steps_list is None else steps_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, steps = steps) for steps in self.steps_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/emperor_worries/__init__.py b/server/Gym/parameter_controllers/emperor_worries/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e134770540ab1bfdc60f56c2960f1645b1de3319 --- /dev/null +++ b/server/Gym/parameter_controllers/emperor_worries/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import EmperorWorries_ParameterController diff --git a/server/Gym/parameter_controllers/emperor_worries/parameter_controller.py b/server/Gym/parameter_controllers/emperor_worries/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..14ac4f0f83ac83bd6b04c4d55ba202f9b3a27812 --- /dev/null +++ b/server/Gym/parameter_controllers/emperor_worries/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class EmperorWorries_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.K = 1 + + def update(self) -> None : + self.K += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(K = self.K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/energy_storage_meter/__init__.py b/server/Gym/parameter_controllers/energy_storage_meter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e9e7007d1a0009bf59d93dde0c5270a9b4f05a00 --- /dev/null +++ b/server/Gym/parameter_controllers/energy_storage_meter/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import 
EnergyStorageMeter_ParameterController diff --git a/server/Gym/parameter_controllers/energy_storage_meter/parameter_controller.py b/server/Gym/parameter_controllers/energy_storage_meter/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0bd02916a678686f9b97206b917eafd41de71f52 --- /dev/null +++ b/server/Gym/parameter_controllers/energy_storage_meter/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class EnergyStorageMeter_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 4 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/euclid_game/__init__.py b/server/Gym/parameter_controllers/euclid_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1bcc447c9355e21c953abb99d1cbe60d02d309ad --- /dev/null +++ b/server/Gym/parameter_controllers/euclid_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import EuclidGame_ParameterController diff --git a/server/Gym/parameter_controllers/euclid_game/parameter_controller.py b/server/Gym/parameter_controllers/euclid_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2c64f45865581dc4fe8128ec6ef0e25459d5e282 --- /dev/null +++ b/server/Gym/parameter_controllers/euclid_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class EuclidGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_X_Y = 16 + + def update(self) -> None : + self.MAX_X_Y *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_X_Y = 
self.MAX_X_Y)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/even_degree_graph_partitioning/__init__.py b/server/Gym/parameter_controllers/even_degree_graph_partitioning/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..991756cbdf821c2b1ed2e89fb97f287fd07b9a3d --- /dev/null +++ b/server/Gym/parameter_controllers/even_degree_graph_partitioning/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import EvenDegreeGraphPartitioning_ParameterController diff --git a/server/Gym/parameter_controllers/even_degree_graph_partitioning/parameter_controller.py b/server/Gym/parameter_controllers/even_degree_graph_partitioning/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..42aa1bac55031bbbec9a2142a684d5f1fdb63b3c --- /dev/null +++ b/server/Gym/parameter_controllers/even_degree_graph_partitioning/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class EvenDegreeGraphPartitioning_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/expression_adding_parenthese_counting/__init__.py b/server/Gym/parameter_controllers/expression_adding_parenthese_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f5c42f652718c1a0a307e71fa0a538830b79a491 --- /dev/null +++ b/server/Gym/parameter_controllers/expression_adding_parenthese_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Expression_AddingParenthese_Counting_ParameterController \ No newline at end of file diff --git a/server/Gym/parameter_controllers/expression_adding_parenthese_counting/parameter_controller.py 
b/server/Gym/parameter_controllers/expression_adding_parenthese_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8c3f0299e107f792dc84e2951169170af327fa6a --- /dev/null +++ b/server/Gym/parameter_controllers/expression_adding_parenthese_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Expression_AddingParenthese_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.num_operands = 3 + + def update(self) -> None : + self.num_operands += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(num_operands = self.num_operands)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/face_right_way/__init__.py b/server/Gym/parameter_controllers/face_right_way/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..df3694ccd312297bfd426428d95ccd4f15923964 --- /dev/null +++ b/server/Gym/parameter_controllers/face_right_way/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FaceRightWay_ParameterController diff --git a/server/Gym/parameter_controllers/face_right_way/parameter_controller.py b/server/Gym/parameter_controllers/face_right_way/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..b470153e88c3d5757405618ee6c484bbda42144f --- /dev/null +++ b/server/Gym/parameter_controllers/face_right_way/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FaceRightWay_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/factorial_trailing_zero_count/__init__.py b/server/Gym/parameter_controllers/factorial_trailing_zero_count/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f1fd6a46bc455def54e179f993ea4eea1589ccf7 --- /dev/null +++ b/server/Gym/parameter_controllers/factorial_trailing_zero_count/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FactorialTrailingZeroCount_ParameterController diff --git a/server/Gym/parameter_controllers/factorial_trailing_zero_count/parameter_controller.py b/server/Gym/parameter_controllers/factorial_trailing_zero_count/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f5fe8ae253ef33a13cfaf9bf5e7c7aed7764fbf9 --- /dev/null +++ b/server/Gym/parameter_controllers/factorial_trailing_zero_count/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FactorialTrailingZeroCount_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_K = 10 + + def update(self) -> None : + self.MAX_N_K = int(self.MAX_N_K * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_K = self.MAX_N_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fbi_binary_tree/__init__.py b/server/Gym/parameter_controllers/fbi_binary_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8372da76c7e2ae6aba7c831fb7cdeacdde3e972f --- /dev/null +++ b/server/Gym/parameter_controllers/fbi_binary_tree/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FBI_BinaryTree_ParameterController diff --git a/server/Gym/parameter_controllers/fbi_binary_tree/parameter_controller.py b/server/Gym/parameter_controllers/fbi_binary_tree/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..bde7618280ea1359a3d5d416bfff1bd3345b2b0d 
--- /dev/null +++ b/server/Gym/parameter_controllers/fbi_binary_tree/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FBI_BinaryTree_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fibonacci/__init__.py b/server/Gym/parameter_controllers/fibonacci/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b370cc0626ad8e0baeb5e7fa8ea45a0e7b4b7074 --- /dev/null +++ b/server/Gym/parameter_controllers/fibonacci/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Fibonacci_ParameterController diff --git a/server/Gym/parameter_controllers/fibonacci/parameter_controller.py b/server/Gym/parameter_controllers/fibonacci/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..502dd10df53ea562af2f8985589b4dbb039a1ec5 --- /dev/null +++ b/server/Gym/parameter_controllers/fibonacci/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Fibonacci_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + + def update(self) -> None : + self.MAX_N *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fibonacci_containing_counting/__init__.py b/server/Gym/parameter_controllers/fibonacci_containing_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..05b0328881a35a2df3078eef59694cddabfe3766 --- /dev/null +++ 
b/server/Gym/parameter_controllers/fibonacci_containing_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FibonacciContainingCounting_ParameterController diff --git a/server/Gym/parameter_controllers/fibonacci_containing_counting/parameter_controller.py b/server/Gym/parameter_controllers/fibonacci_containing_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8392f5d0c023e9a3e1d86717e59ebf278dbf410c --- /dev/null +++ b/server/Gym/parameter_controllers/fibonacci_containing_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FibonacciContainingCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_K = 10 + + def update(self) -> None : + self.MAX_K = int(self.MAX_K * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fibtrain/__init__.py b/server/Gym/parameter_controllers/fibtrain/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..970508b9e83bb0151810a8df7d02ba6fcd62a86e --- /dev/null +++ b/server/Gym/parameter_controllers/fibtrain/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Fibtrain_ParameterController diff --git a/server/Gym/parameter_controllers/fibtrain/parameter_controller.py b/server/Gym/parameter_controllers/fibtrain/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2478e97d33b1037011821efdcb9bcc32b0bf84fa --- /dev/null +++ b/server/Gym/parameter_controllers/fibtrain/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Fibtrain_ParameterController(ParameterController) : + def __init__(self, MAX_A_B = 20, **kwargs) : + super().__init__(**kwargs) + 
self.MAX_N = 5 + self.MAX_A_B = MAX_A_B + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_A_B = self.MAX_A_B)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/firework_show/__init__.py b/server/Gym/parameter_controllers/firework_show/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..76d3816d581f1686b051f70f6cb4e996b1fa1128 --- /dev/null +++ b/server/Gym/parameter_controllers/firework_show/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FireworkShow_ParameterController diff --git a/server/Gym/parameter_controllers/firework_show/parameter_controller.py b/server/Gym/parameter_controllers/firework_show/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8a55f73c4fd954fdf5647c76ee498e544709c863 --- /dev/null +++ b/server/Gym/parameter_controllers/firework_show/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FireworkShow_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fixed_mod_k_selection_counting/__init__.py b/server/Gym/parameter_controllers/fixed_mod_k_selection_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5bce9ebd961a1ad589ddae5275d6e5124cd355a6 --- /dev/null +++ b/server/Gym/parameter_controllers/fixed_mod_k_selection_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FixedModK_Selection_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/fixed_mod_k_selection_counting/parameter_controller.py 
b/server/Gym/parameter_controllers/fixed_mod_k_selection_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..06dc7d84f2fc1437f10dd3602cff55c014f0df85 --- /dev/null +++ b/server/Gym/parameter_controllers/fixed_mod_k_selection_counting/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FixedModK_Selection_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 8 + self.MAX_K = 5 + + def update(self) -> None : + self.MAX_N *= 2 + self.MAX_K = int(self.MAX_K * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fixed_one_edge_num_spanning_tree/__init__.py b/server/Gym/parameter_controllers/fixed_one_edge_num_spanning_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8f2e42a64f882e29f1f49791b6496de5b5301a96 --- /dev/null +++ b/server/Gym/parameter_controllers/fixed_one_edge_num_spanning_tree/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FixedOneEdgeNum_SpanningTree_ParameterController diff --git a/server/Gym/parameter_controllers/fixed_one_edge_num_spanning_tree/parameter_controller.py b/server/Gym/parameter_controllers/fixed_one_edge_num_spanning_tree/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e1cff74e9db0688bb518bb294d0af84192543f3e --- /dev/null +++ b/server/Gym/parameter_controllers/fixed_one_edge_num_spanning_tree/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class FixedOneEdgeNum_SpanningTree_ParameterController(ParameterController) : + def __init__(self, edge_ratio_list : Optional[List] = None, **kwargs) : + 
super().__init__(**kwargs) + self.N = 4 + + if edge_ratio_list is None : + edge_ratio_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0] + self.edge_ratio_list = edge_ratio_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_ratio = edge_ratio) for edge_ratio in self.edge_ratio_list if int(self.N * edge_ratio) <= self.N * (self.N - 1) // 2] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fractional_programming/__init__.py b/server/Gym/parameter_controllers/fractional_programming/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..31cf72262297223d9666f5e34b6f8379d3e2bc8f --- /dev/null +++ b/server/Gym/parameter_controllers/fractional_programming/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FractionalProgramming_ParameterController diff --git a/server/Gym/parameter_controllers/fractional_programming/parameter_controller.py b/server/Gym/parameter_controllers/fractional_programming/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2123734e17b3cf9279e15bc1c0f50e59ed9d3842 --- /dev/null +++ b/server/Gym/parameter_controllers/fractional_programming/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FractionalProgramming_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/fractional_programming_bipartite_graph_matching/__init__.py b/server/Gym/parameter_controllers/fractional_programming_bipartite_graph_matching/__init__.py new file mode 
100644 index 0000000000000000000000000000000000000000..efbcb7ae78e897c16505c9d775734d608b51bc5d --- /dev/null +++ b/server/Gym/parameter_controllers/fractional_programming_bipartite_graph_matching/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FractionalProgramming_BipartiteGraphMatching_ParameterController diff --git a/server/Gym/parameter_controllers/fractional_programming_bipartite_graph_matching/parameter_controller.py b/server/Gym/parameter_controllers/fractional_programming_bipartite_graph_matching/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..37cdea6d37db2a96579757c0b3ad84238f063155 --- /dev/null +++ b/server/Gym/parameter_controllers/fractional_programming_bipartite_graph_matching/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class FractionalProgramming_BipartiteGraphMatching_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/futoshiki_puzzle/__init__.py b/server/Gym/parameter_controllers/futoshiki_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8c42b72bfe9e4159b35faf99808751a357832991 --- /dev/null +++ b/server/Gym/parameter_controllers/futoshiki_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import FutoshikiPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/futoshiki_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/futoshiki_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..af27b9dd07aa7427efdd69028260c3aa7c9d919b --- /dev/null +++ 
b/server/Gym/parameter_controllers/futoshiki_puzzle/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class FutoshikiPuzzle_ParameterController(ParameterController) : + def __init__(self, sparsity_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if sparsity_list is None : + sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.sparsity_list = sparsity_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, sparsity = sparsity) for sparsity in self.sparsity_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gas_fire_extinguishers/__init__.py b/server/Gym/parameter_controllers/gas_fire_extinguishers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c7b07d7e7560cd5d9bd026f98f9dd3c4d0a7a61a --- /dev/null +++ b/server/Gym/parameter_controllers/gas_fire_extinguishers/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GasFireExtinguishers_ParameterController diff --git a/server/Gym/parameter_controllers/gas_fire_extinguishers/parameter_controller.py b/server/Gym/parameter_controllers/gas_fire_extinguishers/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f9edbe317af122de28a6488aefe614455bed50e2 --- /dev/null +++ b/server/Gym/parameter_controllers/gas_fire_extinguishers/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GasFireExtinguishers_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/gaussian_elimination/__init__.py b/server/Gym/parameter_controllers/gaussian_elimination/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bf197b32e020ff14495cc7d5e28cfd3157e7ed21 --- /dev/null +++ b/server/Gym/parameter_controllers/gaussian_elimination/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GaussianElimination_ParameterController diff --git a/server/Gym/parameter_controllers/gaussian_elimination/parameter_controller.py b/server/Gym/parameter_controllers/gaussian_elimination/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c8969747e081440e81f370938c57c1f6c8f9a97d --- /dev/null +++ b/server/Gym/parameter_controllers/gaussian_elimination/parameter_controller.py @@ -0,0 +1,16 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class GaussianElimination_ParameterController(ParameterController) : + def __init__(self, M_multiple_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + if M_multiple_list is None : + M_multiple_list = [2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0] + self.M_multiple_list = M_multiple_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, M = int(M_multiple * self.N)) for M_multiple in self.M_multiple_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gcd_fibonacci_product/__init__.py b/server/Gym/parameter_controllers/gcd_fibonacci_product/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25c5cac26c96123e4c80533faa386489b4233678 --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_fibonacci_product/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GCDFibonacciProduct_ParameterController diff --git 
a/server/Gym/parameter_controllers/gcd_fibonacci_product/parameter_controller.py b/server/Gym/parameter_controllers/gcd_fibonacci_product/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c5f4ade36570fba06958ba202331713198ed4131 --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_fibonacci_product/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GCDFibonacciProduct_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gcd_lcm_counting/__init__.py b/server/Gym/parameter_controllers/gcd_lcm_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..39f97b42294f76f11a0807a34c2d150bd19983a9 --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_lcm_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GcdLcmCounting_ParameterController diff --git a/server/Gym/parameter_controllers/gcd_lcm_counting/parameter_controller.py b/server/Gym/parameter_controllers/gcd_lcm_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0e4d4adf8727999b68eac063e0190037b008356e --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_lcm_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GcdLcmCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_LCM = 10 + + def update(self) -> None : + self.MAX_LCM = int(self.MAX_LCM * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_LCM = 
self.MAX_LCM)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gcd_one_counting/__init__.py b/server/Gym/parameter_controllers/gcd_one_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f678abef09b789480048ea9bae3c5c92c4c27fa --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_one_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GCDOne_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/gcd_one_counting/parameter_controller.py b/server/Gym/parameter_controllers/gcd_one_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f15ec8ea05c37981253cef9154d8fa9cdc866b69 --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_one_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GCDOne_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gcd_prime_counting/__init__.py b/server/Gym/parameter_controllers/gcd_prime_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b35819d85b8a27238c236e0c67c1e3b222f80d1 --- /dev/null +++ b/server/Gym/parameter_controllers/gcd_prime_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GCDPrime_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/gcd_prime_counting/parameter_controller.py b/server/Gym/parameter_controllers/gcd_prime_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ba2404e237b13bce6df559928b15aee2ef5a233c --- /dev/null +++ 
b/server/Gym/parameter_controllers/gcd_prime_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GCDPrime_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gold_washing/__init__.py b/server/Gym/parameter_controllers/gold_washing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a29d26243109a3f307268bad46808ee850b2e6a0 --- /dev/null +++ b/server/Gym/parameter_controllers/gold_washing/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GoldWashing_ParameterController diff --git a/server/Gym/parameter_controllers/gold_washing/parameter_controller.py b/server/Gym/parameter_controllers/gold_washing/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2ad9926fff4f39a0a7e8430b1dcc15d5b36c20ba --- /dev/null +++ b/server/Gym/parameter_controllers/gold_washing/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GoldWashing_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 5 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/gra_minima_game/__init__.py b/server/Gym/parameter_controllers/gra_minima_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..57e341bf041faa2afd68af17ebc7c13f6e2383ac --- /dev/null +++ 
b/server/Gym/parameter_controllers/gra_minima_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GraMinimaGame_ParameterController diff --git a/server/Gym/parameter_controllers/gra_minima_game/parameter_controller.py b/server/Gym/parameter_controllers/gra_minima_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a1be98655e1c42cc89f1552b6ba881989fe582f2 --- /dev/null +++ b/server/Gym/parameter_controllers/gra_minima_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GraMinimaGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/grade_ranking_counting/__init__.py b/server/Gym/parameter_controllers/grade_ranking_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eaccc52db6a7675db852263c62cee94d7003ece9 --- /dev/null +++ b/server/Gym/parameter_controllers/grade_ranking_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GradeRankingCounting_ParameterController diff --git a/server/Gym/parameter_controllers/grade_ranking_counting/parameter_controller.py b/server/Gym/parameter_controllers/grade_ranking_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..26911d64b0593bff31f9f2ca4a1e05e860f73039 --- /dev/null +++ b/server/Gym/parameter_controllers/grade_ranking_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GradeRankingCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + 
self.MAX_N_M = 2 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/graph_contain_tree_counting/__init__.py b/server/Gym/parameter_controllers/graph_contain_tree_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e63f975a7b347818c8c4236e3124dba2efe0bf93 --- /dev/null +++ b/server/Gym/parameter_controllers/graph_contain_tree_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GraphContainTreeCounting_ParameterController diff --git a/server/Gym/parameter_controllers/graph_contain_tree_counting/parameter_controller.py b/server/Gym/parameter_controllers/graph_contain_tree_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5acba16bc9cce111257cc55a9aa12d78150ebc2e --- /dev/null +++ b/server/Gym/parameter_controllers/graph_contain_tree_counting/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class GraphContainTreeCounting_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1) / 2) >= self.N - 1] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/graph_isomorphism/__init__.py b/server/Gym/parameter_controllers/graph_isomorphism/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..7be4d7324785cc04906bbed6902e13c18206aaf5 --- /dev/null +++ b/server/Gym/parameter_controllers/graph_isomorphism/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GraphIsomorphism_ParameterController diff --git a/server/Gym/parameter_controllers/graph_isomorphism/parameter_controller.py b/server/Gym/parameter_controllers/graph_isomorphism/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1f918d6ef199b8fd7ad8236fac52e7aa52581ce3 --- /dev/null +++ b/server/Gym/parameter_controllers/graph_isomorphism/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class GraphIsomorphism_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if edge_density_list is None : + edge_density_list = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1) / 2) > 0] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/grid_bfs/__init__.py b/server/Gym/parameter_controllers/grid_bfs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3dde1a4f5b13f8e144c7f9cdb609e83a98869ee4 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_bfs/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GridBFS_ParameterController diff --git a/server/Gym/parameter_controllers/grid_bfs/parameter_controller.py b/server/Gym/parameter_controllers/grid_bfs/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..29b60e5f16016ecad25c413ac37217d6d4755df6 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_bfs/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GridBFS_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 4 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/grid_coloring_counting/__init__.py b/server/Gym/parameter_controllers/grid_coloring_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1cedddcacd11660fbab4d4a9d1064350aea75b19 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_coloring_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GridColoringCounting_ParameterController diff --git a/server/Gym/parameter_controllers/grid_coloring_counting/parameter_controller.py b/server/Gym/parameter_controllers/grid_coloring_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a273815b4fe534737de0234256244942e9979050 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_coloring_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GridColoringCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/grid_component/__init__.py 
b/server/Gym/parameter_controllers/grid_component/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a3d181a2ab03a0ac4e1cc0f7005758f34ffc1884 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_component/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GridComponent_ParameterController diff --git a/server/Gym/parameter_controllers/grid_component/parameter_controller.py b/server/Gym/parameter_controllers/grid_component/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..d7deebc1856f6bfe5641ea5621b056246bc590ff --- /dev/null +++ b/server/Gym/parameter_controllers/grid_component/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class GridComponent_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 4 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/grid_local_minimum_counting/__init__.py b/server/Gym/parameter_controllers/grid_local_minimum_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fbdd0e58d3ddda16f0da699c54bc9885ca0fed13 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_local_minimum_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import GridLocalMinimumCounting_ParameterController diff --git a/server/Gym/parameter_controllers/grid_local_minimum_counting/parameter_controller.py b/server/Gym/parameter_controllers/grid_local_minimum_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f394ae6183a2dfd61bcd03ad939bb69a3f6c96b7 --- /dev/null +++ b/server/Gym/parameter_controllers/grid_local_minimum_counting/parameter_controller.py 
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/grid_local_minimum_counting/parameter_controller.py
# Re-exported by grid_local_minimum_counting/__init__.py.
class GridLocalMinimumCounting_ParameterController(ParameterController):
    """Controller whose only parameter, ``MAX_N_M``, starts at 3 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Increase ``MAX_N_M`` by exactly one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/grid_parity_construction/parameter_controller.py
# Re-exported by grid_parity_construction/__init__.py.
class GridParityConstruction_ParameterController(ParameterController):
    """Controller whose only parameter, ``MAX_N_M``, starts at 3 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Increase ``MAX_N_M`` by exactly one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/grid_triangle_counting/parameter_controller.py
# Re-exported by grid_triangle_counting/__init__.py.
class GridTriangleCounting_ParameterController(ParameterController):
    """Controller for ``MAX_N_M``: starts at 5, multiplied by 1.5 per update.

    The product is truncated to an int, giving 5 -> 7 -> 10 -> 15 -> ...
    """

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 5

    def update(self) -> None:
        """Replace ``MAX_N_M`` with ``int(MAX_N_M * 1.5)``."""
        scaled = self.MAX_N_M * 1.5
        self.MAX_N_M = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/halving_chain_counting/parameter_controller.py
# Re-exported by halving_chain_counting/__init__.py.
class HalvingChainCounting_ParameterController(ParameterController):
    """Controller for ``MAX_N``: starts at 16, multiplied by 1.5 per update.

    The product is truncated to an int, giving 16 -> 24 -> 36 -> 54 -> ...
    """

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N = 16

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)``."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/hamiltonian_path/parameter_controller.py
# Re-exported by hamiltonian_path/__init__.py.
class HamiltonianPath_ParameterController(ParameterController):
    """Controller pairing a growing ``N`` with a fixed list of edge densities.

    ``N`` starts at 3 and grows by 1 per ``update()``.  One parameter dict is
    produced per density that passes the filter in ``get_parameter_list``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        """Store the density list, defaulting to 0.05, 0.15, ..., 0.95.

        Args:
            edge_density_list: Densities to enumerate; ``None`` selects the
                ten built-in values.  The list object is stored as given,
                not copied.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3

        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95]

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{N, edge_density}`` dicts for every admissible density.

        A density ``d`` is kept only when ``int(d * N * (N - 1)) >= N``.
        The result may be empty when no density passes.
        """
        parameters = []
        for density in self.edge_density_list:
            # Same left-to-right float product as the original expression.
            if int(density * self.N * (self.N - 1)) >= self.N:
                parameters.append({"N": self.N, "edge_density": density})
        return parameters
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/hamiltonian_path_existence/parameter_controller.py
# Re-exported by hamiltonian_path_existence/__init__.py.
class HamiltonianPathExistence_ParameterController(ParameterController):
    """Controller pairing a growing ``N`` with a fixed list of edge densities.

    ``N`` starts at 3 and grows by 1 per ``update()``.  One parameter dict is
    produced per density that passes the filter in ``get_parameter_list``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        """Store the density list, defaulting to 0.05, 0.15, ..., 0.95.

        Args:
            edge_density_list: Densities to enumerate; ``None`` selects the
                ten built-in values.  The list object is stored as given,
                not copied.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3

        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95]

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{N, edge_density}`` dicts for every admissible density.

        A density ``d`` is kept only when ``int(d * N * (N - 1)) >= N``.
        The result may be empty when no density passes.
        """
        parameters = []
        for density in self.edge_density_list:
            # Same left-to-right float product as the original expression.
            if int(density * self.N * (self.N - 1)) >= self.N:
                parameters.append({"N": self.N, "edge_density": density})
        return parameters


# The dump continues with heap_counting/__init__.py, which re-exports
# HeapCounting_ParameterController from its parameter_controller.py module.
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/heap_counting/parameter_controller.py
# Re-exported by heap_counting/__init__.py.
class HeapCounting_ParameterController(ParameterController):
    """Controller for ``MAX_N``: starts at 10, then ``int(MAX_N * 1.1 + 1)``.

    The first steps are 10 -> 12 -> 14.
    """

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N = 10

    def update(self) -> None:
        """Grow ``MAX_N`` by roughly 10% plus one, truncated to an int."""
        grown = self.MAX_N * 1.1 + 1
        self.MAX_N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/hitori_puzzle/parameter_controller.py
# Re-exported by hitori_puzzle/__init__.py.
class HitoriPuzzle_ParameterController(ParameterController):
    """Controller whose only parameter, ``MAX_N_M``, starts at 4 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 4

    def update(self) -> None:
        """Increase ``MAX_N_M`` by exactly one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/hungry_rabbit/parameter_controller.py
# Re-exported by hungry_rabbit/__init__.py.
class HungryRabbit_ParameterController(ParameterController):
    """Controller whose only parameter, ``MAX_N_M``, starts at 4 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 4

    def update(self) -> None:
        """Increase ``MAX_N_M`` by exactly one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/hur_warehouse_store/parameter_controller.py
# Re-exported by hur_warehouse_store/__init__.py.
class HURWarehouseStore_ParameterController(ParameterController):
    """Controller whose only parameter, ``N``, starts at 3 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/imp_party/parameter_controller.py
# Re-exported by imp_party/__init__.py.
class ImpParty_ParameterController(ParameterController):
    """Controller whose only parameter, ``N``, starts at 3 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N``."""
        return [{"N": self.N}]


# The dump continues with
# individual_sum_bounded_sequence_counting/__init__.py, which re-exports
# IndividualSumBounded_SequenceCounting_ParameterController from its
# parameter_controller.py module.
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/individual_sum_bounded_sequence_counting/parameter_controller.py
# Re-exported by individual_sum_bounded_sequence_counting/__init__.py.
class IndividualSumBounded_SequenceCounting_ParameterController(ParameterController):
    """Controller for ``MAX_N``: starts at 4 and doubles on every update."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N = 4

    def update(self) -> None:
        """Double ``MAX_N`` (4 -> 8 -> 16 -> ...)."""
        self.MAX_N = self.MAX_N * 2

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/integer_factorization_counting/parameter_controller.py
# Re-exported by integer_factorization_counting/__init__.py.
class IntegerFactorizationCounting_ParameterController(ParameterController):
    """Controller for ``MAX_N``: starts at 16, multiplied by 1.5 per update.

    The product is truncated to an int, giving 16 -> 24 -> 36 -> 54 -> ...
    """

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N = 16

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)``."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/integer_programming/parameter_controller.py
# Re-exported by integer_programming/__init__.py.
class IntegerProgramming_ParameterController(ParameterController):
    """Controller emitting ``(N, M)`` pairs with ``M = int(multiple * N)``.

    ``N`` starts at 3 and grows by 1 per ``update()``; one dict is produced
    for each multiple held in ``self.M_multiple``.
    """

    def __init__(self, M_multiple_list: Optional[List[float]] = None, **kwargs):
        """Store the multiples, defaulting to 1, 1.1, 1.2, ..., 2.0.

        Args:
            M_multiple_list: Factors applied to ``N`` to derive ``M``;
                ``None`` selects the eleven built-in values.  Note the list
                is kept under the attribute name ``M_multiple``.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3
        if M_multiple_list is None:
            self.M_multiple = [1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
        else:
            self.M_multiple = M_multiple_list

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, M}`` dict per multiple, ``M`` truncated to an int."""
        parameters = []
        for multiple in self.M_multiple:
            parameters.append({"N": self.N, "M": int(multiple * self.N)})
        return parameters


# The dump continues with integral/__init__.py, which re-exports
# Integral_ParameterController from its parameter_controller.py module.
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/integral/parameter_controller.py
# Re-exported by integral/__init__.py.
class Integral_ParameterController(ParameterController):
    """Controller whose only parameter, ``node_num``, starts at 2 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.node_num = 2

    def update(self) -> None:
        """Increase ``node_num`` by exactly one."""
        self.node_num = self.node_num + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``node_num``."""
        return [dict(node_num=self.node_num)]


# File: server/Gym/parameter_controllers/inversion_pair/parameter_controller.py
# Re-exported by inversion_pair/__init__.py.
class InversionPair_ParameterController(ParameterController):
    """Controller for ``N``: starts at 3, then ``int(N * 1.1 + 1)`` per update."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by roughly 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N``."""
        return [{"N": self.N}]
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/inversion_pair_k_counting/parameter_controller.py
# Re-exported by inversion_pair_k_counting/__init__.py.
class InversionPairK_Counting_ParameterController(ParameterController):
    """Controller for ``N``: starts at 3, then ``int(N * 1.1 + 1)`` per update."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by roughly 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/josephus/parameter_controller.py
# Re-exported by josephus/__init__.py.
class Josephus_ParameterController(ParameterController):
    """Controller for ``MAX_N``: starts at 3, then ``int(MAX_N * 1.1 + 1)``."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N = 3

    def update(self) -> None:
        """Grow ``MAX_N`` by roughly 10% plus one, truncated to an int."""
        grown = self.MAX_N * 1.1 + 1
        self.MAX_N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]


# File: server/Gym/parameter_controllers/jug_puzzle/parameter_controller.py
# Re-exported by jug_puzzle/__init__.py.
class JugPuzzle_ParameterController(ParameterController):
    """Controller pairing a growing ``N`` with every entry of ``steps_list``.

    ``N`` starts at 3 and grows by 1 per ``update()``.
    """

    def __init__(self, steps_list: Optional[List] = None, **kwargs):
        """Store the step counts, defaulting to 3..10 then 15, 20, ..., 50.

        Args:
            steps_list: Values emitted under the ``steps`` key; ``None``
                selects the sixteen built-in values.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3

        if steps_list is None:
            self.steps_list = [3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50]
        else:
            self.steps_list = steps_list

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, steps}`` dict per entry of ``steps_list``."""
        parameters = []
        for step_count in self.steps_list:
            parameters.append({"N": self.N, "steps": step_count})
        return parameters
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/k_partition/parameter_controller.py
# Re-exported by k_partition/__init__.py.
class KPartition_ParameterController(ParameterController):
    """Controller emitting ``(N, K)`` pairs where ``N = M * K``.

    ``M`` (the quotient ``N // K``) starts at 2 and grows by 1 per
    ``update()``; one dict is produced for each ``K`` in ``self.Ks``.
    """

    def __init__(self, Ks: Optional[List] = None, **kwargs):
        """Store the ``K`` values, defaulting to ``[2, 3, 4, 5]``.

        Args:
            Ks: Values emitted under the ``K`` key; ``None`` selects the
                built-in list.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.M = 2  # M is N // K
        self.Ks = [2, 3, 4, 5] if Ks is None else Ks

    def update(self) -> None:
        """Increase ``M`` by exactly one."""
        self.M = self.M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, K}`` dict per ``K``, with ``N = M * K``."""
        parameters = []
        for k_value in self.Ks:
            parameters.append({"N": self.M * k_value, "K": k_value})
        return parameters


# File: server/Gym/parameter_controllers/kakurasu/parameter_controller.py
# Re-exported by kakurasu/__init__.py.
class Kakurasu_ParameterController(ParameterController):
    """Controller whose only parameter, ``MAX_N_M``, starts at 3 and grows by 1."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Increase ``MAX_N_M`` by exactly one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/kidding_me/parameter_controller.py
# Re-exported by kidding_me/__init__.py.
class KiddingMe_ParameterController(ParameterController):
    """Controller for ``MAX_N_M``: starts at 3, then ``int(MAX_N_M * 1.1 + 1)``."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Grow ``MAX_N_M`` by roughly 10% plus one, truncated to an int."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# The dump continues with king_sorting/__init__.py, which re-exports
# KingSorting_ParameterController from its parameter_controller.py module.
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/king_sorting/parameter_controller.py
# Re-exported by king_sorting/__init__.py.
class KingSorting_ParameterController(ParameterController):
    """Controller pairing a growing ``N`` with a constant ``MAX_A_B``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on each ``update()``;
    ``MAX_A_B`` is fixed at construction and never changed here.
    """

    def __init__(self, MAX_A_B: int = 10, **kwargs):
        """Record ``MAX_A_B`` and initialise ``N``.

        Args:
            MAX_A_B: Value emitted unchanged under the ``MAX_A_B`` key.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3
        self.MAX_A_B = MAX_A_B

    def update(self) -> None:
        """Grow ``N`` by roughly 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N`` and ``MAX_A_B``."""
        return [{"N": self.N, "MAX_A_B": self.MAX_A_B}]


# File: server/Gym/parameter_controllers/klo_blocks/parameter_controller.py
# Re-exported by klo_blocks/__init__.py.
class KloBlocks_ParameterController(ParameterController):
    """Controller for ``N``: starts at 3, then ``int(N * 1.1 + 1)`` per update."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by roughly 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N``."""
        return [{"N": self.N}]
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/knapsack/parameter_controller.py
# Re-exported by knapsack/__init__.py.
class Knapsack_ParameterController(ParameterController):
    """Controller for ``N``: starts at 3, then ``int(N * 1.1 + 1)`` per update."""

    def __init__(self, **kwargs):
        # Keyword arguments are handed to ParameterController.__init__ as-is.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by roughly 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/knights_and_knaves/parameter_controller.py
# Re-exported by knights_and_knaves/__init__.py.
class KnightsAndKnaves_ParameterController(ParameterController):
    """Controller pairing a growing ``N`` with two constant constraints.

    ``N`` starts at 3 and grows by 1 per ``update()``; the depth and width
    constraints are fixed at construction and never changed here.
    """

    def __init__(self, depth_constraint: int = 2, width_constraint: int = 2, **kwargs):
        """Record both constraints and initialise ``N``.

        Args:
            depth_constraint: Emitted unchanged under ``depth_constraint``.
            width_constraint: Emitted unchanged under ``width_constraint``.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3
        self.depth_constraint = depth_constraint
        self.width_constraint = width_constraint

    def update(self) -> None:
        """Increase ``N`` by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a single-element list with ``N`` and both constraints."""
        current = {
            "N": self.N,
            "depth_constraint": self.depth_constraint,
            "width_constraint": self.width_constraint,
        }
        return [current]


# File: server/Gym/parameter_controllers/kos_dicing/parameter_controller.py
# Re-exported by kos_dicing/__init__.py.
class KosDicing_ParameterController(ParameterController):
    """Controller emitting ``(N, M)`` pairs with ``M = int(multiple * N)``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on each ``update()``;
    one dict is produced for each entry of ``M_multiple_list``.
    """

    def __init__(self, M_multiple_list: Optional[List] = None, **kwargs):
        """Store the multiples, defaulting to 1.0, 1.1, 1.2, ..., 2.0.

        Args:
            M_multiple_list: Factors applied to ``N`` to derive ``M``;
                ``None`` selects the eleven built-in values.
            **kwargs: Forwarded to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 3
        if M_multiple_list is not None:
            self.M_multiple_list = M_multiple_list
        else:
            self.M_multiple_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]

    def update(self) -> None:
        """Grow ``N`` by roughly 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, M}`` dict per multiple, ``M`` truncated to an int."""
        parameters = []
        for multiple in self.M_multiple_list:
            parameters.append({"N": self.N, "M": int(multiple * self.N)})
        return parameters
file mode 100644 index 0000000000000000000000000000000000000000..f36b35ff2e80fb31ad0596d18644a778c2d79a02 --- /dev/null +++ b/server/Gym/parameter_controllers/kth_binary_tree/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Kth_BinaryTree_ParameterController diff --git a/server/Gym/parameter_controllers/kth_binary_tree/parameter_controller.py b/server/Gym/parameter_controllers/kth_binary_tree/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f0e4b70ebfe123abb3718a18f21a21bfd7b24523 --- /dev/null +++ b/server/Gym/parameter_controllers/kth_binary_tree/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Kth_BinaryTree_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/kth_semi_balanced_bracket_sequence/__init__.py b/server/Gym/parameter_controllers/kth_semi_balanced_bracket_sequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5069d023b201820fe68c6c92de85e58cdec2e072 --- /dev/null +++ b/server/Gym/parameter_controllers/kth_semi_balanced_bracket_sequence/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Kth_SemiBalancedBracketSequence_ParameterController diff --git a/server/Gym/parameter_controllers/kth_semi_balanced_bracket_sequence/parameter_controller.py b/server/Gym/parameter_controllers/kth_semi_balanced_bracket_sequence/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6205e2dfa450e0f7ff041f23bafc870cdccfa36d --- /dev/null +++ b/server/Gym/parameter_controllers/kth_semi_balanced_bracket_sequence/parameter_controller.py @@ -0,0 +1,13 @@ +from 
typing import Dict, List +from ...parameter_controller import ParameterController + +class Kth_SemiBalancedBracketSequence_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/kth_subsequence/__init__.py b/server/Gym/parameter_controllers/kth_subsequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..396fddf45a04063a7653c44f8de1f63dc74e36b9 --- /dev/null +++ b/server/Gym/parameter_controllers/kth_subsequence/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import KthSubsequence_ParameterController diff --git a/server/Gym/parameter_controllers/kth_subsequence/parameter_controller.py b/server/Gym/parameter_controllers/kth_subsequence/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9e0316a2b8d427c35c8bcd7bccbbbe107453605e --- /dev/null +++ b/server/Gym/parameter_controllers/kth_subsequence/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class KthSubsequence_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/kur/__init__.py b/server/Gym/parameter_controllers/kur/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ef412e49b51156b67ed81504aecc1f244471181e --- /dev/null +++ b/server/Gym/parameter_controllers/kur/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import KUR_ParameterController diff --git 
a/server/Gym/parameter_controllers/kur/parameter_controller.py b/server/Gym/parameter_controllers/kur/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..bee0867b9c6098b0a38788143c6017e935df2b6a --- /dev/null +++ b/server/Gym/parameter_controllers/kur/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class KUR_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 8 + self.MAX_M = 8 + + def update(self) -> None : + self.MAX_N *= 2 + self.MAX_M = int(self.MAX_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_M = self.MAX_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/lamp_changing/__init__.py b/server/Gym/parameter_controllers/lamp_changing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..af970022aa439c7b0a08d7efd64ca8706764e713 --- /dev/null +++ b/server/Gym/parameter_controllers/lamp_changing/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LampChanging_ParameterController diff --git a/server/Gym/parameter_controllers/lamp_changing/parameter_controller.py b/server/Gym/parameter_controllers/lamp_changing/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a26fae1cdbbdeec8364c394690158988b0b9fe95 --- /dev/null +++ b/server/Gym/parameter_controllers/lamp_changing/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LampChanging_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_T = 8 + + def update(self) -> None : + self.MAX_N_T = int(self.MAX_N_T * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_T = self.MAX_N_T)] \ No newline at end of file 
diff --git a/server/Gym/parameter_controllers/land_acquisition/__init__.py b/server/Gym/parameter_controllers/land_acquisition/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5d05de076f1496ceec5b8f2aaa3c9bcb21fdfd45 --- /dev/null +++ b/server/Gym/parameter_controllers/land_acquisition/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LandAcquisition_ParameterController diff --git a/server/Gym/parameter_controllers/land_acquisition/parameter_controller.py b/server/Gym/parameter_controllers/land_acquisition/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..457c6e9b6f2d490ef20cc87727a60dcae840f690 --- /dev/null +++ b/server/Gym/parameter_controllers/land_acquisition/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LandAcquisition_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/landform_generation_counting/__init__.py b/server/Gym/parameter_controllers/landform_generation_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a4fb8d851efcba07c4afb28d2de1dba1a72f75ff --- /dev/null +++ b/server/Gym/parameter_controllers/landform_generation_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LandformGenerationCounting_ParameterController diff --git a/server/Gym/parameter_controllers/landform_generation_counting/parameter_controller.py b/server/Gym/parameter_controllers/landform_generation_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..237b32073c43ecdd6c5397491b74cd284727b33d --- /dev/null +++ 
b/server/Gym/parameter_controllers/landform_generation_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LandformGenerationCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/largest_convex_polygon/__init__.py b/server/Gym/parameter_controllers/largest_convex_polygon/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e4cf22cccc45321cf5fdc0289c4b87ccd675a891 --- /dev/null +++ b/server/Gym/parameter_controllers/largest_convex_polygon/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LargestConvexPolygon_ParameterController diff --git a/server/Gym/parameter_controllers/largest_convex_polygon/parameter_controller.py b/server/Gym/parameter_controllers/largest_convex_polygon/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..519a94917121fd03c54d2fda64f15998261c0449 --- /dev/null +++ b/server/Gym/parameter_controllers/largest_convex_polygon/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LargestConvexPolygon_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N=self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/largest_rectangle_among_points/__init__.py b/server/Gym/parameter_controllers/largest_rectangle_among_points/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..f64a9e3793fffceb9df17baab1fe5a7c3d83d833 --- /dev/null +++ b/server/Gym/parameter_controllers/largest_rectangle_among_points/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LargestRectangle_AmongPoints_ParameterController diff --git a/server/Gym/parameter_controllers/largest_rectangle_among_points/parameter_controller.py b/server/Gym/parameter_controllers/largest_rectangle_among_points/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ea14d0ce541e4cb76c1548cf398a7ca0a5145972 --- /dev/null +++ b/server/Gym/parameter_controllers/largest_rectangle_among_points/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LargestRectangle_AmongPoints_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/las/__init__.py b/server/Gym/parameter_controllers/las/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8cea0d293ef74f1427ed61af5c6049ad38c2c135 --- /dev/null +++ b/server/Gym/parameter_controllers/las/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LAS_ParameterController diff --git a/server/Gym/parameter_controllers/las/parameter_controller.py b/server/Gym/parameter_controllers/las/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9bdd3ada6615d6478a609d676c12d886a09e7eff --- /dev/null +++ b/server/Gym/parameter_controllers/las/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LAS_ParameterController(ParameterController) : + def __init__(self, 
**kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/las_laser/__init__.py b/server/Gym/parameter_controllers/las_laser/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a07ef39c2ad22fd26ed0fe5b8883c5419e5847cb --- /dev/null +++ b/server/Gym/parameter_controllers/las_laser/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LASLaser_ParameterController diff --git a/server/Gym/parameter_controllers/las_laser/parameter_controller.py b/server/Gym/parameter_controllers/las_laser/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f0bbceb9605b94cdac81ecd2a08e44ce400f0315 --- /dev/null +++ b/server/Gym/parameter_controllers/las_laser/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LASLaser_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/lcm/__init__.py b/server/Gym/parameter_controllers/lcm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ea343ba9084207a072b1c5f3c450ab393adc133c --- /dev/null +++ b/server/Gym/parameter_controllers/lcm/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LCM_ParameterController diff --git a/server/Gym/parameter_controllers/lcm/parameter_controller.py b/server/Gym/parameter_controllers/lcm/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..24808a3991aa9521fe8cbbb757d7c8aedc56d389 --- /dev/null +++ 
b/server/Gym/parameter_controllers/lcm/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LCM_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_a_b = 15 + + def update(self) -> None : + self.MAX_a_b = int(self.MAX_a_b * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_a_b = self.MAX_a_b)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/lds_two_counting/__init__.py b/server/Gym/parameter_controllers/lds_two_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..312cd96dde2841f65548fe2c4a655352343a4e9c --- /dev/null +++ b/server/Gym/parameter_controllers/lds_two_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LDSTwo_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/lds_two_counting/parameter_controller.py b/server/Gym/parameter_controllers/lds_two_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f1b707648b38e66840a3c90eb279f9021b6c7c3f --- /dev/null +++ b/server/Gym/parameter_controllers/lds_two_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LDSTwo_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/light_up_puzzle/__init__.py b/server/Gym/parameter_controllers/light_up_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..11c15ab90f1ede7ed941a79bc6bae9e4a39b8be5 --- /dev/null +++ 
b/server/Gym/parameter_controllers/light_up_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LightUpPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/light_up_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/light_up_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e2134c6ca662d286f126f75e3202a387414b5eeb --- /dev/null +++ b/server/Gym/parameter_controllers/light_up_puzzle/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class LightUpPuzzle_ParameterController(ParameterController) : + def __init__(self, density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + if density_list is None : + density_list = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95] + self.density_list = density_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, density = density) for density in self.density_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/link_beads/__init__.py b/server/Gym/parameter_controllers/link_beads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a7ab9b247805048fbfc30c8c5cf7be2a4fd655 --- /dev/null +++ b/server/Gym/parameter_controllers/link_beads/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LinkBeads_ParameterController diff --git a/server/Gym/parameter_controllers/link_beads/parameter_controller.py b/server/Gym/parameter_controllers/link_beads/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6fa798afa5bd5478bd30306123af93912802ce2e --- /dev/null +++ b/server/Gym/parameter_controllers/link_beads/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class 
LinkBeads_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/lis_lds_concatenation/__init__.py b/server/Gym/parameter_controllers/lis_lds_concatenation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e07e9f55edf068c3f797505672b3e967a953891f --- /dev/null +++ b/server/Gym/parameter_controllers/lis_lds_concatenation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LIS_LDS_Concatenation_ParameterController diff --git a/server/Gym/parameter_controllers/lis_lds_concatenation/parameter_controller.py b/server/Gym/parameter_controllers/lis_lds_concatenation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..356630706feb4102c330de908e5aab6f4b905527 --- /dev/null +++ b/server/Gym/parameter_controllers/lis_lds_concatenation/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LIS_LDS_Concatenation_ParameterController(ParameterController) : + def __init__(self, MAX : int = 100000, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + self.MAX = MAX + + def update(self) -> None : + self.N = int(self.N * 1.2) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, MAX = self.MAX)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/liz_lollipop/__init__.py b/server/Gym/parameter_controllers/liz_lollipop/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bdfbeab496ce8663434e27909b9a7dfc54e62e0b --- /dev/null +++ b/server/Gym/parameter_controllers/liz_lollipop/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LIZ_Lollipop_ParameterController diff --git 
a/server/Gym/parameter_controllers/liz_lollipop/parameter_controller.py b/server/Gym/parameter_controllers/liz_lollipop/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..213d45df4f6055945acc024fecbe172ae45f21de --- /dev/null +++ b/server/Gym/parameter_controllers/liz_lollipop/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LIZ_Lollipop_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/longest_double_palindrome/__init__.py b/server/Gym/parameter_controllers/longest_double_palindrome/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..626bdef35f522fa57ff92b033e040d0f4e4017d9 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_double_palindrome/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Longest_DoublePalindrome_ParameterController diff --git a/server/Gym/parameter_controllers/longest_double_palindrome/parameter_controller.py b/server/Gym/parameter_controllers/longest_double_palindrome/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..565dd1324e2c20efa8b568e4cbb48e1360e8cefa --- /dev/null +++ b/server/Gym/parameter_controllers/longest_double_palindrome/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Longest_DoublePalindrome_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at 
end of file diff --git a/server/Gym/parameter_controllers/longest_matching_subsequence/__init__.py b/server/Gym/parameter_controllers/longest_matching_subsequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b4d7f29b84ef7658510479f2c77e96e1ec9f2abe --- /dev/null +++ b/server/Gym/parameter_controllers/longest_matching_subsequence/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Longest_MatchingSubsequence_ParameterController diff --git a/server/Gym/parameter_controllers/longest_matching_subsequence/parameter_controller.py b/server/Gym/parameter_controllers/longest_matching_subsequence/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3ab2045372d2d31faed6ded61015654541e2e768 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_matching_subsequence/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Longest_MatchingSubsequence_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/longest_maxdiff_bounded_interval/__init__.py b/server/Gym/parameter_controllers/longest_maxdiff_bounded_interval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..812e1a3bc978a2d726f88dd125e92395ba38b4a8 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_maxdiff_bounded_interval/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LongestMaxDiffBoundedInterval_ParameterController diff --git a/server/Gym/parameter_controllers/longest_maxdiff_bounded_interval/parameter_controller.py b/server/Gym/parameter_controllers/longest_maxdiff_bounded_interval/parameter_controller.py new file mode 
100644 index 0000000000000000000000000000000000000000..61310baa745ae98df7557aa3f06dd36e6f4f02ee --- /dev/null +++ b/server/Gym/parameter_controllers/longest_maxdiff_bounded_interval/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class LongestMaxDiffBoundedInterval_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/longest_path/__init__.py b/server/Gym/parameter_controllers/longest_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..408722b5598d83de31edbc205ef8fa81c396ad63 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_path/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import LongestPath_ParameterController diff --git a/server/Gym/parameter_controllers/longest_path/parameter_controller.py b/server/Gym/parameter_controllers/longest_path/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8ae4419e82dd1e88d3ca29450fc5cde3d88bb413 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_path/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class LongestPath_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = 
edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1)) > 0] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/longest_repeated_palindrome/__init__.py b/server/Gym/parameter_controllers/longest_repeated_palindrome/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ff383cc0d63c2211e13cd68ad54cca8759eb4a2 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_repeated_palindrome/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Longest_RepeatedPalindrome_ParameterController diff --git a/server/Gym/parameter_controllers/longest_repeated_palindrome/parameter_controller.py b/server/Gym/parameter_controllers/longest_repeated_palindrome/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a20e0cceea8506f44baf9e074fa09796125e8b15 --- /dev/null +++ b/server/Gym/parameter_controllers/longest_repeated_palindrome/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Longest_RepeatedPalindrome_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maf_mafia/__init__.py b/server/Gym/parameter_controllers/maf_mafia/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5b92c1166ae7a271f9fb755860ee7ce1a8e89255 --- /dev/null +++ b/server/Gym/parameter_controllers/maf_mafia/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MafMafia_ParameterController diff --git a/server/Gym/parameter_controllers/maf_mafia/parameter_controller.py b/server/Gym/parameter_controllers/maf_mafia/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..7f971b0ff0c9971b85e65e62f91e8217ca024c0a --- /dev/null +++ b/server/Gym/parameter_controllers/maf_mafia/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MafMafia_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/magic_square_puzzle/__init__.py b/server/Gym/parameter_controllers/magic_square_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e730b0e2f17528bd37cc55977085bc4d12cad0d0 --- /dev/null +++ b/server/Gym/parameter_controllers/magic_square_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MagicSquarePuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/magic_square_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/magic_square_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a16fc2764104b9fc0d7f7166d7646bee3c2014fb --- /dev/null +++ b/server/Gym/parameter_controllers/magic_square_puzzle/parameter_controller.py @@ -0,0 +1,19 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class MagicSquarePuzzle_ParameterController(ParameterController) : + def __init__(self, sparsity_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if sparsity_list is None : + sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.sparsity_list = sparsity_list + + def update(self) -> None : + self.N += 1 + if self.N % 4 == 2 : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, sparsity = sparsity) for 
sparsity in self.sparsity_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/making_grade/__init__.py b/server/Gym/parameter_controllers/making_grade/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..36a39baa5b67396bb1443918c8b0f34e1068f624 --- /dev/null +++ b/server/Gym/parameter_controllers/making_grade/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MakingGrade_ParameterController diff --git a/server/Gym/parameter_controllers/making_grade/parameter_controller.py b/server/Gym/parameter_controllers/making_grade/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..486b2667377d7d4d94323db8f0b2eef6ff68dedd --- /dev/null +++ b/server/Gym/parameter_controllers/making_grade/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MakingGrade_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/matrix_binary_exponentiation/__init__.py b/server/Gym/parameter_controllers/matrix_binary_exponentiation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15a68b6b08d5a767fa864f4c13cdacdb37673f91 --- /dev/null +++ b/server/Gym/parameter_controllers/matrix_binary_exponentiation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Matrix_BinaryExponentiation_ParameterController diff --git a/server/Gym/parameter_controllers/matrix_binary_exponentiation/parameter_controller.py b/server/Gym/parameter_controllers/matrix_binary_exponentiation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..b40bf5ae78b18c2692c347c4bb283ecaa2f9f7af --- /dev/null +++ 
b/server/Gym/parameter_controllers/matrix_binary_exponentiation/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Matrix_BinaryExponentiation_ParameterController(ParameterController) : + def __init__(self, N_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_K = 2 + self.N_list = N_list if N_list is not None else [2, 3, 5, 10] + + def update(self) -> None : + self.MAX_K *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = N, MAX_K = self.MAX_K) for N in self.N_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/matrix_permutation_both_diagonal_one/__init__.py b/server/Gym/parameter_controllers/matrix_permutation_both_diagonal_one/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9895e13c403e1dbdb9d18ccf740ff9b2aadd283f --- /dev/null +++ b/server/Gym/parameter_controllers/matrix_permutation_both_diagonal_one/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MatrixPermutation_BothDiagonalOne_ParameterController diff --git a/server/Gym/parameter_controllers/matrix_permutation_both_diagonal_one/parameter_controller.py b/server/Gym/parameter_controllers/matrix_permutation_both_diagonal_one/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ac84a40d641c0a2598c362c6947366124ae6e249 --- /dev/null +++ b/server/Gym/parameter_controllers/matrix_permutation_both_diagonal_one/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MatrixPermutation_BothDiagonalOne_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.OddN = 3 + + def update(self) -> None : + self.OddN += 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.OddN), dict(N = self.OddN + 1)] \ No 
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/matrix_permutation_equivalence/parameter_controller.py
class MatrixPermutationEquivalence_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``MAX_N_M``.

    ``MAX_N_M`` starts at 3 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Advance ``MAX_N_M`` by one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/matrix_permutation_main_diagonal_one/parameter_controller.py
class MatrixPermutation_MainDiagonalOne_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Advance ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/matrix_pooling/parameter_controller.py
class MatrixPooling_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``MAX_N_M``.

    ``MAX_N_M`` starts at 4 and becomes ``int(MAX_N_M * 1.1 + 1)`` on every
    ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 4

    def update(self) -> None:
        """Grow ``MAX_N_M`` by 10% plus one, truncated to an integer."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/matrix_rmq_counting/parameter_controller.py
class MatrixRMQCounting_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 2 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 2

    def update(self) -> None:
        """Advance ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_different_group_pair_division/parameter_controller.py
class MaxDifferentGroupPairDivision_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 4 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_grid_path_intersection/parameter_controller.py
class MaxGridPathIntersection_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Advance ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_minimum_after_interval_addition/parameter_controller.py
class MaxMinimum_AfterIntervalAddition_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``MAX_N_M``.

    ``MAX_N_M`` starts at 3 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Advance ``MAX_N_M`` by one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/max_mult_split/parameter_controller.py
class MaxMultSplit_ParameterController(ParameterController):
    """Parameter controller emitting ``(N, K)`` pairs with ``2 <= K <= N - 1``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call. Candidate ``K`` values are derived from ``N`` and a list of ratios.
    """

    def __init__(self, K_ratio_list: Optional[List] = None, **kwargs):
        """Store the ratios used to derive ``K`` from ``N``.

        Args:
            K_ratio_list: Ratios multiplied by ``N`` to get candidate ``K``
                values; ``[0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95]`` when omitted.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.N = 3
        if K_ratio_list is None:
            K_ratio_list = [0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95]
        self.K_ratio_list = K_ratio_list

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, K}`` dict per distinct admissible ``K``.

        ``K = 2`` and ``K = N - 1`` are always included; each ratio adds
        ``int(N * ratio)`` when it lies in ``[2, N - 1]``. Duplicates collapse
        because candidates are collected in a set, so the output follows the
        set's iteration order.
        """
        upper = self.N - 1
        # Seed with the two boundary values, then add ratio-derived values in
        # list order (same insertion sequence as adding them one at a time).
        k_values = set((2, upper))
        k_values.update(
            k
            for k in (int(self.N * ratio) for ratio in self.K_ratio_list)
            if 2 <= k <= upper
        )
        return [{"N": self.N, "K": k} for k in k_values]


# File: server/Gym/parameter_controllers/max_multiplication_fixed_sum/parameter_controller.py
class MaxMultiplicationFixedSum_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``MAX_N``.

    ``MAX_N`` starts at 10 and becomes ``int(MAX_N * 1.5)`` on every
    ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.MAX_N = 10

    def update(self) -> None:
        """Multiply ``MAX_N`` by 1.5, truncated to an integer."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_no_conflicting_bombs/parameter_controller.py
class MaxNoConflictingBombs_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``MAX_N_M``.

    ``MAX_N_M`` starts at 3 and becomes ``int(MAX_N_M * 1.1 + 1)`` on every
    ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Grow ``MAX_N_M`` by 10% plus one, truncated to an integer."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_nonadjacent_k_element_sum/parameter_controller.py
class Max_NonAdjacent_KElementSum_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 4 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_permutation/parameter_controller.py
class MaxPermutation_ParameterController(ParameterController):
    """Parameter controller emitting ``N`` together with a fixed ``MAX_DIGIT_NUM``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call; ``MAX_DIGIT_NUM`` is set once at construction.
    """

    def __init__(self, MAX_DIGIT_NUM: int = 5, **kwargs):
        """Store the constant ``MAX_DIGIT_NUM``.

        Args:
            MAX_DIGIT_NUM: Value reported unchanged in every parameter dict.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.N = 3
        self.MAX_DIGIT_NUM = MAX_DIGIT_NUM

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with ``N`` and ``MAX_DIGIT_NUM``."""
        return [{"N": self.N, "MAX_DIGIT_NUM": self.MAX_DIGIT_NUM}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_rmq_expectation/parameter_controller.py
class MaxRMQExpectation_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 2 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 2

    def update(self) -> None:
        """Advance ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_segment_coverage_constraint/parameter_controller.py
class MaxSegmentCoverageConstraint_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Advance ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_sum_lds/parameter_controller.py
class MaxSumLDS_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_three_square_sum/parameter_controller.py
class MaxThreeSquareSum_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``MAX_N_M``.

    ``MAX_N_M`` starts at 4 and becomes ``int(MAX_N_M * 1.1 + 1)`` on every
    ``update()`` call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.MAX_N_M = 4

    def update(self) -> None:
        """Grow ``MAX_N_M`` by 10% plus one, truncated to an integer."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]


# File: server/Gym/parameter_controllers/max_tree_constrained_permutation_weight/parameter_controller.py
class Max_TreeConstrainedPermutation_Weight_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_tree_k_path_coverage/parameter_controller.py
# NOTE(review): "Coverahe" looks like a typo for "Coverage". The spelling is
# kept because the package __init__.py imports the class under this exact name.
class MaxTree_KPathCoverahe_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 4 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_tree_xor_path/parameter_controller.py
class MaxTreeXorPath_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 4 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_weight_palindromic_substring/parameter_controller.py
class MaxWeightPalindromicSubstring_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and becomes ``int(N * 1.1 + 1)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by 10% plus one, truncated to an integer."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]


# File: server/Gym/parameter_controllers/max_xor_path/parameter_controller.py
class MaxXorPath_ParameterController(ParameterController):
    """Parameter controller over ``N``, ``edge_density`` and ``MAX_bit_length``.

    ``N`` starts at 4 and ``MAX_bit_length`` at 3. Each ``update()`` grows
    ``N`` and then raises ``MAX_bit_length`` until ``2 ** MAX_bit_length`` is
    at least ``2 * N``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        """Store the edge densities to emit.

        Args:
            edge_density_list: Densities to combine with ``N``; ``0.1`` through
                ``0.9`` in steps of ``0.1`` when omitted.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.N = 4

        self.edge_density_list = (
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
            if edge_density_list is None
            else edge_density_list
        )

        self.MAX_bit_length = 3

    def update(self) -> None:
        """Grow ``N``, then bump ``MAX_bit_length`` to keep pace with it."""
        self.N = int(self.N * 1.1 + 1)
        bits = self.MAX_bit_length
        # Smallest bit length (never decreasing) with 2 ** bits >= 2 * N.
        while 2 ** bits < self.N * 2:
            bits += 1
        self.MAX_bit_length = bits

    def get_parameter_list(self) -> List[Dict]:
        """Return the density x bit-length grid for the current ``N``.

        A density is kept only when ``int(density * N * (N - 1) / 2)`` is at
        least ``N``. Each kept density is paired with the five bit lengths
        ``MAX_bit_length .. MAX_bit_length + 4``, densities varying slowest.
        """
        n = self.N
        bit_lengths = range(self.MAX_bit_length, self.MAX_bit_length + 5)
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * n * (n - 1) / 2) < n:
                continue
            for bit_length in bit_lengths:
                parameters.append(
                    {"N": n, "edge_density": edge_density, "MAX_bit_length": bit_length}
                )
        return parameters
from typing import Dict, List
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/max_xor_set/parameter_controller.py
class MaxXorSet_ParameterController(ParameterController):
    """Parameter controller over ``N`` and ``MAX_bit_length``.

    Both start at 3. Each ``update()`` grows ``N`` and then raises
    ``MAX_bit_length`` until ``2 ** MAX_bit_length - 2`` is at least ``2 * N``.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3
        self.MAX_bit_length = 3

    def update(self) -> None:
        """Grow ``N``, then bump ``MAX_bit_length`` to keep pace with it."""
        self.N = int(self.N * 1.1 + 1)
        bits = self.MAX_bit_length
        # Smallest bit length (never decreasing) with 2 ** bits - 2 >= 2 * N.
        while (2 ** bits - 2) < self.N * 2:
            bits += 1
        self.MAX_bit_length = bits

    def get_parameter_list(self) -> List[Dict]:
        """Return five dicts pairing ``N`` with consecutive bit lengths.

        The bit lengths are ``MAX_bit_length .. MAX_bit_length + 4``.
        """
        first = self.MAX_bit_length
        parameters = []
        for bit_length in range(first, first + 5):
            parameters.append({"N": self.N, "MAX_bit_length": bit_length})
        return parameters
from typing import Dict, List, Optional
from ...parameter_controller import ParameterController


# File: server/Gym/parameter_controllers/maximum_achromatic_number/parameter_controller.py
class MaximumAchromaticNumber_ParameterController(ParameterController):
    """Parameter controller over ``N`` and ``edge_density``.

    ``N`` starts at 5 and increases by 1 on every ``update()`` call.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        """Store the edge densities to emit.

        Args:
            edge_density_list: Densities to combine with ``N``; an eleven-value
                default from ``0.02`` to ``0.95`` is used when omitted.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.N = 5

        self.edge_density_list = (
            [0.02, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95]
            if edge_density_list is None
            else edge_density_list
        )

    def update(self) -> None:
        """Advance ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, edge_density}`` dict per admissible density.

        A density is kept only when ``int(density * N * (N - 1) / 2)`` is
        positive.
        """
        n = self.N
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * n * (n - 1) / 2) > 0:
                parameters.append({"N": n, "edge_density": edge_density})
        return parameters


# File: server/Gym/parameter_controllers/maximum_clique/parameter_controller.py
class MaximumClique_ParameterController(ParameterController):
    """Parameter controller over ``N`` and ``edge_density``.

    ``N`` starts at 4 and increases by 2 on every ``update()`` call.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        """Store the edge densities to emit.

        Args:
            edge_density_list: Densities to combine with ``N``; ``0.05``
                through ``0.95`` in steps of ``0.1`` when omitted.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.N = 4

        self.edge_density_list = (
            [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95]
            if edge_density_list is None
            else edge_density_list
        )

    def update(self) -> None:
        """Advance ``N`` by two."""
        self.N = self.N + 2

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, edge_density}`` dict per admissible density.

        A density is kept only when ``int(density * N * (N - 1) / 2)`` is
        positive.
        """
        n = self.N
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * n * (n - 1) / 2) > 0:
                parameters.append({"N": n, "edge_density": edge_density})
        return parameters


# File: server/Gym/parameter_controllers/maximum_divisor/parameter_controller.py
class MaximumDivisor_ParameterController(ParameterController):
    """Parameter controller exposing a single parameter ``N``.

    ``N`` starts at 3 and becomes ``int(N * 1.2 + 2)`` on every ``update()``
    call.
    """

    def __init__(self, **kwargs):
        # All keyword arguments go straight to the base class.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Grow ``N`` by 20% plus two, truncated to an integer."""
        grown = self.N * 1.2 + 2
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
b/server/Gym/parameter_controllers/maximum_independent_set_grid/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MaximumIndependentSetGrid_ParameterController diff --git a/server/Gym/parameter_controllers/maximum_independent_set_grid/parameter_controller.py b/server/Gym/parameter_controllers/maximum_independent_set_grid/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ff979001ff379bcb3b1609ab2a286e1c46a3d94e --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_independent_set_grid/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MaximumIndependentSetGrid_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maximum_independent_set_tree/__init__.py b/server/Gym/parameter_controllers/maximum_independent_set_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4fa471d150834aa7ce364cf7994d4f2e9ace9e04 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_independent_set_tree/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Maximum_IndependentSet_Tree_ParameterController diff --git a/server/Gym/parameter_controllers/maximum_independent_set_tree/parameter_controller.py b/server/Gym/parameter_controllers/maximum_independent_set_tree/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..548f9a555f4cd022e6b6dc773e19dd575028211d --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_independent_set_tree/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import 
ParameterController + +class Maximum_IndependentSet_Tree_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maximum_lexicographical_order_subsequence/__init__.py b/server/Gym/parameter_controllers/maximum_lexicographical_order_subsequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d72b836f239c700c79549f06c82c2c4aa9bdd869 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_lexicographical_order_subsequence/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MaximumLexicographicalOrderSubsequence_ParameterController diff --git a/server/Gym/parameter_controllers/maximum_lexicographical_order_subsequence/parameter_controller.py b/server/Gym/parameter_controllers/maximum_lexicographical_order_subsequence/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6f067510c3521961557c9165e216fee5068c68d9 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_lexicographical_order_subsequence/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MaximumLexicographicalOrderSubsequence_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maximum_point_segment_matching/__init__.py b/server/Gym/parameter_controllers/maximum_point_segment_matching/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..d569ba29835b898db232706c3440719c78d87d65 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_point_segment_matching/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MaximumPointSegmentMatching_ParameterController diff --git a/server/Gym/parameter_controllers/maximum_point_segment_matching/parameter_controller.py b/server/Gym/parameter_controllers/maximum_point_segment_matching/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0eed20beed85bea0fb2b1648fbe45f9e1fcd7150 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_point_segment_matching/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MaximumPointSegmentMatching_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_C_N = 3 + + def update(self) -> None : + self.MAX_C_N = int(self.MAX_C_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_C_N = self.MAX_C_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maximum_subsequence_num/__init__.py b/server/Gym/parameter_controllers/maximum_subsequence_num/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f515b68ba05c941600f817ebd2a17811ee7f5f7c --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_subsequence_num/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Maximum_SubsequenceNum_ParameterController diff --git a/server/Gym/parameter_controllers/maximum_subsequence_num/parameter_controller.py b/server/Gym/parameter_controllers/maximum_subsequence_num/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..69caaeba5e09476e8f767d026f9921026bcddbc8 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_subsequence_num/parameter_controller.py @@ -0,0 +1,25 @@ +from 
typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Maximum_SubsequenceNum_ParameterController(ParameterController) : + def __init__(self, K_ratio_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + self.M = 2 + + if K_ratio_list is None : + K_ratio_list = [0.1, 0.2, 0.3, 0.5, 1.0] + self.K_ratio_list = K_ratio_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + self.M = int(self.M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + Ks = set() + Ks.add(2) + for K_ratio in self.K_ratio_list : + K = int(self.N * K_ratio) + if K >= 2 : + Ks.add(K) + return [dict(N = self.N, M = self.M, K = K) for K in Ks] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maximum_weight_matching/__init__.py b/server/Gym/parameter_controllers/maximum_weight_matching/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..181282c111aa69e4454d277179f6cd48f966fd84 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_weight_matching/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MaximumWeightMatching_ParameterController diff --git a/server/Gym/parameter_controllers/maximum_weight_matching/parameter_controller.py b/server/Gym/parameter_controllers/maximum_weight_matching/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..b8a229612d8e02c0dfa365450bde3d4fe3c4ff92 --- /dev/null +++ b/server/Gym/parameter_controllers/maximum_weight_matching/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class MaximumWeightMatching_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + if edge_density_list is None : + edge_density_list = [0.1, 0.3, 0.4] + self.edge_density_list = 
edge_density_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1) / 2) > 0] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/maze/__init__.py b/server/Gym/parameter_controllers/maze/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..644ab3e11258fae189a9547784edd1806d4de66f --- /dev/null +++ b/server/Gym/parameter_controllers/maze/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Maze_ParameterController diff --git a/server/Gym/parameter_controllers/maze/parameter_controller.py b/server/Gym/parameter_controllers/maze/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a5968f23faf6c81873aa1047cf3d6cb4c8fdfbb4 --- /dev/null +++ b/server/Gym/parameter_controllers/maze/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Maze_ParameterController(ParameterController) : + def __init__(self, density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if density_list is None : + density_list = [0.1, 0.2, 0.3, 0.4] + self.density_list = density_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, density = density) for density in self.density_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_conversion_to_cycle_cost/__init__.py b/server/Gym/parameter_controllers/min_conversion_to_cycle_cost/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4ce92ad853ddfb57b32efd8c107fbc4ac137e689 --- /dev/null +++ b/server/Gym/parameter_controllers/min_conversion_to_cycle_cost/__init__.py @@ -0,0 +1 
@@ +from .parameter_controller import MinConversionToCycleCost_ParameterController diff --git a/server/Gym/parameter_controllers/min_conversion_to_cycle_cost/parameter_controller.py b/server/Gym/parameter_controllers/min_conversion_to_cycle_cost/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9056b2683fc619af8f9381cce0852ca282a6296c --- /dev/null +++ b/server/Gym/parameter_controllers/min_conversion_to_cycle_cost/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinConversionToCycleCost_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_cost_reducing_lnds/__init__.py b/server/Gym/parameter_controllers/min_cost_reducing_lnds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f50487fdb8828b693087cd30b78ee0d56a94daeb --- /dev/null +++ b/server/Gym/parameter_controllers/min_cost_reducing_lnds/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinCostReducingLNDS_ParameterController diff --git a/server/Gym/parameter_controllers/min_cost_reducing_lnds/parameter_controller.py b/server/Gym/parameter_controllers/min_cost_reducing_lnds/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c9849d6e1397d9af22e1be6d9d58600cdde2b7bf --- /dev/null +++ b/server/Gym/parameter_controllers/min_cost_reducing_lnds/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinCostReducingLNDS_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def 
update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_cost_tree_coverage/__init__.py b/server/Gym/parameter_controllers/min_cost_tree_coverage/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59f3d6e0a4fb1a16a4221aec5255db32a733779e --- /dev/null +++ b/server/Gym/parameter_controllers/min_cost_tree_coverage/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinCostTreeCoverage_ParameterController diff --git a/server/Gym/parameter_controllers/min_cost_tree_coverage/parameter_controller.py b/server/Gym/parameter_controllers/min_cost_tree_coverage/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..638853a7a675a1ea970bc88ff59632965a88a1bf --- /dev/null +++ b/server/Gym/parameter_controllers/min_cost_tree_coverage/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinCostTreeCoverage_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_cube_assignment/__init__.py b/server/Gym/parameter_controllers/min_cube_assignment/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..79ee36c865580e91bb508c305bcb8159dc08d95e --- /dev/null +++ b/server/Gym/parameter_controllers/min_cube_assignment/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinCubeAssignment_ParameterController diff --git a/server/Gym/parameter_controllers/min_cube_assignment/parameter_controller.py 
b/server/Gym/parameter_controllers/min_cube_assignment/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..232add90596cc1e43d429b8aeb8387f96ab0455b --- /dev/null +++ b/server/Gym/parameter_controllers/min_cube_assignment/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinCubeAssignment_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_P_Q_R = 2 + + def update(self) -> None : + self.MAX_P_Q_R += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_P_Q_R = self.MAX_P_Q_R)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_division_sum_xor/__init__.py b/server/Gym/parameter_controllers/min_division_sum_xor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0980361065f01dab7b5201fbb6301b8560525b46 --- /dev/null +++ b/server/Gym/parameter_controllers/min_division_sum_xor/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinDivisionSumXor_ParameterController diff --git a/server/Gym/parameter_controllers/min_division_sum_xor/parameter_controller.py b/server/Gym/parameter_controllers/min_division_sum_xor/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..dafe37e4fd1343b0c4b4b49fd7adc09c53bc1934 --- /dev/null +++ b/server/Gym/parameter_controllers/min_division_sum_xor/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinDivisionSumXor_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/min_inorder_binary_tree/__init__.py b/server/Gym/parameter_controllers/min_inorder_binary_tree/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7a2eb7475a6b9ce581412806adeb9c2da28c001e --- /dev/null +++ b/server/Gym/parameter_controllers/min_inorder_binary_tree/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinInorderBinaryTree_ParameterController diff --git a/server/Gym/parameter_controllers/min_inorder_binary_tree/parameter_controller.py b/server/Gym/parameter_controllers/min_inorder_binary_tree/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d94d070e35dd207022853cb6595e0243be36a6 --- /dev/null +++ b/server/Gym/parameter_controllers/min_inorder_binary_tree/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinInorderBinaryTree_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_kdivisor_number/__init__.py b/server/Gym/parameter_controllers/min_kdivisor_number/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b2e3e0674fbf2ce5617be12d29634847b699cfa --- /dev/null +++ b/server/Gym/parameter_controllers/min_kdivisor_number/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinKDivisorNumber_ParameterController diff --git a/server/Gym/parameter_controllers/min_kdivisor_number/parameter_controller.py b/server/Gym/parameter_controllers/min_kdivisor_number/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..4f255e8bb524a3bc6cc07a7693ea2684c35af8ce --- /dev/null +++ 
b/server/Gym/parameter_controllers/min_kdivisor_number/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinKDivisorNumber_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_K = 5 + + def update(self) -> None : + self.MAX_K *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_no_solution_linear_diophantine_equation/__init__.py b/server/Gym/parameter_controllers/min_no_solution_linear_diophantine_equation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4bdda08243f1d481788e5349a6fd1b26b9b03f75 --- /dev/null +++ b/server/Gym/parameter_controllers/min_no_solution_linear_diophantine_equation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinNoSolutionLinearDiophantineEquation_ParameterController diff --git a/server/Gym/parameter_controllers/min_no_solution_linear_diophantine_equation/parameter_controller.py b/server/Gym/parameter_controllers/min_no_solution_linear_diophantine_equation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8dd1b1e0d7a9d0749472d6cc6421ce9600b3994f --- /dev/null +++ b/server/Gym/parameter_controllers/min_no_solution_linear_diophantine_equation/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinNoSolutionLinearDiophantineEquation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_A_B = 16 + + def update(self) -> None : + self.MAX_A_B *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_A_B = self.MAX_A_B)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_nonsubstring/__init__.py 
b/server/Gym/parameter_controllers/min_nonsubstring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b2657f25f0b5be406fd6506e1f9043418a69cd36 --- /dev/null +++ b/server/Gym/parameter_controllers/min_nonsubstring/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinNonsubstring_ParameterController diff --git a/server/Gym/parameter_controllers/min_nonsubstring/parameter_controller.py b/server/Gym/parameter_controllers/min_nonsubstring/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..317dde6a5a74273f37940d2f9c84a54b19bac204 --- /dev/null +++ b/server/Gym/parameter_controllers/min_nonsubstring/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinNonsubstring_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_pairsum_multiplication_permutation/__init__.py b/server/Gym/parameter_controllers/min_pairsum_multiplication_permutation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..800aaabb751269c193d7555efb7df8bbe50afda3 --- /dev/null +++ b/server/Gym/parameter_controllers/min_pairsum_multiplication_permutation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinPairSumMultiplicationPermutation_ParameterController diff --git a/server/Gym/parameter_controllers/min_pairsum_multiplication_permutation/parameter_controller.py b/server/Gym/parameter_controllers/min_pairsum_multiplication_permutation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5a9619d0238a9ec7e346cdea7ff0621509c9bdde --- /dev/null +++ 
b/server/Gym/parameter_controllers/min_pairsum_multiplication_permutation/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinPairSumMultiplicationPermutation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_path_cover_dag/__init__.py b/server/Gym/parameter_controllers/min_path_cover_dag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bd30ee1ba69b05826d559fc1403225ca658b1514 --- /dev/null +++ b/server/Gym/parameter_controllers/min_path_cover_dag/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinPathCover_DAG_ParameterController diff --git a/server/Gym/parameter_controllers/min_path_cover_dag/parameter_controller.py b/server/Gym/parameter_controllers/min_path_cover_dag/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..08f03ee2051fe384ad07a982ac4773616f239cbf --- /dev/null +++ b/server/Gym/parameter_controllers/min_path_cover_dag/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinPathCover_DAG_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_sum_chebyshev_distance/__init__.py b/server/Gym/parameter_controllers/min_sum_chebyshev_distance/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..160b64eea4a2625a8db035b60ec73ebcf628f564 --- /dev/null +++ 
b/server/Gym/parameter_controllers/min_sum_chebyshev_distance/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinSumChebyshevDistance_ParameterController diff --git a/server/Gym/parameter_controllers/min_sum_chebyshev_distance/parameter_controller.py b/server/Gym/parameter_controllers/min_sum_chebyshev_distance/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8b16ab37ec5627ac40fb4c876b8dce61ecf457ea --- /dev/null +++ b/server/Gym/parameter_controllers/min_sum_chebyshev_distance/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinSumChebyshevDistance_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_sum_distance_square/__init__.py b/server/Gym/parameter_controllers/min_sum_distance_square/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cff10d6234258214cc78eaa0e9059d7b000cce1e --- /dev/null +++ b/server/Gym/parameter_controllers/min_sum_distance_square/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinSumDistanceSquare_ParameterController diff --git a/server/Gym/parameter_controllers/min_sum_distance_square/parameter_controller.py b/server/Gym/parameter_controllers/min_sum_distance_square/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..051fdfa48a2b8e491f9699ec11498e365b0f6184 --- /dev/null +++ b/server/Gym/parameter_controllers/min_sum_distance_square/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinSumDistanceSquare_ParameterController(ParameterController) : + 
def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.M = 2 + + def update(self) -> None : + self.M = int(self.M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(M = self.M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_sum_pre_xor/__init__.py b/server/Gym/parameter_controllers/min_sum_pre_xor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..74ab7041ac0bc6db273fb550d8a34c4ab182c1af --- /dev/null +++ b/server/Gym/parameter_controllers/min_sum_pre_xor/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinSumPreXor_ParameterController diff --git a/server/Gym/parameter_controllers/min_sum_pre_xor/parameter_controller.py b/server/Gym/parameter_controllers/min_sum_pre_xor/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..88d5ccd81b9e9efe1662e9174afc412d102b9512 --- /dev/null +++ b/server/Gym/parameter_controllers/min_sum_pre_xor/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinSumPreXor_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_swap_two_permutations/__init__.py b/server/Gym/parameter_controllers/min_swap_two_permutations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..741d9915ebe2b6623b3dc05914198ef0930dadb8 --- /dev/null +++ b/server/Gym/parameter_controllers/min_swap_two_permutations/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinSwapTwoPermutations_ParameterController diff --git a/server/Gym/parameter_controllers/min_swap_two_permutations/parameter_controller.py 
b/server/Gym/parameter_controllers/min_swap_two_permutations/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..782a40030a608d1a89645626c5999843faf4fa65 --- /dev/null +++ b/server/Gym/parameter_controllers/min_swap_two_permutations/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinSwapTwoPermutations_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/min_xor_pair/__init__.py b/server/Gym/parameter_controllers/min_xor_pair/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8693dedb899b556c69e5e19b552d01559f2e0f46 --- /dev/null +++ b/server/Gym/parameter_controllers/min_xor_pair/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinXorPair_ParameterController diff --git a/server/Gym/parameter_controllers/min_xor_pair/parameter_controller.py b/server/Gym/parameter_controllers/min_xor_pair/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c4d7492cdcd111eeb5f944dfa0da9d2ccfb19783 --- /dev/null +++ b/server/Gym/parameter_controllers/min_xor_pair/parameter_controller.py @@ -0,0 +1,16 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinXorPair_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + self.max_bit_length = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + while (1 << self.max_bit_length) <= self.N * 2 : + self.max_bit_length += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, max_bit_length = 
self.max_bit_length)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/minesweeping/__init__.py b/server/Gym/parameter_controllers/minesweeping/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9b6358d9f008f87c172b7fcdddc8dc4834379e --- /dev/null +++ b/server/Gym/parameter_controllers/minesweeping/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Minesweeping_ParameterController diff --git a/server/Gym/parameter_controllers/minesweeping/parameter_controller.py b/server/Gym/parameter_controllers/minesweeping/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a8402c59630f4da836fe5819fce7d5a812aadb3b --- /dev/null +++ b/server/Gym/parameter_controllers/minesweeping/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Minesweeping_ParameterController(ParameterController) : + def __init__(self, density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 4 + + if density_list is None : + density_list = [0.6, 0.7, 0.8, 0.9, 0.95] + self.density_list = density_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, density = density) for density in self.density_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/minimal_cyclic_shift/__init__.py b/server/Gym/parameter_controllers/minimal_cyclic_shift/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e5bbc74437aab2933e8c98f58471f7a445a5b83d --- /dev/null +++ b/server/Gym/parameter_controllers/minimal_cyclic_shift/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinimalCyclicShift_ParameterController diff --git a/server/Gym/parameter_controllers/minimal_cyclic_shift/parameter_controller.py 
b/server/Gym/parameter_controllers/minimal_cyclic_shift/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7d42677b71cfe8f9dc4c008694d1e04607cfc93f --- /dev/null +++ b/server/Gym/parameter_controllers/minimal_cyclic_shift/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MinimalCyclicShift_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/minimum_chromatic_number/__init__.py b/server/Gym/parameter_controllers/minimum_chromatic_number/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eed5c0188ef87df348313311f8978c1e5d5781a7 --- /dev/null +++ b/server/Gym/parameter_controllers/minimum_chromatic_number/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MinimumChromaticNumber_ParameterController diff --git a/server/Gym/parameter_controllers/minimum_chromatic_number/parameter_controller.py b/server/Gym/parameter_controllers/minimum_chromatic_number/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..be2d65d0ae9bea8c6238618fe3735e5afebf6837 --- /dev/null +++ b/server/Gym/parameter_controllers/minimum_chromatic_number/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class MinimumChromaticNumber_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + if edge_density_list is None : + edge_density_list = [0.02, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95] + self.edge_density_list = 
class MinimumChromaticNumber_SegmentOverlap_ParameterController(ParameterController):
    """Parameter controller exposing a single integer ``N`` with multiplicative growth."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
class MinimumCost_MaximumFlow_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a list of edge densities.

    Args:
        edge_density_list: Densities to emit; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Start with 4 vertices.
        self.N = 4

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [
                0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
            ]

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{"N", "edge_density"}`` dict per configured density (no filtering)."""
        parameters = []
        for edge_density in self.edge_density_list:
            parameters.append({"N": self.N, "edge_density": edge_density})
        return parameters
class Minimum_CrossingEdges_GraphPartition_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a filtered list of edge densities.

    Args:
        edge_density_list: Densities to consider; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N; update() increments it.
        self.N = 3

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [
                0.02, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95,
            ]

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{"N", "edge_density"}`` dicts for the densities that pass the filter.

        A density is kept only when ``int(edge_density * N * (N - 1) / 2)`` is
        positive (presumably the resulting edge count -- confirm against the
        environment that consumes these parameters).
        """
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * self.N * (self.N - 1) / 2) <= 0:
                continue
            parameters.append({"N": self.N, "edge_density": edge_density})
        return parameters
class Minimum_DominatingInterval_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` (and derived ``M``) with K densities.

    Args:
        K_density_list: Densities to emit as ``K_density``; when ``None`` a
            built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, K_density_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 3

        if K_density_list is None:
            K_density_list = [0.1, 0.2, 0.3, 0.5, 0.7, 0.9]
        # Assign unconditionally: previously a caller-supplied list was dropped,
        # leaving the attribute unset and get_parameter_list() raising
        # AttributeError.
        self.K_density_list = K_density_list

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        self.N = int(self.N * 1.1 + 1)

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{"N", "M", "K_density"}`` dict per configured K density.

        ``M`` is ``min(N * (N + 1) // 2, N * 2)`` and is the same for every
        entry of a given call.
        """
        # NOTE(review): N * (N + 1) // 2 looks like the count of distinct
        # intervals over N positions, capping M at what exists -- confirm.
        M = min(self.N * (self.N + 1) // 2, self.N * 2)
        return [
            dict(N=self.N, M=M, K_density=K_density)
            for K_density in self.K_density_list
        ]
class Minimum_DominatingSet_Grid_ParameterController(ParameterController):
    """Parameter controller exposing a single integer ``MAX_N_M`` that grows by one per update."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initial value of MAX_N_M; update() increments it.
        self.MAX_N_M = 3

    def update(self) -> None:
        """Increase ``MAX_N_M`` by one."""
        self.MAX_N_M = self.MAX_N_M + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
class MinimumFibonacciRepresentation_ParameterController(ParameterController):
    """Parameter controller exposing a single integer ``MAX_K`` with multiplicative growth."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initial value of MAX_K.
        self.MAX_K = 15

    def update(self) -> None:
        """Replace ``MAX_K`` with ``int(MAX_K * 1.5)``; ``int`` truncates the float result."""
        scaled = self.MAX_K * 1.5
        self.MAX_K = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_K``."""
        return [{"MAX_K": self.MAX_K}]
class MinimumIntervalCoverage_ParameterController(ParameterController):
    """Parameter controller emitting ``N`` together with ``M`` values derived from multipliers.

    Args:
        M_multiple_list: Multipliers applied to ``N`` to obtain ``M``; when
            ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, M_multiple_list: Optional[List[float]] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 3

        # Despite the singular name, M_multiple holds the whole multiplier list.
        if M_multiple_list is None:
            self.M_multiple = [
                0.7, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
                2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9,
                3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0,
            ]
        else:
            self.M_multiple = M_multiple_list

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{"N", "M"}`` dict per multiplier, with ``M = int(multiplier * N)``."""
        parameters = []
        for M_multiple in self.M_multiple:
            parameters.append({"N": self.N, "M": int(M_multiple * self.N)})
        return parameters
class Minimum_MaxAbsSlicer_ParameterController(ParameterController):
    """Parameter controller exposing a single integer ``N`` with multiplicative growth."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 4

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
class MinimumRatioPath_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a filtered list of edge ratios.

    Args:
        edge_ratio_list: Ratios to consider; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_ratio_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 4

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_ratio_list is not None:
            self.edge_ratio_list = edge_ratio_list
        else:
            self.edge_ratio_list = [
                1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
                2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0,
            ]

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{"N", "edge_ratio"}`` dicts for the ratios that pass the filter.

        A ratio is kept only when ``int(N * edge_ratio)`` does not exceed
        ``N * (N - 1) // 2`` (presumably the maximum number of edges in a simple
        undirected graph on N vertices -- confirm against the environment).
        """
        parameters = []
        for edge_ratio in self.edge_ratio_list:
            if int(self.N * edge_ratio) > self.N * (self.N - 1) // 2:
                continue
            parameters.append({"N": self.N, "edge_ratio": edge_ratio})
        return parameters
class MinimumSpanningTree_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a filtered list of edge densities.

    Args:
        edge_density_list: Densities to consider; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 4

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.9]

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{"N", "edge_density"}`` dicts for the densities that pass the filter.

        A density is kept only when ``int(edge_density * N * (N - 1) / 2)`` is
        strictly greater than ``N - 1`` (presumably: more edges than a spanning
        tree needs -- confirm against the environment).
        """
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * self.N * (self.N - 1) / 2) <= self.N - 1:
                continue
            parameters.append({"N": self.N, "edge_density": edge_density})
        return parameters
class MinimumSpanningTreeCounting_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a filtered list of edge ratios.

    Args:
        edge_ratio_list: Ratios to consider; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_ratio_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 4

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_ratio_list is not None:
            self.edge_ratio_list = edge_ratio_list
        else:
            self.edge_ratio_list = [
                1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
                2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9,
                3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9,
                4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0,
            ]

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{"N", "edge_ratio"}`` dicts for the ratios that pass the filter.

        A ratio is kept only when ``int(N * edge_ratio)`` does not exceed
        ``N * (N - 1) // 2`` (presumably the maximum number of edges in a simple
        undirected graph on N vertices -- confirm against the environment).
        """
        parameters = []
        for edge_ratio in self.edge_ratio_list:
            if int(self.N * edge_ratio) > self.N * (self.N - 1) // 2:
                continue
            parameters.append({"N": self.N, "edge_ratio": edge_ratio})
        return parameters
class MinimumSteinerTree_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a filtered list of edge densities.

    Args:
        edge_density_list: Densities to consider; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N; update() increments it.
        self.N = 4

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{"N", "edge_density"}`` dicts for the densities that pass the filter.

        A density is kept only when ``int(edge_density * N * (N - 1) / 2)`` is
        strictly greater than ``N - 1`` (presumably: more edges than a spanning
        tree needs -- confirm against the environment).
        """
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * self.N * (self.N - 1) / 2) <= self.N - 1:
                continue
            parameters.append({"N": self.N, "edge_density": edge_density})
        return parameters
class MinimumTreeWeightedDominatingAncestor_ParameterController(ParameterController):
    """Parameter controller exposing a single integer ``N`` with multiplicative growth."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N.
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``; ``int`` truncates the float result."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``N``."""
        return [{"N": self.N}]
class MinimumUnconflictedGridKMax_ParameterController(ParameterController):
    """Parameter controller exposing a single integer ``MAX_N_M`` with multiplicative growth."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initial value of MAX_N_M.
        self.MAX_N_M = 3

    def update(self) -> None:
        """Replace ``MAX_N_M`` with ``int(MAX_N_M * 1.1 + 1)``; ``int`` truncates."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list holding the current ``MAX_N_M``."""
        return [{"MAX_N_M": self.MAX_N_M}]
class MinimumWeightedSpanningTree_ParameterController(ParameterController):
    """Parameter controller pairing a growing ``N`` with a filtered list of edge densities.

    Args:
        edge_density_list: Densities to consider; when ``None`` a built-in list is used.
        **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
    """

    def __init__(self, edge_density_list: Optional[List] = None, **kwargs):
        super().__init__(**kwargs)
        # Initial value of N; update() increments it.
        self.N = 3

        # The caller's list is stored as-is (not copied) when one is given.
        if edge_density_list is not None:
            self.edge_density_list = edge_density_list
        else:
            self.edge_density_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return ``{"N", "edge_density"}`` dicts for the densities that pass the filter.

        A density is kept only when ``int(edge_density * N * (N - 1) / 2)`` is
        strictly greater than ``N - 1`` (presumably: more edges than a spanning
        tree needs -- confirm against the environment).
        """
        parameters = []
        for edge_density in self.edge_density_list:
            if int(edge_density * self.N * (self.N - 1) / 2) <= self.N - 1:
                continue
            parameters.append({"N": self.N, "edge_density": edge_density})
        return parameters
a/server/Gym/parameter_controllers/mixed_graph_eulerian_circuit/parameter_controller.py b/server/Gym/parameter_controllers/mixed_graph_eulerian_circuit/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5a72b5160399168c3a5d700a018f451156c5ed84 --- /dev/null +++ b/server/Gym/parameter_controllers/mixed_graph_eulerian_circuit/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MixedGraphEulerianCircuit_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/money_charging_game/__init__.py b/server/Gym/parameter_controllers/money_charging_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c3183ba946e1133261927f75f0b0aa0d545bc7d4 --- /dev/null +++ b/server/Gym/parameter_controllers/money_charging_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MoneyChargingGame_ParameterController diff --git a/server/Gym/parameter_controllers/money_charging_game/parameter_controller.py b/server/Gym/parameter_controllers/money_charging_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc311cdb9fedf1ee2b5b2fc3148e8e2e7d57648 --- /dev/null +++ b/server/Gym/parameter_controllers/money_charging_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MoneyChargingGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No 
newline at end of file diff --git a/server/Gym/parameter_controllers/monochrome_block_counting/__init__.py b/server/Gym/parameter_controllers/monochrome_block_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1637b178220e325768b46dee8368c0d9bc126a8 --- /dev/null +++ b/server/Gym/parameter_controllers/monochrome_block_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MonochromeBlockCounting_ParameterController diff --git a/server/Gym/parameter_controllers/monochrome_block_counting/parameter_controller.py b/server/Gym/parameter_controllers/monochrome_block_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..48cac0632046b5a2da13ed8dd2e5d37e97bb9d72 --- /dev/null +++ b/server/Gym/parameter_controllers/monochrome_block_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MonochromeBlockCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_A_B = 5 + + def update(self) -> None : + self.MAX_A_B = int(self.MAX_A_B * 1.5 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_A_B = self.MAX_A_B)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/monotonic_stack/__init__.py b/server/Gym/parameter_controllers/monotonic_stack/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f71ccd99ad72871433d98147fea5271f45a342f3 --- /dev/null +++ b/server/Gym/parameter_controllers/monotonic_stack/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MonotonicStack_ParameterController diff --git a/server/Gym/parameter_controllers/monotonic_stack/parameter_controller.py b/server/Gym/parameter_controllers/monotonic_stack/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..03acb5514e7987f5a822e1c24ad39730e528d4cb --- /dev/null +++ b/server/Gym/parameter_controllers/monotonic_stack/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MonotonicStack_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/most_component_tree_removing_two_paths/__init__.py b/server/Gym/parameter_controllers/most_component_tree_removing_two_paths/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3dc76f1ee7d912737d378d04f4e7f04a26f1acb5 --- /dev/null +++ b/server/Gym/parameter_controllers/most_component_tree_removing_two_paths/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MostComponentTreeRemovingTwoPaths_ParameterController diff --git a/server/Gym/parameter_controllers/most_component_tree_removing_two_paths/parameter_controller.py b/server/Gym/parameter_controllers/most_component_tree_removing_two_paths/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7d0a20d001da29cd062d22d2b006e55eba694ffe --- /dev/null +++ b/server/Gym/parameter_controllers/most_component_tree_removing_two_paths/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MostComponentTreeRemovingTwoPaths_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/most_num_edge_non_self_isomorphism/__init__.py b/server/Gym/parameter_controllers/most_num_edge_non_self_isomorphism/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a565f5e77d2dbd34a37baaed85a19e0ee9e67488 --- /dev/null +++ b/server/Gym/parameter_controllers/most_num_edge_non_self_isomorphism/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MostNumEdge_NonSelfIsomorphism_ParameterController diff --git a/server/Gym/parameter_controllers/most_num_edge_non_self_isomorphism/parameter_controller.py b/server/Gym/parameter_controllers/most_num_edge_non_self_isomorphism/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c88466cdfa93a498fdb1d4fca9d2228d80ae1ed7 --- /dev/null +++ b/server/Gym/parameter_controllers/most_num_edge_non_self_isomorphism/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MostNumEdge_NonSelfIsomorphism_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 10 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/multidrink/__init__.py b/server/Gym/parameter_controllers/multidrink/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..77ffed51b140261d8f39cdb1585ca50440cd6517 --- /dev/null +++ b/server/Gym/parameter_controllers/multidrink/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MultiDrink_ParameterController diff --git a/server/Gym/parameter_controllers/multidrink/parameter_controller.py b/server/Gym/parameter_controllers/multidrink/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..f93b6211dab41f11a0687f15615d6daaaab7e15f --- /dev/null +++ b/server/Gym/parameter_controllers/multidrink/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MultiDrink_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/multiple_flipping_game/__init__.py b/server/Gym/parameter_controllers/multiple_flipping_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..738225627376093c0ee88337d49e134b4206155c --- /dev/null +++ b/server/Gym/parameter_controllers/multiple_flipping_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MultipleFlippingGame_ParameterController diff --git a/server/Gym/parameter_controllers/multiple_flipping_game/parameter_controller.py b/server/Gym/parameter_controllers/multiple_flipping_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ed45c7fdeb130736c8bc7de1fc320d2b9f0b9068 --- /dev/null +++ b/server/Gym/parameter_controllers/multiple_flipping_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MultipleFlippingGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/multiplication/__init__.py b/server/Gym/parameter_controllers/multiplication/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..b30156ad52aa33164752dbb223eb8877d7400946 --- /dev/null +++ b/server/Gym/parameter_controllers/multiplication/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Multiplication_ParameterController diff --git a/server/Gym/parameter_controllers/multiplication/parameter_controller.py b/server/Gym/parameter_controllers/multiplication/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..4fa9e9f0d0a0a745a6169339bbb068c0b823eeb9 --- /dev/null +++ b/server/Gym/parameter_controllers/multiplication/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Multiplication_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.digit_num = 2 + + def update(self) -> None : + self.digit_num += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(digit_num = self.digit_num)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/myj/__init__.py b/server/Gym/parameter_controllers/myj/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b6ccf1ffc4a14138ce868632f45e77af86a0142 --- /dev/null +++ b/server/Gym/parameter_controllers/myj/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import MYJ_ParameterController diff --git a/server/Gym/parameter_controllers/myj/parameter_controller.py b/server/Gym/parameter_controllers/myj/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9e49650a484be98426888f92372608d921004daa --- /dev/null +++ b/server/Gym/parameter_controllers/myj/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class MYJ_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + 
self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/nand_result_counting/__init__.py b/server/Gym/parameter_controllers/nand_result_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..18e4db93df407a6691e8e4b657ee5c19701213b6 --- /dev/null +++ b/server/Gym/parameter_controllers/nand_result_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NANDResultCounting_ParameterController diff --git a/server/Gym/parameter_controllers/nand_result_counting/parameter_controller.py b/server/Gym/parameter_controllers/nand_result_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c50702fd287366b7982a930effb91cde4f785a54 --- /dev/null +++ b/server/Gym/parameter_controllers/nand_result_counting/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NANDResultCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + self.K = 2 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + self.K += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, K = self.K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/negative_base/__init__.py b/server/Gym/parameter_controllers/negative_base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0a9a8caf522ebe87ce2dcf8cfe8eb7c8d2eaec5f --- /dev/null +++ b/server/Gym/parameter_controllers/negative_base/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NegativeBase_ParameterController diff --git a/server/Gym/parameter_controllers/negative_base/parameter_controller.py b/server/Gym/parameter_controllers/negative_base/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..756148996407929537fa38ce72dae53774565636 --- /dev/null +++ b/server/Gym/parameter_controllers/negative_base/parameter_controller.py @@ -0,0 +1,14 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NegativeBase_ParameterController(ParameterController) : + def __init__(self, MAX_R = 16, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 4 + self.MAX_R = MAX_R + + def update(self) -> None : + self.MAX_N *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_R = min(self.MAX_N, self.MAX_R))] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/new_nim_game/__init__.py b/server/Gym/parameter_controllers/new_nim_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d614eb2ec458ad71fd2d223a92b0f4245c56db1e --- /dev/null +++ b/server/Gym/parameter_controllers/new_nim_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NewNimGame_ParameterController diff --git a/server/Gym/parameter_controllers/new_nim_game/parameter_controller.py b/server/Gym/parameter_controllers/new_nim_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..97952e804c3c84c19a260705d1226fb4814194cc --- /dev/null +++ b/server/Gym/parameter_controllers/new_nim_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NewNimGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/next_palindromic/__init__.py b/server/Gym/parameter_controllers/next_palindromic/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..d9e3db50b4f5de40c9c6bbcc69cf5ff2d421ca04 --- /dev/null +++ b/server/Gym/parameter_controllers/next_palindromic/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NextPalindromic_ParameterController diff --git a/server/Gym/parameter_controllers/next_palindromic/parameter_controller.py b/server/Gym/parameter_controllers/next_palindromic/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6e7f5128356b4df0f045e4f45e2c63e6c0026645 --- /dev/null +++ b/server/Gym/parameter_controllers/next_palindromic/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NextPalindromic_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.digit_num = 2 + + def update(self) -> None : + self.digit_num = int(self.digit_num * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(digit_num = self.digit_num)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/nine_puzzle/__init__.py b/server/Gym/parameter_controllers/nine_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..43d7df310a815a47b3f20dbda6fa78b0f275530b --- /dev/null +++ b/server/Gym/parameter_controllers/nine_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NinePuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/nine_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/nine_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5f4f6f9097835a7d11403fcadc88fcf9fd11ffd2 --- /dev/null +++ b/server/Gym/parameter_controllers/nine_puzzle/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class NinePuzzle_ParameterController(ParameterController) : 
+ def __init__(self, steps_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + self.steps_list = [2, 3, 4, 5, 6, 7, 8, 9, 10] if steps_list is None else steps_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, steps = steps) for steps in self.steps_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/no_adjacent_girl_counting/__init__.py b/server/Gym/parameter_controllers/no_adjacent_girl_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e222153695b0920da6039e2763f46a7fb76dff0c --- /dev/null +++ b/server/Gym/parameter_controllers/no_adjacent_girl_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NoAdjacentGirlCounting_ParameterController diff --git a/server/Gym/parameter_controllers/no_adjacent_girl_counting/parameter_controller.py b/server/Gym/parameter_controllers/no_adjacent_girl_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7f149b83020f4ee28f571b714e1ae2b4df53deb8 --- /dev/null +++ b/server/Gym/parameter_controllers/no_adjacent_girl_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NoAdjacentGirlCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 2 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/no_double_triple_counting/__init__.py b/server/Gym/parameter_controllers/no_double_triple_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..01caae124ee6029b2e61c950396e36091bdb317b --- /dev/null 
+++ b/server/Gym/parameter_controllers/no_double_triple_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NoDoubleTripleCounting_ParameterController diff --git a/server/Gym/parameter_controllers/no_double_triple_counting/parameter_controller.py b/server/Gym/parameter_controllers/no_double_triple_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f9806205b846e0a3feefa70a80220c1b2fbe5202 --- /dev/null +++ b/server/Gym/parameter_controllers/no_double_triple_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NoDoubleTripleCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 10 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/not_containing_string_counting/__init__.py b/server/Gym/parameter_controllers/not_containing_string_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dcff76072c191d6a0bf3eea3a4417b50b024f7b3 --- /dev/null +++ b/server/Gym/parameter_controllers/not_containing_string_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NotContainingStringCounting_ParameterController diff --git a/server/Gym/parameter_controllers/not_containing_string_counting/parameter_controller.py b/server/Gym/parameter_controllers/not_containing_string_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8b904c081955ee21ef8c1c26739f21f7507ea8d0 --- /dev/null +++ b/server/Gym/parameter_controllers/not_containing_string_counting/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class 
NotContainingStringCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 8 + self.MAX_M = 3 + + def update(self) -> None : + self.MAX_N *= 2 + self.MAX_M = int(self.MAX_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_M = self.MAX_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/number_partition_counting/__init__.py b/server/Gym/parameter_controllers/number_partition_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a7afe6a9193a91d8bc6ec66780ff570cd214840b --- /dev/null +++ b/server/Gym/parameter_controllers/number_partition_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import NumberPartitionCounting_ParameterController diff --git a/server/Gym/parameter_controllers/number_partition_counting/parameter_controller.py b/server/Gym/parameter_controllers/number_partition_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1f22e0477621b87cb9bf65030305f27e9c6ba019 --- /dev/null +++ b/server/Gym/parameter_controllers/number_partition_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class NumberPartitionCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 10 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/numbrix/__init__.py b/server/Gym/parameter_controllers/numbrix/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6e0fa6100e28ffe223e4c48f7ccc76e81268786e --- /dev/null +++ b/server/Gym/parameter_controllers/numbrix/__init__.py @@ -0,0 +1 @@ 
+from .parameter_controller import Numbrix_ParameterController diff --git a/server/Gym/parameter_controllers/numbrix/parameter_controller.py b/server/Gym/parameter_controllers/numbrix/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0b1ad5d4abd77e35dd3965f9d52e998535466560 --- /dev/null +++ b/server/Gym/parameter_controllers/numbrix/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Numbrix_ParameterController(ParameterController) : + def __init__(self, sparsity_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + if sparsity_list is None : + sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.sparsity_list = sparsity_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, sparsity = sparsity) for sparsity in self.sparsity_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/odd_visitation/__init__.py b/server/Gym/parameter_controllers/odd_visitation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..38d31d0ab228be6e91f3cec21ffb6b6ae9501223 --- /dev/null +++ b/server/Gym/parameter_controllers/odd_visitation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import OddVisitation_ParameterController diff --git a/server/Gym/parameter_controllers/odd_visitation/parameter_controller.py b/server/Gym/parameter_controllers/odd_visitation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..657f379f368baccf927e7c1fad0b8d7ed8a1329e --- /dev/null +++ b/server/Gym/parameter_controllers/odd_visitation/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class 
OddVisitation_ParameterController(ParameterController) : + def __init__(self, edge_ratio_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if edge_ratio_list is None : + edge_ratio_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0] + self.edge_ratio_list = edge_ratio_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_ratio = edge_ratio) for edge_ratio in self.edge_ratio_list if int(self.N * edge_ratio) <= self.N * (self.N - 1) // 2] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/odl_distance/__init__.py b/server/Gym/parameter_controllers/odl_distance/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..80a598a85d64c8c8fd44ca3036da1464dbe984c8 --- /dev/null +++ b/server/Gym/parameter_controllers/odl_distance/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import ODLDistance_ParameterController diff --git a/server/Gym/parameter_controllers/odl_distance/parameter_controller.py b/server/Gym/parameter_controllers/odl_distance/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9c35f05b21b124afec1541f635b86e66e45cf5e0 --- /dev/null +++ b/server/Gym/parameter_controllers/odl_distance/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class ODLDistance_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/pair_more_one_counting/__init__.py b/server/Gym/parameter_controllers/pair_more_one_counting/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..8e95499e43d96be1fc7bf3d54a9e5fe0332bca59 --- /dev/null +++ b/server/Gym/parameter_controllers/pair_more_one_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import PairMoreOneCounting_ParameterController diff --git a/server/Gym/parameter_controllers/pair_more_one_counting/parameter_controller.py b/server/Gym/parameter_controllers/pair_more_one_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1ab498c1b033978dbf9584f993555c126b3396ab --- /dev/null +++ b/server/Gym/parameter_controllers/pair_more_one_counting/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class PairMoreOneCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_M = 10 + self.MAX_delta = 5 + + def update(self) -> None : + self.MAX_M *= 2 + self.MAX_delta = int(self.MAX_delta * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_M = self.MAX_M, MAX_delta = self.MAX_delta)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/palembang_bridges/__init__.py b/server/Gym/parameter_controllers/palembang_bridges/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..41225c2f4417f1f325cbee8f04f94672ef71f8c4 --- /dev/null +++ b/server/Gym/parameter_controllers/palembang_bridges/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import PalembangBridges_ParameterController diff --git a/server/Gym/parameter_controllers/palembang_bridges/parameter_controller.py b/server/Gym/parameter_controllers/palembang_bridges/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0fc0f025fa08b415cd08ca0986fe67f26f8df6b2 --- /dev/null +++ b/server/Gym/parameter_controllers/palembang_bridges/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List 
+from ...parameter_controller import ParameterController + +class PalembangBridges_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/palindrome_partition_counting/__init__.py b/server/Gym/parameter_controllers/palindrome_partition_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..af5d110d2388fd1168238e95ed67b16b754c5dc1 --- /dev/null +++ b/server/Gym/parameter_controllers/palindrome_partition_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import PalindromePartitionCounting_ParameterController diff --git a/server/Gym/parameter_controllers/palindrome_partition_counting/parameter_controller.py b/server/Gym/parameter_controllers/palindrome_partition_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..117f6d2c0bdf94d58bed8e2dade076c8d6b55cd6 --- /dev/null +++ b/server/Gym/parameter_controllers/palindrome_partition_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class PalindromePartitionCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/palindromic_substring_number_counting/__init__.py b/server/Gym/parameter_controllers/palindromic_substring_number_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dffd3b17965cf627d35ffc3651629364b86db5c5 --- /dev/null +++ 
b/server/Gym/parameter_controllers/palindromic_substring_number_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import PalindromicSubstringNumberCounting_ParameterController diff --git a/server/Gym/parameter_controllers/palindromic_substring_number_counting/parameter_controller.py b/server/Gym/parameter_controllers/palindromic_substring_number_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ec5b395e3f5c082340fa7b40661e9123657829e8 --- /dev/null +++ b/server/Gym/parameter_controllers/palindromic_substring_number_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class PalindromicSubstringNumberCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_R = 20 + + def update(self) -> None : + self.MAX_R = int(self.MAX_R * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_R = self.MAX_R)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/pan_solar_panels/__init__.py b/server/Gym/parameter_controllers/pan_solar_panels/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c2917bbd84a7d7b52290ce5c0505732b260b27e3 --- /dev/null +++ b/server/Gym/parameter_controllers/pan_solar_panels/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import PanSolarPanels_ParameterController diff --git a/server/Gym/parameter_controllers/pan_solar_panels/parameter_controller.py b/server/Gym/parameter_controllers/pan_solar_panels/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..d92db9090b9084535e62ce821bc311a95e1935f9 --- /dev/null +++ b/server/Gym/parameter_controllers/pan_solar_panels/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class 
PanSolarPanels_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_A_B_C_D = 10 + + def update(self) -> None : + self.MAX_A_B_C_D = int(self.MAX_A_B_C_D * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_A_B_C_D = self.MAX_A_B_C_D)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/path_no_going_back_counting/__init__.py b/server/Gym/parameter_controllers/path_no_going_back_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f55177aeaac9fa5f3291c5467f279edba8cee6dd --- /dev/null +++ b/server/Gym/parameter_controllers/path_no_going_back_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Path_NoGoingBack_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/path_no_going_back_counting/parameter_controller.py b/server/Gym/parameter_controllers/path_no_going_back_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c473b9bb1ebc3c435d3bc4ee1013645edbe46808 --- /dev/null +++ b/server/Gym/parameter_controllers/path_no_going_back_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Path_NoGoingBack_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_M = 10 + + def update(self) -> None : + self.MAX_M = int(self.MAX_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_M = self.MAX_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/patrol/__init__.py b/server/Gym/parameter_controllers/patrol/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5a0a6364c8ec5c48d0927e0861ada93f4323be66 --- /dev/null +++ b/server/Gym/parameter_controllers/patrol/__init__.py @@ -0,0 +1 @@ +from .parameter_controller 
class Patrol_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 4 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]


class PCPPermutation_ParameterController(ParameterController):
    """Parameter controller pairing ``N`` with a list of ``average_length`` values.

    ``N`` starts at 3 and grows by one per :meth:`update`. The
    ``average_length`` candidates are fixed at construction time.
    """

    def __init__(self, average_length_list: Optional[List] = None, **kwargs):
        """Store the ``average_length`` candidates.

        Args:
            average_length_list: Values to emit as ``average_length``. When
                ``None``, a built-in list ranging from 1.2 to 10.0 is used.
            **kwargs: Forwarded unchanged to the base controller.
        """
        super().__init__(**kwargs)
        self.N = 3
        self.average_length_list = (
            [1.2, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 7.0, 10.0]
            if average_length_list is None
            else average_length_list
        )

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, average_length}`` dict per stored average length."""
        combos = []
        for average_length in self.average_length_list:
            combos.append({"N": self.N, "average_length": average_length})
        return combos


class PipelineArrangement_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 3 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]
class POLPolarization_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 4 and grows by one per :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]


class PolyaModel_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``MAX_T_N``.

    ``MAX_T_N`` starts at 3 and is replaced by ``int(MAX_T_N * 1.1 + 1)`` on
    every call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.MAX_T_N = 3

    def update(self) -> None:
        """Replace ``MAX_T_N`` with ``int(MAX_T_N * 1.1 + 1)``."""
        grown = self.MAX_T_N * 1.1 + 1
        self.MAX_T_N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``MAX_T_N``."""
        return [{"MAX_T_N": self.MAX_T_N}]
class PolynomialFactorization_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 3 and grows by one per :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]
class PolynomialInterpolation_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 2 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 2

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]


class PolynomialMinimum_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 2 and grows by two per :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 2

    def update(self) -> None:
        """Increase ``N`` by two."""
        self.N = self.N + 2

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]
class PolynomialRemainder_ParameterController(ParameterController):
    """Parameter controller emitting ``(N, M)`` pairs.

    ``N`` starts at 3 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`. The ``M`` values come from two sources: absolute
    candidates (``M_list``) and fractions of ``N`` (``Mratio_list``).
    """

    def __init__(self, M_list: Optional[List] = None, Mratio_list: Optional[List] = None, **kwargs):
        """Store the absolute and ratio-based ``M`` candidates.

        Args:
            M_list: Absolute ``M`` candidates; defaults to ``[2, 3, 4, 5]``.
            Mratio_list: Fractions of ``N`` used to derive further ``M``
                candidates; defaults to ``[0.1, 0.3, 0.5, 0.7, 0.9]``.
            **kwargs: Forwarded unchanged to the base controller.
        """
        super().__init__(**kwargs)
        self.N = 3

        if M_list is None:
            M_list = [2, 3, 4, 5]
        self.M_list = M_list

        if Mratio_list is None:
            Mratio_list = [0.1, 0.3, 0.5, 0.7, 0.9]
        self.Mratio_list = Mratio_list

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one ``{N, M}`` dict per distinct admissible ``M``.

        An absolute candidate is admitted when it does not exceed ``N``; a
        ratio-derived candidate ``int(N * ratio)`` is admitted when it is at
        least 2. Duplicates collapse because candidates are gathered in a set.
        """
        chosen = set()
        # Absolute candidates first, then ratio-derived ones, so elements
        # enter the set in the same sequence as before.
        chosen.update(M for M in self.M_list if M <= self.N)
        chosen.update(
            M
            for M in (int(self.N * Mratio) for Mratio in self.Mratio_list)
            if M >= 2
        )
        return [{"N": self.N, "M": M} for M in chosen]
class PowerCycle_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``digit_num``.

    ``digit_num`` starts at 2 and is replaced by ``int(digit_num * 1.1 + 1)``
    on every call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.digit_num = 2

    def update(self) -> None:
        """Replace ``digit_num`` with ``int(digit_num * 1.1 + 1)``."""
        grown = self.digit_num * 1.1 + 1
        self.digit_num = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``digit_num``."""
        return [{"digit_num": self.digit_num}]


class PowerShortcut_ParameterController(ParameterController):
    """Parameter controller emitting every ``(edge_density, K)`` pair for ``N``.

    ``N`` starts at 4 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, edge_density_list: Optional[List] = None, K_list: Optional[List] = None, **kwargs):
        """Store the ``edge_density`` and ``K`` candidates.

        Args:
            edge_density_list: Values emitted as ``edge_density``; defaults
                to ``[0.01, 0.02, 0.03]``.
            K_list: Values emitted as ``K``; defaults to ``[1, 2, 3]``.
            **kwargs: Forwarded unchanged to the base controller.
        """
        super().__init__(**kwargs)
        self.N = 4
        self.edge_density_list = (
            [0.01, 0.02, 0.03] if edge_density_list is None else edge_density_list
        )
        self.K_list = [1, 2, 3] if K_list is None else K_list

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one dict per ``(edge_density, K)`` pair, densities outermost."""
        combos = []
        for edge_density in self.edge_density_list:
            for K in self.K_list:
                combos.append({"N": self.N, "edge_density": edge_density, "K": K})
        return combos


class PowerNest_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``max_number``.

    ``max_number`` starts at 4 and doubles on every call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.max_number = 4

    def update(self) -> None:
        """Double ``max_number``."""
        self.max_number = self.max_number * 2

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``max_number``."""
        return [{"max_number": self.max_number}]
class PrefixConcatenation_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``MAX_N``.

    ``MAX_N`` starts at 64 and doubles on every call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.MAX_N = 64

    def update(self) -> None:
        """Double ``MAX_N``."""
        self.MAX_N = self.MAX_N * 2

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]


class PrefixProductMODDistinctPermutation_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``MAX_N``.

    ``MAX_N`` starts at 8 and is replaced by ``int(MAX_N * 1.5)`` on every
    call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.MAX_N = 8

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)``."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]
class PrefixSumMODDistinctPermutation_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``MAX_N``.

    ``MAX_N`` starts at 8 and is replaced by ``int(MAX_N * 1.5)`` on every
    call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.MAX_N = 8

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)``."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]
class Prefixuffix_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 4 and grows by one per :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Increase ``N`` by one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]


class PreorderTraversal_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 3 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]
class PrimeGraph_MinimumChromaticNumber_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``MAX_N``.

    ``MAX_N`` starts at 6 and is replaced by ``int(MAX_N * 1.5)`` on every
    call to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.MAX_N = 6

    def update(self) -> None:
        """Replace ``MAX_N`` with ``int(MAX_N * 1.5)``."""
        scaled = self.MAX_N * 1.5
        self.MAX_N = int(scaled)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``MAX_N``."""
        return [{"MAX_N": self.MAX_N}]
class ProtectingFlowers_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 3 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]


class PythagoreanGraph_IndependentSetCounting_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 3 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]
class QuadMagicItems_ParameterController(ParameterController):
    """Parameter controller that tracks a single integer, ``N``.

    ``N`` starts at 12 and is replaced by ``int(N * 1.1 + 1)`` on every call
    to :meth:`update`.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are handed to the base controller unchanged.
        super().__init__(**kwargs)
        self.N = 12

    def update(self) -> None:
        """Replace ``N`` with ``int(N * 1.1 + 1)``."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return a one-element list with the current ``N``."""
        return [{"N": self.N}]
import ParameterController + +class QuadraticFunctionSegmentation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/quantum_lock_puzzle/__init__.py b/server/Gym/parameter_controllers/quantum_lock_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c21a212ba703229eb5b7af4b622ba6b33f8978c5 --- /dev/null +++ b/server/Gym/parameter_controllers/quantum_lock_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import QuantumLockPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/quantum_lock_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/quantum_lock_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0a4130752d03aa9629cc842997ee472c85baaf64 --- /dev/null +++ b/server/Gym/parameter_controllers/quantum_lock_puzzle/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class QuantumLockPuzzle_ParameterController(ParameterController) : + def __init__(self, steps_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + self.steps_list = [2, 3, 4, 5, 6, 7, 8, 9, 10] if steps_list is None else steps_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, steps = steps) for steps in self.steps_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/queen_placement/__init__.py b/server/Gym/parameter_controllers/queen_placement/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7fd8a0df50986b60153c8ba23e46097eeac368d1 --- /dev/null +++ 
b/server/Gym/parameter_controllers/queen_placement/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import QueenPlacement_ParameterController diff --git a/server/Gym/parameter_controllers/queen_placement/parameter_controller.py b/server/Gym/parameter_controllers/queen_placement/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..d6fd3c2cddf6034ae502893fabe6648d01ee1163 --- /dev/null +++ b/server/Gym/parameter_controllers/queen_placement/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class QueenPlacement_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/random_range_max_expectation/__init__.py b/server/Gym/parameter_controllers/random_range_max_expectation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0774e913a9bdbae923ab49586065f7522a68013d --- /dev/null +++ b/server/Gym/parameter_controllers/random_range_max_expectation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RandomRangeMaxExpectation_ParameterController diff --git a/server/Gym/parameter_controllers/random_range_max_expectation/parameter_controller.py b/server/Gym/parameter_controllers/random_range_max_expectation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5c8ea6f9beba61fb5f3689f34f47713dc29c829a --- /dev/null +++ b/server/Gym/parameter_controllers/random_range_max_expectation/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RandomRangeMaxExpectation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + 
super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/range_constrained_increasing_sequence_counting/__init__.py b/server/Gym/parameter_controllers/range_constrained_increasing_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..878bc925a8a2e26dabd2f0ae138f3636bec6af5d --- /dev/null +++ b/server/Gym/parameter_controllers/range_constrained_increasing_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RangeConstrained_IncreasingSequence_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/range_constrained_increasing_sequence_counting/parameter_controller.py b/server/Gym/parameter_controllers/range_constrained_increasing_sequence_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3567bc9bfca8a85a0f8ffe49a73b3df896b9bca3 --- /dev/null +++ b/server/Gym/parameter_controllers/range_constrained_increasing_sequence_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RangeConstrained_IncreasingSequence_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/range_four_sequence_construction/__init__.py b/server/Gym/parameter_controllers/range_four_sequence_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8464abdf118a777d620232779f6e0bd9fbc3fc13 --- /dev/null +++ 
b/server/Gym/parameter_controllers/range_four_sequence_construction/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RangeFourSequenceConstruction_ParameterController diff --git a/server/Gym/parameter_controllers/range_four_sequence_construction/parameter_controller.py b/server/Gym/parameter_controllers/range_four_sequence_construction/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..203b0b59b2204d1c578ddded09eb409024092db1 --- /dev/null +++ b/server/Gym/parameter_controllers/range_four_sequence_construction/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RangeFourSequenceConstruction_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/range_shrinking_sequence_counting/__init__.py b/server/Gym/parameter_controllers/range_shrinking_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c1c4968976fa856cd64b64381385952ba8ab489c --- /dev/null +++ b/server/Gym/parameter_controllers/range_shrinking_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RangeShrinkingSequenceCounting_ParameterController diff --git a/server/Gym/parameter_controllers/range_shrinking_sequence_counting/parameter_controller.py b/server/Gym/parameter_controllers/range_shrinking_sequence_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e992e12aca16372fcfadedd0f1a3ed78f9d688d4 --- /dev/null +++ b/server/Gym/parameter_controllers/range_shrinking_sequence_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import 
ParameterController + +class RangeShrinkingSequenceCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/recursive_function/__init__.py b/server/Gym/parameter_controllers/recursive_function/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..34e699160b40640f956caa45d0ba4e411c22d125 --- /dev/null +++ b/server/Gym/parameter_controllers/recursive_function/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RecursiveFunction_ParameterController diff --git a/server/Gym/parameter_controllers/recursive_function/parameter_controller.py b/server/Gym/parameter_controllers/recursive_function/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7fd5b2ef06176d08ffebd2788791415a048055ee --- /dev/null +++ b/server/Gym/parameter_controllers/recursive_function/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RecursiveFunction_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_M_N = 3 + + def update(self) -> None : + self.MAX_M_N = int(self.MAX_M_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_M_N = self.MAX_M_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/recursive_sequence_sum_construction/__init__.py b/server/Gym/parameter_controllers/recursive_sequence_sum_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b372ce360d10b14792a9bd63654faf1a28e516 --- /dev/null +++ b/server/Gym/parameter_controllers/recursive_sequence_sum_construction/__init__.py @@ -0,0 +1 @@ +from 
.parameter_controller import RecursiveSequenceSumConstruction_ParameterController diff --git a/server/Gym/parameter_controllers/recursive_sequence_sum_construction/parameter_controller.py b/server/Gym/parameter_controllers/recursive_sequence_sum_construction/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..b3ff6c27d34f8ddada25926d8d42b0f76e179b2b --- /dev/null +++ b/server/Gym/parameter_controllers/recursive_sequence_sum_construction/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RecursiveSequenceSumConstruction_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + self.MAX_F0 = 128 + self.MAX_A = 16 + self.MAX_B = 16384 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, MAX_F0 = self.MAX_F0, MAX_A = self.MAX_A, MAX_B = self.MAX_B)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/repeat_sequence_lnds/__init__.py b/server/Gym/parameter_controllers/repeat_sequence_lnds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8c237a3ef115305aa4a20f2a9dd12165b7d29f84 --- /dev/null +++ b/server/Gym/parameter_controllers/repeat_sequence_lnds/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RepeatSequenceLNDS_ParameterController \ No newline at end of file diff --git a/server/Gym/parameter_controllers/repeat_sequence_lnds/parameter_controller.py b/server/Gym/parameter_controllers/repeat_sequence_lnds/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..79920128f37f7e81d2fdab64397a5f068193451d --- /dev/null +++ b/server/Gym/parameter_controllers/repeat_sequence_lnds/parameter_controller.py @@ -0,0 +1,16 @@ +from typing import Dict, List +from ...parameter_controller import 
ParameterController + + +class RepeatSequenceLNDS_ParameterController(ParameterController): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.n = 3 + self.MAX_T = 5 + + def update(self) -> None: + self.n += 1 + self.MAX_T = int(self.MAX_T * 1.5) + + def get_parameter_list(self) -> List[Dict]: + return [dict(n=self.n, MAX_T=self.MAX_T)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/root_extraction/__init__.py b/server/Gym/parameter_controllers/root_extraction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8f4a60311955eafd53fae9537b1c2913543faea3 --- /dev/null +++ b/server/Gym/parameter_controllers/root_extraction/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RootExtraction_ParameterController diff --git a/server/Gym/parameter_controllers/root_extraction/parameter_controller.py b/server/Gym/parameter_controllers/root_extraction/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c17e0d3d1dd4b97af0c72f1c27beae02b62ec46c --- /dev/null +++ b/server/Gym/parameter_controllers/root_extraction/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RootExtraction_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + self.MAX_K = 2 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5) + self.MAX_K += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/round_robin/__init__.py b/server/Gym/parameter_controllers/round_robin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54c09246ca52661e9970e5a8e95ee0eff2c80d51 --- /dev/null +++ b/server/Gym/parameter_controllers/round_robin/__init__.py @@ -0,0 +1 @@ +from 
.parameter_controller import RoundRobin_ParameterController diff --git a/server/Gym/parameter_controllers/round_robin/parameter_controller.py b/server/Gym/parameter_controllers/round_robin/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e78004267127bc2ed8c12785098a173057ca3e01 --- /dev/null +++ b/server/Gym/parameter_controllers/round_robin/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RoundRobin_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/roundtable_assignment/__init__.py b/server/Gym/parameter_controllers/roundtable_assignment/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..422e993b47e46d4deb6135c48c9cfe658421277a --- /dev/null +++ b/server/Gym/parameter_controllers/roundtable_assignment/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RoundTableAssignment_ParameterController diff --git a/server/Gym/parameter_controllers/roundtable_assignment/parameter_controller.py b/server/Gym/parameter_controllers/roundtable_assignment/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f6de278531ea04ea7db8b84d9ab73634b6c7e4eb --- /dev/null +++ b/server/Gym/parameter_controllers/roundtable_assignment/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RoundTableAssignment_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> 
List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/royal_lock_counting/__init__.py b/server/Gym/parameter_controllers/royal_lock_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4a28ef0811fd760ca378c97749d785aebd5baa75 --- /dev/null +++ b/server/Gym/parameter_controllers/royal_lock_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import RoyalLockCounting_ParameterController diff --git a/server/Gym/parameter_controllers/royal_lock_counting/parameter_controller.py b/server/Gym/parameter_controllers/royal_lock_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..67bbff05d9bfa559962f9461c396f5371ee6aa29 --- /dev/null +++ b/server/Gym/parameter_controllers/royal_lock_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class RoyalLockCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/salad_bar/__init__.py b/server/Gym/parameter_controllers/salad_bar/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0510b2baec7ad7cc5a43bfdc8261051773904fdc --- /dev/null +++ b/server/Gym/parameter_controllers/salad_bar/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SaladBar_ParameterController diff --git a/server/Gym/parameter_controllers/salad_bar/parameter_controller.py b/server/Gym/parameter_controllers/salad_bar/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..004606db7f3a59b9a8a34a2e0e1811bf50e399b0 --- /dev/null +++ 
b/server/Gym/parameter_controllers/salad_bar/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SaladBar_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/salesman_fatigue/__init__.py b/server/Gym/parameter_controllers/salesman_fatigue/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f316d58e61ff239388ed4bfc9deeb6879c6edb8f --- /dev/null +++ b/server/Gym/parameter_controllers/salesman_fatigue/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SalesmanFatigue_ParameterController diff --git a/server/Gym/parameter_controllers/salesman_fatigue/parameter_controller.py b/server/Gym/parameter_controllers/salesman_fatigue/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..14409b2ce060a389c5b55b2741350157213caf23 --- /dev/null +++ b/server/Gym/parameter_controllers/salesman_fatigue/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SalesmanFatigue_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/same_adjacency_counting/__init__.py b/server/Gym/parameter_controllers/same_adjacency_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f7c3306d7abb3b4929e0db4d629e36ed0f3a7d6a --- /dev/null +++ 
b/server/Gym/parameter_controllers/same_adjacency_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SameAdjacencyCounting_ParameterController diff --git a/server/Gym/parameter_controllers/same_adjacency_counting/parameter_controller.py b/server/Gym/parameter_controllers/same_adjacency_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5b7ae312e11844f1f2e9193831af57761c2fa192 --- /dev/null +++ b/server/Gym/parameter_controllers/same_adjacency_counting/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SameAdjacencyCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 7 + self.MAX_M = 2 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5 + 1) + self.MAX_M = int(self.MAX_M * 1.5 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_M = self.MAX_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sat/__init__.py b/server/Gym/parameter_controllers/sat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b3bff750bbca0de6b60d9b386a6af9082a24213 --- /dev/null +++ b/server/Gym/parameter_controllers/sat/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SAT_ParameterController diff --git a/server/Gym/parameter_controllers/sat/parameter_controller.py b/server/Gym/parameter_controllers/sat/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e2641841e945ce097cbe3f1b4c92041d6e589585 --- /dev/null +++ b/server/Gym/parameter_controllers/sat/parameter_controller.py @@ -0,0 +1,21 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class SAT_ParameterController(ParameterController) : + def __init__(self, density_list : Optional[List] = None, 
M_multiple_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if density_list is None : + density_list = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5] + self.density_list = density_list + + if M_multiple_list is None : + M_multiple_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0] + self.M_multiple_list = M_multiple_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, M = int(M_multiple * self.N), density = density) for density in self.density_list for M_multiple in self.M_multiple_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/scc_sequence_counting/__init__.py b/server/Gym/parameter_controllers/scc_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..64b4087d3ab0aaff7a2dcfef303cafb991df03c4 --- /dev/null +++ b/server/Gym/parameter_controllers/scc_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SCC_Sequence_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/scc_sequence_counting/parameter_controller.py b/server/Gym/parameter_controllers/scc_sequence_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..856986a499a419d940d503e60ad4254bced0c186 --- /dev/null +++ b/server/Gym/parameter_controllers/scc_sequence_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SCC_Sequence_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at 
end of file diff --git a/server/Gym/parameter_controllers/secret_cow_code/__init__.py b/server/Gym/parameter_controllers/secret_cow_code/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6eea0ee95afecf991cdc8ef93bebb8a210ef4411 --- /dev/null +++ b/server/Gym/parameter_controllers/secret_cow_code/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SecretCowCode_ParameterController diff --git a/server/Gym/parameter_controllers/secret_cow_code/parameter_controller.py b/server/Gym/parameter_controllers/secret_cow_code/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..10a9025fbc3913cbdebea967dd93cb1edd3f1b0d --- /dev/null +++ b/server/Gym/parameter_controllers/secret_cow_code/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SecretCowCode_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_K = 8 + self.MAX_N = 5 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + self.MAX_K *= 2 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/segment_min_length_equal_counting/__init__.py b/server/Gym/parameter_controllers/segment_min_length_equal_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be7751767d16d64dc33c97f79e0c1cb964af292a --- /dev/null +++ b/server/Gym/parameter_controllers/segment_min_length_equal_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SegmentMinLengthEqual_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/segment_min_length_equal_counting/parameter_controller.py b/server/Gym/parameter_controllers/segment_min_length_equal_counting/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..16376438e546e24e253846c2f14e367a59c7ea54 --- /dev/null +++ b/server/Gym/parameter_controllers/segment_min_length_equal_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SegmentMinLengthEqual_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/segment_tree_sorting_counting/__init__.py b/server/Gym/parameter_controllers/segment_tree_sorting_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..026ff784b65f5f7c9ee419db79933482192e2135 --- /dev/null +++ b/server/Gym/parameter_controllers/segment_tree_sorting_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SegmentTreeSortingCounting_ParameterController diff --git a/server/Gym/parameter_controllers/segment_tree_sorting_counting/parameter_controller.py b/server/Gym/parameter_controllers/segment_tree_sorting_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a369c8c316d9243e8effb7917c27ef6a5c14888d --- /dev/null +++ b/server/Gym/parameter_controllers/segment_tree_sorting_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SegmentTreeSortingCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 2 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/self_power_sequence_mod/__init__.py 
b/server/Gym/parameter_controllers/self_power_sequence_mod/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d642ef12e7baa34e164d97188d6baf0ec15ed23 --- /dev/null +++ b/server/Gym/parameter_controllers/self_power_sequence_mod/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SelfPowerSequenceMOD_ParameterController diff --git a/server/Gym/parameter_controllers/self_power_sequence_mod/parameter_controller.py b/server/Gym/parameter_controllers/self_power_sequence_mod/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..40c3ed5949b1236866d63615b038cf925bea8893 --- /dev/null +++ b/server/Gym/parameter_controllers/self_power_sequence_mod/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SelfPowerSequenceMOD_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_MOD = 16 + + def update(self) -> None : + self.MAX_MOD = int(self.MAX_MOD * 2) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_MOD = self.MAX_MOD)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/set_cover/__init__.py b/server/Gym/parameter_controllers/set_cover/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c4bd98f1b1703299fdee695ffa4f568993de2a5a --- /dev/null +++ b/server/Gym/parameter_controllers/set_cover/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SetCover_ParameterController diff --git a/server/Gym/parameter_controllers/set_cover/parameter_controller.py b/server/Gym/parameter_controllers/set_cover/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..96507704fbe592f198bd9244aad3a456a873bfa0 --- /dev/null +++ b/server/Gym/parameter_controllers/set_cover/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from 
...parameter_controller import ParameterController + +class SetCover_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/set_splitting/__init__.py b/server/Gym/parameter_controllers/set_splitting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2b5b102b548a85dca8a24a889f0947eaf0abdfff --- /dev/null +++ b/server/Gym/parameter_controllers/set_splitting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SetSplitting_ParameterController diff --git a/server/Gym/parameter_controllers/set_splitting/parameter_controller.py b/server/Gym/parameter_controllers/set_splitting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..07aebc14dbabfe25945edeff82d0732f3ad0657f --- /dev/null +++ b/server/Gym/parameter_controllers/set_splitting/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class SetSplitting_ParameterController(ParameterController) : + def __init__(self, M_multiple_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if M_multiple_list is None : + M_multiple_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0] + self.M_multiple_list = M_multiple_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, M = int(M_multiple * self.N)) for M_multiple in self.M_multiple_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/shared_substring_counting/__init__.py b/server/Gym/parameter_controllers/shared_substring_counting/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..5a9eb1e6c71d672717c0acf121d788450af7a7db --- /dev/null +++ b/server/Gym/parameter_controllers/shared_substring_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SharedSubstringCounting_ParameterController diff --git a/server/Gym/parameter_controllers/shared_substring_counting/parameter_controller.py b/server/Gym/parameter_controllers/shared_substring_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c2c95d115caaa76372d220abe808a31f0e355664 --- /dev/null +++ b/server/Gym/parameter_controllers/shared_substring_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SharedSubstringCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_LEN = 10 + + def update(self) -> None : + self.MAX_LEN = int(self.MAX_LEN * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_LEN = self.MAX_LEN)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/shortest_path/__init__.py b/server/Gym/parameter_controllers/shortest_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b9347610263cd6f8a2786ec94ded1b551b5b488a --- /dev/null +++ b/server/Gym/parameter_controllers/shortest_path/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import ShortestPath_ParameterController diff --git a/server/Gym/parameter_controllers/shortest_path/parameter_controller.py b/server/Gym/parameter_controllers/shortest_path/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..87d5570576c44e34fe110d8591fd3b4b0240d220 --- /dev/null +++ b/server/Gym/parameter_controllers/shortest_path/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + 
+class ShortestPath_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/shortest_path_count_construction/__init__.py b/server/Gym/parameter_controllers/shortest_path_count_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6fea0d8426763b682dedbb3dafb658052c9bdfec --- /dev/null +++ b/server/Gym/parameter_controllers/shortest_path_count_construction/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import ShortestPathCountConstruction_ParameterController diff --git a/server/Gym/parameter_controllers/shortest_path_count_construction/parameter_controller.py b/server/Gym/parameter_controllers/shortest_path_count_construction/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3c6bd03e21142277227332baf096384678d30476 --- /dev/null +++ b/server/Gym/parameter_controllers/shortest_path_count_construction/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class ShortestPathCountConstruction_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_K = 12 + + def update(self) -> None : + self.MAX_K = int(self.MAX_K * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/shortest_unicolor_substring/__init__.py 
b/server/Gym/parameter_controllers/shortest_unicolor_substring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c366209474d83f7730f8bbf0be29bb09620f213 --- /dev/null +++ b/server/Gym/parameter_controllers/shortest_unicolor_substring/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import ShortestUnicolorSubstring_ParameterController diff --git a/server/Gym/parameter_controllers/shortest_unicolor_substring/parameter_controller.py b/server/Gym/parameter_controllers/shortest_unicolor_substring/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a860739ca424f6487bc41cff3a798c7b402d9dad --- /dev/null +++ b/server/Gym/parameter_controllers/shortest_unicolor_substring/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class ShortestUnicolorSubstring_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/singing_girl_story/__init__.py b/server/Gym/parameter_controllers/singing_girl_story/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8697437e9855a0dffa4263523841ae081a80fa59 --- /dev/null +++ b/server/Gym/parameter_controllers/singing_girl_story/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SingingGirlStory_ParameterController diff --git a/server/Gym/parameter_controllers/singing_girl_story/parameter_controller.py b/server/Gym/parameter_controllers/singing_girl_story/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ca7f814c4944aceb35373d0fa9b34e2db4fb8e20 --- /dev/null +++ b/server/Gym/parameter_controllers/singing_girl_story/parameter_controller.py @@ 
-0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SingingGirlStory_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/single_stack_sorting/__init__.py b/server/Gym/parameter_controllers/single_stack_sorting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..91d7ab11c59dc559af3a800ad248c8b222cefd93 --- /dev/null +++ b/server/Gym/parameter_controllers/single_stack_sorting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SingleStackSorting_ParameterController diff --git a/server/Gym/parameter_controllers/single_stack_sorting/parameter_controller.py b/server/Gym/parameter_controllers/single_stack_sorting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ee6c7cd15bd28dd73fb19f4410ee977a199c53c3 --- /dev/null +++ b/server/Gym/parameter_controllers/single_stack_sorting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SingleStackSorting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/ska_rock_garden/__init__.py b/server/Gym/parameter_controllers/ska_rock_garden/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1d517ad40ed8a75119777ca5686c42f70149d835 --- /dev/null +++ b/server/Gym/parameter_controllers/ska_rock_garden/__init__.py @@ -0,0 +1 @@ +from 
.parameter_controller import SkaRockGarden_ParameterController diff --git a/server/Gym/parameter_controllers/ska_rock_garden/parameter_controller.py b/server/Gym/parameter_controllers/ska_rock_garden/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..0641e8ea9f4c7639daae241db07974d2705965d0 --- /dev/null +++ b/server/Gym/parameter_controllers/ska_rock_garden/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SkaRockGarden_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/skyscraper_puzzle/__init__.py b/server/Gym/parameter_controllers/skyscraper_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0495bede2dc86b6682d0a6997f2a00c5b471f575 --- /dev/null +++ b/server/Gym/parameter_controllers/skyscraper_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SkyscraperPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/skyscraper_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/skyscraper_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8059a1579aaa6238a26d02dad190c394eb5b7037 --- /dev/null +++ b/server/Gym/parameter_controllers/skyscraper_puzzle/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SkyscraperPuzzle_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = 
self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/skyscraper_sum_puzzle/__init__.py b/server/Gym/parameter_controllers/skyscraper_sum_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..225b5f6f076013089d93a8e77b3ee03984afe949 --- /dev/null +++ b/server/Gym/parameter_controllers/skyscraper_sum_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SkyscraperSumPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/skyscraper_sum_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/skyscraper_sum_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3c60397fe6770261f3a661d609d9da24371a4c45 --- /dev/null +++ b/server/Gym/parameter_controllers/skyscraper_sum_puzzle/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SkyscraperSumPuzzle_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sliding_window/__init__.py b/server/Gym/parameter_controllers/sliding_window/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..51456edde56bbd03a58a0afad04d4e4bd6afcd23 --- /dev/null +++ b/server/Gym/parameter_controllers/sliding_window/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SlidingWindow_ParameterController diff --git a/server/Gym/parameter_controllers/sliding_window/parameter_controller.py b/server/Gym/parameter_controllers/sliding_window/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f83cb49a6c91e94d0c5e6dfc53368736912a3d89 --- /dev/null +++ 
b/server/Gym/parameter_controllers/sliding_window/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SlidingWindow_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/slo_elephants/__init__.py b/server/Gym/parameter_controllers/slo_elephants/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5185a865389d788aa9b84be14fb2396174068047 --- /dev/null +++ b/server/Gym/parameter_controllers/slo_elephants/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SLOElephants_ParameterController diff --git a/server/Gym/parameter_controllers/slo_elephants/parameter_controller.py b/server/Gym/parameter_controllers/slo_elephants/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7eeff7c6621d3a8cd82ed1a9a2eaeb9bc2a4935e --- /dev/null +++ b/server/Gym/parameter_controllers/slo_elephants/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SLOElephants_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/smallest_binary_multiple/__init__.py b/server/Gym/parameter_controllers/smallest_binary_multiple/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ecf677e612bfa6f138245889814fc05452d0f6b9 --- /dev/null +++ 
b/server/Gym/parameter_controllers/smallest_binary_multiple/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SmallestBinaryMultiple_ParameterController diff --git a/server/Gym/parameter_controllers/smallest_binary_multiple/parameter_controller.py b/server/Gym/parameter_controllers/smallest_binary_multiple/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1085feb321fc2ed9f44c29778a0e749dc362ab4b --- /dev/null +++ b/server/Gym/parameter_controllers/smallest_binary_multiple/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SmallestBinaryMultiple_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_A = 10 + + def update(self) -> None : + self.MAX_A = int(self.MAX_A * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_A = self.MAX_A)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/smallest_circle/__init__.py b/server/Gym/parameter_controllers/smallest_circle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e58648e2185c0e751f8210da94f187e1aa93c629 --- /dev/null +++ b/server/Gym/parameter_controllers/smallest_circle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SmallestCircle_ParameterController diff --git a/server/Gym/parameter_controllers/smallest_circle/parameter_controller.py b/server/Gym/parameter_controllers/smallest_circle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..4b0d7c9f2d09204a7db9c6b7adc545eab7a142c5 --- /dev/null +++ b/server/Gym/parameter_controllers/smallest_circle/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SmallestCircle_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + 
super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sorting/__init__.py b/server/Gym/parameter_controllers/sorting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db960345796f94029259b1117c1a0af0d127bcd6 --- /dev/null +++ b/server/Gym/parameter_controllers/sorting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Sorting_ParameterController diff --git a/server/Gym/parameter_controllers/sorting/parameter_controller.py b/server/Gym/parameter_controllers/sorting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a359996609db5fd691574540ce83012b487d7834 --- /dev/null +++ b/server/Gym/parameter_controllers/sorting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Sorting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/spiral_matrix/__init__.py b/server/Gym/parameter_controllers/spiral_matrix/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..670a94bca1fae9655bec4946ea0b285469ba469e --- /dev/null +++ b/server/Gym/parameter_controllers/spiral_matrix/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SpiralMatrix_ParameterController diff --git a/server/Gym/parameter_controllers/spiral_matrix/parameter_controller.py b/server/Gym/parameter_controllers/spiral_matrix/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..197f6231a0928173abf3c6bd88c2d5f0bcb349e6 --- /dev/null +++ b/server/Gym/parameter_controllers/spiral_matrix/parameter_controller.py @@ -0,0 +1,14 @@ +from ...parameter_controller import ParameterController +from typing import Dict, List + + +class SpiralMatrix_ParameterController(ParameterController): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.MAX_M_N = 3 + + def update(self) -> None: + self.MAX_M_N = int(self.MAX_M_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict]: + return [dict(MAX_M_N = self.MAX_M_N)] diff --git a/server/Gym/parameter_controllers/splitting_game/__init__.py b/server/Gym/parameter_controllers/splitting_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c70af6fa0b251eb46a4021208114a14dbbbb59ee --- /dev/null +++ b/server/Gym/parameter_controllers/splitting_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SplittingGame_ParameterController diff --git a/server/Gym/parameter_controllers/splitting_game/parameter_controller.py b/server/Gym/parameter_controllers/splitting_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ea7ac0ba093efa7116aa446d417e906f6b9bc747 --- /dev/null +++ b/server/Gym/parameter_controllers/splitting_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SplittingGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/spy_network/__init__.py b/server/Gym/parameter_controllers/spy_network/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0004d153096d4b82e3877720a5483b7f8d0394f1 --- 
/dev/null +++ b/server/Gym/parameter_controllers/spy_network/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SpyNetwork_ParameterController diff --git a/server/Gym/parameter_controllers/spy_network/parameter_controller.py b/server/Gym/parameter_controllers/spy_network/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f44fde71262853cfa80f65cd32dfd6693ff7b768 --- /dev/null +++ b/server/Gym/parameter_controllers/spy_network/parameter_controller.py @@ -0,0 +1,21 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class SpyNetwork_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, dominated_probability_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.01, 0.02, 0.03, 0.05, 0.1, 0.15] + self.edge_density_list = edge_density_list + + if dominated_probability_list is None : + dominated_probability_list = [0.3, 0.4, 0.5, 0.6, 0.7] + self.dominated_probability_list = dominated_probability_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density, dominated_probability = dominated_probability) for edge_density in self.edge_density_list for dominated_probability in self.dominated_probability_list if int(edge_density * self.N * (self.N - 1)) > 0] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/squ_squarks/__init__.py b/server/Gym/parameter_controllers/squ_squarks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02ff9844512c418d010b3152a7ae245041db272e --- /dev/null +++ b/server/Gym/parameter_controllers/squ_squarks/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SquSquarks_ParameterController diff --git 
a/server/Gym/parameter_controllers/squ_squarks/parameter_controller.py b/server/Gym/parameter_controllers/squ_squarks/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..d9d6d2c31828d9877cc07cb31039115f0cc102d3 --- /dev/null +++ b/server/Gym/parameter_controllers/squ_squarks/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SquSquarks_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/square_undamaged_point_counting/__init__.py b/server/Gym/parameter_controllers/square_undamaged_point_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..600fbf979b7ca26b9cbdf76ac2c85e7d5d25b3e4 --- /dev/null +++ b/server/Gym/parameter_controllers/square_undamaged_point_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SquareUndamagedPointCounting_ParameterController diff --git a/server/Gym/parameter_controllers/square_undamaged_point_counting/parameter_controller.py b/server/Gym/parameter_controllers/square_undamaged_point_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..96072f0d677086f5c07344c60b04da6841997958 --- /dev/null +++ b/server/Gym/parameter_controllers/square_undamaged_point_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SquareUndamagedPointCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 2 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> 
List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/star_battle/__init__.py b/server/Gym/parameter_controllers/star_battle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dae1393babca30744a53e1e3ce572682bd944400 --- /dev/null +++ b/server/Gym/parameter_controllers/star_battle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StarBattle_ParameterController diff --git a/server/Gym/parameter_controllers/star_battle/parameter_controller.py b/server/Gym/parameter_controllers/star_battle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..63904969b6f1b7059c93596ce18f2d803f64f44b --- /dev/null +++ b/server/Gym/parameter_controllers/star_battle/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class StarBattle_ParameterController(ParameterController) : + def __init__(self, sparsity_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 4 + + if sparsity_list is None : + sparsity_list = [0.1, 0.3, 0.5, 0.6, 0.7, 0.8, 0.9] + self.sparsity_list = sparsity_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, sparsity = sparsity) for sparsity in self.sparsity_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/stirling_second/__init__.py b/server/Gym/parameter_controllers/stirling_second/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..550c02575c5d9fc5bd1818c3cd9002d180b9ece8 --- /dev/null +++ b/server/Gym/parameter_controllers/stirling_second/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StirlingSecond_ParameterController diff --git a/server/Gym/parameter_controllers/stirling_second/parameter_controller.py 
b/server/Gym/parameter_controllers/stirling_second/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf0d9477b3813284c9684676d1741acbcf83ebb --- /dev/null +++ b/server/Gym/parameter_controllers/stirling_second/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class StirlingSecond_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 7 + self.MAX_R = 2 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + self.MAX_R = int(self.MAX_R * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_R = self.MAX_R)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/stone_game/__init__.py b/server/Gym/parameter_controllers/stone_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ca047b8b1e2e4de8b52ab919f4141f2f29f80433 --- /dev/null +++ b/server/Gym/parameter_controllers/stone_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StoneGame_ParameterController diff --git a/server/Gym/parameter_controllers/stone_game/parameter_controller.py b/server/Gym/parameter_controllers/stone_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ece240beea80951b4095f5d25eb3f5f233818bbf --- /dev/null +++ b/server/Gym/parameter_controllers/stone_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class StoneGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_SUM = 5 + + def update(self) -> None : + self.MAX_SUM = int(self.MAX_SUM * 1.2 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_SUM = self.MAX_SUM)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/stone_intervals_game/__init__.py b/server/Gym/parameter_controllers/stone_intervals_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d26b8ad333bfc3ae88f44f136077f17fe046ad --- /dev/null +++ b/server/Gym/parameter_controllers/stone_intervals_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StoneIntervalsGame_ParameterController diff --git a/server/Gym/parameter_controllers/stone_intervals_game/parameter_controller.py b/server/Gym/parameter_controllers/stone_intervals_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..a519ba4848d2870d0aac4183b27066ae5b2c8d79 --- /dev/null +++ b/server/Gym/parameter_controllers/stone_intervals_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class StoneIntervalsGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/string_partition_shuffle/__init__.py b/server/Gym/parameter_controllers/string_partition_shuffle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ecfb7d5400b941786fbbd168fced5af6c122bc2e --- /dev/null +++ b/server/Gym/parameter_controllers/string_partition_shuffle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StringPartitionShuffle_ParameterController diff --git a/server/Gym/parameter_controllers/string_partition_shuffle/parameter_controller.py b/server/Gym/parameter_controllers/string_partition_shuffle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5e206a6c2718d6038ceb1e40d81e1ee68729687a --- /dev/null +++ 
b/server/Gym/parameter_controllers/string_partition_shuffle/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class StringPartitionShuffle_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/string_reversal_construction/__init__.py b/server/Gym/parameter_controllers/string_reversal_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f0e29288328d5de1187a5fd29469236f579772e9 --- /dev/null +++ b/server/Gym/parameter_controllers/string_reversal_construction/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StringReversalConstruction_ParameterController \ No newline at end of file diff --git a/server/Gym/parameter_controllers/string_reversal_construction/parameter_controller.py b/server/Gym/parameter_controllers/string_reversal_construction/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..528bd97103464b163e657490d8d2ff93c0a505ca --- /dev/null +++ b/server/Gym/parameter_controllers/string_reversal_construction/parameter_controller.py @@ -0,0 +1,15 @@ +import random +from typing import Dict, List +from ...parameter_controller import ParameterController + + +class StringReversalConstruction_ParameterController(ParameterController): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.n = 3 + + def update(self) -> None: + self.n += 1 + + def get_parameter_list(self) -> List[Dict]: + return [dict(n=self.n)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/stu_well/__init__.py b/server/Gym/parameter_controllers/stu_well/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a8ffb64904f343346fbe2c66dafd18773b1115ce --- /dev/null +++ b/server/Gym/parameter_controllers/stu_well/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import STUWell_ParameterController diff --git a/server/Gym/parameter_controllers/stu_well/parameter_controller.py b/server/Gym/parameter_controllers/stu_well/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..abc8d2dec0674c4df0c6a14e1f66c63b7eaddef4 --- /dev/null +++ b/server/Gym/parameter_controllers/stu_well/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class STUWell_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/stunt_flying/__init__.py b/server/Gym/parameter_controllers/stunt_flying/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37357c1a3ec9bf372c8fd7f294b2e5ce8e3bef6d --- /dev/null +++ b/server/Gym/parameter_controllers/stunt_flying/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import StuntFlying_ParameterController diff --git a/server/Gym/parameter_controllers/stunt_flying/parameter_controller.py b/server/Gym/parameter_controllers/stunt_flying/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..c98a1973c4655d614f419109c4606425abdb8465 --- /dev/null +++ b/server/Gym/parameter_controllers/stunt_flying/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class StuntFlying_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + 
self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/subarray_sum_xor/__init__.py b/server/Gym/parameter_controllers/subarray_sum_xor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1594daf9494cf6599c9ec8633c2d9d38f2c8aaf9 --- /dev/null +++ b/server/Gym/parameter_controllers/subarray_sum_xor/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubarraySumXor_ParameterController diff --git a/server/Gym/parameter_controllers/subarray_sum_xor/parameter_controller.py b/server/Gym/parameter_controllers/subarray_sum_xor/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..060fef647d77be894c5d0341b6a884ffe78a9822 --- /dev/null +++ b/server/Gym/parameter_controllers/subarray_sum_xor/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SubarraySumXor_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/subarray_xor_sum/__init__.py b/server/Gym/parameter_controllers/subarray_xor_sum/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d5a5c4999f10e42c7cd3421cbb203a9320fe8c84 --- /dev/null +++ b/server/Gym/parameter_controllers/subarray_xor_sum/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubarrayXorSum_ParameterController diff --git a/server/Gym/parameter_controllers/subarray_xor_sum/parameter_controller.py b/server/Gym/parameter_controllers/subarray_xor_sum/parameter_controller.py new file mode 100644 index 
0000000000000000000000000000000000000000..b536ef61776489f4af62b9e13617190092a5e529 --- /dev/null +++ b/server/Gym/parameter_controllers/subarray_xor_sum/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SubarrayXorSum_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/subgraph_isomorphism/__init__.py b/server/Gym/parameter_controllers/subgraph_isomorphism/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cc01333b7288de693389c32cefc4f3735d1ace31 --- /dev/null +++ b/server/Gym/parameter_controllers/subgraph_isomorphism/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubgraphIsomorphism_ParameterController diff --git a/server/Gym/parameter_controllers/subgraph_isomorphism/parameter_controller.py b/server/Gym/parameter_controllers/subgraph_isomorphism/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..097971d0133054971423216b5bd6d9fa04ee8341 --- /dev/null +++ b/server/Gym/parameter_controllers/subgraph_isomorphism/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class SubgraphIsomorphism_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if edge_density_list is None : + edge_density_list = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N2 = self.N, edge_density = 
edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1) / 2) > 0] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/submatrix_sum_divisible_counting/__init__.py b/server/Gym/parameter_controllers/submatrix_sum_divisible_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7a9c9cbff501e0bbd9c7acd6739e41b100cf43b6 --- /dev/null +++ b/server/Gym/parameter_controllers/submatrix_sum_divisible_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubmatrixSumDivisibleCounting_ParameterController diff --git a/server/Gym/parameter_controllers/submatrix_sum_divisible_counting/parameter_controller.py b/server/Gym/parameter_controllers/submatrix_sum_divisible_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3b882e4a7bb697db77be011a67c751c7101deb15 --- /dev/null +++ b/server/Gym/parameter_controllers/submatrix_sum_divisible_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SubmatrixSumDivisibleCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/subsequence_reversal_lnds/__init__.py b/server/Gym/parameter_controllers/subsequence_reversal_lnds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ba078d458812ccec1ad7a1d84369c85a750b6d69 --- /dev/null +++ b/server/Gym/parameter_controllers/subsequence_reversal_lnds/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubsequenceReversalLNDS_ParameterController diff --git 
a/server/Gym/parameter_controllers/subsequence_reversal_lnds/parameter_controller.py b/server/Gym/parameter_controllers/subsequence_reversal_lnds/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..957fcf8ffdfd731859667c942ca701fa89e71d4b --- /dev/null +++ b/server/Gym/parameter_controllers/subsequence_reversal_lnds/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SubsequenceReversalLNDS_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/subset_sum/__init__.py b/server/Gym/parameter_controllers/subset_sum/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..87dc79a7a556b00e10b9df62a63d3403b98c1c26 --- /dev/null +++ b/server/Gym/parameter_controllers/subset_sum/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubsetSum_ParameterController diff --git a/server/Gym/parameter_controllers/subset_sum/parameter_controller.py b/server/Gym/parameter_controllers/subset_sum/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..540d11b1ac70214ea354d697d94242b1a01b8a78 --- /dev/null +++ b/server/Gym/parameter_controllers/subset_sum/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SubsetSum_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/subset_sum_sequence/__init__.py b/server/Gym/parameter_controllers/subset_sum_sequence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59fe5b4ce5f3c4733d6450da87913a1b5ec96ab3 --- /dev/null +++ b/server/Gym/parameter_controllers/subset_sum_sequence/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SubsetSumSequence_ParameterController diff --git a/server/Gym/parameter_controllers/subset_sum_sequence/parameter_controller.py b/server/Gym/parameter_controllers/subset_sum_sequence/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8dc6a6961197b00ca36a16166482336639b2294a --- /dev/null +++ b/server/Gym/parameter_controllers/subset_sum_sequence/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SubsetSumSequence_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + self.MAX_K = 2 + + def update(self) -> None : + self.MAX_N *= 2 + self.MAX_K += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_K = min(self.MAX_N, self.MAX_K))] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sudoku/__init__.py b/server/Gym/parameter_controllers/sudoku/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3f853719152e04965c93a7a70842f043b823e67f --- /dev/null +++ b/server/Gym/parameter_controllers/sudoku/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Sudoku_ParameterController diff --git a/server/Gym/parameter_controllers/sudoku/parameter_controller.py b/server/Gym/parameter_controllers/sudoku/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..696891f3c1c3a742367a6bf52e1ee70f726b5c94 --- /dev/null +++ b/server/Gym/parameter_controllers/sudoku/parameter_controller.py @@ 
-0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Sudoku_ParameterController(ParameterController) : + def __init__(self, sparsity_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 2 + + if sparsity_list is None : + sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.sparsity_list = sparsity_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, sparsity = sparsity) for sparsity in self.sparsity_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_divisor_num/__init__.py b/server/Gym/parameter_controllers/sum_divisor_num/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c035d91fe10fe18d9a70c571cf751fedc56d4dcb --- /dev/null +++ b/server/Gym/parameter_controllers/sum_divisor_num/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Sum_DivisorNum_ParameterController diff --git a/server/Gym/parameter_controllers/sum_divisor_num/parameter_controller.py b/server/Gym/parameter_controllers/sum_divisor_num/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..876c3f9a0267236a3a69909fb5930b5e7f8629f7 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_divisor_num/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Sum_DivisorNum_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_R = 10 + + def update(self) -> None : + self.MAX_R = int(self.MAX_R * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_R = self.MAX_R)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_gcd/__init__.py b/server/Gym/parameter_controllers/sum_gcd/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..487ec4be6a3bea5730ee3942acdea9e3120c28b9 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_gcd/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumGCD_ParameterController diff --git a/server/Gym/parameter_controllers/sum_gcd/parameter_controller.py b/server/Gym/parameter_controllers/sum_gcd/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e2b0e5c0de879194d82cf4b3b3ab6a81a03468c9 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_gcd/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumGCD_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_gcd_with_individual/__init__.py b/server/Gym/parameter_controllers/sum_gcd_with_individual/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2e1fe4ba0f6a98dd1ef5a70767f7944433c6a4b2 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_gcd_with_individual/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumGCDWithIndividual_ParameterController diff --git a/server/Gym/parameter_controllers/sum_gcd_with_individual/parameter_controller.py b/server/Gym/parameter_controllers/sum_gcd_with_individual/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..fd4aa563160df6e49ec3907c9f4be2eeba5d7beb --- /dev/null +++ b/server/Gym/parameter_controllers/sum_gcd_with_individual/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class 
SumGCDWithIndividual_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 32 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_lcm/__init__.py b/server/Gym/parameter_controllers/sum_lcm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..38095c42fa5a8fa620e4660bfb1918928354f6de --- /dev/null +++ b/server/Gym/parameter_controllers/sum_lcm/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumLCM_ParameterController diff --git a/server/Gym/parameter_controllers/sum_lcm/parameter_controller.py b/server/Gym/parameter_controllers/sum_lcm/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5e2bcb9c9c65815cbc7c890fdfb9867865b3f773 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_lcm/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumLCM_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_manhattan_curved_surface/__init__.py b/server/Gym/parameter_controllers/sum_manhattan_curved_surface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..58554085ae712d2b1a10948ebe29799f6fe455a2 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_manhattan_curved_surface/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumManhattan_CurvedSurface_ParameterController diff --git 
a/server/Gym/parameter_controllers/sum_manhattan_curved_surface/parameter_controller.py b/server/Gym/parameter_controllers/sum_manhattan_curved_surface/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..86f816ab3e8b692b701fa6283717059b2dbc4db1 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_manhattan_curved_surface/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumManhattan_CurvedSurface_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_A_B = 5 + + def update(self) -> None : + self.MAX_A_B = int(self.MAX_A_B * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_A_B = self.MAX_A_B)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_mod/__init__.py b/server/Gym/parameter_controllers/sum_mod/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..68d32dc34ea134ff015a09e7a0b9ed9463a04002 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_mod/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumMOD_ParameterController diff --git a/server/Gym/parameter_controllers/sum_mod/parameter_controller.py b/server/Gym/parameter_controllers/sum_mod/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..9d8b445aa84de8332303dacdb02b9d3012ba73d8 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_mod/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumMOD_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file 
diff --git a/server/Gym/parameter_controllers/sum_phi_interval/__init__.py b/server/Gym/parameter_controllers/sum_phi_interval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4467e70d3d08c3f9e77b31d2ae52704be8b2fcc8 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_phi_interval/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumPHIInterval_ParameterController diff --git a/server/Gym/parameter_controllers/sum_phi_interval/parameter_controller.py b/server/Gym/parameter_controllers/sum_phi_interval/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..4b93a2c7a5bafe3345e5157b23d8c63ad42da59e --- /dev/null +++ b/server/Gym/parameter_controllers/sum_phi_interval/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumPHIInterval_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_DELTA = 5 + + def update(self) -> None : + self.MAX_DELTA = int(self.MAX_DELTA * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_DELTA = self.MAX_DELTA)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_product_divisor_num/__init__.py b/server/Gym/parameter_controllers/sum_product_divisor_num/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8f5a6d19e204fd4e5d082e9542cb52b48cb5abfb --- /dev/null +++ b/server/Gym/parameter_controllers/sum_product_divisor_num/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumProductDivisorNum_ParameterController diff --git a/server/Gym/parameter_controllers/sum_product_divisor_num/parameter_controller.py b/server/Gym/parameter_controllers/sum_product_divisor_num/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..55d1219c54f0e2ad9b54f111c8dceea2c473e746 --- /dev/null +++ 
b/server/Gym/parameter_controllers/sum_product_divisor_num/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumProductDivisorNum_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_pseudo_euclidean/__init__.py b/server/Gym/parameter_controllers/sum_pseudo_euclidean/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a96e5fe5e3df63c0ea63217a758efdc5abc6871f --- /dev/null +++ b/server/Gym/parameter_controllers/sum_pseudo_euclidean/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumPseudoEuclidean_ParameterController diff --git a/server/Gym/parameter_controllers/sum_pseudo_euclidean/parameter_controller.py b/server/Gym/parameter_controllers/sum_pseudo_euclidean/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e1bfbb4167329e117ee0edec3074c503a03195bc --- /dev/null +++ b/server/Gym/parameter_controllers/sum_pseudo_euclidean/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumPseudoEuclidean_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 5 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_set_multiplication/__init__.py b/server/Gym/parameter_controllers/sum_set_multiplication/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..72d467924e53bd0ff5f069744f1ea149e1d88ced --- /dev/null +++ b/server/Gym/parameter_controllers/sum_set_multiplication/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumSetMultiplication_ParameterController diff --git a/server/Gym/parameter_controllers/sum_set_multiplication/parameter_controller.py b/server/Gym/parameter_controllers/sum_set_multiplication/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f68b106033017c5d4ed4e2770f00167fd6dffa93 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_set_multiplication/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumSetMultiplication_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + self.MAX_K = 8 + + def update(self) -> None : + self.MAX_N += 1 + self.MAX_K = int(self.MAX_K * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_spanning_tree_gcd/__init__.py b/server/Gym/parameter_controllers/sum_spanning_tree_gcd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..422c305df30573e546e829693fbccf28b79ff7c5 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_spanning_tree_gcd/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumSpanningTreeGCD_ParameterController diff --git a/server/Gym/parameter_controllers/sum_spanning_tree_gcd/parameter_controller.py b/server/Gym/parameter_controllers/sum_spanning_tree_gcd/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..572ec96c414fca3d937fb2df074eede3318a6857 --- /dev/null +++ b/server/Gym/parameter_controllers/sum_spanning_tree_gcd/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, 
List, Optional +from ...parameter_controller import ParameterController + +class SumSpanningTreeGCD_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.9] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list if int(edge_density * self.N * (self.N - 1) / 2) >= self.N - 1] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_triangle_area/__init__.py b/server/Gym/parameter_controllers/sum_triangle_area/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3342716cc9814bf978069082d9bcdfa47c15e73a --- /dev/null +++ b/server/Gym/parameter_controllers/sum_triangle_area/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumTriangleArea_ParameterController diff --git a/server/Gym/parameter_controllers/sum_triangle_area/parameter_controller.py b/server/Gym/parameter_controllers/sum_triangle_area/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e4993d92e4b2b61e08d59383ee8b59b71a1a736a --- /dev/null +++ b/server/Gym/parameter_controllers/sum_triangle_area/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumTriangleArea_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/sum_xor_divisor_num/__init__.py 
b/server/Gym/parameter_controllers/sum_xor_divisor_num/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fdc54af25def4f51db7057e238d0b95cc9488a8e --- /dev/null +++ b/server/Gym/parameter_controllers/sum_xor_divisor_num/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SumXorDivisorNum_ParameterController diff --git a/server/Gym/parameter_controllers/sum_xor_divisor_num/parameter_controller.py b/server/Gym/parameter_controllers/sum_xor_divisor_num/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..713cbfdac8428d1e1de76a1d7e0f1149ec325d3a --- /dev/null +++ b/server/Gym/parameter_controllers/sum_xor_divisor_num/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class SumXorDivisorNum_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 5 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.5) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/survo_puzzle/__init__.py b/server/Gym/parameter_controllers/survo_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4688c45997d75f77d4bf5614d401ea1d017d55dd --- /dev/null +++ b/server/Gym/parameter_controllers/survo_puzzle/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import SurvoPuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/survo_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/survo_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..eae445bd52e74e4126e1887042e117f2ea4a6520 --- /dev/null +++ b/server/Gym/parameter_controllers/survo_puzzle/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from 
...parameter_controller import ParameterController + +class SurvoPuzzle_ParameterController(ParameterController) : + def __init__(self, sparsity_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + if sparsity_list is None : + sparsity_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.sparsity_list = sparsity_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, sparsity = sparsity) for sparsity in self.sparsity_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/taking_prime_game/__init__.py b/server/Gym/parameter_controllers/taking_prime_game/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4a77961639b8ad710b2e6bb65cceccbe856dba6d --- /dev/null +++ b/server/Gym/parameter_controllers/taking_prime_game/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TakingPrimeGame_ParameterController diff --git a/server/Gym/parameter_controllers/taking_prime_game/parameter_controller.py b/server/Gym/parameter_controllers/taking_prime_game/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..df6165975b6fa487e112c671e7b7e6d19337afe3 --- /dev/null +++ b/server/Gym/parameter_controllers/taking_prime_game/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TakingPrimeGame_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 8 + + def update(self) -> None : + self.MAX_N = int(self.MAX_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/task_arrangement/__init__.py b/server/Gym/parameter_controllers/task_arrangement/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a50e5447487bfe79aa3e223de3924063d775cf92 --- /dev/null +++ b/server/Gym/parameter_controllers/task_arrangement/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TaskArrangement_ParameterController diff --git a/server/Gym/parameter_controllers/task_arrangement/parameter_controller.py b/server/Gym/parameter_controllers/task_arrangement/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..b028e26333652ae912af4327acc98da348364180 --- /dev/null +++ b/server/Gym/parameter_controllers/task_arrangement/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TaskArrangement_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tetris_attack/__init__.py b/server/Gym/parameter_controllers/tetris_attack/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..592e5222a68f71919e88463a1fe7a2d12602656b --- /dev/null +++ b/server/Gym/parameter_controllers/tetris_attack/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TetrisAttack_ParameterController diff --git a/server/Gym/parameter_controllers/tetris_attack/parameter_controller.py b/server/Gym/parameter_controllers/tetris_attack/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..f07dc67a7f50e49536ae1bc9415793e0282f6aea --- /dev/null +++ b/server/Gym/parameter_controllers/tetris_attack/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TetrisAttack_ParameterController(ParameterController) : + def __init__(self, **kwargs) : 
+ super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/three_string_common_subsequence_counting/__init__.py b/server/Gym/parameter_controllers/three_string_common_subsequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..013fed35c6a29a0735e7204217b4bc48fc320f2b --- /dev/null +++ b/server/Gym/parameter_controllers/three_string_common_subsequence_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import ThreeStringCommonSubsequenceCounting_ParameterController diff --git a/server/Gym/parameter_controllers/three_string_common_subsequence_counting/parameter_controller.py b/server/Gym/parameter_controllers/three_string_common_subsequence_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..e2b18c94b2f56aba54c4e6a6825680b743a3b03d --- /dev/null +++ b/server/Gym/parameter_controllers/three_string_common_subsequence_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class ThreeStringCommonSubsequenceCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + + def update(self) -> None : + self.MAX_N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/three_vertex_cycle_counting/__init__.py b/server/Gym/parameter_controllers/three_vertex_cycle_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa3887ea86a5ab773938f89532e0858bed5ad46b --- /dev/null +++ b/server/Gym/parameter_controllers/three_vertex_cycle_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import 
ThreeVertexCycleCounting_ParameterController diff --git a/server/Gym/parameter_controllers/three_vertex_cycle_counting/parameter_controller.py b/server/Gym/parameter_controllers/three_vertex_cycle_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..01f97cf889000b1ffc70698cb0e0111f6723d3f6 --- /dev/null +++ b/server/Gym/parameter_controllers/three_vertex_cycle_counting/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class ThreeVertexCycleCounting_ParameterController(ParameterController) : + def __init__(self, edge_ratio_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 5 + + if edge_ratio_list is None : + edge_ratio_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0] + self.edge_ratio_list = edge_ratio_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_ratio = edge_ratio) for edge_ratio in self.edge_ratio_list if int(self.N * edge_ratio) <= self.N * (self.N - 1) // 2] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/topological_sort/__init__.py b/server/Gym/parameter_controllers/topological_sort/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..914810d866d69f32ef5bd893903856f91abc5b94 --- /dev/null +++ b/server/Gym/parameter_controllers/topological_sort/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TopologicalSort_ParameterController diff --git a/server/Gym/parameter_controllers/topological_sort/parameter_controller.py b/server/Gym/parameter_controllers/topological_sort/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..dc5a12f38fe1992226f3c6bed4fdd0ddb35800e8 --- /dev/null +++ 
b/server/Gym/parameter_controllers/topological_sort/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TopologicalSort_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/topological_sort_minimal_lexicographical_order/__init__.py b/server/Gym/parameter_controllers/topological_sort_minimal_lexicographical_order/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..77b7652a69a13a41f980fa8dc7fd53a8f223f499 --- /dev/null +++ b/server/Gym/parameter_controllers/topological_sort_minimal_lexicographical_order/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TopologicalSort_MinimalLexicographicalOrder_ParameterController diff --git a/server/Gym/parameter_controllers/topological_sort_minimal_lexicographical_order/parameter_controller.py b/server/Gym/parameter_controllers/topological_sort_minimal_lexicographical_order/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..1a340079895d75da34faa3d841bda9728f13da70 --- /dev/null +++ b/server/Gym/parameter_controllers/topological_sort_minimal_lexicographical_order/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TopologicalSort_MinimalLexicographicalOrder_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/tournament_longest_path/__init__.py b/server/Gym/parameter_controllers/tournament_longest_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f328c81a4a41d0d19b67566affb8bbc19af51f4d --- /dev/null +++ b/server/Gym/parameter_controllers/tournament_longest_path/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Tournament_LongestPath_ParameterController diff --git a/server/Gym/parameter_controllers/tournament_longest_path/parameter_controller.py b/server/Gym/parameter_controllers/tournament_longest_path/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..5ce18ea00079c944f8532e2b660a036cc796b2d0 --- /dev/null +++ b/server/Gym/parameter_controllers/tournament_longest_path/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Tournament_LongestPath_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/transmission_delay/__init__.py b/server/Gym/parameter_controllers/transmission_delay/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..404ad98e098fe6e6749262b60d51d545c1596a88 --- /dev/null +++ b/server/Gym/parameter_controllers/transmission_delay/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TransmissionDelay_ParameterController diff --git a/server/Gym/parameter_controllers/transmission_delay/parameter_controller.py b/server/Gym/parameter_controllers/transmission_delay/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..b729a7f9b12d84a7e89d45f6fadd97b571bf2998 --- /dev/null +++ 
b/server/Gym/parameter_controllers/transmission_delay/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TransmissionDelay_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_add_one_edge_diameter/__init__.py b/server/Gym/parameter_controllers/tree_add_one_edge_diameter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..171575c30b2540c1b105874df416d8e0aa2974e4 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_add_one_edge_diameter/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeAddOneEdgeDiameter_ParameterController diff --git a/server/Gym/parameter_controllers/tree_add_one_edge_diameter/parameter_controller.py b/server/Gym/parameter_controllers/tree_add_one_edge_diameter/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..53066239d9859c69c323c334f2f12d510d77a866 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_add_one_edge_diameter/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeAddOneEdgeDiameter_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_center/__init__.py b/server/Gym/parameter_controllers/tree_center/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..419fa90b6e0f4e07c5a1e43923ccca360baa605a --- /dev/null +++ 
b/server/Gym/parameter_controllers/tree_center/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeCenter_ParameterController diff --git a/server/Gym/parameter_controllers/tree_center/parameter_controller.py b/server/Gym/parameter_controllers/tree_center/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..7c6c5d91703eb263ac0604173d06ee71d18828bd --- /dev/null +++ b/server/Gym/parameter_controllers/tree_center/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeCenter_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_change_one_edge_diameter/__init__.py b/server/Gym/parameter_controllers/tree_change_one_edge_diameter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..29ac210bbef5e68c5f0a376f913cb4c12f11d751 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_change_one_edge_diameter/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeChangeOneEdgeDiameter_ParameterController diff --git a/server/Gym/parameter_controllers/tree_change_one_edge_diameter/parameter_controller.py b/server/Gym/parameter_controllers/tree_change_one_edge_diameter/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..62b7057f7b2bfa6cce241cf6fe701d3ce59c28bb --- /dev/null +++ b/server/Gym/parameter_controllers/tree_change_one_edge_diameter/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeChangeOneEdgeDiameter_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) 
+ self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_coloring/__init__.py b/server/Gym/parameter_controllers/tree_coloring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2be8876204dfe6d9fc703a7dd6ea8e66bd29c376 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_coloring/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeColoring_ParameterController diff --git a/server/Gym/parameter_controllers/tree_coloring/parameter_controller.py b/server/Gym/parameter_controllers/tree_coloring/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..46c0960812cafedb7b24c6c05f1547a9e92b65ed --- /dev/null +++ b/server/Gym/parameter_controllers/tree_coloring/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeColoring_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_distance_equal_triad_counting/__init__.py b/server/Gym/parameter_controllers/tree_distance_equal_triad_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1b0cc05b0647770c760fe10e7feb65c68ee0ee27 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_distance_equal_triad_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Tree_DistanceEqualTriad_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/tree_distance_equal_triad_counting/parameter_controller.py 
b/server/Gym/parameter_controllers/tree_distance_equal_triad_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..ccf53e195cb632281ae5fccf1de771c49b955655 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_distance_equal_triad_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class Tree_DistanceEqualTriad_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_dynamic_xor_zero_path/__init__.py b/server/Gym/parameter_controllers/tree_dynamic_xor_zero_path/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..761a1b21588e4df5e2450b222b85c548d531524c --- /dev/null +++ b/server/Gym/parameter_controllers/tree_dynamic_xor_zero_path/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeDynamic_XORZeroPath_ParameterController diff --git a/server/Gym/parameter_controllers/tree_dynamic_xor_zero_path/parameter_controller.py b/server/Gym/parameter_controllers/tree_dynamic_xor_zero_path/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..fda6ca93894ac21b6c40159d1b09050bea22720b --- /dev/null +++ b/server/Gym/parameter_controllers/tree_dynamic_xor_zero_path/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeDynamic_XORZeroPath_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git 
a/server/Gym/parameter_controllers/tree_elimination_expectation/__init__.py b/server/Gym/parameter_controllers/tree_elimination_expectation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..443703814fb6e6eead8bf88fafeba6b82543eecd --- /dev/null +++ b/server/Gym/parameter_controllers/tree_elimination_expectation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeElimination_Expectation_ParameterController diff --git a/server/Gym/parameter_controllers/tree_elimination_expectation/parameter_controller.py b/server/Gym/parameter_controllers/tree_elimination_expectation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2f7f4a577ece1b7cfb93a2e459dc013726278afe --- /dev/null +++ b/server/Gym/parameter_controllers/tree_elimination_expectation/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeElimination_Expectation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_even_partitioning/__init__.py b/server/Gym/parameter_controllers/tree_even_partitioning/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8d0a0d13b993e9d6f803bc1a086b1ed915ea93b9 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_even_partitioning/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeEvenPartitioning_ParameterController diff --git a/server/Gym/parameter_controllers/tree_even_partitioning/parameter_controller.py b/server/Gym/parameter_controllers/tree_even_partitioning/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6922440f40a887fad6f7febc38c0148ce4213e19 --- 
/dev/null +++ b/server/Gym/parameter_controllers/tree_even_partitioning/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeEvenPartitioning_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N = 3 + self.MAX_K = 2 + + def update(self) -> None : + if self.MAX_K < self.MAX_N : + self.MAX_K += 1 + else : + self.MAX_N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N = self.MAX_N, MAX_K = self.MAX_K)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_maximum_visited_vertex/__init__.py b/server/Gym/parameter_controllers/tree_maximum_visited_vertex/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad97361950b6d2a38691debfc1bb8857fd044010 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_maximum_visited_vertex/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeMaximumVisitedVertex_ParameterController diff --git a/server/Gym/parameter_controllers/tree_maximum_visited_vertex/parameter_controller.py b/server/Gym/parameter_controllers/tree_maximum_visited_vertex/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..427132ba6d8e170be4491eebf8f62db1b195effd --- /dev/null +++ b/server/Gym/parameter_controllers/tree_maximum_visited_vertex/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeMaximumVisitedVertex_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_random_walk_expectation/__init__.py 
b/server/Gym/parameter_controllers/tree_random_walk_expectation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf9d0252a96053862cfc462253082cd4298772d --- /dev/null +++ b/server/Gym/parameter_controllers/tree_random_walk_expectation/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeRandomWalkExpectation_ParameterController diff --git a/server/Gym/parameter_controllers/tree_random_walk_expectation/parameter_controller.py b/server/Gym/parameter_controllers/tree_random_walk_expectation/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..973a10aab4b3eeb3a6bff947d5112fde641f66c2 --- /dev/null +++ b/server/Gym/parameter_controllers/tree_random_walk_expectation/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeRandomWalkExpectation_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/tree_topological_sequence_counting/__init__.py b/server/Gym/parameter_controllers/tree_topological_sequence_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4cc07aa8c309a0ef6f703175cf2e2171a5f51e7b --- /dev/null +++ b/server/Gym/parameter_controllers/tree_topological_sequence_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TreeTopologicalSequenceCounting_ParameterController diff --git a/server/Gym/parameter_controllers/tree_topological_sequence_counting/parameter_controller.py b/server/Gym/parameter_controllers/tree_topological_sequence_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..27bbc0571281861eabb46938ce98ea38a17e99a9 --- /dev/null +++ 
b/server/Gym/parameter_controllers/tree_topological_sequence_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TreeTopologicalSequenceCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/triumphal_arch/__init__.py b/server/Gym/parameter_controllers/triumphal_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0a76faf6c6414d53b7d57d53725fa854e14443b2 --- /dev/null +++ b/server/Gym/parameter_controllers/triumphal_arch/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TriumphalArch_ParameterController diff --git a/server/Gym/parameter_controllers/triumphal_arch/parameter_controller.py b/server/Gym/parameter_controllers/triumphal_arch/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..53cc452d1383c498458d4c359b5ae03c4e22198e --- /dev/null +++ b/server/Gym/parameter_controllers/triumphal_arch/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TriumphalArch_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/twiddle_puzzle/__init__.py b/server/Gym/parameter_controllers/twiddle_puzzle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c87e63c9673ed9e219de0335f1511179e5935a41 --- /dev/null +++ b/server/Gym/parameter_controllers/twiddle_puzzle/__init__.py 
@@ -0,0 +1 @@ +from .parameter_controller import TwiddlePuzzle_ParameterController diff --git a/server/Gym/parameter_controllers/twiddle_puzzle/parameter_controller.py b/server/Gym/parameter_controllers/twiddle_puzzle/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..beff0e7383000872cce58c8976514edb6654573c --- /dev/null +++ b/server/Gym/parameter_controllers/twiddle_puzzle/parameter_controller.py @@ -0,0 +1,15 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class TwiddlePuzzle_ParameterController(ParameterController) : + def __init__(self, steps_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + self.steps_list = [2, 3, 4, 5, 6, 7, 8, 9, 10] if steps_list is None else steps_list + + def update(self) -> None : + self.MAX_N_M += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M, steps = steps) for steps in self.steps_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/two_sat/__init__.py b/server/Gym/parameter_controllers/two_sat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d9bb2c16b77104fbfa401aafe2bd888501f00195 --- /dev/null +++ b/server/Gym/parameter_controllers/two_sat/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TwoSAT_ParameterController diff --git a/server/Gym/parameter_controllers/two_sat/parameter_controller.py b/server/Gym/parameter_controllers/two_sat/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..673a4fe3767377f84503992dc4f7a9dbfb415845 --- /dev/null +++ b/server/Gym/parameter_controllers/two_sat/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class TwoSAT_ParameterController(ParameterController) : + def __init__(self, M_multiple_list : Optional[List] = None, 
**kwargs) : + super().__init__(**kwargs) + self.N = 3 + + if M_multiple_list is None : + M_multiple_list = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0] + self.M_multiple_list = M_multiple_list + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, M = int(M_multiple * self.N)) for M_multiple in self.M_multiple_list] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/two_set_all_coprime_counting/__init__.py b/server/Gym/parameter_controllers/two_set_all_coprime_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0970ec35bf49f680850f9f60542c7038506b86d9 --- /dev/null +++ b/server/Gym/parameter_controllers/two_set_all_coprime_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import TwoSet_AllCoprime_Counting_ParameterController diff --git a/server/Gym/parameter_controllers/two_set_all_coprime_counting/parameter_controller.py b/server/Gym/parameter_controllers/two_set_all_coprime_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..2b8dd19b0022ec5b8855569b17e73c02de211804 --- /dev/null +++ b/server/Gym/parameter_controllers/two_set_all_coprime_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class TwoSet_AllCoprime_Counting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 8 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/undamaged_submatrix_counting/__init__.py 
b/server/Gym/parameter_controllers/undamaged_submatrix_counting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fccce0e32eccf2f168cc167145d8f754f2a424f1 --- /dev/null +++ b/server/Gym/parameter_controllers/undamaged_submatrix_counting/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import UndamagedSubmatrixCounting_ParameterController diff --git a/server/Gym/parameter_controllers/undamaged_submatrix_counting/parameter_controller.py b/server/Gym/parameter_controllers/undamaged_submatrix_counting/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..db3d767fa759fada4c46aa52074faf0396e81923 --- /dev/null +++ b/server/Gym/parameter_controllers/undamaged_submatrix_counting/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class UndamagedSubmatrixCounting_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.MAX_N_M = 3 + + def update(self) -> None : + self.MAX_N_M = int(self.MAX_N_M * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(MAX_N_M = self.MAX_N_M)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/value_diminishing_selection/__init__.py b/server/Gym/parameter_controllers/value_diminishing_selection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d2b27934e826f99895f4e78ede7e98169b6469ba --- /dev/null +++ b/server/Gym/parameter_controllers/value_diminishing_selection/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import ValueDiminishingSelection_ParameterController diff --git a/server/Gym/parameter_controllers/value_diminishing_selection/parameter_controller.py b/server/Gym/parameter_controllers/value_diminishing_selection/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..824e29a552eda325ac9b4bbdf38b322cf4e4e9c5 --- 
/dev/null +++ b/server/Gym/parameter_controllers/value_diminishing_selection/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class ValueDiminishingSelection_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/vertex_k_center/__init__.py b/server/Gym/parameter_controllers/vertex_k_center/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a3ae5d0c5d098a381515159efc88ec25a601c0e5 --- /dev/null +++ b/server/Gym/parameter_controllers/vertex_k_center/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import Vertex_KCenter_ParameterController diff --git a/server/Gym/parameter_controllers/vertex_k_center/parameter_controller.py b/server/Gym/parameter_controllers/vertex_k_center/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..bb41186a5798eb53ebfc5bc2f78b0b3ac9e1a84b --- /dev/null +++ b/server/Gym/parameter_controllers/vertex_k_center/parameter_controller.py @@ -0,0 +1,17 @@ +from typing import Dict, List, Optional +from ...parameter_controller import ParameterController + +class Vertex_KCenter_ParameterController(ParameterController) : + def __init__(self, edge_density_list : Optional[List] = None, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + if edge_density_list is None : + edge_density_list = [0.02, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9] + self.edge_density_list = edge_density_list + + def update(self) -> None : + self.N += 1 + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N, edge_density = edge_density) for edge_density in self.edge_density_list if 
int(edge_density * self.N * (self.N - 1) / 2) >= self.N - 1] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/virus_synthesis/__init__.py b/server/Gym/parameter_controllers/virus_synthesis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4aa163a4095ff4f6351507d9d722c028ab36f3 --- /dev/null +++ b/server/Gym/parameter_controllers/virus_synthesis/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import VirusSynthesis_ParameterController diff --git a/server/Gym/parameter_controllers/virus_synthesis/parameter_controller.py b/server/Gym/parameter_controllers/virus_synthesis/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3619935a43d79b54958ab225497aa487843b079c --- /dev/null +++ b/server/Gym/parameter_controllers/virus_synthesis/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class VirusSynthesis_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.loose_MAX_N = 4 + + def update(self) -> None : + self.loose_MAX_N = int(self.loose_MAX_N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(loose_MAX_N = self.loose_MAX_N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/visible_line/__init__.py b/server/Gym/parameter_controllers/visible_line/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db50a0acaf5e540b05f3a100c1392064ad3fe28f --- /dev/null +++ b/server/Gym/parameter_controllers/visible_line/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import VisibleLine_ParameterController diff --git a/server/Gym/parameter_controllers/visible_line/parameter_controller.py b/server/Gym/parameter_controllers/visible_line/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..3a84b64efbaec2ae92ed726418791c1560248a82 --- 
/dev/null +++ b/server/Gym/parameter_controllers/visible_line/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class VisibleLine_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 3 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/warehouse_construction/__init__.py b/server/Gym/parameter_controllers/warehouse_construction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..73c38a83deadf2adaf6daf062c0750829fab9a34 --- /dev/null +++ b/server/Gym/parameter_controllers/warehouse_construction/__init__.py @@ -0,0 +1 @@ +from .parameter_controller import WarehouseConstruction_ParameterController diff --git a/server/Gym/parameter_controllers/warehouse_construction/parameter_controller.py b/server/Gym/parameter_controllers/warehouse_construction/parameter_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..8e1c6d41d66d53ceea058036e42e17779689a152 --- /dev/null +++ b/server/Gym/parameter_controllers/warehouse_construction/parameter_controller.py @@ -0,0 +1,13 @@ +from typing import Dict, List +from ...parameter_controller import ParameterController + +class WarehouseConstruction_ParameterController(ParameterController) : + def __init__(self, **kwargs) : + super().__init__(**kwargs) + self.N = 4 + + def update(self) -> None : + self.N = int(self.N * 1.1 + 1) + + def get_parameter_list(self) -> List[Dict] : + return [dict(N = self.N)] \ No newline at end of file diff --git a/server/Gym/parameter_controllers/weighted_binarytree/__init__.py b/server/Gym/parameter_controllers/weighted_binarytree/__init__.py new file mode 100644 index 
# server/Gym/parameter_controllers/weighted_binarytree/parameter_controller.py
class WeightedBinaryTree_ParameterController(ParameterController):
    """Parameter schedule with a growing ``N`` (initially 3) and a fixed ``MAX_SCORE`` of 5."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 3
        self.MAX_SCORE = 5

    def update(self) -> None:
        """Advance one level: only ``N`` grows (10% plus one, truncated)."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return the single parameter set available at the current level."""
        return [{"N": self.N, "MAX_SCORE": self.MAX_SCORE}]


# server/Gym/parameter_controllers/weighted_lis/parameter_controller.py
class WeightedLIS_ParameterController(ParameterController):
    """Parameter schedule exposing a single size parameter ``N`` (initially 4)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 4

    def update(self) -> None:
        """Advance one level: grow ``N`` by 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return the single parameter set available at the current level."""
        return [{"N": self.N}]


# server/Gym/parameter_controllers/whack_a_mole/parameter_controller.py
class WhackAMole_ParameterController(ParameterController):
    """Parameter schedule exposing a single bound ``MAX_N_M`` (initially 3)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.MAX_N_M = 3

    def update(self) -> None:
        """Advance one level: grow ``MAX_N_M`` by 10% plus one, truncated to an int."""
        grown = self.MAX_N_M * 1.1 + 1
        self.MAX_N_M = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return the single parameter set available at the current level."""
        return [{"MAX_N_M": self.MAX_N_M}]
# server/Gym/parameter_controllers/wil/parameter_controller.py
class WIL_ParameterController(ParameterController):
    """Parameter schedule exposing a single size parameter ``N`` (initially 3)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Advance one level: grow ``N`` by 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return the single parameter set available at the current level."""
        return [{"N": self.N}]


# server/Gym/parameter_controllers/wyc/parameter_controller.py
class WYC_ParameterController(ParameterController):
    """Parameter schedule pairing every ``N`` in ``N_list`` with a shared, doubling ``MAX_K``."""

    def __init__(self, N_list: Optional[List] = None, **kwargs):
        """
        Args:
            N_list: Values of ``N`` to offer; defaults to the integers 2 through 8.
            **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.MAX_K = 5
        if N_list is None:
            N_list = list(range(2, 8 + 1))
        self.N_list = N_list

    def update(self) -> None:
        """Advance one level: double ``MAX_K``; ``N_list`` is left untouched."""
        self.MAX_K = self.MAX_K * 2

    def get_parameter_list(self) -> List[Dict]:
        """Return one parameter set per entry of ``N_list``, in list order."""
        parameter_sets = []
        for n in self.N_list:
            parameter_sets.append({"N": n, "MAX_K": self.MAX_K})
        return parameter_sets
# server/Gym/parameter_controllers/wyr_leveling_ground/parameter_controller.py
class WYRLevelingGround_ParameterController(ParameterController):
    """Parameter schedule exposing a single size parameter ``N`` (initially 2)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 2

    def update(self) -> None:
        """Advance one level: grow ``N`` by 10% plus one, truncated to an int."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return the single parameter set available at the current level."""
        return [{"N": self.N}]


# server/Gym/parameter_controllers/xor_equation_counting/parameter_controller.py
class XorEquationCounting_ParameterController(ParameterController):
    """Parameter schedule pairing a growing ``N`` (initially 2) with every ``RANGE`` in ``RANGE_List``."""

    def __init__(self, RANGE_List: Optional[List[int]] = None, **kwargs):
        """
        Args:
            RANGE_List: Values of ``RANGE`` to offer; defaults to ``2**b - 1``
                for b in 2, 3, 5, 7, 10, 12, 15, 17, 20.
            **kwargs: Forwarded unchanged to ``ParameterController.__init__``.
        """
        super().__init__(**kwargs)
        self.N = 2
        if RANGE_List is None:
            RANGE_List = [2 ** bits - 1 for bits in (2, 3, 5, 7, 10, 12, 15, 17, 20)]
        self.RANGE_List = RANGE_List

    def update(self) -> None:
        """Advance one level: only ``N`` grows (10% plus one, truncated)."""
        grown = self.N * 1.1 + 1
        self.N = int(grown)

    def get_parameter_list(self) -> List[Dict]:
        """Return one parameter set per entry of ``RANGE_List``, in list order."""
        parameter_sets = []
        for upper in self.RANGE_List:
            parameter_sets.append({"N": self.N, "RANGE": upper})
        return parameter_sets


# server/Gym/parameter_controllers/zero_prefix_subset_counting/parameter_controller.py
class ZeroPrefixSubsetCounting_ParameterController(ParameterController):
    """Parameter schedule exposing a single size parameter ``N`` (initially 3)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.N = 3

    def update(self) -> None:
        """Advance one level: ``N`` grows linearly, by exactly one."""
        self.N = self.N + 1

    def get_parameter_list(self) -> List[Dict]:
        """Return the single parameter set available at the current level."""
        return [{"N": self.N}]
# server/RLVE_Gym_environment.py
# NOTE(review): this module imports its models with ``from ..models import ...``
# while server/app.py uses ``from models import ...``; confirm both resolve
# under the ``uvicorn server.app:app`` invocation.
class RlveGymEnvironment(Environment):
    """
    Wrap any verifiable environment from RLVE-Gym behind the OpenEnv ``Environment`` API.

    ``reset()`` builds a fresh problem from the configured identifier,
    difficulty and current seed; ``step()`` runs that problem's verifier on a
    model output and accumulates accuracy statistics in the state.
    """

    def __init__(
        self,
        environment_identifier: str = "Multiplication",
        difficulty: int = 0,
        answer_markers: Optional[Tuple[str, str]] = None,
        initial_seed: int = 0,
    ):
        """
        Initialize the RLVE_Gym environment.

        Arguments are stored as-is; they are validated lazily in ``reset()``.

        Args:
            environment_identifier: Key looked up in ``identifier2environment``
                and ``identifier2controller``.
            difficulty: Number of ``update()`` calls applied to the parameter
                controller before a parameter set is drawn.
            answer_markers: Passed through to the problem constructor.
            initial_seed: Seed used by the first ``reset()``.
        """
        self._state = RlveGymState(
            seed=initial_seed,
            problem_input=None,
            num_samples=0,
            sum_accuracy=0,
        )

        self.environment_identifier = environment_identifier
        self.difficulty = difficulty
        self.answer_markers = answer_markers

        # The live problem instance; None until a reset() succeeds.
        self.problem = None

    @staticmethod
    def _failure(message: str, problem_input: Optional[str] = None) -> RlveGymObservation:
        """Build the observation returned on every unsuccessful reset/step."""
        return RlveGymObservation(
            problem_input=problem_input,
            verifier_result=None,
            success=False,
            message=message,
            reward=None,
        )

    def reset(self) -> RlveGymObservation:
        """
        Reset the environment.

        Validation failures and constructor errors return early without
        touching the state. Once generation has been attempted, the seed is
        advanced by one and the statistics are cleared, whether or not
        generation succeeded.

        Returns:
            problem_input: The generated problem input string (or None if generation failed)
            verifier_result: None
            success: Boolean indicating if the reset was successful
            message: Message indicating the result of the reset
        """
        identifier = self.environment_identifier
        if identifier not in identifier2environment or identifier not in identifier2controller:
            return self._failure("Invalid environment identifier.")
        if not (isinstance(self.difficulty, int) and self.difficulty >= 0):
            return self._failure("Difficulty should be a non-negative integer.")
        if not (isinstance(self._state.seed, int) and self._state.seed >= 0):
            return self._failure("Seed should be a non-negative integer.")

        try:
            problem: VerifiableEnvironment = identifier2environment[identifier](
                answer_markers=self.answer_markers
            )
        except Exception as e:
            return self._failure(f"Failed to initialize environment: {e}")

        # Replay the schedule from scratch so the parameters depend only on
        # (identifier, difficulty, seed).
        controller: ParameterController = identifier2controller[identifier]()
        for _ in range(self.difficulty):
            controller.update()
        # NOTE(review): this reseeds the module-level RNG; left as-is because
        # problem.generator may rely on it — confirm before isolating.
        random.seed(self._state.seed)
        parameter = random.choice(controller.get_parameter_list())

        if problem.generator(seed=self._state.seed, parameter=parameter):
            self._state.problem_input = problem.prompt_generator()
            self.problem = problem
        else:
            self._state.problem_input = None
            self.problem = None

        self._state.seed += 1
        self._state.num_samples = self._state.sum_accuracy = 0

        if self.problem is None:
            return self._failure(
                "Problem generation failed. Please try decreasing difficulty or changing seed."
            )
        return RlveGymObservation(
            problem_input=self._state.problem_input,
            verifier_result=None,
            success=True,
            message="Problem generated successfully.",
            reward=None,
        )

    def step(self, action: RlveGymAction) -> RlveGymObservation:  # type: ignore[override]
        """
        Execute a step in the environment by verifying the model output.

        Args:
            action: RlveGymAction containing the output to verify

        Returns:
            problem_input: The problem input string from the current state
            verifier_result: Result of the verification containing accuracy and other metrics
            success: Boolean indicating if the step was successful
            message: Message indicating the result of the step

        Raises:
            KeyError: If the verifier result lacks ``"accuracy"`` or
                ``"reward"``; the statistics are left unchanged in that case.
        """
        if self.problem is None:
            return self._failure("Problem not ready. Please reset the environment.")

        try:
            verifier_result = self.problem.verifier(action.output)
        except Exception as e:
            return self._failure(
                f"Verification failed with error: {e}",
                problem_input=self._state.problem_input,
            )

        # Do every lookup and the addition before mutating the state, so a
        # malformed result cannot leave num_samples and sum_accuracy out of step.
        reward = verifier_result["reward"]
        updated_sum = self._state.sum_accuracy + verifier_result["accuracy"]
        self._state.num_samples += 1
        self._state.sum_accuracy = updated_sum

        return RlveGymObservation(
            problem_input=self._state.problem_input,
            verifier_result=verifier_result,
            success=True,
            message="Verification completed.",
            reward=reward,
        )

    @property
    def state(self) -> RlveGymState:
        """
        Get the current environment state.

        Returns:
            seed: The current random seed value for problem generation
            problem_input: The generated problem input string (or None if generation failed)
            num_samples: Number of samples taken so far
            sum_accuracy: Sum of accuracies from verifications so far
        """
        return self._state
# Create the environment instance
env = RlveGymEnvironment()

# Create the app with web interface and README integration
app = create_app(
    env,
    RlveGymAction,
    RlveGymObservation,
    env_name="RLVE_Gym",
)


def main(host: str = "0.0.0.0", port: int = 8000):
    """
    Serve ``app`` with uvicorn; entry point for running without Docker.

    Example, from the environment root:
        python -m server.app --port 8001

    ``--host``/``--port`` are parsed only by the ``__main__`` guard below;
    calling ``main()`` directly uses the arguments passed here.

    Args:
        host: Host address to bind to (default: "0.0.0.0")
        port: Port number to listen on (default: 8000)

    For production deployments, consider using uvicorn directly with
    multiple workers:
        uvicorn server.app:app --workers 4
    """
    # Imported lazily so uvicorn is only needed when the server is launched
    # through this function.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # Expose both knobs that main() accepts; defaults mirror main()'s.
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    args = parser.parse_args()
    main(host=args.host, port=args.port)