| | """ |
| | Pydantic models for request/response validation. |
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | import base64 |
| | import enum |
| | import io |
| | import typing |
| | from typing import Literal, Optional |
| |
|
| | import numpy as np |
| | import pydantic |
| | from PIL import Image |
| |
|
| | if typing.TYPE_CHECKING: |
| | from numpy.typing import NDArray |
| |
|
| |
|
class ImageData(pydantic.BaseModel):
    """Image data model for base64 encoded images."""

    # Free-form media type string; no particular value is enforced here.
    mediaType: str = pydantic.Field(
        description="The IETF Media Type (MIME type) of the data"
    )

    # Base64 payload. The pattern accepts only canonical, padded base64:
    # whole 4-character groups, optionally followed by one padded group
    # whose last significant character has its unused low bits set to zero.
    data: str = pydantic.Field(
        description="A base64 string encoding of the data.",
        pattern=(
            r"^(?:[A-Za-z0-9+/]{4})*"  # zero or more complete groups
            r"(?:[A-Za-z0-9+/][AQgw]=="  # final group padded with "=="
            r"|[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=)?$"  # or padded with "="
        ),
    )
| |
|
| |
|
class BinaryMask(pydantic.BaseModel):
    """A bit mask indicating which pixels are manipulated / synthesized. A
    pixel value of ``0`` means "no detection", and a value of ``1`` means
    "detection".

    The mask data must be encoded in PNG format, so that typical mask data is
    compressed effectively. The PNG encoding **should** use "bilevel" mode for
    maximum compactness. You can use the ``BinaryMask.from_numpy()``
    function to convert a 0-1 numpy array to a BinaryMask.
    """

    # Only PNG payloads are accepted; any other media type fails validation.
    mediaType: Literal["image/png"] = pydantic.Field(
        description="The IETF Media Type (MIME type) of the data."
    )

    # Base64 payload. The pattern accepts only canonical, padded base64:
    # whole 4-character groups, optionally followed by one padded group.
    data: str = pydantic.Field(
        description="A base64 string encoding of the data.",
        pattern=r"^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/][AQgw]==|[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=)?$",
    )

    @staticmethod
    def from_numpy(mask: NDArray[np.uint8]) -> BinaryMask:
        """Convert a 0-1 numpy array to a BinaryMask.

        The numpy data must be in row-major order. That means the first
        dimension corresponds to **height** and the second dimension corresponds
        to **width**.

        Args:
            mask: 2-D array of shape ``(height, width)``. ``0`` means "no
                detection"; any non-zero value is treated as "detection".
                ``uint8`` is the expected dtype, but boolean and other
                integer arrays are accepted as well.

        Returns:
            A ``BinaryMask`` whose ``data`` is the base64 encoding of a
            bilevel (1-bit) PNG rendering of ``mask``.

        Raises:
            ValueError: If ``mask`` is not two-dimensional.
        """
        mask_array = np.asarray(mask)
        if mask_array.ndim != 2:
            raise ValueError(
                "mask must be a 2-D (height, width) array, got "
                f"{mask_array.ndim} dimension(s)"
            )

        # Build a strict 0/255 uint8 image regardless of the input dtype.
        # Multiplying a bool or wide-integer array by 255 would change the
        # dtype, and Pillow would then misread the buffer as 8-bit pixels.
        pixels = (mask_array != 0).astype(np.uint8) * np.uint8(255)

        # A 2-D uint8 array is interpreted as mode "L" automatically, so no
        # explicit mode is passed. ``dither=None`` would make Pillow fall
        # back to its default (Floyd-Steinberg); Dither.NONE requests a
        # plain threshold, which is what a binary mask needs.
        mask_img = Image.fromarray(pixels).convert("1", dither=Image.Dither.NONE)

        mask_img_buffer = io.BytesIO()
        mask_img.save(mask_img_buffer, format="png")
        mask_data = base64.b64encode(mask_img_buffer.getvalue()).decode("ascii")
        return BinaryMask(mediaType="image/png", data=mask_data)
| |
|
| |
|
class ImageRequest(pydantic.BaseModel):
    """Request model for image classification."""

    # The image to classify, carried as a base64 payload plus its media type.
    image: ImageData
| |
|
| |
|
# Integer codes for the four whole-image classification categories.
@enum.unique
class Labels(enum.IntEnum):
    Natural = 0  # image is natural / unaltered
    FullySynthesized = 1  # entire image was synthesized
    LocallyEdited = 2  # natural image, a portion edited
    LocallySynthesized = 3  # natural image, a portion replaced by synthesis
| |
|
| |
|
class PredictionResponse(pydantic.BaseModel):
    """Response model for synthetic image classification results.

    Detector models will be scored primarily on their ability to classify the
    entire image into 1 of the 4 label categories::

        0: (Natural) The image is natural / unaltered.
        1: (FullySynthesized) The entire image was synthesized by e.g., a
           generative image model.
        2: (LocallyEdited) The image is a natural image where a portion has
           been edited using traditional photo editing techniques such as
           splicing.
        3: (LocallySynthesized) The image is a natural image where a portion
           has been replaced by synthesized content.
    """

    # Exactly four values are accepted (min_length == max_length == 4).
    # Presumably entry i is the log-probability of label i as numbered in
    # the docstring above -- confirm against the scoring code.
    logprobs: list[float] = pydantic.Field(
        description="The log-probabilities for each of the 4 possible labels.",
        min_length=4,
        max_length=4,
    )

    # The explicit default is what makes the field omittable: in pydantic v2
    # an Optional[...] annotation without a default is still a required
    # field, which would contradict the description below.
    localizationMask: Optional[BinaryMask] = pydantic.Field(
        default=None,
        description="A bit mask localizing predicted edits. Models that are"
        " not capable of localization may omit this field. It may also be"
        " omitted if the predicted label is ``0`` or ``1``, in which case the"
        " mask will be assumed to be all 0's or all 1's, as appropriate.",
    )