from tricorder.schemas.analytics.computer_vision import BoundingBox

# NOTE(review): the class below shadows the name imported just above —
# confirm this import-then-redefine pattern is intentional (it appears
# consistently throughout this file, so it may be generated).


class BoundingBox(BaseModel):
    """Bounding box for an object whether in an image, video, or reference to document.

    Coordinates are fractions in [0, 1] (each field is range-constrained),
    with (xmin, ymin) the top-left corner and (xmax, ymax) the bottom-right.
    """

    xmin: confloat(ge=0, le=1) = Field(
        ..., description="The x-coordinate of the top-left corner of the bounding box."
    )
    ymin: confloat(ge=0, le=1) = Field(
        ..., description="The y-coordinate of the top-left corner of the bounding box."
    )
    xmax: confloat(ge=0, le=1) = Field(
        ...,
        description="The x-coordinate of the bottom-right corner of the bounding box.",
    )
    ymax: confloat(ge=0, le=1) = Field(
        ...,
        description="The y-coordinate of the bottom-right corner of the bounding box.",
    )

    @model_validator(mode="after")
    def validate_bbox(self):
        # Strict inequality: degenerate (zero-width or zero-height) boxes
        # are rejected along with inverted ones.
        if self.xmin >= self.xmax:
            raise ValueError("xmin must be less than xmax")
        if self.ymin >= self.ymax:
            raise ValueError("ymin must be less than ymax")
        return self
from tricorder.schemas.analytics.computer_vision import Detection


class Detection(Classification):
    """A detected object in an image or video.

    Extends Classification (label/confidence, declared elsewhere) with a
    spatial location and optional landmark points.
    """

    # Spatial location of the detection within the image/frame.
    bbox: BoundingBox = Field(..., description="Bounding box of the detection")
    # NOTE(review): landmarks is a flat float list — presumably interleaved
    # coordinate pairs, but the layout is not specified here; confirm with
    # the producing models before relying on any ordering.
    landmarks: Optional[list[float]] = Field(
        None, description="Landmarks of the detection"
    )
from tricorder.schemas.analytics.computer_vision import FaceAttributes


class FaceAttributes(BaseModel):
    """Attributes of a detected face such as age or sex.

    Both fields are optional: a model may report either, both, or neither.
    """

    age: Optional[int] = Field(
        None, description="The estimated age as detected by a model."
    )
    sex: Optional[Sex] = Field(
        None, description="The estimated sex as detected by a model."
    )

    # Store/serialize the Sex enum member as its underlying value rather
    # than the enum object itself.
    model_config = ConfigDict(use_enum_values=True)
from tricorder.schemas.analytics.computer_vision import FaceLandmarks5


class FaceLandmarks5(BaseModel):
    """Five key landmarks for a face, typically used to align a face for downstream embedding and recognition models.

    NOTE(review): unlike BoundingBox, these floats carry no range constraint —
    presumably pixel (or normalized) coordinates; confirm the convention with
    the producing detector before mixing with normalized bbox values.
    """

    lefteye_x: float = Field(..., description="The x-coordinate of the left eye.")
    lefteye_y: float = Field(..., description="The y-coordinate of the left eye.")
    righteye_x: float = Field(..., description="The x-coordinate of the right eye.")
    righteye_y: float = Field(..., description="The y-coordinate of the right eye.")
    nose_x: float = Field(..., description="The x-coordinate of the nose.")
    nose_y: float = Field(..., description="The y-coordinate of the nose.")
    leftmouth_x: float = Field(..., description="The x-coordinate of the left mouth.")
    leftmouth_y: float = Field(..., description="The y-coordinate of the left mouth.")
    rightmouth_x: float = Field(..., description="The x-coordinate of the right mouth.")
    rightmouth_y: float = Field(..., description="The y-coordinate of the right mouth.")
from tricorder.schemas.analytics.computer_vision import FaceEmbedding512


class FaceEmbedding512(RootModel[conlist(float, min_length=512, max_length=512)]):
    """A 512-dimensional face embedding.

    The root value is a list of floats validated to contain exactly 512
    elements, matching recognition models with 512-dimensional output.
    """
from tricorder.schemas.analytics.computer_vision import FaceDetection


class FaceDetection(Detection):
    """A detected face in an image or video."""

    # Pins the inherited label to the constant "face": the default is
    # "face" and any other value fails Literal validation.
    label: Literal["face"] = Field(
        "face", description="The label of the detected object."
    )
from tricorder.schemas.analytics.computer_vision import FaceAnnotation


class FaceAnnotation(FaceDetection):
    """An annotation for a detected face in an image or video.

    Extends FaceDetection with optional attributes, an embedding, and pose —
    any of which may be absent depending on which models were run.
    """

    attributes: Optional[FaceAttributes] = Field(
        None, description="The attributes of the detected face."
    )
    # NOTE(review): typed as FaceEmbedding, but only FaceEmbedding512 is
    # visible in this module — confirm FaceEmbedding exists elsewhere
    # (e.g. as an alias/union) or whether FaceEmbedding512 was intended.
    embedding: Optional[FaceEmbedding] = Field(
        None, description="The embedding of the detected face."
    )
    pose: Optional[Pose] = Field(None, description="The pose of the detected face.")
from tricorder.schemas.analytics.computer_vision import FacesResponse


class FacesResponse(RootModel[list[FaceAnnotation]]):
    """A collection of face annotations, modeled as a bare list at the root."""
from tricorder.schemas.analytics.computer_vision import ImageCaption


class ImageCaption(RootModel[str]):
    """A short free-text summary describing the contents of an image."""
from tricorder.schemas.analytics.computer_vision import ImageClassification


class ImageClassification(Classification):
    """A classification tag associated with an image; adds no fields beyond Classification."""
from tricorder.schemas.analytics.computer_vision import OCRResponse


class OCRResponse(AnalyticAnnotation):
    """A response containing OCR'd text and its detection annotations.

    `text` is the full recognized string; each entry in `annotations`
    locates a recognized span via a bounding box plus an offset/length
    into `text` (see the schema example below).
    """

    # Consistency fix: use Optional[str] rather than `str | None` to match
    # the Optional[...] convention used by every other schema in this module.
    text: Optional[str] = Field(None, description="The OCR'd text")
    script: Optional[ISO_15924] = Field(None, description="The script of the text")
    annotations: Optional[list[OCRDetection]] = Field(
        None, description="A list of OCR annotations"
    )

    # use_enum_values: serialize the ISO_15924 enum member as its value.
    model_config = ConfigDict(
        use_enum_values=True,
        json_schema_extra={
            "examples": [
                {
                    "schema_name": "OCRResponse",
                    "text": "Test",
                    "annotations": [
                        {
                            "label": "text",
                            "confidence": 0.96,
                            "bbox": {
                                "xmin": 0.10224438902743142,
                                "ymin": 0.1984126984126984,
                                "xmax": 0.46633416458852867,
                                "ymax": 0.36507936507936506,
                            },
                            "offset": 0,
                            "length": 4,
                        }
                    ],
                }
            ]
        },
    )
from tricorder.schemas.analytics.computer_vision import Pose


class Pose(BaseModel):
    """The pose of a detected face, typically used for alignment and recognition.

    All three angles are in degrees and constrained to [-180, 180].
    """

    # Rotation about the x-axis (nodding up/down).
    pitch: confloat(ge=-180, le=180) = Field(
        ..., description="Rotation around the x-axis in degrees."
    )
    # Rotation about the y-axis (turning left/right).
    yaw: confloat(ge=-180, le=180) = Field(
        ..., description="Rotation around the y-axis in degrees."
    )
    # Rotation about the z-axis (tilting toward a shoulder).
    roll: confloat(ge=-180, le=180) = Field(
        ..., description="Rotation around the z-axis (or front to back) in degrees."
    )