from tricorder.schemas.analytics.nlp import LanguageDetection


class LanguageDetection(TextClassification):
    """Text classification whose label identifies the language of the text."""

    # Required field: the detected language, typed as an ISO 639-3 code.
    label: Iso639_3 = Field(
        default=...,
        description="Language of the text in ISO 639-3 code",
    )
from tricorder.schemas.analytics.nlp import EntityAnnotation


class EntityAnnotation(AnalyticAnnotation):
    """Entity annotation, typically used for Named Entity Recognition (NER).

    Required fields are ``label`` and ``text``. ``offset``, ``length``,
    ``attributes`` and ``confidence`` are optional and default to ``None``.
    When supplied, ``confidence`` must lie in ``[0, 1]`` and is stored
    rounded to two decimal places.
    """

    offset: Optional[int] = Field(
        None, description="Start index of the extraction text for the selector"
    )
    length: Optional[int] = Field(
        None,
        description="Length of the extraction from the text body to find end index",
    )
    label: str = Field(..., description="The unique label for the type of selector")
    text: str = Field(..., description="Raw selector text extracted from the text body")
    attributes: Optional[dict[str, Any]] = Field(
        None, description="A list of attributes specific to the label of selector"
    )
    confidence: Optional[confloat(ge=0, le=1)] = Field(
        None, description="Confidence level attributed to the pattern"
    )

    model_config = ConfigDict(use_enum_values=True)

    @field_validator("confidence")
    @classmethod
    def round_confidence(cls, value: Optional[float]) -> Optional[float]:
        """Round a supplied confidence to two decimal places.

        Args:
            value: The confidence value for the field, or ``None``.

        Returns:
            ``value`` rounded to two decimals, or ``None`` when no
            confidence was given.
        """
        # An explicit ``None`` means "no confidence"; pass it through as-is.
        if value is None:
            return None
        return round(value, 2)
from tricorder.schemas.analytics.nlp import SelectorAnnotation


class SelectorAnnotation(EntityAnnotation):
    """An entity annotation that represents a selector.

    A selector is typically resolvable to a specific pattern, standardized
    format, or a known entity type. On top of the inherited entity fields
    this model adds an optional normalized form of the text, a ``realm``
    (``"unknown"`` unless specified) and an optional ``potential_usp`` flag.
    """

    # Redeclared here with the same type and description as on the parent.
    text: str = Field(..., description="Raw selector text extracted from the text body")
    normalized_text: Optional[str] = Field(None, description="Normalized selector text")
    realm: str = Field(
        "unknown",
        description="The realm of the selector. Default is 'unknown' if not specified",
    )
    potential_usp: Optional[bool] = Field(
        None,
        description="Whether the selector contains potential USP information as determined by an attribute",
    )

    model_config = ConfigDict(use_enum_values=True)
from tricorder.schemas.analytics.nlp import TextClassification


class TextClassification(Classification):
    """Classification tag attached to a body of text.

    Adds no fields or behavior of its own beyond ``Classification``.
    """
from tricorder.schemas.analytics.nlp import NERResponse


class NERResponse(RootModel[list[EntityAnnotation | SelectorAnnotation]]):
    """Root model wrapping the list of named entities detected in a text.

    Each item is either an ``EntityAnnotation`` or a ``SelectorAnnotation``.
    """
from tricorder.schemas.analytics.nlp import ChunkTextEmbedding384


class ChunkTextEmbedding384(BaseModel):
    """Embedding of a single text chunk, fixed at 384 dimensions.

    ``embedding`` is required and must contain exactly 384 floats.
    ``offset`` and ``length`` are optional and default to ``None``.
    """

    # min_length == max_length pins the vector to exactly 384 entries.
    embedding: conlist(float, min_length=384, max_length=384) = Field(
        default=...,
        description="Language embedding",
    )
    offset: Optional[int] = Field(
        default=None,
        description="Offset of the embedding in the original text",
    )
    length: Optional[int] = Field(
        default=None,
        description="Length of the embedding in the original text",
    )
from tricorder.schemas.analytics.nlp import EmbeddingsResponse


class EmbeddingsResponse(RootModel[list[ChunkTextEmbedding384]]):
    """Root model wrapping a list of ``ChunkTextEmbedding384`` items.

    Holds one embedding per chunk of a document or body of text.
    """