{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://kakao.io/data/cacao-genetics.schema.json",
  "title": "Cacao Genetic Record (draft / planned)",
  "description": "PLANNED, NOT YET POPULATED. A future schema for genetic data attached to a cacao origin. Each record references a cacao origin by its id (see cacao-origin.schema.json), so genetic characterisation can be added without changing the origin records. Reserved for later work — e.g. SSR/SNP marker profiles, genetic-cluster membership proportions, and links to public sequence and germplasm records.",
  "type": "object",
  "additionalProperties": false,
  "required": ["id", "origin_id", "status", "updated_at"],
  "properties": {
    "id": {
      "type": "string",
      "pattern": "^gen-[a-z0-9-]+$",
      "description": "Stable identifier for the genetic record, e.g. 'gen-ve-chuao-01'."
    },
    "origin_id": {
      "type": "string",
      "description": "The cacao origin this record characterises. References an id in cacao-origins.json."
    },
    "accession": {
      "type": ["string", "null"],
      "description": "Germplasm accession code where applicable (e.g. an international cocoa genebank accession)."
    },
    "marker_system": {
      "type": ["string", "null"],
      "enum": ["SSR", "SNP", "whole-genome", "other", null],
      "description": "The genotyping system used."
    },
    "cluster_membership": {
      "type": ["object", "null"],
      "description": "Estimated membership proportion in each modern genetic cluster (values 0-1, summing to about 1).",
      "additionalProperties": false,
      "properties": {
        "Amelonado": { "type": "number", "minimum": 0, "maximum": 1 },
        "Contamana": { "type": "number", "minimum": 0, "maximum": 1 },
        "Criollo": { "type": "number", "minimum": 0, "maximum": 1 },
        "Curaray": { "type": "number", "minimum": 0, "maximum": 1 },
        "Guiana": { "type": "number", "minimum": 0, "maximum": 1 },
        "Iquitos": { "type": "number", "minimum": 0, "maximum": 1 },
        "Marañón": { "type": "number", "minimum": 0, "maximum": 1 },
        "Nacional": { "type": "number", "minimum": 0, "maximum": 1 },
        "Nanay": { "type": "number", "minimum": 0, "maximum": 1 },
        "Purús": { "type": "number", "minimum": 0, "maximum": 1 }
      }
    },
    "reference_population": {
      "type": ["string", "null"],
      "description": "Reference population or study the assignment is based on."
    },
    "sequence_refs": {
      "type": "array",
      "items": { "type": "string" },
      "description": "Identifiers or links to public DNA sequence records (e.g. an NCBI accession)."
    },
    "method_notes": {
      "type": ["string", "null"],
      "description": "Notes on the genotyping or analysis method."
    },
    "sources": {
      "type": "array",
      "items": { "type": "string" },
      "description": "References supporting the record."
    },
    "status": {
      "type": "string",
      "enum": ["planned", "draft", "reviewed", "verified"],
      "description": "Editorial status. The genetics dataset itself is currently 'planned'."
    },
    "updated_at": {
      "type": "string",
      "format": "date",
      "description": "ISO 8601 date this record was last updated."
    }
  }
}
