Creating Custom Data Products Snippets
The DataOps.live data product implementation is tightly connected to the out-of-the-box Modelling and Transformation (MATE) orchestrator responsible for executing the models from the /dataops/modelling directory in a Snowflake data warehouse.
Building a data product starts with the data product specification. The specification includes extension points, which pipeline jobs fill with details. Once you run the pipeline that includes the Data Product orchestrator, the orchestrator enriches the data product specification with metadata. At the end of the pipeline, the orchestrator generates the data product manifest as a merged document from the specification and the data product snippets and publishes the manifest into the data product registry.
To enhance flexibility, you can adopt alternative methods for data transformation by creating custom data product snippets. Once the pipeline runs, the orchestrator aggregates all the snippets inside the /dataops/modelling/target/ directory. Each snippet file name must match the regular expression dataproduct_snippet_[0-9]*\.yml, for example, dataproduct_snippet_14913696.yml.
The following topics help you generate the required snippets in the correct format, regardless of your chosen transformation approach.
Snippet JSON schema
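The following JSON schema defines the attributes expected in a snippet; each snippet YAML file is validated against it. All four top-level properties (catalog, dataset, service_level_indicators, and selectors) are required, but each of them may be an empty object.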
{
  "title": "Snippet",
  "type": "object",
  "properties": {
    "catalog": {
      "title": "Catalog",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/definitions/CatalogObject"
      }
    },
    "dataset": {
      "title": "Dataset",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/definitions/DatasetObject"
      }
    },
    "service_level_indicators": {
      "title": "Service Level Indicators",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/definitions/ServiceLevelIndicator"
      }
    },
    "selectors": {
      "title": "Selectors",
      "type": "object",
      "additionalProperties": {
        "type": "array",
        "items": {
          "type": "string"
        },
        "uniqueItems": true
      }
    }
  },
  "required": ["catalog", "dataset", "service_level_indicators", "selectors"],
  "definitions": {
    "CatalogColumn": {
      "title": "CatalogColumn",
      "type": "object",
      "properties": {
        "comment": {
          "title": "Comment",
          "type": "string"
        },
        "index": {
          "title": "Index",
          "type": "integer"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "type": {
          "title": "Type",
          "type": "string"
        }
      },
      "required": ["comment", "index", "name", "type"]
    },
    "CatalogObject": {
      "title": "CatalogObject",
      "type": "object",
      "properties": {
        "columns": {
          "title": "Columns",
          "type": "array",
          "items": {
            "$ref": "#/definitions/CatalogColumn"
          }
        },
        "database": {
          "title": "Database",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "schema": {
          "title": "Schema",
          "type": "string"
        },
        "type": {
          "title": "Type",
          "type": "string"
        }
      },
      "required": ["columns", "database", "name", "schema", "type"]
    },
    "DatasetObject": {
      "title": "DatasetObject",
      "type": "object",
      "properties": {
        "database": {
          "title": "Database",
          "type": "string"
        },
        "mate_unique_id": {
          "title": "Mate Unique Id",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "schema": {
          "title": "Schema",
          "type": "string"
        }
      },
      "required": ["database", "mate_unique_id", "name", "schema"]
    },
    "ServiceLevelIndicator": {
      "title": "ServiceLevelIndicator",
      "type": "object",
      "properties": {
        "mate_unique_id": {
          "title": "Mate Unique Id",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "type": {
          "title": "Type",
          "type": "string"
        },
        "value": {
          "title": "Value",
          "type": "string"
        }
      },
      "required": ["mate_unique_id", "name", "type", "value"]
    }
  }
}
Example snippet
The following code shows an example of a custom snippet with actual data.
catalog: {}
dataset:
  model.MyProject.samples_customer:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_customer
    name: CUSTOMER
    schema: SAMPLES
  model.MyProject.samples_customer_v2:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_customer_v2
    name: CUSTOMER
    schema: SAMPLES_v2
  model.MyProject.samples_lineitem:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_lineitem
    name: LINEITEM
    schema: SAMPLES
  model.MyProject.samples_lineitem_v2:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_lineitem_v2
    name: LINEITEM
    schema: SAMPLES_v2
  model.MyProject.samples_orders:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_orders
    name: ORDERS
    schema: SAMPLES
  model.MyProject.samples_orders_v2:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_orders_v2
    name: ORDERS
    schema: SAMPLES_v2
selectors:
  tag:CRM_data:
    - test.MyProject.source_not_null_snowflake_sample_data_tpch_sf1_CUSTOMER_C_CUSTKEY.8688ae83f8
    - test.MyProject.source_unique_snowflake_sample_data_tpch_sf1_CUSTOMER_C_CUSTKEY.9975c6dc5c
  tag:CRM_v1:
    - model.MyProject.samples_orders
    - test.MyProject.not_null_samples_customer_C_NATIONKEY.9cb7edf6e0
    - model.MyProject.samples_lineitem
    - test.MyProject.unique_samples_customer_C_CUSTKEY.088a62b788
    - test.MyProject.not_null_samples_customer_C_CUSTKEY.4e8448c7f1
    - model.MyProject.samples_customer
service_level_indicators: {}