Skip to main content

Creating Custom Data Product Snippets

Feature release status: Public Preview (PubPrev)

The DataOps.live data product implementation is tightly connected to the out-of-the-box Modelling and Transformation (MATE) orchestrator responsible for executing the models from the /dataops/modelling directory in a Snowflake data warehouse.

Building a data product starts with the data product specification. The specification includes extension points, which pipeline jobs fill with details. Once you run the pipeline that includes the Data Product orchestrator, the orchestrator enriches the data product specification with metadata. At the end of the pipeline, the orchestrator generates the data product manifest as a merged document from the specification and the data product snippets and publishes the manifest into the data product registry.

To enhance flexibility, you can adopt alternative methods for data transformation by creating custom data product snippets. When the pipeline runs, the orchestrator aggregates all the snippets found in the dataops/modelling/target/ directory. Each snippet file name must match this regular expression: dataproduct_snippet_[0-9]*\.yml (for example, dataproduct_snippet_14913696.yml).

The following topics help you generate the required snippets in the correct format, regardless of your chosen transformation approach.

Snippet JSON schema

The following code shows the expected attributes in the snippet schema. The JSON schema is used to validate the snippet's YAML file.

{
  "title": "Snippet",
  "type": "object",
  "properties": {
    "catalog": {
      "title": "Catalog",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/definitions/CatalogObject"
      }
    },
    "dataset": {
      "title": "Dataset",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/definitions/DatasetObject"
      }
    },
    "service_level_indicators": {
      "title": "Service Level Indicators",
      "type": "object",
      "additionalProperties": {
        "$ref": "#/definitions/ServiceLevelIndicator"
      }
    },
    "selectors": {
      "title": "Selectors",
      "type": "object",
      "additionalProperties": {
        "type": "array",
        "items": {
          "type": "string"
        },
        "uniqueItems": true
      }
    }
  },
  "required": [
    "catalog",
    "dataset",
    "service_level_indicators",
    "selectors"
  ],
  "definitions": {
    "CatalogColumn": {
      "title": "CatalogColumn",
      "type": "object",
      "properties": {
        "comment": {
          "title": "Comment",
          "type": "string"
        },
        "index": {
          "title": "Index",
          "type": "integer"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "type": {
          "title": "Type",
          "type": "string"
        }
      },
      "required": [
        "comment",
        "index",
        "name",
        "type"
      ]
    },
    "CatalogObject": {
      "title": "CatalogObject",
      "type": "object",
      "properties": {
        "columns": {
          "title": "Columns",
          "type": "array",
          "items": {
            "$ref": "#/definitions/CatalogColumn"
          }
        },
        "database": {
          "title": "Database",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "schema": {
          "title": "Schema",
          "type": "string"
        },
        "type": {
          "title": "Type",
          "type": "string"
        }
      },
      "required": [
        "columns",
        "database",
        "name",
        "schema",
        "type"
      ]
    },
    "DatasetObject": {
      "title": "DatasetObject",
      "type": "object",
      "properties": {
        "database": {
          "title": "Database",
          "type": "string"
        },
        "mate_unique_id": {
          "title": "Mate Unique Id",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "schema": {
          "title": "Schema",
          "type": "string"
        }
      },
      "required": [
        "database",
        "mate_unique_id",
        "name",
        "schema"
      ]
    },
    "ServiceLevelIndicator": {
      "title": "ServiceLevelIndicator",
      "type": "object",
      "properties": {
        "mate_unique_id": {
          "title": "Mate Unique Id",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "type": {
          "title": "Type",
          "type": "string"
        },
        "value": {
          "title": "Value",
          "type": "string"
        }
      },
      "required": [
        "mate_unique_id",
        "name",
        "type",
        "value"
      ]
    }
  }
}

Example snippet

The following code shows an example custom snippet with actual data.

# Example data product snippet. The four top-level keys (catalog, dataset,
# selectors, service_level_indicators) are the ones the snippet schema requires.
catalog: {}
# Each dataset entry carries the four required DatasetObject fields:
# database, mate_unique_id, name, and schema.
dataset:
  model.MyProject.samples_customer:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_customer
    name: CUSTOMER
    schema: SAMPLES
  model.MyProject.samples_customer_v2:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_customer_v2
    name: CUSTOMER
    schema: SAMPLES_v2
  model.MyProject.samples_lineitem:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_lineitem
    name: LINEITEM
    schema: SAMPLES
  model.MyProject.samples_lineitem_v2:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_lineitem_v2
    name: LINEITEM
    schema: SAMPLES_v2
  model.MyProject.samples_orders:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_orders
    name: ORDERS
    schema: SAMPLES
  model.MyProject.samples_orders_v2:
    database: DATAOPS_FB_DATAPRODUCTMANIFESTUI
    mate_unique_id: model.MyProject.samples_orders_v2
    name: ORDERS
    schema: SAMPLES_v2
# Each selector maps to a list of unique string identifiers.
selectors:
  tag:CRM_data:
    - test.MyProject.source_not_null_snowflake_sample_data_tpch_sf1_CUSTOMER_C_CUSTKEY.8688ae83f8
    - test.MyProject.source_unique_snowflake_sample_data_tpch_sf1_CUSTOMER_C_CUSTKEY.9975c6dc5c
  tag:CRM_v1:
    - model.MyProject.samples_orders
    - test.MyProject.not_null_samples_customer_C_NATIONKEY.9cb7edf6e0
    - model.MyProject.samples_lineitem
    - test.MyProject.unique_samples_customer_C_CUSTKEY.088a62b788
    - test.MyProject.not_null_samples_customer_C_CUSTKEY.4e8448c7f1
    - model.MyProject.samples_customer
service_level_indicators: {}