diff --git a/model_monitoring_batch/function.yaml b/model_monitoring_batch/function.yaml deleted file mode 100644 index 1009c4e72..000000000 --- a/model_monitoring_batch/function.yaml +++ /dev/null @@ -1,128 +0,0 @@ -kind: job -metadata: - name: model-monitoring-batch - tag: '' - hash: 0a07259a35b487d80169a53e31ce0c62dc288c2c - project: '' - categories: - - monitoring -spec: - command: '' - args: [] - image: mlrun/mlrun - env: [] - default_handler: handler - entry_points: - compute: - name: compute - doc: '' - parameters: - - name: self - default: '' - - name: capping - default: null - - name: kld_scaling - default: 0.0001 - outputs: - - default: '' - type: float - lineno: 64 - dict_to_histogram: - name: dict_to_histogram - doc: '' - parameters: - - name: self - default: '' - - name: histogram_dict - default: '' - outputs: - - default: '' - lineno: 112 - compute_metrics_over_df: - name: compute_metrics_over_df - doc: '' - parameters: - - name: self - default: '' - - name: base_histogram - default: '' - - name: latest_histogram - default: '' - outputs: - - default: '' - lineno: 129 - compute_drift_from_histograms: - name: compute_drift_from_histograms - doc: '' - parameters: - - name: self - default: '' - - name: feature_stats - default: '' - - name: current_stats - default: '' - outputs: - - default: '' - lineno: 140 - post_init: - name: post_init - doc: '' - parameters: - - name: self - default: '' - outputs: - - default: '' - lineno: 283 - run: - name: run - doc: '' - parameters: - - name: self - default: '' - outputs: - - default: '' - lineno: 295 - check_for_drift: - name: check_for_drift - doc: '' - parameters: - - name: self - default: '' - - name: drift_result - default: '' - - name: endpoint - default: '' - outputs: - - default: '' - lineno: 421 - get_last_created_dir: - name: get_last_created_dir - doc: '' - parameters: - - name: fs - default: '' - - name: endpoint_dir - default: '' - outputs: - - default: '' - lineno: 447 - handler: - name: handler - doc: '' - parameters: - - name: context - type: MLClientCtx - default: '' - outputs: - - default: '' - lineno: 453 - description: '' - build: - functionSourceCode: aW1wb3J0IGpzb24KaW1wb3J0IG9zCmZyb20gY29sbGVjdGlvbnMgaW1wb3J0IGRlZmF1bHRkaWN0CmZyb20gZGF0YWNsYXNzZXMgaW1wb3J0IGRhdGFjbGFzcwpmcm9tIHR5cGluZyBpbXBvcnQgT3B0aW9uYWwsIExpc3QsIERpY3QKCmltcG9ydCBudW1weSBhcyBucAppbXBvcnQgcGFuZGFzIGFzIHBkCmltcG9ydCB2M2lvCmZyb20gbWxydW4gaW1wb3J0IGdldF9ydW5fZGIKZnJvbSBtbHJ1biBpbXBvcnQgc3RvcmVfbWFuYWdlcgpmcm9tIG1scnVuLmRhdGFfdHlwZXMuaW5mZXIgaW1wb3J0IERGRGF0YUluZmVyLCBJbmZlck9wdGlvbnMKZnJvbSBtbHJ1bi5ydW4gaW1wb3J0IE1MQ2xpZW50Q3R4CmZyb20gbWxydW4udXRpbHMgaW1wb3J0IGxvZ2dlciwgY29uZmlnCmZyb20gbWxydW4udXRpbHMubW9kZWxfbW9uaXRvcmluZyBpbXBvcnQgRW5kcG9pbnRUeXBlLCBwYXJzZV9tb2RlbF9lbmRwb2ludF9zdG9yZV9wcmVmaXgKZnJvbSBtbHJ1bi51dGlscy52M2lvX2NsaWVudHMgaW1wb3J0IGdldF92M2lvX2NsaWVudCwgZ2V0X2ZyYW1lc19jbGllbnQKZnJvbSBza2xlYXJuLnByZXByb2Nlc3NpbmcgaW1wb3J0IEtCaW5zRGlzY3JldGl6ZXIKClRJTUVfRk9STUFUID0gIiVZLSVtLSVkICVIOiVNOiVTLiVmJXoiCgoKQGRhdGFjbGFzcwpjbGFzcyBUb3RhbFZhcmlhbmNlRGlzdGFuY2U6CiAgICAiIiIKICAgIFByb3ZpZGVzIGEgc3ltbWV0cmljIGRyaWZ0IGRpc3RhbmNlIGJldHdlZW4gdHdvIHBlcmlvZHMgdCBhbmQgdQogICAgWiAtIHZlY3RvciBvZiByYW5kb20gdmFyaWFibGVzCiAgICBQdCAtIFByb2JhYmlsaXR5IGRpc3RyaWJ1dGlvbiBvdmVyIHRpbWUgc3BhbiB0CiAgICAiIiIKCiAgICBkaXN0cmliX3Q6IG5wLm5kYXJyYXkKICAgIGRpc3RyaWJfdTogbnAubmRhcnJheQoKICAgIGRlZiBjb21wdXRlKHNlbGYpIC0+IGZsb2F0OgogICAgICAgIHJldHVybiBucC5zdW0obnAuYWJzKHNlbGYuZGlzdHJpYl90IC0gc2VsZi5kaXN0cmliX3UpKSAvIDIKCgpAZGF0YWNsYXNzCmNsYXNzIEhlbGxpbmdlckRpc3RhbmNlOgogICAgIiIiCiAgICBIZWxsaW5nZXIgZGlzdGFuY2UgaXMgYW4gZiBkaXZlcmdlbmNlIG1lYXN1cmUsIHNpbWlsYXIgdG8gdGhlIEt1bGxiYWNrLUxlaWJsZXIgKEtMKSBkaXZlcmdlbmNlLgogICAgSG93ZXZlciwgdW5saWtlIEtMIERpdmVyZ2VuY2UgdGhlIEhlbGxpbmdlciBkaXZlcmdlbmNlIGlzIHN5bW1ldHJpYyBhbmQgYm91bmRlZCBvdmVyIGEgcHJvYmFiaWxpdHkgc3BhY2UuCiAgICAiIiIKCiAgICBkaXN0cmliX3Q6IG5wLm5kYXJyYXkKICAgIGRpc3RyaWJfdTogbnAubmRhcnJheQoKICAgIGRlZiBjb21wdXRlKHNlbGYpIC0+IGZsb2F0OgogICAgICAgIHJldHVybiBucC5zcXJ0KAogICAgICAgICAgICAwLjUgKiAoKG5wLnNxcnQoc2VsZi5kaXN0cmliX3UpIC0gbnAuc3FydChzZWxmLmRpc3RyaWJfdCkpICoqIDIpLnN1bSgpCiAgICAgICAgKQoKCkBkYXRhY2xhc3MKY2xhc3MgS3VsbGJhY2tMZWlibGVyRGl2ZXJnZW5jZToKICAgICIiIgogICAgS0wgRGl2ZXJnZW5jZSAob3IgcmVsYXRpdmUgZW50cm9weSkgaXMgYSBtZWFzdXJlIG9mIGhvdyBvbmUgcHJvYmFiaWxpdHkgZGlzdHJpYnV0aW9uIGRpZmZlcnMgZnJvbSBhbm90aGVyLgogICAgSXQgaXMgYW4gYXN5bW1ldHJpYyBtZWFzdXJlICh0aHVzIGl0J3Mgbm90IGEgbWV0cmljKSBhbmQgaXQgZG9lc24ndCBzYXRpc2Z5IHRoZSB0cmlhbmdsZSBpbmVxdWFsaXR5LgogICAgS0wgRGl2ZXJnZW5jZSBvZiAwLCBpbmRpY2F0ZXMgdHdvIGlkZW50aWNhbCBkaXN0cmlidXRpb25zLgogICAgIiIiCgogICAgZGlzdHJpYl90OiBucC5uZGFycmF5CiAgICBkaXN0cmliX3U6IG5wLm5kYXJyYXkKCiAgICBkZWYgY29tcHV0ZShzZWxmLCBjYXBwaW5nPU5vbmUsIGtsZF9zY2FsaW5nPTAuMDAwMSkgLT4gZmxvYXQ6CiAgICAgICAgdF91ID0gbnAuc3VtKAogICAgICAgICAgICBucC53aGVyZSgKICAgICAgICAgICAgICAgIHNlbGYuZGlzdHJpYl90ICE9IDAsCiAgICAgICAgICAgICAgICAoc2VsZi5kaXN0cmliX3QpCiAgICAgICAgICAgICAgICAqIG5wLmxvZygKICAgICAgICAgICAgICAgICAgICBzZWxmLmRpc3RyaWJfdAogICAgICAgICAgICAgICAgICAgIC8gbnAud2hlcmUoc2VsZi5kaXN0cmliX3UgIT0gMCwgc2VsZi5kaXN0cmliX3UsIGtsZF9zY2FsaW5nKQogICAgICAgICAgICAgICAgKSwKICAgICAgICAgICAgICAgIDAsCiAgICAgICAgICAgICkKICAgICAgICApCiAgICAgICAgdV90ID0gbnAuc3VtKAogICAgICAgICAgICBucC53aGVyZSgKICAgICAgICAgICAgICAgIHNlbGYuZGlzdHJpYl91ICE9IDAsCiAgICAgICAgICAgICAgICAoc2VsZi5kaXN0cmliX3UpCiAgICAgICAgICAgICAgICAqIG5wLmxvZygKICAgICAgICAgICAgICAgICAgICBzZWxmLmRpc3RyaWJfdQogICAgICAgICAgICAgICAgICAgIC8gbnAud2hlcmUoc2VsZi5kaXN0cmliX3QgIT0gMCwgc2VsZi5kaXN0cmliX3QsIGtsZF9zY2FsaW5nKQogICAgICAgICAgICAgICAgKSwKICAgICAgICAgICAgICAgIDAsCiAgICAgICAgICAgICkKICAgICAgICApCiAgICAgICAgcmVzdWx0ID0gdF91ICsgdV90CiAgICAgICAgaWYgY2FwcGluZzoKICAgICAgICAgICAgcmV0dXJuIGNhcHBpbmcgaWYgcmVzdWx0ID09IGZsb2F0KCJpbmYiKSBlbHNlIHJlc3VsdAogICAgICAgIHJldHVybiByZXN1bHQKCgpjbGFzcyBWaXJ0dWFsRHJpZnQ6CiAgICBkZWYgX19pbml0X18oCiAgICAgICAgc2VsZiwKICAgICAgICBwcmVkaWN0aW9uX2NvbDogT3B0aW9uYWxbc3RyXSA9IE5vbmUsCiAgICAgICAgbGFiZWxfY29sOiBPcHRpb25hbFtzdHJdID0gTm9uZSwKICAgICAgICBmZWF0dXJlX3dlaWdodHM6IE9wdGlvbmFsW0xpc3RbZmxvYXRdXSA9IE5vbmUsCiAgICAgICAgaW5mX2NhcHBpbmc6IE9wdGlvbmFsW2Zsb2F0XSA9IDEwLAogICAgKToKICAgICAgICBzZWxmLnByZWRpY3Rpb25fY29sID0gcHJlZGljdGlvbl9jb2wKICAgICAgICBzZWxmLmxhYmVsX2NvbCA9IGxhYmVsX2NvbAogICAgICAgIHNlbGYuZmVhdHVyZV93ZWlnaHRzID0gZmVhdHVyZV93ZWlnaHRzCiAgICAgICAgc2VsZi5jYXBwaW5nID0gaW5mX2NhcHBpbmcKICAgICAgICBzZWxmLmRpc2NyZXRpemVyczogRGljdFtzdHIsIEtCaW5zRGlzY3JldGl6ZXJdID0ge30KICAgICAgICBzZWxmLm1ldHJpY3MgPSB7CiAgICAgICAgICAgICJ0dmQiOiBUb3RhbFZhcmlhbmNlRGlzdGFuY2UsCiAgICAgICAgICAgICJoZWxsaW5nZXIiOiBIZWxsaW5nZXJEaXN0YW5jZSwKICAgICAgICAgICAgImtsZCI6IEt1bGxiYWNrTGVpYmxlckRpdmVyZ2VuY2UsCiAgICAgICAgfQoKICAgIGRlZiBkaWN0X3RvX2hpc3RvZ3JhbShzZWxmLCBoaXN0b2dyYW1fZGljdCk6CiAgICAgICAgaGlzdG9ncmFtcyA9IHt9CiAgICAgICAgZm9yIGZlYXR1cmUsIHN0YXRzIGluIGhpc3RvZ3JhbV9kaWN0Lml0ZW1zKCk6CiAgICAgICAgICAgIGhpc3RvZ3JhbXNbZmVhdHVyZV0gPSBzdGF0c1siaGlzdCJdWzBdCgogICAgICAgICMgR2V0IGZlYXR1cmVzIHZhbHVlIGNvdW50cwogICAgICAgIGhpc3RvZ3JhbXMgPSBwZC5jb25jYXQoCiAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgIHBkLkRhdGFGcmFtZShkYXRhPWhpc3QsIGNvbHVtbnM9W2ZlYXR1cmVdKQogICAgICAgICAgICAgICAgZm9yIGZlYXR1cmUsIGhpc3QgaW4gaGlzdG9ncmFtcy5pdGVtcygpCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgIGF4aXM9MSwKICAgICAgICApCiAgICAgICAgIyBUbyBEaXN0cmlidXRpb24KICAgICAgICBoaXN0b2dyYW1zID0gaGlzdG9ncmFtcyAvIGhpc3RvZ3JhbXMuc3VtKCkKICAgICAgICByZXR1cm4gaGlzdG9ncmFtcwoKICAgIGRlZiBjb21wdXRlX21ldHJpY3Nfb3Zlcl9kZihzZWxmLCBiYXNlX2hpc3RvZ3JhbSwgbGF0ZXN0X2hpc3RvZ3JhbSk6CiAgICAgICAgZHJpZnRfbWVhc3VyZXMgPSB7fQogICAgICAgIGZvciBtZXRyaWNfbmFtZSwgbWV0cmljIGluIHNlbGYubWV0cmljcy5pdGVtcygpOgogICAgICAgICAgICBkcmlmdF9tZWFzdXJlc1ttZXRyaWNfbmFtZV0gPSB7CiAgICAgICAgICAgICAgICBmZWF0dXJlOiBtZXRyaWMoCiAgICAgICAgICAgICAgICAgICAgYmFzZV9oaXN0b2dyYW0ubG9jWzosIGZlYXR1cmVdLCBsYXRlc3RfaGlzdG9ncmFtLmxvY1s6LCBmZWF0dXJlXQogICAgICAgICAgICAgICAgKS5jb21wdXRlKCkKICAgICAgICAgICAgICAgIGZvciBmZWF0dXJlIGluIGJhc2VfaGlzdG9ncmFtCiAgICAgICAgICAgIH0KICAgICAgICByZXR1cm4gZHJpZnRfbWVhc3VyZXMKCiAgICBkZWYgY29tcHV0ZV9kcmlmdF9mcm9tX2hpc3RvZ3JhbXMoc2VsZiwgZmVhdHVyZV9zdGF0cywgY3VycmVudF9zdGF0cyk6CiAgICAgICAgIyBQcm9jZXNzIGhpc3RvZ3JhbSBkaWN0aW9uYXJpZXMgdG8gRGF0YWZyYW1lIG9mIHRoZSBoaXN0b2dyYW1zCiAgICAgICAgIyB3aXRoIEZlYXR1cmUgaGlzdG9ncmFtIGFzIGNvbHMKICAgICAgICBiYXNlX2hpc3RvZ3JhbSA9IHNlbGYuZGljdF90b19oaXN0b2dyYW0oZmVhdHVyZV9zdGF0cykKICAgICAgICBsYXRlc3RfaGlzdG9ncmFtID0gc2VsZi5kaWN0X3RvX2hpc3RvZ3JhbShjdXJyZW50X3N0YXRzKQoKICAgICAgICAjIFZlcmlmeSBhbGwgdGhlIGZlYXR1cmVzIGV4aXN0IGJldHdlZW4gZGF0YXNldHMKICAgICAgICBiYXNlX2ZlYXR1cmVzID0gc2V0KGJhc2VfaGlzdG9ncmFtLmNvbHVtbnMpCiAgICAgICAgbGF0ZXN0X2ZlYXR1cmVzID0gc2V0KGxhdGVzdF9oaXN0b2dyYW0uY29sdW1ucykKCiAgICAgICAgZmVhdHVyZXNfY29tbW9uID0gbGlzdChiYXNlX2ZlYXR1cmVzLmludGVyc2VjdGlvbihsYXRlc3RfZmVhdHVyZXMpKQogICAgICAgIGZlYXR1cmVfZGlmZmVyZW5jZSA9IGxpc3QoYmFzZV9mZWF0dXJlcyBeIGxhdGVzdF9mZWF0dXJlcykKCiAgICAgICAgaWYgbm90IGZlYXR1cmVzX2NvbW1vbjoKICAgICAgICAgICAgcmFpc2UgVmFsdWVFcnJvcigKICAgICAgICAgICAgICAgIGYiTm8gY29tbW9uIGZlYXR1cmVzIGZvdW5kOiB7YmFzZV9mZWF0dXJlc30gPD4ge2xhdGVzdF9mZWF0dXJlc30iCiAgICAgICAgICAgICkKCiAgICAgICAgYmFzZV9oaXN0b2dyYW0gPSBiYXNlX2hpc3RvZ3JhbS5kcm9wKAogICAgICAgICAgICBmZWF0dXJlX2RpZmZlcmVuY2UsIGF4aXM9MSwgZXJyb3JzPSJpZ25vcmUiCiAgICAgICAgKQogICAgICAgIGxhdGVzdF9oaXN0b2dyYW0gPSBsYXRlc3RfaGlzdG9ncmFtLmRyb3AoCiAgICAgICAgICAgIGZlYXR1cmVfZGlmZmVyZW5jZSwgYXhpcz0xLCBlcnJvcnM9Imlnbm9yZSIKICAgICAgICApCgogICAgICAgICMgQ29tcHV0ZSB0aGUgZHJpZnQgcGVyIGZlYXR1cmUKICAgICAgICBmZWF0dXJlc19kcmlmdF9tZWFzdXJlcyA9IHNlbGYuY29tcHV0ZV9tZXRyaWNzX292ZXJfZGYoCiAgICAgICAgICAgIGJhc2VfaGlzdG9ncmFtLmxvY1s6LCBmZWF0dXJlc19jb21tb25dLAogICAgICAgICAgICBsYXRlc3RfaGlzdG9ncmFtLmxvY1s6LCBmZWF0dXJlc19jb21tb25dLAogICAgICAgICkKCiAgICAgICAgIyBDb21wdXRlIHRvdGFsIGRyaWZ0IG1lYXN1cmVzIGZvciBmZWF0dXJlcwogICAgICAgIGZvciBtZXRyaWNfbmFtZSBpbiBzZWxmLm1ldHJpY3Mua2V5cygpOgogICAgICAgICAgICBmZWF0dXJlX3ZhbHVlcyA9IGxpc3QoZmVhdHVyZXNfZHJpZnRfbWVhc3VyZXNbbWV0cmljX25hbWVdLnZhbHVlcygpKQogICAgICAgICAgICBmZWF0dXJlc19kcmlmdF9tZWFzdXJlc1ttZXRyaWNfbmFtZV1bInRvdGFsX3N1bSJdID0gbnAuc3VtKGZlYXR1cmVfdmFsdWVzKQogICAgICAgICAgICBmZWF0dXJlc19kcmlmdF9tZWFzdXJlc1ttZXRyaWNfbmFtZV1bInRvdGFsX21lYW4iXSA9IG5wLm1lYW4oZmVhdHVyZV92YWx1ZXMpCgogICAgICAgICAgICAjIEFkZCB3ZWlnaHRlZCBtZWFuIGJ5IGdpdmVuIGZlYXR1cmUgd2VpZ2h0cyBpZiBwcm92aWRlZAogICAgICAgICAgICBpZiBzZWxmLmZlYXR1cmVfd2VpZ2h0czoKICAgICAgICAgICAgICAgIGZlYXR1cmVzX2RyaWZ0X21lYXN1cmVzW21ldHJpY19uYW1lXVsidG90YWxfd2VpZ2h0ZWRfbWVhbiJdID0gbnAuZG90KAogICAgICAgICAgICAgICAgICAgIGZlYXR1cmVfdmFsdWVzLCBzZWxmLmZlYXR1cmVfd2VpZ2h0cwogICAgICAgICAgICAgICAgKQoKICAgICAgICBkcmlmdF9yZXN1bHQgPSBkZWZhdWx0ZGljdChkaWN0KQoKICAgICAgICBmb3IgZmVhdHVyZSBpbiBmZWF0dXJlc19jb21tb246CiAgICAgICAgICAgIGZvciBtZXRyaWMsIHZhbHVlcyBpbiBmZWF0dXJlc19kcmlmdF9tZWFzdXJlcy5pdGVtcygpOgogICAgICAgICAgICAgICAgZHJpZnRfcmVzdWx0W2ZlYXR1cmVdW21ldHJpY10gPSB2YWx1ZXNbZmVhdHVyZV0KICAgICAgICAgICAgICAgIHN1bSA9IGZlYXR1cmVzX2RyaWZ0X21lYXN1cmVzW21ldHJpY11bInRvdGFsX3N1bSJdCiAgICAgICAgICAgICAgICBtZWFuID0gZmVhdHVyZXNfZHJpZnRfbWVhc3VyZXNbbWV0cmljXVsidG90YWxfbWVhbiJdCiAgICAgICAgICAgICAgICBkcmlmdF9yZXN1bHRbZiJ7bWV0cmljfV9zdW0iXSA9IHN1bQogICAgICAgICAgICAgICAgZHJpZnRfcmVzdWx0W2Yie21ldHJpY31fbWVhbiJdID0gbWVhbgogICAgICAgICAgICAgICAgaWYgc2VsZi5mZWF0dXJlX3dlaWdodHM6CiAgICAgICAgICAgICAgICAgICAgbWV0cmljX21lYXN1cmUgPSBmZWF0dXJlc19kcmlmdF9tZWFzdXJlc1ttZXRyaWNdCiAgICAgICAgICAgICAgICAgICAgd2VpZ2h0ZWRfbWVhbiA9IG1ldHJpY19tZWFzdXJlWyJ0b3RhbF93ZWlnaHRlZF9tZWFuIl0KICAgICAgICAgICAgICAgICAgICBkcmlmdF9yZXN1bHRbZiJ7bWV0cmljfV93ZWlnaHRlZF9tZWFuIl0gPSB3ZWlnaHRlZF9tZWFuCgogICAgICAgIGlmIHNlbGYubGFiZWxfY29sOgogICAgICAgICAgICBsYWJlbF9kcmlmdF9tZWFzdXJlcyA9IHNlbGYuY29tcHV0ZV9tZXRyaWNzX292ZXJfZGYoCiAgICAgICAgICAgICAgICBiYXNlX2hpc3RvZ3JhbS5sb2NbOiwgc2VsZi5sYWJlbF9jb2xdLAogICAgICAgICAgICAgICAgbGF0ZXN0X2hpc3RvZ3JhbS5sb2NbOiwgc2VsZi5sYWJlbF9jb2xdLAogICAgICAgICAgICApCiAgICAgICAgICAgIGZvciBtZXRyaWMsIHZhbHVlcyBpbiBsYWJlbF9kcmlmdF9tZWFzdXJlcy5pdGVtcygpOgogICAgICAgICAgICAgICAgZHJpZnRfcmVzdWx0W3NlbGYubGFiZWxfY29sXVttZXRyaWNdID0gdmFsdWVzW21ldHJpY10KCiAgICAgICAgaWYgc2VsZi5wcmVkaWN0aW9uX2NvbDoKICAgICAgICAgICAgcHJlZGljdGlvbl9kcmlmdF9tZWFzdXJlcyA9IHNlbGYuY29tcHV0ZV9tZXRyaWNzX292ZXJfZGYoCiAgICAgICAgICAgICAgICBiYXNlX2hpc3RvZ3JhbS5sb2NbOiwgc2VsZi5wcmVkaWN0aW9uX2NvbF0sCiAgICAgICAgICAgICAgICBsYXRlc3RfaGlzdG9ncmFtLmxvY1s6LCBzZWxmLnByZWRpY3Rpb25fY29sXSwKICAgICAgICAgICAgKQogICAgICAgICAgICBmb3IgbWV0cmljLCB2YWx1ZXMgaW4gcHJlZGljdGlvbl9kcmlmdF9tZWFzdXJlcy5pdGVtcygpOgogICAgICAgICAgICAgICAgZHJpZnRfcmVzdWx0W3NlbGYucHJlZGljdGlvbl9jb2xdW21ldHJpY10gPSB2YWx1ZXNbbWV0cmljXQoKICAgICAgICByZXR1cm4gZHJpZnRfcmVzdWx0CgoKY2xhc3MgQmF0Y2hQcm9jZXNzb3I6CiAgICBkZWYgX19pbml0X18oCiAgICAgICAgc2VsZiwKICAgICAgICBjb250ZXh0OiBNTENsaWVudEN0eCwKICAgICAgICBwcm9qZWN0OiBzdHIsCiAgICAgICAgbW9kZWxfbW9uaXRvcmluZ19hY2Nlc3Nfa2V5OiBzdHIsCiAgICAgICAgdjNpb19hY2Nlc3Nfa2V5OiBzdHIsCiAgICApOgogICAgICAgIHNlbGYuY29udGV4dCA9IGNvbnRleHQKICAgICAgICBzZWxmLnByb2plY3QgPSBwcm9qZWN0CgogICAgICAgIHNlbGYudjNpb19hY2Nlc3Nfa2V5ID0gdjNpb19hY2Nlc3Nfa2V5CiAgICAgICAgc2VsZi5tb2RlbF9tb25pdG9yaW5nX2FjY2Vzc19rZXkgPSAoCiAgICAgICAgICAgICAgICBtb2RlbF9tb25pdG9yaW5nX2FjY2Vzc19rZXkgb3IgdjNpb19hY2Nlc3Nfa2V5CiAgICAgICAgKQoKICAgICAgICBzZWxmLnZpcnR1YWxfZHJpZnQgPSBWaXJ0dWFsRHJpZnQoaW5mX2NhcHBpbmc9MTApCgogICAgICAgIHRlbXBsYXRlID0gY29uZmlnLm1vZGVsX2VuZHBvaW50X21vbml0b3Jpbmcuc3RvcmVfcHJlZml4ZXMuZGVmYXVsdAoKICAgICAgICBrdl9wYXRoID0gdGVtcGxhdGUuZm9ybWF0KHByb2plY3Q9c2VsZi5wcm9qZWN0LCBraW5kPSJlbmRwb2ludHMiKQogICAgICAgIF8sIHNlbGYua3ZfY29udGFpbmVyLCBzZWxmLmt2X3BhdGggPSBwYXJzZV9tb2RlbF9lbmRwb2ludF9zdG9yZV9wcmVmaXgoa3ZfcGF0aCkKCiAgICAgICAgdHNkYl9wYXRoID0gdGVtcGxhdGUuZm9ybWF0KHByb2plY3Q9cHJvamVjdCwga2luZD0iZXZlbnRzIikKICAgICAgICBfLCBzZWxmLnRzZGJfY29udGFpbmVyLCBzZWxmLnRzZGJfcGF0aCA9IHBhcnNlX21vZGVsX2VuZHBvaW50X3N0b3JlX3ByZWZpeCgKICAgICAgICAgICAgdHNkYl9wYXRoCiAgICAgICAgKQoKICAgICAgICBzdHJlYW1fcGF0aCA9IHRlbXBsYXRlLmZvcm1hdChwcm9qZWN0PXNlbGYucHJvamVjdCwga2luZD0ibG9nX3N0cmVhbSIpCiAgICAgICAgXywgc2VsZi5zdHJlYW1fY29udGFpbmVyLCBzZWxmLnN0cmVhbV9wYXRoID0gcGFyc2VfbW9kZWxfZW5kcG9pbnRfc3RvcmVfcHJlZml4KAogICAgICAgICAgICBzdHJlYW1fcGF0aAogICAgICAgICkKCiAgICAgICAgc2VsZi5wYXJxdWV0X3BhdGggPSBjb25maWcubW9kZWxfZW5kcG9pbnRfbW9uaXRvcmluZy5zdG9yZV9wcmVmaXhlcy51c2VyX3NwYWNlLmZvcm1hdCgKICAgICAgICAgICAgcHJvamVjdD1wcm9qZWN0LCBraW5kPSJwYXJxdWV0IgogICAgICAgICkKCiAgICAgICAgbG9nZ2VyLmluZm8oCiAgICAgICAgICAgICJJbml0aWFsaXppbmcgQmF0Y2hQcm9jZXNzb3IiLAogICAgICAgICAgICBwcm9qZWN0PXByb2plY3QsCiAgICAgICAgICAgIG1vZGVsX21vbml0b3JpbmdfYWNjZXNzX2tleV9pbml0YWxpemVkPWJvb2wobW9kZWxfbW9uaXRvcmluZ19hY2Nlc3Nfa2V5KSwKICAgICAgICAgICAgdjNpb19hY2Nlc3Nfa2V5X2luaXRpYWxpemVkPWJvb2wodjNpb19hY2Nlc3Nfa2V5KSwKICAgICAgICAgICAgcGFycXVldF9wYXRoPXNlbGYucGFycXVldF9wYXRoLAogICAgICAgICAgICBrdl9jb250YWluZXI9c2VsZi5rdl9jb250YWluZXIsCiAgICAgICAgICAgIGt2X3BhdGg9c2VsZi5rdl9wYXRoLAogICAgICAgICAgICB0c2RiX2NvbnRhaW5lcj1zZWxmLnRzZGJfY29udGFpbmVyLAogICAgICAgICAgICB0c2RiX3BhdGg9c2VsZi50c2RiX3BhdGgsCiAgICAgICAgICAgIHN0cmVhbV9jb250YWluZXI9c2VsZi5zdHJlYW1fY29udGFpbmVyLAogICAgICAgICAgICBzdHJlYW1fcGF0aD1zZWxmLnN0cmVhbV9wYXRoLAogICAgICAgICkKCiAgICAgICAgc2VsZi5kZWZhdWx0X3Bvc3NpYmxlX2RyaWZ0X3RocmVzaG9sZCA9ICgKICAgICAgICAgICAgY29uZmlnLm1vZGVsX2VuZHBvaW50X21vbml0b3JpbmcuZHJpZnRfdGhyZXNob2xkcy5kZWZhdWx0LnBvc3NpYmxlX2RyaWZ0CiAgICAgICAgKQogICAgICAgIHNlbGYuZGVmYXVsdF9kcmlmdF9kZXRlY3RlZF90aHJlc2hvbGQgPSAoCiAgICAgICAgICAgIGNvbmZpZy5tb2RlbF9lbmRwb2ludF9tb25pdG9yaW5nLmRyaWZ0X3RocmVzaG9sZHMuZGVmYXVsdC5kcmlmdF9kZXRlY3RlZAogICAgICAgICkKCiAgICAgICAgc2VsZi5kYiA9IGdldF9ydW5fZGIoKQogICAgICAgIHNlbGYudjNpbyA9IGdldF92M2lvX2NsaWVudChhY2Nlc3Nfa2V5PXNlbGYudjNpb19hY2Nlc3Nfa2V5KQogICAgICAgIHNlbGYuZnJhbWVzID0gZ2V0X2ZyYW1lc19jbGllbnQoCiAgICAgICAgICAgIGFkZHJlc3M9Y29uZmlnLnYzaW9fZnJhbWVzZCwKICAgICAgICAgICAgY29udGFpbmVyPXNlbGYudHNkYl9jb250YWluZXIsCiAgICAgICAgICAgIHRva2VuPXNlbGYudjNpb19hY2Nlc3Nfa2V5LAogICAgICAgICkKICAgICAgICBzZWxmLmV4Y2VwdGlvbiA9IE5vbmUKCiAgICBkZWYgcG9zdF9pbml0KHNlbGYpOgogICAgICAgIHJlc3BvbnNlID0gc2VsZi52M2lvLmNyZWF0ZV9zdHJlYW0oCiAgICAgICAgICAgIGNvbnRhaW5lcj1zZWxmLnN0cmVhbV9jb250YWluZXIsCiAgICAgICAgICAgIHBhdGg9c2VsZi5zdHJlYW1fcGF0aCwKICAgICAgICAgICAgc2hhcmRfY291bnQ9MSwKICAgICAgICAgICAgcmFpc2VfZm9yX3N0YXR1cz12M2lvLmRhdGFwbGFuZS5SYWlzZUZvclN0YXR1cy5uZXZlciwKICAgICAgICAgICAgYWNjZXNzX2tleT1zZWxmLnYzaW9fYWNjZXNzX2tleSwKICAgICAgICApCgogICAgICAgIGlmIG5vdCAocmVzcG9uc2Uuc3RhdHVzX2NvZGUgPT0gNDAwIGFuZCAiUmVzb3VyY2VJblVzZSIgaW4gc3RyKHJlc3BvbnNlLmJvZHkpKToKICAgICAgICAgICAgcmVzcG9uc2UucmFpc2VfZm9yX3N0YXR1cyhbNDA5LCAyMDQsIDQwM10pCgogICAgZGVmIHJ1bihzZWxmKToKCiAgICAgICAgdHJ5OgogICAgICAgICAgICBlbmRwb2ludHMgPSBzZWxmLmRiLmxpc3RfbW9kZWxfZW5kcG9pbnRzKHNlbGYucHJvamVjdCkKICAgICAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGU6CiAgICAgICAgICAgIGxvZ2dlci5lcnJvcigiRmFpbGVkIHRvIGxpc3QgZW5kcG9pbnRzIiwgZXhjPWUpCiAgICAgICAgICAgIHJldHVybgoKICAgICAgICBhY3RpdmVfZW5kcG9pbnRzID0gc2V0KCkKICAgICAgICBmb3IgZW5kcG9pbnQgaW4gZW5kcG9pbnRzLmVuZHBvaW50czoKICAgICAgICAgICAgaWYgZW5kcG9pbnQuc3BlYy5hY3RpdmU6CiAgICAgICAgICAgICAgICBhY3RpdmVfZW5kcG9pbnRzLmFkZChlbmRwb2ludC5tZXRhZGF0YS51aWQpCgogICAgICAgIHN0b3JlLCBzdWIgPSBzdG9yZV9tYW5hZ2VyLmdldF9vcl9jcmVhdGVfc3RvcmUoc2VsZi5wYXJxdWV0X3BhdGgpCiAgICAgICAgcHJlZml4ID0gc2VsZi5wYXJxdWV0X3BhdGgucmVwbGFjZShzdWIsICIiKQogICAgICAgIGZzID0gc3RvcmUuZ2V0X2ZpbGVzeXN0ZW0oc2lsZW50PUZhbHNlKQoKICAgICAgICBpZiBub3QgZnMuZXhpc3RzKHN1Yik6CiAgICAgICAgICAgIGxvZ2dlci53YXJuKAogICAgICAgICAgICAgICAgZiJ7c3VifSBkb2VzIG5vdCBleGlzdCIKICAgICAgICAgICAgKQogICAgICAgICAgICByZXR1cm4KCiAgICAgICAgZm9yIGVuZHBvaW50X2RpciBpbiBmcy5scyhzdWIpOgogICAgICAgICAgICBlbmRwb2ludF9pZCA9IGVuZHBvaW50X2RpclsibmFtZSJdLnNwbGl0KCI9IilbLTFdCiAgICAgICAgICAgIGlmIGVuZHBvaW50X2lkIG5vdCBpbiBhY3RpdmVfZW5kcG9pbnRzOgogICAgICAgICAgICAgICAgY29udGludWUKCiAgICAgICAgICAgIHRyeToKICAgICAgICAgICAgICAgIGxhc3RfeWVhciA9IHNlbGYuZ2V0X2xhc3RfY3JlYXRlZF9kaXIoZnMsIGVuZHBvaW50X2RpcikKICAgICAgICAgICAgICAgIGxhc3RfbW9udGggPSBzZWxmLmdldF9sYXN0X2NyZWF0ZWRfZGlyKGZzLCBsYXN0X3llYXIpCiAgICAgICAgICAgICAgICBsYXN0X2RheSA9IHNlbGYuZ2V0X2xhc3RfY3JlYXRlZF9kaXIoZnMsIGxhc3RfbW9udGgpCiAgICAgICAgICAgICAgICBsYXN0X2hvdXIgPSBzZWxmLmdldF9sYXN0X2NyZWF0ZWRfZGlyKGZzLCBsYXN0X2RheSkKCiAgICAgICAgICAgICAgICBmdWxsX3BhdGggPSBmIntwcmVmaXh9e2xhc3RfaG91clsnbmFtZSddfSIKCiAgICAgICAgICAgICAgICBsb2dnZXIuaW5mbyhmIk5vdyBwcm9jZXNzaW5nIHtmdWxsX3BhdGh9IikKCiAgICAgICAgICAgICAgICBlbmRwb2ludCA9IHNlbGYuZGIuZ2V0X21vZGVsX2VuZHBvaW50KAogICAgICAgICAgICAgICAgICAgIHByb2plY3Q9c2VsZi5wcm9qZWN0LCBlbmRwb2ludF9pZD1lbmRwb2ludF9pZAogICAgICAgICAgICAgICAgKQoKICAgICAgICAgICAgICAgIGlmIGVuZHBvaW50LnN0YXR1cy5lbmRwb2ludF90eXBlID09IEVuZHBvaW50VHlwZS5ST1VURVI6CiAgICAgICAgICAgICAgICAgICAgIyBlbmRwb2ludC5zdGF0dXMuZmVhdHVyZV9zdGF0cyBpcyBOb25lCiAgICAgICAgICAgICAgICAgICAgbG9nZ2VyLmluZm8oZiJ7ZW5kcG9pbnRfaWR9IGlzIHJvdXRlciBza2lwcGluZyIpCiAgICAgICAgICAgICAgICAgICAgY29udGludWUKCiAgICAgICAgICAgICAgICBkZiA9IHBkLnJlYWRfcGFycXVldChmdWxsX3BhdGgpCiAgICAgICAgICAgICAgICB0aW1lc3RhbXAgPSBkZlsidGltZXN0YW1wIl0uaWxvY1stMV0KCiAgICAgICAgICAgICAgICBuYW1lZF9mZWF0dXJlc19kZiA9IGxpc3QoZGZbIm5hbWVkX2ZlYXR1cmVzIl0pCiAgICAgICAgICAgICAgICBuYW1lZF9mZWF0dXJlc19kZiA9IHBkLkRhdGFGcmFtZShuYW1lZF9mZWF0dXJlc19kZikKCiAgICAgICAgICAgICAgICBjdXJyZW50X3N0YXRzID0gREZEYXRhSW5mZXIuZ2V0X3N0YXRzKAogICAgICAgICAgICAgICAgICAgIGRmPW5hbWVkX2ZlYXR1cmVzX2RmLCBvcHRpb25zPUluZmVyT3B0aW9ucy5IaXN0b2dyYW0KICAgICAgICAgICAgICAgICkKCiAgICAgICAgICAgICAgICBkcmlmdF9yZXN1bHQgPSBzZWxmLnZpcnR1YWxfZHJpZnQuY29tcHV0ZV9kcmlmdF9mcm9tX2hpc3RvZ3JhbXMoCiAgICAgICAgICAgICAgICAgICAgZmVhdHVyZV9zdGF0cz1lbmRwb2ludC5zdGF0dXMuZmVhdHVyZV9zdGF0cywKICAgICAgICAgICAgICAgICAgICBjdXJyZW50X3N0YXRzPWN1cnJlbnRfc3RhdHMsCiAgICAgICAgICAgICAgICApCgogICAgICAgICAgICAgICAgbG9nZ2VyLmluZm8oIkRyaWZ0IHJlc3VsdCIsIGRyaWZ0X3Jlc3VsdD1kcmlmdF9yZXN1bHQpCgogICAgICAgICAgICAgICAgZHJpZnRfc3RhdHVzLCBkcmlmdF9tZWFzdXJlID0gc2VsZi5jaGVja19mb3JfZHJpZnQoCiAgICAgICAgICAgICAgICAgICAgZHJpZnRfcmVzdWx0PWRyaWZ0X3Jlc3VsdCwgZW5kcG9pbnQ9ZW5kcG9pbnQKICAgICAgICAgICAgICAgICkKCiAgICAgICAgICAgICAgICBsb2dnZXIuaW5mbygKICAgICAgICAgICAgICAgICAgICAiRHJpZnQgc3RhdHVzIiwKICAgICAgICAgICAgICAgICAgICBlbmRwb2ludF9pZD1lbmRwb2ludF9pZCwKICAgICAgICAgICAgICAgICAgICBkcmlmdF9zdGF0dXM9ZHJpZnRfc3RhdHVzLAogICAgICAgICAgICAgICAgICAgIGRyaWZ0X21lYXN1cmU9ZHJpZnRfbWVhc3VyZSwKICAgICAgICAgICAgICAgICkKCiAgICAgICAgICAgICAgICBpZiBkcmlmdF9zdGF0dXMgPT0gIlBPU1NJQkxFX0RSSUZUIiBvciBkcmlmdF9zdGF0dXMgPT0gIkRSSUZUX0RFVEVDVEVEIjoKICAgICAgICAgICAgICAgICAgICBzZWxmLnYzaW8uc3RyZWFtLnB1dF9yZWNvcmRzKAogICAgICAgICAgICAgICAgICAgICAgICBjb250YWluZXI9c2VsZi5zdHJlYW1fY29udGFpbmVyLAogICAgICAgICAgICAgICAgICAgICAgICBzdHJlYW1fcGF0aD1zZWxmLnN0cmVhbV9wYXRoLAogICAgICAgICAgICAgICAgICAgICAgICByZWNvcmRzPVsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiZGF0YSI6IGpzb24uZHVtcHMoCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJlbmRwb2ludF9pZCI6IGVuZHBvaW50X2lkLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgImRyaWZ0X3N0YXR1cyI6IGRyaWZ0X3N0YXR1cywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJkcmlmdF9tZWFzdXJlIjogZHJpZnRfbWVhc3VyZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJkcmlmdF9wZXJfZmVhdHVyZSI6IHsqKmRyaWZ0X3Jlc3VsdH0sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgIF0sCiAgICAgICAgICAgICAgICAgICAgKQoKICAgICAgICAgICAgICAgIHNlbGYudjNpby5rdi51cGRhdGUoCiAgICAgICAgICAgICAgICAgICAgY29udGFpbmVyPXNlbGYua3ZfY29udGFpbmVyLAogICAgICAgICAgICAgICAgICAgIHRhYmxlX3BhdGg9c2VsZi5rdl9wYXRoLAogICAgICAgICAgICAgICAgICAgIGtleT1lbmRwb2ludF9pZCwKICAgICAgICAgICAgICAgICAgICBhdHRyaWJ1dGVzPXsKICAgICAgICAgICAgICAgICAgICAgICAgImN1cnJlbnRfc3RhdHMiOiBqc29uLmR1bXBzKGN1cnJlbnRfc3RhdHMpLAogICAgICAgICAgICAgICAgICAgICAgICAiZHJpZnRfbWVhc3VyZXMiOiBqc29uLmR1bXBzKGRyaWZ0X3Jlc3VsdCksCiAgICAgICAgICAgICAgICAgICAgICAgICJkcmlmdF9zdGF0dXMiOiBkcmlmdF9zdGF0dXMsCiAgICAgICAgICAgICAgICAgICAgfSwKICAgICAgICAgICAgICAgICkKCiAgICAgICAgICAgICAgICB0c2RiX2RyaWZ0X21lYXN1cmVzID0gewogICAgICAgICAgICAgICAgICAgICJlbmRwb2ludF9pZCI6IGVuZHBvaW50X2lkLAogICAgICAgICAgICAgICAgICAgICJ0aW1lc3RhbXAiOiBwZC50b19kYXRldGltZSh0aW1lc3RhbXAsIGZvcm1hdD1USU1FX0ZPUk1BVCksCiAgICAgICAgICAgICAgICAgICAgInJlY29yZF90eXBlIjogImRyaWZ0X21lYXN1cmVzIiwKICAgICAgICAgICAgICAgICAgICAidHZkX21lYW4iOiBkcmlmdF9yZXN1bHRbInR2ZF9tZWFuIl0sCiAgICAgICAgICAgICAgICAgICAgImtsZF9tZWFuIjogZHJpZnRfcmVzdWx0WyJrbGRfbWVhbiJdLAogICAgICAgICAgICAgICAgICAgICJoZWxsaW5nZXJfbWVhbiI6IGRyaWZ0X3Jlc3VsdFsiaGVsbGluZ2VyX21lYW4iXSwKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICBzZWxmLmZyYW1lcy53cml0ZSgKICAgICAgICAgICAgICAgICAgICBiYWNrZW5kPSJ0c2RiIiwKICAgICAgICAgICAgICAgICAgICB0YWJsZT1zZWxmLnRzZGJfcGF0aCwKICAgICAgICAgICAgICAgICAgICBkZnM9cGQuRGF0YUZyYW1lLmZyb21fZGljdChbdHNkYl9kcmlmdF9tZWFzdXJlc10pLAogICAgICAgICAgICAgICAgICAgIGluZGV4X2NvbHM9WyJ0aW1lc3RhbXAiLCAiZW5kcG9pbnRfaWQiLCAicmVjb3JkX3R5cGUiXSwKICAgICAgICAgICAgICAgICkKCiAgICAgICAgICAgICAgICBsb2dnZXIuaW5mbyhmIkRvbmUgdXBkYXRpbmcgZHJpZnQgbWVhc3VyZXMge2Z1bGxfcGF0aH0iKQoKICAgICAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICAgICAgbG9nZ2VyLmVycm9yKGYiRXhjZXB0aW9uIGZvciBlbmRwb2ludCB7ZW5kcG9pbnRfaWR9IikKICAgICAgICAgICAgICAgIHNlbGYuZXhjZXB0aW9uID0gZQoKICAgIGRlZiBjaGVja19mb3JfZHJpZnQoc2VsZiwgZHJpZnRfcmVzdWx0LCBlbmRwb2ludCk6CiAgICAgICAgdHZkX21lYW4gPSBkcmlmdF9yZXN1bHQuZ2V0KCJ0dmRfbWVhbiIpCiAgICAgICAgaGVsbGluZ2VyX21lYW4gPSBkcmlmdF9yZXN1bHQuZ2V0KCJoZWxsaW5nZXJfbWVhbiIpCgogICAgICAgIGRyaWZ0X21lYW4gPSAwLjAKICAgICAgICBpZiB0dmRfbWVhbiBhbmQgaGVsbGluZ2VyX21lYW46CiAgICAgICAgICAgIGRyaWZ0X21lYW4gPSAodHZkX21lYW4gKyBoZWxsaW5nZXJfbWVhbikgLyAyCgogICAgICAgIG1vbml0b3JfY29uZmlndXJhdGlvbiA9IGVuZHBvaW50LnNwZWMubW9uaXRvcl9jb25maWd1cmF0aW9uIG9yIHt9CgogICAgICAgIHBvc3NpYmxlX2RyaWZ0ID0gbW9uaXRvcl9jb25maWd1cmF0aW9uLmdldCgKICAgICAgICAgICAgInBvc3NpYmxlX2RyaWZ0Iiwgc2VsZi5kZWZhdWx0X3Bvc3NpYmxlX2RyaWZ0X3RocmVzaG9sZAogICAgICAgICkKICAgICAgICBkcmlmdF9kZXRlY3RlZCA9IG1vbml0b3JfY29uZmlndXJhdGlvbi5nZXQoCiAgICAgICAgICAgICJwb3NzaWJsZV9kcmlmdCIsIHNlbGYuZGVmYXVsdF9kcmlmdF9kZXRlY3RlZF90aHJlc2hvbGQKICAgICAgICApCgogICAgICAgIGRyaWZ0X3N0YXR1cyA9ICJOT19EUklGVCIKICAgICAgICBpZiBkcmlmdF9tZWFuID49IGRyaWZ0X2RldGVjdGVkOgogICAgICAgICAgICBkcmlmdF9zdGF0dXMgPSAiRFJJRlRfREVURUNURUQiCiAgICAgICAgZWxpZiBkcmlmdF9tZWFuID49IHBvc3NpYmxlX2RyaWZ0OgogICAgICAgICAgICBkcmlmdF9zdGF0dXMgPSAiUE9TU0lCTEVfRFJJRlQiCgogICAgICAgIHJldHVybiBkcmlmdF9zdGF0dXMsIGRyaWZ0X21lYW4KCiAgICBAc3RhdGljbWV0aG9kCiAgICBkZWYgZ2V0X2xhc3RfY3JlYXRlZF9kaXIoZnMsIGVuZHBvaW50X2Rpcik6CiAgICAgICAgZGlycyA9IGZzLmxzKGVuZHBvaW50X2RpclsibmFtZSJdKQogICAgICAgIGxhc3RfZGlyID0gc29ydGVkKGRpcnMsIGtleT1sYW1iZGEgazoga1sibmFtZSJdLnNwbGl0KCI9IilbLTFdKVstMV0KICAgICAgICByZXR1cm4gbGFzdF9kaXIKCgpkZWYgaGFuZGxlcihjb250ZXh0OiBNTENsaWVudEN0eCk6CiAgICBiYXRjaF9wcm9jZXNzb3IgPSBCYXRjaFByb2Nlc3NvcigKICAgICAgICBjb250ZXh0PWNvbnRleHQsCiAgICAgICAgcHJvamVjdD1jb250ZXh0LnByb2plY3QsCiAgICAgICAgbW9kZWxfbW9uaXRvcmluZ19hY2Nlc3Nfa2V5PW9zLmVudmlyb24uZ2V0KCJNT0RFTF9NT05JVE9SSU5HX0FDQ0VTU19LRVkiKSwKICAgICAgICB2M2lvX2FjY2Vzc19rZXk9b3MuZW52aXJvbi5nZXQoIlYzSU9fQUNDRVNTX0tFWSIpLAogICAgKQogICAgYmF0Y2hfcHJvY2Vzc29yLnBvc3RfaW5pdCgpCiAgICBiYXRjaF9wcm9jZXNzb3IucnVuKCkKICAgIGlmIGJhdGNoX3Byb2Nlc3Nvci5leGNlcHRpb246CiAgICAgICAgcmFpc2UgYmF0Y2hfcHJvY2Vzc29yLmV4Y2VwdGlvbgo= - commands: [] - code_origin: https://github.com/katyakats/functions.git#c5315901af56bce3f113041ad540591ee86ac00a:/Users/katyak/work/functions/model_monitoring_batch/model_monitoring_batch.py - origin_filename: /Users/katyak/work/functions/model_monitoring_batch/model_monitoring_batch.py - disable_auto_mount: false - priority_class_name: '' - affinity: null -verbose: false diff --git a/model_monitoring_batch/item.yaml b/model_monitoring_batch/item.yaml deleted file mode 100644 index a417ef149..000000000 --- a/model_monitoring_batch/item.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -categories: -- monitoring -description: '' -doc: '' -example: model_monitoring_batch.ipynb -generationDate: 2022-08-28:17-25 -hidden: false -icon: '' -labels: {} -maintainers: [] -marketplaceType: '' -mlrunVersion: 1.1.0 -name: model-monitoring-batch -platformVersion: 3.5.0 -spec: - filename: model_monitoring_batch.py - handler: handler - image: mlrun/mlrun - kind: job - requirements: [] -url: '' -version: 1.1.0 diff --git a/model_monitoring_batch/model_monitoring_batch.ipynb b/model_monitoring_batch/model_monitoring_batch.ipynb deleted file mode 100644 index f6e470072..000000000 --- a/model_monitoring_batch/model_monitoring_batch.ipynb +++ /dev/null @@ -1,91 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Model Monitoring\n", - "## Export function yaml" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from mlrun import code_to_function\n", - "from mlrun.runtimes import RemoteRuntime\n", - "\n", - "fn: RemoteRuntime = code_to_function(\n", - " name=\"model-monitoring-batch\",\n", - " kind=\"job\",\n", - " image=\"mlrun/mlrun\",\n", - " filename=\"model_monitoring_batch.py\",\n", - " handler=\"handler\",\n", - ")\n", - "\n", - "fn.export(\"model_monitoring_batch.yaml\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Deploy Batch Processing" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from mlrun import import_function\n", - "from mlrun.platforms import mount_v3io\n", - "from mlrun.runtimes import KubejobRuntime\n", - "\n", - "\n", - "# Set project name\n", - "project = \"\"\n", - "\n", - "fn: KubejobRuntime = import_function(\"hub://model_monitoring_batch\")\n", - "fn.metadata.project = project\n", - "fn.apply(mount_v3io())\n", - "fn.run(name='model-monitoring-batch', schedule=\"0 */1 * * *\", params={\"project\": project})" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/model_monitoring_batch/model_monitoring_batch.py b/model_monitoring_batch/model_monitoring_batch.py deleted file mode 100644 index 66c19de35..000000000 --- a/model_monitoring_batch/model_monitoring_batch.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright 2019 Iguazio -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import json -import os -from collections import defaultdict -from dataclasses import dataclass -from typing import Optional, List, Dict - -import numpy as np -import pandas as pd -import v3io -from mlrun import get_run_db -from mlrun import store_manager -from mlrun.data_types.infer import DFDataInfer, InferOptions -from mlrun.run import MLClientCtx -from mlrun.utils import logger, config -from mlrun.utils.model_monitoring import EndpointType, parse_model_endpoint_store_prefix -from mlrun.utils.v3io_clients import get_v3io_client, get_frames_client -from sklearn.preprocessing import KBinsDiscretizer - -TIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f%z" - - -@dataclass -class TotalVarianceDistance: - """ - Provides a symmetric drift distance between two periods t and u - Z - vector of random variables - Pt - Probability distribution over time span t - """ - - distrib_t: np.ndarray - distrib_u: np.ndarray - - def compute(self) -> float: - return np.sum(np.abs(self.distrib_t - self.distrib_u)) / 2 - - -@dataclass -class HellingerDistance: - """ - Hellinger distance is an f divergence measure, similar to the Kullback-Leibler (KL) divergence. - However, unlike KL Divergence the Hellinger divergence is symmetric and bounded over a probability space. - """ - - distrib_t: np.ndarray - distrib_u: np.ndarray - - def compute(self) -> float: - return np.sqrt( - 0.5 * ((np.sqrt(self.distrib_u) - np.sqrt(self.distrib_t)) ** 2).sum() - ) - - -@dataclass -class KullbackLeiblerDivergence: - """ - KL Divergence (or relative entropy) is a measure of how one probability distribution differs from another. - It is an asymmetric measure (thus it's not a metric) and it doesn't satisfy the triangle inequality. - KL Divergence of 0, indicates two identical distributions. - """ - - distrib_t: np.ndarray - distrib_u: np.ndarray - - def compute(self, capping=None, kld_scaling=0.0001) -> float: - t_u = np.sum( - np.where( - self.distrib_t != 0, - (self.distrib_t) - * np.log( - self.distrib_t - / np.where(self.distrib_u != 0, self.distrib_u, kld_scaling) - ), - 0, - ) - ) - u_t = np.sum( - np.where( - self.distrib_u != 0, - (self.distrib_u) - * np.log( - self.distrib_u - / np.where(self.distrib_t != 0, self.distrib_t, kld_scaling) - ), - 0, - ) - ) - result = t_u + u_t - if capping: - return capping if result == float("inf") else result - return result - - -class VirtualDrift: - def __init__( - self, - prediction_col: Optional[str] = None, - label_col: Optional[str] = None, - feature_weights: Optional[List[float]] = None, - inf_capping: Optional[float] = 10, - ): - self.prediction_col = prediction_col - self.label_col = label_col - self.feature_weights = feature_weights - self.capping = inf_capping - self.discretizers: Dict[str, KBinsDiscretizer] = {} - self.metrics = { - "tvd": TotalVarianceDistance, - "hellinger": HellingerDistance, - "kld": KullbackLeiblerDivergence, - } - - def dict_to_histogram(self, histogram_dict): - histograms = {} - for feature, stats in histogram_dict.items(): - histograms[feature] = stats["hist"][0] - - # Get features value counts - histograms = pd.concat( - [ - pd.DataFrame(data=hist, columns=[feature]) - for feature, hist in histograms.items() - ], - axis=1, - ) - # To Distribution - histograms = histograms / histograms.sum() - return histograms - - def compute_metrics_over_df(self, base_histogram, latest_histogram): - drift_measures = {} - for metric_name, metric in self.metrics.items(): - drift_measures[metric_name] = { - feature: metric( - base_histogram.loc[:, feature], latest_histogram.loc[:, feature] - ).compute() - for feature in base_histogram - } - return drift_measures - - def compute_drift_from_histograms(self, feature_stats, current_stats): - # Process histogram dictionaries to Dataframe of the histograms - # with Feature histogram as cols - base_histogram = self.dict_to_histogram(feature_stats) - latest_histogram = self.dict_to_histogram(current_stats) - - # Verify all the features exist between datasets - base_features = set(base_histogram.columns) - latest_features = set(latest_histogram.columns) - - features_common = list(base_features.intersection(latest_features)) - feature_difference = list(base_features ^ latest_features) - - if not features_common: - raise ValueError( - f"No common features found: {base_features} <> {latest_features}" - ) - - base_histogram = base_histogram.drop( - feature_difference, axis=1, errors="ignore" - ) - latest_histogram = latest_histogram.drop( - feature_difference, axis=1, errors="ignore" - ) - - # Compute the drift per feature - features_drift_measures = self.compute_metrics_over_df( - base_histogram.loc[:, features_common], - latest_histogram.loc[:, features_common], - ) - - # Compute total drift measures for features - for metric_name in self.metrics.keys(): - feature_values = list(features_drift_measures[metric_name].values()) - features_drift_measures[metric_name]["total_sum"] = np.sum(feature_values) - features_drift_measures[metric_name]["total_mean"] = np.mean(feature_values) - - # Add weighted mean by given feature weights if provided - if self.feature_weights: - features_drift_measures[metric_name]["total_weighted_mean"] = np.dot( - feature_values, self.feature_weights - ) - - drift_result = defaultdict(dict) - - for feature in features_common: - for metric, values in features_drift_measures.items(): - drift_result[feature][metric] = values[feature] - sum = features_drift_measures[metric]["total_sum"] - mean = features_drift_measures[metric]["total_mean"] - drift_result[f"{metric}_sum"] = sum - drift_result[f"{metric}_mean"] = mean - if self.feature_weights: - metric_measure = features_drift_measures[metric] - weighted_mean = metric_measure["total_weighted_mean"] - drift_result[f"{metric}_weighted_mean"] = weighted_mean - - if self.label_col: - label_drift_measures = self.compute_metrics_over_df( - base_histogram.loc[:, self.label_col], - latest_histogram.loc[:, self.label_col], - ) - for metric, values in label_drift_measures.items(): - drift_result[self.label_col][metric] = values[metric] - - if self.prediction_col: - prediction_drift_measures = self.compute_metrics_over_df( - base_histogram.loc[:, self.prediction_col], - latest_histogram.loc[:, self.prediction_col], - ) - for metric, values in prediction_drift_measures.items(): - drift_result[self.prediction_col][metric] = values[metric] - - return drift_result - - -class BatchProcessor: - def __init__( - self, - context: MLClientCtx, - project: str, - model_monitoring_access_key: str, - v3io_access_key: str, - ): - self.context = context - self.project = project - - self.v3io_access_key = v3io_access_key - self.model_monitoring_access_key = ( - model_monitoring_access_key or v3io_access_key - ) - - self.virtual_drift = VirtualDrift(inf_capping=10) - - template = config.model_endpoint_monitoring.store_prefixes.default - - kv_path = template.format(project=self.project, kind="endpoints") - _, self.kv_container, self.kv_path = parse_model_endpoint_store_prefix(kv_path) - - tsdb_path = template.format(project=project, kind="events") - _, self.tsdb_container, self.tsdb_path = parse_model_endpoint_store_prefix( - tsdb_path - ) - - stream_path = template.format(project=self.project, kind="log_stream") - _, self.stream_container, self.stream_path = parse_model_endpoint_store_prefix( - stream_path - ) - - self.parquet_path = config.model_endpoint_monitoring.store_prefixes.user_space.format( - project=project, kind="parquet" - ) - - logger.info( - "Initializing BatchProcessor", - project=project, - model_monitoring_access_key_initalized=bool(model_monitoring_access_key), - v3io_access_key_initialized=bool(v3io_access_key), - parquet_path=self.parquet_path, - kv_container=self.kv_container, - kv_path=self.kv_path, - tsdb_container=self.tsdb_container, - tsdb_path=self.tsdb_path, - stream_container=self.stream_container, - stream_path=self.stream_path, - ) - - self.default_possible_drift_threshold = ( - config.model_endpoint_monitoring.drift_thresholds.default.possible_drift - ) - self.default_drift_detected_threshold = ( - config.model_endpoint_monitoring.drift_thresholds.default.drift_detected - ) - - self.db = get_run_db() - self.v3io = get_v3io_client(access_key=self.v3io_access_key) - self.frames = get_frames_client( - address=config.v3io_framesd, - container=self.tsdb_container, - token=self.v3io_access_key, - ) - self.exception = None - - def post_init(self): - response = self.v3io.create_stream( - container=self.stream_container, - path=self.stream_path, - shard_count=1, - raise_for_status=v3io.dataplane.RaiseForStatus.never, - access_key=self.v3io_access_key, - ) - - if not (response.status_code == 400 and "ResourceInUse" in str(response.body)): - response.raise_for_status([409, 204, 403]) - - def run(self): - - try: - endpoints = self.db.list_model_endpoints(self.project) - except Exception as e: - logger.error("Failed to list endpoints", exc=e) - return - - active_endpoints = set() - for endpoint in endpoints.endpoints: - if endpoint.spec.active: - active_endpoints.add(endpoint.metadata.uid) - - store, sub = store_manager.get_or_create_store(self.parquet_path) - prefix = self.parquet_path.replace(sub, "") - fs = store.get_filesystem(silent=False) - - if not fs.exists(sub): - logger.warn( - f"{sub} does not exist" - ) - return - - for endpoint_dir in fs.ls(sub): - endpoint_id = endpoint_dir["name"].split("=")[-1] - if endpoint_id not in active_endpoints: - continue - - try: - last_year = self.get_last_created_dir(fs, endpoint_dir) - last_month = self.get_last_created_dir(fs, last_year) - last_day = self.get_last_created_dir(fs, last_month) - last_hour = self.get_last_created_dir(fs, last_day) - - full_path = f"{prefix}{last_hour['name']}" - - logger.info(f"Now processing {full_path}") - - endpoint = self.db.get_model_endpoint( - project=self.project, endpoint_id=endpoint_id - ) - - if endpoint.status.endpoint_type == EndpointType.ROUTER: - # endpoint.status.feature_stats is None - logger.info(f"{endpoint_id} is router skipping") - continue - - df = pd.read_parquet(full_path) - timestamp = df["timestamp"].iloc[-1] - - named_features_df = list(df["named_features"]) - named_features_df = pd.DataFrame(named_features_df) - - current_stats = DFDataInfer.get_stats( - df=named_features_df, options=InferOptions.Histogram - ) - - drift_result = self.virtual_drift.compute_drift_from_histograms( - feature_stats=endpoint.status.feature_stats, - current_stats=current_stats, - ) - - logger.info("Drift result", drift_result=drift_result) - - drift_status, drift_measure = self.check_for_drift( - drift_result=drift_result, endpoint=endpoint - ) - - logger.info( - "Drift status", - endpoint_id=endpoint_id, - drift_status=drift_status, - drift_measure=drift_measure, - ) - - if drift_status == "POSSIBLE_DRIFT" or drift_status == "DRIFT_DETECTED": - self.v3io.stream.put_records( - container=self.stream_container, - stream_path=self.stream_path, - records=[ - { - "data": json.dumps( - { - "endpoint_id": endpoint_id, - "drift_status": drift_status, - "drift_measure": drift_measure, - "drift_per_feature": {**drift_result}, - } - ) - } - ], - ) - - self.v3io.kv.update( - container=self.kv_container, - table_path=self.kv_path, - key=endpoint_id, - attributes={ - "current_stats": json.dumps(current_stats), - "drift_measures": json.dumps(drift_result), - "drift_status": drift_status, - }, - ) - - tsdb_drift_measures = { - "endpoint_id": endpoint_id, - "timestamp": pd.to_datetime(timestamp, format=TIME_FORMAT), - "record_type": "drift_measures", - "tvd_mean": drift_result["tvd_mean"], - "kld_mean": drift_result["kld_mean"], - "hellinger_mean": drift_result["hellinger_mean"], - } - - self.frames.write( - backend="tsdb", - table=self.tsdb_path, - dfs=pd.DataFrame.from_dict([tsdb_drift_measures]), - index_cols=["timestamp", "endpoint_id", "record_type"], - ) - - logger.info(f"Done updating drift measures {full_path}") - - except Exception as e: - logger.error(f"Exception for endpoint {endpoint_id}") - self.exception = e - - def check_for_drift(self, drift_result, endpoint): - tvd_mean = drift_result.get("tvd_mean") - hellinger_mean = drift_result.get("hellinger_mean") - - drift_mean = 0.0 - if tvd_mean and hellinger_mean: - drift_mean = (tvd_mean + hellinger_mean) / 2 - - monitor_configuration = endpoint.spec.monitor_configuration or {} - - possible_drift = monitor_configuration.get( - "possible_drift", self.default_possible_drift_threshold - ) - drift_detected = monitor_configuration.get( - "possible_drift", self.default_drift_detected_threshold - ) - - drift_status = "NO_DRIFT" - if drift_mean >= drift_detected: - drift_status = "DRIFT_DETECTED" - elif drift_mean >= possible_drift: - drift_status = "POSSIBLE_DRIFT" - - return drift_status, drift_mean - - @staticmethod - def get_last_created_dir(fs, endpoint_dir): - dirs = fs.ls(endpoint_dir["name"]) - last_dir = sorted(dirs, key=lambda k: k["name"].split("=")[-1])[-1] - return last_dir - - -def handler(context: MLClientCtx): - batch_processor = BatchProcessor( - context=context, - project=context.project, - model_monitoring_access_key=os.environ.get("MODEL_MONITORING_ACCESS_KEY"), - v3io_access_key=os.environ.get("V3IO_ACCESS_KEY"), - ) - batch_processor.post_init() - batch_processor.run() - if batch_processor.exception: - raise batch_processor.exception diff --git a/model_monitoring_batch/requirements.txt b/model_monitoring_batch/requirements.txt deleted file mode 100644 index 06d315b02..000000000 --- a/model_monitoring_batch/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -v3io -scikit-learn \ No newline at end of file diff --git a/validate_great_expectations/README.md b/validate_great_expectations/README.md deleted file mode 100644 index 5f113be58..000000000 --- a/validate_great_expectations/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Great Expectations Validation -![Great Expectations Logo](doc/great-expectations-logo-full-size.png) - -Run data validation via Great Expectations. Will validate a given dataset with a given set of expectations, run the validation, and log the output HTML data doc in MLRun. - -## Prerequisites - -See [1_set_expectations.ipynb](1_set_expectations.ipynb) for a full example. - -- Initialized a Great Expectations project -- Configured at least one Datasource i.e. `my_datasource` -- Created at least one Expectation Suite i.e. `my_suite` -- Created a Checkpoint i.e. `my_checkpoint` - -## Usage - -See [2_validate_expectations.ipynb](2_validate_expectations.ipynb) for a full example. - -```python -import mlrun - -fn = mlrun.import_function("hub://great_expectations") -run = fn.run( - inputs={"data": "https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv"}, - params={ - "expectation_suite_name": "test_suite", - "data_asset_name": "iris_dataset", - }, -) -``` - -## All Configuration -Inputs -```rst -:param data: Data to validate. Can be local or remote link. -``` - -Parameters -```rst -:param expectation_suite_name: Name of expectation suite to validate against. -:param data_asset_name: Name of dataset in Great Expectations. -:param datasource_name: Name of datasource to use for validation. -:param data_connector_name: Name of data connector to use for validation. -:param datasource_config: Full configuration for datasource. For use with custom - data sources other than the default pandas datasource. -:param batch_identifiers: Custom metadata for identifying particular batches of - data. For use when not using the default batch identifiers. -:param root_directory: Path to underlying Great Expectations project. Defaults to - MLRun project artifact path if not specified. -:param checkpoint_name: Name of checkpoint to use for validation. -:param checkpoint_config: Full configuration for checkpoint. For use with custome - checkpoint config other than the default. -``` \ No newline at end of file diff --git a/validate_great_expectations/doc/great-expectations-logo-full-size.png b/validate_great_expectations/doc/great-expectations-logo-full-size.png deleted file mode 100644 index 625fc92bc..000000000 Binary files a/validate_great_expectations/doc/great-expectations-logo-full-size.png and /dev/null differ diff --git a/validate_great_expectations/function.yaml b/validate_great_expectations/function.yaml deleted file mode 100644 index f3f1d3fce..000000000 --- a/validate_great_expectations/function.yaml +++ /dev/null @@ -1,170 +0,0 @@ -kind: job -metadata: - name: validate-great-expectations - tag: '' - hash: 82d0b647d443eb6e643d9dbfc8c0a650d74da018 - project: '' - labels: - author: nicks - framework: great-expectations - categories: - - data-validation - - data-analysis -spec: - command: '' - args: [] - image: '' - build: - functionSourceCode: aW1wb3J0IG9zCmltcG9ydCBzaHV0aWwKCmltcG9ydCBtbHJ1bgoKZnJvbSBncmVhdF9leHBlY3RhdGlvbnMuY29yZS5iYXRjaCBpbXBvcnQgUnVudGltZUJhdGNoUmVxdWVzdApmcm9tIGdyZWF0X2V4cGVjdGF0aW9ucy5kYXRhX2NvbnRleHQgaW1wb3J0IEJhc2VEYXRhQ29udGV4dApmcm9tIGdyZWF0X2V4cGVjdGF0aW9ucy5kYXRhX2NvbnRleHQudHlwZXMuYmFzZSBpbXBvcnQgKAogICAgRGF0YUNvbnRleHRDb25maWcsCiAgICBGaWxlc3lzdGVtU3RvcmVCYWNrZW5kRGVmYXVsdHMsCikKZnJvbSBncmVhdF9leHBlY3RhdGlvbnMuY2hlY2twb2ludC50eXBlcy5jaGVja3BvaW50X3Jlc3VsdCBpbXBvcnQgQ2hlY2twb2ludFJlc3VsdAoKCmRlZiBnZXRfZGVmYXVsdF9kYXRhc291cmNlX2NvbmZpZygKICAgIGRhdGFzb3VyY2VfbmFtZTogc3RyLCBkYXRhX2Nvbm5lY3Rvcl9uYW1lOiBzdHIKKSAtPiBkaWN0OgogICAgIiIiCiAgICBDb252ZW5pZW5jZSBmdW5jdGlvbiB0byBnZXQgdGhlIGRlZmF1bHQgcGFuZGFzIGRhdGFzb3VyY2UgY29uZmlnCiAgICBmb3IgdXNlIGluIHZhbGlkYXRpbmcgZXhwZWN0YXRpb25zLgoKICAgIDpwYXJhbSBkYXRhc291cmNlX25hbWU6ICAgICBOYW1lIG9mIGRhdGFzb3VyY2UuCiAgICA6cGFyYW0gZGF0YV9jb25uZWN0b3JfbmFtZTogTmFtZSBvZiBkYXRhIGNvbm5lY3Rvci4KCiAgICA6cmV0dXJuczogQ29uZmlndXJhdGlvbiBmb3IgZGVmYXVsdCBkYXRhc291cmNlLgogICAgIiIiCiAgICBkZWZhdWx0X2RhdGFzb3VyY2VfY29uZmlnID0gewogICAgICAgICJuYW1lIjogZiJ7ZGF0YXNvdXJjZV9uYW1lfSIsCiAgICAgICAgImNsYXNzX25hbWUiOiAiRGF0YXNvdXJjZSIsCiAgICAgICAgIm1vZHVsZV9uYW1lIjogImdyZWF0X2V4cGVjdGF0aW9ucy5kYXRhc291cmNlIiwKICAgICAgICAiZXhlY3V0aW9uX2VuZ2luZSI6IHsKICAgICAgICAgICAgIm1vZHVsZV9uYW1lIjogImdyZWF0X2V4cGVjdGF0aW9ucy5leGVjdXRpb25fZW5naW5lIiwKICAgICAgICAgICAgImNsYXNzX25hbWUiOiAiUGFuZGFzRXhlY3V0aW9uRW5naW5lIiwKICAgICAgICB9LAogICAgICAgICJkYXRhX2Nvbm5lY3RvcnMiOiB7CiAgICAgICAgICAgIGYie2RhdGFfY29ubmVjdG9yX25hbWV9IjogewogICAgICAgICAgICAgICAgImNsYXNzX25hbWUiOiAiUnVudGltZURhdGFDb25uZWN0b3IiLAogICAgICAgICAgICAgICAgIm1vZHVsZV9uYW1lIjogImdyZWF0X2V4cGVjdGF0aW9ucy5kYXRhc291cmNlLmRhdGFfY29ubmVjdG9yIiwKICAgICAgICAgICAgICAgICJiYXRjaF9pZGVudGlmaWVycyI6IFsiZGVmYXVsdF9pZGVudGlmaWVyX25hbWUiXSwKICAgICAgICAgICAgfSwKICAgICAgICB9LAogICAgfQogICAgcmV0dXJuIGRlZmF1bHRfZGF0YXNvdXJjZV9jb25maWcKCgpkZWYgZ2V0X2RlZmF1bHRfY2hlY2twb2ludF9jb25maWcoY2hlY2twb2ludF9uYW1lOiBzdHIpIC0+IGRpY3Q6CiAgICAiIiIKICAgIENvbnZlbmllbmNlIGZ1bmN0aW9uIHRvIGdldCB0aGUgZGVmYXVsdCBjaGVja3BvaW50IGNvbmZpZyBmb3IKICAgIHVzZSBpbiB2YWxpZGF0aW5nIGV4cGVjdGF0aW9ucy4KCiAgICA6cGFyYW0gY2hlY2twb2ludF9uYW1lOiBOYW1lIG9mIGNoZWNrcG9pbnQuCgogICAgOnJldHVybnM6IENvbmZpZ3VyYXRpb24gZm9yIGRlZmF1bHQgY2hlY2twb2ludC4KICAgICIiIgogICAgcmV0dXJuIHsKICAgICAgICAibmFtZSI6IGNoZWNrcG9pbnRfbmFtZSwKICAgICAgICAiY29uZmlnX3ZlcnNpb24iOiAxLjAsCiAgICAgICAgImNsYXNzX25hbWUiOiAiU2ltcGxlQ2hlY2twb2ludCIsCiAgICAgICAgInJ1bl9uYW1lX3RlbXBsYXRlIjogIiVZJW0lZC0lSCVNJVMtbXktcnVuLW5hbWUtdGVtcGxhdGUiLAogICAgfQoKCmRlZiBnZXRfZGF0YV9kb2NfcGF0aChjaGVja3BvaW50X3Jlc3VsdDogQ2hlY2twb2ludFJlc3VsdCkgLT4gc3RyOgogICAgIiIiCiAgICBDb252ZW5pZW5jZSBmdW5jdGlvbiB0byBnZXQgdGhlIHBhdGggb2YgdGhlIG91dHB1dAogICAgZGF0YSBkb2MgZnJvbSBhIGNoZWNrcG9pbnQgcmVzdWx0LgoKICAgIDpwYXJhbSBjaGVja3BvaW50X3Jlc3VsdDogR3JlYXQgRXhwZWN0YXRpb25zIGNoZWNrcG9pbnQgcmVzdWx0LgoKICAgIDpyZXR1cm5zOiBBYnNvbHV0ZSBwYXRoIHRvIG5ldyBkYXRhIGRvYy4KICAgICIiIgogICAgcmVzdWx0X2lkID0gY2hlY2twb2ludF9yZXN1bHQubGlzdF92YWxpZGF0aW9uX3Jlc3VsdF9pZGVudGlmaWVycygpWzBdCiAgICBkYXRhX2RvY19wYXRoID0gY2hlY2twb2ludF9yZXN1bHRbInJ1bl9yZXN1bHRzIl1bcmVzdWx0X2lkXVsiYWN0aW9uc19yZXN1bHRzIl1bCiAgICAgICAgInVwZGF0ZV9kYXRhX2RvY3MiCiAgICBdWyJsb2NhbF9zaXRlIl0KICAgIGRhdGFfZG9jX3BhdGggPSBkYXRhX2RvY19wYXRoLnJlcGxhY2UoImZpbGU6Ly8iLCAiIikKICAgIHJldHVybiBkYXRhX2RvY19wYXRoCgoKZGVmIHZhbGlkYXRlX2V4cGVjdGF0aW9ucygKICAgIGNvbnRleHQ6IG1scnVuLk1MQ2xpZW50Q3R4LAogICAgZGF0YTogbWxydW4uRGF0YUl0ZW0sCiAgICBleHBlY3RhdGlvbl9zdWl0ZV9uYW1lOiBzdHIsCiAgICBkYXRhX2Fzc2V0X25hbWU6IHN0ciwKICAgIGRhdGFzb3VyY2VfbmFtZTogc3RyID0gInBhbmRhc19kYXRhc291cmNlIiwKICAgIGRhdGFfY29ubmVjdG9yX25hbWU6IHN0ciA9ICJkZWZhdWx0X3J1bnRpbWVfZGF0YV9jb25uZWN0b3JfbmFtZSIsCiAgICBkYXRhc291cmNlX2NvbmZpZzogZGljdCA9IE5vbmUsCiAgICBiYXRjaF9pZGVudGlmaWVyczogZGljdCA9IE5vbmUsCiAgICByb290X2RpcmVjdG9yeTogc3RyID0gTm9uZSwKICAgIGNoZWNrcG9pbnRfbmFtZTogc3RyID0gTm9uZSwKICAgIGNoZWNrcG9pbnRfY29uZmlnOiBkaWN0ID0gTm9uZSwKKSAtPiBOb25lOgogICAgIiIiCiAgICBNYWluIGZ1bmN0aW9uIHRvIHZhbGlkYXRlIGFuIGlucHV0IGRhdGFzZXQsIGRhdGFzb3VyY2UsIGRhdGEgY29ubmVjdG9yLAogICAgYW5kIGV4cGVjdGF0aW9uIHN1aXRlLgoKICAgIFJ1bnMgdGhlIEdyZWF0IEV4cGVjdGF0aW9uIHZhbGlkYXRpb24gYW5kIGxvZ3MKICAgIHdoZXRoZXIgdGhlIHZhbGlkYXRpb24gd2FzIGEgc3VjY2VzcyBhcyB3ZWxsIGFzIHRoZSBvdXRwdXQgcGFnZQogICAgb2YgdGhlIGRhdGEgZG9jcy4KCiAgICA6cGFyYW0gY29udGV4dDogICAgICAgICAgICAgICAgTUxSdW4gY29udGV4dC4KICAgIDpwYXJhbSBkYXRhOiAgICAgICAgICAgICAgICAgICBEYXRhIHRvIHZhbGlkYXRlLiBDYW4gYmUgbG9jYWwgb3IgcmVtb3RlIGxpbmsuCiAgICA6cGFyYW0gZXhwZWN0YXRpb25fc3VpdGVfbmFtZTogTmFtZSBvZiBleHBlY3RhdGlvbiBzdWl0ZSB0byB2YWxpZGF0ZSBhZ2FpbnN0LgogICAgOnBhcmFtIGRhdGFfYXNzZXRfbmFtZTogICAgICAgIE5hbWUgb2YgZGF0YXNldCBpbiBHcmVhdCBFeHBlY3RhdGlvbnMuCiAgICA6cGFyYW0gZGF0YXNvdXJjZV9uYW1lOiAgICAgICAgTmFtZSBvZiBkYXRhc291cmNlIHRvIHVzZSBmb3IgdmFsaWRhdGlvbi4KICAgIDpwYXJhbSBkYXRhX2Nvbm5lY3Rvcl9uYW1lOiAgICBOYW1lIG9mIGRhdGEgY29ubmVjdG9yIHRvIHVzZSBmb3IgdmFsaWRhdGlvbi4KICAgIDpwYXJhbSBkYXRhc291cmNlX2NvbmZpZzogICAgICBGdWxsIGNvbmZpZ3VyYXRpb24gZm9yIGRhdGFzb3VyY2UuIEZvciB1c2Ugd2l0aCBjdXN0b20KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhIHNvdXJjZXMgb3RoZXIgdGhhbiB0aGUgZGVmYXVsdCBwYW5kYXMgZGF0YXNvdXJjZS4KICAgIDpwYXJhbSBiYXRjaF9pZGVudGlmaWVyczogICAgICBDdXN0b20gbWV0YWRhdGEgZm9yIGlkZW50aWZ5aW5nIHBhcnRpY3VsYXIgYmF0Y2hlcyBvZgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEuIEZvciB1c2Ugd2hlbiBub3QgdXNpbmcgdGhlIGRlZmF1bHQgYmF0Y2ggaWRlbnRpZmllcnMuCiAgICA6cGFyYW0gcm9vdF9kaXJlY3Rvcnk6ICAgICAgICAgUGF0aCB0byB1bmRlcmx5aW5nIEdyZWF0IEV4cGVjdGF0aW9ucyBwcm9qZWN0LiBEZWZhdWx0cyB0bwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIE1MUnVuIHByb2plY3QgYXJ0aWZhY3QgcGF0aCBpZiBub3Qgc3BlY2lmaWVkLgogICAgOnBhcmFtIGNoZWNrcG9pbnRfbmFtZTogICAgICAgIE5hbWUgb2YgY2hlY2twb2ludCB0byB1c2UgZm9yIHZhbGlkYXRpb24uCiAgICA6cGFyYW0gY2hlY2twb2ludF9jb25maWc6ICAgICAgRnVsbCBjb25maWd1cmF0aW9uIGZvciBjaGVja3BvaW50LiBGb3IgdXNlIHdpdGggY3VzdG9tZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNoZWNrcG9pbnQgY29uZmlnIG90aGVyIHRoYW4gdGhlIGRlZmF1bHQuCiAgICAiIiIKCiAgICAjIEdldCBkYXRhCiAgICBkZiA9IGRhdGEuYXNfZGYoKQoKICAgICMgVXNlIGRlZmF1bHQgcm9vdCBkaXJlY3RvcnkgZm9yIHByb2plY3QgaWYgbm90IHNwZWNpZmllZAogICAgcm9vdF9kaXJlY3RvcnkgPSAoCiAgICAgICAgcm9vdF9kaXJlY3RvcnkKICAgICAgICBpZiByb290X2RpcmVjdG9yeQogICAgICAgIGVsc2UgZiIvdjNpby9wcm9qZWN0cy97Y29udGV4dC5wcm9qZWN0fS9ncmVhdF9leHBlY3RhdGlvbnMiCiAgICApCgogICAgIyBMb2FkIGdyZWF0IGV4cGVjdGF0aW9ucyBjb250ZXh0CiAgICBnZV9jb250ZXh0ID0gQmFzZURhdGFDb250ZXh0KAogICAgICAgIHByb2plY3RfY29uZmlnPURhdGFDb250ZXh0Q29uZmlnKAogICAgICAgICAgICBzdG9yZV9iYWNrZW5kX2RlZmF1bHRzPUZpbGVzeXN0ZW1TdG9yZUJhY2tlbmREZWZhdWx0cygKICAgICAgICAgICAgICAgIHJvb3RfZGlyZWN0b3J5PXJvb3RfZGlyZWN0b3J5CiAgICAgICAgICAgICkKICAgICAgICApCiAgICApCgogICAgIyBHZXQgZXhwZWN0YXRpb24gc3VpdGUKICAgIGdlX2NvbnRleHQuZ2V0X2V4cGVjdGF0aW9uX3N1aXRlKGV4cGVjdGF0aW9uX3N1aXRlX25hbWU9ZXhwZWN0YXRpb25fc3VpdGVfbmFtZSkKCiAgICAjIEFkZCBkZWZhdWx0IGRhdGEgc291cmNlIGlmIG5vdCBzcGVjaWZpZWQKICAgIGRhdGFzb3VyY2VfY29uZmlnID0gKAogICAgICAgIGRhdGFzb3VyY2VfY29uZmlnCiAgICAgICAgaWYgZGF0YXNvdXJjZV9jb25maWcKICAgICAgICBlbHNlIGdldF9kZWZhdWx0X2RhdGFzb3VyY2VfY29uZmlnKGRhdGFzb3VyY2VfbmFtZSwgZGF0YV9jb25uZWN0b3JfbmFtZSkKICAgICkKICAgIGdlX2NvbnRleHQuYWRkX2RhdGFzb3VyY2UoKipkYXRhc291cmNlX2NvbmZpZykKCiAgICAjIEdldCBkYXRhIGJhdGNoCiAgICBiYXRjaF9pZGVudGlmaWVycyA9ICgKICAgICAgICBiYXRjaF9pZGVudGlmaWVycwogICAgICAgIGlmIGJhdGNoX2lkZW50aWZpZXJzCiAgICAgICAgZWxzZSB7ImRlZmF1bHRfaWRlbnRpZmllcl9uYW1lIjogImRlZmF1bHRfaWRlbnRpZmllciJ9CiAgICApCiAgICBiYXRjaF9yZXF1ZXN0ID0gUnVudGltZUJhdGNoUmVxdWVzdCgKICAgICAgICBkYXRhc291cmNlX25hbWU9ZGF0YXNvdXJjZV9uYW1lLAogICAgICAgIGRhdGFfY29ubmVjdG9yX25hbWU9ZGF0YV9jb25uZWN0b3JfbmFtZSwKICAgICAgICBkYXRhX2Fzc2V0X25hbWU9ZGF0YV9hc3NldF9uYW1lLAogICAgICAgIHJ1bnRpbWVfcGFyYW1ldGVycz17ImJhdGNoX2RhdGEiOiBkZn0sCiAgICAgICAgYmF0Y2hfaWRlbnRpZmllcnM9YmF0Y2hfaWRlbnRpZmllcnMsCiAgICApCgogICAgIyBHZXQgdmFsaWRhdG9yCiAgICB2YWxpZGF0b3IgPSBnZV9jb250ZXh0LmdldF92YWxpZGF0b3IoCiAgICAgICAgYmF0Y2hfcmVxdWVzdD1iYXRjaF9yZXF1ZXN0LAogICAgICAgIGV4cGVjdGF0aW9uX3N1aXRlX25hbWU9ZXhwZWN0YXRpb25fc3VpdGVfbmFtZSwKICAgICkKCiAgICAjIFVzZSBkZWZhdWx0IGNoZWNrcG9pbnQgbmFtZSBhbmQgY29uZmlnIGlmIG5vdCBzcGVjaWZpZWQKICAgIGNoZWNrcG9pbnRfbmFtZSA9ICgKICAgICAgICBjaGVja3BvaW50X25hbWUgaWYgY2hlY2twb2ludF9uYW1lIGVsc2UgZiJ7ZGF0YV9hc3NldF9uYW1lfV9jaGVja3BvaW50IgogICAgKQogICAgY2hlY2twb2ludF9jb25maWcgPSAoCiAgICAgICAgY2hlY2twb2ludF9jb25maWcKICAgICAgICBpZiBjaGVja3BvaW50X2NvbmZpZwogICAgICAgIGVsc2UgZ2V0X2RlZmF1bHRfY2hlY2twb2ludF9jb25maWcoY2hlY2twb2ludF9uYW1lKQogICAgKQoKICAgICMgQWRkIGNoZWNrcG9pbnQKICAgIGdlX2NvbnRleHQuYWRkX2NoZWNrcG9pbnQoKipjaGVja3BvaW50X2NvbmZpZykKCiAgICAjIFJ1biBleHBlY3RhdGlvbiBzdWl0ZSBvbiBjaGVja3BvaW50CiAgICBjaGVja3BvaW50X3Jlc3VsdCA9IGdlX2NvbnRleHQucnVuX2NoZWNrcG9pbnQoCiAgICAgICAgY2hlY2twb2ludF9uYW1lPWNoZWNrcG9pbnRfbmFtZSwKICAgICAgICB2YWxpZGF0aW9ucz1bCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAgICJiYXRjaF9yZXF1ZXN0IjogYmF0Y2hfcmVxdWVzdCwKICAgICAgICAgICAgICAgICJleHBlY3RhdGlvbl9zdWl0ZV9uYW1lIjogZXhwZWN0YXRpb25fc3VpdGVfbmFtZSwKICAgICAgICAgICAgfQogICAgICAgIF0sCiAgICApCgogICAgIyBMb2cgc3VjY2VzcwogICAgY29udGV4dC5sb2dfcmVzdWx0KCJ2YWxpZGF0ZWQiLCBjaGVja3BvaW50X3Jlc3VsdFsic3VjY2VzcyJdKQoKICAgICMgTG9nIGRhdGEgZG9jCiAgICBkYXRhX2RvY19wYXRoID0gZ2V0X2RhdGFfZG9jX3BhdGgoY2hlY2twb2ludF9yZXN1bHQpCiAgICBjb250ZXh0LmxvZ19hcnRpZmFjdCgidmFsaWRhdGlvbl9yZXN1bHRzIiwgdGFyZ2V0X3BhdGg9ZGF0YV9kb2NfcGF0aCkK - base_image: mlrun/mlrun - commands: - - python -m pip install great-expectations==0.15.41 - code_origin: https://github.com/igz-us-sales/functions.git#c7b44af35294494a531a014f3d02a28eff3f4105:/User/functions/validate_great_expectations/validate_great_expectations.py - origin_filename: /User/functions/validate_great_expectations/validate_great_expectations.py - entry_points: - get_default_datasource_config: - name: get_default_datasource_config - doc: 'Convenience function to get the default pandas datasource config - - for use in validating expectations.' - parameters: - - name: datasource_name - type: str - doc: Name of datasource. - default: '' - - name: data_connector_name - type: str - doc: Name of data connector. - default: '' - outputs: - - default: '' - doc: Configuration for default datasource. - type: dict - lineno: 15 - get_default_checkpoint_config: - name: get_default_checkpoint_config - doc: 'Convenience function to get the default checkpoint config for - - use in validating expectations.' - parameters: - - name: checkpoint_name - type: str - doc: Name of checkpoint. - default: '' - outputs: - - default: '' - doc: Configuration for default checkpoint. - type: dict - lineno: 46 - get_data_doc_path: - name: get_data_doc_path - doc: 'Convenience function to get the path of the output - - data doc from a checkpoint result.' - parameters: - - name: checkpoint_result - type: CheckpointResult - doc: Great Expectations checkpoint result. - default: '' - outputs: - - default: '' - doc: Absolute path to new data doc. - type: str - lineno: 63 - validate_expectations: - name: validate_expectations - doc: 'Main function to validate an input dataset, datasource, data connector, - - and expectation suite. - - - Runs the Great Expectation validation and logs - - whether the validation was a success as well as the output page - - of the data docs.' - parameters: - - name: context - type: MLClientCtx - doc: MLRun context. - default: '' - - name: data - type: DataItem - doc: Data to validate. Can be local or remote link. - default: '' - - name: expectation_suite_name - type: str - doc: Name of expectation suite to validate against. - default: '' - - name: data_asset_name - type: str - doc: Name of dataset in Great Expectations. - default: '' - - name: datasource_name - type: str - doc: Name of datasource to use for validation. - default: pandas_datasource - - name: data_connector_name - type: str - doc: Name of data connector to use for validation. - default: default_runtime_data_connector_name - - name: datasource_config - type: dict - doc: Full configuration for datasource. For use with custom data sources other - than the default pandas datasource. - default: null - - name: batch_identifiers - type: dict - doc: Custom metadata for identifying particular batches of data. For use when - not using the default batch identifiers. - default: null - - name: root_directory - type: str - doc: Path to underlying Great Expectations project. Defaults to MLRun project - artifact path if not specified. - default: null - - name: checkpoint_name - type: str - doc: Name of checkpoint to use for validation. - default: null - - name: checkpoint_config - type: dict - doc: Full configuration for checkpoint. For use with custome checkpoint config - other than the default. - default: null - outputs: - - default: '' - lineno: 80 - description: Validate a dataset using Great Expectations - default_handler: validate_expectations - disable_auto_mount: false - env: [] - resources: - requests: - memory: 1Mi - cpu: 25m - limits: - memory: 20Gi - cpu: '2' - priority_class_name: igz-workload-medium - preemption_mode: prevent - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: app.iguazio.com/lifecycle - operator: NotIn - values: - - preemptible - - key: eks.amazonaws.com/capacityType - operator: NotIn - values: - - SPOT - - key: node-lifecycle - operator: NotIn - values: - - spot - tolerations: null - security_context: {} -verbose: false diff --git a/validate_great_expectations/item.yaml b/validate_great_expectations/item.yaml deleted file mode 100644 index 2c1a98b51..000000000 --- a/validate_great_expectations/item.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: v1 -categories: -- data-validation -- data-analysis -description: Validate a dataset using Great Expectations -doc: '' -example: validate_great_expectations.ipynb -generationDate: 2022-04-26:12-28 -hidden: false -icon: '' -labels: - author: nicks - framework: great-expectations -maintainers: [] -marketplaceType: '' -mlrunVersion: 1.1.0 -name: validate-great-expectations -platformVersion: 3.5.2 -spec: - filename: validate_great_expectations.py - handler: validate_expectations - image: mlrun/mlrun - kind: job - requirements: [great-expectations==0.15.41] -url: '' -version: 1.1.0 \ No newline at end of file diff --git a/validate_great_expectations/requirements.txt b/validate_great_expectations/requirements.txt deleted file mode 100644 index d1b8ef94b..000000000 --- a/validate_great_expectations/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -great-expectations==0.15.41 \ No newline at end of file diff --git a/validate_great_expectations/test_validate_great_expectations.py b/validate_great_expectations/test_validate_great_expectations.py deleted file mode 100644 index 0c54a6ec9..000000000 --- a/validate_great_expectations/test_validate_great_expectations.py +++ /dev/null @@ -1,209 +0,0 @@ -import os -import sys -from pathlib import Path -import shutil -import mlrun - -import pandas as pd -from great_expectations.core.batch import RuntimeBatchRequest -from great_expectations.data_context import BaseDataContext -from great_expectations.data_context.types.base import ( - DataContextConfig, - FilesystemStoreBackendDefaults, -) -from great_expectations.checkpoint.types.checkpoint_result import CheckpointResult - -from validate_great_expectations import ( - get_default_datasource_config, - get_default_checkpoint_config, - get_data_doc_path, -) - - -DATA_ASSET_NAME = "iris_dataset" -DATA_PATH = "https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv" -EXPECTATION_SUITE_NAME = "test_suite" -ROOT_DIRECTORY = f"/tmp/great_expectations" -DATASOURCE_NAME = "pandas_datasource" -DATA_CONNECTOR_NAME = "default_runtime_data_connector_name" - - -def test_get_default_datasource_config(): - datasource_name = "my_datasource" - data_connector_name = "my_dataconnector" - - expected_datasource_config = { - "name": f"{datasource_name}", - "class_name": "Datasource", - "module_name": "great_expectations.datasource", - "execution_engine": { - "module_name": "great_expectations.execution_engine", - "class_name": "PandasExecutionEngine", - }, - "data_connectors": { - f"{data_connector_name}": { - "class_name": "RuntimeDataConnector", - "module_name": "great_expectations.datasource.data_connector", - "batch_identifiers": ["default_identifier_name"], - }, - }, - } - - assert ( - get_default_datasource_config( - datasource_name=datasource_name, data_connector_name=data_connector_name - ) - == expected_datasource_config - ) - - -def test_get_default_checkpoint_config(): - checkpoint_name = "my_checkpoint" - - expected_checkpoint_config = { - "name": checkpoint_name, - "config_version": 1.0, - "class_name": "SimpleCheckpoint", - "run_name_template": "%Y%m%d-%H%M%S-my-run-name-template", - } - - assert ( - get_default_checkpoint_config(checkpoint_name=checkpoint_name) - == expected_checkpoint_config - ) - - -def set_expectations(fail=False): - ge_context = BaseDataContext( - project_config=DataContextConfig( - store_backend_defaults=FilesystemStoreBackendDefaults( - root_directory=ROOT_DIRECTORY - ) - ) - ) - - datasource_config = { - "name": f"{DATASOURCE_NAME}", - "class_name": "Datasource", - "module_name": "great_expectations.datasource", - "execution_engine": { - "module_name": "great_expectations.execution_engine", - "class_name": "PandasExecutionEngine", - }, - "data_connectors": { - f"{DATA_CONNECTOR_NAME}": { - "class_name": "RuntimeDataConnector", - "module_name": "great_expectations.datasource.data_connector", - "batch_identifiers": ["default_identifier_name"], - }, - }, - } - ge_context.add_datasource(**datasource_config) - - ge_context.create_expectation_suite( - expectation_suite_name=EXPECTATION_SUITE_NAME, overwrite_existing=True - ) - - df = pd.read_csv(DATA_PATH) - - batch_request = RuntimeBatchRequest( - datasource_name=DATASOURCE_NAME, - data_connector_name=DATA_CONNECTOR_NAME, - data_asset_name=DATA_ASSET_NAME, - runtime_parameters={"batch_data": df}, - batch_identifiers={"default_identifier_name": "default_identifier"}, - ) - - validator = ge_context.get_validator( - batch_request=batch_request, - expectation_suite_name=EXPECTATION_SUITE_NAME, - ) - - validator.expect_column_values_to_not_be_null(column="sepal length (cm)") - validator.expect_column_values_to_not_be_null(column="sepal width (cm)") - validator.expect_column_values_to_be_between( - column="sepal width (cm)", min_value=2, max_value=4.4 - ) - if fail: - validator.expect_column_values_to_be_between( - column="sepal length (cm)", min_value=0, max_value=5 - ) - - validator.save_expectation_suite(discard_failed_expectations=False) - - -def cleanup_expectations(): - dirpath = Path(ROOT_DIRECTORY) - if dirpath.exists() and dirpath.is_dir(): - shutil.rmtree(dirpath) - - -def run_expectations(): - fn = mlrun.import_function("function.yaml") - run = fn.run( - inputs={"data": "https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv"}, - params={ - "expectation_suite_name": EXPECTATION_SUITE_NAME, - "data_asset_name": DATA_ASSET_NAME, - "root_directory": ROOT_DIRECTORY, - "datasource_name": DATASOURCE_NAME, - "data_connector_name": DATA_CONNECTOR_NAME, - }, - local=True, - ) - return run - - -def test_validate_expectations_pass(): - # Setup - set_expectations(fail=False) - run = run_expectations() - - # Check that great expectations directory structure was successfully created - dirpath = Path(ROOT_DIRECTORY) - assert dirpath.exists() - assert dirpath.is_dir() - - # Check that run outptuts were successfully saved - assert "validated" in run.outputs - assert "validation_results" in run.outputs - - # Check that validation passed - assert run.outputs["validated"] == True - - # Assert that data docs were saved in run - assert run.outputs["validation_results"].endswith(".html") - - # Assert that data docs exist on filesystem - dirpath = Path(run.outputs["validation_results"]) - assert dirpath.exists() - - # Tear down - cleanup_expectations() - -def test_validate_expectations_fail(): - # Setup - set_expectations(fail=True) - run = run_expectations() - - # Check that great expectations directory structure was successfully created - dirpath = Path(ROOT_DIRECTORY) - assert dirpath.exists() - assert dirpath.is_dir() - - # Check that run outptuts were successfully saved - assert "validated" in run.outputs - assert "validation_results" in run.outputs - - # Check that validation passed - assert run.outputs["validated"] == False - - # Assert that data docs were saved in run - assert run.outputs["validation_results"].endswith(".html") - - # Assert that data docs exist on filesystem - dirpath = Path(run.outputs["validation_results"]) - assert dirpath.exists() - - # Tear down - cleanup_expectations() \ No newline at end of file diff --git a/validate_great_expectations/validate_great_expectations.ipynb b/validate_great_expectations/validate_great_expectations.ipynb deleted file mode 100644 index cd07a033f..000000000 --- a/validate_great_expectations/validate_great_expectations.ipynb +++ /dev/null @@ -1,934 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "07e810dc", - "metadata": {}, - "outputs": [], - "source": [ - "import mlrun\n", - "import pandas as pd\n", - "from great_expectations.core.batch import RuntimeBatchRequest\n", - "from great_expectations.data_context import BaseDataContext\n", - "from great_expectations.data_context.types.base import (\n", - " DataContextConfig,\n", - " FilesystemStoreBackendDefaults,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "f57b4a0e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2023-03-03 22:08:23,289 [info] loaded project great-expectations from MLRun DB\n" - ] - } - ], - "source": [ - "project = mlrun.get_or_create_project(\"great-expectations\", context=\"./\")" - ] - }, - { - "cell_type": "markdown", - "id": "6f721976", - "metadata": {}, - "source": [ - "### Config" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "63852ffe", - "metadata": {}, - "outputs": [], - "source": [ - "data_asset_name = \"iris_dataset\"\n", - "data_path = \"https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv\"\n", - "expectation_suite_name = \"test_suite\"\n", - "root_directory = f\"/v3io/projects/{project.name}/great_expectations\"" - ] - }, - { - "cell_type": "markdown", - "id": "b4a3da34", - "metadata": {}, - "source": [ - "### Intialize Great Expectations Context" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "fb9c3956", - "metadata": {}, - "outputs": [], - "source": [ - "ge_context = BaseDataContext(\n", - " project_config=DataContextConfig(\n", - " store_backend_defaults=FilesystemStoreBackendDefaults(\n", - " root_directory=root_directory\n", - " )\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "42f80798", - "metadata": {}, - "source": [ - "### Add Pandas Datasource" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "dc3a01f3", - "metadata": {}, - "outputs": [], - "source": [ - "datasource_name = \"pandas_datasource\"\n", - "data_connector_name = \"default_runtime_data_connector_name\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b893a260", - "metadata": {}, - "outputs": [], - "source": [ - "datasource_config = {\n", - " \"name\": f\"{datasource_name}\",\n", - " \"class_name\": \"Datasource\",\n", - " \"module_name\": \"great_expectations.datasource\",\n", - " \"execution_engine\": {\n", - " \"module_name\": \"great_expectations.execution_engine\",\n", - " \"class_name\": \"PandasExecutionEngine\",\n", - " },\n", - " \"data_connectors\": {\n", - " f\"{data_connector_name}\": {\n", - " \"class_name\": \"RuntimeDataConnector\",\n", - " \"module_name\": \"great_expectations.datasource.data_connector\",\n", - " \"batch_identifiers\": [\"default_identifier_name\"],\n", - " },\n", - " },\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0358a4ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ge_context.add_datasource(**datasource_config)" - ] - }, - { - "cell_type": "markdown", - "id": "8c8406cf", - "metadata": {}, - "source": [ - "### Create Expectation Suite" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f68fb7e9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{\n", - " \"data_asset_type\": null,\n", - " \"meta\": {\n", - " \"great_expectations_version\": \"0.15.41\"\n", - " },\n", - " \"ge_cloud_id\": null,\n", - " \"expectations\": [],\n", - " \"expectation_suite_name\": \"test_suite\"\n", - "}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ge_context.create_expectation_suite(\n", - " expectation_suite_name=expectation_suite_name, overwrite_existing=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "c805fb0b", - "metadata": {}, - "source": [ - "### Get Data Batch" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "a2a7c0ed", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(data_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9838eb91", - "metadata": {}, - "outputs": [], - "source": [ - "batch_request = RuntimeBatchRequest(\n", - " datasource_name=datasource_name,\n", - " data_connector_name=data_connector_name,\n", - " data_asset_name=data_asset_name,\n", - " runtime_parameters={\"batch_data\": df},\n", - " batch_identifiers={\"default_identifier_name\": \"default_identifier\"},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b65f9642", - "metadata": {}, - "source": [ - "### Get Validator" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "8f8a1b3a", - "metadata": {}, - "outputs": [], - "source": [ - "validator = ge_context.get_validator(\n", - " batch_request=batch_request,\n", - " expectation_suite_name=expectation_suite_name,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "a5e9f68c", - "metadata": {}, - "source": [ - "### Add Expectations" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "7c3b44aa", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "789561a733774d34a821d4a57e18e9b9", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Calculating Metrics: 0%| | 0/6 [00:00 2023-03-03 22:09:01,338 [info] function spec saved to path: function.yaml\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fn.export()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a54cac0e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2023-03-03 22:09:13,830 [info] Started building image: .mlrun/func-great-expectations-validate-expectations:latest\n", - "\u001b[36mINFO\u001b[0m[0000] Retrieving image manifest mlrun/mlrun:1.1.0 \n", - "\u001b[36mINFO\u001b[0m[0000] Retrieving image mlrun/mlrun:1.1.0 from registry index.docker.io \n", - "\u001b[36mINFO\u001b[0m[0000] Built cross stage deps: map[] \n", - "\u001b[36mINFO\u001b[0m[0000] Retrieving image manifest mlrun/mlrun:1.1.0 \n", - "\u001b[36mINFO\u001b[0m[0000] Returning cached image manifest \n", - "\u001b[36mINFO\u001b[0m[0000] Executing 0 build triggers \n", - "\u001b[36mINFO\u001b[0m[0000] Unpacking rootfs as cmd RUN python -m pip install great-expectations==0.15.41 requires it. \n", - "\u001b[36mINFO\u001b[0m[0021] RUN python -m pip install great-expectations==0.15.41 \n", - "\u001b[36mINFO\u001b[0m[0021] Taking snapshot of full filesystem... \n", - "\u001b[36mINFO\u001b[0m[0033] cmd: /bin/sh \n", - "\u001b[36mINFO\u001b[0m[0033] args: [-c python -m pip install great-expectations==0.15.41] \n", - "\u001b[36mINFO\u001b[0m[0033] Running: [/bin/sh -c python -m pip install great-expectations==0.15.41] \n", - "Collecting great-expectations==0.15.41\n", - " Downloading great_expectations-0.15.41-py3-none-any.whl (5.2 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.2/5.2 MB 122.0 MB/s eta 0:00:00\n", - "Requirement already satisfied: pyparsing>=2.4 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (3.0.9)\n", - "Collecting ipywidgets>=7.5.1\n", - " Downloading ipywidgets-8.0.4-py3-none-any.whl (137 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 137.8/137.8 KB 249.8 MB/s eta 0:00:00\n", - "Requirement already satisfied: notebook>=6.4.10 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (6.4.12)\n", - "Requirement already satisfied: mistune>=0.8.4 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (2.0.4)\n", - "Collecting tqdm>=4.59.0\n", - " Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.1/77.1 KB 230.4 MB/s eta 0:00:00\n", - "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (1.26.12)\n", - "Requirement already satisfied: jinja2>=2.10 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (3.0.3)\n", - "Requirement already satisfied: scipy>=0.19.0 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (1.7.3)\n", - "Collecting tzlocal>=1.2\n", - " Downloading tzlocal-4.2-py3-none-any.whl (19 kB)\n", - "Requirement already satisfied: typing-extensions>=3.10.0.0 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (4.3.0)\n", - "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (2.28.1)\n", - "Requirement already satisfied: importlib-metadata>=1.7.0 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (4.12.0)\n", - "Requirement already satisfied: nbformat>=5.0 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (5.4.0)\n", - "Collecting altair<5,>=4.0.0\n", - " Downloading altair-4.2.2-py3-none-any.whl (813 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 813.6/813.6 KB 278.3 MB/s eta 0:00:00\n", - "Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (1.21.6)\n", - "Requirement already satisfied: pydantic<2.0,>=1.0 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (1.10.1)\n", - "Collecting jsonpatch>=1.22\n", - " Downloading jsonpatch-1.32-py2.py3-none-any.whl (12 kB)\n", - "Requirement already satisfied: cryptography>=3.2 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (3.3.2)\n", - "Requirement already satisfied: jsonschema>=2.5.1 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (3.2.0)\n", - "Requirement already satisfied: pytz>=2021.3 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (2022.2.1)\n", - "Collecting ruamel.yaml<0.17.18,>=0.16\n", - " Downloading ruamel.yaml-0.17.17-py3-none-any.whl (109 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 109.1/109.1 KB 247.4 MB/s eta 0:00:00\n", - "Requirement already satisfied: Ipython>=7.16.3 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (7.34.0)\n", - "Collecting marshmallow<4.0.0,>=3.7.1\n", - " Downloading marshmallow-3.19.0-py3-none-any.whl (49 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 49.1/49.1 KB 216.6 MB/s eta 0:00:00\n", - "Requirement already satisfied: Click>=7.1.2 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (8.0.4)\n", - "Collecting makefun<2,>=1.7.0\n", - " Downloading makefun-1.15.1-py2.py3-none-any.whl (22 kB)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (2.8.2)\n", - "Collecting colorama>=0.4.3\n", - " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", - "Requirement already satisfied: pandas>=1.1.0 in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (1.3.5)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.7/site-packages (from great-expectations==0.15.41) (21.3)\n", - "Requirement already satisfied: toolz in /usr/local/lib/python3.7/site-packages (from altair<5,>=4.0.0->great-expectations==0.15.41) (0.12.0)\n", - "Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/site-packages (from altair<5,>=4.0.0->great-expectations==0.15.41) (0.4)\n", - "Requirement already satisfied: six>=1.4.1 in /usr/local/lib/python3.7/site-packages (from cryptography>=3.2->great-expectations==0.15.41) (1.16.0)\n", - "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.7/site-packages (from cryptography>=3.2->great-expectations==0.15.41) (1.15.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/site-packages (from importlib-metadata>=1.7.0->great-expectations==0.15.41) (3.8.1)\n", - "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (3.0.31)\n", - "Requirement already satisfied: backcall in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (0.2.0)\n", - "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (0.1.6)\n", - "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (4.8.0)\n", - "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (57.5.0)\n", - "Requirement already satisfied: decorator in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (5.1.1)\n", - "Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (0.18.1)\n", - "Requirement already satisfied: pygments in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (2.13.0)\n", - "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (5.3.0)\n", - "Requirement already satisfied: pickleshare in /usr/local/lib/python3.7/site-packages (from Ipython>=7.16.3->great-expectations==0.15.41) (0.7.5)\n", - "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.7/site-packages (from ipywidgets>=7.5.1->great-expectations==0.15.41) (6.15.2)\n", - "Collecting jupyterlab-widgets~=3.0\n", - " Downloading jupyterlab_widgets-3.0.5-py3-none-any.whl (384 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 384.3/384.3 KB 282.6 MB/s eta 0:00:00\n", - "Collecting widgetsnbextension~=4.0\n", - " Downloading widgetsnbextension-4.0.5-py3-none-any.whl (2.0 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 264.7 MB/s eta 0:00:00\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.7/site-packages (from jinja2>=2.10->great-expectations==0.15.41) (2.1.1)\n", - "Collecting jsonpointer>=1.9\n", - " Downloading jsonpointer-2.3-py2.py3-none-any.whl (7.8 kB)\n", - "Requirement already satisfied: pyrsistent>=0.14.0 in /usr/local/lib/python3.7/site-packages (from jsonschema>=2.5.1->great-expectations==0.15.41) (0.18.1)\n", - "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/site-packages (from jsonschema>=2.5.1->great-expectations==0.15.41) (22.1.0)\n", - "Requirement already satisfied: jupyter-core in /usr/local/lib/python3.7/site-packages (from nbformat>=5.0->great-expectations==0.15.41) (4.11.1)\n", - "Requirement already satisfied: fastjsonschema in /usr/local/lib/python3.7/site-packages (from nbformat>=5.0->great-expectations==0.15.41) (2.16.1)\n", - "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (0.2.0)\n", - "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (7.0.0)\n", - "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (1.8.0)\n", - "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (0.14.1)\n", - "Requirement already satisfied: tornado>=6.1 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (6.2)\n", - "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (1.5.5)\n", - "Requirement already satisfied: pyzmq>=17 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (23.2.1)\n", - "Requirement already satisfied: jupyter-client>=5.3.4 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (7.3.5)\n", - "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (0.15.0)\n", - "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.7/site-packages (from notebook>=6.4.10->great-expectations==0.15.41) (21.3.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/site-packages (from requests>=2.20->great-expectations==0.15.41) (3.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/site-packages (from requests>=2.20->great-expectations==0.15.41) (2022.6.15)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.7/site-packages (from requests>=2.20->great-expectations==0.15.41) (2.1.1)\n", - "Collecting ruamel.yaml.clib>=0.1.2\n", - " Downloading ruamel.yaml.clib-0.2.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (500 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 500.1/500.1 KB 251.9 MB/s eta 0:00:00\n", - "Collecting pytz-deprecation-shim\n", - " Downloading pytz_deprecation_shim-0.1.0.post0-py2.py3-none-any.whl (15 kB)\n", - "Collecting backports.zoneinfo\n", - " Downloading backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl (70 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 70.7/70.7 KB 212.6 MB/s eta 0:00:00\n", - "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/site-packages (from cffi>=1.12->cryptography>=3.2->great-expectations==0.15.41) (2.21)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.7/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.5.1->great-expectations==0.15.41) (5.9.2)\n", - "Requirement already satisfied: debugpy>=1.0 in /usr/local/lib/python3.7/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.5.1->great-expectations==0.15.41) (1.6.3)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /usr/local/lib/python3.7/site-packages (from jedi>=0.16->Ipython>=7.16.3->great-expectations==0.15.41) (0.8.3)\n", - "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (0.6.7)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (4.11.1)\n", - "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (1.5.0)\n", - "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (1.1.1)\n", - "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (0.2.2)\n", - "Requirement already satisfied: defusedxml in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (0.7.1)\n", - "Requirement already satisfied: bleach in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (5.0.1)\n", - "Requirement already satisfied: lxml in /usr/local/lib/python3.7/site-packages (from nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (4.9.1)\n", - "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.7/site-packages (from pexpect>4.3->Ipython>=7.16.3->great-expectations==0.15.41) (0.7.0)\n", - "Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->Ipython>=7.16.3->great-expectations==0.15.41) (0.2.5)\n", - "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.7/site-packages (from argon2-cffi->notebook>=6.4.10->great-expectations==0.15.41) (21.2.0)\n", - "Collecting tzdata\n", - " Downloading tzdata-2022.7-py2.py3-none-any.whl (340 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 340.1/340.1 KB 258.0 MB/s eta 0:00:00\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.7/site-packages (from beautifulsoup4->nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (2.3.2.post1)\n", - "Requirement already satisfied: webencodings in /usr/local/lib/python3.7/site-packages (from bleach->nbconvert>=5->notebook>=6.4.10->great-expectations==0.15.41) (0.5.1)\n", - "Installing collected packages: makefun, widgetsnbextension, tzdata, tqdm, ruamel.yaml.clib, jupyterlab-widgets, jsonpointer, colorama, backports.zoneinfo, ruamel.yaml, pytz-deprecation-shim, marshmallow, jsonpatch, tzlocal, altair, ipywidgets, great-expectations\n", - "Successfully installed altair-4.2.2 backports.zoneinfo-0.2.1 colorama-0.4.6 great-expectations-0.15.41 ipywidgets-8.0.4 jsonpatch-1.32 jsonpointer-2.3 jupyterlab-widgets-3.0.5 makefun-1.15.1 marshmallow-3.19.0 pytz-deprecation-shim-0.1.0.post0 ruamel.yaml-0.17.17 ruamel.yaml.clib-0.2.7 tqdm-4.65.0 tzdata-2022.7 tzlocal-4.2 widgetsnbextension-4.0.5\n", - "WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.\n", - "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\n", - "\u001b[36mINFO\u001b[0m[0039] Taking snapshot of full filesystem... \n", - "\u001b[36mINFO\u001b[0m[0042] Pushing image to docker-registry.default-tenant.app.us-sales-350.iguazio-cd1.com:80/mlrun/func-great-expectations-validate-expectations:latest \n", - "\u001b[36mINFO\u001b[0m[0042] Pushed docker-registry.default-tenant.app.us-sales-350.iguazio-cd1.com:80/mlrun/func-great-expectations-validate-expectations@sha256:1e3b2615cc8f2dc39062037c0a27299e15d12d3011d50a9e8214ec34b84c21a2 \n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "BuildStatus(ready=True, outputs={'image': '.mlrun/func-great-expectations-validate-expectations:latest'})" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "project.build_function(fn)" - ] - }, - { - "cell_type": "markdown", - "id": "df71ef7c", - "metadata": {}, - "source": [ - "### Run Validation" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "857021cf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2023-03-03 22:10:40,839 [info] starting run validate-expectations-validate_expectations uid=436372d741034d678145c63fecfe4450 DB=http://mlrun-api:8080\n", - "> 2023-03-03 22:10:41,124 [info] Job is running in the background, pod: validate-expectations-validate-expectations-tx9xb\n", - "> 2023-03-03 22:10:55,088 [info] run executed, status=completed\n", - "Calculating Metrics: 100%|██████████| 19/19 [00:00<00:00, 323.87it/s]\n", - "final state: completed\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
great-expectations0Mar 03 22:10:51completedvalidate-expectations-validate_expectations
v3io_user=nick
kind=job
owner=nick
mlrun/client_version=1.1.0
host=validate-expectations-validate-expectations-tx9xb
data
expectation_suite_name=test_suite
data_asset_name=iris_dataset
validated=False
validation_results
\n", - "
\n", - "
\n", - "
\n", - " Title\n", - " ×\n", - "
\n", - " \n", - "
\n", - "
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - " > to track results use the .show() or .logs() methods or click here to open in UI" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2023-03-03 22:11:01,178 [info] run executed, status=completed\n" - ] - } - ], - "source": [ - "run = fn.run(\n", - " inputs={\"data\": \"https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv\"},\n", - " params={\n", - " \"expectation_suite_name\": \"test_suite\",\n", - " \"data_asset_name\": \"iris_dataset\",\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "dec9172c", - "metadata": {}, - "source": [ - "### View Data Doc" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "8b90a4c7", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from IPython.display import IFrame" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "3c59d69e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "IFrame(src=os.path.relpath(run.outputs[\"validation_results\"]), width=1000, height=800)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a3caea0c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:root] *", - "language": "python", - "name": "conda-root-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/validate_great_expectations/validate_great_expectations.py b/validate_great_expectations/validate_great_expectations.py deleted file mode 100644 index 1e48df031..000000000 --- a/validate_great_expectations/validate_great_expectations.py +++ /dev/null @@ -1,197 +0,0 @@ -import os -import shutil - -import mlrun - -from great_expectations.core.batch import RuntimeBatchRequest -from great_expectations.data_context import BaseDataContext -from great_expectations.data_context.types.base import ( - DataContextConfig, - FilesystemStoreBackendDefaults, -) -from great_expectations.checkpoint.types.checkpoint_result import CheckpointResult - - -def get_default_datasource_config( - datasource_name: str, data_connector_name: str -) -> dict: - """ - Convenience function to get the default pandas datasource config - for use in validating expectations. - - :param datasource_name: Name of datasource. - :param data_connector_name: Name of data connector. - - :returns: Configuration for default datasource. - """ - default_datasource_config = { - "name": f"{datasource_name}", - "class_name": "Datasource", - "module_name": "great_expectations.datasource", - "execution_engine": { - "module_name": "great_expectations.execution_engine", - "class_name": "PandasExecutionEngine", - }, - "data_connectors": { - f"{data_connector_name}": { - "class_name": "RuntimeDataConnector", - "module_name": "great_expectations.datasource.data_connector", - "batch_identifiers": ["default_identifier_name"], - }, - }, - } - return default_datasource_config - - -def get_default_checkpoint_config(checkpoint_name: str) -> dict: - """ - Convenience function to get the default checkpoint config for - use in validating expectations. - - :param checkpoint_name: Name of checkpoint. - - :returns: Configuration for default checkpoint. - """ - return { - "name": checkpoint_name, - "config_version": 1.0, - "class_name": "SimpleCheckpoint", - "run_name_template": "%Y%m%d-%H%M%S-my-run-name-template", - } - - -def get_data_doc_path(checkpoint_result: CheckpointResult) -> str: - """ - Convenience function to get the path of the output - data doc from a checkpoint result. - - :param checkpoint_result: Great Expectations checkpoint result. - - :returns: Absolute path to new data doc. - """ - result_id = checkpoint_result.list_validation_result_identifiers()[0] - data_doc_path = checkpoint_result["run_results"][result_id]["actions_results"][ - "update_data_docs" - ]["local_site"] - data_doc_path = data_doc_path.replace("file://", "") - return data_doc_path - - -def validate_expectations( - context: mlrun.MLClientCtx, - data: mlrun.DataItem, - expectation_suite_name: str, - data_asset_name: str, - datasource_name: str = "pandas_datasource", - data_connector_name: str = "default_runtime_data_connector_name", - datasource_config: dict = None, - batch_identifiers: dict = None, - root_directory: str = None, - checkpoint_name: str = None, - checkpoint_config: dict = None, -) -> None: - """ - Main function to validate an input dataset, datasource, data connector, - and expectation suite. - - Runs the Great Expectation validation and logs - whether the validation was a success as well as the output page - of the data docs. - - :param context: MLRun context. - :param data: Data to validate. Can be local or remote link. - :param expectation_suite_name: Name of expectation suite to validate against. - :param data_asset_name: Name of dataset in Great Expectations. - :param datasource_name: Name of datasource to use for validation. - :param data_connector_name: Name of data connector to use for validation. - :param datasource_config: Full configuration for datasource. For use with custom - data sources other than the default pandas datasource. - :param batch_identifiers: Custom metadata for identifying particular batches of - data. For use when not using the default batch identifiers. - :param root_directory: Path to underlying Great Expectations project. Defaults to - MLRun project artifact path if not specified. - :param checkpoint_name: Name of checkpoint to use for validation. - :param checkpoint_config: Full configuration for checkpoint. For use with custome - checkpoint config other than the default. - """ - - # Get data - df = data.as_df() - - # Use default root directory for project if not specified - root_directory = ( - root_directory - if root_directory - else f"/v3io/projects/{context.project}/great_expectations" - ) - - # Load great expectations context - ge_context = BaseDataContext( - project_config=DataContextConfig( - store_backend_defaults=FilesystemStoreBackendDefaults( - root_directory=root_directory - ) - ) - ) - - # Get expectation suite - ge_context.get_expectation_suite(expectation_suite_name=expectation_suite_name) - - # Add default data source if not specified - datasource_config = ( - datasource_config - if datasource_config - else get_default_datasource_config(datasource_name, data_connector_name) - ) - ge_context.add_datasource(**datasource_config) - - # Get data batch - batch_identifiers = ( - batch_identifiers - if batch_identifiers - else {"default_identifier_name": "default_identifier"} - ) - batch_request = RuntimeBatchRequest( - datasource_name=datasource_name, - data_connector_name=data_connector_name, - data_asset_name=data_asset_name, - runtime_parameters={"batch_data": df}, - batch_identifiers=batch_identifiers, - ) - - # Get validator - validator = ge_context.get_validator( - batch_request=batch_request, - expectation_suite_name=expectation_suite_name, - ) - - # Use default checkpoint name and config if not specified - checkpoint_name = ( - checkpoint_name if checkpoint_name else f"{data_asset_name}_checkpoint" - ) - checkpoint_config = ( - checkpoint_config - if checkpoint_config - else get_default_checkpoint_config(checkpoint_name) - ) - - # Add checkpoint - ge_context.add_checkpoint(**checkpoint_config) - - # Run expectation suite on checkpoint - checkpoint_result = ge_context.run_checkpoint( - checkpoint_name=checkpoint_name, - validations=[ - { - "batch_request": batch_request, - "expectation_suite_name": expectation_suite_name, - } - ], - ) - - # Log success - context.log_result("validated", checkpoint_result["success"]) - - # Log data doc - data_doc_path = get_data_doc_path(checkpoint_result) - context.log_artifact("validation_results", target_path=data_doc_path)