Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 21 additions & 42 deletions open_archive/function.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,30 @@
kind: job
metadata:
name: open-archive
tag: ''
hash: a7dffde0d24ae5dd22d88a641ac25b82816a2bc1
project: ''
labels:
author: yaronh
categories:
- data-preparation
verbose: false
spec:
command: ''
args: []
image: mlrun/mlrun
description: Open a file/object archive into a target directory
default_handler: open_archive
build:
functionSourceCode: IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKIyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmltcG9ydCB6aXBmaWxlCmltcG9ydCB1cmxsaWIucmVxdWVzdAppbXBvcnQgdGFyZmlsZQppbXBvcnQganNvbgoKZnJvbSBtbHJ1bi5leGVjdXRpb24gaW1wb3J0IE1MQ2xpZW50Q3R4CmZyb20gbWxydW4uZGF0YXN0b3JlIGltcG9ydCBEYXRhSXRlbQpmcm9tIG1scnVuLmFydGlmYWN0cy5iYXNlIGltcG9ydCBEaXJBcnRpZmFjdAoKZnJvbSB0eXBpbmcgaW1wb3J0IFVuaW9uCmltcG9ydCBib3RvMwpmcm9tIHVybGxpYi5wYXJzZSBpbXBvcnQgdXJscGFyc2UKCmRlZiBvcGVuX2FyY2hpdmUoCiAgICBjb250ZXh0OiBNTENsaWVudEN0eCwKICAgIGFyY2hpdmVfdXJsOiBEYXRhSXRlbSwKICAgIHN1YmRpcjogc3RyID0gImNvbnRlbnQvIiwKICAgIGtleTogc3RyID0gImNvbnRlbnQiLAogICAgdGFyZ2V0X3BhdGg6IHN0ciA9IE5vbmUsCik6CiAgICAiIiJPcGVuIGEgZmlsZS9vYmplY3QgYXJjaGl2ZSBpbnRvIGEgdGFyZ2V0IGRpcmVjdG9yeQogICAgQ3VycmVudGx5IHN1cHBvcnRzIHppcCBhbmQgdGFyLmd6CiAgICA6cGFyYW0gY29udGV4dDogICAgICBmdW5jdGlvbiBleGVjdXRpb24gY29udGV4dAogICAgOnBhcmFtIGFyY2hpdmVfdXJsOiAgdXJsIG9mIGFyY2hpdmUgZmlsZSAKICAgIDpwYXJhbSBzdWJkaXI6ICAgICAgIHBhdGggd2l0aGluIGFydGlmYWN0IHN0b3JlIHdoZXJlIGV4dHJhY3RlZCBmaWxlcwogICAgICAgICAgICAgICAgICAgICAgICAgYXJlIHN0b3JlZAogICAgOnBhcmFtIGtleTogICAgICAgICAga2V5IG9mIGFyY2hpdmUgY29udGVudHMgaW4gYXJ0aWZhY3Qgc3RvcmUKICAgIDpwYXJhbSB0YXJnZXRfcGF0aDogIGZpbGUgc3lzdGVtIHBhdGggdG8gc3RvcmUgZXh0cmFjdGVkIGZpbGVz
CiAgICAiIiIKICAgIAogICAgYXJjaGl2ZV91cmwgPSBhcmNoaXZlX3VybC5sb2NhbCgpCiAgICB2M2lvX3N1YmRpciA9IE5vbmUKICAgICMgV2hlbiBjdXN0b20gYXJ0aWZhY3QgcGF0aCBpcyBkZWZpbmVkCiAgICBpZiBub3QgdGFyZ2V0X3BhdGggYW5kIGNvbnRleHQuYXJ0aWZhY3RfcGF0aDoKICAgICAgICBwYXJzZWRfc3ViZGlyID0gdXJscGFyc2UoY29udGV4dC5hcnRpZmFjdF9wYXRoKQogICAgICAgIGlmIHBhcnNlZF9zdWJkaXIuc2NoZW1lID09ICdzMyc6CiAgICAgICAgICAgIHN1YmRpciA9IG9zLnBhdGguam9pbihjb250ZXh0LmFydGlmYWN0X3BhdGgsIHN1YmRpcikKICAgICAgICBlbGlmIHBhcnNlZF9zdWJkaXIuc2NoZW1lID09ICd2M2lvJzoKICAgICAgICAgICAgdjNpb19zdWJkaXIgPSBvcy5wYXRoLmpvaW4oY29udGV4dC5hcnRpZmFjdF9wYXRoLCBzdWJkaXIpICMgVXNpbmcgdjNpb19zdWJkaXIgZm9yIGxvZ2dpbmcKICAgICAgICAgICAgc3ViZGlyID0gJy92M2lvJyArIHBhcnNlZF9zdWJkaXIucGF0aCArICcvJyArIHN1YmRpcgogICAgICAgICAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYnVXNpbmcgdjNpbyBzY2hlbWUsIGV4dHJhY3RpbmcgdG8ge3N1YmRpcn0nKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIGNvbnRleHQubG9nZ2VyLmluZm8oZidVbnJlY29nbml6YWJsZSBzY2hlbWUsIGV4dHJhY3RpbmcgdG8ge3N1YmRpcn0nKQogICAgICAgICAgICAKICAgICMgV2hlbiB3b3JraW5nIG9uIENFLCB0YXJnZXQgcGF0aCBtaWdodCBiZSBvbiBzMwogICAgaWYgJ3MzJyBpbiAodGFyZ2V0X3BhdGggb3Igc3ViZGlyKToKICAgICAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYnVXNpbmcgczMgc2NoZW1lLCBleHRyYWN0aW5nIHRvIHt0YXJnZXRfcGF0aCBvciBzdWJkaXJ9JykKICAgICAgICBpZiBvcy5lbnZpcm9uLmdldCgnUzNfRU5EUE9JTlRfVVJMJyk6CiAgICAgICAgICAgIGNsaWVudCA9IGJvdG8zLmNsaWVudCgnczMnLCBlbmRwb2ludF91cmwgPSBvcy5lbnZpcm9uLmdldCgnUzNfRU5EUE9JTlRfVVJMJykpIAogICAgICAgIGVsc2U6CiAgICAgICAgICAgIGNsaWVudCA9IGJvdG8zLmNsaWVudCgnczMnKSAgCiAgICAgICAgICAgIAogICAgICAgIGlmIGFyY2hpdmVfdXJsLmVuZHN3aXRoKCJneiIpOgogICAgICAgICAgICB3aXRoIHRhcmZpbGUub3BlbihhcmNoaXZlX3VybCwgbW9kZT0icnxneiIpIGFzIHJlZjoKICAgICAgICAgICAgICAgIGZvciBmaWxlbmFtZSBpbiByZWYubmFtZWxpc3QoKToKICAgICAgICAgICAgICAgICAgICBkYXRhPXJlZi5yZWFkKGZpbGVuYW1lKQogICAgICAgICAgICAgICAgICAgIGNsaWVudC5wdXRfb2JqZWN0KEJvZHk9ZGF0YSwgQnVja2V0PXVybHBhcnNlKHRhcmdldF9wYXRoIG9yIHN1YmRpcikubmV0bG9jLCBLZXk9Zid7dXJscGFyc2UodGFyZ2V0X3BhdGggb3Igc3ViZGlyKS5wYXRoWzE6XX17ZmlsZW5hbWV9JykKCiAgICAgICAgZWxpZiBhcmNoaXZlX3VybC5lbmRzd2l0
aCgiemlwIik6CiAgICAgICAgICAgIHdpdGggemlwZmlsZS5aaXBGaWxlKGFyY2hpdmVfdXJsLCAiciIpIGFzIHJlZjoKICAgICAgICAgICAgICAgIGZvciBmaWxlbmFtZSBpbiByZWYubmFtZWxpc3QoKToKICAgICAgICAgICAgICAgICAgICBkYXRhPXJlZi5yZWFkKGZpbGVuYW1lKQogICAgICAgICAgICAgICAgICAgIGNsaWVudC5wdXRfb2JqZWN0KEJvZHk9ZGF0YSwgQnVja2V0PXVybHBhcnNlKHRhcmdldF9wYXRoIG9yIHN1YmRpcikubmV0bG9jLCBLZXk9Zid7dXJscGFyc2UodGFyZ2V0X3BhdGggb3Igc3ViZGlyKS5wYXRoWzE6XX17ZmlsZW5hbWV9JykKICAgICAgICBlbHNlOgogICAgICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYidW5zdXBwb3J0ZWQgYXJjaGl2ZSB0eXBlIGluIHthcmNoaXZlX3VybH0iKQogICAgCiAgICBlbHNlOgogICAgICAgIG9zLm1ha2VkaXJzKHRhcmdldF9wYXRoIG9yIHN1YmRpciwgZXhpc3Rfb2s9VHJ1ZSkKICAgICAgICBpZiBhcmNoaXZlX3VybC5lbmRzd2l0aCgiZ3oiKToKICAgICAgICAgICAgd2l0aCB0YXJmaWxlLm9wZW4oYXJjaGl2ZV91cmwsIG1vZGU9InJ8Z3oiKSBhcyByZWY6CiAgICAgICAgICAgICAgICByZWYuZXh0cmFjdGFsbCh0YXJnZXRfcGF0aCBvciBzdWJkaXIpCiAgICAgICAgZWxpZiBhcmNoaXZlX3VybC5lbmRzd2l0aCgiemlwIik6CiAgICAgICAgICAgIHdpdGggemlwZmlsZS5aaXBGaWxlKGFyY2hpdmVfdXJsLCAiciIpIGFzIHJlZjoKICAgICAgICAgICAgICAgIHJlZi5leHRyYWN0YWxsKHRhcmdldF9wYXRoIG9yIHN1YmRpcikKICAgICAgICBlbHNlOgogICAgICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYidW5zdXBwb3J0ZWQgYXJjaGl2ZSB0eXBlIGluIHthcmNoaXZlX3VybH0iKQogICAgICAgICAgICAKICAgIGlmIHYzaW9fc3ViZGlyOgogICAgICAgIHN1YmRpciA9IHYzaW9fc3ViZGlyCiAgICAgICAgCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYnTG9nZ2luZyBhcnRpZmFjdCB0byB7KHRhcmdldF9wYXRoIG9yIHN1YmRpcil9JykKICAgIGNvbnRleHQubG9nX2FydGlmYWN0KERpckFydGlmYWN0KGtleT1rZXksIHRhcmdldF9wYXRoPSh0YXJnZXRfcGF0aCBvciBzdWJkaXIpKSkK
commands: []
code_origin: https://github.com/daniels290813/functions.git#3e17475a7c3cb3d01913056fbc2e1a5ab150f559:/User/test/functions/open_archive/open_archive.py
origin_filename: /User/test/functions/open_archive/open_archive.py
origin_filename: ''
code_origin: ''
functionSourceCode: IyBDb3B5cmlnaHQgMjAyNSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKCmltcG9ydCBvcwppbXBvcnQgemlwZmlsZQppbXBvcnQgdGFyZmlsZQoKZnJvbSBtbHJ1bi5leGVjdXRpb24gaW1wb3J0IE1MQ2xpZW50Q3R4CmZyb20gbWxydW4uZGF0YXN0b3JlIGltcG9ydCBEYXRhSXRlbQpmcm9tIG1scnVuLmFydGlmYWN0cy5iYXNlIGltcG9ydCBEaXJBcnRpZmFjdAoKaW1wb3J0IGJvdG8zCmZyb20gdXJsbGliLnBhcnNlIGltcG9ydCB1cmxwYXJzZQoKZGVmIG9wZW5fYXJjaGl2ZSgKICAgIGNvbnRleHQ6IE1MQ2xpZW50Q3R4LAogICAgYXJjaGl2ZV91cmw6IERhdGFJdGVtLAogICAgc3ViZGlyOiBzdHIgPSAiY29udGVudC8iLAogICAga2V5OiBzdHIgPSAiY29udGVudCIsCiAgICB0YXJnZXRfcGF0aDogc3RyID0gTm9uZSwKKToKICAgICIiIk9wZW4gYSBmaWxlL29iamVjdCBhcmNoaXZlIGludG8gYSB0YXJnZXQgZGlyZWN0b3J5LiBDdXJyZW50bHksIHN1cHBvcnRzIHppcCBhbmQgdGFyLmd6LgoKICAgIDpwYXJhbSBjb250ZXh0OiAgICAgIGZ1bmN0aW9uIGV4ZWN1dGlvbiBjb250ZXh0CiAgICA6cGFyYW0gYXJjaGl2ZV91cmw6ICB1cmwgb2YgYXJjaGl2ZSBmaWxlIAogICAgOnBhcmFtIHN1YmRpcjogICAgICAgcGF0aCB3aXRoaW4gYXJ0aWZhY3Qgc3RvcmUgd2hlcmUgZXh0cmFjdGVkIGZpbGVzIGFyZSBzdG9yZWQsIGRlZmF1bHQgaXMgIi9jb250ZW50IgogICAgOnBhcmFtIGtleTogICAgICAgICAga2V5IG9mIGFyY2hpdmUgY29udGVudHMgaW4gYXJ0aWZhY3Qgc3RvcmUKICAgIDpwYXJhbSB0YXJnZXRfcGF0aDogIGZpbGUgc3lzdGVtIHBhdGggdG8gc3RvcmUgZXh0cmFjdGVkIGZpbGVzCiAgICAiIiIKCiAgICAjIFJlc29sdmVzIHRoZSBhcmNoaXZlIGxvY2FsbHkKICAgIGFyY2hpdmVfdXJsID0gYXJjaGl2ZV91cmwubG9jYWwoKQogICAgdjNpb19zdWJkaXIgPSBOb25l
CiAgICAjIFdoZW4gY3VzdG9tIGFydGlmYWN0IHBhdGggaXMgZGVmaW5lZAogICAgaWYgbm90IHRhcmdldF9wYXRoIGFuZCBjb250ZXh0LmFydGlmYWN0X3BhdGg6CiAgICAgICAgcGFyc2VkX3N1YmRpciA9IHVybHBhcnNlKGNvbnRleHQuYXJ0aWZhY3RfcGF0aCkKICAgICAgICBpZiBwYXJzZWRfc3ViZGlyLnNjaGVtZSA9PSAnczMnOgogICAgICAgICAgICBzdWJkaXIgPSBvcy5wYXRoLmpvaW4oY29udGV4dC5hcnRpZmFjdF9wYXRoLCBzdWJkaXIpCiAgICAgICAgZWxpZiBwYXJzZWRfc3ViZGlyLnNjaGVtZSA9PSAndjNpbyc6CiAgICAgICAgICAgIHYzaW9fc3ViZGlyID0gb3MucGF0aC5qb2luKGNvbnRleHQuYXJ0aWZhY3RfcGF0aCwgc3ViZGlyKSAjIFVzaW5nIHYzaW9fc3ViZGlyIGZvciBsb2dnaW5nCiAgICAgICAgICAgIHN1YmRpciA9ICcvdjNpbycgKyBwYXJzZWRfc3ViZGlyLnBhdGggKyAnLycgKyBzdWJkaXIKICAgICAgICAgICAgY29udGV4dC5sb2dnZXIuaW5mbyhmJ1VzaW5nIHYzaW8gc2NoZW1lLCBleHRyYWN0aW5nIHRvIHtzdWJkaXJ9JykKICAgICAgICBlbHNlOgogICAgICAgICAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYnVW5yZWNvZ25pemFibGUgc2NoZW1lLCBleHRyYWN0aW5nIHRvIHtzdWJkaXJ9JykKICAgICAgICAgICAgCiAgICAjIFdoZW4gd29ya2luZyBvbiBDRSwgdGFyZ2V0IHBhdGggbWlnaHQgYmUgb24gczMKICAgIGlmICdzMycgaW4gKHRhcmdldF9wYXRoIG9yIHN1YmRpcik6CiAgICAgICAgY29udGV4dC5sb2dnZXIuaW5mbyhmJ1VzaW5nIHMzIHNjaGVtZSwgZXh0cmFjdGluZyB0byB7dGFyZ2V0X3BhdGggb3Igc3ViZGlyfScpCiAgICAgICAgaWYgb3MuZW52aXJvbi5nZXQoJ1MzX0VORFBPSU5UX1VSTCcpOgogICAgICAgICAgICBjbGllbnQgPSBib3RvMy5jbGllbnQoJ3MzJywgZW5kcG9pbnRfdXJsID0gb3MuZW52aXJvbi5nZXQoJ1MzX0VORFBPSU5UX1VSTCcpKSAKICAgICAgICBlbHNlOgogICAgICAgICAgICBjbGllbnQgPSBib3RvMy5jbGllbnQoJ3MzJykgIAogICAgICAgICAgICAKICAgICAgICBpZiBhcmNoaXZlX3VybC5lbmRzd2l0aCgiZ3oiKToKICAgICAgICAgICAgd2l0aCB0YXJmaWxlLm9wZW4oYXJjaGl2ZV91cmwsIG1vZGU9InJ8Z3oiKSBhcyByZWY6CiAgICAgICAgICAgICAgICBmb3IgbWVtYmVyIGluIHJlZi5nZXRtZW1iZXJzKCk6CiAgICAgICAgICAgICAgICAgICAgZGF0YT1yZWYuZXh0cmFjdGZpbGUobWVtYmVyPW1lbWJlcikucmVhZCgpCiAgICAgICAgICAgICAgICAgICAgY2xpZW50LnB1dF9vYmplY3QoQm9keT1kYXRhLCBCdWNrZXQ9dXJscGFyc2UodGFyZ2V0X3BhdGggb3Igc3ViZGlyKS5uZXRsb2MsIEtleT1mJ3t1cmxwYXJzZSh0YXJnZXRfcGF0aCBvciBzdWJkaXIpLnBhdGhbMTpdfXttZW1iZXIubmFtZX0nKQoKICAgICAgICBlbGlmIGFyY2hpdmVfdXJsLmVuZHN3aXRoKCJ6aXAiKToKICAgICAgICAgICAgd2l0aCB6aXBmaWxlLlppcEZpbGUoYXJjaGl2ZV91
cmwsICJyIikgYXMgcmVmOgogICAgICAgICAgICAgICAgZm9yIGZpbGVuYW1lIGluIHJlZi5uYW1lbGlzdCgpOgogICAgICAgICAgICAgICAgICAgIGRhdGE9cmVmLnJlYWQoZmlsZW5hbWUpCiAgICAgICAgICAgICAgICAgICAgY2xpZW50LnB1dF9vYmplY3QoQm9keT1kYXRhLCBCdWNrZXQ9dXJscGFyc2UodGFyZ2V0X3BhdGggb3Igc3ViZGlyKS5uZXRsb2MsIEtleT1mJ3t1cmxwYXJzZSh0YXJnZXRfcGF0aCBvciBzdWJkaXIpLnBhdGhbMTpdfXtmaWxlbmFtZX0nKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHJhaXNlIFZhbHVlRXJyb3IoZiJ1bnN1cHBvcnRlZCBhcmNoaXZlIHR5cGUgaW4ge2FyY2hpdmVfdXJsfSIpCiAgICAKICAgIGVsc2U6CiAgICAgICAgb3MubWFrZWRpcnModGFyZ2V0X3BhdGggb3Igc3ViZGlyLCBleGlzdF9vaz1UcnVlKQogICAgICAgIGlmIGFyY2hpdmVfdXJsLmVuZHN3aXRoKCJneiIpOgogICAgICAgICAgICB3aXRoIHRhcmZpbGUub3BlbihhcmNoaXZlX3VybCwgbW9kZT0icjpneiIpIGFzIHJlZjoKICAgICAgICAgICAgICAgICMgVmFsaWRhdGUgdGhhdCB0aGVyZSBpcyBubyBwYXRoIHRyYXZlcnNhbCBpbiB0aGUgYXJjaGl2ZQogICAgICAgICAgICAgICAgZm9yIGVudHJ5IGluIHJlZi5nZXRtZW1iZXJzKCk6CiAgICAgICAgICAgICAgICAgICAgaWYgb3MucGF0aC5pc2FicyhlbnRyeS5uYW1lKSBvciAiLi4iIGluIGVudHJ5Lm5hbWU6CiAgICAgICAgICAgICAgICAgICAgICAgIHJhaXNlIFZhbHVlRXJyb3IoZiJJbGxlZ2FsIHRhciBhcmNoaXZlIGVudHJ5OiB7ZW50cnkubmFtZX0iKQogICAgICAgICAgICAgICAgcmVmLmV4dHJhY3RhbGwodGFyZ2V0X3BhdGggb3Igc3ViZGlyKQogICAgICAgIGVsaWYgYXJjaGl2ZV91cmwuZW5kc3dpdGgoInppcCIpOgogICAgICAgICAgICB3aXRoIHppcGZpbGUuWmlwRmlsZShhcmNoaXZlX3VybCwgInIiKSBhcyByZWY6CiAgICAgICAgICAgICAgICAjIFZhbGlkYXRlIHRoYXQgdGhlcmUgaXMgbm8gcGF0aCB0cmF2ZXJzYWwgaW4gdGhlIGFyY2hpdmUKICAgICAgICAgICAgICAgIGZvciBlbnRyeSBpbiByZWYubmFtZWxpc3QoKToKICAgICAgICAgICAgICAgICAgICBpZiBvcy5wYXRoLmlzYWJzKGVudHJ5KSBvciAiLi4iIGluIGVudHJ5OgogICAgICAgICAgICAgICAgICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYiSWxsZWdhbCB6aXAgYXJjaGl2ZSBlbnRyeToge2VudHJ5fSIpCiAgICAgICAgICAgICAgICByZWYuZXh0cmFjdGFsbCh0YXJnZXRfcGF0aCBvciBzdWJkaXIpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgcmFpc2UgVmFsdWVFcnJvcihmInVuc3VwcG9ydGVkIGFyY2hpdmUgdHlwZSBpbiB7YXJjaGl2ZV91cmx9IikKICAgICAgICAgICAgCiAgICBpZiB2M2lvX3N1YmRpcjoKICAgICAgICBzdWJkaXIgPSB2M2lvX3N1YmRpcgogICAgICAgIAogICAgY29udGV4dC5sb2dnZXIuaW5mbyhmJ0xvZ2dpbmcgYXJ0aWZhY3QgdG8geyh0YXJnZXRfcGF0aCBv
ciBzdWJkaXIpfScpCiAgICBjb250ZXh0LmxvZ19hcnRpZmFjdChEaXJBcnRpZmFjdChrZXk9a2V5LCB0YXJnZXRfcGF0aD0odGFyZ2V0X3BhdGggb3Igc3ViZGlyKSkp
entry_points:
open_archive:
lineno: 27
has_varargs: false
doc: Open a file/object archive into a target directory. Currently supports
zip and tar.gz.
name: open_archive
doc: 'Open a file/object archive into a target directory

Currently supports zip and tar.gz'
has_kwargs: false
parameters:
- name: context
type: MLClientCtx
doc: function execution context
default: ''
- name: archive_url
type: DataItem
doc: 'url of archive file '
default: ''
- name: subdir
type: str
doc: path within artifact store where extracted files are stored
doc: path within artifact store where extracted files are stored, default
is "content/"
default: content/
- name: key
type: str
Expand All @@ -44,23 +34,12 @@ spec:
type: str
doc: file system path to store extracted files
default: null
outputs:
- default: ''
lineno: 31
description: Open a file/object archive into a target directory
default_handler: open_archive
image: mlrun/mlrun
command: ''
disable_auto_mount: false
env: []
resources:
requests:
memory: 1Mi
cpu: 25m
limits:
memory: 20Gi
cpu: '2'
priority_class_name: igz-workload-medium
preemption_mode: prevent
affinity: null
tolerations: null
security_context: {}
verbose: false
metadata:
categories:
- data-preparation
tag: ''
name: open-archive
kind: job
4 changes: 2 additions & 2 deletions open_archive/item.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ labels:
author: yaronh
maintainers: []
marketplaceType: ''
mlrunVersion: 1.1.0
mlrunVersion: 1.8.0-rc50
name: open-archive
platformVersion: 3.5.0
spec:
Expand All @@ -21,4 +21,4 @@ spec:
kind: job
requirements: []
url: ''
version: 1.1.0
version: 1.2.0
32 changes: 18 additions & 14 deletions open_archive/open_archive.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2019 Iguazio
# Copyright 2025 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -12,19 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Generated by nuclio.export.NuclioExporter

import os
import zipfile
import urllib.request
import tarfile
import json

from mlrun.execution import MLClientCtx
from mlrun.datastore import DataItem
from mlrun.artifacts.base import DirArtifact

from typing import Union
import boto3
from urllib.parse import urlparse

Expand All @@ -35,16 +31,16 @@ def open_archive(
key: str = "content",
target_path: str = None,
):
"""Open a file/object archive into a target directory
Currently supports zip and tar.gz
"""Open a file/object archive into a target directory. Currently, supports zip and tar.gz.

:param context: function execution context
:param archive_url: url of archive file
:param subdir: path within artifact store where extracted files
are stored
:param subdir: path within artifact store where extracted files are stored, default is "content/"
:param key: key of archive contents in artifact store
:param target_path: file system path to store extracted files
"""


# Resolves the archive locally
archive_url = archive_url.local()
v3io_subdir = None
# When custom artifact path is defined
Expand All @@ -69,9 +65,9 @@ def open_archive(

if archive_url.endswith("gz"):
with tarfile.open(archive_url, mode="r|gz") as ref:
for filename in ref.namelist():
data=ref.read(filename)
client.put_object(Body=data, Bucket=urlparse(target_path or subdir).netloc, Key=f'{urlparse(target_path or subdir).path[1:]}{filename}')
for member in ref.getmembers():
data=ref.extractfile(member=member).read()
client.put_object(Body=data, Bucket=urlparse(target_path or subdir).netloc, Key=f'{urlparse(target_path or subdir).path[1:]}{member.name}')

elif archive_url.endswith("zip"):
with zipfile.ZipFile(archive_url, "r") as ref:
Expand All @@ -84,10 +80,18 @@ def open_archive(
else:
os.makedirs(target_path or subdir, exist_ok=True)
if archive_url.endswith("gz"):
with tarfile.open(archive_url, mode="r|gz") as ref:
with tarfile.open(archive_url, mode="r:gz") as ref:
# Validate that there is no path traversal in the archive
for entry in ref.getmembers():
if os.path.isabs(entry.name) or ".." in entry.name:
raise ValueError(f"Illegal tar archive entry: {entry.name}")
ref.extractall(target_path or subdir)
elif archive_url.endswith("zip"):
with zipfile.ZipFile(archive_url, "r") as ref:
# Validate that there is no path traversal in the archive
for entry in ref.namelist():
if os.path.isabs(entry) or ".." in entry:
raise ValueError(f"Illegal zip archive entry: {entry}")
ref.extractall(target_path or subdir)
else:
raise ValueError(f"unsupported archive type in {archive_url}")
Expand Down
5 changes: 2 additions & 3 deletions open_archive/test_open_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_open_archive():
kind="local",
)
fn.spec.command = "open_archive.py"
run = fn.run(inputs={'archive_url': ARCHIVE_URL},
fn.run(inputs={'archive_url': ARCHIVE_URL},
params={'key': 'test_archive', 'target_path': os.getcwd() + '/content/'},
local=True)

Expand All @@ -50,6 +50,5 @@ def test_open_archive_import_function():
run = fn.run(inputs={'archive_url': ARCHIVE_URL},
params={'key': 'test_archive', 'target_path': os.getcwd() + '/content/'},
local=True)

assert (run.artifact('test_archive'))
assert (run.status.artifact_uris["test_archive"])
_delete_outputs({'artifacts', 'runs', 'schedules', 'content'})
Loading