From 8992861a7bfd25102b98a5f7f42c58bde54b716e Mon Sep 17 00:00:00 2001 From: Felix Date: Sun, 6 Nov 2016 14:41:50 +0100 Subject: [PATCH 1/2] Added the ability to create and modify fields, including aliases and calculated fields --- tableaudocumentapi/datasource.py | 86 +++++++++++- tableaudocumentapi/field.py | 175 ++++++++++++++++++++++++- test/assets/.gitignore | 1 + test/assets/field_change_test.tds | 103 +++++++++++++++ test/test_field_change.py | 208 ++++++++++++++++++++++++++++++ 5 files changed, 568 insertions(+), 5 deletions(-) create mode 100644 test/assets/.gitignore create mode 100644 test/assets/field_change_test.tds create mode 100644 test/test_field_change.py diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index a34cba5..1e7f64f 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -240,9 +240,12 @@ def clear_repository_location(self): @property def fields(self): if not self._fields: - self._fields = self._get_all_fields() + self._refresh_fields() return self._fields + def _refresh_fields(self): + self._fields = self._get_all_fields() + def _get_all_fields(self): column_field_objects = self._get_column_objects() existing_column_fields = [x.id for x in column_field_objects] @@ -258,3 +261,84 @@ def _get_metadata_objects(self): def _get_column_objects(self): return [_column_object_from_column_xml(self._datasourceTree, xml) for xml in self._datasourceTree.findall('.//column')] + + def add_field(self, name, datatype, role, type, caption): + """ Adds a base field object with the given values. + + Args: + name: Name of the new Field. String. + datatype: Datatype of the new field. String. + role: Role of the new field. String. + type: Type of the new field. String. + caption: Caption of the new field. String. + + Returns: + The new field that was created. Field. 
+ """ + # TODO: A better approach would be to create an empty column and then + # use the input validation from its "Field"-object-representation to set values. + # However, creating an empty column causes errors :( + + # If no caption is specified, create one with the same format Tableau does + if not caption: + caption = name.replace('[', '').replace(']', '').title() + + # Create the elements + column = ET.Element('column') + column.set('caption', caption) + column.set('datatype', datatype) + column.set('role', role) + column.set('type', type) + column.set('name', name) + + self._datasourceTree.getroot().append(column) + + # Refresh fields to reflect changes and return the Field object + self._refresh_fields() + return self.fields[name] + + def remove_field(self, field): + """ Remove a given field + + Args: + field: The field to remove. Field. + + Returns: + None + """ + if not field or not isinstance(field, Field): + raise ValueError("Need to supply a field to remove element") + + self._datasourceTree.getroot().remove(field.xml) + self._refresh_fields() + + ########### + # Calculations + ########### + @property + def calculations(self): + """ Returns all calculated fields. + """ + # TODO: There is a default [Number of Records] calculation. + # Should this be excluded so users can't meddle with it? + return {k: v for k, v in self.fields.items() if v.calculation is not None} + + def add_calculation(self, caption, formula, datatype, role, type): + """ Adds a calculated field with the given values. + + Args: + caption: Caption of the new calculation. String. + formula: Formula of the new calculation. String. + datatype: Datatype of the new calculation. String. + role: Role of the new calculation. String. + type: Type of the new calculation. String. + + Returns: + The new calculated field that was created. Field. 
+ """ + # Dynamically create the name of the field + name = '[Calculation_{}]'.format(str(uuid4().int)[:18]) + field = self.add_field(name, datatype, role, type, caption) + field.calculation = formula + + return field diff --git a/tableaudocumentapi/field.py b/tableaudocumentapi/field.py index 65ce78d..e924e72 100644 --- a/tableaudocumentapi/field.py +++ b/tableaudocumentapi/field.py @@ -1,6 +1,6 @@ import functools import xml.etree.ElementTree as ET - +from functools import wraps _ATTRIBUTES = [ 'id', # Name of the field as specified in the file, usually surrounded by [ ] @@ -24,6 +24,21 @@ ] +def argument_is_one_of(*allowed_values): + def property_type_decorator(func): + @wraps(func) + def wrapper(self, value): + if value not in allowed_values: + error = "Invalid argument: {0}. {1} must be one of {2}." + msg = error.format(value, func.__name__, allowed_values) + raise ValueError(error) + return func(self, value) + + return wrapper + + return property_type_decorator + + def _find_metadata_record(record, attrib): element = record.find('.//{}'.format(attrib)) if element is None: @@ -43,14 +58,18 @@ def __init__(self, column_xml=None, metadata_xml=None): setattr(self, '_{}'.format(attrib), None) self._worksheets = set() + self._xml = None + if column_xml is not None: self._initialize_from_column_xml(column_xml) + self._xml = column_xml # This isn't currently never called because of the way we get the data from the xml, # but during the refactor, we might need it. This is commented out as a reminder # if metadata_xml is not None: # self.apply_metadata(metadata_xml) elif metadata_xml is not None: + self._xml = metadata_xml self._initialize_from_metadata_xml(metadata_xml) else: @@ -116,52 +135,200 @@ def id(self): """ Name of the field as specified in the file, usually surrounded by [ ] """ return self._id + @property + def xml(self): + """ XML representation of the field. 
""" + return self._xml + + ######################################## + # Attribute getters and setters + ######################################## + @property def caption(self): """ Name of the field as displayed in Tableau unless an aliases is defined """ return self._caption + @caption.setter + def caption(self, caption): + """ Set the caption of a field + + Args: + caption: New caption. String. + + Returns: + Nothing. + """ + self._caption = caption + self._xml.set('caption', caption) + @property def alias(self): """ Name of the field as displayed in Tableau if the default name isn't wanted """ return self._alias + @alias.setter + def alias(self, alias): + """ Set the alias of a field + + Args: + alias: New alias. String. + + Returns: + Nothing. + """ + self._alias = alias + self._xml.set('alias', alias) + @property def datatype(self): """ Type of the field within Tableau (string, integer, etc) """ return self._datatype + @datatype.setter + @argument_is_one_of('string', 'integer', 'date', 'boolean') + def datatype(self, datatype): + """ Set the datatype of a field + + Args: + datatype: New datatype. String. + + Returns: + Nothing. + """ + self._datatype = datatype + self._xml.set('datatype', datatype) + @property def role(self): """ Dimension or Measure """ return self._role + @role.setter + @argument_is_one_of('dimension', 'measure') + def role(self, role): + """ Set the role of a field + + Args: + role: New role. String. + + Returns: + Nothing. + """ + self._role = role + self._xml.set('role', role) + + @property + def type(self): + """ Quantitative, ordinal or nominal """ + return self._type + + @type.setter + @argument_is_one_of('quantitative', 'ordinal', 'nominal') + def type(self, type): + """ Set the type of a field + + Args: + type: New type. String. + + Returns: + Nothing. 
+ """ + self._type = type + self._xml.set('type', type) + + ######################################## + # Aliases getter and setter + # Those are NOT the 'alias' field of the column, + # but instead the key-value aliases in its child elements + ######################################## + + def add_alias(self, key, value): + """ Add an alias for a given display value. + + Args: + key: The data value to map. Example: "1". String. + value: The display value for the key. Example: "True". String. + Returns: + Nothing. + """ + + # determine whether there already is an aliases-tag + aliases = self._xml.find('aliases') + # and create it if there isn't + if not aliases: + aliases = ET.Element('aliases') + self._xml.append(aliases) + + # find out if an alias with this key already exists and use it + existing_alias = [tag for tag in aliases.findall('alias') if tag.get('key') == key] + # if not, create a new ET.Element + alias = existing_alias[0] if existing_alias else ET.Element('alias') + + alias.set('key', key) + alias.set('value', value) + if not existing_alias: + aliases.append(alias) + + @property + def aliases(self): + """ Returns all aliases that are registered under this field. + + Returns: + Key-value mappings of all registered aliases. Dict. + """ + aliases_tag = self._xml.find('aliases') or [] + return {a.get('key', 'None'): a.get('value', 'None') for a in list(aliases_tag)} + + ######################################## + # Attribute getters + ######################################## + @property def is_quantitative(self): """ A dependent value, usually a measure of something e.g. Profit, Gross Sales """ - return self._type == 'quantitative' + return self.type == 'quantitative' @property def is_ordinal(self): """ Is this field a categorical field that has a specific order e.g. How do you feel? 
1 - awful, 2 - ok, 3 - fantastic """ - return self._type == 'ordinal' + return self.type == 'ordinal' @property def is_nominal(self): """ Is this field a categorical field that does not have a specific order e.g. What color is your hair? """ - return self._type == 'nominal' + return self.type == 'nominal' @property def calculation(self): """ If this field is a calculated field, this will be the formula """ return self._calculation + @calculation.setter + def calculation(self, new_calculation): + """ Set the calculation of a calculated field. + + Args: + new_calculation: The new calculation/formula of the field. String. + """ + if self.calculation is None: + calculation = ET.Element('calculation') + calculation.set('class', 'tableau') + calculation.set('formula', new_calculation) + # Append the elements to the respective structure + self._xml.append(calculation) + + else: + self._xml.find('calculation').set('formula', new_calculation) + + self._calculation = new_calculation + @property def default_aggregation(self): """ The default type of aggregation on the field (e.g Sum, Avg)""" diff --git a/test/assets/.gitignore b/test/assets/.gitignore new file mode 100644 index 0000000..6aa8fd1 --- /dev/null +++ b/test/assets/.gitignore @@ -0,0 +1 @@ +field_change_test_output.tds diff --git a/test/assets/field_change_test.tds b/test/assets/field_change_test.tds new file mode 100644 index 0000000..dba389c --- /dev/null +++ b/test/assets/field_change_test.tds @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + name + 130 + [name] + [my_data] + name + 1 + string + Count + 8190 + true + true + + + "SQL_WLONGVARCHAR" + "SQL_C_WCHAR" + + + + typ + 130 + [typ] + [my_data] + typ + 2 + string + Count + 8190 + true + true + + + "SQL_WLONGVARCHAR" + "SQL_C_WCHAR" + + + + amount + 3 + [amount] + [my_data] + amount + 3 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + price + 5 + [price] + [my_data] + price + 4 + real + Sum + 17 + true + + "SQL_FLOAT" + "SQL_C_DOUBLE" + + + 
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/test/test_field_change.py b/test/test_field_change.py new file mode 100644 index 0000000..6acd7b7 --- /dev/null +++ b/test/test_field_change.py @@ -0,0 +1,208 @@ +import unittest +import os.path + +from tableaudocumentapi import Datasource +import xml.etree.ElementTree as ET + + +TEST_ASSET_DIR = os.path.join( + os.path.dirname(__file__), + 'assets' +) +TEST_TDS_FILE = os.path.join( + TEST_ASSET_DIR, + 'field_change_test.tds' +) +TEST_TDS_FILE_OUTPUT = os.path.join( + TEST_ASSET_DIR, + 'field_change_test_output.tds' +) + +MESSAGES = { + 'test_change_values1': 'Value has not changed when altering values for {}.', + 'test_change_values2': 'XML-Structure has not changed when altering values for {}.', + 'test_change_valuesFail1': 'Value has changed when submitting the wrong value for {}.', + 'test_change_valuesFail2': 'XML-Structure has changed when submitting the wrong value for {}.', + 'test_change_aliases1': 'XML-Structure has not changed when altering aliases for {}.', + 'test_change_aliases2': 'Values have not changed when altering aliases for {}.' + +} + +NEW_VALUES = { + 'caption': 'testcaption', + 'alias': 'testalias', + 'datatype': 'boolean', + 'role': 'measure', + 'type': 'ordinal' +} + +WRONG_VALUES = { + 'datatype': 'boolani', + 'role': 'messhure', + 'type': 'gordinol' +} + +ALIASES = { + 'one': 'two', + 'three': 'four', + 'five': 'six' +} + + +class TestFieldChange(unittest.TestCase): + + def setUp(self): + self.tds = Datasource.from_file(TEST_TDS_FILE) + + def current_hash(self): + """ Return a hash of the current state of the XML. + + Allows us to easily identify whether the underlying XML-structure + of a TDS-file has actually changed. Avoids false positives if, + for example, a fields value has changed but the XML hasn't. 
+ """ + return hash(ET.tostring(self.tds._datasourceTree.getroot())) + + def test_change_values(self): + """ Test if the value changes of a field are reflected in the object and in the underlying XML structure. + """ + field_to_test = "[amount]" + state = self.current_hash() + # change all fields + for key, value in NEW_VALUES.items(): + setattr(self.tds.fields[field_to_test], key, value) + # the new value must be reflected in the object + self.assertEqual( + getattr(self.tds.fields[field_to_test], key), + value, + msg=MESSAGES['test_change_values1'].format(key) + ) + # the new value must be reflected in the xml + new_state = self.current_hash() + self.assertNotEqual( + state, + new_state, + msg=MESSAGES['test_change_values2'].format(key) + ) + state = new_state + + def test_change_values_fail(self): + """ Test if the value changes of a field are rejected if the wrong arguments are passed. + """ + field_to_test = "[amount]" + state = self.current_hash() + # change all fields + for key, value in WRONG_VALUES.items(): + + with self.assertRaises(ValueError): + # this must fail + setattr(self.tds.fields[field_to_test], key, value) + + # the new value must NOT be reflected in the object + self.assertNotEqual( + getattr(self.tds.fields[field_to_test], key), + value, + msg=MESSAGES['test_change_valuesFail1'].format(key) + ) + + # the new value must NOT be reflected in the xml + new_state = self.current_hash() + self.assertEqual( + state, + new_state, + msg=MESSAGES['test_change_valuesFail2'].format(key) + ) + state = new_state + + def test_remove_field(self): + """ Test if a Field can be removed. + """ + field_to_test = "[amount]" + state = self.current_hash() + # change all fields + field = self.tds.fields["[amount]"] + self.tds.remove_field(field) + self.assertNotEqual(state, self.current_hash()) + + def test_change_aliases(self): + """ Test if the alias changes of a field are reflected in the object and in the underlying XML structure. 
+ """ + field_to_test = "[amount]" + state = self.current_hash() + # change all fields + for key, value in ALIASES.items(): + self.tds.fields[field_to_test].add_alias(key, value) + # the new value must be reflected in the xml + new_state = self.current_hash() + self.assertNotEqual( + state, + new_state, + msg=MESSAGES['test_change_aliases1'].format(field_to_test) + ) + state = new_state + + # check whether all fields of ALIASES have been applied + self.assertEqual( + set(self.tds.fields[field_to_test].aliases), + set(ALIASES), + msg=MESSAGES['test_change_aliases2'].format(field_to_test) + ) + + def test_calculation_base(self): + """ Test if the initial state of calculated fields is correct. + """ + # Demo data has 2 calculated fields at the start + original_len = len(self.tds.calculations) + + # Can't write to calculation for not-calculated fields! + self.tds.fields['[name]'].calculation = '1 * 2' + self.assertEqual(len(self.tds.calculations), original_len + 1) + self.tds.fields['[name]'].calculation = '2 * 3' + self.assertEqual(len(self.tds.calculations), original_len + 1) + self.tds.fields['[price]'].calculation = '2 * 3' + self.assertEqual(len(self.tds.calculations), original_len + 2) + + def test_calculation_change(self): + """ Test whether changing calculations of a field works. + """ + state = self.current_hash() + new_calc = '33 * 44' + fld_name = '[Calculation_357754699576291328]' + self.tds.calculations[fld_name].calculation = new_calc + + # Check object representation + self.assertEqual(self.tds.calculations[fld_name].calculation, new_calc) + + # Check XML representation + new_state = self.current_hash() + self.assertNotEqual(state, new_state) + + def test_calculation_new(self): + """ Test if creating a new calculation works. 
+ """ + args = 'TestCalc', '12*34', 'integer', 'measure', 'quantitative' + original_len = len(self.tds.calculations) + self.tds.add_calculation(*args) + self.assertEqual(len(self.tds.calculations), original_len + 1) + + def test_calculation_remove(self): + """ Test if deleting a calculation works. + """ + args = 'TestCalc2', '12*34', 'integer', 'measure', 'quantitative' + + original_len = len(self.tds.calculations) + calc = self.tds.add_calculation(*args) + self.assertEqual(len(self.tds.calculations), original_len + 1) + + self.tds.remove_field(calc) + self.assertEqual(len(self.tds.calculations), original_len) + + def tearDown(self): + """ Test if the file can be saved. + Output file will be ignored by git, but can be used to verify the results. + """ + self.tds.save_as(TEST_TDS_FILE_OUTPUT) + + +if __name__ == '__main__': + unittest.main() From 7595f4294598453adafb3db8b5d79761400ada45 Mon Sep 17 00:00:00 2001 From: Felix Eggert Date: Fri, 4 Nov 2016 16:12:48 +0100 Subject: [PATCH 2/2] first version of metadata creation --- tableaudocumentapi/datasource.py | 82 ++++++++++++++++++++++++ tableaudocumentapi/metadata_structure.py | 72 +++++++++++++++++++++ test/test_metadata.py | 51 +++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 tableaudocumentapi/metadata_structure.py create mode 100644 test/test_metadata.py diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 1e7f64f..b1dc077 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -13,6 +13,7 @@ from tableaudocumentapi import Field from tableaudocumentapi.multilookup_dict import MultiLookupDict from tableaudocumentapi.xfile import xml_open +from tableaudocumentapi import metadata_structure ######## # This is needed in order to determine if something is a string or not. 
It is necessary because @@ -312,6 +313,87 @@ def remove_field(self, field): self._datasourceTree.getroot().remove(field.xml) self._refresh_fields() + def add_metadata_record(self, name, parent, datatype): + """ This function creates and appends a full metadata-record column. + + This function depends on the datatype-to-values information from metadata_structure.py. + + TODO: This function is huge and complicated. Rework this ASAP! + Maybe create a separate file for metadata-fields? + """ + # get first part of values needed for a meta-record + db_class = self.connections[0].dbclass + defaults = getattr(metadata_structure, db_class, None) + + # TODO Are those mappings valid for other databases? + # If not: Create default mappings for other databases + if not defaults: + msg = "No default mappings are available for {}-databases.".format(db_class) + raise NotImplementedError(msg) + + # get second part of values needed for a meta-record + passed_values = { + 'remote-name': name, + 'local-name': '[{}]'.format(name), + 'parent-name': '[{}]'.format(parent), + 'remote-alias': name, + 'local-type': datatype, + } + + # merge them + record_attributes = dict(itertools.chain(defaults[datatype].items(), passed_values.items())) + + # determine the value of "ordinal" for new record + ordinals = self._datasourceTree.findall('.//*/metadata-record/ordinal') + max_nr = max([int(x.text) for x in ordinals] + [0]) + record_attributes['ordinal'] = str(max_nr + 1) + + # create base record + record = ET.Element('metadata-record') + record.set('class', 'column') + + # add all sub-elements of the metadata-record field + for key, value in record_attributes.items(): + if key == '__extra__': + # Those need special treatment + continue + elem = ET.Element(key) + elem.text = value + record.append(elem) + + # add the 'collation' sub-elements if the metadata_structure provides them + collation_info = record_attributes.get('__extra__', {}).get('collation', {}) + if collation_info: + collation = 
ET.Element('collation') + collation.set('flag', collation_info['flag']) + collation.set('name', collation_info['name']) + record.append(collation) + + # add the 'attribute' sub-elements if the metadata_structure provides them + attribute_info = record_attributes.get('__extra__', {}).get('attributes', {}) + if attribute_info: + + attrs = ET.Element('attributes') + + attr1 = ET.Element('attribute') + attr1.set('datatype', 'string') + attr1.set('name', 'DebugRemoteType') + attr1.text = attribute_info['DebugRemoteType'] + attrs.append(attr1) + + attr2 = ET.Element('attribute') + attr2.set('datatype', 'string') + attr2.set('name', 'DebugWireType') + attr2.text = attribute_info['DebugWireType'] + attrs.append(attr2) + + record.append(attrs) + + # append to the base xml + base = self._datasourceTree.find('.//metadata-records') + base.append(record) + self._refresh_fields() + ########### # Calculations ########### diff --git a/tableaudocumentapi/metadata_structure.py b/tableaudocumentapi/metadata_structure.py new file mode 100644 index 0000000..f540bcd --- /dev/null +++ b/tableaudocumentapi/metadata_structure.py @@ -0,0 +1,72 @@ +# This file contains local-type to value mappings for metadata-records +# TODO: This might or might not be database-specific. 
+postgres = { + 'integer': { + 'local-type': 'integer', + 'remote-type': '3', + 'aggregation': 'Sum', + 'precision': '10', + 'contains-null': 'true', + '__extra__': { + 'attributes': { + 'DebugRemoteType': 'SQL_INTEGER', + 'DebugWireType': 'SQL_C_SLONG' + } + } + }, + 'string': { + 'local-type': 'string', + 'remote-type': '130', + 'aggregation': 'Count', + 'cast-to-local-type': 'true', + 'width': '8190', + 'contains-null': 'true', + '__extra__': { + 'collation': { + 'flag': '0', + 'name': 'LEN_RUS' + }, + 'attributes': { + 'DebugRemoteType': 'SQL_WLONGVARCHAR', + 'DebugWireType': 'SQL_C_WCHAR' + } + } + }, + 'boolean': { + 'local-type': 'boolean', + 'remote-type': '11', + 'aggregation': 'Count', + 'contains-null': 'true', + '__extra__': { + 'attributes': { + 'DebugRemoteType': 'SQL_BIT', + 'DebugWireType': 'SQL_C_BIT' + } + } + }, + 'date': { + 'local-type': 'date', + 'remote-type': '7', + 'aggregation': 'Year', + 'contains-null': 'true', + '__extra__': { + 'attributes': { + 'DebugRemoteType': 'SQL_TYPE_DATE', + 'DebugWireType': 'SQL_C_TYPE_DATE' + } + } + }, + 'real': { + 'local-type': 'real', + 'remote-type': '131', + 'aggregation': 'Sum', + 'precision': '28', + 'contains-null': 'true', + '__extra__': { + 'attributes': { + 'DebugRemoteType': 'SQL_NUMERIC', + 'DebugWireType': 'SQL_C_NUMERIC' + } + } + } +} diff --git a/test/test_metadata.py b/test/test_metadata.py new file mode 100644 index 0000000..4c0284b --- /dev/null +++ b/test/test_metadata.py @@ -0,0 +1,51 @@ +import unittest +import os.path + +from tableaudocumentapi import Datasource +import xml.etree.ElementTree as ET + + +TEST_ASSET_DIR = os.path.join( + os.path.dirname(__file__), + 'assets' +) +TEST_TDS_FILE = os.path.join( + TEST_ASSET_DIR, + 'field_change_test.tds' +) +TEST_TDS_FILE_OUTPUT = os.path.join( + TEST_ASSET_DIR, + 'field_change_test_output.tds' +) + + +class TestFieldChange(unittest.TestCase): + + def setUp(self): + self.tds = Datasource.from_file(TEST_TDS_FILE) + + def current_hash(self): 
+ """ Return a hash of the current state of the XML. + + Allows us to easily identify whether the underlying XML-structure + of a TDS-file has actually changed. Avoids false positives if, + for example, a field's value has changed but the XML hasn't. + """ + return hash(ET.tostring(self.tds._datasourceTree.getroot())) + + def test_metadata_creation(self): + """ Test if a metadata-record column can be created + """ + state = self.current_hash() + self.tds.add_metadata_record("my_new_attr", "my_data", "string") + self.assertNotEqual(state, self.current_hash()) + + def tearDown(self): + """ Test if the file can be saved. + Output file will be ignored by git, but can be used to verify the results. + """ + self.tds.save_as(TEST_TDS_FILE_OUTPUT) + + +if __name__ == '__main__': + unittest.main()