def _mapping_from_xml(root_xml, column_xml):
    """ Build a Field from a <column> element and attach its metadata record.

    root_xml is searched for a ``metadata-record`` element with
    ``class='column'`` that has a ``local-name`` child whose text equals the
    field's id. When one is found it is passed to ``Field.apply_metadata``.

    Returns a ``(field_id, field)`` tuple, suitable for building a dict. """
    field = Field.from_xml(column_xml)
    field_id = field.id

    # The id is compared in Python instead of being interpolated into an XPath
    # predicate: an id containing an apostrophe (e.g. "[Today's Date]") cannot
    # be embedded in a single-quoted predicate value.
    for record in root_xml.iterfind(".//metadata-record[@class='column']"):
        # Same comparison the [local-name='...'] predicate performs: the full
        # text content of any <local-name> child must equal the id.
        if any(''.join(child.itertext()) == field_id
               for child in record.iterfind('local-name')):
            field.apply_metadata(record)
            break

    return field_id, field
@property
def fields(self):
    """ Fields of this datasource, built on first access and cached afterwards. """
    # Test against None rather than truthiness: a datasource without columns
    # yields an empty (falsy) mapping, which would otherwise be rebuilt on
    # every access.
    if self._fields is None:
        self._fields = self._get_all_fields()
    return self._fields

def _get_all_fields(self):
    """ Build a MultiLookupDict of every <column> element in the datasource tree.

    Each column is turned into an (id, field) pair by _mapping_from_xml. """
    column_objects = (_mapping_from_xml(self._datasourceTree, column_xml)
                      for column_xml in self._datasourceTree.findall('.//column'))
    return MultiLookupDict(dict(column_objects))
@property
def name(self):
    """ Return the friendliest label available for this field.

    The alias wins when it is set, then the caption; when both are missing
    or empty the underlying id is returned. """
    for attribute in ('alias', 'caption'):
        label = getattr(self, attribute, None)
        if label:
            return label

    return self.id
+ calc = xmldata.find('.//calculation') + if calc is None: + return None + + return calc.attrib.get('formula', None) diff --git a/tableaudocumentapi/multilookup_dict.py b/tableaudocumentapi/multilookup_dict.py new file mode 100644 index 0000000..39c92c6 --- /dev/null +++ b/tableaudocumentapi/multilookup_dict.py @@ -0,0 +1,49 @@ +def _resolve_value(key, value): + try: + retval = value.get(key, None) + if retval is None: + retval = value.getattr(key, None) + except AttributeError: + retval = None + return retval + + +def _build_index(key, d): + return {_resolve_value(key, v): k + for k, v in d.items() + if _resolve_value(key, v) is not None} + + +# TODO: Improve this to be more generic +class MultiLookupDict(dict): + def __init__(self, args=None): + if args is None: + args = {} + super(MultiLookupDict, self).__init__(args) + self._indexes = { + 'alias': {}, + 'caption': {} + } + self._populate_indexes() + + def _populate_indexes(self): + self._indexes['alias'] = _build_index('alias', self) + self._indexes['caption'] = _build_index('caption', self) + + def __setitem__(self, key, value): + alias = _resolve_value('alias', value) + caption = _resolve_value('caption', value) + if alias is not None: + self._indexes['alias'][alias] = key + if caption is not None: + self._indexes['caption'][caption] = key + + dict.__setitem__(self, key, value) + + def __getitem__(self, key): + if key in self._indexes['alias']: + key = self._indexes['alias'][key] + elif key in self._indexes['caption']: + key = self._indexes['caption'][key] + + return dict.__getitem__(self, key) diff --git a/test/__init__.py b/test/__init__.py index e69de29..c715da8 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -0,0 +1,2 @@ +from . import bvt +from . 
class DataSourceFields(unittest.TestCase):
    """ Checks the fields mapping of the datasource loaded from TEST_TDS_FILE. """

    def setUp(self):
        # A fresh datasource per test keeps the cases independent.
        self.ds = Datasource.from_file(TEST_TDS_FILE)

    def test_datasource_returns_correct_fields(self):
        self.assertIsNotNone(self.ds.fields)
        record_count = self.ds.fields.get('[Number of Records]', None)
        self.assertIsNotNone(record_count)

    def test_datasource_returns_calculation_from_fields(self):
        record_count = self.ds.fields['[Number of Records]']
        self.assertEqual('1', record_count.calculation)

    def test_datasource_uses_metadata_record(self):
        x_field = self.ds.fields['[x]']
        self.assertEqual('Sum', x_field.aggregation)

    def test_datasource_column_name_contains_apostrophy(self):
        dated = self.ds.fields.get("[Today's Date]", None)
        self.assertIsNotNone(dated)
class MLDTests(unittest.TestCase):
    """ Lookup behaviour of MultiLookupDict by id, alias and caption. """

    def setUp(self):
        # '[foo]' has both alias and caption, '[bar]' only a caption and
        # '[baz]' neither, so every lookup path is represented.
        with_alias_and_caption = {
            'alias': 'bar',
            'caption': 'baz',
            'value': 1
        }
        with_caption_only = {
            'caption': 'foo',
            'value': 2
        }
        plain = {
            'value': 3
        }
        self.mld = MultiLookupDict({
            '[foo]': with_alias_and_caption,
            '[bar]': with_caption_only,
            '[baz]': plain
        })

    def test_multilookupdict_name_only(self):
        self.assertEqual(3, self.mld['[baz]']['value'])

    def test_multilookupdict_alias_overrides_everything(self):
        self.assertEqual(1, self.mld['bar']['value'])

    def test_mutlilookupdict_caption_overrides_id(self):
        self.assertEqual(2, self.mld['foo']['value'])

    def test_mutlilookupdict_can_still_find_id_even_with_alias(self):
        self.assertEqual(1, self.mld['[foo]']['value'])

    def test_mutlilookupdict_can_still_find_caption_even_with_alias(self):
        self.assertEqual(1, self.mld['baz']['value'])

    def test_mutlilookupdict_can_still_find_id_even_with_caption(self):
        self.assertEqual(2, self.mld['[bar]']['value'])