diff --git a/.gitignore b/.gitignore index 0b50f47..581cedb 100644 --- a/.gitignore +++ b/.gitignore @@ -65,5 +65,11 @@ target/ .DS_Store .idea +#Editor things +*.sublime-project +*.sublime-workspace +settings.json +tasks.json + #Jekyll docs/_site diff --git a/.travis.yml b/.travis.yml index 32f39d0..454322d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,17 +6,18 @@ python: - "3.3" - "3.4" - "3.5" + - "3.6" - "pypy" # command to install dependencies install: - "pip install -e ." - - "pip install pep8" + - "pip install pycodestyle" # command to run tests script: # Tests - python setup.py test - # pep8 - - pep8 . + # pycodestyle + - pycodestyle tableaudocumentapi test samples # Examples - (cd "samples/replicate-workbook" && python replicate_workbook.py) - (cd "samples/list-tds-info" && python list_tds_info.py) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49783e1..3c5976a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 06 (11 January 2017) + +* Initial SQL and query banding support (#123) +* Fixed bug in xfiles to allow opening workbooks with external file caches (#117, #118) +* Code Cleanup (#120, #121) +* Added Py36 support (#124) +* Switched to pycodestyle from pep8 on travis runs (#124) + ## 05 (01 November 2016) * Added ability to set the port for connections (#97) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d79e152..903e57b 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -6,6 +6,7 @@ The following people have contributed to this project to make it possible, and w * [Charley Peng](https://github.com/chid) * [Miguel Sánchez](https://github.com/MiguelSR) +* [Ryan Richmond](https://github.com/r-richmond) ## Core Team diff --git a/docs/docs/api-ref.md b/docs/docs/api-ref.md index 3819b70..cf6bc96 100644 --- a/docs/docs/api-ref.md +++ b/docs/docs/api-ref.md @@ -48,6 +48,30 @@ class Datasource(dsxml, filename=None) class Connection(connxml) ``` +The Connection class represents a Tableau data connection. 
It can be from any type of connection found in `dbclass.py` via `is_valid_dbclass` + +**Params:** + +**Raises:** + +**Methods:** + +**Properties:** + +`self.server:` Returns a string containing the server. + +`self.dbname:` Returns a string containing the database name. + +`self.username:` Returns a string containing the username. + +`self.dbclass:` Returns a string containing the database class. + +`self.port:` Returns a string containing the port. + +`self.query_band:` Returns a string containing the query band. + +`self.initial_sql:` Returns a string containing the initial sql. + ## Fields ```python class Workbook(column_xml=None, metadata_xml=None) diff --git a/setup.py b/setup.py index 82e9517..59270ed 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tableaudocumentapi', - version='0.5', + version='0.6', author='Tableau', author_email='github@tableau.com', url='https://github.com/tableau/document-api-python', diff --git a/tableaudocumentapi/connection.py b/tableaudocumentapi/connection.py index f71daa8..30343b5 100644 --- a/tableaudocumentapi/connection.py +++ b/tableaudocumentapi/connection.py @@ -1,27 +1,14 @@ -############################################################################### -# -# Connection - A class for writing connections to Tableau files -# -############################################################################### import xml.etree.ElementTree as ET from tableaudocumentapi.dbclass import is_valid_dbclass class Connection(object): - """ - A class for writing connections to Tableau files. - - """ - - ########################################################################### - # - # Public API. - # - ########################################################################### + """A class representing connections inside Data Sources.""" def __init__(self, connxml): - """ - Constructor. + """Connection is usually instantiated by passing in connection elements + in a Data Source. 
If creating a connection from scratch you can call + `from_attributes` passing in the connection attributes. """ self._connectionXML = connxml @@ -31,12 +18,18 @@ def __init__(self, connxml): self._authentication = connxml.get('authentication') self._class = connxml.get('class') self._port = connxml.get('port', None) + self._query_band = connxml.get('query-band-spec', None) + self._initial_sql = connxml.get('one-time-sql', None) def __repr__(self): return "''".format(self._server, self._dbname, hex(id(self))) @classmethod - def from_attributes(cls, server, dbname, username, dbclass, port=None, authentication=''): + def from_attributes(cls, server, dbname, username, dbclass, port=None, query_band=None, + initial_sql=None, authentication=''): + """Creates a new connection that can be added into a Data Source. + `authentication` defaults to `''`, which will be treated as 'prompt' by Tableau.""" + root = ET.Element('connection', authentication=authentication) xml = cls(root) xml.server = server @@ -44,14 +37,14 @@ def from_attributes(cls, server, dbname, username, dbclass, port=None, authentic xml.username = username xml.dbclass = dbclass xml.port = port + xml.query_band = query_band + xml.initial_sql = initial_sql return xml - ########### - # dbname - ########### @property def dbname(self): + """Database name for the connection. Not the table name.""" return self._dbname @dbname.setter @@ -69,11 +62,9 @@ def dbname(self, value): self._dbname = value self._connectionXML.set('dbname', value) - ########### - # server - ########### @property def server(self): + """Hostname or IP address of the database server. 
May also be a URL in some connection types.""" return self._server @server.setter @@ -91,11 +82,9 @@ def server(self, value): self._server = value self._connectionXML.set('server', value) - ########### - # username - ########### @property def username(self): + """Username used to authenticate to the database.""" return self._username @username.setter @@ -113,22 +102,26 @@ def username(self, value): self._username = value self._connectionXML.set('username', value) - ########### - # authentication - ########### @property def authentication(self): return self._authentication - ########### - # dbclass - ########### @property def dbclass(self): + """The type of connection (e.g. 'MySQL', 'Postgresql'). A complete list + can be found in dbclass.py""" return self._class @dbclass.setter def dbclass(self, value): + """Set the connection's dbclass property. + + Args: + value: New dbclass value. String. + + Returns: + Nothing. + """ if not is_valid_dbclass(value): raise AttributeError("'{}' is not a valid database type".format(value)) @@ -136,15 +129,22 @@ def dbclass(self, value): self._class = value self._connectionXML.set('class', value) - ########### - # port - ########### @property def port(self): + """Port used to connect to the database.""" return self._port @port.setter def port(self, value): + """Set the connection's port property. + + Args: + value: New port value. String. + + Returns: + Nothing. + """ + self._port = value # If port is None we remove the element and don't write it to XML if value is None: @@ -154,3 +154,55 @@ def port(self, value): pass else: self._connectionXML.set('port', value) + + @property + def query_band(self): + """Query band passed on connection to database.""" + return self._query_band + + @query_band.setter + def query_band(self, value): + """Set the connection's query_band property. + + Args: + value: New query_band value. String. + + Returns: + Nothing. 
+ """ + + self._query_band = value + # If query band is None we remove the element and don't write it to XML + if value is None: + try: + del self._connectionXML.attrib['query-band-spec'] + except KeyError: + pass + else: + self._connectionXML.set('query-band-spec', value) + + @property + def initial_sql(self): + """Initial SQL to be run.""" + return self._initial_sql + + @initial_sql.setter + def initial_sql(self, value): + """Set the connection's initial_sql property. + + Args: + value: New initial_sql value. String. + + Returns: + Nothing. + """ + + self._initial_sql = value + # If initial_sql is None we remove the element and don't write it to XML + if value is None: + try: + del self._connectionXML.attrib['one-time-sql'] + except KeyError: + pass + else: + self._connectionXML.set('one-time-sql', value) diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index a34cba5..418dc53 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -1,8 +1,3 @@ -############################################################################### -# -# Datasource - A class for writing datasources to Tableau files -# -############################################################################### import collections import itertools import xml.etree.ElementTree as ET @@ -16,7 +11,7 @@ ######## # This is needed in order to determine if something is a string or not. It is necessary because -# of differences between python2 (basestring) and python3 (str). If python2 support is every +# of differences between python2 (basestring) and python3 (str). 
If python2 support is ever # dropped, remove this and change the basestring references below to str try: basestring @@ -35,7 +30,7 @@ def _get_metadata_xml_for_field(root_xml, field_name): def _is_used_by_worksheet(names, field): - return any((y for y in names if y in field.worksheets)) + return any(y for y in names if y in field.worksheets) class FieldDictionary(MultiLookupDict): @@ -87,13 +82,14 @@ def base36encode(number): return sign + base36 -def make_unique_name(dbclass): +def _make_unique_name(dbclass): rand_part = base36encode(uuid4().int) name = dbclass + '.' + rand_part return name class ConnectionParser(object): + """Parser for detecting and extracting connections from differing Tableau file formats.""" def __init__(self, datasource_xml, version): self._dsxml = datasource_xml @@ -101,6 +97,8 @@ def __init__(self, datasource_xml, version): def _extract_federated_connections(self): connections = list(map(Connection, self._dsxml.findall('.//named-connections/named-connection/*'))) + # 'sqlproxy' connections (Tableau Server Connections) are not embedded into named-connection elements + # extract them manually for now connections.extend(map(Connection, self._dsxml.findall("./connection[@class='sqlproxy']"))) return connections @@ -108,6 +106,8 @@ def _extract_legacy_connection(self): return list(map(Connection, self._dsxml.findall('connection'))) def get_connections(self): + """Find and return all connections based on file format version.""" + if float(self._dsversion) < 10: connections = self._extract_legacy_connection() else: @@ -116,16 +116,11 @@ def get_connections(self): class Datasource(object): - """ - A class for writing datasources to Tableau files. + """A class representing Tableau Data Sources, embedded in workbook files or + in TDS files. """ - ########################################################################### - # - # Public API. 
- # - ########################################################################### def __init__(self, dsxml, filename=None): """ Constructor. Default is to create datasource from xml. @@ -145,13 +140,15 @@ def __init__(self, dsxml, filename=None): @classmethod def from_file(cls, filename): - """Initialize datasource from file (.tds)""" + """Initialize datasource from file (.tds or .tdsx)""" - dsxml = xml_open(filename, cls.__name__.lower()).getroot() + dsxml = xml_open(filename, 'datasource').getroot() return cls(dsxml, filename) @classmethod def from_connections(cls, caption, connections): + """Create a new Data Source given a list of Connections.""" + root = ET.Element('datasource', caption=caption, version='10.0', inline='true') outer_connection = ET.SubElement(root, 'connection') outer_connection.set('class', 'federated') @@ -159,7 +156,7 @@ def from_connections(cls, caption, connections): for conn in connections: nc = ET.SubElement(named_conns, 'named-connection', - name=make_unique_name(conn.dbclass), + name=_make_unique_name(conn.dbclass), caption=conn.server) nc.append(conn._connectionXML) return cls(root) @@ -194,16 +191,10 @@ def save_as(self, new_filename): xfile._save_file(self._filename, self._datasourceTree, new_filename) - ########### - # name - ########### @property def name(self): return self._name - ########### - # version - ########### @property def version(self): return self._version @@ -222,9 +213,6 @@ def caption(self): del self._datasourceXML.attrib['caption'] self._caption = '' - ########### - # connections - ########### @property def connections(self): return self._connections @@ -234,9 +222,6 @@ def clear_repository_location(self): if tag is not None: self._datasourceXML.remove(tag) - ########### - # fields - ########### @property def fields(self): if not self._fields: @@ -244,6 +229,8 @@ def fields(self): return self._fields def _get_all_fields(self): + # Some columns are represented by `column` tags and others as `metadata-record` tags + # 
Find them all and chain them into one dictionary column_field_objects = self._get_column_objects() existing_column_fields = [x.id for x in column_field_objects] metadata_only_field_objects = (x for x in self._get_metadata_objects() if x.id not in existing_column_fields) diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 4b4ce59..70b280c 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -1,8 +1,3 @@ -############################################################################### -# -# Workbook - A class for writing Tableau workbook files -# -############################################################################### import weakref @@ -11,25 +6,18 @@ class Workbook(object): - """ - A class for writing Tableau workbook files. + """A class for writing Tableau workbook files.""" - """ - - ########################################################################### - # - # Public API. - # - ########################################################################### def __init__(self, filename): - """ - Constructor. + """Open the workbook at `filename`. This will handle packaged and unpacked + workbook files automatically. This will also parse Data Sources and Worksheets + for access. 
""" self._filename = filename - self._workbookTree = xml_open(self._filename, self.__class__.__name__.lower()) + self._workbookTree = xml_open(self._filename, 'workbook') self._workbookRoot = self._workbookTree.getroot() # prepare our datasource objects @@ -42,23 +30,14 @@ def __init__(self, filename): self._workbookRoot, self._datasource_index ) - ########### - # datasources - ########### @property def datasources(self): return self._datasources - ########### - # worksheets - ########### @property def worksheets(self): return self._worksheets - ########### - # filename - ########### @property def filename(self): return self._filename @@ -92,11 +71,6 @@ def save_as(self, new_filename): xfile._save_file( self._filename, self._workbookTree, new_filename) - ########################################################################### - # - # Private API. - # - ########################################################################### @staticmethod def _prepare_datasource_index(datasources): retval = weakref.WeakValueDictionary() diff --git a/tableaudocumentapi/xfile.py b/tableaudocumentapi/xfile.py index 3067781..8e213ab 100644 --- a/tableaudocumentapi/xfile.py +++ b/tableaudocumentapi/xfile.py @@ -22,19 +22,23 @@ class TableauInvalidFileException(Exception): def xml_open(filename, expected_root=None): + """Opens the provided 'filename'. 
Handles detecting if the file is an archive, + detecting the document version, and validating the root tag.""" + # Is the file a zip (.twbx or .tdsx) if zipfile.is_zipfile(filename): tree = get_xml_from_archive(filename) else: tree = ET.parse(filename) + # Is the file a supported version tree_root = tree.getroot() - file_version = Version(tree_root.attrib.get('version', '0.0')) if file_version < MIN_SUPPORTED_VERSION: raise TableauVersionNotSupportedException(file_version) + # Does the root tag match the object type (workbook or data source) if expected_root and (expected_root != tree_root.tag): raise TableauInvalidFileException( "'{}'' is not a valid '{}' file".format(filename, expected_root)) @@ -52,7 +56,15 @@ def temporary_directory(*args, **kwargs): def find_file_in_zip(zip_file): - for filename in zip_file.namelist(): + '''Returns the twb/tds file from a Tableau packaged file format. Packaged + files can contain cache entries which are also valid XML, so only look for + files with a .tds or .twb extension. + ''' + + candidate_files = filter(lambda x: x.split('.')[-1] in ('twb', 'tds'), + zip_file.namelist()) + + for filename in candidate_files: with zip_file.open(filename) as xml_candidate: try: ET.parse(xml_candidate) @@ -71,6 +83,10 @@ def get_xml_from_archive(filename): def build_archive_file(archive_contents, zip_file): + """Build a Tableau-compatible archive file.""" + + # This is tested against Desktop and Server, and reverse engineered by lots + # of trial and error. Do not change this logic. 
for root_dir, _, files in os.walk(archive_contents): relative_dir = os.path.relpath(root_dir, archive_contents) for f in files: @@ -81,10 +97,10 @@ def build_archive_file(archive_contents, zip_file): def save_into_archive(xml_tree, filename, new_filename=None): - # Saving a archive means extracting the contents into a temp folder, + # Saving an archive means extracting the contents into a temp folder, # saving the changes over the twb/tds in that folder, and then - # packaging it back up into a specifically formatted zip with the correct - # relative file paths + # packaging it back up into a zip with a very specific format + # e.g. no empty files for directories, which Windows and Mac do by default if new_filename is None: new_filename = filename diff --git a/test/__init__.py b/test/__init__.py index c715da8..e69de29 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,2 +0,0 @@ -from . import bvt -from . import test_datasource diff --git a/test/assets/CONNECTION.xml b/test/assets/CONNECTION.xml index beb606f..56d17d5 100644 --- a/test/assets/CONNECTION.xml +++ b/test/assets/CONNECTION.xml @@ -1 +1 @@ - + diff --git a/test/assets/Cache.twbx b/test/assets/Cache.twbx new file mode 100644 index 0000000..b1df6b7 Binary files /dev/null and b/test/assets/Cache.twbx differ diff --git a/test/bvt.py b/test/bvt.py index e09ec55..b2fb4af 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -60,6 +60,8 @@ def test_can_read_attributes_from_connection(self): self.assertEqual(conn.dbclass, 'sqlserver') self.assertEqual(conn.authentication, 'sspi') self.assertEqual(conn.port, '1433') + self.assertEqual(conn.initial_sql, '') + self.assertEqual(conn.query_band, '') def test_can_write_attributes_to_connection(self): conn = Connection(self.connection) @@ -67,10 +69,14 @@ def test_can_write_attributes_to_connection(self): conn.server = 'mssql2014' conn.username = 'bob' conn.port = '1337' + conn.initial_sql = "insert values (1, 'winning') into schema.table" + conn.query_band = 
'TableauReport=' self.assertEqual(conn.dbname, 'BubblesInMyDrink') self.assertEqual(conn.username, 'bob') self.assertEqual(conn.server, 'mssql2014') self.assertEqual(conn.port, '1337') + self.assertEqual(conn.initial_sql, "insert values (1, 'winning') into schema.table") + self.assertEqual(conn.query_band, 'TableauReport=') def test_can_delete_port_from_connection(self): conn = Connection(self.connection) @@ -78,6 +84,18 @@ def test_can_delete_port_from_connection(self): self.assertEqual(conn.port, None) self.assertIsNone(conn._connectionXML.get('port')) + def test_can_delete_initial_sql_from_connection(self): + conn = Connection(self.connection) + conn.initial_sql = None + self.assertEqual(conn.initial_sql, None) + self.assertIsNone(conn._connectionXML.get('initial_sql')) + + def test_can_delete_query_band_from_connection(self): + conn = Connection(self.connection) + conn.query_band = None + self.assertEqual(conn.query_band, None) + self.assertIsNone(conn._connectionXML.get('query_band')) + def test_bad_dbclass_rasies_attribute_error(self): conn = Connection(self.connection) conn.dbclass = 'sqlserver' @@ -393,5 +411,6 @@ def test_82_workbook_throws_exception(self): with self.assertRaises(TableauVersionNotSupportedException): wb = Workbook(TABLEAU_82_TWB) + if __name__ == '__main__': unittest.main() diff --git a/test/test_xfile.py b/test/test_xfile.py index 6cbe67f..259c98b 100644 --- a/test/test_xfile.py +++ b/test/test_xfile.py @@ -12,8 +12,17 @@ 'BadZip.zip' ) +TWBX_WITH_CACHE_FILES = os.path.join( + TEST_ASSET_DIR, + 'Cache.twbx' +) + class XFileEdgeTests(unittest.TestCase): def test_find_file_in_zip_no_xml_file(self): badzip = zipfile.ZipFile(BAD_ZIP_FILE) self.assertIsNone(find_file_in_zip(badzip)) + + def test_only_find_twbs(self): + twb_from_twbx_with_cache = zipfile.ZipFile(TWBX_WITH_CACHE_FILES) + self.assertEqual(find_file_in_zip(twb_from_twbx_with_cache), 'Superstore.twb')