diff --git a/tableaudocumentapi/connection.py b/tableaudocumentapi/connection.py index f71daa8..7bdc1a9 100644 --- a/tableaudocumentapi/connection.py +++ b/tableaudocumentapi/connection.py @@ -1,27 +1,14 @@ -############################################################################### -# -# Connection - A class for writing connections to Tableau files -# -############################################################################### import xml.etree.ElementTree as ET from tableaudocumentapi.dbclass import is_valid_dbclass class Connection(object): - """ - A class for writing connections to Tableau files. - - """ - - ########################################################################### - # - # Public API. - # - ########################################################################### + """A class representing connections inside Data Sources.""" def __init__(self, connxml): - """ - Constructor. + """Connection is usually instantiated by passing in connection elements + in a Data Source. If creating a connection from scratch you can call + `from_attributes` passing in the connection attributes. """ self._connectionXML = connxml @@ -37,6 +24,9 @@ def __repr__(self): @classmethod def from_attributes(cls, server, dbname, username, dbclass, port=None, authentication=''): + """Creates a new connection that can be added into a Data Source. + `authentication` defaults to `''` which will be treated as 'prompt' by Tableau.""" + root = ET.Element('connection', authentication=authentication) xml = cls(root) xml.server = server @@ -47,11 +37,9 @@ def from_attributes(cls, server, dbname, username, dbclass, port=None, authentic return xml - ########### - # dbname - ########### @property def dbname(self): + """Database name for the connection. 
Not the table name.""" return self._dbname @dbname.setter @@ -69,11 +57,9 @@ def dbname(self, value): self._dbname = value self._connectionXML.set('dbname', value) - ########### - # server - ########### @property def server(self): + """Hostname or IP address of the database server. May also be a URL in some connection types.""" return self._server @server.setter @@ -91,11 +77,9 @@ def server(self, value): self._server = value self._connectionXML.set('server', value) - ########### - # username - ########### @property def username(self): + """Username used to authenticate to the database.""" return self._username @username.setter @@ -113,22 +97,26 @@ def username(self, value): self._username = value self._connectionXML.set('username', value) - ########### - # authentication - ########### @property def authentication(self): return self._authentication - ########### - # dbclass - ########### @property def dbclass(self): + """The type of connection (e.g. 'MySQL', 'Postgresql'). A complete list + can be found in dbclass.py""" return self._class @dbclass.setter def dbclass(self, value): + """Set the connection's dbclass property. + + Args: + value: New dbclass value. String. + + Returns: + Nothing. + """ if not is_valid_dbclass(value): raise AttributeError("'{}' is not a valid database type".format(value)) @@ -136,15 +124,22 @@ def dbclass(self, value): self._class = value self._connectionXML.set('class', value) - ########### - # port - ########### @property def port(self): + """Port used to connect to the database.""" return self._port @port.setter def port(self, value): + """Set the connection's port property. + + Args: + value: New port value. String. + + Returns: + Nothing. 
+ """ + self._port = value # If port is None we remove the element and don't write it to XML if value is None: diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index a34cba5..418dc53 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -1,8 +1,3 @@ -############################################################################### -# -# Datasource - A class for writing datasources to Tableau files -# -############################################################################### import collections import itertools import xml.etree.ElementTree as ET @@ -16,7 +11,7 @@ ######## # This is needed in order to determine if something is a string or not. It is necessary because -# of differences between python2 (basestring) and python3 (str). If python2 support is every +# of differences between python2 (basestring) and python3 (str). If python2 support is ever # dropped, remove this and change the basestring references below to str try: basestring @@ -35,7 +30,7 @@ def _get_metadata_xml_for_field(root_xml, field_name): def _is_used_by_worksheet(names, field): - return any((y for y in names if y in field.worksheets)) + return any(y for y in names if y in field.worksheets) class FieldDictionary(MultiLookupDict): @@ -87,13 +82,14 @@ def base36encode(number): return sign + base36 -def make_unique_name(dbclass): +def _make_unique_name(dbclass): rand_part = base36encode(uuid4().int) name = dbclass + '.' 
+ rand_part return name class ConnectionParser(object): + """Parser for detecting and extracting connections from differing Tableau file formats.""" def __init__(self, datasource_xml, version): self._dsxml = datasource_xml @@ -101,6 +97,8 @@ def __init__(self, datasource_xml, version): def _extract_federated_connections(self): connections = list(map(Connection, self._dsxml.findall('.//named-connections/named-connection/*'))) + # 'sqlproxy' connections (Tableau Server Connections) are not embedded into named-connection elements + # extract them manually for now connections.extend(map(Connection, self._dsxml.findall("./connection[@class='sqlproxy']"))) return connections @@ -108,6 +106,8 @@ def _extract_legacy_connection(self): return list(map(Connection, self._dsxml.findall('connection'))) def get_connections(self): + """Find and return all connections based on file format version.""" + if float(self._dsversion) < 10: connections = self._extract_legacy_connection() else: @@ -116,16 +116,11 @@ def get_connections(self): class Datasource(object): - """ - A class for writing datasources to Tableau files. + """A class representing Tableau Data Sources, embedded in workbook files or + in TDS files. """ - ########################################################################### - # - # Public API. - # - ########################################################################### def __init__(self, dsxml, filename=None): """ Constructor. Default is to create datasource from xml. 
@@ -145,13 +140,15 @@ def __init__(self, dsxml, filename=None): @classmethod def from_file(cls, filename): - """Initialize datasource from file (.tds)""" + """Initialize datasource from file (.tds or .tdsx)""" - dsxml = xml_open(filename, cls.__name__.lower()).getroot() + dsxml = xml_open(filename, 'datasource').getroot() return cls(dsxml, filename) @classmethod def from_connections(cls, caption, connections): + """Create a new Data Source given a list of Connections.""" + root = ET.Element('datasource', caption=caption, version='10.0', inline='true') outer_connection = ET.SubElement(root, 'connection') outer_connection.set('class', 'federated') @@ -159,7 +156,7 @@ def from_connections(cls, caption, connections): for conn in connections: nc = ET.SubElement(named_conns, 'named-connection', - name=make_unique_name(conn.dbclass), + name=_make_unique_name(conn.dbclass), caption=conn.server) nc.append(conn._connectionXML) return cls(root) @@ -194,16 +191,10 @@ def save_as(self, new_filename): xfile._save_file(self._filename, self._datasourceTree, new_filename) - ########### - # name - ########### @property def name(self): return self._name - ########### - # version - ########### @property def version(self): return self._version @@ -222,9 +213,6 @@ def caption(self): del self._datasourceXML.attrib['caption'] self._caption = '' - ########### - # connections - ########### @property def connections(self): return self._connections @@ -234,9 +222,6 @@ def clear_repository_location(self): if tag is not None: self._datasourceXML.remove(tag) - ########### - # fields - ########### @property def fields(self): if not self._fields: @@ -244,6 +229,8 @@ def fields(self): return self._fields def _get_all_fields(self): + # Some columns are represented by `column` tags and others as `metadata-record` tags + # Find them all and chain them into one dictionary column_field_objects = self._get_column_objects() existing_column_fields = [x.id for x in column_field_objects] 
metadata_only_field_objects = (x for x in self._get_metadata_objects() if x.id not in existing_column_fields) diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 4b4ce59..70b280c 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -1,8 +1,3 @@ -############################################################################### -# -# Workbook - A class for writing Tableau workbook files -# -############################################################################### import weakref @@ -11,25 +6,18 @@ class Workbook(object): - """ - A class for writing Tableau workbook files. + """A class for writing Tableau workbook files.""" - """ - - ########################################################################### - # - # Public API. - # - ########################################################################### def __init__(self, filename): - """ - Constructor. + """Open the workbook at `filename`. This will handle packaged and unpacked + workbook files automatically. This will also parse Data Sources and Worksheets + for access. """ self._filename = filename - self._workbookTree = xml_open(self._filename, self.__class__.__name__.lower()) + self._workbookTree = xml_open(self._filename, 'workbook') self._workbookRoot = self._workbookTree.getroot() # prepare our datasource objects @@ -42,23 +30,14 @@ def __init__(self, filename): self._workbookRoot, self._datasource_index ) - ########### - # datasources - ########### @property def datasources(self): return self._datasources - ########### - # worksheets - ########### @property def worksheets(self): return self._worksheets - ########### - # filename - ########### @property def filename(self): return self._filename @@ -92,11 +71,6 @@ def save_as(self, new_filename): xfile._save_file( self._filename, self._workbookTree, new_filename) - ########################################################################### - # - # Private API. 
- # - ########################################################################### @staticmethod def _prepare_datasource_index(datasources): retval = weakref.WeakValueDictionary() diff --git a/tableaudocumentapi/xfile.py b/tableaudocumentapi/xfile.py index cac6c09..8e213ab 100644 --- a/tableaudocumentapi/xfile.py +++ b/tableaudocumentapi/xfile.py @@ -22,19 +22,23 @@ class TableauInvalidFileException(Exception): def xml_open(filename, expected_root=None): + """Opens the provided 'filename'. Handles detecting if the file is an archive, + detecting the document version, and validating the root tag.""" + # Is the file a zip (.twbx or .tdsx) if zipfile.is_zipfile(filename): tree = get_xml_from_archive(filename) else: tree = ET.parse(filename) + # Is the file a supported version tree_root = tree.getroot() - file_version = Version(tree_root.attrib.get('version', '0.0')) if file_version < MIN_SUPPORTED_VERSION: raise TableauVersionNotSupportedException(file_version) + # Does the root tag match the object type (workbook or data source) if expected_root and (expected_root != tree_root.tag): raise TableauInvalidFileException( "'{}'' is not a valid '{}' file".format(filename, expected_root)) @@ -79,6 +83,10 @@ def get_xml_from_archive(filename): def build_archive_file(archive_contents, zip_file): + """Build a Tableau-compatible archive file.""" + + # This is tested against Desktop and Server, and reverse engineered by lots + # of trial and error. Do not change this logic. for root_dir, _, files in os.walk(archive_contents): relative_dir = os.path.relpath(root_dir, archive_contents) for f in files: