diff --git a/.gitignore b/.gitignore index 13ba8daa..bfa56760 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,6 @@ pattern/web/cache/tmp/ web/cache/tmp/ test/pattern_unittest_db pattern_unittest_db +examples/06-graph/test/ .DS_Store \ No newline at end of file diff --git a/pattern/graph/__init__.py b/pattern/graph/__init__.py index e4e95bf2..a79ba8c9 100644 --- a/pattern/graph/__init__.py +++ b/pattern/graph/__init__.py @@ -133,11 +133,18 @@ def __init__(self, id="", radius=5, **kwargs): self.fill = kwargs.pop("fill", None) self.stroke = kwargs.pop("stroke", (0, 0, 0, 1)) self.strokewidth = kwargs.pop("strokewidth", 1) + + if not isinstance(id, unicode): + id = str(id).decode("utf-8", "ignore") + + # FIXME this is a mess. self.text = kwargs.get("text", True) and \ - Text(isinstance(id, unicode) and id or str(id).decode("utf-8", "ignore"), + Text(id, width=85, fill=kwargs.pop("text", (0, 0, 0, 1)), - fontsize=kwargs.pop("fontsize", 11), **kwargs) or None + fontsize=kwargs.pop("fontsize", 11), + **kwargs) or None + self._weight = None # Calculated by Graph.eigenvector_centrality(). # Calculated by Graph.betweenness_centrality(). self._centrality = None @@ -260,11 +267,19 @@ def contains(self, x, y): def __repr__(self): return "%s(id=%s)" % (self.__class__.__name__, repr(self.id)) - def __eq__(self, node): - return isinstance(node, Node) and self.id == node.id + def __eq__(self, other): + return isinstance(other, Node) and self.id == other.id + + def __ne__(self, other): + return not self.__eq__(other) + + def __lt__(self, other): + return isinstance(other, Node) and self.id < other.id - def __ne__(self, node): - return not self.__eq__(node) + def __hash__(self): + # an alternative might be to use hash(self.id) in some way + # since this is supposed to be unique. + return id(self) #--- NODE LINKS ---------------------------------------------------------- @@ -1181,7 +1196,7 @@ def partition(graph): g[i] = union(g[i], g[j]) g[j] = [] g = [graph.copy(nodes=[graph[id] for id in n]) for n in g if n] - g.sort(lambda a, b: len(b) - len(a)) + g.sort(key=len, reverse=True) return g diff --git a/pattern/graph/commonsense.py b/pattern/graph/commonsense.py index 53b1f161..65013db4 100644 --- a/pattern/graph/commonsense.py +++ b/pattern/graph/commonsense.py @@ -8,8 +8,9 @@ from __future__ import absolute_import -from codecs import BOM_UTF8 from itertools import chain +import os +import sys try: from urllib.request import urlopen @@ -19,7 +20,11 @@ from .__init__ import Graph, Node, Edge, bfs from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS -import os +from codecs import BOM_UTF8 +if sys.version > "3": + BOM_UTF8 = BOM_UTF8.decode("utf-8") + + basestring = str try: MODULE = os.path.dirname(os.path.realpath(__file__)) @@ -125,7 +130,10 @@ def __init__(self, data=os.path.join(MODULE, "commonsense.csv"), **kwargs): if data is not None: s = open(data).read() s = s.strip(BOM_UTF8) - s = s.decode("utf-8") + try: + s = s.decode("utf-8") + except AttributeError: # python 3 + pass s = ((v.strip("\"") for v in r.split(",")) for r in s.splitlines()) for concept1, relation, concept2, context, weight in s: self.add_edge(concept1, concept2, diff --git a/test/test_graph.py b/test/test_graph.py index a07c1d9d..13e9b0f5 100644 --- a/test/test_graph.py +++ b/test/test_graph.py @@ -288,7 +288,8 @@ def test_fringe(self): # Assert leaf fetching. g = self.g.copy() self.assertEqual(g.fringe(0), [g["a"], g["c"]]) - self.assertEqual(g.fringe(1), [g["a"], g["b"], g["c"]]) + # FIXME the ordering is variable in python3 + self.assertEqual(set(g.fringe(1)), set([g["a"], g["b"], g["c"]])) print("pattern.graph.Graph.fringe()") def test_split(self):