diff --git a/doc/changes.rst b/doc/changes.rst index 5a825add..84112fbb 100644 --- a/doc/changes.rst +++ b/doc/changes.rst @@ -3,6 +3,20 @@ Changes .. currentmodule:: patsy +v0.4.0 +------ + +* Formulas (more precisely, :class:`EvalFactor` objects) now only + keep a reference to the variables required from their environment + instead of the whole environment when the formula was defined. + +* Incompatible change: :class:`EvalFactor` does not take an + ``eval_env`` argument anymore. + +* Incompatible change: the :func:`design_matrix_builders` function and + the :meth:`EvalFactor.memorize_passes_needed` method now + require an ``eval_env`` as an additional argument. + v0.3.0 ------ diff --git a/doc/expert-model-specification.rst b/doc/expert-model-specification.rst index 5a685d2b..5eec536f 100644 --- a/doc/expert-model-specification.rst +++ b/doc/expert-model-specification.rst @@ -78,12 +78,11 @@ things are.) from patsy import (ModelDesc, EvalEnvironment, Term, EvalFactor, LookupFactor, demo_data, dmatrix) data = demo_data("a", "x") - env = EvalEnvironment.capture() # LookupFactor takes a dictionary key: a_lookup = LookupFactor("a") # EvalFactor takes arbitrary Python code: - x_transform = EvalFactor("np.log(x ** 2)", env) + x_transform = EvalFactor("np.log(x ** 2)") # First argument is empty list for dmatrix; we would need to put # something there if we were calling dmatrices. desc = ModelDesc([], @@ -157,7 +156,7 @@ The full interface looks like this: :term:`hashable`. These methods will determine which factors Patsy considers equal for purposes of redundancy elimination. - .. method:: memorize_passes_needed(state) + .. method:: memorize_passes_needed(state, eval_env) Return the number of passes through the data that this factor will need in order to set up any :ref:`stateful-transforms`. @@ -171,6 +170,9 @@ The full interface looks like this: will be passed back in to all memorization and evaluation methods. 
+ `eval_env` is an :class:`EvalEnvironment` object, describing + the Python environment where the factor is being evaluated. + .. method:: memorize_chunk(state, which_pass, data) Called repeatedly with each 'chunk' of data produced by the diff --git a/doc/formulas.rst b/doc/formulas.rst index 5a6c9bfd..bd6d49d0 100644 --- a/doc/formulas.rst +++ b/doc/formulas.rst @@ -58,13 +58,12 @@ To make this more concrete, here's how you could manually construct the same objects that Patsy will construct if given the above formula:: - from patsy import EvalEnvironment, ModelDesc - env = EvalEnvironment.capture() - ModelDesc([Term([EvalFactor("y", env)])], + from patsy import ModelDesc + ModelDesc([Term([EvalFactor("y")])], [Term([]), - Term([EvalFactor("a", env)]), - Term([EvalFactor("a", env), EvalFactor("b", env)]), - Term([EvalFactor("np.log(x)", env)])]) + Term([EvalFactor("a")]), + Term([EvalFactor("a"), EvalFactor("b")]), + Term([EvalFactor("np.log(x)")])]) Compare to what you get from parsing the above formula:: diff --git a/patsy/build.py b/patsy/build.py index 091b138b..4a414f45 100644 --- a/patsy/build.py +++ b/patsy/build.py @@ -325,14 +325,14 @@ def test__ColumnBuilder(): cb_intercept.build({f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}, mat3) assert np.allclose(mat3, 1) -def _factors_memorize(factors, data_iter_maker): +def _factors_memorize(factors, data_iter_maker, eval_env): # First, start off the memorization process by setting up each factor's # state and finding out how many passes it will need: factor_states = {} passes_needed = {} for factor in factors: state = {} - which_pass = factor.memorize_passes_needed(state) + which_pass = factor.memorize_passes_needed(state, eval_env) factor_states[factor] = state passes_needed[factor] = which_pass # Now, cycle through the data until all the factors have finished @@ -362,7 +362,7 @@ def __init__(self, requested_passes, token): self._chunk_in_pass = 0 self._seen_passes = 0 - def memorize_passes_needed(self, state): + 
def memorize_passes_needed(self, state, eval_env): state["calls"] = [] state["token"] = self._token return self._requested_passes @@ -389,7 +389,7 @@ def __call__(self): f1 = MockFactor(1, "f1") f2a = MockFactor(2, "f2a") f2b = MockFactor(2, "f2b") - factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data) + factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {}) assert data.calls == 2 mem_chunks0 = [("memorize_chunk", 0)] * data.CHUNKS mem_chunks1 = [("memorize_chunk", 1)] * data.CHUNKS @@ -615,7 +615,7 @@ def _make_term_column_builders(terms, term_to_column_builders[term] = column_builders return new_term_order, term_to_column_builders -def design_matrix_builders(termlists, data_iter_maker, NA_action="drop"): +def design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action="drop"): """Construct several :class:`DesignMatrixBuilders` from termlists. This is one of Patsy's fundamental functions. This function and @@ -629,6 +629,14 @@ def design_matrix_builders(termlists, data_iter_maker, NA_action="drop"): simple iterator because sufficiently complex formulas may require multiple passes over the data (e.g. if there are nested stateful transforms). + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in `termlists` that cannot be + found in `data_iter_maker`, or else a depth represented as an + integer which will be passed to :meth:`EvalEnvironment.capture`. + ``eval_env=0`` means to use the context of the function calling + :func:`design_matrix_builders` for lookups. If calling this function + from a library, you probably want ``eval_env=1``, which means that + variables should be resolved in *your* caller's namespace. :arg NA_action: An :class:`NAAction` object or string, used to determine what values count as 'missing' for purposes of determining the levels of categorical factors. @@ -643,14 +651,25 @@ def design_matrix_builders(termlists, data_iter_maker, NA_action="drop"): .. 
versionadded:: 0.2.0 The ``NA_action`` argument. + .. versionadded:: 0.4.0 + The ``eval_env`` argument. """ + # Check type of eval_env to help people migrating to 0.4.0. Third + # argument used to be NA_action (a string). Having the check for + # eval_env's type gives people migrating to 0.4.0 who used NA_action + # not as a keyword argument a nice error message here, instead of a + # more obscure backtrace later on. + if not isinstance(eval_env, six.integer_types + (EvalEnvironment,)): + raise TypeError("Parameter 'eval_env' must be either an integer or an instance " + "of patsy.EvalEnvironment.") + eval_env = EvalEnvironment.capture(eval_env, reference=1) if isinstance(NA_action, str): NA_action = NAAction(NA_action) all_factors = set() for termlist in termlists: for term in termlist: all_factors.update(term.factors) - factor_states = _factors_memorize(all_factors, data_iter_maker) + factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env) # Now all the factors have working eval methods, so we can evaluate them # on some data to find out what type of data they return. (num_column_counts, diff --git a/patsy/desc.py b/patsy/desc.py index 8cef3d1d..2a325799 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -191,8 +191,8 @@ def test_ModelDesc_from_formula(): for input in ("y ~ x", parse_formula("y ~ x")): eval_env = EvalEnvironment.capture(0) md = ModelDesc.from_formula(input, eval_env) - assert md.lhs_termlist == [Term([EvalFactor("y", eval_env)]),] - assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x", eval_env)])] + assert md.lhs_termlist == [Term([EvalFactor("y")]),] + assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x")])] class IntermediateExpr(object): "This class holds an intermediate result while we're evaluating a tree." 
@@ -356,8 +356,7 @@ def _eval_number(evaluator, tree): "only allowed with **", tree) def _eval_python_expr(evaluator, tree): - factor = EvalFactor(tree.token.extra, evaluator._factor_eval_env, - origin=tree.origin) + factor = EvalFactor(tree.token.extra, origin=tree.origin) return IntermediateExpr(False, None, False, [Term([factor])]) class Evaluator(object): @@ -585,7 +584,7 @@ def eval(self, tree, require_evalexpr=True): "a + <-a**2>", ] -def _assert_terms_match(terms, expected_intercept, expecteds, eval_env): # pragma: no cover +def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover if expected_intercept: expecteds = [()] + expecteds assert len(terms) == len(expecteds) @@ -593,8 +592,7 @@ def _assert_terms_match(terms, expected_intercept, expecteds, eval_env): # pragm if isinstance(term, Term): if isinstance(expected, str): expected = (expected,) - assert term.factors == tuple([EvalFactor(s, eval_env) - for s in expected]) + assert term.factors == tuple([EvalFactor(s) for s in expected]) else: assert term == expected @@ -609,11 +607,9 @@ def _do_eval_formula_tests(tests): # pragma: no cover print(model_desc) lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result _assert_terms_match(model_desc.lhs_termlist, - lhs_intercept, lhs_termlist, - eval_env) + lhs_intercept, lhs_termlist) _assert_terms_match(model_desc.rhs_termlist, - rhs_intercept, rhs_termlist, - eval_env) + rhs_intercept, rhs_termlist) def test_eval_formula(): _do_eval_formula_tests(_eval_tests) diff --git a/patsy/eval.py b/patsy/eval.py index 058c6e1b..8269c4e3 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -17,6 +17,7 @@ import inspect import tokenize import six +import ast from patsy import PatsyError from patsy.util import PushbackAdapter from patsy.tokens import (pretty_untokenize, normalize_token_spacing, @@ -81,6 +82,45 @@ def test_VarLookupDict(): ds["a"] = 10 assert ds["a"] == 10 assert d1["a"] == 1 + assert ds.get("c") is None + assert 
isinstance(repr(ds), six.string_types) + +def ast_names(code): + """Iterator that yields all the (ast) names in a Python expression. + + :arg code: A string containing a Python expression. + """ + # Syntax that allows new name bindings to be introduced is tricky to + # handle here, so we just refuse to do so. + disallowed_ast_nodes = (ast.Lambda, ast.ListComp, ast.GeneratorExp) + if sys.version_info >= (2, 7): + disallowed_ast_nodes += (ast.DictComp, ast.SetComp) + + for node in ast.walk(ast.parse(code)): + if isinstance(node, disallowed_ast_nodes): + raise PatsyError("Lambda, list/dict/set comprehension, generator " + "expression in patsy formula not currently supported.") + if isinstance(node, ast.Name): + yield node.id + +def test_ast_names(): + test_data = [('np.log(x)', ['np', 'x']), + ('x', ['x']), + ('center(x + 1)', ['center', 'x']), + ('dt.date.dt.month', ['dt'])] + for code, expected in test_data: + assert set(ast_names(code)) == set(expected) + +def test_ast_names_disallowed_nodes(): + from nose.tools import assert_raises + def list_ast_names(code): + return list(ast_names(code)) + assert_raises(PatsyError, list_ast_names, "lambda x: x + y") + assert_raises(PatsyError, list_ast_names, "[x + 1 for x in range(10)]") + assert_raises(PatsyError, list_ast_names, "(x + 1 for x in range(10))") + if sys.version_info >= (2, 7): + assert_raises(PatsyError, list_ast_names, "{x: True for x in range(10)}") + assert_raises(PatsyError, list_ast_names, "{x + 1 for x in range(10)}") class EvalEnvironment(object): """Represents a Python execution environment. 
@@ -191,6 +231,13 @@ def my_model(formula_like, data, eval_env=0): finally: del frame + def subset(self, names): + """Creates a new, flat EvalEnvironment that contains only + the variables specified.""" + vld = VarLookupDict(self._namespaces) + new_ns = dict((name, vld[name]) for name in names) + return EvalEnvironment([new_ns], self.flags) + def _namespace_ids(self): return [id(n) for n in self._namespaces] @@ -304,19 +351,40 @@ def test_EvalEnvironment_eval_flags(): # http://www.python.org/dev/peps/pep-0401/ test_flag = __future__.barry_as_FLUFL.compiler_flag assert test_flag & _ALL_FUTURE_FLAGS + env = EvalEnvironment([{"a": 11}], flags=0) assert env.eval("a != 0") == True assert_raises(SyntaxError, env.eval, "a <> 0") + assert env.subset(["a"]).flags == 0 + env2 = EvalEnvironment([{"a": 11}], flags=test_flag) assert env2.eval("a <> 0") == True assert_raises(SyntaxError, env2.eval, "a != 0") + assert env2.subset(["a"]).flags == test_flag else: test_flag = __future__.division.compiler_flag assert test_flag & _ALL_FUTURE_FLAGS + env = EvalEnvironment([{"a": 11}], flags=0) assert env.eval("a / 2") == 11 // 2 == 5 + assert env.subset(["a"]).flags == 0 + env2 = EvalEnvironment([{"a": 11}], flags=test_flag) assert env2.eval("a / 2") == 11 * 1. 
/ 2 != 5 + assert env2.subset(["a"]).flags == test_flag + +def test_EvalEnvironment_subset(): + env = EvalEnvironment([{"a": 1}, {"b": 2}, {"c": 3}]) + + subset_a = env.subset(["a"]) + assert subset_a.eval("a") == 1 + from nose.tools import assert_raises + assert_raises(NameError, subset_a.eval, "b") + assert_raises(NameError, subset_a.eval, "c") + + subset_bc = env.subset(["b", "c"]) + assert subset_bc.eval("b * c") == 6 + assert_raises(NameError, subset_bc.eval, "a") def test_EvalEnvironment_eq(): # Two environments are eq only if they refer to exactly the same @@ -343,33 +411,30 @@ def test_EvalEnvironment_add_outer_namespace(): assert env != env2 class EvalFactor(object): - def __init__(self, code, eval_env, origin=None): + def __init__(self, code, origin=None): """A factor class that executes arbitrary Python code and supports stateful transforms. :arg code: A string containing a Python expression, that will be evaluated to produce this factor's value. - :arg eval_env: The :class:`EvalEnvironment` where `code` will be - evaluated. This is the standard factor class that is used when parsing formula strings and implements the standard stateful transform processing. See :ref:`stateful-transforms` and :ref:`expert-model-specification`. Two EvalFactor's are considered equal (e.g., for purposes of - redundancy detection) if they use the same evaluation environment and - they contain the same token stream. Basically this means that the - source code must be identical except for whitespace:: + redundancy detection) if they contain the same token stream. 
Basically + this means that the source code must be identical except for + whitespace:: - env = EvalEnvironment.capture() - assert EvalFactor("a + b", env) == EvalFactor("a+b", env) - assert EvalFactor("a + b", env) != EvalFactor("b + a", env) + assert EvalFactor("a + b") == EvalFactor("a+b") + assert EvalFactor("a + b") != EvalFactor("b + a") """ + # For parsed formulas, the code will already have been normalized by # the parser. But let's normalize anyway, so we can be sure of having # consistent semantics for __eq__ and __hash__. self.code = normalize_token_spacing(code) - self._eval_env = eval_env self.origin = origin def name(self): @@ -380,24 +445,27 @@ def __repr__(self): def __eq__(self, other): return (isinstance(other, EvalFactor) - and self.code == other.code - and self._eval_env == other._eval_env) + and self.code == other.code) def __ne__(self, other): return not self == other def __hash__(self): - return hash((EvalFactor, self.code, self._eval_env)) + return hash((EvalFactor, self.code)) - def memorize_passes_needed(self, state): + def memorize_passes_needed(self, state, eval_env): # 'state' is just an empty dict which we can do whatever we want with, # and that will be passed back to later memorize functions state["transforms"] = {} + env_namespace = eval_env.namespace + subset_names = [name for name in ast_names(self.code) if name in env_namespace] + state["eval_env"] = eval_env.subset(subset_names) + # example code: == "2 * center(x)" i = [0] def new_name_maker(token): - value = self._eval_env.namespace.get(token) + value = eval_env.namespace.get(token) if hasattr(value, "__patsy_stateful_transform__"): obj_name = "_patsy_stobj%s__%s__" % (i[0], token) i[0] += 1 @@ -460,29 +528,33 @@ def new_name_maker(token): return len(pass_bins) - def _eval(self, code, memorize_state, data): + def _eval(self, code, eval_env, memorize_state, data): inner_namespace = VarLookupDict([data, memorize_state["transforms"]]) return call_and_wrap_exc("Error evaluating 
factor", self, - self._eval_env.eval, + eval_env.eval, code, inner_namespace=inner_namespace) def memorize_chunk(self, state, which_pass, data): for obj_name in state["pass_bins"][which_pass]: - self._eval(state["memorize_code"][obj_name], state, data) + self._eval(state["memorize_code"][obj_name], + state["eval_env"], + state, + data) def memorize_finish(self, state, which_pass): for obj_name in state["pass_bins"][which_pass]: state["transforms"][obj_name].memorize_finish() def eval(self, memorize_state, data): - return self._eval(memorize_state["eval_code"], memorize_state, data) + return self._eval(memorize_state["eval_code"], memorize_state["eval_env"], + memorize_state, data) def test_EvalFactor_basics(): - e = EvalFactor("a+b", EvalEnvironment.capture(0)) + e = EvalFactor("a+b") assert e.code == "a + b" assert e.name() == "a + b" - e2 = EvalFactor("a +b", EvalEnvironment.capture(0), origin="asdf") + e2 = EvalFactor("a +b", origin="asdf") assert e == e2 assert hash(e) == hash(e2) assert e.origin is None @@ -493,13 +565,18 @@ def test_EvalFactor_memorize_passes_needed(): foo = stateful_transform(lambda: "FOO-OBJ") bar = stateful_transform(lambda: "BAR-OBJ") quux = stateful_transform(lambda: "QUUX-OBJ") - e = EvalFactor("foo(x) + bar(foo(y)) + quux(z, w)", - EvalEnvironment.capture(0)) + e = EvalFactor("foo(x) + bar(foo(y)) + quux(z, w)") + state = {} - passes = e.memorize_passes_needed(state) + eval_env = EvalEnvironment.capture(0) + passes = e.memorize_passes_needed(state, eval_env) print(passes) print(state) assert passes == 2 + for name in ["foo", "bar", "quux"]: + assert state["eval_env"].namespace[name] is locals()[name] + for name in ["w", "x", "y", "z", "e", "state"]: + assert name not in state["eval_env"].namespace assert state["transforms"] == {"_patsy_stobj0__foo__": "FOO-OBJ", "_patsy_stobj1__bar__": "BAR-OBJ", "_patsy_stobj2__foo__": "FOO-OBJ", @@ -546,12 +623,16 @@ def transform(self, data): def test_EvalFactor_end_to_end(): from patsy.state import 
stateful_transform foo = stateful_transform(_MockTransform) - e = EvalFactor("foo(x) + foo(foo(y))", EvalEnvironment.capture(0)) + e = EvalFactor("foo(x) + foo(foo(y))") state = {} - passes = e.memorize_passes_needed(state) + eval_env = EvalEnvironment.capture(0) + passes = e.memorize_passes_needed(state, eval_env) print(passes) print(state) assert passes == 2 + assert state["eval_env"].namespace["foo"] is foo + for name in ["x", "y", "e", "state"]: + assert name not in state["eval_env"].namespace import numpy as np e.memorize_chunk(state, 0, {"x": np.array([1, 2]), diff --git a/patsy/highlevel.py b/patsy/highlevel.py index a6c77f7e..2eca43dd 100644 --- a/patsy/highlevel.py +++ b/patsy/highlevel.py @@ -33,7 +33,7 @@ def _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action): if isinstance(formula_like, DesignMatrixBuilder): - return (design_matrix_builders([[]], data_iter_maker, NA_action)[0], + return (design_matrix_builders([[]], data_iter_maker, eval_env, NA_action)[0], formula_like) if (isinstance(formula_like, tuple) and len(formula_like) == 2 @@ -54,6 +54,7 @@ def _try_incr_builders(formula_like, data_iter_maker, eval_env, return design_matrix_builders([formula_like.lhs_termlist, formula_like.rhs_termlist], data_iter_maker, + eval_env, NA_action) else: return None diff --git a/patsy/test_build.py b/patsy/test_build.py index 03e7a028..1ec6ca68 100644 --- a/patsy/test_build.py +++ b/patsy/test_build.py @@ -63,7 +63,7 @@ def make_matrix(data, expected_rank, entries, column_names=None): termlist = make_termlist(*entries) def iter_maker(): yield data - builders = design_matrix_builders([termlist], iter_maker) + builders = design_matrix_builders([termlist], iter_maker, eval_env=0) matrices = build_design_matrices(builders, data) matrix = matrices[0] assert (builders[0].design_info.term_slices @@ -232,7 +232,7 @@ def test_build_design_matrices_dtype(): data = {"x": [1, 2, 3]} def iter_maker(): yield data - builder = 
design_matrix_builders([make_termlist("x")], iter_maker)[0] + builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0] mat = build_design_matrices([builder], data)[0] assert mat.dtype == np.dtype(np.float64) @@ -248,7 +248,7 @@ def test_return_type(): data = {"x": [1, 2, 3]} def iter_maker(): yield data - builder = design_matrix_builders([make_termlist("x")], iter_maker)[0] + builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0] # Check explicitly passing return_type="matrix" works mat = build_design_matrices([builder], data, return_type="matrix")[0] @@ -263,7 +263,7 @@ def test_NA_action(): initial_data = {"x": [1, 2, 3], "c": ["c1", "c2", "c1"]} def iter_maker(): yield initial_data - builder = design_matrix_builders([make_termlist("x", "c")], iter_maker)[0] + builder = design_matrix_builders([make_termlist("x", "c")], iter_maker, 0)[0] # By default drops rows containing either NaN or None mat = build_design_matrices([builder], @@ -310,7 +310,7 @@ def test_NA_drop_preserves_levels(): data = {"x": [1.0, np.nan, 3.0], "c": ["c1", "c2", "c3"]} def iter_maker(): yield data - builder = design_matrix_builders([make_termlist("x", "c")], iter_maker)[0] + builder = design_matrix_builders([make_termlist("x", "c")], iter_maker, 0)[0] assert builder.design_info.column_names == ["c[c1]", "c[c2]", "c[c3]", "x"] @@ -330,14 +330,17 @@ def test_return_type_pandas(): index=[10, 20, 30]) def iter_maker(): yield data - int_builder, = design_matrix_builders([make_termlist([])], iter_maker) + int_builder, = design_matrix_builders([make_termlist([])], iter_maker, 0) (y_builder, x_builder) = design_matrix_builders([make_termlist("y"), make_termlist("x")], - iter_maker) + iter_maker, + eval_env=0) (x_a_builder,) = design_matrix_builders([make_termlist("x", "a")], - iter_maker) + iter_maker, + eval_env=0) (x_y_builder,) = design_matrix_builders([make_termlist("x", "y")], - iter_maker) + iter_maker, + eval_env=0) # Index compatibility is always checked for 
pandas input, regardless of # whether we're producing pandas output assert_raises(PatsyError, @@ -471,7 +474,7 @@ def iter_maker(): yield {"x": data1} yield {"x": data2} try: - builders = design_matrix_builders([termlist], iter_maker) + builders = design_matrix_builders([termlist], iter_maker, 0) build_design_matrices(builders, {"x": data1}) build_design_matrices(builders, {"x": data2}) except PatsyError: @@ -481,7 +484,7 @@ def iter_maker(): def t_setup_predict(data1, data2): def iter_maker(): yield {"x": data1} - builders = design_matrix_builders([termlist], iter_maker) + builders = design_matrix_builders([termlist], iter_maker, 0) assert_raises(PatsyError, build_design_matrices, builders, {"x": data2}) for (a, b) in test_cases_twoway: @@ -510,11 +513,12 @@ def iter_maker(): # - the index argument is not given # - the data is not a DataFrame # - there are no other matrices - null_builder = design_matrix_builders([make_termlist()], iter_maker)[0] + null_builder = design_matrix_builders([make_termlist()], iter_maker, 0)[0] assert_raises(PatsyError, build_design_matrices, [null_builder], data) intercept_builder = design_matrix_builders([make_termlist([])], - iter_maker)[0] + iter_maker, + eval_env=0)[0] assert_raises(PatsyError, build_design_matrices, [intercept_builder], data) assert_raises(PatsyError, @@ -534,13 +538,15 @@ def iter_maker(): x_termlist = make_termlist(["x"]) builders = design_matrix_builders([x_termlist, make_termlist()], - iter_maker) + iter_maker, + eval_env=0) x_m, null_m = build_design_matrices(builders, data) assert np.allclose(x_m, [[1], [2], [3]]) assert null_m.shape == (3, 0) builders = design_matrix_builders([x_termlist, make_termlist([])], - iter_maker) + iter_maker, + eval_env=0) x_m, null_m = build_design_matrices(builders, data) x_m, intercept_m = build_design_matrices(builders, data) assert np.allclose(x_m, [[1], [2], [3]]) @@ -552,13 +558,19 @@ def iter_maker(): yield data t1 = make_termlist(["x"]) t2 = make_termlist(["x", "a"]) - 
builders = design_matrix_builders([t1, t2], iter_maker) + builders = design_matrix_builders([t1, t2], iter_maker, eval_env=0) m1, m2 = build_design_matrices(builders, data) check_design_matrix(m1, 1, t1, column_names=["x"]) assert np.allclose(m1, [[1], [2], [3]]) check_design_matrix(m2, 2, t2, column_names=["x:a[a1]", "x:a[a2]"]) assert np.allclose(m2, [[1, 0], [0, 2], [3, 0]]) +def test_eval_env_type_builder(): + data = {"x": [1, 2, 3]} + def iter_maker(): + yield data + assert_raises(TypeError, design_matrix_builders, [make_termlist("x")], iter_maker, "foo") + def test_categorical(): data_strings = {"a": ["a1", "a2", "a1"]} data_categ = {"a": C(["a2", "a1", "a2"])} @@ -570,7 +582,8 @@ def t(data1, data2): def iter_maker(): yield data1 builders = design_matrix_builders([make_termlist(["a"])], - iter_maker) + iter_maker, + eval_env=0) build_design_matrices(builders, data2) for data1 in datas: for data2 in datas: @@ -666,7 +679,7 @@ def test_DesignMatrixBuilder_subset(): all_terms = make_termlist("x", "y", "z") def iter_maker(): yield all_data - all_builder = design_matrix_builders([all_terms], iter_maker)[0] + all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0] full_matrix = build_design_matrices([all_builder], all_data)[0] def t(which_terms, variables, columns): diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index fda3e4f3..2b389735 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -282,7 +282,8 @@ def __patsy_get_model_desc__(self, data): [Term([]), Term([LookupFactor("x")])], ) builders = design_matrix_builders(termlists, - lambda: iter([{"x": [1, 2, 3]}])) + lambda: iter([{"x": [1, 2, 3]}]), + eval_env=0) # twople but with no LHS t((builders[0], builders[2]), {"x": [10, 20, 30]}, 0, True, @@ -706,6 +707,15 @@ def test_0d_data(): data_series)[0], expected) +def test_env_not_saved_in_builder(): + x_in_env = [1, 2, 3] + design_matrix = dmatrix("x_in_env", {}) + + x_in_env = [10, 20, 30] + design_matrix2 = 
dmatrix(design_matrix.design_info.builder, {}) + + assert np.allclose(design_matrix, design_matrix2) + def test_C_and_pandas_categorical(): if not have_pandas_categorical: return diff --git a/patsy/user_util.py b/patsy/user_util.py index c2442c35..fc8e7740 100644 --- a/patsy/user_util.py +++ b/patsy/user_util.py @@ -199,10 +199,10 @@ def __hash__(self): return hash((LookupFactor, self._varname, self._force_categorical, self._contrast, self._levels)) - def memorize_passes_needed(self, state): + def memorize_passes_needed(self, state, eval_env): return 0 - def memorize_chunk(self, state, which_pass, env): # pragma: no cover + def memorize_chunk(self, state, which_pass, data): # pragma: no cover assert False def memorize_finish(self, state, which_pass): # pragma: no cover