Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Source which modifies record features as they are read from another source.
Useful for modifying datasets as they are used with ML commands or editing
in bulk.
- Auto create a Definition for the inputs of an `op` when their annotations imply a spec or subspec.
### Changed
- `Edit on Github` button now hidden for plugins.
- Doctests now run via unittests
Expand Down
12 changes: 1 addition & 11 deletions dffml/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,7 @@
from argparse import ArgumentParser
from typing import Dict, Any, Type, Optional

try:
    from typing import get_origin, get_args
except ImportError:
    # typing.get_origin and typing.get_args were added in Python 3.8. On older
    # interpreters fall back to reading the attributes directly.

    def get_origin(t):
        """
        Return the ``__origin__`` attribute of ``t``, or ``None`` when ``t``
        has no such attribute.
        """
        return getattr(t, "__origin__", None)

    def get_args(t):
        """
        Return the ``__args__`` attribute of ``t``, or an empty tuple when
        ``t`` has no such attribute.

        The empty tuple (rather than ``None``) matches what the standard
        library ``typing.get_args`` returns for unsupported types, so callers
        can always index or iterate over the result.
        """
        return getattr(t, "__args__", ())


from .util.data import get_args, get_origin
from .util.cli.arg import Arg
from .util.data import (
traverse_config_set,
Expand Down
83 changes: 83 additions & 0 deletions dffml/df/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Optional,
Set,
)
from dataclasses import is_dataclass
from contextlib import asynccontextmanager

from .exceptions import NotOpImp
Expand All @@ -25,6 +26,7 @@
BaseDataFlowFacilitatorObject,
)
from ..util.cli.arg import Arg
from ..util.data import get_origin, get_args
from ..util.asynchelper import context_stacker
from ..util.entrypoint import base_entry_point

Expand Down Expand Up @@ -108,6 +110,12 @@ class FailedToLoadOperationImplementation(Exception):
"""


class OpCouldNotDeterminePrimitive(Exception):
    """
    Raised by the ``op`` decorator when it cannot work out which primitive to
    use for the Definition of one of the decorated function's parameters.
    """


@base_entry_point("dffml.operation", "opimp")
class OperationImplementation(BaseDataFlowObject):
def __init__(self, config: "BaseConfig") -> None:
Expand Down Expand Up @@ -184,6 +192,49 @@ def op(*args, imp_enter=None, ctx_enter=None, config_cls=None, **kwargs):
iterated over and the values in that ``dict`` are entered. The value yielded
upon entry is assigned to a parameter in the ``OperationImplementation``
instance named after the respective key.

Examples
--------

>>> from dffml import Definition, Input, op
>>> from typing import NamedTuple, List, Dict
>>>
>>> class Person(NamedTuple):
... name: str
... age: int
...
>>> @op
... def cannotVote(p: List[Person]):
... return list(filter(lambda person: person.age < 18, p))
...
>>>
>>> Input(
... value=[
... {"name": "Bob", "age": 20},
... {"name": "Mark", "age": 21},
... {"name": "Alice", "age": 90},
... ],
... definition=cannotVote.op.inputs["p"],
... )
Input(value=[Person(name='Bob', age=20), Person(name='Mark', age=21), Person(name='Alice', age=90)], definition=cannotVote.p)
>>>
>>> @op
... def canVote(p: Dict[str, Person]):
... return {
... person.name: person
... for person in filter(lambda person: person.age >= 18, p.values())
... }
...
>>>
>>> Input(
... value={
... "Bob": {"name": "Bob", "age": 19},
... "Alice": {"name": "Alice", "age": 21},
... "Mark": {"name": "Mark", "age": 90},
... },
... definition=canVote.op.inputs["p"],
... )
Input(value={'Bob': Person(name='Bob', age=19), 'Alice': Person(name='Alice', age=21), 'Mark': Person(name='Mark', age=90)}, definition=canVote.p)
"""

def wrap(func):
Expand Down Expand Up @@ -215,6 +266,38 @@ def wrap(func):
param.annotation, param.annotation.__name__
),
)
elif (
get_origin(param.annotation) is list
or get_origin(param.annotation) is dict
):
# If the annotation is of the form List[MyDataClass] or Dict[Any, MyDataClass]
if get_origin(param.annotation) is list:
primitive = "array"
innerclass = list(get_args(param.annotation))[0]
else:
primitive = "map"
innerclass = list(get_args(param.annotation))[1]

if is_dataclass(innerclass) or bool(
issubclass(innerclass, tuple)
and hasattr(innerclass, "_asdict")
):
kwargs["inputs"][name] = Definition(
name=".".join(name_list),
primitive=primitive,
spec=innerclass,
subspec=True,
)
elif is_dataclass(param.annotation) or bool(
issubclass(param.annotation, tuple)
and hasattr(param.annotation, "_asdict")
):
# If the annotation is either a dataclass or namedtuple
kwargs["inputs"][name] = Definition(
name=".".join(name_list),
primitive="map",
spec=param.annotation,
)
else:
raise OpCouldNotDeterminePrimitive(
f"The primitive of {name} could not be determined"
Expand Down
14 changes: 12 additions & 2 deletions dffml/df/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ class DefinitionMissing(Exception):
"""


class PrimitiveDoesNotMatchValue(Exception):
    """
    Raised when the type of a value is not the right one for the primitive
    declared by its definition.
    """


class Definition(NamedTuple):
"""
Examples
Expand Down Expand Up @@ -308,12 +314,16 @@ def __init__(
parents = []
if definition.spec is not None:
if definition.subspec:
if isinstance(value, list):
if isinstance(value, list) and definition.primitive == "array":
for i, subvalue in enumerate(value):
value[i] = definition.spec(**subvalue)
elif isinstance(value, dict):
elif isinstance(value, dict) and definition.primitive == "map":
for key, subvalue in value.items():
value[key] = definition.spec(**subvalue)
else:
raise PrimitiveDoesNotMatchValue(
f"{type(value)} is not the right type for primitive {definition.primitive}"
)
elif isinstance(value, dict):
value = definition.spec(**value)
if definition.validate is not None:
Expand Down
10 changes: 10 additions & 0 deletions dffml/util/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
import pathlib
from typing import Callable

try:
    from typing import get_origin, get_args
except ImportError:
    # typing.get_origin and typing.get_args were added in Python 3.8. On older
    # interpreters fall back to reading the attributes directly.

    def get_origin(t):
        """
        Return the ``__origin__`` attribute of ``t``, or ``None`` when ``t``
        has no such attribute.
        """
        return getattr(t, "__origin__", None)

    def get_args(t):
        """
        Return the ``__args__`` attribute of ``t``, or an empty tuple when
        ``t`` has no such attribute.

        The empty tuple (rather than ``None``) matches what the standard
        library ``typing.get_args`` returns for unsupported types, so callers
        can always index or iterate over the result.
        """
        return getattr(t, "__args__", ())


def merge(one, two, list_append: bool = True):
for key, value in two.items():
Expand Down