Commit b41c932e authored by Kruyff,D.L.W. (Dylan)

Redo lost changes

parent 8d4ead19
@@ -105,7 +105,7 @@ def label(x, cache=None):
def box_label(key, verbose=False):
""" Label boxes in graph by chunk index
"""Label boxes in graph by chunk index
>>> box_label(('x', 1, 2, 3))
'(1, 2, 3)'
......
@@ -8,7 +8,7 @@ from .core import reverse_dict
class HighLevelGraph(Mapping):
""" Task graph composed of layers of dependent subgraphs
"""Task graph composed of layers of dependent subgraphs
This object encodes a Dask task graph that is composed of layers of
dependent subgraphs, such as commonly occurs when building task graphs
@@ -109,7 +109,7 @@ class HighLevelGraph(Mapping):
@classmethod
def from_collections(cls, name, layer, dependencies=()):
""" Construct a HighLevelGraph from a new layer and a set of collections
"""Construct a HighLevelGraph from a new layer and a set of collections
This constructs a HighLevelGraph in the common case where we have a single
new layer and a set of old collections on which we want to depend.
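For illustration, a minimal sketch of the pattern (hypothetical layer name; any dask collection works in place of da.ones):
    >>> import dask.array as da
    >>> x = da.ones(10, chunks=(5,))
    >>> layer = {("sum-" + x.name, 0): (sum, x.__dask_keys__())}
    >>> hg = HighLevelGraph.from_collections("sum-" + x.name, layer, dependencies=[x])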
@@ -217,6 +217,17 @@ class HighLevelGraph(Mapping):
g = to_graphviz(self, **kwargs)
return graphviz_to_file(g, filename, format)
+    def validate(self):
+        # Check dependencies
+        for layer_name, deps in self.dependencies.items():
+            if layer_name not in self.layers:
+                raise ValueError(
+                    f"dependencies[{repr(layer_name)}] not found in layers"
+                )
+            for dep in deps:
+                if dep not in self.dependencies:
+                    raise ValueError(f"{repr(dep)} not found in dependencies")
def to_graphviz(
hg,
@@ -226,7 +237,7 @@ def to_graphviz(
graph_attr={},
node_attr=None,
edge_attr=None,
-    **kwargs
+    **kwargs,
):
from .dot import graphviz, name, label
......
@@ -137,7 +137,7 @@ DEBUG = False
def start_state_from_dask(dsk, cache=None, sortkey=None):
""" Start state from a dask
"""Start state from a dask
Examples
--------
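Illustratively, the returned state is a dict of bookkeeping collections (a sketch; add comes from dask.utils_test):
    >>> from dask.utils_test import add
    >>> dsk = {'x': 1, 'y': 2, 'z': (add, 'x', 'y')}
    >>> state = start_state_from_dask(dsk)
    >>> state['ready']  # 'z' depends only on plain data, so it is ready at once
    ['z']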
@@ -230,7 +230,7 @@ def execute_task(key, task_info, dumps, loads, get_id, pack_exception):
def release_data(key, state, delete=True):
""" Remove data from temporary storage
"""Remove data from temporary storage
See Also
finish_task
@@ -283,7 +283,7 @@ def finish_task(
def nested_get(ind, coll):
""" Get nested index from collection
"""Get nested index from collection
Examples
--------
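The collapsed examples run along these lines (illustrative):
    >>> nested_get(1, 'abc')
    'b'
    >>> nested_get([1, 0], 'abc')
    ('b', 'a')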
@@ -317,7 +317,7 @@ def reraise(exc, tb=None):
def identity(x):
""" Identity function. Returns x.
"""Identity function. Returns x.
>>> identity(3)
3
@@ -359,7 +359,7 @@ def get_async(
loads=identity,
**kwargs
):
""" Asynchronous get function
"""Asynchronous get function
This is a general version of various asynchronous schedulers for dask. It
takes an apply_async function as found on Pool objects to form a more
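A sketch of the calling convention (hypothetical graph; anything exposing apply_async, such as a thread pool, will do):
    >>> from multiprocessing.pool import ThreadPool
    >>> from dask.utils_test import inc
    >>> pool = ThreadPool(4)
    >>> dsk = {'x': 1, 'y': (inc, 'x')}
    >>> get_async(pool.apply_async, 4, dsk, 'y')
    2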
@@ -528,7 +528,7 @@ def get_sync(dsk, keys, **kwargs):
def sortkey(item):
""" Sorting key function that is robust to different types
"""Sorting key function that is robust to different types
Both strings and tuples are common key types in dask graphs.
However, in Python 3 one cannot compare strings with tuples directly.
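For instance, assuming the usual (type name, value) sort key, strings and tuples each sort within their own group:
    >>> sorted(['x', 'z', ('y', 1), ('a', 0)], key=sortkey)
    ['x', 'z', ('a', 0), ('y', 1)]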
......
@@ -56,7 +56,7 @@ def _process_get_id():
# To enable testing of the ``RemoteException`` class even when tblib is
# installed, we don't wrap the class in the try block below
class RemoteException(Exception):
""" Remote Exception
"""Remote Exception
Contains the exception and traceback from a remotely run task
"""
@@ -160,7 +160,7 @@ def get(
pool=None,
**kwargs
):
""" Multiprocessed get function appropriate for Bags
"""Multiprocessed get function appropriate for Bags
Parameters
----------
......
import math
import numbers
-import re
from enum import Enum
-from . import config, core
+from . import config, core, utils
from .core import (
istask,
get_dependencies,
@@ -17,7 +16,7 @@ from .utils_test import add, inc # noqa: F401
def cull(dsk, keys):
""" Return new dask with only the tasks required to calculate keys.
"""Return new dask with only the tasks required to calculate keys.
In other words, remove unnecessary tasks from dask.
``keys`` may be a single key or list of keys.
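For example (a sketch using the test helpers imported above):
    >>> from dask.utils_test import add, inc
    >>> d = {'x': 1, 'y': (inc, 'x'), 'out': (add, 'x', 10)}
    >>> dsk, dependencies = cull(d, 'out')  # 'y' is dropped
    >>> dsk  # doctest: +SKIP
    {'x': 1, 'out': (add, 'x', 10)}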
@@ -65,11 +64,11 @@ def default_fused_linear_keys_renamer(keys):
"""Create new keys for fused tasks"""
typ = type(keys[0])
if typ is str:
-        names = [key_split(x) for x in keys[:0:-1]]
+        names = [utils.key_split(x) for x in keys[:0:-1]]
names.append(keys[0])
return "-".join(names)
elif typ is tuple and len(keys[0]) > 0 and isinstance(keys[0][0], str):
-        names = [key_split(x) for x in keys[:0:-1]]
+        names = [utils.key_split(x) for x in keys[:0:-1]]
names.append(keys[0][0])
return ("-".join(names),) + keys[0][1:]
else:
@@ -77,7 +76,7 @@ def default_fused_linear_keys_renamer(keys):
def fuse_linear(dsk, keys=None, dependencies=None, rename_keys=True):
""" Return new dask graph with linear sequence of tasks fused together.
"""Return new dask graph with linear sequence of tasks fused together.
If specified, the keys in ``keys`` keyword argument are *not* fused.
Supply ``dependencies`` from output of ``cull`` if available to avoid
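Roughly (a sketch; with rename_keys=True the fused task would also get a merged 'a-b-c' style name):
    >>> from dask.utils_test import inc
    >>> d = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dsk, dependencies = fuse_linear(d, rename_keys=False)
    >>> dsk  # doctest: +SKIP
    {'c': (inc, (inc, 1))}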
@@ -226,7 +225,7 @@ def _flat_set(x):
def inline(dsk, keys=None, inline_constants=True, dependencies=None):
""" Return new dask with the given keys inlined with their values.
"""Return new dask with the given keys inlined with their values.
Inlines all constants if ``inline_constants`` keyword is True. Note that
the constant keys will remain in the graph, to remove them follow
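For example (a sketch; note that 'x' stays in the graph even though its value has been inlined):
    >>> from dask.utils_test import add, inc
    >>> d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    >>> inline(d)  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}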
@@ -288,7 +287,7 @@ def inline(dsk, keys=None, inline_constants=True, dependencies=None):
def inline_functions(
dsk, output, fast_functions=None, inline_constants=False, dependencies=None
):
""" Inline cheap functions into larger operations
"""Inline cheap functions into larger operations
Examples
--------
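The collapsed example runs along these lines (illustrative; inc is declared cheap and folded into the larger task):
    >>> from dask.utils_test import add, inc
    >>> double = lambda x: x * 2
    >>> dsk = {'out': (add, 'i', 'd'), 'i': (inc, 'x'), 'd': (double, 'y'), 'x': 1, 'y': 1}
    >>> inline_functions(dsk, [], [inc])  # doctest: +SKIP
    {'out': (add, (inc, 'x'), 'd'), 'd': (double, 'y'), 'x': 1, 'y': 1}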
@@ -348,7 +347,7 @@ def unwrap_partial(func):
def functions_of(task):
""" Set of functions contained within nested task
"""Set of functions contained within nested task
Examples
--------
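Illustratively:
    >>> from dask.utils_test import add, inc
    >>> task = (add, (inc, 1), (inc, 2))
    >>> sorted(f.__name__ for f in functions_of(task))
    ['add', 'inc']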
@@ -395,16 +394,16 @@ def default_fused_keys_renamer(keys, max_fused_key_length=120):
return key_name
if typ is str:
-        first_name = key_split(first_key)
-        names = {key_split(k) for k in it}
+        first_name = utils.key_split(first_key)
+        names = {utils.key_split(k) for k in it}
names.discard(first_name)
names = sorted(names)
names.append(first_key)
concatenated_name = "-".join(names)
return _enforce_max_key_limit(concatenated_name)
elif typ is tuple and len(first_key) > 0 and isinstance(first_key[0], str):
-        first_name = key_split(first_key)
-        names = {key_split(k) for k in it}
+        first_name = utils.key_split(first_key)
+        names = {utils.key_split(k) for k in it}
names.discard(first_name)
names = sorted(names)
names.append(first_key[0])
@@ -435,7 +434,7 @@ def fuse(
rename_keys=_default,
fuse_subgraphs=_default,
):
""" Fuse tasks that form reductions; more advanced than ``fuse_linear``
"""Fuse tasks that form reductions; more advanced than ``fuse_linear``
This trades parallelism opportunities for faster scheduling by making tasks
less granular. It can replace ``fuse_linear`` in optimization passes.
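A sketch of the trade-off (hypothetical graph; ave_width bounds how much parallelism is given up):
    >>> from dask.utils_test import add, inc
    >>> d = {'a': 1, 'b': 1, 'c': (add, 'a', 'b'), 'd': (inc, 'c')}
    >>> dsk, deps = fuse(d, ave_width=2, rename_keys=False)
    >>> dsk  # doctest: +SKIP
    {'d': (inc, (add, 1, 1))}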
@@ -919,65 +918,6 @@ def _inplace_fuse_subgraphs(dsk, keys, dependencies, fused_trees, rename_keys):
fused_trees[outkey] = chain2
-# Defining `key_split` (used by key renamers in `fuse`) in utils.py
-# results in messy circular imports, so define it here instead.
-hex_pattern = re.compile("[a-f]+")
-def key_split(s):
-    """
-    >>> key_split('x')
-    'x'
-    >>> key_split('x-1')
-    'x'
-    >>> key_split('x-1-2-3')
-    'x'
-    >>> key_split(('x-2', 1))
-    'x'
-    >>> key_split("('x-2', 1)")
-    'x'
-    >>> key_split('hello-world-1')
-    'hello-world'
-    >>> key_split(b'hello-world-1')
-    'hello-world'
-    >>> key_split('ae05086432ca935f6eba409a8ecd4896')
-    'data'
-    >>> key_split('<module.submodule.myclass object at 0xdaf372')
-    'myclass'
-    >>> key_split(None)
-    'Other'
-    >>> key_split('x-abcdefab') # ignores hex
-    'x'
-    >>> key_split('_(x)') # strips unpleasant characters
-    'x'
-    """
-    if type(s) is bytes:
-        s = s.decode()
-    if type(s) is tuple:
-        s = s[0]
-    try:
-        words = s.split("-")
-        if not words[0][0].isalpha():
-            result = words[0].strip("_'()\"")
-        else:
-            result = words[0]
-        for word in words[1:]:
-            if word.isalpha() and not (
-                len(word) == 8 and hex_pattern.match(word) is not None
-            ):
-                result += "-" + word
-            else:
-                break
-        if len(result) == 32 and re.match(r"[a-f0-9]{32}", result):
-            return "data"
-        else:
-            if result[0] == "<":
-                result = result.strip("<>").split()[0].split(".")[-1]
-            return result
-    except Exception:
-        return "Other"
class SubgraphCallable(object):
"""Create a callable object from a dask graph.
......
@@ -82,7 +82,7 @@ from .utils_test import add, inc # noqa: F401
def order(dsk, dependencies=None):
""" Order nodes in dask graph
"""Order nodes in dask graph
This produces an ordering over our tasks that we use to break ties when
executing. We do this ahead of time to reduce a bit of stress on the
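An illustrative call (the module's classic example; exact tie-breaking may differ between versions):
    >>> from dask.utils_test import add, inc
    >>> dsk = {'a': 1, 'b': 2, 'c': (inc, 'a'), 'd': (add, 'b', 'c')}
    >>> order(dsk)  # doctest: +SKIP
    {'a': 0, 'c': 1, 'b': 2, 'd': 3}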
@@ -154,7 +154,7 @@ def order(dsk, dependencies=None):
initial_stack_key = init_stack.__getitem__
def dependents_key(x):
""" Choose a path from our starting task to our tactical goal
"""Choose a path from our starting task to our tactical goal
This path is connected to a large goal, but focuses on completing
a small goal and being memory efficient.
@@ -168,7 +168,7 @@ def order(dsk, dependencies=None):
)
def dependencies_key(x):
""" Choose which dependency to run as part of a reverse DFS
"""Choose which dependency to run as part of a reverse DFS
This is very similar to ``initial_stack_key``.
"""
@@ -493,7 +493,7 @@ def order(dsk, dependencies=None):
def graph_metrics(dependencies, dependents, total_dependencies):
r""" Useful measures of a graph used by ``dask.order.order``
r"""Useful measures of a graph used by ``dask.order.order``
Example DAG (a1 has no dependencies; b2 and c1 are root nodes):
@@ -620,7 +620,7 @@ def graph_metrics(dependencies, dependents, total_dependencies):
def ndependencies(dependencies, dependents):
""" Number of total data elements on which this key depends
"""Number of total data elements on which this key depends
For each key we return the number of tasks that must be run for us to run
this task.
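For example (a sketch; get_deps is assumed from dask.core):
    >>> from dask.core import get_deps
    >>> from dask.utils_test import inc
    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> num_dependencies, total_dependencies = ndependencies(dependencies, dependents)
    >>> sorted(total_dependencies.items())
    [('a', 1), ('b', 2), ('c', 3)]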
@@ -666,7 +666,7 @@ def ndependencies(dependencies, dependents):
class StrComparable(object):
""" Wrap object so that it defaults to string comparison
"""Wrap object so that it defaults to string comparison
When comparing two objects of different types Python fails
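Wrapped values instead fall back to comparing string forms (illustrative):
    >>> StrComparable('a') < StrComparable(1)  # compares 'a' < '1'
    False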
......
import random
import sys
+from array import array
from distutils.version import LooseVersion
from .utils import Dispatch
@@ -21,6 +22,22 @@ def sizeof_default(o):
return getsizeof(o)
+@sizeof.register(bytes)
+@sizeof.register(bytearray)
+def sizeof_bytes(o):
+    return len(o)
+@sizeof.register(memoryview)
+def sizeof_memoryview(o):
+    return o.nbytes
+@sizeof.register(array)
+def sizeof_array(o):
+    return o.itemsize * len(o)
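A quick check of the new registrations (illustrative):
    >>> from dask.sizeof import sizeof
    >>> sizeof(b'abc')
    3
    >>> from array import array
    >>> sizeof(array('d', [1.0, 2.0]))  # itemsize 8 * 2 elements
    16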
@sizeof.register(list)
@sizeof.register(tuple)
@sizeof.register(set)
......