Commit b41c932e authored by Kruyff, D.L.W. (Dylan)

Redo lost changes

parent 8d4ead19
@@ -15,7 +15,7 @@ def add_leading_dimension(x):
 def imread(filename, imread=None, preprocess=None):
-    """ Read a stack of images into a dask array
+    """Read a stack of images into a dask array
     Parameters
     ----------
...
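Note: nearly every hunk in this commit is the same mechanical docstring cleanup, dropping the space after the opening triple quote. A minimal before/after sketch on a hypothetical function (PEP 257 recommends the flush style, and formatters such as black >= 20.8b0 apply it automatically):

# Hypothetical functions, for illustration only.
def before():
    """ Summary line with a stray leading space
    """

def after():
    """Summary line flush against the opening quotes"""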
@@ -48,7 +48,7 @@ def _wrapped_qr(a):
 def tsqr(data, compute_svd=False, _max_vchunk_size=None):
-    """ Direct Tall-and-Skinny QR algorithm
+    """Direct Tall-and-Skinny QR algorithm
     As presented in:
@@ -506,7 +506,7 @@ def tsqr(data, compute_svd=False, _max_vchunk_size=None):
 def sfqr(data, name=None):
-    """ Direct Short-and-Fat QR
+    """Direct Short-and-Fat QR
     Currently, this is a quick hack for non-tall-and-skinny matrices which
     are one chunk tall and (unless they are one chunk wide) have chunks
@@ -611,7 +611,7 @@ def sfqr(data, name=None):
 def compression_level(n, q, oversampling=10, min_subspace_size=20):
-    """ Compression level to use in svd_compressed
+    """Compression level to use in svd_compressed
     Given the size ``n`` of a space, compress that to one of size
     ``q`` plus oversampling.
@@ -630,7 +630,7 @@ def compression_level(n, q, oversampling=10, min_subspace_size=20):
 def compression_matrix(data, q, n_power_iter=0, seed=None, compute=False):
-    """ Randomly sample matrix to find most active subspace
+    """Randomly sample matrix to find most active subspace
     The compression matrix returned by this algorithm can be used to
     compute both the QR decomposition and the Singular Value
@@ -685,7 +685,7 @@ def compression_matrix(data, q, n_power_iter=0, seed=None, compute=False):
 def svd_compressed(a, k, n_power_iter=0, seed=None, compute=False):
-    """ Randomly compressed rank-k thin Singular Value Decomposition.
+    """Randomly compressed rank-k thin Singular Value Decomposition.
     This computes the approximate singular value decomposition of a large
     array. This algorithm is generally faster than the normal algorithm
...
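For context, a hedged usage sketch of the linalg routines touched here (shapes and parameters are illustrative, not from the diff):

import dask.array as da

# Tall-and-skinny: many rows, few columns, chunked along rows only;
# this is the shape tsqr is designed for.
x = da.random.random((10_000, 20), chunks=(1_000, 20))
q, r = da.linalg.tsqr(x)

# Randomized approximate rank-k SVD for arrays that are large in both
# dimensions; n_power_iter trades extra passes over the data for accuracy.
u, s, v = da.linalg.svd_compressed(x, k=5, n_power_iter=2)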
@@ -9,6 +9,7 @@ _numpy_115 = LooseVersion(np.__version__) >= "1.15.0"
 _numpy_116 = LooseVersion(np.__version__) >= "1.16.0"
 _numpy_117 = LooseVersion(np.__version__) >= "1.17.0"
 _numpy_118 = LooseVersion(np.__version__) >= "1.18.0"
+_numpy_120 = LooseVersion(np.__version__) >= "1.20.0"
 # Taken from scikit-learn:
@@ -18,7 +19,7 @@ try:
     if (
         not np.allclose(
             np.divide(0.4, 1, casting="unsafe"),
-            np.divide(0.4, 1, casting="unsafe", dtype=np.float),
+            np.divide(0.4, 1, casting="unsafe", dtype=float),
         )
         or not np.allclose(np.divide(1, 0.5, dtype="i8"), 2)
         or not np.allclose(np.divide(0.4, 1), 0.4)
...
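The substantive change in this file: NumPy 1.20 deprecates the np.float alias (hence the new _numpy_120 flag), and the builtin float is the drop-in replacement. A quick check of the equivalence, assuming the standard NumPy deprecation timeline:

import numpy as np

# np.float was only ever an alias for the builtin float, so switching
# to dtype=float changes nothing numerically; it just avoids the
# DeprecationWarning raised on NumPy >= 1.20 (the alias is removed in 1.24).
a = np.divide(0.4, 1, casting="unsafe", dtype=float)
assert a == 0.4 and a.dtype == np.float64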
@@ -29,7 +29,7 @@ def optimize(
     rename_fused_keys=True,
     **kwargs
 ):
-    """ Optimize dask for array computation
+    """Optimize dask for array computation
     1. Cull tasks not necessary to evaluate keys
     2. Remove full slicing, e.g. x[:]
@@ -71,7 +71,7 @@ def optimize(
 def hold_keys(dsk, dependencies):
-    """ Find keys to avoid fusion
+    """Find keys to avoid fusion
     We don't want to fuse data present in the graph because it is easier to
     serialize as a raw value.
@@ -109,7 +109,7 @@ def hold_keys(dsk, dependencies):
 def optimize_slices(dsk):
-    """ Optimize slices
+    """Optimize slices
     1. Fuse repeated slices, like x[5:][2:6] -> x[7:11]
     2. Remove full slices, like x[:] -> x
@@ -196,7 +196,7 @@ def optimize_slices(dsk):
 def normalize_slice(s):
-    """ Replace Nones in slices with integers
+    """Replace Nones in slices with integers
     >>> normalize_slice(slice(None, None, None))
     slice(0, None, 1)
@@ -228,7 +228,7 @@ def check_for_nonfusible_fancy_indexing(fancy, normal):
 def fuse_slice(a, b):
-    """ Fuse stacked slices together
+    """Fuse stacked slices together
     Fuse a pair of repeated slices into a single slice:
...
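A quick illustration of the slice fusion these optimizations perform; the numpy check is the ground truth, and the exact repr of the fused slice is not asserted:

import numpy as np
from dask.array.optimization import fuse_slice, normalize_slice

# Two stacked slices collapse into one: x[5:][2:6] -> x[7:11].
fused = fuse_slice(slice(5, None), slice(2, 6))

x = np.arange(20)
assert (x[5:][2:6] == x[fused]).all()

# normalize_slice fills in the Nones so fused slices compare predictably.
print(normalize_slice(slice(None, None, None)))  # slice(0, None, 1)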
@@ -61,7 +61,7 @@ def fractional_slice(task, axes):
 def expand_key(k, dims, name=None, axes=None):
-    """ Get all neighboring keys around center
+    """Get all neighboring keys around center
     Parameters
     ----------
@@ -116,7 +116,7 @@ def expand_key(k, dims, name=None, axes=None):
 def overlap_internal(x, axes):
-    """ Share boundaries between neighboring blocks
+    """Share boundaries between neighboring blocks
     Parameters
     ----------
@@ -197,7 +197,7 @@ def trim_overlap(x, depth, boundary=None):
 def trim_internal(x, axes, boundary=None):
-    """ Trim sides from each block
+    """Trim sides from each block
     This couples well with the overlap operation, which may leave excess data on
     each block
@@ -234,7 +234,11 @@ def trim_internal(x, axes, boundary=None):
     chunks = tuple(olist)
     return map_blocks(
-        partial(_trim, axes=axes, boundary=boundary), x, chunks=chunks, dtype=x.dtype
+        partial(_trim, axes=axes, boundary=boundary),
+        x,
+        chunks=chunks,
+        dtype=x.dtype,
+        meta=x._meta,
     )
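The one behavioral change in this file is the new meta=x._meta argument. A minimal sketch of what meta buys (the trimming lambda and shapes are illustrative):

import dask.array as da

x = da.ones((8, 8), chunks=4)

# Without meta, map_blocks probes the function on an empty array to
# infer the output type; passing meta=x._meta skips that probe and
# keeps the result on x's backend array type (plain NumPy here, but
# the same path preserves e.g. CuPy or sparse meta).
y = x.map_blocks(
    lambda b: b[1:-1, 1:-1], chunks=(2, 2), dtype=x.dtype, meta=x._meta
)
assert y.compute().shape == (4, 4)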
@@ -275,7 +279,7 @@ def _trim(x, axes, boundary, block_info):
 def periodic(x, axis, depth):
-    """ Copy a slice of an array around to its other side
+    """Copy a slice of an array around to its other side
     Useful to create periodic boundary conditions for overlap
     """
@@ -299,7 +303,7 @@ def periodic(x, axis, depth):
 def reflect(x, axis, depth):
-    """ Reflect boundaries of array on the same side
+    """Reflect boundaries of array on the same side
     This is the converse of ``periodic``
     """
@@ -329,7 +333,7 @@ def reflect(x, axis, depth):
 def nearest(x, axis, depth):
-    """ Reflect each boundary value outwards
+    """Reflect each boundary value outwards
     This mimics what the skimage.filters.gaussian_filter(... mode="nearest")
     does.
@@ -386,7 +390,7 @@ def _remove_overlap_boundaries(l, r, axis, depth):
 def boundaries(x, depth=None, kind=None):
-    """ Add boundary conditions to an array before overlapping
+    """Add boundary conditions to an array before overlapping
     See Also
     --------
@@ -419,7 +423,7 @@ def boundaries(x, depth=None, kind=None):
 def overlap(x, depth, boundary):
-    """ Share boundaries between neighboring blocks
+    """Share boundaries between neighboring blocks
     Parameters
     ----------
@@ -537,7 +541,7 @@ def add_dummy_padding(x, depth, boundary):
 def map_overlap(
     func, *args, depth=None, boundary=None, trim=True, align_arrays=True, **kwargs
 ):
-    """ Map a function over blocks of arrays with some overlap
+    """Map a function over blocks of arrays with some overlap
     We share neighboring zones between blocks of the array, map a
     function, and then trim away the neighboring strips.
...
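The map_overlap docstring above summarizes the whole overlap/trim workflow; a small end-to-end sketch (the difference function and depth are illustrative):

import numpy as np
import dask.array as da

x = da.arange(10, chunks=5)

# Each block is padded with depth=1 elements from its neighbors
# (wrapping around, per boundary="periodic"), func runs on the padded
# blocks, and trim=True cuts the padding back off.
y = x.map_overlap(lambda b: b - np.roll(b, 1), depth=1, boundary="periodic")
print(y.compute())  # [-9  1  1  1  1  1  1  1  1  1]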
@@ -52,7 +52,7 @@ def _percentiles_from_tdigest(qs, digests):
 def percentile(a, q, interpolation="linear", method="default"):
-    """ Approximate percentile of 1-D array
+    """Approximate percentile of 1-D array
     Parameters
     ----------
@@ -150,7 +150,7 @@ def percentile(a, q, interpolation="linear", method="default"):
 def merge_percentiles(finalq, qs, vals, interpolation="lower", Ns=None):
-    """ Combine several percentile calculations of different data.
+    """Combine several percentile calculations of different data.
     Parameters
     ----------
...
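How the two functions fit together (numbers are illustrative): percentiles are computed per chunk, then merge_percentiles combines them, which is why the result is approximate for multi-chunk arrays:

import dask.array as da

x = da.random.random(10_000, chunks=1_000)

# Ten chunks each contribute their own percentile estimates, which are
# then merged; the answer is exact only for single-chunk arrays.
p = da.percentile(x, [25, 50, 75])
print(p.compute())  # roughly [0.25 0.5 0.75]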
@@ -78,7 +78,7 @@ class RandomState(object):
     def _wrap(
         self, funcname, *args, size=None, chunks="auto", extra_chunks=(), **kwargs
     ):
-        """ Wrap numpy random function to produce dask.array random function
+        """Wrap numpy random function to produce dask.array random function
         extra_chunks should be a chunks tuple to append to the end of chunks
         """
...
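What _wrap produces in practice: every numpy.random method gets a chunked counterpart on dask.array.random.RandomState (sizes below are illustrative):

import dask.array as da

state = da.random.RandomState(42)

# normal() is numpy.random.RandomState.normal wrapped by _wrap: each
# chunk is drawn independently from its own derived seed.
x = state.normal(0, 1, size=(4_000, 4_000), chunks=(1_000, 1_000))
print(x.mean().compute())  # close to 0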
@@ -26,7 +26,7 @@ from .. import config
 def cumdims_label(chunks, const):
-    """ Internal utility for cumulative sum with label.
+    """Internal utility for cumulative sum with label.
     >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
     [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
@@ -112,7 +112,7 @@ def _intersect_1d(breaks):
 def _old_to_new(old_chunks, new_chunks):
-    """ Helper to build old_chunks to new_chunks.
+    """Helper to build old_chunks to new_chunks.
     Handles missing values, as long as the missing dimension
     is unchanged.
@@ -263,8 +263,7 @@ def _largest_block_size(chunks):
 def estimate_graph_size(old_chunks, new_chunks):
-    """ Estimate the graph size during a rechunk computation.
-    """
+    """Estimate the graph size during a rechunk computation."""
     # Estimate the number of intermediate blocks that will be produced
     # (we don't use intersect_chunks() which is much more expensive)
     crossed_size = reduce(
@@ -278,7 +277,7 @@ def estimate_graph_size(old_chunks, new_chunks):
 def divide_to_width(desired_chunks, max_width):
-    """ Minimally divide the given chunks so as to make the largest chunk
+    """Minimally divide the given chunks so as to make the largest chunk
     width less than or equal to *max_width*.
     """
     chunks = []
@@ -293,7 +292,7 @@ def divide_to_width(desired_chunks, max_width):
 def merge_to_number(desired_chunks, max_number):
-    """ Minimally merge the given chunks so as to drop the number of
+    """Minimally merge the given chunks so as to drop the number of
     chunks below *max_number*, while minimizing the largest width.
     """
     if len(desired_chunks) <= max_number:
@@ -450,7 +449,7 @@ def find_split_rechunk(old_chunks, new_chunks, graph_size_limit):
 def plan_rechunk(
     old_chunks, new_chunks, itemsize, threshold=None, block_size_limit=None
 ):
-    """ Plan an iterative rechunking from *old_chunks* to *new_chunks*.
+    """Plan an iterative rechunking from *old_chunks* to *new_chunks*.
     The plan aims to minimize the rechunk graph size.
     Parameters
@@ -530,8 +529,7 @@ def plan_rechunk(
 def _compute_rechunk(x, chunks):
-    """ Compute the rechunk of *x* to the given *chunks*.
-    """
+    """Compute the rechunk of *x* to the given *chunks*."""
     if x.size == 0:
         # Special case for empty array, as the algorithm below does not behave correctly
         return empty(x.shape, chunks=chunks, dtype=x.dtype)
...
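What the planner is for, sketched with illustrative shapes: rechunking row-chunks straight to column-chunks connects every old block to every new one, so plan_rechunk may route through intermediate chunk shapes to keep the graph small:

import dask.array as da

x = da.ones((1000, 1000), chunks=(10, 1000))

# A naive one-step rechunk here would have 100 x 100 block-to-block
# edges; the plan may insert intermediate layouts to avoid that.
y = x.rechunk((1000, 10))
print(len(y.__dask_graph__()))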
@@ -53,7 +53,7 @@ def reduction(
     output_size=1,
     meta=None,
 ):
-    """ General version of reductions
+    """General version of reductions
     Parameters
     ----------
@@ -206,7 +206,7 @@ def _tree_reduce(
     concatenate=True,
     reduced_meta=None,
 ):
-    """ Perform the tree reduction step of a reduction.
+    """Perform the tree reduction step of a reduction.
     Lower level; users should use ``reduction`` or ``arg_reduction`` directly.
     """
@@ -255,7 +255,7 @@ def _tree_reduce(
 def partial_reduce(
     func, x, split_every, keepdims=False, dtype=None, name=None, reduced_meta=None
 ):
-    """ Partial reduction across multiple axes.
+    """Partial reduction across multiple axes.
     Parameters
     ----------
@@ -945,7 +945,7 @@ def nanarg_agg(func, argfunc, data, axis=None, **kwargs):
 def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
-    """ Generic function for argreduction.
+    """Generic function for argreduction.
     Parameters
     ----------
@@ -1004,7 +1004,7 @@ def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None)
 def make_arg_reduction(func, argfunc, is_nan_func=False):
-    """ Create an argreduction callable
+    """Create an argreduction callable
     Parameters
     ----------
@@ -1051,7 +1051,7 @@ nanargmax = make_arg_reduction(chunk.nanmax, _nanargmax, True)
 def cumreduction(func, binop, ident, x, axis=None, dtype=None, out=None):
-    """ Generic function for cumulative reduction
+    """Generic function for cumulative reduction
     Parameters
     ----------
@@ -1144,7 +1144,7 @@ def cumprod(x, axis=None, dtype=None, out=None):
 def topk(a, k, axis=-1, split_every=None):
-    """ Extract the k largest elements from a on the given axis,
+    """Extract the k largest elements from a on the given axis,
     and return them sorted from largest to smallest.
     If k is negative, extract the -k smallest elements instead,
     and return them sorted from smallest to largest.
@@ -1203,7 +1203,7 @@ def topk(a, k, axis=-1, split_every=None):
 def argtopk(a, k, axis=-1, split_every=None):
-    """ Extract the indices of the k largest elements from a on the given axis,
+    """Extract the indices of the k largest elements from a on the given axis,
     and return them sorted from largest to smallest. If k is negative, extract
     the indices of the -k smallest elements instead, and return them sorted
     from smallest to largest.
...
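The topk/argtopk contract in miniature (values are illustrative):

import dask.array as da

x = da.from_array([5, 1, 3, 6], chunks=2)

print(da.topk(x, 2).compute())     # [6 5]  largest two, descending
print(da.argtopk(x, 2).compute())  # [3 0]  their indices in x
print(da.topk(x, -2).compute())    # [1 3]  negative k: smallest, ascending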
@@ -111,7 +111,7 @@ def expand_tuple(chunks, factor):
 def contract_tuple(chunks, factor):
-    """ Return simple chunks tuple such that factor divides all elements
+    """Return simple chunks tuple such that factor divides all elements
     Examples
     --------
@@ -134,7 +134,7 @@ def contract_tuple(chunks, factor):
 def reshape(x, shape):
-    """ Reshape array to new shape
+    """Reshape array to new shape
     This is a parallelized version of the ``np.reshape`` function with the
     following limitations:
...
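A quick sanity check of the reshape semantics (array and shapes are illustrative): results match np.reshape in C order, with dask rechunking under the hood when block boundaries do not line up:

import numpy as np
import dask.array as da

x = da.arange(24, chunks=8)

# Same semantics as np.reshape (C order); dask may rechunk so that
# block boundaries align with the new shape.
y = x.reshape(4, 6)
np.testing.assert_array_equal(y.compute(), np.arange(24).reshape(4, 6))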
@@ -659,20 +659,55 @@ def histogram(a, bins=None, range=None, normed=False, weights=None, density=None
     """
     Blocked variant of :func:`numpy.histogram`.
-    Follows the signature of :func:`numpy.histogram` exactly with the following
-    exceptions:
-    - Either an iterable specifying the ``bins`` or the number of ``bins``
+    Parameters
+    ----------
+    a : array_like
+        Input data. The histogram is computed over the flattened array.
+    bins : int or sequence of scalars, optional
+        Either an iterable specifying the ``bins`` or the number of ``bins``