Commit b41c932e authored by Kruyff, D.L.W. (Dylan)

Redo lost changes

parent 8d4ead19
@@ -15,7 +15,7 @@ def add_leading_dimension(x):
 def imread(filename, imread=None, preprocess=None):
-    """ Read a stack of images into a dask array
+    """Read a stack of images into a dask array
 
     Parameters
     ----------
......
@@ -48,7 +48,7 @@ def _wrapped_qr(a):
 def tsqr(data, compute_svd=False, _max_vchunk_size=None):
-    """ Direct Tall-and-Skinny QR algorithm
+    """Direct Tall-and-Skinny QR algorithm
 
     As presented in:
@@ -506,7 +506,7 @@ def tsqr(data, compute_svd=False, _max_vchunk_size=None):
 def sfqr(data, name=None):
-    """ Direct Short-and-Fat QR
+    """Direct Short-and-Fat QR
 
     Currently, this is a quick hack for non-tall-and-skinny matrices which
     are one chunk tall and (unless they are one chunk wide) have chunks
@@ -611,7 +611,7 @@ def sfqr(data, name=None):
 def compression_level(n, q, oversampling=10, min_subspace_size=20):
-    """ Compression level to use in svd_compressed
+    """Compression level to use in svd_compressed
 
     Given the size ``n`` of a space, compress that to one of size
     ``q`` plus oversampling.
@@ -630,7 +630,7 @@ def compression_level(n, q, oversampling=10, min_subspace_size=20):
 def compression_matrix(data, q, n_power_iter=0, seed=None, compute=False):
-    """ Randomly sample matrix to find most active subspace
+    """Randomly sample matrix to find most active subspace
 
     The compression matrix returned by this algorithm can be used to
     compute both the QR decomposition and the Singular Value
@@ -685,7 +685,7 @@ def compression_matrix(data, q, n_power_iter=0, seed=None, compute=False):
 def svd_compressed(a, k, n_power_iter=0, seed=None, compute=False):
-    """ Randomly compressed rank-k thin Singular Value Decomposition.
+    """Randomly compressed rank-k thin Singular Value Decomposition.
 
     This computes the approximate singular value decomposition of a large
     array. This algorithm is generally faster than the normal algorithm
......
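Editor's note: a minimal usage sketch of the compressed SVD touched above; the array shape, chunking, and k are illustrative, not taken from the commit.

    import dask.array as da

    # Tall-and-skinny random matrix; one chunk across the short axis.
    x = da.random.random((10000, 100), chunks=(1000, 100))

    # Approximate rank-10 SVD via random projection; n_power_iter
    # trades extra passes over the data for accuracy.
    u, s, v = da.linalg.svd_compressed(x, k=10, n_power_iter=2)
    s.compute()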
@@ -9,6 +9,7 @@ _numpy_115 = LooseVersion(np.__version__) >= "1.15.0"
 _numpy_116 = LooseVersion(np.__version__) >= "1.16.0"
 _numpy_117 = LooseVersion(np.__version__) >= "1.17.0"
 _numpy_118 = LooseVersion(np.__version__) >= "1.18.0"
+_numpy_120 = LooseVersion(np.__version__) >= "1.20.0"
 
 # Taken from scikit-learn:
@@ -18,7 +19,7 @@ try:
         if (
             not np.allclose(
                 np.divide(0.4, 1, casting="unsafe"),
-                np.divide(0.4, 1, casting="unsafe", dtype=np.float),
+                np.divide(0.4, 1, casting="unsafe", dtype=float),
             )
             or not np.allclose(np.divide(1, 0.5, dtype="i8"), 2)
             or not np.allclose(np.divide(0.4, 1), 0.4)
......
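Editor's note: the `dtype=np.float` change tracks NumPy's deprecation of the `np.float` alias in 1.20 (hence the new `_numpy_120` flag; the alias was later removed entirely). A minimal illustration of the equivalent spelling:

    import numpy as np

    # np.float was only an alias for the Python builtin float, so the
    # result is unchanged; the builtin avoids the 1.20 DeprecationWarning.
    result = np.divide(0.4, 1, casting="unsafe", dtype=float)
    assert np.allclose(result, 0.4)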
@@ -29,7 +29,7 @@ def optimize(
     rename_fused_keys=True,
     **kwargs
 ):
-    """ Optimize dask for array computation
+    """Optimize dask for array computation
 
     1. Cull tasks not necessary to evaluate keys
     2. Remove full slicing, e.g. x[:]
@@ -71,7 +71,7 @@ def optimize(
 def hold_keys(dsk, dependencies):
-    """ Find keys to avoid fusion
+    """Find keys to avoid fusion
 
     We don't want to fuse data present in the graph because it is easier to
     serialize as a raw value.
@@ -109,7 +109,7 @@ def hold_keys(dsk, dependencies):
 def optimize_slices(dsk):
-    """ Optimize slices
+    """Optimize slices
 
     1. Fuse repeated slices, like x[5:][2:6] -> x[7:11]
     2. Remove full slices, like x[:] -> x
@@ -196,7 +196,7 @@ def optimize_slices(dsk):
 def normalize_slice(s):
-    """ Replace Nones in slices with integers
+    """Replace Nones in slices with integers
 
     >>> normalize_slice(slice(None, None, None))
     slice(0, None, 1)
@@ -228,7 +228,7 @@ def check_for_nonfusible_fancy_indexing(fancy, normal):
 def fuse_slice(a, b):
-    """ Fuse stacked slices together
+    """Fuse stacked slices together
 
     Fuse a pair of repeated slices into a single slice:
......
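Editor's note: a rough sketch of what `optimize_slices` does at graph-optimization time; the fusion happens automatically when the array is computed.

    import dask.array as da

    x = da.ones(20, chunks=5)

    # Repeated slices such as x[5:][2:6] are fused into the single
    # task x[7:11], and full slices like y[:] are dropped entirely.
    y = x[5:][2:6]
    z = y[:]
    z.compute()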
@@ -61,7 +61,7 @@ def fractional_slice(task, axes):
 def expand_key(k, dims, name=None, axes=None):
-    """ Get all neighboring keys around center
+    """Get all neighboring keys around center
 
     Parameters
     ----------
@@ -116,7 +116,7 @@ def expand_key(k, dims, name=None, axes=None):
 def overlap_internal(x, axes):
-    """ Share boundaries between neighboring blocks
+    """Share boundaries between neighboring blocks
 
     Parameters
     ----------
@@ -197,7 +197,7 @@ def trim_overlap(x, depth, boundary=None):
 def trim_internal(x, axes, boundary=None):
-    """ Trim sides from each block
+    """Trim sides from each block
 
     This couples well with the overlap operation, which may leave excess data on
     each block
@@ -234,7 +234,11 @@ def trim_internal(x, axes, boundary=None):
     chunks = tuple(olist)
 
     return map_blocks(
-        partial(_trim, axes=axes, boundary=boundary), x, chunks=chunks, dtype=x.dtype
+        partial(_trim, axes=axes, boundary=boundary),
+        x,
+        chunks=chunks,
+        dtype=x.dtype,
+        meta=x._meta,
     )
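Editor's note: the added `meta=x._meta` forwards the input's empty meta-array so `map_blocks` can infer the output array type without probing the function on fake data, which keeps non-NumPy chunk types (e.g. sparse or CuPy) intact. A sketch of the same pattern in user code, with an illustrative function:

    import dask.array as da

    x = da.ones((10, 10), chunks=(5, 5))

    # meta= declares the output's array type and dtype up front instead
    # of letting dask call the function on a zero-size sample.
    y = x.map_blocks(lambda b: b + 1, dtype=x.dtype, meta=x._meta)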
@@ -275,7 +279,7 @@ def _trim(x, axes, boundary, block_info):
 def periodic(x, axis, depth):
-    """ Copy a slice of an array around to its other side
+    """Copy a slice of an array around to its other side
 
     Useful to create periodic boundary conditions for overlap
     """
@@ -299,7 +303,7 @@ def periodic(x, axis, depth):
 def reflect(x, axis, depth):
-    """ Reflect boundaries of array on the same side
+    """Reflect boundaries of array on the same side
 
     This is the converse of ``periodic``
     """
@@ -329,7 +333,7 @@ def reflect(x, axis, depth):
 def nearest(x, axis, depth):
-    """ Each reflect each boundary value outwards
+    """Reflect each boundary value outwards
 
     This mimics what the skimage.filters.gaussian_filter(... mode="nearest")
     does.
@@ -386,7 +390,7 @@ def _remove_overlap_boundaries(l, r, axis, depth):
 def boundaries(x, depth=None, kind=None):
-    """ Add boundary conditions to an array before overlaping
+    """Add boundary conditions to an array before overlapping
 
     See Also
     --------
def overlap(x, depth, boundary):
""" Share boundaries between neighboring blocks
"""Share boundaries between neighboring blocks
Parameters
----------
......@@ -537,7 +541,7 @@ def add_dummy_padding(x, depth, boundary):
def map_overlap(
func, *args, depth=None, boundary=None, trim=True, align_arrays=True, **kwargs
):
""" Map a function over blocks of arrays with some overlap
"""Map a function over blocks of arrays with some overlap
We share neighboring zones between blocks of the array, map a
function, and then trim away the neighboring strips.
......
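Editor's note: a small usage sketch of the overlap/trim cycle that `map_overlap` wraps up; the depth, boundary, and function are illustrative.

    import dask.array as da

    x = da.arange(10, chunks=5)

    # Each block is grown by one element from its neighbours (overlap),
    # the function runs on the padded blocks, and trim=True cuts the
    # shared strips back off.
    y = x.map_overlap(lambda b: 2 * b, depth=1, boundary="reflect", trim=True)
    y.compute()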
@@ -52,7 +52,7 @@ def _percentiles_from_tdigest(qs, digests):
 def percentile(a, q, interpolation="linear", method="default"):
-    """ Approximate percentile of 1-D array
+    """Approximate percentile of 1-D array
 
     Parameters
     ----------
@@ -150,7 +150,7 @@ def percentile(a, q, interpolation="linear", method="default"):
 def merge_percentiles(finalq, qs, vals, interpolation="lower", Ns=None):
-    """ Combine several percentile calculations of different data.
+    """Combine several percentile calculations of different data.
 
     Parameters
     ----------
......
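Editor's note: a minimal sketch of the approximate-percentile path documented above: each chunk computes local percentiles, which `merge_percentiles` then combines into a global estimate. The array is illustrative.

    import dask.array as da

    x = da.random.random(10000, chunks=1000)

    # Works on 1-D arrays only; method="tdigest" is the alternative
    # combination strategy to the default interpolation-based merge.
    q = da.percentile(x, [25, 50, 75])
    q.compute()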
@@ -78,7 +78,7 @@ class RandomState(object):
     def _wrap(
         self, funcname, *args, size=None, chunks="auto", extra_chunks=(), **kwargs
     ):
-        """ Wrap numpy random function to produce dask.array random function
+        """Wrap numpy random function to produce dask.array random function
 
         extra_chunks should be a chunks tuple to append to the end of chunks
         """
......
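Editor's note: a short sketch of the wrapped API that `_wrap` produces; every numpy `RandomState` method gains `size=`/`chunks=` support. Seed and shapes are illustrative.

    import dask.array as da

    # Same interface as numpy.random.RandomState, but each (4, 4) block
    # is generated as an independent task with its own derived seed.
    state = da.random.RandomState(5)
    x = state.normal(loc=10, scale=0.1, size=(8, 8), chunks=(4, 4))
    x.mean().compute()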
@@ -26,7 +26,7 @@ from .. import config
 def cumdims_label(chunks, const):
-    """ Internal utility for cumulative sum with label.
+    """Internal utility for cumulative sum with label.
 
     >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
     [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
@@ -112,7 +112,7 @@ def _intersect_1d(breaks):
 def _old_to_new(old_chunks, new_chunks):
-    """ Helper to build old_chunks to new_chunks.
+    """Helper to build old_chunks to new_chunks.
 
     Handles missing values, as long as the missing dimension
     is unchanged.
@@ -263,8 +263,7 @@ def _largest_block_size(chunks):
 def estimate_graph_size(old_chunks, new_chunks):
-    """ Estimate the graph size during a rechunk computation.
-    """
+    """Estimate the graph size during a rechunk computation."""
     # Estimate the number of intermediate blocks that will be produced
     # (we don't use intersect_chunks() which is much more expensive)
     crossed_size = reduce(
@@ -278,7 +277,7 @@ def estimate_graph_size(old_chunks, new_chunks):
 def divide_to_width(desired_chunks, max_width):
-    """ Minimally divide the given chunks so as to make the largest chunk
+    """Minimally divide the given chunks so as to make the largest chunk
     width less or equal than *max_width*.
     """
     chunks = []
@@ -293,7 +292,7 @@ def divide_to_width(desired_chunks, max_width):
 def merge_to_number(desired_chunks, max_number):
-    """ Minimally merge the given chunks so as to drop the number of
+    """Minimally merge the given chunks so as to drop the number of
     chunks below *max_number*, while minimizing the largest width.
     """
     if len(desired_chunks) <= max_number:
@@ -450,7 +449,7 @@ def find_split_rechunk(old_chunks, new_chunks, graph_size_limit):
 def plan_rechunk(
     old_chunks, new_chunks, itemsize, threshold=None, block_size_limit=None
 ):
-    """ Plan an iterative rechunking from *old_chunks* to *new_chunks*.
+    """Plan an iterative rechunking from *old_chunks* to *new_chunks*.
 
     The plan aims to minimize the rechunk graph size.
 
     Parameters
@@ -530,8 +529,7 @@ def plan_rechunk(
 def _compute_rechunk(x, chunks):
-    """ Compute the rechunk of *x* to the given *chunks*.
-    """
+    """Compute the rechunk of *x* to the given *chunks*."""
     if x.size == 0:
         # Special case for empty array, as the algorithm below does not behave correctly
         return empty(x.shape, chunks=chunks, dtype=x.dtype)
......
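Editor's note: a sketch of the case `plan_rechunk` exists for: moving between very tall and very wide chunking, where a direct rechunk would create a quadratic number of intermediate tasks. Shapes are illustrative.

    import dask.array as da

    x = da.ones((1000, 1000), chunks=(1000, 1))

    # Naively, every old chunk intersects every new chunk (10^6 tasks);
    # plan_rechunk inserts intermediate chunk schemes to keep the
    # estimated graph size under the threshold.
    y = x.rechunk((1, 1000))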
@@ -53,7 +53,7 @@ def reduction(
     output_size=1,
     meta=None,
 ):
-    """ General version of reductions
+    """General version of reductions
 
     Parameters
     ----------
@@ -206,7 +206,7 @@ def _tree_reduce(
     concatenate=True,
     reduced_meta=None,
 ):
-    """ Perform the tree reduction step of a reduction.
+    """Perform the tree reduction step of a reduction.
 
     Lower level, users should use ``reduction`` or ``arg_reduction`` directly.
     """
@@ -255,7 +255,7 @@ def _tree_reduce(
 def partial_reduce(
     func, x, split_every, keepdims=False, dtype=None, name=None, reduced_meta=None
 ):
-    """ Partial reduction across multiple axes.
+    """Partial reduction across multiple axes.
 
     Parameters
     ----------
@@ -945,7 +945,7 @@ def nanarg_agg(func, argfunc, data, axis=None, **kwargs):
 def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
-    """ Generic function for argreduction.
+    """Generic function for argreduction.
 
     Parameters
     ----------
@@ -1004,7 +1004,7 @@ def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None)
 def make_arg_reduction(func, argfunc, is_nan_func=False):
-    """ Create an argreduction callable
+    """Create an argreduction callable
 
     Parameters
     ----------
@@ -1051,7 +1051,7 @@ nanargmax = make_arg_reduction(chunk.nanmax, _nanargmax, True)
 def cumreduction(func, binop, ident, x, axis=None, dtype=None, out=None):
-    """ Generic function for cumulative reduction
+    """Generic function for cumulative reduction
 
     Parameters
     ----------
@@ -1144,7 +1144,7 @@ def cumprod(x, axis=None, dtype=None, out=None):
 def topk(a, k, axis=-1, split_every=None):
-    """ Extract the k largest elements from a on the given axis,
+    """Extract the k largest elements from a on the given axis,
     and return them sorted from largest to smallest.
     If k is negative, extract the -k smallest elements instead,
     and return them sorted from smallest to largest.
@@ -1203,7 +1203,7 @@ def topk(a, k, axis=-1, split_every=None):
 def argtopk(a, k, axis=-1, split_every=None):
-    """ Extract the indices of the k largest elements from a on the given axis,
+    """Extract the indices of the k largest elements from a on the given axis,
     and return them sorted from largest to smallest. If k is negative, extract
     the indices of the -k smallest elements instead, and return them sorted
     from smallest to largest.
......
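Editor's note: a doctest-style sketch of the two reductions just documented, with illustrative data.

    import numpy as np
    import dask.array as da

    x = da.from_array(np.array([5, 1, 3, 6]), chunks=2)

    da.topk(x, 2).compute()      # array([6, 5]) -- k largest, descending
    da.argtopk(x, -2).compute()  # array([1, 2]) -- indices of the 2 smallest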
@@ -111,7 +111,7 @@ def expand_tuple(chunks, factor):
 def contract_tuple(chunks, factor):
-    """ Return simple chunks tuple such that factor divides all elements
+    """Return simple chunks tuple such that factor divides all elements
 
     Examples
     --------
@@ -134,7 +134,7 @@ def contract_tuple(chunks, factor):
 def reshape(x, shape):
-    """ Reshape array to new shape
+    """Reshape array to new shape
 
     This is a parallelized version of the ``np.reshape`` function with the
     following limitations:
......
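Editor's note: a small sketch of the documented limitation: reshape is cheap when chunk boundaries let each output block come from whole input blocks. Shapes and chunks are illustrative.

    import dask.array as da

    x = da.ones((4, 6), chunks=(1, 6))

    # Each row-block is already contiguous in C order, so flattening
    # needs no all-to-all data movement.
    y = x.reshape(24)
    y.chunks  # ((6, 6, 6, 6),)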
@@ -659,20 +659,55 @@ def histogram(a, bins=None, range=None, normed=False, weights=None, density=None
     """
     Blocked variant of :func:`numpy.histogram`.
 
-    Follows the signature of :func:`numpy.histogram` exactly with the following
-    exceptions:
-
-    - Either an iterable specifying the ``bins`` or the number of ``bins``
-      and a ``range`` argument is required as computing ``min`` and ``max``
-      over blocked arrays is an expensive operation that must be performed
-      explicitly.
-
-    - ``weights`` must be a dask.array.Array with the same block structure
-      as ``a``.
-
-    - If ``density`` is True, ``bins`` cannot be a single-number delayed
-      value. It must be a concrete number, or a (possibly-delayed)
-      array/sequence of the bin edges.
+    Parameters
+    ----------
+    a : array_like
+        Input data. The histogram is computed over the flattened array.
+    bins : int or sequence of scalars, optional
+        Either an iterable specifying the ``bins`` or the number of ``bins``
+        and a ``range`` argument is required as computing ``min`` and ``max``
+        over blocked arrays is an expensive operation that must be performed
+        explicitly.
+        If `bins` is an int, it defines the number of equal-width
+        bins in the given range (10, by default). If `bins` is a
+        sequence, it defines a monotonically increasing array of bin edges,
+        including the rightmost edge, allowing for non-uniform bin widths.
+    range : (float, float), optional
+        The lower and upper range of the bins. If not provided, range
+        is simply ``(a.min(), a.max())``. Values outside the range are
+        ignored. The first element of the range must be less than or
+        equal to the second. `range` affects the automatic bin
+        computation as well. While bin width is computed to be optimal
+        based on the actual data within `range`, the bin count will fill
+        the entire range including portions containing no data.
+    normed : bool, optional
+        This is equivalent to the ``density`` argument, but produces incorrect
+        results for unequal bin widths. It should not be used.
+    weights : array_like, optional
+        A dask.array.Array of weights, of the same block structure as ``a``. Each value in
+        ``a`` only contributes its associated weight towards the bin count
+        (instead of 1). If ``density`` is True, the weights are
+        normalized, so that the integral of the density over the range
+        remains 1.
+    density : bool, optional
+        If ``False``, the result will contain the number of samples in
+        each bin. If ``True``, the result is the value of the
+        probability *density* function at the bin, normalized such that
+        the *integral* over the range is 1. Note that the sum of the
+        histogram values will not be equal to 1 unless bins of unity
+        width are chosen; it is not a probability *mass* function.
+        Overrides the ``normed`` keyword if given.
+        If ``density`` is True, ``bins`` cannot be a single-number delayed
+        value. It must be a concrete number, or a (possibly-delayed)
+        array/sequence of the bin edges.
+
+    Returns
+    -------
+    hist : dask Array
+        The values of the histogram. See `density` and `weights` for a
+        description of the possible semantics.
+    bin_edges : dask Array of dtype float
+        Return the bin edges ``(length(hist)+1)``.
 
     Examples
     --------
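Editor's note: a minimal usage sketch matching the signature documented in this hunk; the data and bin count are illustrative.

    import dask.array as da

    x = da.random.random(10000, chunks=1000)

    # min/max over a blocked array is expensive, so both bins and range
    # must be given explicitly when bins is a count.
    h, edges = da.histogram(x, bins=10, range=[0, 1])
    h.compute()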
@@ -1395,7 +1430,7 @@ def coarsen(reduction, x, axes, trim_excess=False, **kwargs):
 def split_at_breaks(array, breaks, axis=0):
-    """ Split an array into a list of arrays (using slices) at the given breaks
+    """Split an array into a list of arrays (using slices) at the given breaks
 
     >>> split_at_breaks(np.arange(6), [3, 5])
     [array([0, 1, 2]), array([3, 4]), array([5])]
......
@@ -31,7 +31,7 @@ def _sanitize_index_element(ind):
 def sanitize_index(ind):
-    """ Sanitize the elements for indexing along one axis
+    """Sanitize the elements for indexing along one axis
 
     >>> sanitize_index([2, 3, 5])
     array([2, 3, 5])
@@ -509,7 +509,7 @@ def partition_by_size(sizes, seq):
 def issorted(seq):
-    """ Is sequence sorted?
+    """Is sequence sorted?
 
     >>> issorted([1, 2, 3])
     True
@@ -522,7 +522,7 @@ def issorted(seq):
 def slicing_plan(chunks, index):
-    """ Construct a plan to slice chunks with the given index
+    """Construct a plan to slice chunks with the given index
 
     Parameters
     ----------
@@ -555,7 +555,7 @@ def slicing_plan(chunks, index):
 def take(outname, inname, chunks, index, axis=0):
-    """ Index array with an iterable of index
+    """Index array with an iterable of index
 
     Handles a single index by a single list
@@ -615,7 +615,7 @@ def take(outname, inname, chunks, index, axis=0):
 def posify_index(shape, ind):
-    """ Flip negative indices around to positive ones
+    """Flip negative indices around to positive ones
 
     >>> posify_index(10, 3)
     3
@@ -707,7 +707,7 @@ def new_blockdim(dim_shape, lengths, index):
 def replace_ellipsis(n, index):
-    """ Replace ... with slices, :, : ,:
+    """Replace ... with slices, :, : ,:
 
     >>> replace_ellipsis(4, (3, Ellipsis, 2))
     (3, slice(None, None, None), slice(None, None, None), 2)
@@ -728,7 +728,7 @@ def replace_ellipsis(n, index):
 def normalize_slice(idx, dim):
-    """ Normalize slices to canonical form
+    """Normalize slices to canonical form
 
     Parameters
     ----------
@@ -764,7 +764,7 @@ def normalize_slice(idx, dim):
 def normalize_index(idx, shape):
-    """ Normalize slicing indexes
+    """Normalize slicing indexes
 
     1. Replaces ellipses with many full slices
     2. Adds full slices to end of index
@@ -832,7 +832,7 @@ def normalize_index(idx, shape):
 def check_index(ind, dimension):
-    """ Check validity of index for a given dimension
+    """Check validity of index for a given dimension
 
     Examples
     --------
@@ -902,7 +902,7 @@ def check_index(ind, dimension):
 def slice_with_int_dask_array(x, index):
-    """ Slice x with at most one 1D dask arrays of ints.
+    """Slice x with at most one 1D dask array of ints.
 
     This is a helper function of :meth:`Array.__getitem__`.
@@ -955,7 +955,7 @@ def slice_with_int_dask_array(x, index):
 def slice_with_int_dask_array_on_axis(x, idx, axis):
-    """ Slice a ND dask array with a 1D dask arrays of ints along the given
+    """Slice an ND dask array with a 1D dask array of ints along the given
     axis.
 
     This is a helper function of :func:`slice_with_int_dask_array`.
@@ -1019,7 +1019,7 @@ def slice_with_int_dask_array_on_axis(x, idx, axis):
 def slice_with_bool_dask_array(x, index):
-    """ Slice x with one or more dask arrays of bools
+    """Slice x with one or more dask arrays of bools
 
     This is a helper function of `Array.__getitem__`.
......
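Editor's note: a short sketch of the indexing path these helpers implement; boolean dask-array indices route through `slice_with_bool_dask_array` via `Array.__getitem__`.

    import dask.array as da

    x = da.arange(10, chunks=5)

    # The mask is itself a lazy dask array with the same chunks as x.
    evens = x[x % 2 == 0]
    evens.compute()  # array([0, 2, 4, 6, 8])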
@@ -5,7 +5,7 @@ import numpy as np
 def svg(chunks, size=200, **kwargs):
-    """ Convert chunks from Dask Array into an SVG Image
+    """Convert chunks from Dask Array into an SVG Image
 
     Parameters
     ----------
@@ -161,7 +161,7 @@ def svg_nd(chunks, size=200):
 def svg_lines(x1, y1, x2, y2):
-    """ Convert points into lines of text for an SVG plot
+    """Convert points into lines of text for an SVG plot
 
     Examples
     --------
@@ -181,7 +181,7 @@ def svg_lines(x1, y1, x2, y2):
 def svg_grid(x, y, offset=(0, 0), skew=(0, 0)):
-    """ Create lines of SVG text that show a grid
+    """Create lines of SVG text that show a grid
 
     Parameters
     ----------
@@ -253,7 +253,7 @@ def draw_sizes(shape, size=200):
 def ratio_response(x):
-    """ How we display actual size ratios
+    """How we display actual size ratios
 
     Common ratios in sizes span several orders of magnitude,
     which is hard for us to perceive.
......
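Editor's note: a sketch of how these helpers surface to users, assuming the module path dask/array/svg.py shown in this file's hunks; svg() is what powers the chunk diagram in the array's HTML repr.

    import dask.array as da
    from dask.array.svg import svg

    x = da.ones((100, 100), chunks=(25, 50))

    # x.chunks is ((25, 25, 25, 25), (50, 50)); svg() draws that grid
    # and returns the SVG markup as a string.
    text = svg(x.chunks, size=200)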