Commit b41c932e authored by Kruyff, D.L.W. (Dylan)

Redo lost changes

parent 8d4ead19
@@ -21,7 +21,7 @@ import dask.array as da
import dask.dataframe
from dask.base import tokenize, compute_as_if_collection
from dask.delayed import Delayed, delayed
from dask.utils import ignoring, tmpfile, tmpdir, key_split
from dask.utils import ignoring, tmpfile, tmpdir, key_split, apply
from dask.utils_test import inc, dec
from dask.array.core import (
@@ -51,6 +51,7 @@ from dask.blockwise import (
optimize_blockwise,
)
from dask.array.utils import assert_eq, same_keys
from dask.array.numpy_compat import _numpy_120
from numpy import nancumsum, nancumprod
@@ -96,6 +97,13 @@ def test_top():
}
def test_top_with_kwargs():
assert top(add, "z", "i", "x", "i", numblocks={"x": (2, 0)}, b=100) == {
("z", 0): (apply, add, [("x", 0)], {"b": 100}),
("z", 1): (apply, add, [("x", 1)], {"b": 100}),
}
def test_top_supports_broadcasting_rules():
assert top(
add, "z", "ij", "x", "ij", "y", "ij", numblocks={"x": (1, 2), "y": (2, 1)}
@@ -1466,7 +1474,7 @@ def test_map_blocks_infer_chunks_broadcast():
dx = da.from_array([[1, 2, 3, 4]], chunks=((1,), (2, 2)))
dy = da.from_array([[10, 20], [30, 40]], chunks=((1, 1), (2,)))
result = da.map_blocks(lambda x, y: x + y, dx, dy)
assert result.chunks == ((1, 1), (2, 2),)
assert result.chunks == ((1, 1), (2, 2))
assert_eq(result, np.array([[11, 22, 13, 24], [31, 42, 33, 44]]))
@@ -2405,7 +2413,12 @@ def test_from_array_scalar(type_):
dx = da.from_array(x, chunks=-1)
assert_eq(np.array(x), dx)
assert isinstance(dx.dask[dx.name,], np.ndarray)
assert isinstance(
dx.dask[
dx.name,
],
np.ndarray,
)
@pytest.mark.parametrize("asarray,cls", [(True, np.ndarray), (False, np.matrix)])
@@ -4273,6 +4286,7 @@ def test_no_warnings_from_blockwise():
assert not record
@pytest.mark.xfail(_numpy_120, reason="https://github.com/pydata/sparse/issues/383")
def test_from_array_meta():
sparse = pytest.importorskip("sparse")
x = np.ones(10)
......
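A note for reviewers on the apply helper newly added to the imports above and exercised in test_top_with_kwargs: a dask task tuple has no slot for keyword arguments, so they are routed through apply. A minimal sketch of that convention, not part of the diff (the scale function is purely illustrative):

from dask.utils import apply

def scale(x, b=1):
    return x * b

# Positional arguments ride in the task tuple directly; keyword arguments
# are packed into a dict and unpacked by `apply` at execution time.
task = (apply, scale, [10], {"b": 100})
func, *rest = task
print(func(*rest))  # 1000, i.e. scale(10, b=100)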
@@ -3,6 +3,7 @@ import numpy as np
import dask.array as da
from dask.array.utils import assert_eq, IS_NEP18_ACTIVE
from dask.array.numpy_compat import _numpy_120
missing_arrfunc_cond = not IS_NEP18_ACTIVE
missing_arrfunc_reason = "NEP-18 support is not available in NumPy"
@@ -88,6 +89,7 @@ def test_array_function_sparse(func):
@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.xfail(_numpy_120, reason="sparse-383")
def test_array_function_sparse_tensordot():
sparse = pytest.importorskip("sparse")
x = np.random.random((2, 3, 4))
......
@@ -2,6 +2,7 @@ import numpy as np
import pytest
import dask.array as da
from dask.array.numpy_compat import _numpy_120
from dask.array.utils import meta_from_array, assert_eq
asarrays = [np.asarray]
@@ -23,6 +24,9 @@ except ImportError:
@pytest.mark.parametrize("asarray", asarrays)
def test_meta_from_array(asarray):
if "COO.from_numpy" in str(asarray) and _numpy_120:
raise pytest.xfail(reason="sparse-383")
x = np.array(1)
assert meta_from_array(x, ndim=1).shape == (0,)
@@ -57,7 +61,7 @@ def test_meta_from_array_type_inputs():
chunks=(5, 5),
shape=(5, 5),
meta=np.ndarray,
dtype=np.float,
dtype=float,
)
assert_eq(x, x)
......
@@ -807,7 +807,10 @@ def test_pad(shape, chunks, pad_width, mode, kwargs):
reason="Bug when pad_width is larger than dimension: https://github.com/dask/dask/issues/5303"
),
),
pytest.param("median", marks=pytest.mark.skip(reason="Not implemented"),),
pytest.param(
"median",
marks=pytest.mark.skip(reason="Not implemented"),
),
pytest.param(
"empty",
marks=pytest.mark.skip(
......
@@ -7,6 +7,7 @@ from dask.array.gufunc import apply_gufunc
from dask.sizeof import sizeof
cupy = pytest.importorskip("cupy")
cupyx = pytest.importorskip("cupyx")
functions = [
@@ -820,11 +821,11 @@ def test_sparse_hstack_vstack_csr():
x = cupy.arange(24, dtype=cupy.float32).reshape(4, 6)
sp = da.from_array(x, chunks=(2, 3), asarray=False, fancy=False)
sp = sp.map_blocks(cupy.sparse.csr_matrix, dtype=cupy.float32)
sp = sp.map_blocks(cupyx.scipy.sparse.csr_matrix, dtype=cupy.float32)
y = sp.compute()
assert cupy.sparse.isspmatrix(y)
assert cupyx.scipy.sparse.isspmatrix(y)
assert_eq(x, y.todense())
@@ -833,7 +834,7 @@ def test_cupy_sparse_concatenate(axis):
pytest.importorskip("cupyx")
rs = da.random.RandomState(RandomState=cupy.random.RandomState)
meta = cupy.sparse.csr_matrix((0, 0))
meta = cupyx.scipy.sparse.csr_matrix((0, 0))
xs = []
ys = []
@@ -841,16 +842,18 @@ def test_cupy_sparse_concatenate(axis):
x = rs.random((1000, 10), chunks=(100, 10))
x[x < 0.9] = 0
xs.append(x)
ys.append(x.map_blocks(cupy.sparse.csr_matrix, meta=meta))
ys.append(x.map_blocks(cupyx.scipy.sparse.csr_matrix, meta=meta))
z = da.concatenate(ys, axis=axis)
z = z.compute()
if axis == 0:
sp_concatenate = cupy.sparse.vstack
sp_concatenate = cupyx.scipy.sparse.vstack
elif axis == 1:
sp_concatenate = cupy.sparse.hstack
z_expected = sp_concatenate([cupy.sparse.csr_matrix(e.compute()) for e in xs])
sp_concatenate = cupyx.scipy.sparse.hstack
z_expected = sp_concatenate(
[cupyx.scipy.sparse.csr_matrix(e.compute()) for e in xs]
)
assert (z.toarray() == z_expected.toarray()).all()
......
@@ -12,6 +12,7 @@ from dask.array.gufunc import (
gufunc,
as_gufunc,
)
from dask.array.numpy_compat import _numpy_120
from dask.array.utils import IS_NEP18_ACTIVE
@@ -173,6 +174,7 @@ def test_apply_gufunc_02():
a = da.random.normal(size=(20, 30), chunks=(5, 30))
b = da.random.normal(size=(10, 1, 40), chunks=(10, 1, 40))
c = apply_gufunc(outer_product, "(i),(j)->(i,j)", a, b, output_dtypes=a.dtype)
assert c.compute().shape == (10, 20, 30, 40)
@@ -597,6 +599,7 @@ def test_apply_gufunc_via_numba_02():
@pytest.mark.skipif(
not IS_NEP18_ACTIVE, reason="NEP18 required for sparse meta propagation"
)
@pytest.mark.xfail(_numpy_120, reason="https://github.com/pydata/sparse/issues/383")
def test_preserve_meta_type():
sparse = pytest.importorskip("sparse")
@@ -613,3 +616,15 @@ def test_preserve_meta_type():
assert_eq(sum, sum)
assert_eq(mean, mean)
def test_apply_gufunc_with_meta():
def stats(x):
return np.mean(x, axis=-1), np.std(x, axis=-1, dtype=np.float32)
a = da.random.normal(size=(10, 20, 30), chunks=(5, 5, 30))
meta = (np.ones(0, dtype=np.float64), np.ones(0, dtype=np.float32))
result = apply_gufunc(stats, "(i)->(),()", a, meta=meta)
expected = stats(a.compute())
assert_eq(expected[0], result[0])
assert_eq(expected[1], result[1])
@@ -196,12 +196,17 @@ def test_array_broadcasting():
assert da.random.normal(
np.ones((1, 4)), da.ones((2, 3, 4), chunks=(2, 3, 4)), chunks=(2, 3, 4)
).compute().shape == (2, 3, 4)
assert da.random.normal(
scale=np.ones((1, 4)),
loc=da.ones((2, 3, 4), chunks=(2, 3, 4)),
size=(2, 2, 3, 4),
chunks=(2, 2, 3, 4),
).compute().shape == (2, 2, 3, 4)
assert (
da.random.normal(
scale=np.ones((1, 4)),
loc=da.ones((2, 3, 4), chunks=(2, 3, 4)),
size=(2, 2, 3, 4),
chunks=(2, 2, 3, 4),
)
.compute()
.shape
== (2, 2, 3, 4)
)
with pytest.raises(ValueError):
da.random.normal(arr, np.ones((3, 1)), size=(2, 3, 4), chunks=3)
......
@@ -15,7 +15,7 @@ import dask.array as da
def test_rechunk_internals_1():
""" Test the cumdims_label and _breakpoints and
"""Test the cumdims_label and _breakpoints and
_intersect_1d internal funcs to rechunk."""
new = cumdims_label(((1, 1, 2), (1, 5, 1)), "n")
old = cumdims_label(((4,), (1,) * 5), "o")
......
@@ -515,7 +515,7 @@ def test_reduction_names():
def test_general_reduction_names():
dtype = np.int
dtype = int
a = da.reduction(
da.ones(10, dtype, chunks=2), np.sum, np.sum, dtype=dtype, name="foo"
)
......
@@ -347,7 +347,7 @@ def test_dot_method():
@pytest.mark.parametrize("shape, chunks", [((20,), (6,)), ((4, 5), (2, 3))])
def test_vdot(shape, chunks):
np.random.random(1337)
np.random.seed(1337)
x = 2 * np.random.random((2,) + shape) - 1
x = x[0] + 1j * x[1]
@@ -365,7 +365,7 @@ def test_vdot(shape, chunks):
@pytest.mark.parametrize("shape1, shape2", [((20,), (6,)), ((4, 5), (2, 3))])
def test_outer(shape1, shape2):
np.random.random(1337)
np.random.seed(1337)
x = 2 * np.random.random(shape1) - 1
y = 2 * np.random.random(shape2) - 1
......
@@ -681,7 +681,7 @@ def test_index_with_int_dask_array_dtypes(dtype):
def test_index_with_int_dask_array_nocompute():
""" Test that when the indices are a dask array
"""Test that when the indices are a dask array
they are not accidentally computed
"""
@@ -895,7 +895,7 @@ def test_cached_cumsum_non_tuple():
@pytest.mark.parametrize("params", [(2, 2, 1), (5, 3, 2)])
def test_setitem_with_different_chunks_preserves_shape(params):
""" Reproducer for https://github.com/dask/dask/issues/3730.
"""Reproducer for https://github.com/dask/dask/issues/3730.
Mutating based on an array with different chunks can cause new chunks to be
used. We need to ensure those new chunk sizes are applied to the mutated
......
@@ -5,7 +5,7 @@ import pytest
import dask
import dask.array as da
from dask.array.numpy_compat import _numpy_117
from dask.array.numpy_compat import _numpy_117, _numpy_120
from dask.array.utils import assert_eq, IS_NEP18_ACTIVE
sparse = pytest.importorskip("sparse")
@@ -15,6 +15,10 @@ if sparse:
# searchsorted() got an unexpected keyword argument 'side'
pytest.importorskip("numba", minversion="0.40.0")
numpy_120_xfail = pytest.mark.xfail(
_numpy_120, reason="https://github.com/pydata/sparse/issues/383"
)
functions = [
lambda x: x,
@@ -22,15 +26,15 @@ functions = [
lambda x: 2 * x,
lambda x: x / 2,
lambda x: x ** 2,
lambda x: x + x,
lambda x: x * x,
lambda x: x[0],
lambda x: x[:, 1],
lambda x: x[:1, None, 1:3],
pytest.param(lambda x: x + x, marks=numpy_120_xfail),
pytest.param(lambda x: x * x, marks=numpy_120_xfail),
pytest.param(lambda x: x[0], marks=numpy_120_xfail),
pytest.param(lambda x: x[:, 1], marks=numpy_120_xfail),
pytest.param(lambda x: x[:1, None, 1:3], marks=numpy_120_xfail),
lambda x: x.T,
lambda x: da.transpose(x, (1, 2, 0)),
lambda x: x.sum(),
lambda x: x.mean(),
pytest.param(lambda x: x.sum(), marks=numpy_120_xfail),
pytest.param(lambda x: x.mean(), marks=numpy_120_xfail),
lambda x: x.moment(order=0),
pytest.param(
lambda x: x.std(),
@@ -44,20 +48,25 @@ functions = [
reason="fixed in https://github.com/pydata/sparse/pull/243"
),
),
lambda x: x.dot(np.arange(x.shape[-1])),
lambda x: x.dot(np.eye(x.shape[-1])),
lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]),
lambda x: x.sum(axis=0),
lambda x: x.max(axis=0),
lambda x: x.sum(axis=(1, 2)),
pytest.param(lambda x: x.dot(np.arange(x.shape[-1])), marks=numpy_120_xfail),
pytest.param(lambda x: x.dot(np.eye(x.shape[-1])), marks=numpy_120_xfail),
pytest.param(
lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]),
marks=numpy_120_xfail,
),
pytest.param(lambda x: x.sum(axis=0), marks=numpy_120_xfail),
pytest.param(lambda x: x.max(axis=0), marks=numpy_120_xfail),
pytest.param(lambda x: x.sum(axis=(1, 2)), marks=numpy_120_xfail),
lambda x: x.astype(np.complex128),
lambda x: x.map_blocks(lambda x: x * 2),
lambda x: x.map_overlap(lambda x: x * 2, depth=0, trim=True),
lambda x: x.map_overlap(lambda x: x * 2, depth=0, trim=False),
lambda x: x.round(1),
lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])),
lambda x: abs(x),
lambda x: x > 0.5,
lambda x: x.rechunk((4, 4, 4)),
lambda x: x.rechunk((2, 2, 1)),
pytest.param(lambda x: x.rechunk((2, 2, 1)), marks=numpy_120_xfail),
lambda x: np.isneginf(x),
lambda x: np.isposinf(x),
]
@@ -152,6 +161,7 @@ def test_mixed_output_type():
assert zz.nnz == y.compute().nnz
@numpy_120_xfail
def test_metadata():
y = da.random.random((10, 10), chunks=(5, 5))
y[y < 0.8] = 0
@@ -187,6 +197,7 @@ def test_html_repr():
assert "Bytes" not in text
@numpy_120_xfail
def test_from_delayed_meta():
def f():
return sparse.COO.from_numpy(np.eye(3))
@@ -197,6 +208,7 @@ def test_from_delayed_meta():
assert_eq(x, x)
@numpy_120_xfail
def test_from_array():
x = sparse.COO.from_numpy(np.eye(10))
d = da.from_array(x, chunks=(5, 5))
@@ -206,6 +218,7 @@ def test_from_array():
assert isinstance(d.compute(), sparse.COO)
@numpy_120_xfail
def test_map_blocks():
x = da.eye(10, chunks=5)
y = x.map_blocks(sparse.COO.from_numpy, meta=sparse.COO.from_numpy(np.eye(1)))
@@ -213,6 +226,7 @@ def test_meta_from_array():
assert_eq(y, y)
@numpy_120_xfail
def test_meta_from_array():
x = sparse.COO.from_numpy(np.eye(1))
y = da.utils.meta_from_array(x, ndim=2)
......
@@ -29,7 +29,7 @@ def normalize_to_array(x):
def meta_from_array(x, ndim=None, dtype=None):
""" Normalize an array to appropriate meta object
"""Normalize an array to appropriate meta object
Parameters
----------
......
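For context on the meta_from_array docstring change above: the function builds a zero-length array that carries only the type, dtype, and dimensionality of its input, which dask uses to infer output metadata without computing anything. A minimal usage sketch, not part of the diff and grounded on the assertion in test_meta_from_array earlier in this commit:

import numpy as np
from dask.array.utils import meta_from_array

# Returns an empty array of the same type as x, reshaped to the requested ndim.
meta = meta_from_array(np.array(1), ndim=1)
print(type(meta), meta.shape)  # <class 'numpy.ndarray'> (0,)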
@@ -134,9 +134,40 @@ def wrap(wrap_func, func, **kwargs):
w = wrap(wrap_func_shape_as_first_arg)
ones = w(np.ones, dtype="f8")
zeros = w(np.zeros, dtype="f8")
empty = w(np.empty, dtype="f8")
def broadcast_trick(func):
"""
Provide a decorator to wrap common numpy functions with a broadcast trick.
Dask arrays are currently immutable; thus, when we know an array is uniform,
we can replace the actual data with a single value and have all elements point
to it, reducing the size.
>>> x = np.broadcast_to(1, (100,100,100))
>>> x.base.nbytes
8
These arrays are not only more efficient locally, but dask serialisation is
aware of the _real_ size of these arrays and can therefore send them around
efficiently and schedule accordingly.
Note that these arrays are read-only and numpy will refuse to assign to them,
so this should be safe.
"""
def inner(shape, *args, **kwargs):
return np.broadcast_to(func((), *args, **kwargs), shape)
if func.__doc__ is not None:
inner.__doc__ = func.__doc__
inner.__name__ = func.__name__
return inner
ones = w(broadcast_trick(np.ones), dtype="f8")
zeros = w(broadcast_trick(np.zeros), dtype="f8")
empty = w(broadcast_trick(np.empty), dtype="f8")
w_like = wrap(wrap_func_like_safe)
@@ -147,7 +178,7 @@ empty_like = w_like(np.empty, func_like=np.empty_like)
# full and full_like require special casing due to argument check on fill_value
# Generate wrapped functions only once
_full = w(np.full)
_full = w(broadcast_trick(np.full))
_full_like = w_like(np.full, func_like=np.full_like)
@@ -166,7 +197,12 @@ def full_like(a, fill_value, *args, **kwargs):
raise ValueError(
f"fill_value must be scalar. Received {type(fill_value).__name__} instead."
)
return _full_like(a=a, fill_value=fill_value, *args, **kwargs,)
return _full_like(
a=a,
fill_value=fill_value,
*args,
**kwargs,
)
full.__doc__ = _full.__doc__
......
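A brief illustration of the broadcast trick documented in broadcast_trick above. This is a sketch for reviewers, not part of the diff, and the uniform_block helper name is hypothetical; only standard numpy behaviour is assumed:

import numpy as np

def uniform_block(value, shape, dtype="f8"):
    # Materialise a single scalar and broadcast it to the requested shape;
    # the result is a read-only view whose base holds just one element.
    return np.broadcast_to(np.full((), value, dtype=dtype), shape)

block = uniform_block(1.0, (100, 100, 100))
print(block.shape)        # (100, 100, 100)
print(block.base.nbytes)  # 8 -- one float backs a million logical elements
# block[0, 0, 0] = 2.0    # would raise: the broadcast view is read-only

Chunks produced by the wrapped ones/zeros/empty/full above are backed the same way, which, per the docstring, lets dask serialisation account for their real (tiny) size.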