main.py 18.3 KB
Newer Older
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
1
from flask import Flask, request
2 3 4
import numpy as np
from flask_cors import CORS
from time import time
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
5
import pandas as pd
6
import orjson
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
7 8
import bigwig
import bbi
9
import _ucrdtw
10 11
import _lsh
import math
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
12 13
import dask.dataframe as dd
import os.path
14
from random import sample
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
15 16 17
from DBA_multivariate import performDBA
from tslearn.metrics import dtw
from sklearn import preprocessing
18 19 20 21 22
from collections import defaultdict
from dtaidistance import dtw_ndim
from scipy.spatial.distance import euclidean

from fastdtw import fastdtw
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
23

24
# Module-level flag. Nothing in the visible part of this file reads it.
reload = False
25 26 27 28 29 30 31 32 33 34

# Application object; every @app.route handler in this file registers on it.
app = Flask(__name__)
# Wrap the app with flask_cors so browser clients served from another origin
# can call these endpoints.
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer ``GET /`` with a fixed greeting (handy as a liveness check)."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Serve the chr1 signal of ``test.bigWig`` as three identical channels.

    Returns:
        The orjson-encoded body: a list of three ``{"index", "values"}``
        objects, all carrying the same positions and the same binned values.
    """
    t0 = time()
    size = bbi.chromsizes('test.bigWig')['chr1']
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)

    values = data.tolist()
    # A chromosome shorter than `bins` would give a step of 0, which range()
    # rejects; never step by less than one base.
    step = max(1, int(size / bins))
    # range() yields more than `bins` positions whenever `size` is not an
    # exact multiple of the step, so trim the index to pair one position with
    # each value.
    index = list(range(0, size, step))[:len(values)]

    # The three channels are deliberately identical; build the lists once and
    # reuse them instead of recomputing them per channel.
    channel = {"index": index, "values": values}
    response = orjson.dumps([channel, channel, channel])
    print('Data read: ' + str(time()-t0))
    return response

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
58 59 60 61 62 63 64
@app.route('/read-mts-data', methods=['GET'])
def read_mts_data():
    """Serve temperature, humidity and dew point series for one station.

    On first use the ground-station CSV is filtered down to a fixed list of
    stations and cached as ``data.pkl``; later calls read only the pickle.

    Returns:
        The orjson-encoded body: a list of three ``{"index", "values"}``
        objects for the columns ``t``, ``hu`` and ``td`` of station 14066001,
        all sharing the same ``date`` index.
    """
    filename = 'data.pkl'
    station = 14066001
    if not os.path.isfile(filename):
        print("start")
        df = dd.read_csv("NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td'])
        print("read file")
        df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0)
        print("split rows")
        df = df.compute()
        df.to_pickle(filename)
        print("to_pandas")
    # NOTE: the pickle is a cache written by this function; read_pickle must
    # never be pointed at a file from an untrusted source.
    df = pd.read_pickle(filename)
    df.dropna(subset=['t'], inplace=True)

    # Select the station's rows and convert the dates once; every channel
    # shares them.
    station_rows = df.loc[df['number_sta'] == station]
    index = station_rows['date'].values.astype(str).tolist()
    response = [
        {"index": index, "values": station_rows[column].values.tolist()}
        for column in ('t', 'hu', 'td')
    ]
    print("response ready")
    response = orjson.dumps(response)
    return response

@app.route('/create-mts-windows', methods=['POST'])
def create_mts_windows():
    """Build the sliding-window cache ``processed-data.npy`` from ``data.pkl``.

    Nothing is recomputed when the cache file already exists. Otherwise the
    ``t``, ``hu`` and ``td`` columns of station 14066001 are cut into windows
    of 120 samples with a stride of one, each channel of each window is
    min-max scaled to [-1, 1], and the result is saved.

    Returns:
        The string ``'1'``.

    Raises:
        FileNotFoundError: if ``data.pkl`` does not exist yet (it is written
            by ``read_mts_data``).
    """
    t0 = time()
    if not os.path.isfile('processed-data.npy'):
        df = pd.read_pickle('data.pkl')
        # Filter the station once rather than once per channel.
        station_rows = df.loc[df['number_sta'] == 14066001]
        channels = [
            station_rows[column].fillna(0).values.tolist()
            for column in ('t', 'hu', 'td')
        ]
        print("Data read: " + str(time()-t0))
        window_size = 120
        print("Processing: " + str(time()-t0))
        # "+ 1" keeps the window that ends on the very last sample; without
        # it the final full window was silently dropped.
        n_windows = max(0, len(channels[0]) - window_size + 1)
        data = [
            [values[start:start + window_size] for values in channels]
            for start in range(n_windows)
        ]
        print("Raw windows: " + str(time()-t0))
        windows = []
        for i, raw_window in enumerate(data):
            if i % 5000 == 0:
                print(i)
            # axis=1 scales every channel (row) of the window independently.
            windows.append(preprocessing.minmax_scale(raw_window, (-1, 1), axis=1))
        print("Preprocessed: " + str(time()-t0))
        np.save('processed-data', windows)
    print("Sending response: " + str(time()-t0))
    return '1'


121 122
@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Create ``processed-data.npy`` from ``test.bigWig`` unless it exists.

    The chunked chr1 signal becomes channel 0; two independently shuffled
    copies of it are stacked on axis 1 as channels 1 and 2.

    Returns:
        The string ``'1'``.
    """
    started = time()
    cache_present = os.path.isfile('processed-data.npy')
    if not cache_present:
        window_size = 120
        chunks = bigwig.chunk(
            'test.bigWig',
            12000,
            int(12000 / window_size),
            int(12000 / 6),
            ['chr1'],
            verbose=True,
        )
        # Insert a channel axis of length one between windows and samples.
        original = np.reshape(chunks, (len(chunks), 1, len(chunks[0])))

        # np.random.shuffle permutes along the first axis, i.e. it reorders
        # whole windows; the copies keep `original` untouched.
        shuffled_first = np.copy(original)
        np.random.shuffle(shuffled_first)
        shuffled_second = np.copy(original)
        np.random.shuffle(shuffled_second)

        stacked = np.concatenate((original, shuffled_first, shuffled_second), axis=1)
        np.save('processed-data', stacked)
    print('Windows created: ' + str(time()-started))
    return '1'

@app.route('/create-test-windows', methods=['POST'])
def create_test_windows():
    """Create ``processed-data.npy`` from ``21.csv`` unless it exists.

    The first five columns of the CSV are cut into windows of 120 rows with a
    stride of 15 rows, every window is repeated three times, and the result is
    stored with the window's two axes swapped.

    Returns:
        The string ``'1'``.
    """
    t0 = time()
    if not os.path.isfile('processed-data.npy'):
        datafile = '21.csv'
        window_size = 120
        stride = int(window_size / 8)

        npdata = np.array(pd.read_csv(datafile, header=None))
        print('data loaded')
        # "+ 1" keeps a window that ends exactly on the last row.
        window_data = [
            npdata[start:start + window_size, 0:5]
            for start in range(0, npdata.shape[0] - window_size + 1, stride)
        ]
        del npdata
        print('data created')
        np_window_data = np.repeat(window_data, repeats=3, axis=0)
        print(np_window_data.shape)
        del window_data
        # Each window is (rows, columns). Turning it into (columns, rows)
        # needs an axis swap: np.reshape only re-reads the flat buffer and
        # mixes values from different columns into one row. The other routes
        # in this file already use swapaxes for the same conversion.
        data = np.swapaxes(np_window_data, 1, 2)
        print(data.shape)
        np.save('processed-data', data)
    print('Windows created: ' + str(time()-t0))
    return '1'
171

172

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
173 174 175
# Bucket key collecting every candidate whose distance exceeds twice the
# table's reference distance; regular buckets are 0 .. _OVERFLOW_BUCKET.
_OVERFLOW_BUCKET = 19


def _bucket_candidates(table_candidates, table_distances):
    """Group one hash table's candidates into distance buckets.

    Args:
        table_candidates: candidate window indices of one table.
        table_distances: the matching distances, same length.
            NOTE(review): the reference value is taken from the middle of
            this sequence, which presumably relies on the distances arriving
            sorted — confirm against `_lsh.lsh`.

    Returns:
        A ``defaultdict(list)`` mapping the bucket number (as a string) to the
        candidates that fall into it.
    """
    buckets = defaultdict(list)
    length = len(table_distances)
    if length == 0:
        return buckets
    # Same element as before, but clamped so a one-element table no longer
    # indexes past the end.
    median = table_distances[min(math.ceil(length / 2), length - 1)]
    stepsize = median / 10
    for candidate, distance in zip(table_candidates, table_distances):
        if distance > median * 2:
            bucket = _OVERFLOW_BUCKET
        elif stepsize > 0:
            # distance == 2 * median would land in bucket 20; keep it in the
            # last regular bucket instead.
            bucket = min(_OVERFLOW_BUCKET, math.floor(distance / stepsize))
        else:
            # A zero reference distance used to evaluate floor(0 / 0) and
            # raise; such candidates belong in the closest bucket.
            bucket = 0
        buckets[str(bucket)].append(candidate)
    return buckets


def _summarise_lsh(candidates, distances):
    """Derive the per-table and overall views of an LSH result.

    Args:
        candidates: 3-D integer array of candidate window indices.
        distances: array of the same shape with the matching distances.

    Returns:
        Tuple ``(candidate_lists, tables, samples, average_candidates,
        average_distances)``:

        * ``candidate_lists`` – ``candidates`` as nested Python lists,
        * ``tables`` – one bucket dict per innermost table,
        * ``samples`` – candidates that are among the first five of any
          table, ordered by summed distance,
        * ``average_candidates`` / ``average_distances`` – every distinct
          candidate with its distance summed over all tables, ascending.
    """
    totals = defaultdict(int)
    for candidate, distance in zip(candidates.ravel().tolist(), distances.ravel()):
        totals[candidate] += distance
    ranked = sorted(totals.items(), key=lambda item: item[1])
    average_candidates = [candidate for candidate, _ in ranked]
    average_distances = np.array([total for _, total in ranked]).tolist()

    candidate_lists = candidates.tolist()
    tables = []
    samples_set = set()
    for table_group, distance_group in zip(candidate_lists, distances):
        for table_candidates, table_distances in zip(table_group, distance_group):
            samples_set.update(table_candidates[0:5])
            tables.append(_bucket_candidates(table_candidates, table_distances))

    samples = [candidate for candidate in average_candidates if candidate in samples_set]
    return candidate_lists, tables, samples, average_candidates, average_distances


def _normalise(values):
    """Scale ``values`` to sum to one.

    Falls back to equal shares when the sum is not positive (all zeros or
    NaN) instead of dividing by zero.
    """
    total = np.sum(values)
    if not total > 0:
        return np.full(len(values), 1.0 / max(len(values), 1))
    return values / total


@app.route('/initialize', methods=['POST'])
def initialize():
    """Estimate the LSH parameters and run the first query.

    Reads ``query`` from the JSON body, loads ``processed-data.npy``, derives
    ``r``, ``a`` and ``sd`` with ``preprocess`` and runs ``_lsh.lsh``.

    Returns:
        The orjson-encoded result: hash functions, candidates, bucketed
        tables, distances, samples, the summed-distance ranking and the three
        parameters (to be sent back to ``/update``).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    # Axes 1 and 2 of the stored windows, and 0 and 1 of the query, are
    # swapped before they reach the LSH code.
    data = np.swapaxes(np.load('processed-data.npy'), 1, 2)
    query = np.swapaxes(raw_data["query"], 0, 1)
    r, a, sd = preprocess(data)

    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print(distances)

    candidate_lists, tables, samples, average_candidates, average_distances = _summarise_lsh(candidates, distances)

    response = {
        "hash_functions": hf.reshape((len(candidate_lists) * len(candidate_lists[0]), len(query[0]))).tolist(),
        "candidates": candidate_lists,
        "tables": tables,
        "distances": distances.tolist(),
        "samples": samples,
        "average_candidates": average_candidates,
        "average_distances": average_distances,
        "parameters": [float(r), float(a), float(sd)]
    }
    response = orjson.dumps(response)
    print('LSH done: ' + str(time()-t0))
    return response


@app.route('/weights', methods=['POST'])
def weights():
    """Blend the previous channel weights with the user's feedback.

    The JSON body carries ``labels`` (window index -> bool), the selected
    ``hash_functions``, the ``query`` and the current ``weights``. Windows
    labelled ``True`` and the selected hash functions each produce a weight
    proposal; the old weights keep a share of ``alpha`` and the proposals
    split the remainder.

    Returns:
        The orjson-encoded list of new weights.
    """
    alpha = 0.2
    raw_data = orjson.loads(request.data)
    labels = raw_data["labels"]
    hash_functions = raw_data["hash_functions"]
    query = raw_data["query"]
    # Always an array, so the "no update" branch can be serialised with
    # .tolist() like every other branch.
    old_weights = np.array(raw_data["weights"])
    data = np.load('processed-data.npy')

    # Index with a flat integer array: a doubly nested list is interpreted by
    # current NumPy as a 2-D index array and adds a leading axis.
    good_indices = np.array(
        [int(index) for index, value in labels.items() if value is True],
        dtype=int,
    )
    all_good_windows = data[good_indices]

    have_windows = len(all_good_windows) != 0
    have_tables = len(hash_functions) != 0

    if have_windows:
        n_channels = len(all_good_windows[0])
        good_distances = np.zeros(len(query))
        for window in all_good_windows:
            for i in range(n_channels):
                good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1]
        # Channels whose accumulated distance is small get a large weight:
        # invert the normalised squared distances and renormalise.
        closeness = _normalise(1 - _normalise(np.square(good_distances)))
        good_distances = np.sqrt(closeness * n_channels)

    if have_tables:
        summed_hash_functions = np.square(np.sum(hash_functions, axis=0))
        normalized_hash_functions = _normalise(summed_hash_functions) * len(hash_functions[0])

    if not have_tables and not have_windows:
        print("no update")
        new_weights = old_weights
    elif not have_tables:
        print("only windows")
        new_weights = alpha * old_weights + (1 - alpha) * good_distances
    elif not have_windows:
        print("only tables")
        new_weights = alpha * old_weights + (1 - alpha) * normalized_hash_functions
    else:
        print("tables & windows")
        new_weights = alpha * old_weights + 0.5 * (1-alpha) * good_distances + 0.5 * (1-alpha) * normalized_hash_functions

    print(new_weights)

    response = orjson.dumps(new_weights.tolist())
    return response


@app.route('/update', methods=['POST'])
def update():
    """Re-run the LSH query with client-supplied weights and parameters.

    The JSON body carries ``query``, ``weights`` and ``parameters`` (the
    three values returned by ``/initialize``).

    Returns:
        The orjson-encoded result: hash functions, candidates, bucketed
        tables, samples, the summed-distance ranking and the distances.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.swapaxes(np.load('processed-data.npy'), 1, 2)
    query = np.swapaxes(raw_data["query"], 0, 1)
    channel_weights = raw_data["weights"]
    parameters = raw_data["parameters"]

    candidates, distances, hf = _lsh.lsh(data, query, parameters[0], parameters[1], parameters[2], channel_weights)

    candidate_lists, tables, samples, average_candidates, average_distances = _summarise_lsh(candidates, distances)

    response = {
        "hash_functions": hf.reshape((len(candidate_lists) * len(candidate_lists[0]), len(query[0]))).tolist(),
        "candidates": candidate_lists,
        "tables": tables,
        "samples": samples,
        "average_candidates": average_candidates,
        "average_distances": average_distances,
        "distances": distances.tolist(),
    }
    response = orjson.dumps(response)
    print('LSH done: ' + str(time()-t0))
    return response

334 335
@app.route('/query', methods=['POST'])
def query():
    """Return the window(s) named by the ``window`` field of the JSON body.

    An integer selects that single stored window. Any other value is treated
    as a mapping of window index to flag; the windows flagged ``True`` are
    combined with ``performDBA``.

    Returns:
        The orjson-encoded window.
    """
    started = time()
    payload = orjson.loads(request.data)
    selection = payload['window']
    if isinstance(selection, int):
        result = np.load('processed-data.npy')[selection]
    else:
        chosen = [int(key) for key, flag in selection.items() if flag is True]
        result = performDBA(np.load('processed-data.npy')[chosen])
    body = orjson.dumps(result.tolist())
    print("Query done: " + str(time() - started))
    return body

@app.route('/window', methods=['POST'])
def window():
    """Return the stored windows selected by ``indices`` in the JSON body."""
    started = time()
    payload = orjson.loads(request.data)
    wanted = payload['indices']
    selected = np.load('processed-data.npy')[wanted]
    body = orjson.dumps(selected.tolist())
    print("Query done: " + str(time() - started))
    return body

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
362 363
@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise each group of windows listed under ``windows`` in the body.

    Every group yields a prototype: the element-wise average of its windows
    plus an upper and lower envelope one standard deviation away from it.

    Returns:
        The orjson-encoded ``{'prototypes': [...], 'distances': []}``; the
        distance list is always empty.
    """
    started = time()
    payload = orjson.loads(request.data)
    groups = payload['windows']
    stored = np.load('processed-data.npy')

    prototypes = []
    for member_indices in groups:
        members = stored[member_indices]
        centre = np.average(members, axis=0)
        spread = np.std(members, axis=0)
        upper = centre + spread
        lower = centre - spread
        prototypes.append({
            'average': centre.tolist(),
            'max': upper.tolist(),
            'min': lower.tolist()
        })

    body = orjson.dumps({'prototypes': prototypes, 'distances': []})
    print("Averages calculated: " + str(time() - started))
    return body

385
def preprocess(data, r=10.0):
    """Estimate the LSH parameters ``(r, a, sd)`` from a sample of windows.

    A subset of mutually distant windows is collected greedily: a window
    joins the subset when its Euclidean distance to every member is at least
    ``r``. The radius is halved and sampling restarts when fewer than ten
    windows were kept after 10000 rows; it grows by half and restarts as soon
    as more than 200 were kept. DTW and Euclidean distances between all pairs
    of the subset then give the returned statistics.

    Args:
        data: indexable collection of equally shaped windows; ``len(data[0])``
            is used as the window length.
            NOTE(review): callers pass arrays whose axes 1 and 2 were swapped
            after loading, presumably (windows, time, channels) — confirm.
        r: initial sampling radius.

    Returns:
        Tuple ``(r, a, sd)``: the derived radius, and the mean and standard
        deviation of the DTW / Euclidean distance ratio.

    Raises:
        ValueError: if fewer than two windows end up in the subset, because
            no pairwise statistics exist then (this used to return NaN).
    """
    t0 = time()
    subset = []

    i = 0
    while i < len(data):
        if i % 999 == 0:
            print(r)
            print(str(i) + ':' + str(len(subset)))

        # "not any(... < r)" rather than "all(... >= r)": a NaN distance must
        # not count as "too close", exactly as before.
        if not any(np.linalg.norm(data[i] - data[s]) < r for s in subset):
            subset.append(i)

        i = i + 1
        if i == 10000 and len(subset) < 10:
            r = r / 2
            subset = []
            i = 0
        if len(subset) > 200:
            r = r + r / 2
            subset = []
            i = 0

    print("r = " + str(r))
    if len(subset) < 2:
        raise ValueError(
            "preprocess needs at least two mutually distant windows, got "
            + str(len(subset))
        )

    # Derived from the data instead of the former hard-coded 120; both values
    # are unchanged for 120-sample windows.
    window_length = len(data[0])
    band = max(1, int(0.05 * window_length))

    dtw_distances = []
    eq_distances = []
    # Both distances are symmetric, so each unordered pair is evaluated once.
    # Listing every pair twice, as before, leaves the means and standard
    # deviations unchanged and only doubles the DTW work.
    for position, index_1 in enumerate(subset):
        print(position)
        for index_2 in subset[position + 1:]:
            e = np.linalg.norm(data[index_1] - data[index_2])
            if math.isnan(e) or e == 0:
                # Degenerate pair: record a tiny placeholder for both
                # distances so the ratio below stays defined.
                eq_distances.append(0.0001)
                dtw_distances.append(0.0001)
                continue
            eq_distances.append(e)
            dtw_distances.append(
                dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=band)
            )

    ratios = np.array(dtw_distances) / np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    denominator = (a - sd) * math.sqrt(window_length)
    r = theta / denominator if denominator != 0 else float('nan')
    # Fall back for every unusable radius: negative (as before), zero,
    # infinite or NaN.
    if not math.isfinite(r) or r <= 0:
        r = mean_dtw / 100
    print('Mean: ' + str(mean_dtw))
    print('Stdev: ' + str(sd_dtw))
    print('Ratio mean: ' + str(a))
    print('Ratio stdev: ' + str(sd))
    print('Theta: ' + str(theta))
    print('r: ' + str(r))
    print('Preprocessing time: ' + str(time() - t0))
    return r, a, sd
460

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
461
def debug_test_lsh():
    """Compare the LSH candidates for one window with an exact DTW ranking.

    Prints the timings of the approximate search, the exact DTW scan and an
    exact Euclidean scan, the first 20 results of both rankings, and how many
    of the 20 nearest DTW neighbours appear among the LSH candidates.
    """
    # Create the cache first; previously the file was loaded before
    # create_windows() had a chance to write it.
    create_windows()
    data = np.load('processed-data.npy')
    print(data.shape)
    # Swap axes 1 and 2 exactly as the /initialize and /update routes do; a
    # reshape to the same shape would scramble the values. The copy keeps the
    # array contiguous, as the reshaped array used to be.
    data = np.ascontiguousarray(np.swapaxes(data, 1, 2))

    r, a, sd = preprocess(data, 11.25)
    query_n = 1234
    t0 = time()
    query = data[query_n]
    data = data.astype('double')
    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print("Calculated approximate in: " + str(time()-t0))

    # Sum each candidate's distance over all tables and rank ascending.
    totals = defaultdict(int)
    for candidate, distance in zip(candidates.ravel().tolist(), distances.ravel()):
        totals[candidate] += distance
    ranked = sorted(totals.items(), key=lambda item: item[1])
    candidates = [candidate for candidate, _ in ranked]

    print(candidates[0:20])

    t0 = time()
    distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data]
    topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k])
    print("Calculated exact dtw in: " + str(time()-t0))
    print(topk_dtw[0:20])

    # Only timed for comparison; the distances themselves are not used.
    t0 = time()
    l2distances = [np.linalg.norm(window - query) for window in data]
    print("Calculated exact l2 in: " + str(time()-t0))

    # A set makes each membership test O(1) instead of scanning the list.
    candidate_set = set(candidates)
    accuracy = sum(1 for index in topk_dtw[0:20] if index in candidate_set)
    print(accuracy)
504

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
505 506
# read_mts_data()
# create_mts_windows()
507
# debug_test_lsh()