main.py 18.3 KB
Newer Older
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
1
from flask import Flask, request
2
3
4
import numpy as np
from flask_cors import CORS
from time import time
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
5
import pandas as pd
6
import orjson
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
7
8
import bigwig
import bbi
9
import _ucrdtw
10
11
import _lsh
import math
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
12
13
import dask.dataframe as dd
import os.path
14
from random import sample
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
15
16
17
from DBA_multivariate import performDBA
from tslearn.metrics import dtw
from sklearn import preprocessing
18
19
20
21
22
from collections import defaultdict
from dtaidistance import dtw_ndim
from scipy.spatial.distance import euclidean

from fastdtw import fastdtw
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
23

24
# Module-level flag; nothing in the visible part of this file reads it.
reload = False

# The Flask application object that every route below registers on,
# wrapped with flask_cors so browser clients on other origins can call it.
app = Flask(__name__)
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer ``GET /`` with a fixed greeting string."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Serve chromosome ``chr1`` of ``test.bigWig`` as three identical channels.

    The chromosome length is looked up with ``bbi.chromsizes`` and
    ``bigwig.get`` is asked for ``bins`` values spanning ``0..size``.

    Returns:
        bytes: orjson-encoded list of three ``{"index": [...], "values": [...]}``
        objects, all carrying the same index and the same values.
    """
    t0 = time()
    size = bbi.chromsizes('test.bigWig')['chr1']
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)

    # The three channels are identical, so build the two (large) lists once
    # instead of once per channel.
    # NOTE(review): range() yields more than `bins` positions whenever `size`
    # is not a multiple of the step, so "index" can be longer than "values"
    # (assuming bigwig.get returns `bins` values) - confirm the client copes.
    step = int(size / bins)
    index = list(range(0, size, step))
    values = data.tolist()

    response = orjson.dumps([
        {"index": index, "values": values}
        for _ in range(3)
    ])
    print('Data read: ' + str(time()-t0))
    return response

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
58
59
60
61
62
63
64
@app.route('/read-mts-data', methods=['GET'])
def read_mts_data():
    """Serve the ``t``, ``hu`` and ``td`` series of station 14066001.

    On first use the ground-station CSV is read with dask, reduced to a fixed
    list of stations (missing values replaced by 0) and cached as ``data.pkl``;
    later calls only read the pickle.

    Returns:
        bytes: orjson-encoded list with one ``{"index", "values"}`` object per
        column (``t``, ``hu``, ``td``); ``index`` holds the ``date`` column as
        strings.
    """
    filename = 'data.pkl'
    if not os.path.isfile(filename):
        print("start")
        df = dd.read_csv("NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td'])
        print("read file")
        df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0)
        print("split rows")
        df = df.compute()
        df.to_pickle(filename)
        print("to_pandas")

    # NOTE: read_pickle executes whatever the pickle contains; data.pkl must
    # only ever be the cache written by this function.
    df = pd.read_pickle(filename)
    df.dropna(subset=['t'], inplace=True)

    # Select the station and build the shared date index once rather than
    # re-filtering the frame for every field of every channel.
    station = df.loc[df['number_sta'] == 14066001]
    index = station.loc[:, 'date'].values.astype(str).tolist()
    response = [
        {"index": index, "values": station.loc[:, column].values.tolist()}
        for column in ('t', 'hu', 'td')
    ]
    print("response ready")
    response = orjson.dumps(response)
    return response

@app.route('/create-mts-windows', methods=['POST'])
def create_mts_windows():
    """Build sliding windows over station 14066001 and cache them on disk.

    Does nothing but timing output when ``processed-data.npy`` already exists.
    Otherwise the ``t``, ``hu`` and ``td`` columns of ``data.pkl`` are cut into
    windows of 120 samples with stride 1, each window is min-max scaled to
    ``(-1, 1)`` along axis 1, and the result is written with ``np.save``.

    The request body is not read; the window size is fixed.

    Returns:
        str: always ``'1'``.
    """
    t0 = time()
    if not os.path.isfile('processed-data.npy'):
        df = pd.read_pickle('data.pkl')
        # Filter the station once; each channel is one column of that subset.
        station = df.loc[df['number_sta'] == 14066001]
        channels = [
            station.loc[:, column].fillna(0).values.tolist()
            for column in ('t', 'hu', 'td')
        ]
        print("Data read: " + str(time()-t0))
        window_size = 120
        print("Processing: " + str(time()-t0))
        # NOTE(review): the range stops one short of the last full window
        # (start == len - window_size is never produced). Kept as-is so the
        # window indices of existing caches stay valid - confirm intent.
        data = [
            [values[start:start + window_size] for values in channels]
            for start in range(len(channels[0]) - window_size)
        ]
        print("Raw windows: " + str(time()-t0))
        windows = []
        for i, raw_window in enumerate(data):
            if i % 5000 == 0:
                print(i)  # progress indicator
            windows.append(preprocessing.minmax_scale(raw_window, (-1, 1), axis=1))
        print("Preprocessed: " + str(time()-t0))
        np.save('processed-data', windows)
    print("Sending response: " + str(time()-t0))
    return '1'


121
122
@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Create the window cache ``processed-data.npy`` from ``test.bigWig``.

    Skipped when the cache file already exists. The chunks returned by
    ``bigwig.chunk`` become channel 0; channels 1 and 2 are copies of it whose
    windows have been put in random order with ``np.random.shuffle``.

    Returns:
        str: always ``'1'``.
    """
    started = time()
    cache_missing = not os.path.isfile('processed-data.npy')
    if cache_missing:
        # The request payload is ignored; the window size is fixed.
        window_size = 120
        chunks = bigwig.chunk(
            'test.bigWig',
            12000,
            int(12000 / window_size),
            int(12000 / 6),
            ['chr1'],
            verbose=True,
        )

        # Insert a channel axis of length 1: (windows, 1, samples).
        base = np.reshape(chunks, (len(chunks), 1, len(chunks[0])))

        # Two independently shuffled copies act as the extra channels.
        first_shuffled = np.copy(base)
        np.random.shuffle(first_shuffled)
        second_shuffled = np.copy(base)
        np.random.shuffle(second_shuffled)

        stacked = np.concatenate((base, first_shuffled, second_shuffled), axis=1)
        np.save('processed-data', stacked)
    print('Windows created: ' + str(time() - started))
    return '1'

@app.route('/create-test-windows', methods=['POST'])
def create_test_windows():
    """Create the window cache ``processed-data.npy`` from ``21.csv``.

    Skipped when the cache file already exists. Windows of 120 rows over the
    first five CSV columns are taken every ``int(120 / 8)`` rows, every window
    is repeated three times, and the array is reshaped to
    ``(windows, 5, 120)`` before being saved.

    Returns:
        str: always ``'1'``.
    """
    started = time()
    if not os.path.isfile('processed-data.npy'):
        frame = pd.read_csv('21.csv', header=None)

        # Work on a plain numpy array from here on.
        values = np.array(frame)
        print('data loaded')
        stride = int(120 / 8)
        last_start = values.shape[0] - 120
        windows = [values[start:start + 120, 0:5] for start in range(0, last_start, stride)]
        del values
        print('data created')
        repeated = np.repeat(windows, repeats=3, axis=0)
        print(repeated.shape)
        del windows
        # NOTE(review): this is a reshape, not a transpose, so the
        # (120, 5) samples are re-flowed into (5, 120) - confirm that is meant.
        reshaped = np.reshape(repeated, (len(repeated), 5, len(repeated[0])))
        print(reshaped.shape)
        np.save('processed-data', reshaped)
    print('Windows created: ' + str(time() - started))
    return '1'
171

172

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
173
174
175
@app.route('/initialize', methods=['POST'])
def initialize():
    """Run the first LSH query and return candidates, tables and parameters.

    Reads ``query`` from the orjson request body, loads the cached windows,
    swaps axes 1 and 2 of the data (and 0 and 1 of the query), derives the LSH
    parameters with ``preprocess`` and calls ``_lsh.lsh``.

    Returns:
        bytes: orjson-encoded object with the keys ``hash_functions``,
        ``candidates``, ``tables``, ``distances``, ``samples``,
        ``average_candidates``, ``average_distances`` and ``parameters``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    data = np.swapaxes(data, 1, 2)
    query = np.swapaxes(raw_data["query"], 0, 1)
    r, a, sd = preprocess(data)

    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print(distances)

    # Sum, per candidate, the distance reported in every [l][k] row it occurs
    # in, then rank candidates by that sum (ascending).
    summed_distances = defaultdict(int)
    for table_candidates, table_distances in zip(candidates, distances):
        for row_candidates, row_distances in zip(table_candidates, table_distances):
            for candidate, distance in zip(row_candidates, row_distances):
                summed_distances[candidate] += distance
    ranking = sorted(summed_distances.items(), key=lambda item: item[1])
    average_candidates = [candidate for candidate, _ in ranking]
    average_distances = [distance for _, distance in ranking]

    tables = []
    samples_set = set()
    candidates = candidates.tolist()
    for table_candidates, table_distances in zip(candidates, distances):
        for row_candidates, row_distances in zip(table_candidates, table_distances):
            # The first five candidates of every row are eligible as samples.
            samples_set.update(row_candidates[0:5])

            buckets = defaultdict(list)
            length = len(row_distances)
            if length == 0:
                tables.append(buckets)
                continue
            # Middle element as reference distance (presumably the row is
            # sorted ascending - TODO confirm). Clamped so a one-element row
            # no longer indexes past the end.
            median = row_distances[min(math.ceil(length / 2), length - 1)]
            stepsize = median / 10
            for candidate, distance in zip(row_candidates, row_distances):
                if distance > median * 2:
                    bucket = 19
                elif stepsize > 0:
                    # Cap at 19 so distance == 2 * median cannot open a
                    # 21st bucket.
                    bucket = min(19, math.floor(distance / stepsize))
                else:
                    # Zero median: avoid 0/0 and put the row's zero
                    # distances in the first bucket.
                    bucket = 0
                buckets[str(bucket)].append(candidate)
            tables.append(buckets)

    # Keep the ranking order; the numpy round trip turns numpy scalars into
    # plain Python numbers for serialisation.
    samples = np.array([c for c in average_candidates if c in samples_set]).tolist()

    response = {
        "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(),
        "candidates": candidates,
        "tables": tables,
        "distances": distances.tolist(),
        "samples": list(samples),
        "average_candidates": np.array(average_candidates).tolist(),
        "average_distances": np.array(average_distances).tolist(),
        "parameters": [float(r), float(a), float(sd)]
    }
    response = orjson.dumps(response)
    print('LSH done: ' + str(time()-t0))
    return response

233
234
@app.route('/weights', methods=['POST'])
def weights():
    """Blend the previous channel weights with user feedback.

    The orjson request body supplies ``labels`` (window index -> bool),
    ``hash_functions``, ``query`` and ``weights``. Windows labelled ``True``
    contribute per-channel ``_ucrdtw`` distances to the query; the selected
    hash functions contribute their squared column sums. The old weights keep
    a share of ``alpha = 0.2``; the remainder is split over whichever feedback
    sources are present.

    Returns:
        bytes: orjson-encoded list of the new weights.
    """
    alpha = 0.2
    raw_data = orjson.loads(request.data)
    labels = raw_data["labels"]
    hash_functions = raw_data["hash_functions"]
    query = raw_data["query"]
    # Always an array, so the "no update" branch can be serialised with
    # .tolist() like every other branch (it used to fail on a plain list).
    old_weights = np.array(raw_data["weights"])
    data = np.load('processed-data.npy')

    # Index with a flat integer array: a doubly nested list is interpreted
    # differently across NumPy versions and can add a leading axis.
    good_indices = np.array(
        [int(index) for index, value in labels.items() if value is True],
        dtype=np.intp,
    )
    all_good_windows = data[good_indices]

    good_distances = np.zeros(len(query))
    for window in all_good_windows:
        for i in range(len(window)):
            good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1]

    if len(all_good_windows) != 0:
        squared = np.square(good_distances)
        total = np.sum(squared)
        # With all-zero distances there is nothing to normalise; skipping the
        # division avoids 0/0 and leads to uniform weights below.
        share = squared / total if total > 0 else squared
        inverted = np.ones(len(query)) - share
        inverted_total = np.sum(inverted)
        if inverted_total > 0:
            good_distances = np.sqrt(inverted / inverted_total * len(all_good_windows[0]))
        else:
            # Happens when a single entry carries the whole share (e.g. one
            # channel); fall back to neutral weights instead of NaN.
            good_distances = np.ones(len(query))

    if len(hash_functions) != 0:
        summed_hash_functions = np.square(np.sum(hash_functions, axis=0))
        normalized_hash_functions = summed_hash_functions / np.sum(summed_hash_functions)
        normalized_hash_functions *= len(hash_functions[0])

    if len(hash_functions) + len(all_good_windows) == 0:
        print("no update")
        new_weights = old_weights
    elif len(hash_functions) == 0:
        print("only windows")
        new_weights = alpha * old_weights + (1 - alpha) * good_distances
    elif len(all_good_windows) == 0:
        print("only tables")
        new_weights = alpha * old_weights + (1 - alpha) * normalized_hash_functions
    else:
        print("tables & windows")
        new_weights = alpha * old_weights + 0.5 * (1-alpha) * good_distances + 0.5 * (1-alpha) * normalized_hash_functions

    print(new_weights)

    response = orjson.dumps(new_weights.tolist())
    return response


281
282
283
284
285
@app.route('/update', methods=['POST'])
def update():
    """Re-run the LSH query with client-supplied weights and parameters.

    Reads ``query``, ``weights`` and ``parameters`` from the orjson request
    body, loads the cached windows, swaps axes 1 and 2 of the data (and 0 and
    1 of the query) and calls ``_lsh.lsh`` with the three parameters and the
    weights.

    Returns:
        bytes: orjson-encoded object with the keys ``hash_functions``,
        ``candidates``, ``tables``, ``samples``, ``average_candidates``,
        ``average_distances`` and ``distances``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    data = np.swapaxes(data, 1, 2)
    query = np.swapaxes(raw_data["query"], 0, 1)
    weights = raw_data["weights"]
    parameters = raw_data["parameters"]

    candidates, distances, hf = _lsh.lsh(data, query, parameters[0], parameters[1], parameters[2], weights)

    # Sum, per candidate, the distance reported in every [l][k] row it occurs
    # in, then rank candidates by that sum (ascending).
    summed_distances = defaultdict(int)
    for table_candidates, table_distances in zip(candidates, distances):
        for row_candidates, row_distances in zip(table_candidates, table_distances):
            for candidate, distance in zip(row_candidates, row_distances):
                summed_distances[candidate] += distance
    ranking = sorted(summed_distances.items(), key=lambda item: item[1])
    average_candidates = [candidate for candidate, _ in ranking]
    average_distances = [distance for _, distance in ranking]

    tables = []
    samples_set = set()
    candidates = candidates.tolist()
    for table_candidates, table_distances in zip(candidates, distances):
        for row_candidates, row_distances in zip(table_candidates, table_distances):
            # The first five candidates of every row are eligible as samples.
            samples_set.update(row_candidates[0:5])

            buckets = defaultdict(list)
            length = len(row_distances)
            if length == 0:
                tables.append(buckets)
                continue
            # Middle element as reference distance (presumably the row is
            # sorted ascending - TODO confirm). Clamped so a one-element row
            # no longer indexes past the end.
            median = row_distances[min(math.ceil(length / 2), length - 1)]
            stepsize = median / 10
            for candidate, distance in zip(row_candidates, row_distances):
                if distance > median * 2:
                    bucket = 19
                elif stepsize > 0:
                    # Cap at 19 so distance == 2 * median cannot open a
                    # 21st bucket.
                    bucket = min(19, math.floor(distance / stepsize))
                else:
                    # Zero median: avoid 0/0 and put the row's zero
                    # distances in the first bucket.
                    bucket = 0
                buckets[str(bucket)].append(candidate)
            tables.append(buckets)

    # Keep the ranking order; the numpy round trip turns numpy scalars into
    # plain Python numbers for serialisation.
    samples = np.array([c for c in average_candidates if c in samples_set]).tolist()

    response = {
        "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(),
        "candidates": candidates,
        "tables": tables,
        "samples": list(samples),
        "average_candidates": np.array(average_candidates).tolist(),
        "average_distances": np.array(average_distances).tolist(),
        "distances": distances.tolist(),
    }
    response = orjson.dumps(response)
    print('LSH done: ' + str(time()-t0))
    return response

334
335
@app.route('/query', methods=['POST'])
def query():
    """Return one cached window, or the DBA result of several.

    The request field ``window`` is either an integer, in which case that
    window is returned as-is, or a mapping of window index to bool, in which
    case the windows flagged ``True`` are passed to ``performDBA``.

    Returns:
        bytes: orjson-encoded nested list.
    """
    started = time()
    payload = orjson.loads(request.data)
    selection = payload['window']

    if not isinstance(selection, int):
        # Mapping case: collect the flagged indices and average them.
        chosen = [int(key) for key, flag in selection.items() if flag is True]
        chosen_windows = np.load('processed-data.npy')[chosen]
        averaged = performDBA(chosen_windows)
        body = orjson.dumps(averaged.tolist())
        print("Query done: " + str(time()-started))
        return body

    # Integer case: hand back the stored window unchanged.
    single = np.load('processed-data.npy')[selection]
    body = orjson.dumps(single.tolist())
    print("Query done: " + str(time() - started))
    return body

@app.route('/window', methods=['POST'])
def window():
    """Return the cached windows selected by the request field ``indices``.

    Returns:
        bytes: orjson-encoded nested list of the selected windows.
    """
    started = time()
    payload = orjson.loads(request.data)
    wanted = payload['indices']
    selected = np.load('processed-data.npy')[wanted]
    body = orjson.dumps(selected.tolist())
    print("Query done: " + str(time() - started))
    return body

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
362
363
@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise each requested group of windows as a mean +/- std band.

    The request field ``windows`` is a list of index lists. For every group
    the element-wise average over its windows is returned together with the
    average plus and minus one standard deviation.

    Returns:
        bytes: orjson-encoded ``{"prototypes": [...], "distances": []}``;
        ``distances`` is always empty.
    """
    started = time()
    payload = orjson.loads(request.data)
    groups = payload['windows']
    cached = np.load('processed-data.npy')

    prototypes = []
    for members in groups:
        group_windows = cached[members]
        centre = np.mean(group_windows, axis=0)
        spread = np.std(group_windows, axis=0)
        band = {
            'average': centre.tolist(),
            'max': (centre + spread).tolist(),
            'min': (centre - spread).tolist(),
        }
        prototypes.append(band)

    body = orjson.dumps({'prototypes': prototypes, 'distances': []})
    print("Averages calculated: " + str(time() - started))
    return body

385
def preprocess(data, r=10.0, window_size=120):
    """Estimate the LSH parameters ``(r, a, sd)`` from a sample of windows.

    A subset of windows that are pairwise at least ``r`` apart (Euclidean
    norm) is collected greedily; ``r`` is halved when fewer than 10 windows
    were kept after 10000 candidates and grown by 50% as soon as more than
    200 are kept, restarting the scan each time. DTW and Euclidean distances
    between the kept windows then yield the returned values.

    Args:
        data: indexable collection of equally shaped numeric arrays.
        r: initial separation radius for the greedy subset selection.
        window_size: number of samples per window; sets the Sakoe-Chiba
            radius (5% of it) and the ``sqrt`` scaling of the result. The
            default keeps the previously hard-coded value.

    Returns:
        tuple: ``(r, a, sd)`` - the derived radius, and the mean and standard
        deviation of the DTW / Euclidean distance ratios.
    """
    t0 = time()
    subset = []

    i = 0
    while i < len(data):
        if i % 999 == 0:
            print(r)
            print(str(i) + ':' + str(len(subset)))

        # Keep window i only if no already kept window is closer than r.
        if not any(np.linalg.norm(data[i] - data[s]) < r for s in subset):
            subset.append(i)

        i = i + 1
        if i == 10000 and len(subset) < 10:
            # Too few representatives: shrink the radius and start over.
            r = r / 2
            subset = []
            i = 0
        if len(subset) > 200:
            # Too many representatives: grow the radius and start over.
            r = r + r / 2
            subset = []
            i = 0

    print("r = " + str(r))
    radius = int(0.05 * window_size)
    dtw_distances = []
    eq_distances = []
    for position, index_1 in enumerate(subset):
        print(position)
        # Both distances are symmetric, so each unordered pair is measured
        # once; counting every pair twice (as before) gives the same mean and
        # standard deviation at twice the DTW cost.
        for index_2 in subset[position + 1:]:
            e = np.linalg.norm(data[index_1] - data[index_2])
            if math.isnan(e) or e == 0:
                # Degenerate pair: use a small placeholder for both distances
                # so the ratio below stays defined.
                eq_distances.append(0.0001)
                dtw_distances.append(0.0001)
                continue
            eq_distances.append(e)
            d = dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=radius)
            dtw_distances.append(d)

    ratios = np.array(dtw_distances) / np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    r = theta / ((a - sd) * math.sqrt(window_size))
    if r < 0:
        # A negative radius is unusable; fall back to 1% of the mean DTW
        # distance.
        r = mean_dtw / 100

    print('Mean: ' + str(mean_dtw))
    print('Stdev: ' + str(sd_dtw))
    print('Ratio mean: ' + str(a))
    print('Ratio stdev: ' + str(sd))
    print('Theta: ' + str(theta))
    print('r: ' + str(r))
    print('Preprocessing time: ' + str(time() - t0))
    return r, a, sd
460

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
461
def debug_test_lsh():
    """Compare the LSH candidates for window 1234 with exact DTW ranking.

    Manual debugging aid: prints timings, the 20 best LSH candidates, the 20
    best exact DTW matches and how many of the latter LSH found.
    """
    data = np.load('processed-data.npy')
    print(data.shape)
    n_windows, n_channels, n_samples = len(data), len(data[0]), len(data[0][0])
    data = np.reshape(data, (n_windows, n_samples, n_channels))

    r, a, sd = preprocess(data, 11.25)
    create_windows()

    query_n = 1234
    started = time()
    # The query is taken before the cast below, so it keeps the stored dtype.
    query = data[query_n]
    data = data.astype('double')
    totals = defaultdict(int)
    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print("Calculated approximate in: " + str(time()-started))

    # Rank candidates by the sum of the distances reported for them.
    for table_candidates, table_distances in zip(candidates, distances):
        for row_candidates, row_distances in zip(table_candidates, table_distances):
            for candidate, distance in zip(row_candidates, row_distances):
                totals[candidate] += distance
    candidates = [key for key, _ in sorted(totals.items(), key=lambda item: item[1])]
    print(candidates[0:20])

    started = time()
    band = int(0.05*120)
    exact = [
        dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=band)
        for window in data
    ]
    topk_dtw = sorted(range(len(exact)), key=exact.__getitem__)
    print("Calculated exact dtw in: " + str(time()-started))
    print(topk_dtw[0:20])

    # Computed only for the timing line; the values themselves are not used.
    started = time()
    l2distances = [np.linalg.norm(window - query) for window in data]
    print("Calculated exact l2 in: " + str(time()-started))

    accuracy = sum(1 for index in topk_dtw[0:20] if index in candidates)
    print(accuracy)
504

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
505
506
# read_mts_data()
# create_mts_windows()
507
# debug_test_lsh()