main.py 17.7 KB
Newer Older
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
1
from flask import Flask, request
2
3
4
import numpy as np
from flask_cors import CORS
from time import time
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
5
import pandas as pd
6
import orjson
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
7
8
import bigwig
import bbi
9
import _ucrdtw
10
11
import _lsh
import math
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
12
13
import dask.dataframe as dd
import os.path
14
from random import sample
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
15
16
17
from DBA_multivariate import performDBA
from tslearn.metrics import dtw
from sklearn import preprocessing
18
19
20
21
22
from collections import defaultdict
from dtaidistance import dtw_ndim
from scipy.spatial.distance import euclidean

from fastdtw import fastdtw
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
23

24
# Module-level flag; nothing in the visible part of this file reads it.
reload = False

# Flask application object that all route decorators below attach to.
# CORS(app) applies flask_cors to the whole application.
app = Flask(__name__)
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer ``GET /`` with the fixed string ``"hi"``."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Serve the ``chr1`` track of ``test.bigWig`` as three identical series.

    The chromosome length comes from ``bbi.chromsizes`` and the values from
    ``bigwig.get`` called with ``bins = 100000``.

    Returns:
        orjson-encoded bytes of a list with three ``{"index", "values"}``
        dicts that all carry the same content.
    """
    t0 = time()
    size = bbi.chromsizes('test.bigWig')['chr1']
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)

    # Convert once and reuse; the three series are identical.
    values = data.tolist()
    # Guard against a zero step when the chromosome is shorter than `bins`.
    step = max(1, int(size / bins))
    # One index entry per value. range(0, size, step) could produce more
    # entries than values whenever size is not a multiple of bins.
    index = [position * step for position in range(len(values))]

    response = [{"index": index, "values": values} for _ in range(3)]
    response = orjson.dumps(response)
    print('Data read: ' + str(time()-t0))
    return response

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
58
59
60
61
62
63
64
@app.route('/read-mts-data', methods=['GET'])
def read_mts_data():
    """Serve the ``t``, ``hu`` and ``td`` series of station 14066001.

    On the first call the CSV ``NW_Ground_Stations_2016.csv`` is read with
    dask, reduced to a fixed list of stations, NaN-filled with 0 and cached
    as ``data.pkl``. Later calls read the pickle directly.

    Returns:
        orjson-encoded bytes of a list with three ``{"index", "values"}``
        dicts (one per column), where ``index`` holds the ``date`` column as
        strings.
    """
    filename = 'data.pkl'
    if not os.path.isfile(filename):
        print("start")
        df = dd.read_csv("NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td'])
        print("read file")
        df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0)
        print("split rows")
        df = df.compute()
        df.to_pickle(filename)
        print("to_pandas")
    else:
        # Only hit the disk when the frame was not just built in memory.
        df = pd.read_pickle(filename)
    df = df.dropna(subset=['t'])

    # Filter the station rows once instead of once per response field.
    station = df.loc[df['number_sta'] == 14066001]
    dates = station.loc[:, 'date'].values.astype(str).tolist()
    response = [
        {"index": dates, "values": station.loc[:, column].values.tolist()}
        for column in ('t', 'hu', 'td')
    ]
    print("response ready")
    response = orjson.dumps(response)
    return response

@app.route('/create-mts-windows', methods=['POST'])
def create_mts_windows():
    """Build min-max scaled sliding windows from ``data.pkl``.

    Does nothing if ``processed-data.npy`` already exists. Otherwise the
    ``t``, ``hu`` and ``td`` columns of station 14066001 are cut into windows
    of 120 samples with a stride of 1, each channel of each window is scaled
    to the range (-1, 1) and the result is saved as ``processed-data.npy``.

    Returns:
        The string ``'1'``.
    """
    t0 = time()
    if not os.path.isfile('processed-data.npy'):
        df = pd.read_pickle('data.pkl')
        # Select the station rows once and pull the three channels from them.
        station = df.loc[df['number_sta'] == 14066001]
        channels = [
            station.loc[:, column].fillna(0).values.tolist()
            for column in ('t', 'hu', 'td')
        ]
        print("Data read: " + str(time()-t0))
        window_size = 120
        print("Processing: " + str(time()-t0))

        # NOTE(review): the range stops at len - window_size, so the last
        # full window is never emitted - confirm whether that is intended.
        windows = []
        for start in range(0, len(channels[0]) - window_size, 1):
            if start % 5000 == 0:
                print(start)
            # Slice and scale one window at a time so the raw windows are
            # never all held in memory as nested Python lists.
            raw_window = [values[start:start + window_size] for values in channels]
            windows.append(preprocessing.minmax_scale(raw_window, (-1, 1), axis=1))
        print("Preprocessed: " + str(time()-t0))
        np.save('processed-data', windows)
    print("Sending response: " + str(time()-t0))
    return '1'


121
122
@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Create three-channel windows from ``test.bigWig`` unless cached.

    When ``processed-data.npy`` is missing, chunks of ``chr1`` are fetched
    with ``bigwig.chunk``, given a singleton axis 1, and joined along that
    axis with two independently shuffled copies of themselves. The result is
    saved as ``processed-data.npy``.

    Returns:
        The string ``'1'``.
    """
    started = time()
    already_built = os.path.isfile('processed-data.npy')
    if not already_built:
        window_size = 120
        chunks = bigwig.chunk(
            'test.bigWig',
            12000,
            int(12000 / window_size),
            int(12000 / 6),
            ['chr1'],
            verbose=True,
        )
        original = np.reshape(chunks, (len(chunks), 1, len(chunks[0])))

        # Two extra channels: shuffled copies of the first one.
        first_shuffle = np.copy(original)
        np.random.shuffle(first_shuffle)
        second_shuffle = np.copy(original)
        np.random.shuffle(second_shuffle)

        stacked = np.concatenate((original, first_shuffle, second_shuffle), axis=1)
        np.save('processed-data', stacked)
    print('Windows created: ' + str(time() - started))
    return '1'

@app.route('/create-test-windows', methods=['POST'])
def create_test_windows():
    """Create windows from the first five columns of ``21.csv`` unless cached.

    When ``processed-data.npy`` is missing, the CSV is cut into windows of
    120 rows with a stride of 15, every window is repeated three times, and
    the array is saved as ``processed-data.npy`` with the column axis before
    the row axis.

    Returns:
        The string ``'1'``.
    """
    t0 = time()
    if not os.path.isfile('processed-data.npy'):
        window_size = 120
        stride = int(window_size / 8)

        data = pd.read_csv('21.csv', header=None)
        npdata = np.array(data)
        print('data loaded')
        window_data = [
            npdata[start:start + window_size, 0:5]
            for start in range(0, npdata.shape[0] - window_size, stride)
        ]
        del npdata
        print('data created')
        np_window_data = np.repeat(window_data, repeats=3, axis=0)
        print(np_window_data.shape)
        del window_data
        # Each window is (rows, columns). Swapping the two axes keeps every
        # column's samples together; a plain reshape to (n, 5, 120) would
        # reinterpret the buffer and interleave values of different columns.
        data = np.ascontiguousarray(np.swapaxes(np_window_data, 1, 2))
        print(data.shape)
        np.save('processed-data', data)
    print('Windows created: ' + str(time()-t0))
    return '1'
171

172

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
173
174
175
@app.route('/initialize', methods=['POST'])
def initialize():
    """Run the first LSH search for the query window sent by the client.

    The JSON request body must contain a ``"query"`` entry. The stored
    windows (``processed-data.npy``) and the query get their last two axes
    swapped, the LSH parameters are derived with ``preprocess`` and
    ``_lsh.lsh`` is called.

    Returns:
        orjson-encoded bytes of a dict with the keys ``hash_functions``,
        ``candidates``, ``tables``, ``distances``, ``samples``,
        ``average_candidates``, ``average_distances`` and ``parameters``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    data = np.swapaxes(data, 1, 2)
    query = np.swapaxes(raw_data["query"], 0, 1)
    r, a, sd = preprocess(data)

    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print(distances)

    # Sum, per candidate, its distance over every place where it occurs and
    # rank the candidates by that sum (ascending).
    summed = defaultdict(int)
    for level_candidates, level_distances in zip(candidates, distances):
        for table_candidates, table_distances in zip(level_candidates, level_distances):
            for candidate, distance in zip(table_candidates, table_distances):
                summed[candidate] += distance
    ranked = sorted(summed.items(), key=lambda item: item[1])
    average_candidates = [candidate for candidate, _ in ranked]
    average_distances = [distance for _, distance in ranked]

    tables = []
    samples_set = set()
    candidates = candidates.tolist()
    for level_candidates, level_distances in zip(candidates, distances):
        for table_candidates, table_distances in zip(level_candidates, level_distances):
            # The first five candidates of every table are sample candidates.
            samples_set.update(table_candidates[0:5])

            buckets = defaultdict(list)
            length = len(table_distances)
            if length:
                # Reference distance taken from the middle of the list
                # (presumably sorted ascending - confirm against _lsh). The
                # index is clamped so a one-element table cannot overflow.
                median = table_distances[min(math.ceil(length / 2), length - 1)]
                stepsize = median / 10
                for candidate, distance in zip(table_candidates, table_distances):
                    if distance > median * 2:
                        bucket = 19
                    elif stepsize == 0:
                        # A zero median would divide by zero below.
                        bucket = 0
                    else:
                        # Keep distance == 2 * median inside buckets 0..19.
                        bucket = min(19, math.floor(distance / stepsize))
                    buckets[str(bucket)].append(candidate)
            tables.append(dict(buckets))

    # Sample candidates, ordered by their summed distance.
    samples = np.array([c for c in average_candidates if c in samples_set]).tolist()

    response = {
        "hash_functions": hf.tolist(),
        "candidates": candidates,
        "tables": tables,
        "distances": distances.tolist(),
        "samples": samples,
        "average_candidates": np.array(average_candidates).tolist(),
        "average_distances": np.array(average_distances).tolist(),
        "parameters": [float(r), float(a), float(sd)]
    }
    response = orjson.dumps(response)
    print('LSH done: ' + str(time()-t0))
    return response

233
234
@app.route('/weights', methods=['POST'])
def weights():
    """Update per-channel weights from the windows the user labelled as good.

    The JSON request body must contain ``"labels"`` (a mapping from window
    index to a boolean), ``"query"`` (indexed per channel) and ``"weights"``
    (the previous weights). For every window labelled ``True`` the
    per-channel ``_ucrdtw.ucrdtw`` distance to the query is accumulated. The
    sums are normalised, inverted, rescaled and square-rooted, then blended
    with the previous weights using ``alpha = 0.2``.

    Returns:
        orjson-encoded bytes of the list of new weights.
    """
    alpha = 0.2
    raw_data = orjson.loads(request.data)
    labels = raw_data["labels"]
    query = raw_data["query"]
    old_weights = raw_data["weights"]
    data = np.load('processed-data.npy')

    # Use a flat integer index. A doubly nested list is treated as a 2-D
    # index array by NumPy >= 1.23 and would add an extra leading axis.
    good_indices = np.asarray(
        [int(index) for index, value in labels.items() if value is True],
        dtype=np.intp,
    )
    all_good_windows = data[good_indices]

    good_distances = np.zeros(len(query))
    for window in all_good_windows:
        for i in range(len(window)):
            good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1]

    if len(all_good_windows) != 0:
        total = np.sum(good_distances)
        # With all-zero distances every channel matches equally well, so
        # skip the division that would produce NaN.
        if total > 0:
            inverted = np.ones(len(query)) - good_distances / total
        else:
            inverted = np.ones(len(query))
        inverted_total = np.sum(inverted)
        if inverted_total > 0:
            good_distances = np.sqrt(inverted / inverted_total * len(all_good_windows[0]))
        else:
            # Happens with a single channel; fall back to a neutral weight.
            good_distances = np.ones(len(query))
    good_distances = alpha * np.array(old_weights) + (1-alpha) * good_distances

    print(good_distances)
    response = orjson.dumps(good_distances.tolist())
    return response


272
273
274
275
276
@app.route('/update', methods=['POST'])
def update():
    """Re-run the LSH search with client-supplied weights and parameters.

    The JSON request body must contain ``"query"``, ``"weights"`` and
    ``"parameters"`` (three values, passed positionally to ``_lsh.lsh``).
    The stored windows (``processed-data.npy``) and the query get their last
    two axes swapped before the search.

    Returns:
        orjson-encoded bytes of a dict with the keys ``hash_functions``,
        ``candidates``, ``tables``, ``samples``, ``average_candidates``,
        ``average_distances`` and ``distances``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    data = np.swapaxes(data, 1, 2)
    query = np.swapaxes(raw_data["query"], 0, 1)
    channel_weights = raw_data["weights"]
    parameters = raw_data["parameters"]

    candidates, distances, hf = _lsh.lsh(
        data, query, parameters[0], parameters[1], parameters[2], channel_weights
    )

    # Sum, per candidate, its distance over every place where it occurs and
    # rank the candidates by that sum (ascending).
    summed = defaultdict(int)
    for level_candidates, level_distances in zip(candidates, distances):
        for table_candidates, table_distances in zip(level_candidates, level_distances):
            for candidate, distance in zip(table_candidates, table_distances):
                summed[candidate] += distance
    ranked = sorted(summed.items(), key=lambda item: item[1])
    average_candidates = [candidate for candidate, _ in ranked]
    average_distances = [distance for _, distance in ranked]

    tables = []
    samples_set = set()
    candidates = candidates.tolist()
    for level_candidates, level_distances in zip(candidates, distances):
        for table_candidates, table_distances in zip(level_candidates, level_distances):
            # The first five candidates of every table are sample candidates.
            samples_set.update(table_candidates[0:5])

            buckets = defaultdict(list)
            length = len(table_distances)
            if length:
                # Reference distance taken from the middle of the list
                # (presumably sorted ascending - confirm against _lsh). The
                # index is clamped so a one-element table cannot overflow.
                median = table_distances[min(math.ceil(length / 2), length - 1)]
                stepsize = median / 10
                for candidate, distance in zip(table_candidates, table_distances):
                    if distance > median * 2:
                        bucket = 19
                    elif stepsize == 0:
                        # A zero median would divide by zero below.
                        bucket = 0
                    else:
                        # Keep distance == 2 * median inside buckets 0..19.
                        bucket = min(19, math.floor(distance / stepsize))
                    buckets[str(bucket)].append(candidate)
            tables.append(dict(buckets))

    # Sample candidates, ordered by their summed distance.
    samples = np.array([c for c in average_candidates if c in samples_set]).tolist()

    response = {
        "hash_functions": hf.tolist(),
        "candidates": candidates,
        "tables": tables,
        "samples": samples,
        "average_candidates": np.array(average_candidates).tolist(),
        "average_distances": np.array(average_distances).tolist(),
        "distances": distances.tolist(),
    }
    response = orjson.dumps(response)
    print('LSH done: ' + str(time()-t0))
    return response

325
326
@app.route('/query', methods=['POST'])
def query():
    """Return either one stored window or the DBA result of several.

    The JSON request body carries a ``'window'`` entry. An integer selects
    that single window from ``processed-data.npy``. Otherwise the entry is
    treated as a mapping from window index to a boolean, and the windows
    flagged ``True`` are passed to ``performDBA``.

    Returns:
        orjson-encoded bytes of the resulting array as nested lists.
    """
    started = time()
    payload = orjson.loads(request.data)
    selection = payload['window']

    if not isinstance(selection, int):
        chosen = [int(key) for key, flag in selection.items() if flag is True]
        members = np.load('processed-data.npy')[chosen]
        result = performDBA(members)
        body = orjson.dumps(result.tolist())
        print("Query done: " + str(time()-started))
        return body

    result = np.load('processed-data.npy')[selection]
    body = orjson.dumps(result.tolist())
    print("Query done: " + str(time() - started))
    return body

@app.route('/window', methods=['POST'])
def window():
    """Return the stored windows selected by the request's ``'indices'``.

    Returns:
        orjson-encoded bytes of ``processed-data.npy`` indexed with the
        ``'indices'`` entry of the JSON request body.
    """
    started = time()
    wanted = orjson.loads(request.data)['indices']
    selected = np.load('processed-data.npy')[wanted]
    body = orjson.dumps(selected.tolist())
    print("Query done: " + str(time() - started))
    return body

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
353
354
@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise groups of stored windows as mean and mean +/- std bands.

    The JSON request body carries a ``'windows'`` entry: a list of index
    lists. For each index list the selected windows are averaged along
    axis 0 and their standard deviation is added and subtracted.

    Returns:
        orjson-encoded bytes of ``{'prototypes': [...], 'distances': []}``,
        where each prototype has the keys ``'average'``, ``'max'`` and
        ``'min'``.
    """
    started = time()
    groups = orjson.loads(request.data)['windows']
    stored = np.load('processed-data.npy')

    def summarise(member_indices):
        # Element-wise mean and spread over the selected windows.
        members = stored[member_indices]
        centre = np.average(members, 0)
        spread = np.std(members, 0)
        upper = centre + spread
        lower = centre - spread
        return {
            'average': centre.tolist(),
            'max': upper.tolist(),
            'min': lower.tolist()
        }

    prototypes = [summarise(member_indices) for member_indices in groups]
    body = orjson.dumps({'prototypes': prototypes, 'distances': []})
    print("Averages calculated: " + str(time() - started))
    return body

376
def preprocess(data, r=10.0, window_size=120):
    """Estimate the ``(r, a, sd)`` parameters that callers pass to ``_lsh.lsh``.

    A subset of windows is picked greedily so that all members are at least
    ``r`` apart (Euclidean norm). The selection restarts with ``r / 2`` when
    fewer than 10 windows were kept after 10000 rows, and with ``1.5 * r``
    as soon as more than 200 windows were kept. DTW and Euclidean distances
    between all pairs of the subset then give the statistics from which the
    result is computed.

    Args:
        data: Indexable collection of equally shaped numeric windows.
        r: Initial separation threshold for the greedy selection.
        window_size: Window length used for the Sakoe-Chiba radius
            (5 % of it) and for the final ``sqrt`` scaling. The default
            keeps the previously hard-coded 120.

    Returns:
        Tuple ``(r, a, sd)``: the derived ``r``, and the mean and standard
        deviation of the DTW / Euclidean distance ratios.
    """
    t0 = time()
    subset = []

    i = 0
    while i < len(data):
        if i % 999 == 0:
            print(r)
            print(str(i) + ':' + str(len(subset)))

        # Keep window i only if no already kept window is closer than r.
        if not any(np.linalg.norm(data[i] - data[s]) < r for s in subset):
            subset.append(i)

        i = i + 1
        if i == 10000 and len(subset) < 10:
            # Too few distinct windows: loosen the threshold and start over.
            r = r / 2
            subset = []
            i = 0
        if len(subset) > 200:
            # Too many: tighten the threshold and start over.
            r = r + r / 2
            subset = []
            i = 0

    print("r = " + str(r))

    radius = int(0.05 * window_size)
    dtw_distances = []
    eq_distances = []
    for position, index_1 in enumerate(subset):
        print(position)
        # Both distances are symmetric, so each unordered pair is evaluated
        # once. Listing every pair twice leaves mean and std unchanged.
        for index_2 in subset[position + 1:]:
            e = np.linalg.norm(data[index_1] - data[index_2])
            if math.isnan(e) or e == 0:
                # Placeholder values keep the ratio below well defined.
                eq_distances.append(0.0001)
                dtw_distances.append(0.0001)
                continue
            eq_distances.append(e)
            d = dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=radius)
            dtw_distances.append(d)

    ratios = np.array(dtw_distances)/np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    r = theta / ((a-sd)*math.sqrt(window_size))
    # Fall back when r is negative or not finite (a == sd gives a zero
    # denominator).
    if not np.isfinite(r) or r < 0:
        r = mean_dtw / 100

    print('Mean: ' + str(mean_dtw))
    print('Stdev: ' + str(sd_dtw))
    print('Ratio mean: ' + str(a))
    print('Ratio stdev: ' + str(sd))
    print('Theta: ' + str(theta))
    print('r: ' + str(r))
    print('Preprocessing time: ' + str(time() - t0))
    return r, a, sd
451

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
452
def debug_test_lsh():
    """Compare LSH candidates for window 1234 with an exact DTW ranking.

    Loads ``processed-data.npy``, derives the LSH parameters with
    ``preprocess``, runs ``_lsh.lsh`` for one stored window and prints how
    many of the 20 windows with the smallest exact DTW distance also appear
    among the LSH candidates. Timings for the LSH call, the exact DTW scan
    and an exact L2 scan are printed as well.
    """
    data = np.load('processed-data.npy')
    print(data.shape)
    # Swap the last two axes, as initialize() and update() do. A reshape to
    # the swapped shape would scramble the samples instead of transposing.
    data = np.swapaxes(data, 1, 2)

    r, a, sd = preprocess(data, 11.25)
    # create_windows() only rebuilds the file when it is missing, and it was
    # loaded just above.
    create_windows()

    query_n = 1234
    t0 = time()
    query = data[query_n]
    data = data.astype('double')
    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print("Calculated approximate in: " + str(time()-t0))

    # Rank candidates by their summed distance over all occurrences.
    summed = defaultdict(int)
    for level_candidates, level_distances in zip(candidates, distances):
        for table_candidates, table_distances in zip(level_candidates, level_distances):
            for candidate, distance in zip(table_candidates, table_distances):
                summed[candidate] += distance
    candidates = [candidate for candidate, _ in sorted(summed.items(), key=lambda item: item[1])]
    print(candidates[0:20])

    t0 = time()
    distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data]
    topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k])
    print("Calculated exact dtw in: " + str(time()-t0))
    print(topk_dtw[0:20])

    # Result is unused; the scan is only timed.
    t0 = time()
    l2distances = [np.linalg.norm(window - query) for window in data]
    print("Calculated exact l2 in: " + str(time()-t0))

    # Set membership avoids scanning the candidate list for every index.
    candidate_set = set(candidates)
    accuracy = sum(1 for index in topk_dtw[0:20] if index in candidate_set)
    print(accuracy)
495

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
496
497
# read_mts_data()
# create_mts_windows()
498
# debug_test_lsh()