# main.py - Flask backend serving bigWig window data and LSH/DTW similarity search endpoints.
import math
from random import sample
from time import time

from flask import Flask, request
import numpy as np
from flask_cors import CORS
import orjson
import bigwig
import bbi
import _ucrdtw
import _lsh
import dtw
from DBA import performDBA

# When True, the /create-windows endpoint rebuilds the windowed data files
# from the bigWig source; when False it leaves the files on disk untouched.
reload = False

app = Flask(__name__)
# Wrap the app with flask_cors so browser clients on other origins can call it.
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer ``GET /`` with the fixed string ``"hi"``."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Serve a binned overview of chr1 from ``test.bigWig``.

    Returns:
        The orjson-encoded object ``{"index": [...], "values": [...]}``.
        ``index`` holds positions from 0 up to the chromosome size in strides
        of ``int(size / bins)``; ``values`` is whatever ``bigwig.get`` returned,
        converted with ``tolist()``.
    """
    t0 = time()
    bigwig_path = 'test.bigWig'
    chrom = 'chr1'
    bins = 100000

    size = bbi.chromsizes(bigwig_path)[chrom]
    data = bigwig.get(bigwig_path, chrom, 0, size, bins)
    print(data.shape)

    # NOTE(review): when ``size`` is not an exact multiple of ``bins`` this
    # range yields more than ``bins`` positions, so "index" may be longer than
    # "values" - confirm what the client expects before changing it.
    step = int(size / bins)
    response = orjson.dumps({
        "index": list(range(0, size, step)),
        "values": data.tolist(),
    })
    print('Data read: ' + str(time() - t0))
    return response

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Rebuild the windowed data files when the module-level ``reload`` flag is set.

    With ``reload`` true, the request JSON must carry
    ``parameters.windowsize``. The windows returned by ``bigwig.chunk`` are
    written to ``processed-data.npy`` (binary) and ``processed-data`` (text),
    and one fixed window is written to ``query`` (text).
    With ``reload`` false nothing is read or written.

    Returns:
        The string ``'1'`` in both cases.
    """
    t0 = time()
    if reload:
        # Fixed settings of the windowing run.
        # NOTE(review): presumably base pairs per window and per step - confirm
        # against the signature of bigwig.chunk.
        window_span = 12000
        step_span = int(window_span / 6)
        # Row that is exported as the example query.
        query_index = 80503

        raw_data = request.json
        window_size = int(raw_data['parameters']["windowsize"])

        data = bigwig.chunk(
            'test.bigWig',
            window_span,
            int(window_span / window_size),
            step_span,
            ['chr1'],
            verbose=True,
        )

        # np.save appends ".npy", so the binary and text dumps do not collide.
        np.save('processed-data', data)
        np.savetxt('processed-data', data, delimiter=' ', fmt='%f')
        np.savetxt('query', data[query_index], delimiter=' ', fmt='%f')
    print('Windows created: ' + str(time() - t0))
    return '1'


@app.route('/initialize', methods=['POST'])
def initialize():
    """Run ``_lsh.lsh`` for the query in the request using the ``preprocess()`` parameters.

    The request body is JSON with a ``"query"`` sequence. The stored windows
    are loaded from ``processed-data.npy``.

    Returns:
        The orjson-encoded object with keys ``"hash_functions"``,
        ``"candidates"``, ``"distances"`` and ``"parameters"`` (the
        ``[r, a, sd]`` values that were used, as floats).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)

    # Append a trailing axis of length 1 to the windows and to the query
    # before handing them to _lsh.lsh.
    data = np.load('processed-data.npy')
    data = np.reshape(data, (len(data), len(data[0]), 1))

    query = raw_data["query"]
    query = np.reshape(query, (len(query), 1))

    r, a, sd = preprocess()
    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)

    response = orjson.dumps({
        "hash_functions": hf.tolist(),
        "candidates": candidates.tolist(),
        "distances": distances.tolist(),
        "parameters": [float(r), float(a), float(sd)],
    })
    print('LSH done: ' + str(time() - t0))
    return response

@app.route('/update', methods=['POST'])
def update():
    """Re-run ``_lsh.lsh`` with client-supplied hash functions and parameters.

    The request body is JSON with ``"hash_functions"``, ``"query"`` and
    ``"parameters"`` (the first three entries are passed on in order).

    Returns:
        The orjson-encoded object with keys ``"hash_functions"``,
        ``"distances"`` and ``"candidates"``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)

    # Append a trailing axis of length 1 before handing the windows to _lsh.lsh.
    data = np.load('processed-data.npy')
    data = np.reshape(data, (len(data), len(data[0]), 1))

    # Rescale the supplied hash functions to the [0, 1] range.
    # NOTE(review): np.ptp is 0 when all entries are equal, which makes this a
    # division by zero (NaN/inf output) - confirm whether that input can occur.
    hash_functions = np.array(raw_data["hash_functions"], dtype='double')
    hash_functions = (hash_functions - np.min(hash_functions)) / np.ptp(hash_functions)
    hash_functions = np.reshape(hash_functions, (len(data[0]), 1))

    query = raw_data["query"]
    query = np.reshape(query, (len(query), 1))

    parameters = raw_data["parameters"]

    candidates, distances, hf = _lsh.lsh(
        data, query, parameters[0], parameters[1], parameters[2], hash_functions
    )
    response = orjson.dumps({
        "hash_functions": hf.tolist(),
        "distances": distances.tolist(),
        "candidates": candidates.tolist(),
    })
    print('LSH done: ' + str(time() - t0))
    return response

@app.route('/query', methods=['POST'])
def query():
    """Return a query series built from one or several stored windows.

    The request body is JSON with a ``"window"`` entry:

    * an ``int`` - the stored window at that index is returned as is;
    * otherwise a mapping - every key whose value is exactly ``True`` is
      converted to an ``int`` index, and the selected windows are combined
      with ``performDBA``.

    Returns:
        The orjson-encoded list form of the resulting series.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    windowIndices = raw_data['window']

    # Both branches read the same file, so load it once.
    windows = np.load('processed-data.npy')

    if isinstance(windowIndices, int):
        output = windows[windowIndices]
    else:
        indices = [int(index) for index, value in windowIndices.items() if value is True]
        output = performDBA(windows[indices])

    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/window', methods=['POST'])
def window():
    """Return the stored windows selected by the request's ``"indices"`` entry.

    Returns:
        The orjson-encoded list form of ``processed-data.npy`` indexed with
        the supplied indices.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    indices = raw_data['indices']
    output = np.load('processed-data.npy')[indices]
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise groups of windows and compare the group averages pairwise.

    The request body is JSON with ``"windows"``: a list of index lists, one
    per group. For each group the element-wise average and the average
    plus/minus one standard deviation are computed over the selected rows of
    ``processed-data.npy``.

    Returns:
        The orjson-encoded object with ``"prototypes"`` (one
        ``{"average", "max", "min"}`` entry per group) and ``"distances"``
        (square matrix of the second element returned by ``_ucrdtw.ucrdtw``
        for every ordered pair of group averages).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')

    prototypes = []
    for windows in all_windows:
        actual_windows = data[windows]
        average_values = np.average(actual_windows, 0)
        std_values = np.std(actual_windows, 0)
        prototypes.append({
            'average': average_values.tolist(),
            'max': (average_values + std_values).tolist(),
            'min': (average_values - std_values).tolist(),
        })

    # Convert each average back to an array once instead of once per pair.
    averages = [np.array(prototype['average']) for prototype in prototypes]
    warp = 0.05 * 120
    distances = [
        [_ucrdtw.ucrdtw(first, second, warp, False)[1] for second in averages]
        for first in averages
    ]

    response = orjson.dumps({'prototypes': prototypes, 'distances': distances})
    print("Averages calculated: " + str(time() - t0))
    return response

def preprocess(recompute=False):
    """Return the ``(r, a, sd)`` parameters that are passed to ``_lsh.lsh``.

    Args:
        recompute: When false (the default) the hard-coded values below are
            returned immediately and no file is touched. When true the values
            are derived again from ``processed-data.npy``; this compares every
            window against a growing set of representatives and then runs DTW
            on every ordered pair of representatives.

    Returns:
        Tuple ``(r, a, sd)``. ``a`` and ``sd`` are the mean and standard
        deviation of the DTW/Euclidean distance ratios; ``r`` is
        ``theta / ((a - sd) * sqrt(120))`` with
        ``theta = mean_dtw - 2.58 * sd_dtw``.

    Raises:
        ValueError: If ``recompute`` is true and fewer than two
            representative windows are found, so no pair can be measured.
    """
    if not recompute:
        # NOTE(review): presumably the result of an earlier full run on the
        # current data set - confirm before relying on them for other data.
        return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037

    data = np.load('processed-data.npy')
    data = np.array(data, dtype='double')
    # Insert a middle axis of length 1: (n, m) -> (n, 1, m).
    data = np.reshape(data, (len(data), 1, len(data[0])))
    t0 = time()

    # Greedily keep a window only if it lies at least `cover_radius` away
    # (Euclidean norm) from every window kept so far.
    cover_radius = 3
    subset = []
    for i, window in enumerate(data):
        if i % 10000 == 0:
            print(str(i) + ':' + str(len(subset)))
        if not any(np.linalg.norm(window - data[s]) < cover_radius for s in subset):
            subset.append(i)

    dtw_distances = []
    eq_distances = []
    for i, index_1 in enumerate(subset):
        print(i)
        for index_2 in subset:
            if index_1 == index_2:
                continue
            eq_distances.append(np.linalg.norm(data[index_1] - data[index_2]))
            dtw_distances.append(
                dtw.dtw(
                    data[index_1],
                    data[index_2],
                    dist_method="Euclidean",
                    window_type="sakoechiba",
                    window_args={"window_size": 120},
                ).distance
            )

    if not dtw_distances:
        raise ValueError('preprocess needs at least two representative windows')

    ratios = np.array(dtw_distances) / np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    r = theta / ((a - sd) * math.sqrt(120))

    # str() is required: concatenating a str with a float raises TypeError.
    print('Mean: ' + str(mean_dtw))
    print('Stdev: ' + str(sd_dtw))
    print('Ratio mean: ' + str(a))
    print('Ratio stdev: ' + str(sd))
    print('Theta: ' + str(theta))
    print('r: ' + str(r))
    print('Preprocessing time: ' + str(time() - t0))
    return r, a, sd


def debug_test_lsh():
    """Manual check: print LSH candidates next to a brute-force DTW ranking.

    Calls ``preprocess()`` and ``create_windows()``, builds a query with
    ``performDBA`` from two fixed stored windows, runs ``_lsh.lsh`` and prints
    the first candidates/distances plus the positions of the two source
    windows among the candidates. It then ranks every stored window by the
    second element returned by ``_ucrdtw.ucrdtw`` against the first fixed
    window and prints the ten best indices.

    A previously commented-out experiment that counted how many of the
    top-k DTW indices appear among the first candidates was removed.
    """
    r, a, sd = preprocess()
    create_windows()

    query_n = 80503
    second_n = 11514

    windows = np.load('processed-data.npy')

    query = performDBA(windows[[query_n, second_n]])
    query = np.reshape(query, (len(windows[0]), 1))

    # Double-precision copy with a trailing axis of length 1 for _lsh.lsh.
    data = np.array(windows, dtype='double')
    data = np.reshape(data, (len(data), len(data[0]), 1))

    candidates, distances, _ = _lsh.lsh(data, query, r, a, sd)

    print(repr(candidates[0:20]))
    print(distances[0:10])
    print(np.where(candidates == query_n))
    print(np.where(candidates == second_n))

    # Brute-force reference on the unmodified stored windows.
    reference = windows[query_n]
    warp = 0.05 * 120
    dtw_distances = [_ucrdtw.ucrdtw(row, reference, warp, False)[1] for row in windows]
    topk_dtw = sorted(range(len(dtw_distances)), key=dtw_distances.__getitem__)
    print(topk_dtw[0:10])


# debug_test_lsh()  # uncomment to run the manual LSH check; the function takes no arguments