main.py 8.33 KB
Newer Older
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
1
from flask import Flask, request
2
3
4
import numpy as np
from flask_cors import CORS
from time import time
5
import orjson
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
6
7
import bigwig
import bbi
8
import _ucrdtw
9
10
11
12
import _lsh
import dtw
import math
from random import sample
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
13
from DBA import performDBA
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
14

15
# Gate read by create_windows(): while this is falsy the endpoint skips
# rebuilding the window files and only reports its elapsed time.
reload = False
16
17
18
19
20
21
22
23
24
25

# Flask application object; the @app.route decorators below register their
# handlers on it.
app = Flask(__name__)
# Wrap the app with flask_cors' CORS helper (no options passed here).
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer GET / with the fixed string "hi"."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Serve a binned view of chr1 from ``test.bigWig`` as JSON bytes.

    Returns:
        orjson-encoded bytes of a dict with two keys: "index", the offsets
        ``0, step, 2*step, ...`` below the chromosome size where
        ``step = int(size / bins)``, and "values", the array returned by
        ``bigwig.get`` converted to a list.
    """
    started = time()
    bigwig_path = 'test.bigWig'
    chromosome = 'chr1'
    bins = 100000

    size = bbi.chromsizes(bigwig_path)[chromosome]
    data = bigwig.get(bigwig_path, chromosome, 0, size, bins)
    print(data.shape)

    payload = {
        "index": list(range(0, size, int(size / bins))),
        "values": data.tolist(),
    }
    encoded = orjson.dumps(payload)
    print('Data read: ' + str(time() - started))
    return encoded

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Chunk chr1 of ``test.bigWig`` into windows and write them to disk.

    The work only happens while the module-level ``reload`` flag is truthy;
    otherwise nothing is read or written. When it runs, the request's JSON
    body must provide ``parameters.windowsize``, and three files are written
    to the working directory:

    * ``processed-data.npy`` -- binary dump of the chunked array (``np.save``
      appends the ``.npy`` suffix),
    * ``processed-data``     -- the same array as space-separated text,
    * ``query``              -- row 80503 of the array as text.

    Returns:
        The string '1', whether or not the data was rebuilt.
    """
    t0 = time()
    if reload:
        raw_data = request.json
        window_size = int(raw_data['parameters']["windowsize"])

        # Shared by all three numeric arguments of bigwig.chunk below.
        # NOTE(review): presumably the window span in base pairs -- confirm
        # against bigwig.chunk's signature.
        span = 12000
        step_size = int(span / 6)

        data = bigwig.chunk(
            'test.bigWig',
            span,
            int(span / window_size),
            step_size,
            ['chr1'],
            verbose=True,
        )

        np.save('processed-data', data)
        np.savetxt('processed-data', data, delimiter=' ', fmt='%f')
        # Hard-coded row that debug_test_lsh() also uses as its query.
        np.savetxt('query', data[80503], delimiter=' ', fmt='%f')
    print('Windows created: ' + str(time()-t0))
    return '1'
72

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
73
74
75
@app.route('/initialize', methods=['POST'])
def initialize():
    """Run the first LSH pass for a query using the preprocess() parameters.

    The request body is JSON with a "query" sequence. The stored windows are
    loaded from ``processed-data.npy`` and both inputs get a trailing axis of
    length 1 before being handed to ``_lsh.lsh``.

    Returns:
        orjson-encoded bytes with "hash_functions", "candidates",
        "distances" and "parameters" (``[r, a, sd]`` as floats).
    """
    started = time()
    payload = orjson.loads(request.data)

    windows = np.load('processed-data.npy')
    target_shape = (len(windows), len(windows[0]), 1)
    windows = np.reshape(windows, target_shape)

    query = payload["query"]
    query = np.reshape(query, (len(query), 1))

    r, a, sd = preprocess()
    candidates, distances, hf = _lsh.lsh(windows, query, r, a, sd)

    encoded = orjson.dumps({
        "hash_functions": hf.tolist(),
        "candidates": candidates.tolist(),
        "distances": distances.tolist(),
        "parameters": [float(r), float(a), float(sd)],
    })
    print('LSH done: ' + str(time() - started))
    return encoded

@app.route('/update', methods=['POST'])
def update():
    """Re-run LSH for a query with caller-supplied parameters.

    The request body is JSON with "weights", "query" and "parameters"; the
    first three entries of "parameters" are forwarded to ``_lsh.lsh`` in
    order.

    Returns:
        orjson-encoded bytes with "hash_functions", "distances" and
        "candidates".
    """
    started = time()
    payload = orjson.loads(request.data)

    windows = np.load('processed-data.npy')
    target_shape = (len(windows), len(windows[0]), 1)
    windows = np.reshape(windows, target_shape)

    # Read but not used further down; a request without it still fails here.
    weights = payload["weights"]

    query = payload["query"]
    query = np.reshape(query, (len(query), 1))

    parameters = payload["parameters"]
    first, second, third = parameters[0], parameters[1], parameters[2]
    candidates, distances, hf = _lsh.lsh(windows, query, first, second, third)

    encoded = orjson.dumps({
        "hash_functions": hf.tolist(),
        "distances": distances.tolist(),
        "candidates": candidates.tolist(),
    })
    print('LSH done: ' + str(time() - started))
    return encoded

120
121
@app.route('/query', methods=['POST'])
def query():
    """Return one stored window, or the performDBA() result of several.

    The JSON body's "window" entry is either an int (a row of
    ``processed-data.npy`` is returned as-is) or a mapping whose keys are
    row numbers and whose values flag selection; rows flagged with exactly
    ``True`` are passed to ``performDBA``.

    Returns:
        orjson-encoded bytes of the resulting array as a list.
    """
    started = time()
    payload = orjson.loads(request.data)
    selection = payload['window']

    if not isinstance(selection, int):
        chosen = [int(key) for key, flag in selection.items() if flag is True]
        members = np.load('processed-data.npy')[chosen]
        result = performDBA(members)
    else:
        result = np.load('processed-data.npy')[selection]

    encoded = orjson.dumps(result.tolist())
    print("Query done: " + str(time() - started))
    return encoded

@app.route('/window', methods=['POST'])
def window():
    """Return the rows of ``processed-data.npy`` named by the request.

    The JSON body's "indices" entry is used directly to index the loaded
    array.

    Returns:
        orjson-encoded bytes of the selected rows as nested lists.
    """
    started = time()
    wanted = orjson.loads(request.data)['indices']
    selected = np.load('processed-data.npy')[wanted]
    encoded = orjson.dumps(selected.tolist())
    print("Query done: " + str(time() - started))
    return encoded

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
148
149
@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise groups of windows and compare the summaries pairwise.

    The JSON body's "windows" entry is a list of index lists. For each
    group the element-wise average and standard deviation over its rows are
    computed; "max" / "min" are the average plus / minus one standard
    deviation. Every pair of averages (including each with itself) is then
    compared with ``_ucrdtw.ucrdtw``.

    Returns:
        orjson-encoded bytes of ``{'prototypes': [...], 'distances': [[...]]}``.
    """
    started = time()
    payload = orjson.loads(request.data)
    groups = payload['windows']
    data = np.load('processed-data.npy')

    prototypes = []
    for members in groups:
        rows = data[members]
        mean = np.average(rows, 0)
        spread = np.std(rows, 0)
        upper = mean + spread
        lower = mean - spread
        prototypes.append({
            'average': mean.tolist(),
            'max': upper.tolist(),
            'min': lower.tolist(),
        })

    distances = []
    for left in prototypes:
        row = []
        for right in prototypes:
            # Fresh arrays per call, exactly as the one-line version did.
            outcome = _ucrdtw.ucrdtw(
                np.array(left["average"]),
                np.array(right["average"]),
                0.05 * 120,
                False,
            )
            row.append(outcome[1])
        distances.append(row)

    encoded = orjson.dumps({'prototypes': prototypes, 'distances': distances})
    print("Averages calculated: " + str(time() - started))
    return encoded

def preprocess(*, recompute=False):
    """Return the ``(r, a, sd)`` parameters handed to ``_lsh.lsh``.

    By default this returns the three constants that were hard-coded in this
    function (presumably the output of an earlier estimation run -- that
    cannot be verified from this file). Previously the estimation code sat
    behind that unconditional ``return`` and could never execute; it now
    lives in ``_estimate_lsh_parameters`` and is reachable on request.

    Args:
        recompute: When true, derive the parameters from
            ``processed-data.npy`` instead of using the constants. This is
            slow: it scans every window and runs DTW on all pairs of the
            selected subset.

    Returns:
        Tuple ``(r, a, sd)``.
    """
    if recompute:
        return _estimate_lsh_parameters()
    return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037


def _estimate_lsh_parameters():
    """Estimate ``(r, a, sd)`` from the windows in ``processed-data.npy``."""
    data = np.load('processed-data.npy')
    data = np.array(data, dtype='double')
    # Every window becomes a (1, length) array.
    # NOTE(review): dtw.dtw below receives the windows in this orientation --
    # confirm it is the one intended.
    data = np.reshape(data, (len(data), 1, len(data[0])))
    t0 = time()

    # Greedy subset: keep a window only if no already-kept window lies closer
    # than this Euclidean distance.
    min_separation = 3
    subset = []
    for i, window in enumerate(data):
        if i % 10000 == 0:
            print(str(i) + ':' + str(len(subset)))
        too_close = any(
            np.linalg.norm(window - data[s]) < min_separation for s in subset
        )
        if not too_close:
            subset.append(i)

    # DTW and Euclidean distance for every ordered pair of distinct windows.
    dtw_distances = []
    eq_distances = []
    for i, index_1 in enumerate(subset):
        print(i)
        for index_2 in subset:
            if index_1 == index_2:
                continue
            e = np.linalg.norm(data[index_1] - data[index_2])
            eq_distances.append(e)
            d = dtw.dtw(
                data[index_1],
                data[index_2],
                dist_method="Euclidean",
                window_type="sakoechiba",
                window_args={"window_size": 120},
            ).distance
            dtw_distances.append(d)

    ratios = np.array(dtw_distances) / np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    r = theta / ((a - sd) * math.sqrt(120))

    # str() is required: concatenating a str with a numpy float raises
    # TypeError.
    print('Mean: ' + str(mean_dtw))
    print('Stdev: ' + str(sd_dtw))
    print('Ratio mean: ' + str(a))
    print('Ratio stdev: ' + str(sd))
    print('Theta: ' + str(theta))
    print('r: ' + str(r))
    print('Preprocessing time: ' + str(time() - t0))
    return r, a, sd
225

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
226
227
def debug_test_lsh():
    """Print how many of the top-20 DTW neighbours LSH also ranks in its top 20.

    Uses window 80503 of ``processed-data.npy`` as the query, runs
    ``_lsh.lsh`` with the preprocess() parameters, then computes
    ``_ucrdtw.ucrdtw`` against every window and counts the overlap between
    the two top-20 lists. Intermediate results are printed along the way.
    """
    r, a, sd = preprocess()
    create_windows()

    query_n = 80503
    stored = np.load('processed-data.npy')

    # LSH inputs: query and data both get a trailing axis of length 1.
    lsh_query = stored[query_n]  # performDBA(data[[80503, 11514]])
    lsh_query = np.reshape(lsh_query, (len(stored[0]), 1))
    lsh_data = np.array(stored, dtype='double')
    lsh_data = np.reshape(lsh_data, (len(lsh_data), len(lsh_data[0]), 1))
    lsh_data = np.repeat(lsh_data, repeats=1, axis=2)

    candidates, distances, hf = _lsh.lsh(lsh_data, lsh_query, r, a, sd)
    print(repr(candidates[0:20]))
    print(distances[0:10])

    # Reference ranking on the freshly reloaded, unreshaped windows.
    windows = np.load('processed-data.npy')
    reference = windows[query_n]
    print(windows[0])
    dtw_distances = [
        _ucrdtw.ucrdtw(candidate, reference, 0.05, False)[1]
        for candidate in windows
    ]
    print(sorted(dtw_distances)[0:10])

    topk_dtw = sorted(range(len(dtw_distances)), key=dtw_distances.__getitem__)
    print(topk_dtw[0:10])

    top_candidates = candidates[0:20]
    accuracy = sum(1 for index in topk_dtw[0:20] if index in top_candidates)
    print(accuracy)
258

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
259
if __name__ == '__main__':
    # Run the LSH benchmark only when this file is executed as a script, so
    # that merely importing the module does not start the full DTW comparison
    # over every stored window.
    debug_test_lsh()