import math
from random import sample
from time import time

import bbi
import dtw
import numpy as np
import orjson
from flask import Flask, request
from flask_cors import CORS

import _lsh
import _ucrdtw
import bigwig
from DBA import performDBA

# Gate read by create_windows(): the window files are only rebuilt from
# test.bigWig when this is True.
reload = False

app = Flask(__name__)
# Wrap the app with flask-cors so browser clients on other origins can call it.
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer ``GET /`` with the fixed body ``hi``."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Return a binned view of chr1 from ``test.bigWig`` as JSON.

    Returns:
        bytes: orjson-encoded object with two keys:
            ``"index"``  - positions ``0, step, 2*step, ...`` below the
                           chromosome size, with ``step = size // bins``;
            ``"values"`` - the array returned by ``bigwig.get`` for the whole
                           chromosome and ``bins`` bins, as a list.
    """
    t0 = time()
    size = bbi.chromsizes('test.bigWig')['chr1']
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)

    # A chromosome shorter than `bins` would yield a step of 0, which range()
    # rejects with ValueError; clamp to 1 so the endpoint still answers.
    step = max(1, int(size) // bins)
    # NOTE(review): when size is not a multiple of bins, "index" can hold more
    # entries than "values" - confirm the client tolerates that.
    response = orjson.dumps({
        "index": list(range(0, size, step)),
        "values": data.tolist(),
    })
    print('Data read: ' + str(time() - t0))
    return response

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Rebuild the cached window files, but only when ``reload`` is set.

    With ``reload`` true, the request JSON must contain
    ``parameters.windowsize``. The windows returned by ``bigwig.chunk`` for
    chr1 of ``test.bigWig`` are written to:

    * ``processed-data.npy`` (``np.save`` appends the extension),
    * ``processed-data`` (space-separated text),
    * ``query`` (text dump of row 80503, if that row exists).

    With ``reload`` false nothing is read or written.

    Returns:
        str: always ``'1'``.
    """
    t0 = time()
    if reload:
        raw_data = request.json
        window_size = int(raw_data['parameters']["windowsize"])

        # NOTE(review): the three numeric arguments are assumed to be window
        # length, resolution and step size - confirm against bigwig.chunk.
        span = 12000
        data = bigwig.chunk(
            'test.bigWig',
            span,
            int(span / window_size),
            int(span / 6),
            ['chr1'],
            verbose=True,
        )
        np.save('processed-data', data)
        np.savetxt('processed-data', data, delimiter=' ', fmt='%f')

        # Row 80503 is a hard-coded sample query; skip the dump instead of
        # raising IndexError when the chunking produced fewer windows.
        query_row = 80503
        if len(data) > query_row:
            np.savetxt('query', data[query_row], delimiter=' ', fmt='%f')
        else:
            print('Sample query row ' + str(query_row) + ' not available; skipped')
    print('Windows created: ' + str(time() - t0))
    return '1'

@app.route('/initialize', methods=['POST'])
def initialize():
    """Run ``_lsh.lsh`` for a query using the parameters from ``preprocess()``.

    Request body: JSON object with key ``"query"`` (a sequence of numbers).

    Returns:
        bytes: orjson-encoded object with keys ``"hash_functions"``,
        ``"candidates"``, ``"distances"`` (the three arrays returned by
        ``_lsh.lsh``, as lists) and ``"parameters"`` (``[r, a, sd]`` as
        floats).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)

    # Add a trailing axis of length 1 to the stored windows and to the query
    # before handing them to _lsh.lsh.
    data = np.load('processed-data.npy')
    data = np.reshape(data, (len(data), len(data[0]), 1))
    # Named query_series so the local does not shadow the `query` view below.
    query_series = raw_data["query"]
    query_series = np.reshape(query_series, (len(query_series), 1))

    r, a, sd = preprocess()
    candidates, distances, hf = _lsh.lsh(data, query_series, r, a, sd)

    response = orjson.dumps({
        "hash_functions": hf.tolist(),
        "candidates": candidates.tolist(),
        "distances": distances.tolist(),
        "parameters": [float(r), float(a), float(sd)],
    })
    print('LSH done: ' + str(time() - t0))
    return response

@app.route('/update', methods=['POST'])
def update():
    """Re-run ``_lsh.lsh`` with client-supplied hash functions and parameters.

    Request body: JSON object with keys
        ``"hash_functions"`` - numbers, one per position of a stored window;
        ``"query"``          - the query sequence;
        ``"parameters"``     - three values passed to ``_lsh.lsh`` in the
                               positions where ``initialize`` passes
                               ``r, a, sd``.

    Returns:
        bytes: orjson-encoded object with keys ``"hash_functions"``,
        ``"distances"`` and ``"candidates"``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    data = np.reshape(data, (len(data), len(data[0]), 1))

    # Min-max scale the supplied hash functions to [0, 1].
    hash_functions = np.array(raw_data["hash_functions"], dtype='double')
    spread = np.ptp(hash_functions)
    if spread == 0:
        # All values equal: the plain formula would be 0/0 and feed NaN into
        # the LSH; use zeros instead.
        hash_functions = np.zeros_like(hash_functions)
    else:
        hash_functions = (hash_functions - np.min(hash_functions)) / spread
    hash_functions = np.reshape(hash_functions, (len(data[0]), 1))

    # Named query_series so the local does not shadow the `query` view below.
    query_series = raw_data["query"]
    query_series = np.reshape(query_series, (len(query_series), 1))
    parameters = raw_data["parameters"]

    candidates, distances, hf = _lsh.lsh(
        data, query_series, parameters[0], parameters[1], parameters[2], hash_functions
    )
    response = orjson.dumps({
        "hash_functions": hf.tolist(),
        "distances": distances.tolist(),
        "candidates": candidates.tolist(),
    })
    print('LSH done: ' + str(time() - t0))
    return response

@app.route('/query', methods=['POST'])
def query():
    """Return one stored window, or the DBA result of several, as JSON.

    Request body: JSON object with key ``"window"`` which is either

    * an integer - that row of ``processed-data.npy`` is returned; or
    * an object mapping row indices (as strings) to booleans - the rows whose
      value is exactly ``true`` are passed to ``performDBA`` and its result
      is returned.

    Returns:
        bytes: orjson-encoded list.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    selection = raw_data['window']
    windows = np.load('processed-data.npy')

    if isinstance(selection, int):
        output = windows[selection]
    else:
        indices = [int(index) for index, value in selection.items() if value is True]
        output = performDBA(windows[indices])

    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/window', methods=['POST'])
def window():
    """Return the stored windows selected by the request as JSON.

    Request body: JSON object with key ``"indices"``, used directly to index
    the array loaded from ``processed-data.npy``.

    Returns:
        bytes: orjson-encoded (nested) list of the selected rows.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    indices = raw_data['indices']
    output = np.load('processed-data.npy')[indices]
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise groups of windows and compare the group averages pairwise.

    Request body: JSON object with key ``"windows"``: a list of index lists,
    one per group, into ``processed-data.npy``.

    Returns:
        bytes: orjson-encoded object with keys
            ``"prototypes"`` - per group, the element-wise ``"average"`` and
                               the band ``"max"`` / ``"min"`` = average plus /
                               minus one standard deviation;
            ``"distances"``  - square matrix holding element ``[1]`` of the
                               ``_ucrdtw.ucrdtw`` result for every ordered
                               pair of group averages.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')

    prototypes = []
    for windows in all_windows:
        members = data[windows]
        average_values = np.average(members, 0)
        std_values = np.std(members, 0)
        prototypes.append({
            'average': average_values.tolist(),
            'max': (average_values + std_values).tolist(),
            'min': (average_values - std_values).tolist(),
        })

    # Third ucrdtw argument, the same value the original passed inline.
    warp_width = 0.05 * 120
    distances = [
        [
            _ucrdtw.ucrdtw(
                np.array(first["average"]),
                np.array(second["average"]),
                warp_width,
                False,
            )[1]
            for second in prototypes
        ]
        for first in prototypes
    ]
    response = orjson.dumps({'prototypes': prototypes, 'distances': distances})
    print("Averages calculated: " + str(time() - t0))
    return response

def preprocess():
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
    return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037
    data = np.load('processed-data.npy')
    data = np.array(data, dtype='double')
    data = np.reshape(data, (int(len(data) / 1), 1, len(data[0])))
    data = np.repeat(data, repeats=1, axis=1)
    subset = []
    t0 = time()

    r = 3
    for i, window in enumerate(data):
        if i % 10000 == 0:
            print(str(i) + ':' + str(len(subset)))
        state = 1
        for s in subset:
            if np.linalg.norm(window - data[s]) < r:
                state = 0
                break
        if state == 1:
            subset.append(i)

    # subset = sample(list(range(len(data))), 50)
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
196

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
197 198 199 200 201 202 203 204 205 206 207
    dtw_distances = []
    eq_distances = []
    for i, index_1 in enumerate(subset):
        print(i)
        for j, index_2 in enumerate(subset):
            if index_1 == index_2:
                continue
            e = np.linalg.norm(data[index_1] - data[index_2])
            eq_distances.append(e)
            d = dtw.dtw(data[index_1], data[index_2], dist_method="Euclidean", window_type="sakoechiba", window_args={"window_size": 120}).distance
            dtw_distances.append(d)
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
208

Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
209 210 211 212 213 214 215 216 217 218 219
    ratios = np.array(dtw_distances)/np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    mean_eq = np.mean(eq_distances)
    sd_eq = np.std(eq_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    # theta = mean_eq + -2.58 * sd_eq
    r = theta / ((a-sd)*math.sqrt(120))
    # r = theta / (math.sqrt(120))
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
220 221 222 223 224 225 226
    print('Mean: ' + mean_dtw)
    print('Stdev: ' + sd_dtw)
    print('Ratio mean: ' + a)
    print('Ratio stdev: ' + sd)
    print('Theta: ' + theta)
    print('r: ' + r)
    print('Preprocessing time: ' + str(time() - t0))
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
227
    return r, a, sd
228

def debug_test_lsh():
    """Manual check: print LSH candidates next to a brute-force DTW ranking.

    Steps:
      1. Take ``(r, a, sd)`` from ``preprocess()`` and call
         ``create_windows()``.
      2. Build a query with ``performDBA`` from stored rows 80503 and 11514,
         run ``_lsh.lsh`` and print the first candidates and distances plus
         the positions of both source rows in the candidate list.
      3. Rank every stored window against row 80503 using element ``[1]`` of
         ``_ucrdtw.ucrdtw`` and print the ten best indices.

    Earlier revisions also counted how many of ``topk_dtw[:k]`` appeared in
    ``candidates[:m]`` for several ``k``/``m``; that code was commented out
    and has been removed.
    """
    r, a, sd = preprocess()
    create_windows()
    query_n = 80503
    other_n = 11514

    # Keep the 2-D array as loaded for the brute-force pass; the LSH call gets
    # a double-precision copy with a trailing axis of length 1.
    windows = np.load('processed-data.npy')
    query = performDBA(windows[[query_n, other_n]])
    query = np.reshape(query, (len(windows[0]), 1))
    data = np.array(windows, dtype='double')
    data = np.reshape(data, (len(data), len(data[0]), 1))

    candidates, distances, _ = _lsh.lsh(data, query, r, a, sd)
    print(repr(candidates[0:20]))
    print(distances[0:10])
    print(np.where(candidates == query_n))
    print(np.where(candidates == other_n))

    query = windows[query_n]
    distances = [_ucrdtw.ucrdtw(row, query, 0.05 * 120, False)[1] for row in windows]
    topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k])
    print(topk_dtw[0:10])

# debug_test_lsh()