"""Flask backend serving bigWig track data and LSH/DTW similarity queries."""
# Standard library
import math
from random import sample
from time import time

# Third-party packages
import bbi
import dtw
import numpy as np
import orjson
from flask import Flask, request
from flask_cors import CORS

# Remaining modules used by the handlers (presumably project-local or
# compiled extensions -- TODO confirm and regroup if any are third-party).
import _lsh
import _ucrdtw
import bigwig
from DBA import performDBA

# When True, create_windows() rebuilds the cached window files on request;
# when False that endpoint only logs its timing and returns.
reload = False

app = Flask(__name__)
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Respond to ``GET /`` with the fixed string ``hi``."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Return the binned chr1 signal of ``test.bigWig``.

    The whole of chr1 is requested from ``bigwig.get`` in 100000 bins.

    Returns:
        orjson-encoded bytes of an object with two parallel lists:
        ``index`` (one integer base-pair position per value) and
        ``values`` (the array returned by ``bigwig.get``).
    """
    t0 = time()
    size = bbi.chromsizes('test.bigWig')['chr1']
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)
    # Build exactly one position per returned value. The previous
    # range(0, size, int(size / bins)) truncated the step, which produced
    # more index entries than values (and a drifting offset) whenever size
    # was not a multiple of bins, and raised when size < bins.
    # Assumes bigwig.get splits [0, size) into equally sized bins, so each
    # position is the start of its bin -- TODO confirm against bigwig.get.
    positions = np.linspace(0, size, num=len(data), endpoint=False).astype(np.int64)
    response = orjson.dumps({
        "index": positions.tolist(),
        "values": data.tolist()
    })
    print('Data read: ' + str(time()-t0))
    return response

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Rebuild the cached window files when the module-level ``reload`` is set.

    With ``reload`` falsy nothing is read or written; only the timing line is
    printed. Otherwise the JSON request body must contain
    ``parameters.windowsize`` and the following files are (re)written in the
    working directory:

    * ``processed-data.npy`` -- the array returned by ``bigwig.chunk``
    * ``processed-data``     -- the same array as space-separated text
    * ``query``              -- row 80503 of that array as text

    Returns:
        The string ``'1'``.
    """
    t0 = time()
    if reload:
        raw_data = request.json
        window_size = int(raw_data['parameters']["windowsize"])
        window_bp = 12000              # length of one window in base pairs
        step_bp = int(window_bp / 6)   # offset between consecutive windows
        data = bigwig.chunk(
            'test.bigWig',
            window_bp,
            # window length divided by the requested window size; the meaning
            # of this argument is defined by bigwig.chunk (not visible here).
            int(window_bp / window_size),
            step_bp,
            ['chr1'],
            verbose=True,
        )
        # np.save appends ".npy", so this and the text dump below are
        # two different files.
        np.save('processed-data', data)
        np.savetxt('processed-data', data, delimiter=' ', fmt='%f')
        # Hard-coded example window; raises IndexError if fewer windows exist.
        np.savetxt('query', data[80503], delimiter=' ', fmt='%f')
    print('Windows created: ' + str(time()-t0))
    return '1'


@app.route('/initialize', methods=['POST'])
def initialize():
    """Estimate LSH parameters from the cached windows and run a first search.

    Expects a JSON body with a ``query`` list. The cached windows are loaded
    from ``processed-data.npy``, the parameters ``(r, a, sd)`` are computed by
    ``preprocess`` and passed to ``_lsh.lsh`` together with the query.

    Returns:
        orjson-encoded bytes of an object with ``hash_functions``,
        ``candidates`` and ``distances`` (the three results of ``_lsh.lsh``)
        and ``parameters`` (``[r, a, sd]`` as floats).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)

    # Append a trailing axis of length 1 to both the windows and the query
    # before handing them to preprocess / _lsh.lsh.
    data = np.load('processed-data.npy')
    data = np.reshape(data, (len(data), len(data[0]), 1))
    query = raw_data["query"]
    query = np.reshape(query, (len(query), 1))

    r, a, sd = preprocess(data)
    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)

    response = orjson.dumps({
        "hash_functions": hf.tolist(),
        "candidates": candidates.tolist(),
        "distances": distances.tolist(),
        "parameters": [float(r), float(a), float(sd)]
    })
    print('LSH done: ' + str(time()-t0))
    return response


@app.route('/weights', methods=['POST'])
def weights():
    """Placeholder endpoint for computing weights from user labels.

    Expects a JSON body with a ``labels`` key. The calculation itself does
    not exist yet, so the endpoint answers with HTTP 501 and a JSON error.
    Previously it returned the view function object itself, which Flask
    cannot convert into a response and therefore always failed with a 500.

    Returns:
        A ``(body, status)`` tuple: orjson-encoded error object and 501.
    """
    raw_data = orjson.loads(request.data)
    # Kept so that a request without "labels" still fails fast; this is the
    # input the future calculation will consume.
    labels = raw_data["labels"]

    # TODO: calculate weights from `labels` and return them here.

    response = orjson.dumps({"error": "weight calculation is not implemented"})
    return response, 501


@app.route('/update', methods=['POST'])
def update():
    """Re-run the LSH search with parameters supplied by the client.

    Expects a JSON body with ``weights``, ``query`` and ``parameters``. The
    first three entries of ``parameters`` are passed to ``_lsh.lsh`` in the
    positions where ``initialize`` passes ``r``, ``a`` and ``sd``.

    Returns:
        orjson-encoded bytes of an object with ``hash_functions``,
        ``distances`` and ``candidates`` (the three results of ``_lsh.lsh``).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)

    # Append a trailing axis of length 1 to both the windows and the query
    # before handing them to _lsh.lsh.
    data = np.load('processed-data.npy')
    data = np.reshape(data, (len(data), len(data[0]), 1))
    # Read so that a request without "weights" still fails as before; the
    # value is not used by the search yet.
    weights = raw_data["weights"]
    query = raw_data["query"]
    query = np.reshape(query, (len(query), 1))
    parameters = raw_data["parameters"]

    candidates, distances, hf = _lsh.lsh(
        data, query, parameters[0], parameters[1], parameters[2]
    )
    response = orjson.dumps({
        "hash_functions": hf.tolist(),
        "distances": distances.tolist(),
        "candidates": candidates.tolist()
    })
    print('LSH done: ' + str(time()-t0))
    return response

@app.route('/query', methods=['POST'])
def query():
    """Return a query series built from one or several cached windows.

    Expects a JSON body with a ``window`` key that is either

    * an integer -- the row of ``processed-data.npy`` at that index is
      returned unchanged, or
    * an object mapping window indices (as strings) to booleans -- the rows
      whose value is exactly ``true`` are combined with ``performDBA``.

    Returns:
        orjson-encoded bytes of the resulting series as a (nested) list.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    selection = raw_data['window']
    if isinstance(selection, int):
        output = np.load('processed-data.npy')[selection]
    else:
        # Only entries explicitly set to True are selected.
        indices = [int(index) for index, value in selection.items() if value is True]
        data = np.load('processed-data.npy')[indices]
        output = performDBA(data)
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/window', methods=['POST'])
def window():
    """Return the cached windows selected by the request.

    Expects a JSON body with an ``indices`` key, which is used directly to
    index the array stored in ``processed-data.npy``.

    Returns:
        orjson-encoded bytes of the selected rows as a (nested) list.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    indices = raw_data['indices']
    output = np.load('processed-data.npy')[indices]
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/table-info', methods=['POST'])
def table_info():
    """Summarise groups of windows and compare the group averages.

    Expects a JSON body with a ``windows`` key holding a list of index lists.
    For every index list the selected rows of ``processed-data.npy`` are
    reduced along the first axis to a prototype with ``average``,
    ``max`` (average + standard deviation) and ``min`` (average - standard
    deviation). Afterwards ``_ucrdtw.ucrdtw`` is evaluated for every ordered
    pair of prototype averages (including each prototype with itself).

    Returns:
        orjson-encoded bytes of an object with ``prototypes`` (list of the
        dicts described above) and ``distances`` (square nested list).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')

    prototypes = []
    for windows in all_windows:
        actual_windows = data[windows]
        average_values = np.average(actual_windows, 0)
        std_values = np.std(actual_windows, 0)
        prototypes.append({
            'average': average_values.tolist(),
            'max': (average_values + std_values).tolist(),
            'min': (average_values - std_values).tolist()
        })

    # Element [1] of the ucrdtw result is used as the distance. The third
    # argument is kept as 0.05 * 120 exactly as before (120 is hard-coded;
    # presumably the window length -- TODO confirm). Fresh arrays are built
    # for every call so the extension never sees a shared buffer.
    distances = [
        [
            _ucrdtw.ucrdtw(np.array(v["average"]), np.array(w["average"]), 0.05 * 120, False)[1]
            for w in prototypes
        ]
        for v in prototypes
    ]
    response = orjson.dumps({'prototypes': prototypes, 'distances': distances})
    print("Averages calculated: " + str(time() - t0))
    return response

def preprocess(data):
    """Estimate the parameters ``(r, a, sd)`` that are passed to ``_lsh.lsh``.

    The estimate is made in three steps:

    1. Greedily select representative windows: walking through ``data`` in
       order, a window is kept only if its Euclidean distance to every window
       kept so far is at least 3.
    2. For every ordered pair of distinct representatives compute the
       Euclidean distance and element ``[1]`` of
       ``_ucrdtw.ucrdtw(x, y, 0.05, False)`` (used as the DTW distance).
    3. ``a`` and ``sd`` are the mean and standard deviation of the
       DTW / Euclidean ratios, ``theta = mean_dtw - 2.58 * sd_dtw`` and
       ``r = theta / ((a - sd) * sqrt(120))``.

    Progress and the resulting statistics are printed to stdout.

    Args:
        data: array-like of windows, indexed by window along the first axis.
            It is converted to a float64 array.

    Returns:
        Tuple ``(r, a, sd)``. When fewer than two representatives are found
        there are no pairs to compare and all three values are NaN.
    """
    data = np.array(data, dtype='double')
    subset = []
    t0 = time()

    # Minimum Euclidean distance between two representatives. This is a
    # different quantity from the ``r`` that is returned.
    selection_radius = 3
    for i, window in enumerate(data):
        if i % 10000 == 0:
            print(str(i) + ':' + str(len(subset)))
        for s in subset:
            if np.linalg.norm(window - data[s]) < selection_radius:
                break
        else:
            # No already selected window is closer than the radius.
            subset.append(i)

    dtw_distances = []
    eq_distances = []
    for i, index_1 in enumerate(subset):
        print(i)
        for index_2 in subset:
            if index_1 == index_2:
                continue
            eq_distances.append(np.linalg.norm(data[index_1] - data[index_2]))
            dtw_distances.append(
                _ucrdtw.ucrdtw(data[index_1], data[index_2], 0.05, False)[1]
            )

    ratios = np.array(dtw_distances)/np.array(eq_distances)
    mean_dtw = np.mean(dtw_distances)
    sd_dtw = np.std(dtw_distances)
    a = np.mean(ratios)
    sd = np.std(ratios)
    theta = mean_dtw + -2.58 * sd_dtw
    # 120 is hard-coded; presumably the number of points per window --
    # TODO confirm before using other window sizes.
    r = theta / ((a-sd)*math.sqrt(120))
    print('Mean: ' + str(mean_dtw))
    print('Stdev: ' + str(sd_dtw))
    print('Ratio mean: ' + str(a))
    print('Ratio stdev: ' + str(sd))
    print('Theta: ' + str(theta))
    print('r: ' + str(r))
    print('Preprocessing time: ' + str(time() - t0))
    return r, a, sd


def debug_test_lsh():
    """Manually compare the LSH candidates with a brute-force DTW ranking.

    Uses window 80503 of ``processed-data.npy`` as the query, runs
    ``_lsh.lsh`` with parameters from ``preprocess`` and prints how many of
    the 20 windows with the smallest ``_ucrdtw.ucrdtw`` result also occur in
    the first 20 LSH candidates. Everything is reported via ``print``;
    nothing is returned.

    NOTE(review): ``create_windows()`` is called after the data has already
    been loaded and it reads the Flask request when ``reload`` is True, so
    this helper only works outside a request while ``reload`` is False.
    """
    data = np.load('processed-data.npy')
    r, a, sd = preprocess(data)
    create_windows()
    query_n = 80503
    query = np.reshape(data[query_n], (len(data[0]), 1))
    # float64 copy with a trailing axis of length 1, as passed to _lsh.lsh
    data = np.array(data, dtype='double')
    data = np.reshape(data, (len(data), len(data[0]), 1))

    candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
    print(repr(candidates[0:20]))
    print(distances[0:10])

    # Brute-force reference on the windows as stored on disk.
    data = np.load('processed-data.npy')
    query = data[query_n]
    print(data[0])
    distances = [_ucrdtw.ucrdtw(window, query, 0.05, False)[1] for window in data]
    print(sorted(distances)[0:10])
    topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k])
    print(topk_dtw[0:10])

    # Overlap between the 20 best brute-force hits and the first 20 candidates.
    top_candidates = candidates[0:20]
    accuracy = sum(1 for index in topk_dtw[0:20] if index in top_candidates)
    print(accuracy)


# debug_test_lsh()