main.py 9.12 KB
Newer Older
1
2
3
4
from flask import Flask, jsonify, request
import pandas as pd
import numpy as np
from flask_cors import CORS
5
from collections import defaultdict, Counter
6
from time import time
7
8
9
import os.path
import json
from sklearn import preprocessing
10
import orjson
11
import dask.dataframe as dd
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
12
13
14
15
import bigwig
import bbi

# Gate for the /create-windows endpoint: the bigWig file is only re-chunked
# (and 'processed-data.npy' rewritten) while this flag is truthy.
reload = False

app = Flask(__name__)
# Wrap the application with flask_cors using its default settings.
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer GET / with the fixed string 'hi'."""
    return "hi"

@app.route('/read-data', methods=['GET'])
def read_data():
    """Return a binned overview of chr1 from 'test.bigWig' as JSON bytes.

    The response object has two keys:
        "index":  one coordinate per returned value, spaced ``size // bins``
                  apart and starting at 0.
        "values": whatever ``bigwig.get`` returned, converted with ``tolist()``.

    Returns:
        bytes: the orjson-encoded response body.
    """
    t0 = time()
    size = int(bbi.chromsizes('test.bigWig')['chr1'])
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)
    values = data.tolist()
    # A chromosome shorter than `bins` would otherwise give a step of 0,
    # which range() rejects.
    step = max(1, size // bins)
    # Emit exactly one coordinate per value. range(0, size, step) yields
    # surplus trailing entries whenever size is not a multiple of bins, which
    # leaves "index" longer than "values".
    # NOTE(review): assumes bigwig.get returns one value per bin - confirm.
    response = orjson.dumps({
        "index": [position * step for position in range(len(values))],
        "values": values,
    })
    print('Data read: ' + str(time()-t0))
    return response

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Chunk chr1 of 'test.bigWig' into windows and cache them on disk.

    Work is only performed while the module-level ``reload`` flag is truthy.
    In that case the JSON request body must carry
    ``parameters.windowsize``; the chunked array is written with
    ``np.save('processed-data', ...)``. When the flag is falsy the request
    body is not read at all.

    Returns:
        str: always '1'.
    """
    t0 = time()
    if reload:
        raw_data = request.json
        window_size = int(raw_data['parameters']["windowsize"])
        # Positional arguments of bigwig.chunk are passed through unchanged;
        # NOTE(review): their meaning (span / resolution / step?) is defined
        # by the project-local bigwig module - confirm there.
        span = 12000
        data = bigwig.chunk(
            'test.bigWig',
            span,
            int(span / window_size),
            int(span / 6),
            ['chr1'],
            verbose=True,
        )
        print(data.shape)
        np.save('processed-data', data)
    print('Windows created: ' + str(time()-t0))
    return '1'

@app.route('/create-tables', methods=['POST'])
def create_tables():
    """Build LSH tables over the cached windows and return them as JSON bytes.

    Reads 'processed-data.npy' and the request body's ``parameters``
    (``windowsize``, ``hashsize``, ``tablesize``), then delegates to ``lsh``.

    Returns:
        bytes: orjson-encoded object mapping each table index (as a string)
        to ``{"hash": <hash function>, "entries": <signature -> indices>}``.
    """
    data = np.load('processed-data.npy')
    parameters = orjson.loads(request.data)['parameters']
    window_size = int(parameters["windowsize"])
    hash_size = int(parameters["hashsize"])
    table_size = int(parameters["tablesize"])

    hash_functions, tables = lsh(data, window_size, hash_size, table_size)

    response = {
        str(table_index): {
            "hash": hash_functions[table_index],
            "entries": tables[table_index],
        }
        for table_index in range(table_size)
    }
    return orjson.dumps(response)


def lsh(data, window_size, hash_size, table_size):
    """Build ``table_size`` hash tables over ``data``.

    For every table a fresh random hash function is drawn by
    ``calculate_signatures_random_weights`` and each row of ``data`` is filed
    under its signature.

    Args:
        data: the windows to index; row positions become the table entries.
        window_size: number of rows of each random hash function.
        hash_size: number of columns of each random hash function, i.e. the
            signature length.
        table_size: how many independent tables to build.

    Returns:
        tuple: ``(hash_functions, tables)`` where ``hash_functions[i]`` is the
        i-th hash function as nested lists and ``tables[i]`` is a
        ``defaultdict(list)`` mapping signature -> list of row positions.
    """
    t0 = time()
    print('Starting: ' + str(time() - t0))
    hash_functions = []
    print('Init time: ' + str(time() - t0))
    tables = []
    for _ in range(table_size):
        t1 = time()
        signatures, hash_function = calculate_signatures_random_weights(
            data, window_size=window_size, hash_size=hash_size)
        table = defaultdict(list)
        for position, signature in enumerate(signatures):
            table[signature].append(position)
        tables.append(table)
        # Stored as plain lists so the result is JSON-serialisable.
        hash_functions.append(hash_function.tolist())
        print(time() - t1)

    print('Creation time: ' + str(time() - t0))
    return hash_functions, tables


def calculate_signatures_random_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Turn ``data`` into bit-string signatures via a (random) projection.

    ``data`` is multiplied with ``hash_function`` using ``np.dot``; every
    strictly positive product becomes '1', everything else '0'.

    Args:
        data: one window (1-D) or a stack of windows (2-D).
        window_size: row count of the hash function drawn when
            ``hash_function`` is None.
        hash_size: column count of the hash function drawn when
            ``hash_function`` is None.
        hash_function: projection to reuse; when None a new one is sampled
            with ``np.random.uniform(-100, 100, ...)``.

    Returns:
        When the projected result is 1-D: the signature string only.
        Otherwise: ``(list_of_signature_strings, hash_function)``.
    """
    def to_bits(flags):
        # Render a sequence of booleans as a string of '1'/'0' characters.
        return ''.join('1' if flag else '0' for flag in flags)

    if hash_function is None:
        hash_function = np.random.uniform(-100, 100, size=(window_size, hash_size))
    is_positive = np.dot(data, hash_function) > 0
    if is_positive.ndim != 1:
        return [to_bits(row) for row in is_positive], hash_function
    return to_bits(is_positive)


@app.route('/similarity', methods=['POST'])
def similarity():
    """Group candidate neighbours of the query by how many tables agree.

    The request body carries ``query`` (the window to look up) and ``tables``
    (objects with a ``hash`` function and ``entries`` buckets). The query is
    hashed with every table's function and the matching buckets are pooled.

    Returns:
        bytes: orjson-encoded object mapping a hit count (as a string) to the
        list of indices found in exactly that many buckets.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    window = raw_data['query']
    tables = raw_data["tables"]

    hit_counts = Counter()
    for t in tables.values():
        signature = calculate_signatures_random_weights(window, hash_function=t["hash"])
        # The tables arrive as plain JSON dicts, so a query that hashes to a
        # bucket no stored window fell into must count as "no neighbours"
        # rather than raise KeyError.
        hit_counts.update(t["entries"].get(signature, ()))

    output = defaultdict(list)
    for index, frequency in hit_counts.items():
        output[str(frequency)].append(index)

    response = orjson.dumps(output)
    print("Similarity done: " + str(time()-t0))
    return response

@app.route('/update', methods=['POST'])
def update():
    """Keep the tables that agree with the user's labels and regenerate the rest.

    The request body carries ``labelData`` (index -> True/False), ``tables``,
    ``query`` and ``parameters`` (``windowsize``, ``hashsize``, ``tablesize``).

    A table is kept when the query's bucket contains every index labelled
    True and none labelled False. Replacement tables are drawn by rejection
    sampling until ``tablesize`` tables exist again.

    Returns:
        A ``jsonify`` response mapping table position to
        ``{"hash": ..., "entries": ...}``.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]
    window = raw_data["query"]
    parameters = raw_data['parameters']
    window_size = int(parameters["windowsize"])
    hash_size = int(parameters["hashsize"])
    table_size = int(parameters["tablesize"])

    correct_indices = [int(index) for index, value in label_data.items() if value is True]
    incorrect_indices = [int(index) for index, value in label_data.items() if value is False]

    new_tables = []
    for t in tables.values():
        signature = calculate_signatures_random_weights(window, hash_function=t['hash'])
        # Tables come from JSON as plain dicts: a missing bucket means the
        # query has no neighbours in this table, not an error. A set makes
        # every label lookup O(1).
        neighbours = set(t["entries"].get(signature, ()))
        keeps_correct = all(index in neighbours for index in correct_indices)
        drops_incorrect = not any(index in neighbours for index in incorrect_indices)
        if keeps_correct and drops_incorrect:
            new_tables.append(t)

    for index in range(table_size - len(new_tables)):
        t1 = time()
        # NOTE(review): this rejection loop has no upper bound; contradictory
        # labels (e.g. identical windows labelled True and False) would make
        # it spin forever.
        while True:
            correct_signatures, hash_function = calculate_signatures_random_weights(
                data[correct_indices], window_size=window_size, hash_size=hash_size)
            incorrect_signatures, _ = calculate_signatures_random_weights(
                data[incorrect_indices], hash_function=hash_function)
            if correct_signatures:
                reference = correct_signatures[0]
            else:
                # Nothing was labelled correct: fall back to the query's own
                # bucket, mirroring the validity check above, instead of
                # failing with IndexError on correct_signatures[0].
                reference = calculate_signatures_random_weights(window, hash_function=hash_function)
            all_correct_together = all(sig == reference for sig in correct_signatures)
            if all_correct_together and reference not in incorrect_signatures:
                break
        signatures, _ = calculate_signatures_random_weights(data, hash_function=hash_function)
        entries = defaultdict(list)
        for position, signature in enumerate(signatures):
            entries[signature].append(position)
        print(str(index) + ": " + str(time() - t1))
        new_tables.append({
            "hash": hash_function.tolist(),
            "entries": entries
        })

    print('Update time: ' + str(time() - t0))
    response = {
        table_index: {"hash": table["hash"], "entries": table["entries"]}
        for table_index, table in enumerate(new_tables)
    }
    return jsonify(response)


@app.route('/query', methods=['POST'])
def query():
    """Resolve the request's ``window`` into a list of values.

    * If ``window`` is an integer it is used as an index into
      'processed-data.npy' and that row is returned.
    * Otherwise ``window`` is passed to ``preprocessing.minmax_scale`` with
      the feature range (-1, 1) and the scaled values are returned.

    Returns:
        bytes: the orjson-encoded list of values.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    window = raw_data['window']
    if isinstance(window, int):
        output = np.load('processed-data.npy')[window]
    else:
        output = preprocessing.minmax_scale(window, (-1, 1))
    # Both branches share the same serialisation and timing tail.
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/window', methods=['POST'])
def window():
    """Return the cached windows selected by the request's ``indices``.

    ``indices`` is used directly to index the array loaded from
    'processed-data.npy'.

    Returns:
        bytes: the orjson-encoded selection (``tolist()`` of the result).
    """
    t0 = time()
    indices = orjson.loads(request.data)['indices']
    output = np.load('processed-data.npy')[indices]
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response

@app.route('/average-progress', methods=['POST'])
def average_progress():
    """Compute cumulative average/max/min profiles over groups of windows.

    The request's ``windows`` is a list of index groups. The groups are
    accumulated: the statistics for group k cover every window of groups
    0..k. Results are returned newest-first (the entry for the last group
    comes first). A group processed while nothing has been accumulated yet
    contributes an empty list.

    Returns:
        bytes: orjson-encoded list of ``{"average", "max", "min"}`` objects
        (or ``[]`` placeholders).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')
    output = []
    actual_windows = []
    print("Starting average progress")
    print("Initialized: " + str(time() - t0))
    for windows in all_windows:
        t1 = time()
        actual_windows.extend(data[windows])
        if not actual_windows:
            output.append([])
            continue
        output.append({
            'average': (np.sum(actual_windows, 0)/len(actual_windows)).tolist(),
            'max': np.maximum.reduce(actual_windows).tolist(),
            'min': np.minimum.reduce(actual_windows).tolist(),
        })
        print("Average calculated: " + str(time() - t1))
    # Results are wanted newest-first. Appending and reversing once gives the
    # same order as prepending on every iteration, without rebuilding the
    # list each time.
    output.reverse()
    response = orjson.dumps(output)
    print("Averages calculated: " + str(time() - t0))
    return response


@app.route('/average-table', methods=['POST'])
def average_table():
    """Compute a mean +/- one standard deviation band per group of windows.

    The request's ``windows`` is a list of index groups; each group is looked
    up in 'processed-data.npy' independently (no accumulation across groups).

    Returns:
        bytes: orjson-encoded list with one object per group holding
        ``average`` (mean along axis 0), ``max`` (mean + std) and
        ``min`` (mean - std).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')
    output = []
    print("Initialized: " + str(time() - t0))
    for windows in all_windows:
        t1 = time()
        actual_windows = data[windows]
        print(len(actual_windows))
        average_values = np.average(actual_windows, 0)
        std_values = np.std(actual_windows, 0)
        # "max"/"min" are the one-sigma envelope around the mean, not the
        # element-wise extrema.
        output.append({
            'average': average_values.tolist(),
            'max': (average_values + std_values).tolist(),
            'min': (average_values - std_values).tolist(),
        })
        print("Average calculated: " + str(time() - t1))
    response = orjson.dumps(output)
    print("Averages calculated: " + str(time() - t0))
    return response