main.py 9.12 KB
Newer Older
1 2 3 4
from flask import Flask, jsonify, request
import pandas as pd
import numpy as np
from flask_cors import CORS
5
from collections import defaultdict, Counter
6
from time import time
7 8 9
import os.path
import json
from sklearn import preprocessing
10
import orjson
11
import dask.dataframe as dd
Kruyff,D.L.W. (Dylan)'s avatar
Kruyff,D.L.W. (Dylan) committed
12 13 14 15
import bigwig
import bbi

# Gate for the /create-windows handler: while this is False the handler skips
# re-chunking the bigWig file and does not rewrite the saved 'processed-data'
# array; set it to True to regenerate that array on the next request.
reload = False
16 17 18 19 20 21 22 23 24 25

# Flask application object that the @app.route decorators in this module register against.
app = Flask(__name__)
# Attach flask_cors with its default settings so the API can be called cross-origin.
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer GET / with a fixed greeting (handy as a quick liveness check)."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Return a binned overview of chr1 from 'test.bigWig' as JSON bytes.

    The response object has two keys:
      * "index"  -- positions 0, step, 2*step, ... below the chromosome size,
                    where step is the chromosome size divided by the bin count
                    (integer division, never less than 1).
      * "values" -- whatever ``bigwig.get`` returned for the whole chromosome,
                    converted with ``.tolist()``.

    Returns:
        bytes: the orjson-encoded response.
    """
    t0 = time()
    path = 'test.bigWig'
    chromosome = 'chr1'
    bins = 100000

    size = bbi.chromsizes(path)[chromosome]
    data = bigwig.get(path, chromosome, 0, size, bins)
    print(data.shape)

    length = int(size)
    # A chromosome shorter than `bins` would give a step of 0, which range()
    # rejects with ValueError; clamp to 1 so short inputs still work.
    step = max(1, length // bins)
    # NOTE(review): when `length` is not a multiple of `bins` this produces a
    # few more index entries than `bins`; presumably `values` has exactly
    # `bins` entries -- confirm against bigwig.get before relying on a 1:1 zip.
    response = {
        "index": list(range(0, length, step)),
        "values": data.tolist()
    }
    response = orjson.dumps(response)
    print('Data read: ' + str(time()-t0))
    return response

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Optionally rebuild the saved window array, then acknowledge with '1'.

    Work is only done when the module-level ``reload`` flag is truthy. In that
    case the JSON body must contain ``parameters.windowsize``; the result of
    ``bigwig.chunk`` is written with ``np.save('processed-data', ...)``.

    Returns:
        str: always the literal '1'.
    """
    started = time()
    if reload:
        params = request.json['parameters']
        window_size = int(params["windowsize"])
        # Fixed span handed to bigwig.chunk; the two derived arguments are
        # span / windowsize and span / 6 (meaning defined by bigwig.chunk).
        span = 12000
        windows = bigwig.chunk(
            'test.bigWig',
            span,
            int(span / window_size),
            int(span / 6),
            ['chr1'],
            verbose=True,
        )
        print(windows.shape)
        np.save('processed-data', windows)
    print('Windows created: ' + str(time()-started))
    return '1'
57 58 59

@app.route('/create-tables', methods=['POST'])
def create_tables():
    """Build random-projection hash tables over the saved windows.

    Reads ``parameters.windowsize``, ``parameters.hashsize`` and
    ``parameters.tablesize`` from the JSON body. For each of the ``tablesize``
    tables a uniform(-100, 100) matrix of shape (windowsize, hashsize) is
    drawn, every row of 'processed-data.npy' is projected onto it, and the
    sign pattern of the projection (as a '0'/'1' string) is the bucket key.

    Returns:
        bytes: orjson-encoded mapping ``{"<table number>": {"hash": matrix,
        "entries": {signature: [row indices]}}}``.
    """
    started = time()
    data = np.load('processed-data.npy')
    params = orjson.loads(request.data)['parameters']
    window_size = int(params["windowsize"])
    hash_size = int(params["hashsize"])
    table_size = int(params["tablesize"])

    print('Starting: ' + str(time()-started))
    # All projection matrices are drawn before any table is filled.
    projections = [
        np.random.uniform(-100, 100, size=(window_size, hash_size))
        for _ in range(table_size)
    ]
    print('Init time: ' + str(time() - started))

    buckets_per_table = []
    for projection in projections:
        table_started = time()
        buckets = defaultdict(list)
        for position, row in enumerate(np.dot(data, projection) > 0):
            key = ''.join('1' if bit else '0' for bit in row)
            buckets[key].append(position)
        print(time()-table_started)
        buckets_per_table.append(buckets)

    print('Creation time: ' + str(time() - started))
    payload = {
        str(number): {"hash": projection.tolist(), "entries": buckets}
        for number, (projection, buckets)
        in enumerate(zip(projections, buckets_per_table))
    }
    return orjson.dumps(payload)

92 93
@app.route('/query', methods=['POST'])
def query():
    """Resolve the query window sent by the client.

    The JSON body carries ``window``. If it is an int, the row with that
    index is read from 'processed-data.npy'; otherwise the value is treated
    as raw samples and rescaled to the range (-1, 1) with
    ``preprocessing.minmax_scale``.

    Returns:
        bytes: the orjson-encoded list of values.
    """
    started = time()
    requested = orjson.loads(request.data)['window']
    if isinstance(requested, int):
        values = np.load('processed-data.npy')[requested]
    else:
        values = preprocessing.minmax_scale(requested, (-1, 1))
    payload = orjson.dumps(values.tolist())
    print("Query done: " + str(time() - started))
    return payload

@app.route('/window', methods=['POST'])
def window():
    """Return the saved windows selected by ``indices`` in the JSON body.

    Returns:
        bytes: orjson-encoded result of indexing 'processed-data.npy' with
        the supplied ``indices`` value.
    """
    started = time()
    requested = orjson.loads(request.data)['indices']
    windows = np.load('processed-data.npy')
    payload = orjson.dumps(windows[requested].tolist())
    print("Query done: " + str(time() - started))
    return payload

@app.route('/similarity', methods=['POST'])
def similarity():
    """Group candidate neighbours of the query by how many tables agree.

    The JSON body carries ``query`` (the query window) and ``tables`` (a
    mapping whose values each have a ``hash`` matrix and an ``entries``
    mapping from signature string to a list of window indices). The query is
    hashed with every table; the indices found in its bucket are collected
    across tables and counted.

    Returns:
        bytes: orjson-encoded mapping ``{"<frequency>": [indices]}`` where
        frequency is the number of times an index was collected.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    window = raw_data['query']
    tables = raw_data["tables"]

    neighbours = []
    for t in tables.values():
        signature_bool = np.dot(window, t["hash"]) > 0
        signature = ''.join('1' if x else '0' for x in signature_bool)
        # The tables come back from the client as plain JSON objects, so a
        # bucket that no window hashed into is simply absent; treat it as
        # empty instead of failing the whole request with a KeyError.
        neighbours.extend(t["entries"].get(signature, []))

    output = defaultdict(list)
    for index, frequency in Counter(neighbours).items():
        output[str(frequency)].append(index)
    response = orjson.dumps(output)
    print("Similarity done: " + str(time()-t0))
    return response

@app.route('/average-progress', methods=['POST'])
def average_progress():
    """Summarise cumulatively growing sets of windows.

    The JSON body carries ``windows``, a list of index groups. Groups are
    processed in order and each one is added to a running pool of rows from
    'processed-data.npy'. After every group the element-wise average, max and
    min of the whole pool are computed and placed at the FRONT of the result
    list. While the pool is still empty an empty list is appended instead.

    Returns:
        bytes: orjson-encoded list of ``{'average', 'max', 'min'}`` objects
        (newest first) and/or empty lists.
    """
    started = time()
    all_windows = orjson.loads(request.data)['windows']
    data = np.load('processed-data.npy')
    output = []
    pool = []
    print("Starting average progress")
    print("Initialized: " + str(time() - started))
    for group in all_windows:
        group_started = time()
        pool.extend(data[group])
        if not pool:
            output.append([])
            continue
        summary = {
            'average': (np.sum(pool, 0)/len(pool)).tolist(),
            'max': np.maximum.reduce(pool).tolist(),
            'min': np.minimum.reduce(pool).tolist()
        }
        # Newest summary goes first.
        output.insert(0, summary)
        print("Average calculated: " + str(time() - group_started))
    payload = orjson.dumps(output)
    print("Averages calculated: " + str(time() - started))
    return payload

168 169
@app.route('/average-table', methods=['POST'])
def average_table():
    """Summarise each group of windows independently.

    The JSON body carries ``windows``, a list of index groups. For every
    group the selected rows of 'processed-data.npy' are reduced along axis 0
    to their average and standard deviation; 'max' and 'min' in the response
    are average + std and average - std respectively.

    Returns:
        bytes: orjson-encoded list with one ``{'average', 'max', 'min'}``
        object per group, in request order.
    """
    started = time()
    all_windows = orjson.loads(request.data)['windows']
    data = np.load('processed-data.npy')
    output = []
    print("Initialized: " + str(time() - started))
    for group in all_windows:
        group_started = time()
        members = data[group]
        print(len(members))
        mean = np.average(members, 0)
        spread = np.std(members, 0)
        output.append({
            'average': mean.tolist(),
            'max': (mean + spread).tolist(),
            'min': (mean - spread).tolist()
        })
        print("Average calculated: " + str(time() - group_started))
    payload = orjson.dumps(output)
    print("Averages calculated: " + str(time() - started))
    return payload

def _bits_to_signature(bits):
    """Render an iterable of truthy/falsy values as a '0'/'1' string."""
    return ''.join('1' if bit else '0' for bit in bits)


@app.route('/update', methods=['POST'])
def update():
    """Keep the hash tables that agree with the user's labels, replace the rest.

    JSON body:
      * ``labelData``  -- mapping of window index (as string) to True/False.
      * ``tables``     -- mapping whose values have ``hash`` (matrix) and
                          ``entries`` (signature -> list of window indices).
      * ``query``      -- the query window.
      * ``parameters`` -- ``windowsize``, ``hashsize``, ``tablesize``.

    A table is kept when the query's bucket contains every index labelled
    True and none labelled False. New tables are then drawn (standard-normal
    projections) until ``tablesize`` tables exist; a draw is accepted when
    all True-labelled windows share one signature and no False-labelled
    window has that signature.

    Returns:
        A ``jsonify`` response mapping table position to
        ``{"hash": ..., "entries": ...}``.
    """
    t0 = time()
    print("Start")
    raw_data = orjson.loads(request.data)
    print("Data loaded: " + str(time() - t0))
    data = np.load('processed-data.npy')
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]
    window = raw_data["query"]

    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])
    new_tables = []

    correct_indices = [int(index) for index, value in label_data.items() if value is True]
    incorrect_indices = [int(index) for index, value in label_data.items() if value is False]

    print("Initialized: " + str(time() - t0))
    for t in tables.values():
        signature = _bits_to_signature(np.dot(window, t["hash"]) > 0)
        # Tables arrive as plain JSON objects, so an unpopulated bucket is
        # absent rather than empty; .get avoids a KeyError. A set makes the
        # membership tests below O(1).
        neighbours = set(t["entries"].get(signature, ()))
        keeps_correct = all(index in neighbours for index in correct_indices)
        drops_incorrect = not any(index in neighbours for index in incorrect_indices)
        if keeps_correct and drops_incorrect:
            new_tables.append(t)
    print("Filtered good tables: " + str(time() - t0))

    # The labelled rows do not change between draws, so select them once.
    correct_rows = data[np.asarray(correct_indices, dtype=int)]
    incorrect_rows = data[np.asarray(incorrect_indices, dtype=int)]

    for _ in range(table_size - len(new_tables)):
        entries = defaultdict(list)
        t1 = time()
        # NOTE(review): this resampling loop has no upper bound; contradictory
        # labels (e.g. identical windows labelled both ways) would spin forever.
        while True:
            hash_function = np.random.randn(window_size, hash_size)
            correct_signatures = [
                _bits_to_signature(bits)
                for bits in np.dot(correct_rows, hash_function) > 0
            ]
            incorrect_signatures = [
                _bits_to_signature(bits)
                for bits in np.dot(incorrect_rows, hash_function) > 0
            ]
            if correct_signatures:
                target = correct_signatures[0]
            else:
                # Nothing is labelled correct, so there is no reference
                # signature to take from the labels. Fall back to the query's
                # own bucket, mirroring the filter above, instead of raising
                # IndexError on correct_signatures[0].
                target = _bits_to_signature(np.dot(window, hash_function) > 0)
            all_correct_agree = all(sig == target for sig in correct_signatures)
            if all_correct_agree and target not in incorrect_signatures:
                break
        print("first: " + str(time() - t1))
        t2 = time()
        signatures_bool = np.dot(data, hash_function) > 0
        for i, bits in enumerate(signatures_bool):
            entries[_bits_to_signature(bits)].append(i)
        print("second: " + str(time() - t2))
        new_tables.append({
            "hash": hash_function.tolist(),
            "entries": entries
        })

    print('Update time: ' + str(time() - t0))
    response = {
        table_index: {"hash": table["hash"], "entries": table["entries"]}
        for table_index, table in enumerate(new_tables)
    }
    return jsonify(response)