"""Flask backend for interactive exploration of bigWig signal data.

Serves the raw downsampled signal, chunks it into fixed-size windows, and
builds locality-sensitive-hash (LSH) tables over those windows so the UI can
query for similar windows and iteratively refine the hash tables from user
labels.
"""
from collections import defaultdict, Counter
from time import time
import os.path
import json

import numpy as np
import pandas as pd
import orjson
import dask.dataframe as dd
import bigwig
import bbi
from flask import Flask, jsonify, request
from flask_cors import CORS
from sklearn import preprocessing

# When False, /create-windows skips the expensive re-chunking of the bigWig
# file and reuses the cached 'processed-data.npy' from a previous run.
reload = False

app = Flask(__name__)
CORS(app)


@app.route('/', methods=['GET'])
def index():
    """Liveness check."""
    return "hi"


@app.route('/read-data', methods=['GET'])
def read_data():
    """Return the chr1 signal of test.bigWig downsampled into `bins` bins.

    Response: orjson bytes of {"index": genomic start positions,
    "values": binned signal}.
    """
    t0 = time()
    size = bbi.chromsizes('test.bigWig')['chr1']
    bins = 100000
    data = bigwig.get('test.bigWig', 'chr1', 0, size, bins)
    print(data.shape)
    response = {
        # size // bins (not int(size / bins)): exact integer step, immune to
        # float truncation on large chromosome sizes.
        # NOTE(review): range() may yield bins+1 starts while data has `bins`
        # values — confirm the client tolerates the length mismatch.
        "index": list(range(0, size, size // bins)),
        "values": data.tolist(),
    }
    response = orjson.dumps(response)
    print('Data read: ' + str(time()-t0))
    return response


@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Chunk the bigWig signal into overlapping windows and cache them.

    Only recomputes when the module-level `reload` flag is set; otherwise the
    cached 'processed-data.npy' from a previous run is kept. Always returns
    the string '1'.
    """
    t0 = time()
    if reload:
        raw_data = request.json
        window_size = int(raw_data['parameters']["windowsize"])
        data = bigwig.chunk(
            'test.bigWig',
            12000,                    # resampled window length (samples)
            int(12000 / window_size), # step per value
            int(12000 / 6),           # overlap stride
            ['chr1'],
            verbose=True,
        )
        print(data.shape)
        np.save('processed-data', data)
    print('Windows created: ' + str(time()-t0))
    return '1'


@app.route('/create-tables', methods=['POST'])
def create_tables():
    """Build the LSH tables over the cached windows.

    Response maps the table index (as a string) to its random projection
    ("hash") and its signature -> window-indices mapping ("entries").
    """
    data = np.load('processed-data.npy')
    raw_data = orjson.loads(request.data)
    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])
    hash_functions, tables = lsh(data, window_size, hash_size, table_size)
    response = {}
    for table_index in range(table_size):
        response[str(table_index)] = {
            "hash": hash_functions[table_index],
            "entries": tables[table_index],
        }
    response = orjson.dumps(response)
    return response


def lsh(data, window_size, hash_size, table_size):
    """Build `table_size` independent LSH tables over the window matrix.

    Returns (hash_functions, tables): per-table random projection matrices
    (as nested lists, JSON-serializable) and per-table dicts mapping a
    bit-string signature to the list of window indices that hash to it.
    """
    t0 = time()
    print('Starting: ' + str(time() - t0))
    tables_hash_function = []
    print('Init time: ' + str(time() - t0))
    tables = []
    for index in range(table_size):
        t1 = time()
        table = defaultdict(list)
        signatures, hash_function = calculate_signatures_random_weights(
            data, window_size=window_size, hash_size=hash_size)
        for i, signature in enumerate(signatures):
            table[signature].append(i)
        tables.append(table)
        tables_hash_function.append(hash_function.tolist())
        print(time() - t1)
    print('Creation time: ' + str(time() - t0))
    return tables_hash_function, tables


def calculate_signatures_random_weights(data, window_size=None, hash_size=None,
                                        hash_function=None):
    """Project `data` through a random hyperplane matrix and binarize.

    If `hash_function` is None a fresh (window_size, hash_size) uniform
    matrix is drawn; otherwise the given one (array or nested list) is used.

    Return is intentionally asymmetric: for a single 1-D window only the
    bit-string signature is returned; for a 2-D batch, a (signatures,
    hash_function) pair.
    """
    if hash_function is None:
        hash_function = np.random.uniform(-100, 100, size=(window_size, hash_size))
    signatures_bool = np.dot(data, hash_function) > 0
    if signatures_bool.ndim == 1:
        return ''.join('1' if x else '0' for x in signatures_bool)
    return [''.join('1' if x else '0' for x in lst)
            for lst in signatures_bool], hash_function


@app.route('/similarity', methods=['POST'])
def similarity():
    """Find windows similar to the query via the client-held LSH tables.

    Response maps a vote count (stringified) to the window indices that were
    retrieved from exactly that many tables.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    window = raw_data['query']
    tables = raw_data["tables"]
    neighbours = []
    output = defaultdict(list)
    for t in tables.values():
        signature = calculate_signatures_random_weights(
            window, hash_function=t["hash"])
        # .get: the tables arrive as plain JSON dicts, so a signature with no
        # stored windows must yield no neighbours rather than a KeyError.
        neighbours.extend(t["entries"].get(signature, []))
    neighbours_with_frequency = dict(Counter(neighbours))
    for index, frequency in neighbours_with_frequency.items():
        output[str(frequency)].append(index)
    response = orjson.dumps(output)
    print("Similarity done: " + str(time()-t0))
    return response


@app.route('/update', methods=['POST'])
def update():
    """Refine the LSH tables using the user's correct/incorrect labels.

    Keeps every table consistent with the labels (all correct windows share
    the query's bucket, no incorrect window does) and re-draws replacements
    for the rest until the table count is restored.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]
    window = raw_data["query"]
    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])
    new_tables = []
    correct_indices = [int(index) for index, value in label_data.items()
                       if value is True]
    incorrect_indices = [int(index) for index, value in label_data.items()
                         if value is False]

    # Keep tables that already agree with every label.
    for t in tables.values():
        valid = True
        signature = calculate_signatures_random_weights(
            window, hash_function=t['hash'])
        # .get: an unseen signature means an empty bucket, which is invalid
        # as soon as any correct window is expected in it.
        neighbours = t["entries"].get(signature, [])
        for index in correct_indices:
            if index not in neighbours:
                valid = False
                break
        for index in incorrect_indices:
            if index in neighbours:
                valid = False
                break
        if valid:
            new_tables.append(t)

    # Re-draw random projections until one separates the labels perfectly.
    # NOTE(review): this loop has no attempt cap and can spin forever when no
    # projection separates the labels; it also indexes correct_signatures[0],
    # which raises IndexError when no window is labeled correct — confirm the
    # client always sends at least one correct label.
    for index in range(table_size - len(new_tables)):
        entries = defaultdict(list)
        t1 = time()
        while True:
            correct_signatures, hash_function = calculate_signatures_random_weights(
                data[correct_indices], window_size=window_size, hash_size=hash_size)
            incorrect_signatures, _ = calculate_signatures_random_weights(
                data[incorrect_indices], hash_function=hash_function)
            if (correct_signatures.count(correct_signatures[0]) == len(correct_signatures)
                    and incorrect_signatures.count(correct_signatures[0]) == 0):
                break
        signatures, _ = calculate_signatures_random_weights(
            data, hash_function=hash_function)
        for i, signature in enumerate(signatures):
            entries[signature].append(i)
        print(str(index) + ": " + str(time() - t1))
        new_tables.append({
            "hash": hash_function.tolist(),
            "entries": entries,
        })

    print('Update time: ' + str(time() - t0))
    response = {}
    for table_index in range(len(new_tables)):
        response[table_index] = {
            "hash": new_tables[table_index]["hash"],
            "entries": new_tables[table_index]["entries"],
        }
    # jsonify (not orjson): handles the int keys and defaultdict values here.
    response = jsonify(response)
    return response


@app.route('/query', methods=['POST'])
def query():
    """Resolve the query window.

    An int is treated as an index into the cached windows; anything else is a
    raw window that gets rescaled to [-1, 1].
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    window = raw_data['window']
    if isinstance(window, int):
        output = np.load('processed-data.npy')[window]
    else:
        output = preprocessing.minmax_scale(window, (-1, 1))
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response


@app.route('/window', methods=['POST'])
def window():
    """Return the cached windows at the requested indices."""
    t0 = time()
    raw_data = orjson.loads(request.data)
    indices = raw_data['indices']
    output = np.load('processed-data.npy')[indices]
    response = orjson.dumps(output.tolist())
    print("Query done: " + str(time() - t0))
    return response


@app.route('/average-progress', methods=['POST'])
def average_progress():
    """Cumulative min/max/average over successive groups of windows.

    Windows accumulate across groups (each group's stats cover all windows
    seen so far), and non-empty results are prepended so the response runs
    newest-first; an empty cumulative set contributes an appended [].
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')
    output = []
    actual_windows = []
    print("Starting average progress")
    print("Initialized: " + str(time() - t0))
    for windows in all_windows:
        t1 = time()
        actual_windows.extend(data[windows])
        if len(actual_windows) == 0:
            output.append([])
            continue
        max_values = np.maximum.reduce(actual_windows).tolist()
        min_values = np.minimum.reduce(actual_windows).tolist()
        average_values = (np.sum(actual_windows, 0) / len(actual_windows)).tolist()
        # NOTE(review): prepending keeps the original newest-first order but
        # is O(n^2) over groups — acceptable for the small group counts seen
        # here; revisit if group counts grow.
        output = [{
            'average': average_values,
            'max': max_values,
            'min': min_values,
        }] + output
        print("Average calculated: " + str(time() - t1))
    response = orjson.dumps(output)
    print("Averages calculated: " + str(time() - t0))
    return response


@app.route('/average-table', methods=['POST'])
def average_table():
    """Per-group average with a one-standard-deviation band.

    For each group of window indices, returns the element-wise mean and
    mean +/- std as 'average', 'max', and 'min'.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    all_windows = raw_data['windows']
    data = np.load('processed-data.npy')
    output = []
    print("Initialized: " + str(time() - t0))
    for windows in all_windows:
        t1 = time()
        actual_windows = data[windows]
        print(len(actual_windows))
        average_values = np.average(actual_windows, 0)
        std_values = np.std(actual_windows, 0)
        max_values = average_values + std_values
        min_values = average_values - std_values
        output.append({
            'average': average_values.tolist(),
            'max': max_values.tolist(),
            'min': min_values.tolist(),
        })
        print("Average calculated: " + str(time() - t1))
    response = orjson.dumps(output)
    print("Averages calculated: " + str(time() - t0))
    return response