# main.py
from flask import Flask, jsonify, request
import pandas as pd
import numpy as np
from flask_cors import CORS
from collections import defaultdict, Counter
from time import time
import os.path
import json
from sklearn import preprocessing
import orjson

# Flask application object; the @app.route handlers in this file register on it.
app = Flask(__name__)
# Wrap the application with flask-cors, using the extension's default settings.
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer ``GET /`` with a fixed greeting string."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Serve the pickled series as two JSON-encoded string lists.

    Loads ``processed-data.pkl`` from the working directory, drops rows whose
    ``t`` value is missing, and returns a JSON object with two fields:

    * ``"index"``  -- the ``date`` column
    * ``"values"`` -- the ``t`` column

    Each field is itself a JSON *string* (the list is encoded with
    ``json.dumps`` before the whole object goes through ``jsonify``), and every
    element is converted to ``str`` first.

    The pickle must already exist. The disabled bootstrap code that used to
    sit here built it from ``NW_Ground_Stations_2016.csv`` (columns
    ``number_sta``, ``date``, ``t``), keeping only station ``14066001``.
    """
    frame = pd.read_pickle('processed-data.pkl')
    frame.dropna(subset=['t'], inplace=True)

    def column_as_json(column):
        # Stringify every cell, then encode the resulting list as JSON text.
        cells = frame.loc[:, column].values.astype(str).tolist()
        return json.dumps(cells)

    payload = {
        "index": column_as_json('date'),
        "values": column_as_json('t'),
    }
    print("response ready")
    return jsonify(payload)

# Disabled CSV-based variant of /read-data, kept for reference:
# @app.route('/read-data', methods=['GET'])
# def read_data():
#     df = pd.read_csv("1.csv", index_col=3)
#     df.index = pd.to_datetime(df.index)
#     df.sort_index(inplace=True)
#     meantemp = df.loc[:, 7].copy()
#     response = {
#         "index": meantemp.index.values.astype(str).tolist(),
#         "values": meantemp.values.tolist()
#     }
#     response = jsonify(response)
#     return response

@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Slice the posted series into overlapping windows and rescale each one.

    Request body (JSON):
        ``values``: the series to slice.
        ``parameters.windowsize``: length of each window (cast with ``int``).

    Every run of ``windowsize`` consecutive values becomes one window,
    including the run that ends on the last value. Each window is min-max
    scaled to ``[-1, 1]`` independently of the others (``axis=1``).

    Returns:
        orjson-encoded bytes of the list of scaled windows. When the series
        is shorter than a single window the result is an empty list.
    """
    t0 = time()
    raw_data = request.json
    values = raw_data["values"]
    window_size = int(raw_data['parameters']["windowsize"])

    # A series of n values holds n - window_size + 1 full windows; without the
    # +1 the window ending on the last value would be dropped.
    window_count = len(values) - window_size + 1
    if window_count < 1:
        # Nothing to scale; minmax_scale would reject an empty input.
        return orjson.dumps([])

    data = [values[start:start + window_size] for start in range(window_count)]
    data = preprocessing.minmax_scale(data, (-1, 1), axis=1)
    print("Created windows: " + str(time() - t0))
    data = data.tolist()
    print("data converted: " + str(time() - t0))
    response = orjson.dumps(data)
    print("Sending response: " + str(time() - t0))
    return response

@app.route('/create-tables', methods=['POST'])
def create_tables():
    """Hash the posted windows into ``tablesize`` random-projection tables.

    Request body (parsed with orjson):
        ``windows``: the windows to index.
        ``parameters.windowsize`` / ``hashsize`` / ``tablesize``: cast with
        ``int``; they set the shape of each projection matrix and the number
        of tables.

    For each table a ``(windowsize, hashsize)`` matrix is drawn uniformly from
    ``[-1, 1)``. A window's signature is the string of ``'1'``/``'0'`` flags
    marking which entries of ``window . matrix`` are positive, and windows are
    grouped by signature.

    Side effects:
        Rebinds the module globals ``data`` (the windows as a NumPy array) and
        ``tables_hash_function`` (the list of projection matrices).

    Returns:
        orjson-encoded bytes of ``{str(table_index): {"hash": matrix as nested
        lists, "entries": {signature: [window indices]}}}``.
    """
    global data
    global tables_hash_function

    started = time()
    payload = orjson.loads(request.data)
    print(time()-started)

    data = payload["windows"]
    settings = payload['parameters']
    window_size = int(settings["windowsize"])
    hash_size = int(settings["hashsize"])
    table_size = int(settings["tablesize"])
    data = np.array(data)
    print('Starting: ' + str(time()-started))

    tables_hash_function = [
        np.random.uniform(-1, 1, size=(window_size, hash_size))
        for _ in range(table_size)
    ]
    print('Init time: ' + str(time() - started))

    tables = []
    for projection in tables_hash_function:
        table_started = time()
        buckets = defaultdict(list)
        # One row of booleans per window: which projected components are > 0.
        bit_rows = np.dot(data, projection) > 0
        signatures = [''.join('1' if bit else '0' for bit in bits) for bits in bit_rows]
        for window_index, signature in enumerate(signatures):
            buckets[signature].append(window_index)
        print(time()-table_started)
        tables.append(buckets)

    print('Creation time: ' + str(time() - started))
    hash_lists = np.array(tables_hash_function).tolist()
    response = {
        str(table_index): {"hash": hash_matrix, "entries": buckets}
        for table_index, (hash_matrix, buckets) in enumerate(zip(hash_lists, tables))
    }
    return orjson.dumps(response)

@app.route('/query', methods=['POST'])
def query():
    """Look up a window in every posted table and group hits by frequency.

    Request body (parsed with orjson):
        ``window``: the query window.
        ``tables``: mapping of table id to ``{"hash": matrix, "entries":
        {signature: [window indices]}}``.

    For each table the query window's signature is computed from that table's
    ``hash`` matrix and the indices stored under that signature are collected.
    A table with no bucket for the signature contributes nothing.

    Returns:
        orjson-encoded bytes of ``{str(count): [indices]}``, where ``count``
        is the number of tables in which an index shared the query's bucket.
    """
    raw_data = orjson.loads(request.data)
    window = raw_data["window"]
    tables = raw_data["tables"]

    neighbours = []
    for t in tables.values():
        signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
        # The tables arrive as plain JSON objects, so a signature that no
        # indexed window produced has no key at all; treat it as an empty bucket.
        neighbours.extend(t["entries"].get(signature, []))

    output = defaultdict(list)
    for index, frequency in Counter(neighbours).items():
        output[str(frequency)].append(index)
    return orjson.dumps(output)

@app.route('/update', methods=['POST'])
def update():
    """Keep the tables that agree with the user's labels and redraw the rest.

    Request body (parsed with orjson):
        ``windows``: all windows, converted to a NumPy array.
        ``labelData``: mapping of window index (castable with ``int``) to a
        boolean; ``True`` marks a correct match, ``False`` an incorrect one,
        any other value is ignored.
        ``tables``: mapping of table id to ``{"hash": ..., "entries": ...}``.
        ``parameters.windowsize`` / ``hashsize`` / ``tablesize``: cast with
        ``int``.

    The first correctly labelled window is the reference. A table is kept
    when the reference's bucket contains every correct index and no incorrect
    index. Replacement tables are then drawn (standard-normal projection
    matrices) until ``tablesize`` tables exist; a draw is accepted only when
    all correct windows share one signature and no incorrect window has it.

    NOTE(review): the redraw loop has no attempt limit, so labels that cannot
    be separated (for example identical windows labelled both ways) would
    keep it spinning -- confirm whether a cap is wanted.

    Returns:
        A ``jsonify`` response of ``{table_index: {"hash": ..., "entries":
        ...}}``, or a 400 response with an ``error`` message when no window
        is labelled ``True`` (there is then no reference window).
    """
    t0 = time()
    raw_data = orjson.loads(request.data)

    data = np.array(raw_data["windows"])
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]

    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])

    correct_indices = [int(index) for index, value in label_data.items() if value is True]
    incorrect_indices = [int(index) for index, value in label_data.items() if value is False]

    if not correct_indices:
        # Without a positive label there is no reference window to hash.
        return jsonify({"error": "labelData must mark at least one window as true"}), 400

    def signature_of(row, hash_function):
        # '1'/'0' per hash dimension: is the projected component positive?
        return ''.join((np.dot(row, hash_function) > 0).astype('int').astype('str'))

    window = data[correct_indices[0]]
    print("Initialized: " + str(time() - t0))

    new_tables = []
    for t in tables.values():
        # Tables arrive as plain JSON objects, so a missing bucket has no key;
        # a set makes the membership checks below O(1) each.
        neighbours = set(t["entries"].get(signature_of(window, t["hash"]), ()))
        has_all_correct = all(index in neighbours for index in correct_indices)
        has_any_incorrect = any(index in neighbours for index in incorrect_indices)
        if has_all_correct and not has_any_incorrect:
            new_tables.append(t)
    print("Filtered good tables: " + str(time() - t0))

    for _ in range(table_size - len(new_tables)):
        t1 = time()
        while True:
            hash_function = np.random.randn(window_size, hash_size)
            correct_signatures = [signature_of(data[index], hash_function)
                                  for index in correct_indices]
            incorrect_signatures = [signature_of(data[index], hash_function)
                                    for index in incorrect_indices]
            reference = correct_signatures[0]
            if (all(signature == reference for signature in correct_signatures)
                    and reference not in incorrect_signatures):
                break
        print("first: " + str(time() - t1))

        t2 = time()
        entries = defaultdict(list)
        signatures_bool = np.dot(data, hash_function) > 0
        for i, bits in enumerate(signatures_bool):
            entries[''.join('1' if bit else '0' for bit in bits)].append(i)
        print("second: " + str(time() - t2))
        new_tables.append({
            "hash": hash_function.tolist(),
            "entries": entries
        })

    print('Update time: ' + str(time() - t0))
    response = {
        table_index: {"hash": table["hash"], "entries": table["entries"]}
        for table_index, table in enumerate(new_tables)
    }
    return jsonify(response)