# main.py
from flask import Flask, jsonify, request
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from flask_cors import CORS
from collections import defaultdict, Counter
from time import time

app = Flask(__name__)
CORS(app)

@app.route('/', methods=['GET'])
def index():
    """Answer GET requests on the root URL with a short plain-text greeting."""
    greeting = "hi"
    return greeting

@app.route('/read-data', methods=['GET'])
def read_data():
    """Return the ``meantemp`` column of the climate CSV as JSON.

    The CSV is read with its first column as the index. The index is parsed
    as datetimes and sorted ascending before the column is extracted.

    Returns:
        A JSON response with two keys: ``index`` (the index values rendered
        as strings) and ``values`` (the matching ``meantemp`` entries).
    """
    frame = pd.read_csv("DailyDelhiClimateTrain.csv", index_col=0)
    frame.index = pd.to_datetime(frame.index)
    frame = frame.sort_index()

    series = frame['meantemp'].copy()
    timestamps = series.index.values.astype(str).tolist()
    readings = series.values.tolist()
    return jsonify({"index": timestamps, "values": readings})


@app.route('/create-windows', methods=['POST'])
def create_windows():
    """Slice the posted series into L2-normalised sliding windows.

    Expects a JSON body with ``values`` (a list of numbers) and
    ``parameters.windowsize``.

    Returns:
        A JSON list with one entry per window start position. Each entry is
        the window divided by its Euclidean norm; a window whose norm is 0 is
        returned unchanged to avoid dividing by zero. If ``windowsize`` is
        not positive, a JSON error object with HTTP status 400 is returned.
    """
    raw_data = request.json
    values = raw_data["values"]
    window_size = int(raw_data['parameters']["windowsize"])
    if window_size < 1:
        return jsonify({"error": "windowsize must be a positive integer"}), 400

    data = []
    # "+ 1" so the last full window, values[-window_size:], is included; the
    # range is empty when the series is shorter than one window.
    for start in range(len(values) - window_size + 1):
        window = values[start:start + window_size]
        norm = np.linalg.norm(window)
        if norm == 0:
            data.append(window)
        else:
            # Convert explicitly instead of relying on list / np.float64.
            data.append((np.asarray(window) / norm).tolist())
    return jsonify(data)

@app.route('/create-tables', methods=['POST'])
def create_tables():
    """Build random-projection hash tables over the posted windows.

    Expects a JSON body with ``windows`` and ``parameters`` holding
    ``windowsize``, ``hashsize`` and ``tablesize``. For each of the
    ``tablesize`` tables a ``windowsize`` x ``hashsize`` matrix is drawn with
    ``np.random.randn``. Every window is projected through it, and the string
    form of the resulting 0/1 sign vector is used as the bucket key.

    Returns:
        A JSON object keyed by table index. Each value holds ``hash`` (the
        projection matrix as nested lists) and ``entries`` (bucket key ->
        list of window indices).
    """
    started = time()
    payload = request.json
    windows = payload["windows"]

    settings = payload['parameters']
    window_size = int(settings["windowsize"])
    hash_size = int(settings["hashsize"])
    table_size = int(settings["tablesize"])

    windows = np.array(windows)
    # Draw every projection matrix up front, one per table.
    projections = [np.random.randn(window_size, hash_size) for _ in range(table_size)]

    buckets_per_table = []
    for projection in projections:
        buckets = defaultdict(list)
        for position, row in enumerate(windows):
            bits = (np.dot(row, projection) > 0).astype('int')
            buckets[str(bits)].append(position)
        buckets_per_table.append(buckets)

    elapsed = time() - started
    print('Creation time: ' + str(elapsed))

    result = {
        table_index: {"hash": projection.tolist(), "entries": buckets}
        for table_index, (projection, buckets)
        in enumerate(zip(projections, buckets_per_table))
    }
    return jsonify(result)

@app.route('/query', methods=['POST'])
def query():
    """Look up a window in every posted hash table and group hits by count.

    Expects a JSON body with ``window`` (the query vector) and ``tables``
    (an object whose values each hold ``hash`` and ``entries``, in the shape
    produced by the table-building endpoints).

    Returns:
        A JSON object mapping a frequency (the number of tables in which a
        window index shared the query's bucket) to the list of window
        indices with that frequency.
    """
    raw_data = request.json
    window = raw_data["window"]
    tables = raw_data["tables"]

    hits = Counter()
    for table in tables.values():
        signature = (np.dot(window, table["hash"]) > 0).astype('int')
        # Tables arrive as plain JSON objects, so a signature that no stored
        # window produced has no bucket; treat it as empty instead of failing
        # with KeyError.
        hits.update(table["entries"].get(str(signature), []))

    output = defaultdict(list)
    for index, frequency in hits.items():
        output[frequency].append(index)
    return jsonify(output)

@app.route('/update', methods=['POST'])
def update():
    """Keep hash tables that agree with the user's labels and redraw the rest.

    Expects a JSON body with ``windows``, ``labelData`` (entries that are
    ``true``/``false`` mark windows as correct/incorrect; anything else is
    ignored), ``tables`` and ``parameters`` (``windowsize``, ``hashsize``,
    ``tablesize``).

    A table is kept when the bucket of the first correct window contains
    every correct window and no incorrect window. Replacement tables are
    drawn at random until one satisfies the same condition, up to a total of
    ``tablesize`` tables.

    Returns:
        A JSON object keyed by table index, each value holding ``hash`` and
        ``entries``. If no window is labelled ``true``, a JSON error object
        with HTTP status 400 is returned.
    """
    t0 = time()
    raw_data = request.json
    data = np.array(raw_data["windows"])
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]

    parameters = raw_data['parameters']
    window_size = int(parameters["windowsize"])
    hash_size = int(parameters["hashsize"])
    table_size = int(parameters["tablesize"])

    correct_indices = [index for index, value in enumerate(label_data) if value is True]
    incorrect_indices = [index for index, value in enumerate(label_data) if value is False]
    if not correct_indices:
        # Without a positive example there is no reference bucket to test.
        return jsonify({"error": "labelData must mark at least one window as true"}), 400

    def signature_key(vector, hash_function):
        """Return the bucket key of ``vector`` under ``hash_function``."""
        return str((np.dot(vector, hash_function) > 0).astype('int'))

    reference = data[correct_indices[0]]

    new_tables = []
    for t in tables.values():
        # A missing bucket (tables are plain JSON dicts) counts as empty, so
        # the table is treated as invalid instead of raising KeyError.
        bucket = set(t["entries"].get(signature_key(reference, t["hash"]), []))
        keeps_correct = all(index in bucket for index in correct_indices)
        leaks_incorrect = any(index in bucket for index in incorrect_indices)
        if keeps_correct and not leaks_incorrect:
            new_tables.append(t)

    for _ in range(table_size - len(new_tables)):
        # NOTE(review): this rejection sampling has no attempt limit. It never
        # terminates if an incorrect window always hashes like the reference
        # (e.g. an identical window), and can take very long for large
        # hashsize or many labelled windows.
        while True:
            hash_function = np.random.randn(window_size, hash_size)
            target = signature_key(reference, hash_function)
            if (all(signature_key(data[index], hash_function) == target
                    for index in correct_indices)
                    and all(signature_key(data[index], hash_function) != target
                            for index in incorrect_indices)):
                break
        entries = defaultdict(list)
        for window_index in range(data.shape[0]):
            entries[signature_key(data[window_index], hash_function)].append(window_index)
        new_tables.append({
            "hash": hash_function.tolist(),
            "entries": entries,
        })

    print('Update time: ' + str(time() - t0))
    response = {
        table_index: {"hash": table["hash"], "entries": table["entries"]}
        for table_index, table in enumerate(new_tables)
    }
    return jsonify(response)