Next Tuesday, 25 January, around 21:30, we'll be upgrading to GitLab version 14.7.

Commit b5d4198f authored by Kruyff,D.L.W. (Dylan)
Browse files

Super fast table creation

parent 1189f2bf
......@@ -2,7 +2,6 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
......@@ -16,8 +15,8 @@
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272">
<caret line="117" column="28" lean-forward="true" selection-start-line="117" selection-start-column="28" selection-end-line="117" selection-end-column="28" />
<state relative-caret-position="369">
<caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" />
<folding>
<element signature="e#0#41#0" expanded="true" />
</folding>
......@@ -206,12 +205,12 @@
<workItem from="1594589515579" duration="1044000" />
<workItem from="1594719112139" duration="10388000" />
<workItem from="1595247298901" duration="17719000" />
<workItem from="1597658111794" duration="30822000" />
<workItem from="1597658111794" duration="32577000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="65290000" />
<option name="totallyTimeSpent" value="67045000" />
</component>
<component name="ToolWindowManager">
<frame x="-7" y="-7" width="1295" height="695" extended-state="6" />
......@@ -270,8 +269,8 @@
</entry>
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272">
<caret line="117" column="28" lean-forward="true" selection-start-line="117" selection-start-column="28" selection-end-line="117" selection-end-column="28" />
<state relative-caret-position="369">
<caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" />
<folding>
<element signature="e#0#41#0" expanded="true" />
</folding>
......
from flask import Flask, jsonify, request
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from flask_cors import CORS
......@@ -9,10 +8,6 @@ import dask.dataframe as dd
import os.path
import json
from sklearn import preprocessing
from functools import partial
from itertools import groupby
from multiprocessing import Pool
import rapidjson
import orjson
app = Flask(__name__)
......@@ -74,15 +69,6 @@ def create_windows():
print("Sending response: " + str(time()-t0))
return response
def fill_table(data, tables_hash_function, index):
    """Group window indices by the sign signature of their projection.

    Each row of ``data`` is multiplied with ``tables_hash_function[index]``;
    every positive component becomes ``'1'`` and every other component ``'0'``.
    The resulting bit string is the row's signature.

    Args:
        data: 2-D NumPy array with one window per row.
        tables_hash_function: indexable collection of projection matrices;
            only the entry at ``index`` is used. Each matrix needs as many
            rows as ``data`` has columns.
        index: position of the projection matrix to use. It is also echoed
            to stdout as a progress marker.

    Returns:
        ``defaultdict(list)`` mapping each signature string to the row
        indices of ``data`` that produced it, in ascending order.
    """
    print(index)
    # A single matrix product projects every window at once instead of
    # issuing one np.dot call per row from Python.
    signature_bits = np.dot(data, tables_hash_function[index]) > 0
    table = defaultdict(list)
    for window_index, bits in enumerate(signature_bits):
        signature = ''.join('1' if bit else '0' for bit in bits)
        table[signature].append(window_index)
    return table
@app.route('/create-tables', methods=['POST'])
def create_tables():
t0 = time()
......@@ -101,29 +87,13 @@ def create_tables():
tables = []
for index in range(table_size):
t1 = time()
print('------------')
print(index)
table = defaultdict(list)
print(time()-t1)
signatures1 = [
np.dot(data[window_index], tables_hash_function[index]) > 0
for window_index in
range(data.shape[0])]
print(time() - t1)
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures1]
print(time()-t1)
signatures_bool = np.dot(data, tables_hash_function[index]) > 0
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
for i in range(len(signatures)):
table[signatures[i]].append(i)
print(time()-t1)
tables.append(table)
# try:
# pool = Pool()
# func = partial(fill_table, data, tables_hash_function)
# print('Starting pool: ' + str(time() - t0))
# tables = pool.map(func, range(table_size))
# finally:
# pool.close()
# pool.join()
print('Creation time: ' + str(time() - t0))
hash_functions = np.array(tables_hash_function).tolist()
......@@ -138,7 +108,7 @@ def create_tables():
@app.route('/query', methods=['POST'])
def query():
raw_data = request.json
raw_data = orjson.loads(request.data)
window = raw_data["window"]
tables = raw_data["tables"]
neighbours = []
......@@ -151,34 +121,15 @@ def query():
neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items():
if not frequency in output:
output[frequency] = []
output[frequency].append(index)
output[str(frequency)] = []
output[str(frequency)].append(index)
response = orjson.dumps(output)
return response
def create_valid_table(data, window_size, hash_size, correct_indices, incorrect_indices, index):
    """Draw random projections until one separates the labelled windows.

    A candidate projection is accepted when every window listed in
    ``correct_indices`` gets the same sign signature and no window listed in
    ``incorrect_indices`` gets that signature. All rows of ``data`` are then
    bucketed by their signature under the accepted projection.

    Args:
        data: 2-D NumPy array with one window per row.
        window_size: number of rows of the random projection matrix; it has
            to match the number of columns of ``data``.
        hash_size: number of columns of the projection matrix, i.e. the
            number of bits in a signature.
        correct_indices: row indices that must share one signature.
        incorrect_indices: row indices that must not have that signature.
            May be empty.
        index: unused; presumably present so the function can be mapped over
            a range with ``functools.partial`` -- confirm against callers.

    Returns:
        Dict with ``"hash"`` (the accepted projection as nested lists) and
        ``"entries"`` (``defaultdict(list)`` from signature string to the row
        indices of ``data`` with that signature).

    Raises:
        IndexError: if ``correct_indices`` is empty.

    Note:
        The search has no attempt limit. If the constraints cannot be met
        (e.g. an identical window appears in both index lists) this function
        never returns.
    """
    # The labelled rows are the same for every attempt, so gather them once.
    correct_windows = data[np.asarray(list(correct_indices), dtype=np.intp)]
    incorrect_windows = data[np.asarray(list(incorrect_indices), dtype=np.intp)]

    while True:
        hash_function = np.random.randn(window_size, hash_size)
        correct_bits = np.dot(correct_windows, hash_function) > 0
        incorrect_bits = np.dot(incorrect_windows, hash_function) > 0
        reference = correct_bits[0]
        all_correct_agree = (correct_bits == reference).all()
        # A row collides only if every bit equals the reference signature.
        any_incorrect_collides = (incorrect_bits == reference).all(axis=1).any()
        if all_correct_agree and not any_incorrect_collides:
            break

    entries = defaultdict(list)
    for window_index, bits in enumerate(np.dot(data, hash_function) > 0):
        signature = ''.join('1' if bit else '0' for bit in bits)
        entries[signature].append(window_index)
    return {
        "hash": hash_function.tolist(),
        "entries": entries
    }
@app.route('/update', methods=['POST'])
def update():
t0 = time()
raw_data = request.json
raw_data = orjson.loads(request.data)
data = raw_data["windows"]
data = np.array(data)
......@@ -194,7 +145,7 @@ def update():
incorrect_indices = [int(index) for index, value in label_data.items() if value is False]
window = data[correct_indices[0]]
print("Initialized: " + str(time() - t0))
for t in tables.values():
valid = True
signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
......@@ -209,15 +160,33 @@ def update():
break
if valid:
new_tables.append(t)
try:
pool = Pool()
func = partial(create_valid_table, data, window_size, hash_size, correct_indices, incorrect_indices)
print('Starting pool: ' + str(time() - t0))
new_tables.extend(pool.map(func, range(table_size - len(new_tables))))
finally:
pool.close()
pool.join()
print("Filtered good tables: " + str(time() - t0))
for index in range(table_size - len(new_tables)):
entries = defaultdict(list)
t1 = time()
while True:
hash_function = np.random.randn(window_size, hash_size)
correct_signatures = [''.join((np.dot(data[index], hash_function) > 0).astype('int').astype('str')) for
index in
correct_indices]
incorrect_signatures = [''.join((np.dot(data[index], hash_function) > 0).astype('int').astype('str')) for
index
in incorrect_indices]
if correct_signatures.count(correct_signatures[0]) == len(
correct_signatures) and incorrect_signatures.count(
correct_signatures[0]) == 0:
break
print("first: " + str(time() - t1))
t2 = time()
signatures_bool = np.dot(data, hash_function) > 0
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
for i in range(len(signatures)):
entries[signatures[i]].append(i)
print("second: " + str(time() - t2))
new_tables.append({
"hash": hash_function.tolist(),
"entries": entries
})
print('Update time: ' + str(time() - t0))
response = {}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment