Commit d156b8db authored by Kruyff,D.L.W. (Dylan)
Browse files

Super fast table creation

parent 537d59dc
......@@ -2,7 +2,6 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
......@@ -16,8 +15,8 @@
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272">
<caret line="117" column="28" lean-forward="true" selection-start-line="117" selection-start-column="28" selection-end-line="117" selection-end-column="28" />
<state relative-caret-position="369">
<caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" />
<folding>
<element signature="e#0#41#0" expanded="true" />
</folding>
......@@ -206,12 +205,12 @@
<workItem from="1594589515579" duration="1044000" />
<workItem from="1594719112139" duration="10388000" />
<workItem from="1595247298901" duration="17719000" />
<workItem from="1597658111794" duration="30822000" />
<workItem from="1597658111794" duration="32577000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="65290000" />
<option name="totallyTimeSpent" value="67045000" />
</component>
<component name="ToolWindowManager">
<frame x="-7" y="-7" width="1295" height="695" extended-state="6" />
......@@ -270,8 +269,8 @@
</entry>
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272">
<caret line="117" column="28" lean-forward="true" selection-start-line="117" selection-start-column="28" selection-end-line="117" selection-end-column="28" />
<state relative-caret-position="369">
<caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" />
<folding>
<element signature="e#0#41#0" expanded="true" />
</folding>
......
from flask import Flask, jsonify, request
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from flask_cors import CORS
......@@ -9,10 +8,6 @@ import dask.dataframe as dd
import os.path
import json
from sklearn import preprocessing
from functools import partial
from itertools import groupby
from multiprocessing import Pool
import rapidjson
import orjson
app = Flask(__name__)
......@@ -74,15 +69,6 @@ def create_windows():
print("Sending response: " + str(time()-t0))
return response
def fill_table(data, tables_hash_function, index):
    """Group window indices by the sign pattern of their projection.

    Every row of ``data`` is multiplied with ``tables_hash_function[index]``;
    each resulting component becomes ``'1'`` when it is strictly positive and
    ``'0'`` otherwise, and the concatenated characters form the row's
    signature.

    Args:
        data: 2-D array with one window per row.
        tables_hash_function: Indexable collection of 2-D projection
            matrices whose first dimension matches the window length.
        index: Position of the projection matrix to use.

    Returns:
        A ``defaultdict(list)`` mapping each signature string to the row
        indices (in ascending order) that produced it.
    """
    print(index)
    table = defaultdict(list)
    # A single matrix product covers all windows at once instead of issuing
    # one np.dot call per row from a Python loop.
    signatures_bool = np.dot(data, tables_hash_function[index]) > 0
    for window_index, bits in enumerate(signatures_bool):
        signature = ''.join('1' if bit else '0' for bit in bits)
        table[signature].append(window_index)
    return table
@app.route('/create-tables', methods=['POST'])
def create_tables():
t0 = time()
......@@ -101,29 +87,13 @@ def create_tables():
tables = []
for index in range(table_size):
t1 = time()
print('------------')
print(index)
table = defaultdict(list)
print(time()-t1)
signatures1 = [
np.dot(data[window_index], tables_hash_function[index]) > 0
for window_index in
range(data.shape[0])]
print(time() - t1)
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures1]
print(time()-t1)
signatures_bool = np.dot(data, tables_hash_function[index]) > 0
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
for i in range(len(signatures)):
table[signatures[i]].append(i)
print(time()-t1)
tables.append(table)
# try:
# pool = Pool()
# func = partial(fill_table, data, tables_hash_function)
# print('Starting pool: ' + str(time() - t0))
# tables = pool.map(func, range(table_size))
# finally:
# pool.close()
# pool.join()
print('Creation time: ' + str(time() - t0))
hash_functions = np.array(tables_hash_function).tolist()
......@@ -138,7 +108,7 @@ def create_tables():
@app.route('/query', methods=['POST'])
def query():
raw_data = request.json
raw_data = orjson.loads(request.data)
window = raw_data["window"]
tables = raw_data["tables"]
neighbours = []
......@@ -151,34 +121,15 @@ def query():
neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items():
if not frequency in output:
output[frequency] = []
output[frequency].append(index)
output[str(frequency)] = []
output[str(frequency)].append(index)
response = orjson.dumps(output)
return response
def create_valid_table(data, window_size, hash_size, correct_indices, incorrect_indices, index):
    """Draw a random projection that separates the labelled windows and build its table.

    Random ``(window_size, hash_size)`` matrices are sampled until one gives
    every window in ``correct_indices`` the same sign signature while no
    window in ``incorrect_indices`` receives that signature. All rows of
    ``data`` are then bucketed by their signature under that matrix.

    Args:
        data: 2-D array with one window per row.
        window_size: Number of rows of the sampled projection matrix.
        hash_size: Number of columns of the projection matrix, i.e. the
            signature length.
        correct_indices: Row indices that must share one signature.
        incorrect_indices: Row indices that must not get that signature.
        index: Unused; it lets the function be mapped over a range through
            ``functools.partial`` and ``Pool.map``.

    Returns:
        ``{"hash": <matrix as nested lists>, "entries": <defaultdict mapping
        signature string -> list of row indices>}``.

    Raises:
        IndexError: If ``correct_indices`` is empty.

    Note:
        Sampling is unbounded: if the constraints cannot be met (for example
        an index listed as both correct and incorrect) this never returns.
    """
    # Index arrays (rather than raw sequences) keep fancy indexing well
    # defined for empty inputs and for tuples.
    correct_rows = np.asarray(correct_indices, dtype=np.intp)
    incorrect_rows = np.asarray(incorrect_indices, dtype=np.intp)
    if correct_rows.size == 0:
        # Same exception type the reference-signature lookup would raise,
        # but with an explanation.
        raise IndexError("create_valid_table needs at least one correct index")

    while True:
        hash_function = np.random.randn(window_size, hash_size)
        # Project only the labelled rows while searching; one matrix product
        # per group replaces a Python-level loop of per-row np.dot calls.
        correct_bits = np.dot(data[correct_rows], hash_function) > 0
        reference = correct_bits[0]
        if not (correct_bits == reference).all():
            continue
        incorrect_bits = np.dot(data[incorrect_rows], hash_function) > 0
        if not (incorrect_bits == reference).all(axis=1).any():
            break

    entries = defaultdict(list)
    for window_index, bits in enumerate(np.dot(data, hash_function) > 0):
        signature = ''.join('1' if bit else '0' for bit in bits)
        entries[signature].append(window_index)
    return {
        "hash": hash_function.tolist(),
        "entries": entries
    }
@app.route('/update', methods=['POST'])
def update():
t0 = time()
raw_data = request.json
raw_data = orjson.loads(request.data)
data = raw_data["windows"]
data = np.array(data)
......@@ -194,7 +145,7 @@ def update():
incorrect_indices = [int(index) for index, value in label_data.items() if value is False]
window = data[correct_indices[0]]
print("Initialized: " + str(time() - t0))
for t in tables.values():
valid = True
signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
......@@ -209,15 +160,33 @@ def update():
break
if valid:
new_tables.append(t)
try:
pool = Pool()
func = partial(create_valid_table, data, window_size, hash_size, correct_indices, incorrect_indices)
print('Starting pool: ' + str(time() - t0))
new_tables.extend(pool.map(func, range(table_size - len(new_tables))))
finally:
pool.close()
pool.join()
print("Filtered good tables: " + str(time() - t0))
for index in range(table_size - len(new_tables)):
entries = defaultdict(list)
t1 = time()
while True:
hash_function = np.random.randn(window_size, hash_size)
correct_signatures = [''.join((np.dot(data[index], hash_function) > 0).astype('int').astype('str')) for
index in
correct_indices]
incorrect_signatures = [''.join((np.dot(data[index], hash_function) > 0).astype('int').astype('str')) for
index
in incorrect_indices]
if correct_signatures.count(correct_signatures[0]) == len(
correct_signatures) and incorrect_signatures.count(
correct_signatures[0]) == 0:
break
print("first: " + str(time() - t1))
t2 = time()
signatures_bool = np.dot(data, hash_function) > 0
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
for i in range(len(signatures)):
entries[signatures[i]].append(i)
print("second: " + str(time() - t2))
new_tables.append({
"hash": hash_function.tolist(),
"entries": entries
})
print('Update time: ' + str(time() - t0))
response = {}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment