Commit d156b8db authored by Kruyff,D.L.W. (Dylan)'s avatar Kruyff,D.L.W. (Dylan)
Browse files

Super fast table creation

parent 537d59dc
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment=""> <list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
</list> </list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
...@@ -16,8 +15,8 @@ ...@@ -16,8 +15,8 @@
<file pinned="false" current-in-tab="true"> <file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/main.py"> <entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272"> <state relative-caret-position="369">
<caret line="117" column="28" lean-forward="true" selection-start-line="117" selection-start-column="28" selection-end-line="117" selection-end-column="28" /> <caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" />
<folding> <folding>
<element signature="e#0#41#0" expanded="true" /> <element signature="e#0#41#0" expanded="true" />
</folding> </folding>
...@@ -206,12 +205,12 @@ ...@@ -206,12 +205,12 @@
<workItem from="1594589515579" duration="1044000" /> <workItem from="1594589515579" duration="1044000" />
<workItem from="1594719112139" duration="10388000" /> <workItem from="1594719112139" duration="10388000" />
<workItem from="1595247298901" duration="17719000" /> <workItem from="1595247298901" duration="17719000" />
<workItem from="1597658111794" duration="30822000" /> <workItem from="1597658111794" duration="32577000" />
</task> </task>
<servers /> <servers />
</component> </component>
<component name="TimeTrackingManager"> <component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="65290000" /> <option name="totallyTimeSpent" value="67045000" />
</component> </component>
<component name="ToolWindowManager"> <component name="ToolWindowManager">
<frame x="-7" y="-7" width="1295" height="695" extended-state="6" /> <frame x="-7" y="-7" width="1295" height="695" extended-state="6" />
...@@ -270,8 +269,8 @@ ...@@ -270,8 +269,8 @@
</entry> </entry>
<entry file="file://$PROJECT_DIR$/main.py"> <entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272"> <state relative-caret-position="369">
<caret line="117" column="28" lean-forward="true" selection-start-line="117" selection-start-column="28" selection-end-line="117" selection-end-column="28" /> <caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" />
<folding> <folding>
<element signature="e#0#41#0" expanded="true" /> <element signature="e#0#41#0" expanded="true" />
</folding> </folding>
......
from flask import Flask, jsonify, request from flask import Flask, jsonify, request
import matplotlib.pyplot as plt
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from flask_cors import CORS from flask_cors import CORS
...@@ -9,10 +8,6 @@ import dask.dataframe as dd ...@@ -9,10 +8,6 @@ import dask.dataframe as dd
import os.path import os.path
import json import json
from sklearn import preprocessing from sklearn import preprocessing
from functools import partial
from itertools import groupby
from multiprocessing import Pool
import rapidjson
import orjson import orjson
app = Flask(__name__) app = Flask(__name__)
...@@ -74,15 +69,6 @@ def create_windows(): ...@@ -74,15 +69,6 @@ def create_windows():
print("Sending response: " + str(time()-t0)) print("Sending response: " + str(time()-t0))
return response return response
def fill_table(data, tables_hash_function, index):
    """Bucket every window of ``data`` by its signature under one hash function.

    Each window (one row of ``data``) is projected onto
    ``tables_hash_function[index]``. Every strictly positive component of the
    projection becomes ``'1'`` and every other component ``'0'``; windows
    whose bit strings are identical land in the same bucket.

    Args:
        data: 2-D collection of windows, one window per row.
        tables_hash_function: Indexable collection of projection matrices.
        index: Position of the projection matrix to use. It is also printed
            to stdout as a progress marker.

    Returns:
        A ``defaultdict(list)`` mapping each signature string to the row
        indices (in ascending order) that produced it.
    """
    print(index)
    # A single matrix product covers all windows at once, replacing one
    # np.dot call per row inside a Python-level loop.
    positive = np.dot(data, tables_hash_function[index]) > 0
    table = defaultdict(list)
    for window_index, bits in enumerate(positive):
        signature = ''.join('1' if bit else '0' for bit in bits)
        table[signature].append(window_index)
    return table
@app.route('/create-tables', methods=['POST']) @app.route('/create-tables', methods=['POST'])
def create_tables(): def create_tables():
t0 = time() t0 = time()
...@@ -101,29 +87,13 @@ def create_tables(): ...@@ -101,29 +87,13 @@ def create_tables():
tables = [] tables = []
for index in range(table_size): for index in range(table_size):
t1 = time() t1 = time()
print('------------')
print(index)
table = defaultdict(list) table = defaultdict(list)
print(time()-t1) signatures_bool = np.dot(data, tables_hash_function[index]) > 0
signatures1 = [ signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
np.dot(data[window_index], tables_hash_function[index]) > 0
for window_index in
range(data.shape[0])]
print(time() - t1)
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures1]
print(time()-t1)
for i in range(len(signatures)): for i in range(len(signatures)):
table[signatures[i]].append(i) table[signatures[i]].append(i)
print(time()-t1) print(time()-t1)
tables.append(table) tables.append(table)
# try:
# pool = Pool()
# func = partial(fill_table, data, tables_hash_function)
# print('Starting pool: ' + str(time() - t0))
# tables = pool.map(func, range(table_size))
# finally:
# pool.close()
# pool.join()
print('Creation time: ' + str(time() - t0)) print('Creation time: ' + str(time() - t0))
hash_functions = np.array(tables_hash_function).tolist() hash_functions = np.array(tables_hash_function).tolist()
...@@ -138,7 +108,7 @@ def create_tables(): ...@@ -138,7 +108,7 @@ def create_tables():
@app.route('/query', methods=['POST']) @app.route('/query', methods=['POST'])
def query(): def query():
raw_data = request.json raw_data = orjson.loads(request.data)
window = raw_data["window"] window = raw_data["window"]
tables = raw_data["tables"] tables = raw_data["tables"]
neighbours = [] neighbours = []
...@@ -151,34 +121,15 @@ def query(): ...@@ -151,34 +121,15 @@ def query():
neighbours_with_frequency = dict(Counter(neighbours)) neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items(): for index, frequency in neighbours_with_frequency.items():
if not frequency in output: if not frequency in output:
output[frequency] = [] output[str(frequency)] = []
output[frequency].append(index) output[str(frequency)].append(index)
response = orjson.dumps(output) response = orjson.dumps(output)
return response return response
def _bit_signatures(windows, hash_function):
    """Return one '0'/'1' string per row of ``windows`` projected by ``hash_function``."""
    positive = np.dot(windows, hash_function) > 0
    return [''.join('1' if bit else '0' for bit in bits) for bits in positive]


def create_valid_table(data, window_size, hash_size, correct_indices, incorrect_indices, index):
    """Draw random hash functions until one separates the labelled windows.

    A candidate is a ``window_size`` x ``hash_size`` matrix sampled with
    ``np.random.randn``. It is accepted once every window listed in
    ``correct_indices`` gets the same signature and no window listed in
    ``incorrect_indices`` gets that signature. All rows of ``data`` are then
    bucketed by their signature under the accepted matrix.

    Args:
        data: 2-D array of windows, one window per row.
        window_size: Number of rows of the random projection matrix.
        hash_size: Number of columns of the projection matrix, i.e. the
            number of bits in each signature.
        correct_indices: Row indices that must share a single signature.
            Must not be empty.
        incorrect_indices: Row indices that must not receive that signature.
        index: Unused by the body; kept so the function can be mapped over a
            range of table numbers.

    Returns:
        A dict with ``"hash"`` (the accepted matrix as nested lists) and
        ``"entries"`` (a ``defaultdict(list)`` mapping signature -> row indices).

    Raises:
        IndexError: If ``correct_indices`` is empty.

    Note:
        The loop has no retry limit; it keeps sampling until a separating
        matrix is found.
    """
    data = np.asarray(data)
    # Slice the labelled rows once; list() keeps a tuple of indices from
    # being interpreted as a multi-dimensional index.
    correct_windows = data[list(correct_indices)]
    incorrect_windows = data[list(incorrect_indices)]

    while True:
        hash_function = np.random.randn(window_size, hash_size)
        correct_signatures = _bit_signatures(correct_windows, hash_function)
        reference = correct_signatures[0]
        if (all(signature == reference for signature in correct_signatures)
                and reference not in _bit_signatures(incorrect_windows, hash_function)):
            break

    entries = defaultdict(list)
    for window_index, signature in enumerate(_bit_signatures(data, hash_function)):
        entries[signature].append(window_index)
    return {
        "hash": hash_function.tolist(),
        "entries": entries
    }
@app.route('/update', methods=['POST']) @app.route('/update', methods=['POST'])
def update(): def update():
t0 = time() t0 = time()
raw_data = request.json raw_data = orjson.loads(request.data)
data = raw_data["windows"] data = raw_data["windows"]
data = np.array(data) data = np.array(data)
...@@ -194,7 +145,7 @@ def update(): ...@@ -194,7 +145,7 @@ def update():
incorrect_indices = [int(index) for index, value in label_data.items() if value is False] incorrect_indices = [int(index) for index, value in label_data.items() if value is False]
window = data[correct_indices[0]] window = data[correct_indices[0]]
print("Initialized: " + str(time() - t0))
for t in tables.values(): for t in tables.values():
valid = True valid = True
signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str')) signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
...@@ -209,15 +160,33 @@ def update(): ...@@ -209,15 +160,33 @@ def update():
break break
if valid: if valid:
new_tables.append(t) new_tables.append(t)
print("Filtered good tables: " + str(time() - t0))
try: for index in range(table_size - len(new_tables)):
pool = Pool() entries = defaultdict(list)
func = partial(create_valid_table, data, window_size, hash_size, correct_indices, incorrect_indices) t1 = time()
print('Starting pool: ' + str(time() - t0)) while True:
new_tables.extend(pool.map(func, range(table_size - len(new_tables)))) hash_function = np.random.randn(window_size, hash_size)
finally: correct_signatures = [''.join((np.dot(data[index], hash_function) > 0).astype('int').astype('str')) for
pool.close() index in
pool.join() correct_indices]
incorrect_signatures = [''.join((np.dot(data[index], hash_function) > 0).astype('int').astype('str')) for
index
in incorrect_indices]
if correct_signatures.count(correct_signatures[0]) == len(
correct_signatures) and incorrect_signatures.count(
correct_signatures[0]) == 0:
break
print("first: " + str(time() - t1))
t2 = time()
signatures_bool = np.dot(data, hash_function) > 0
signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
for i in range(len(signatures)):
entries[signatures[i]].append(i)
print("second: " + str(time() - t2))
new_tables.append({
"hash": hash_function.tolist(),
"entries": entries
})
print('Update time: ' + str(time() - t0)) print('Update time: ' + str(time() - t0))
response = {} response = {}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment