Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vig
Sublinear Algorithms for VA
pseudo
Commits
d156b8db
Commit
d156b8db
authored
Aug 27, 2020
by
Kruyff,D.L.W. (Dylan)
Browse files
Super fast table creation
parent
537d59dc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Flaskserver/.idea/workspace.xml
View file @
d156b8db
...
...
@@ -2,7 +2,6 @@
<project
version=
"4"
>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"556080ba-825c-4b55-a92a-867a4df4fb32"
name=
"Default Changelist"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/main.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/main.py"
afterDir=
"false"
/>
</list>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
...
...
@@ -16,8 +15,8 @@
<file
pinned=
"false"
current-in-tab=
"true"
>
<entry
file=
"file://$PROJECT_DIR$/main.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
272
"
>
<caret
line=
"1
17
"
column=
"
28
"
lean-forward=
"true"
selection-start-line=
"1
17
"
selection-start-column=
"
28
"
selection-end-line=
"1
17
"
selection-end-column=
"
28
"
/>
<state
relative-caret-position=
"
369
"
>
<caret
line=
"1
98
"
column=
"
19
"
lean-forward=
"true"
selection-start-line=
"1
98
"
selection-start-column=
"
19
"
selection-end-line=
"1
98
"
selection-end-column=
"
19
"
/>
<folding>
<element
signature=
"e#0#41#0"
expanded=
"true"
/>
</folding>
...
...
@@ -206,12 +205,12 @@
<workItem
from=
"1594589515579"
duration=
"1044000"
/>
<workItem
from=
"1594719112139"
duration=
"10388000"
/>
<workItem
from=
"1595247298901"
duration=
"17719000"
/>
<workItem
from=
"1597658111794"
duration=
"3
0822
000"
/>
<workItem
from=
"1597658111794"
duration=
"3
2577
000"
/>
</task>
<servers
/>
</component>
<component
name=
"TimeTrackingManager"
>
<option
name=
"totallyTimeSpent"
value=
"6
5290
000"
/>
<option
name=
"totallyTimeSpent"
value=
"6
7045
000"
/>
</component>
<component
name=
"ToolWindowManager"
>
<frame
x=
"-7"
y=
"-7"
width=
"1295"
height=
"695"
extended-state=
"6"
/>
...
...
@@ -270,8 +269,8 @@
</entry>
<entry
file=
"file://$PROJECT_DIR$/main.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
272
"
>
<caret
line=
"1
17
"
column=
"
28
"
lean-forward=
"true"
selection-start-line=
"1
17
"
selection-start-column=
"
28
"
selection-end-line=
"1
17
"
selection-end-column=
"
28
"
/>
<state
relative-caret-position=
"
369
"
>
<caret
line=
"1
98
"
column=
"
19
"
lean-forward=
"true"
selection-start-line=
"1
98
"
selection-start-column=
"
19
"
selection-end-line=
"1
98
"
selection-end-column=
"
19
"
/>
<folding>
<element
signature=
"e#0#41#0"
expanded=
"true"
/>
</folding>
...
...
Flaskserver/__pycache__/main.cpython-38.pyc
View file @
d156b8db
No preview for this file type
Flaskserver/main.py
View file @
d156b8db
from
flask
import
Flask
,
jsonify
,
request
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
import
numpy
as
np
from
flask_cors
import
CORS
...
...
@@ -9,10 +8,6 @@ import dask.dataframe as dd
import
os.path
import
json
from
sklearn
import
preprocessing
from
functools
import
partial
from
itertools
import
groupby
from
multiprocessing
import
Pool
import
rapidjson
import
orjson
app
=
Flask
(
__name__
)
...
...
@@ -74,15 +69,6 @@ def create_windows():
print
(
"Sending response: "
+
str
(
time
()
-
t0
))
return
response
def fill_table(data, tables_hash_function, index):
    """Build the bucket table for the hash function at position ``index``.

    Every row of ``data`` is projected with ``tables_hash_function[index]``.
    The row's signature is the string of '1'/'0' characters marking which
    projected components are strictly greater than zero; rows that share a
    signature end up in the same bucket.

    Args:
        data: NumPy array whose rows are the windows to hash
            (assumed 2-D, one window per row -- confirm against the caller).
        tables_hash_function: indexable collection of projection matrices;
            only ``tables_hash_function[index]`` is read.
        index: position of the projection matrix to use. It is also printed,
            as in the original implementation.

    Returns:
        A ``defaultdict(list)`` mapping each signature string to the list of
        row indices (ascending) that produced it.
    """
    print(index)
    table = defaultdict(list)
    # One matrix product for all rows instead of one np.dot call per row;
    # this mirrors the vectorized form used when the tables are created.
    signature_bits = np.dot(data, tables_hash_function[index]) > 0
    for window_index, bits in enumerate(signature_bits):
        signature = ''.join('1' if bit else '0' for bit in bits)
        table[signature].append(window_index)
    return table
@
app
.
route
(
'/create-tables'
,
methods
=
[
'POST'
])
def
create_tables
():
t0
=
time
()
...
...
@@ -101,29 +87,13 @@ def create_tables():
tables
=
[]
for
index
in
range
(
table_size
):
t1
=
time
()
print
(
'------------'
)
print
(
index
)
table
=
defaultdict
(
list
)
print
(
time
()
-
t1
)
signatures1
=
[
np
.
dot
(
data
[
window_index
],
tables_hash_function
[
index
])
>
0
for
window_index
in
range
(
data
.
shape
[
0
])]
print
(
time
()
-
t1
)
signatures
=
[
''
.
join
([
'1'
if
x
else
'0'
for
x
in
lst
])
for
lst
in
signatures1
]
print
(
time
()
-
t1
)
signatures_bool
=
np
.
dot
(
data
,
tables_hash_function
[
index
])
>
0
signatures
=
[
''
.
join
([
'1'
if
x
else
'0'
for
x
in
lst
])
for
lst
in
signatures_bool
]
for
i
in
range
(
len
(
signatures
)):
table
[
signatures
[
i
]].
append
(
i
)
print
(
time
()
-
t1
)
tables
.
append
(
table
)
# try:
# pool = Pool()
# func = partial(fill_table, data, tables_hash_function)
# print('Starting pool: ' + str(time() - t0))
# tables = pool.map(func, range(table_size))
# finally:
# pool.close()
# pool.join()
print
(
'Creation time: '
+
str
(
time
()
-
t0
))
hash_functions
=
np
.
array
(
tables_hash_function
).
tolist
()
...
...
@@ -138,7 +108,7 @@ def create_tables():
@
app
.
route
(
'/query'
,
methods
=
[
'POST'
])
def
query
():
raw_data
=
request
.
json
raw_data
=
orjson
.
loads
(
request
.
data
)
window
=
raw_data
[
"window"
]
tables
=
raw_data
[
"tables"
]
neighbours
=
[]
...
...
@@ -151,34 +121,15 @@ def query():
neighbours_with_frequency
=
dict
(
Counter
(
neighbours
))
for
index
,
frequency
in
neighbours_with_frequency
.
items
():
if
not
frequency
in
output
:
output
[
frequency
]
=
[]
output
[
frequency
].
append
(
index
)
output
[
str
(
frequency
)
]
=
[]
output
[
str
(
frequency
)
].
append
(
index
)
response
=
orjson
.
dumps
(
output
)
return
response
def create_valid_table(data, window_size, hash_size, correct_indices, incorrect_indices, index):
    """Draw random hash functions until one separates the labelled windows.

    A hash function is a ``(window_size, hash_size)`` matrix sampled with
    ``np.random.randn``. It is accepted when every window listed in
    ``correct_indices`` gets the same signature and no window listed in
    ``incorrect_indices`` gets that signature. Once a hash function is
    accepted, all rows of ``data`` are bucketed by their signature.

    Args:
        data: NumPy array of windows, one per row
            (assumed shape ``(n_windows, window_size)`` -- confirm against
            the caller).
        window_size: number of rows of the sampled hash matrix.
        hash_size: number of columns of the sampled hash matrix, i.e. the
            signature length.
        correct_indices: row indices that must share one signature.
        incorrect_indices: row indices that must not have that signature.
        index: unused; kept so the function can still be mapped over a range
            after the leading arguments are bound with ``functools.partial``.

    Returns:
        ``{"hash": <hash matrix as nested lists>, "entries": <defaultdict
        mapping signature string -> list of row indices>}``.

    Raises:
        ValueError: if an index is labelled both correct and incorrect. No
            hash function can satisfy that, and the original code would
            retry forever.
        IndexError: if ``correct_indices`` is empty (no reference signature),
            as in the original implementation.
    """
    correct_indices = list(correct_indices)
    incorrect_indices = list(incorrect_indices)

    conflicting = set(correct_indices) & set(incorrect_indices)
    if conflicting:
        raise ValueError(
            "indices labelled both correct and incorrect: "
            + str(sorted(conflicting))
        )

    # The labelled rows do not change between attempts, so gather them once.
    correct_rows = data[correct_indices]
    incorrect_rows = data[incorrect_indices]

    while True:
        hash_function = np.random.randn(window_size, hash_size)

        correct_bits = np.dot(correct_rows, hash_function) > 0
        reference = correct_bits[0]
        if not (correct_bits == reference).all():
            continue  # the correct windows do not share a bucket

        incorrect_bits = np.dot(incorrect_rows, hash_function) > 0
        if (incorrect_bits == reference).all(axis=1).any():
            continue  # an incorrect window landed in the reference bucket

        break

    # Only the accepted hash function is applied to the full data set.
    entries = defaultdict(list)
    signature_bits = np.dot(data, hash_function) > 0
    for window_index, bits in enumerate(signature_bits):
        signature = ''.join('1' if bit else '0' for bit in bits)
        entries[signature].append(window_index)

    return {"hash": hash_function.tolist(), "entries": entries}
@
app
.
route
(
'/update'
,
methods
=
[
'POST'
])
def
update
():
t0
=
time
()
raw_data
=
request
.
json
raw_data
=
orjson
.
loads
(
request
.
data
)
data
=
raw_data
[
"windows"
]
data
=
np
.
array
(
data
)
...
...
@@ -194,7 +145,7 @@ def update():
incorrect_indices
=
[
int
(
index
)
for
index
,
value
in
label_data
.
items
()
if
value
is
False
]
window
=
data
[
correct_indices
[
0
]]
print
(
"Initialized: "
+
str
(
time
()
-
t0
))
for
t
in
tables
.
values
():
valid
=
True
signature
=
''
.
join
((
np
.
dot
(
window
,
t
[
"hash"
])
>
0
).
astype
(
'int'
).
astype
(
'str'
))
...
...
@@ -209,15 +160,33 @@ def update():
break
if
valid
:
new_tables
.
append
(
t
)
try
:
pool
=
Pool
()
func
=
partial
(
create_valid_table
,
data
,
window_size
,
hash_size
,
correct_indices
,
incorrect_indices
)
print
(
'Starting pool: '
+
str
(
time
()
-
t0
))
new_tables
.
extend
(
pool
.
map
(
func
,
range
(
table_size
-
len
(
new_tables
))))
finally
:
pool
.
close
()
pool
.
join
()
print
(
"Filtered good tables: "
+
str
(
time
()
-
t0
))
for
index
in
range
(
table_size
-
len
(
new_tables
)):
entries
=
defaultdict
(
list
)
t1
=
time
()
while
True
:
hash_function
=
np
.
random
.
randn
(
window_size
,
hash_size
)
correct_signatures
=
[
''
.
join
((
np
.
dot
(
data
[
index
],
hash_function
)
>
0
).
astype
(
'int'
).
astype
(
'str'
))
for
index
in
correct_indices
]
incorrect_signatures
=
[
''
.
join
((
np
.
dot
(
data
[
index
],
hash_function
)
>
0
).
astype
(
'int'
).
astype
(
'str'
))
for
index
in
incorrect_indices
]
if
correct_signatures
.
count
(
correct_signatures
[
0
])
==
len
(
correct_signatures
)
and
incorrect_signatures
.
count
(
correct_signatures
[
0
])
==
0
:
break
print
(
"first: "
+
str
(
time
()
-
t1
))
t2
=
time
()
signatures_bool
=
np
.
dot
(
data
,
hash_function
)
>
0
signatures
=
[
''
.
join
([
'1'
if
x
else
'0'
for
x
in
lst
])
for
lst
in
signatures_bool
]
for
i
in
range
(
len
(
signatures
)):
entries
[
signatures
[
i
]].
append
(
i
)
print
(
"second: "
+
str
(
time
()
-
t2
))
new_tables
.
append
({
"hash"
:
hash_function
.
tolist
(),
"entries"
:
entries
})
print
(
'Update time: '
+
str
(
time
()
-
t0
))
response
=
{}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment