Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
pseudo
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
vig
Sublinear Algorithms for VA
pseudo
Commits
b5d4198f
Commit
b5d4198f
authored
4 years ago
by
Kruyff,D.L.W. (Dylan)
Browse files
Options
Downloads
Patches
Plain Diff
Super fast table creation
parent
1189f2bf
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
Flaskserver/.idea/workspace.xml
+6
-7
6 additions, 7 deletions
Flaskserver/.idea/workspace.xml
Flaskserver/__pycache__/main.cpython-38.pyc
+0
-0
0 additions, 0 deletions
Flaskserver/__pycache__/main.cpython-38.pyc
Flaskserver/main.py
+34
-65
34 additions, 65 deletions
Flaskserver/main.py
with
40 additions
and
72 deletions
Flaskserver/.idea/workspace.xml
+
6
−
7
View file @
b5d4198f
...
...
@@ -2,7 +2,6 @@
<project
version=
"4"
>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"556080ba-825c-4b55-a92a-867a4df4fb32"
name=
"Default Changelist"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/main.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/main.py"
afterDir=
"false"
/>
</list>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
...
...
@@ -16,8 +15,8 @@
<file
pinned=
"false"
current-in-tab=
"true"
>
<entry
file=
"file://$PROJECT_DIR$/main.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
272
"
>
<caret
line=
"1
17
"
column=
"
28
"
lean-forward=
"true"
selection-start-line=
"1
17
"
selection-start-column=
"
28
"
selection-end-line=
"1
17
"
selection-end-column=
"
28
"
/>
<state
relative-caret-position=
"
369
"
>
<caret
line=
"1
98
"
column=
"
19
"
lean-forward=
"true"
selection-start-line=
"1
98
"
selection-start-column=
"
19
"
selection-end-line=
"1
98
"
selection-end-column=
"
19
"
/>
<folding>
<element
signature=
"e#0#41#0"
expanded=
"true"
/>
</folding>
...
...
@@ -206,12 +205,12 @@
<workItem
from=
"1594589515579"
duration=
"1044000"
/>
<workItem
from=
"1594719112139"
duration=
"10388000"
/>
<workItem
from=
"1595247298901"
duration=
"17719000"
/>
<workItem
from=
"1597658111794"
duration=
"3
0822
000"
/>
<workItem
from=
"1597658111794"
duration=
"3
2577
000"
/>
</task>
<servers
/>
</component>
<component
name=
"TimeTrackingManager"
>
<option
name=
"totallyTimeSpent"
value=
"6
5290
000"
/>
<option
name=
"totallyTimeSpent"
value=
"6
7045
000"
/>
</component>
<component
name=
"ToolWindowManager"
>
<frame
x=
"-7"
y=
"-7"
width=
"1295"
height=
"695"
extended-state=
"6"
/>
...
...
@@ -270,8 +269,8 @@
</entry>
<entry
file=
"file://$PROJECT_DIR$/main.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
272
"
>
<caret
line=
"1
17
"
column=
"
28
"
lean-forward=
"true"
selection-start-line=
"1
17
"
selection-start-column=
"
28
"
selection-end-line=
"1
17
"
selection-end-column=
"
28
"
/>
<state
relative-caret-position=
"
369
"
>
<caret
line=
"1
98
"
column=
"
19
"
lean-forward=
"true"
selection-start-line=
"1
98
"
selection-start-column=
"
19
"
selection-end-line=
"1
98
"
selection-end-column=
"
19
"
/>
<folding>
<element
signature=
"e#0#41#0"
expanded=
"true"
/>
</folding>
...
...
This diff is collapsed.
Click to expand it.
Flaskserver/__pycache__/main.cpython-38.pyc
+
0
−
0
View file @
b5d4198f
No preview for this file type
This diff is collapsed.
Click to expand it.
Flaskserver/main.py
+
34
−
65
View file @
b5d4198f
from
flask
import
Flask
,
jsonify
,
request
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
import
numpy
as
np
from
flask_cors
import
CORS
...
...
@@ -9,10 +8,6 @@ import dask.dataframe as dd
import
os.path
import
json
from
sklearn
import
preprocessing
from
functools
import
partial
from
itertools
import
groupby
from
multiprocessing
import
Pool
import
rapidjson
import
orjson
app
=
Flask
(
__name__
)
...
...
@@ -74,15 +69,6 @@ def create_windows():
print
(
"
Sending response:
"
+
str
(
time
()
-
t0
))
return
response
def
fill_table
(
data
,
tables_hash_function
,
index
):
print
(
index
)
table
=
defaultdict
(
list
)
signatures
=
[
''
.
join
(
list
(
map
(
lambda
x
:
'
1
'
if
x
>
0
else
'
0
'
,
np
.
dot
(
data
[
window_index
],
tables_hash_function
[
index
]))))
for
window_index
in
range
(
data
.
shape
[
0
])]
for
i
in
range
(
len
(
signatures
)):
table
[
signatures
[
i
]].
append
(
i
)
return
table
@app.route
(
'
/create-tables
'
,
methods
=
[
'
POST
'
])
def
create_tables
():
t0
=
time
()
...
...
@@ -101,29 +87,13 @@ def create_tables():
tables
=
[]
for
index
in
range
(
table_size
):
t1
=
time
()
print
(
'
------------
'
)
print
(
index
)
table
=
defaultdict
(
list
)
print
(
time
()
-
t1
)
signatures1
=
[
np
.
dot
(
data
[
window_index
],
tables_hash_function
[
index
])
>
0
for
window_index
in
range
(
data
.
shape
[
0
])]
print
(
time
()
-
t1
)
signatures
=
[
''
.
join
([
'
1
'
if
x
else
'
0
'
for
x
in
lst
])
for
lst
in
signatures1
]
print
(
time
()
-
t1
)
signatures_bool
=
np
.
dot
(
data
,
tables_hash_function
[
index
])
>
0
signatures
=
[
''
.
join
([
'
1
'
if
x
else
'
0
'
for
x
in
lst
])
for
lst
in
signatures_bool
]
for
i
in
range
(
len
(
signatures
)):
table
[
signatures
[
i
]].
append
(
i
)
print
(
time
()
-
t1
)
tables
.
append
(
table
)
# try:
# pool = Pool()
# func = partial(fill_table, data, tables_hash_function)
# print('Starting pool: ' + str(time() - t0))
# tables = pool.map(func, range(table_size))
# finally:
# pool.close()
# pool.join()
print
(
'
Creation time:
'
+
str
(
time
()
-
t0
))
hash_functions
=
np
.
array
(
tables_hash_function
).
tolist
()
...
...
@@ -138,7 +108,7 @@ def create_tables():
@app.route
(
'
/query
'
,
methods
=
[
'
POST
'
])
def
query
():
raw_data
=
request
.
json
raw_data
=
orjson
.
loads
(
request
.
data
)
window
=
raw_data
[
"
window
"
]
tables
=
raw_data
[
"
tables
"
]
neighbours
=
[]
...
...
@@ -151,34 +121,15 @@ def query():
neighbours_with_frequency
=
dict
(
Counter
(
neighbours
))
for
index
,
frequency
in
neighbours_with_frequency
.
items
():
if
not
frequency
in
output
:
output
[
frequency
]
=
[]
output
[
frequency
].
append
(
index
)
output
[
str
(
frequency
)
]
=
[]
output
[
str
(
frequency
)
].
append
(
index
)
response
=
orjson
.
dumps
(
output
)
return
response
def
create_valid_table
(
data
,
window_size
,
hash_size
,
correct_indices
,
incorrect_indices
,
index
):
entries
=
defaultdict
(
list
)
while
True
:
hash_function
=
np
.
random
.
randn
(
window_size
,
hash_size
)
correct_signatures
=
[
''
.
join
((
np
.
dot
(
data
[
index
],
hash_function
)
>
0
).
astype
(
'
int
'
).
astype
(
'
str
'
))
for
index
in
correct_indices
]
incorrect_signatures
=
[
''
.
join
((
np
.
dot
(
data
[
index
],
hash_function
)
>
0
).
astype
(
'
int
'
).
astype
(
'
str
'
))
for
index
in
incorrect_indices
]
if
correct_signatures
.
count
(
correct_signatures
[
0
])
==
len
(
correct_signatures
)
and
incorrect_signatures
.
count
(
correct_signatures
[
0
])
==
0
:
break
for
window_index
in
range
(
data
.
shape
[
0
]):
signature
=
''
.
join
((
np
.
dot
(
data
[
window_index
],
hash_function
)
>
0
).
astype
(
'
int
'
).
astype
(
'
str
'
))
entries
[
signature
].
append
(
window_index
)
return
{
"
hash
"
:
hash_function
.
tolist
(),
"
entries
"
:
entries
}
@app.route
(
'
/update
'
,
methods
=
[
'
POST
'
])
def
update
():
t0
=
time
()
raw_data
=
request
.
json
raw_data
=
orjson
.
loads
(
request
.
data
)
data
=
raw_data
[
"
windows
"
]
data
=
np
.
array
(
data
)
...
...
@@ -194,7 +145,7 @@ def update():
incorrect_indices
=
[
int
(
index
)
for
index
,
value
in
label_data
.
items
()
if
value
is
False
]
window
=
data
[
correct_indices
[
0
]]
print
(
"
Initialized:
"
+
str
(
time
()
-
t0
))
for
t
in
tables
.
values
():
valid
=
True
signature
=
''
.
join
((
np
.
dot
(
window
,
t
[
"
hash
"
])
>
0
).
astype
(
'
int
'
).
astype
(
'
str
'
))
...
...
@@ -209,15 +160,33 @@ def update():
break
if
valid
:
new_tables
.
append
(
t
)
try
:
pool
=
Pool
()
func
=
partial
(
create_valid_table
,
data
,
window_size
,
hash_size
,
correct_indices
,
incorrect_indices
)
print
(
'
Starting pool:
'
+
str
(
time
()
-
t0
))
new_tables
.
extend
(
pool
.
map
(
func
,
range
(
table_size
-
len
(
new_tables
))))
finally
:
pool
.
close
()
pool
.
join
()
print
(
"
Filtered good tables:
"
+
str
(
time
()
-
t0
))
for
index
in
range
(
table_size
-
len
(
new_tables
)):
entries
=
defaultdict
(
list
)
t1
=
time
()
while
True
:
hash_function
=
np
.
random
.
randn
(
window_size
,
hash_size
)
correct_signatures
=
[
''
.
join
((
np
.
dot
(
data
[
index
],
hash_function
)
>
0
).
astype
(
'
int
'
).
astype
(
'
str
'
))
for
index
in
correct_indices
]
incorrect_signatures
=
[
''
.
join
((
np
.
dot
(
data
[
index
],
hash_function
)
>
0
).
astype
(
'
int
'
).
astype
(
'
str
'
))
for
index
in
incorrect_indices
]
if
correct_signatures
.
count
(
correct_signatures
[
0
])
==
len
(
correct_signatures
)
and
incorrect_signatures
.
count
(
correct_signatures
[
0
])
==
0
:
break
print
(
"
first:
"
+
str
(
time
()
-
t1
))
t2
=
time
()
signatures_bool
=
np
.
dot
(
data
,
hash_function
)
>
0
signatures
=
[
''
.
join
([
'
1
'
if
x
else
'
0
'
for
x
in
lst
])
for
lst
in
signatures_bool
]
for
i
in
range
(
len
(
signatures
)):
entries
[
signatures
[
i
]].
append
(
i
)
print
(
"
second:
"
+
str
(
time
()
-
t2
))
new_tables
.
append
({
"
hash
"
:
hash_function
.
tolist
(),
"
entries
"
:
entries
})
print
(
'
Update time:
'
+
str
(
time
()
-
t0
))
response
=
{}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment