Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vig
Sublinear Algorithms for VA
pseudo
Commits
bcdac582
Commit
bcdac582
authored
Sep 25, 2020
by
Kruyff,D.L.W. (Dylan)
Browse files
Slight speed up on table creation
Former-commit-id:
6375d5da
parent
9b234f1e
Changes
6
Hide whitespace changes
Inline
Side-by-side
AngularApp/prototype/src/app/cache.service.ts
View file @
bcdac582
...
...
@@ -19,7 +19,7 @@ export class CacheService {
public
windowSize
=
120
;
public
nrOfTables
=
20
;
public
hashSize
=
4
;
public
hashSize
=
8
;
public
stepSize
=
200
;
public
querySelectionMode
=
true
;
...
...
@@ -69,6 +69,7 @@ export class CacheService {
/**
 * Asks the API to create tables for the current parameters, stores the
 * result on `this.tables`, and logs the stored value to the console.
 */
async createTables(): Promise<void> {
  const created = await this.api.createTables(this.parameters);
  this.tables = created;
  console.log(this.tables);
}
async
getSimilarWindows
():
Promise
<
any
>
{
...
...
AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts
View file @
bcdac582
...
...
@@ -10,7 +10,7 @@ export class LabelingWindowComponent implements OnInit {
public
topk
;
public
subplots
=
[];
public
labels
:
boolean
[]
=
[];
private
k
=
5
;
private
k
=
12
;
constructor
(
private
service
:
CacheService
)
{
}
...
...
Flaskserver/.idea/workspace.xml
View file @
bcdac582
...
...
@@ -20,8 +20,10 @@
</component>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"556080ba-825c-4b55-a92a-867a4df4fb32"
name=
"Default Changelist"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/.idea/workspace.xml"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/.idea/workspace.xml"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/main.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/main.py"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/processed-data.npy"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/processed-data.npy"
afterDir=
"false"
/>
</list>
<option
name=
"SHOW_DIALOG"
value=
"false"
/>
<option
name=
"HIGHLIGHT_CONFLICTS"
value=
"true"
/>
...
...
Flaskserver/__pycache__/main.cpython-38.pyc
View file @
bcdac582
No preview for this file type
Flaskserver/main.py
View file @
bcdac582
...
...
@@ -11,12 +11,39 @@ import orjson
import
dask.dataframe
as
dd
import
bigwig
import
bbi
from
bitarray
import
bitarray
import
_ucrdtw
reload
=
False
app
=
Flask
(
__name__
)
CORS
(
app
)
def calculate_signatures_random_weights(data, window_size=None, hash_size=None,
                                        hash_function=None):
    """Project ``data`` onto a weight matrix and encode the signs as bit strings.

    Each signature character is '1' where the dot product of ``data`` with the
    matching column of ``hash_function`` is strictly positive, else '0'.

    Args:
        data: Array-like passed as the left operand of ``np.dot``.
        window_size: Row count of the generated weight matrix. Only used when
            ``hash_function`` is None.
        hash_size: Column count of the generated weight matrix. Only used when
            ``hash_function`` is None.
        hash_function: Weight matrix to reuse. When None, one is drawn
            uniformly from [-1, 1) with shape ``(window_size, hash_size)``.

    Returns:
        If the projection is one-dimensional: the signature string alone.
        Otherwise: a tuple ``(signatures, hash_function)`` where ``signatures``
        is a list with one string per row of the projection.

    Raises:
        ValueError: If ``hash_function`` is None and ``window_size`` or
            ``hash_size`` is missing.
    """
    if hash_function is None:
        # Fail early with a clear message instead of an obscure NumPy error.
        if window_size is None or hash_size is None:
            raise ValueError(
                "window_size and hash_size are required when hash_function is not given"
            )
        hash_function = np.random.uniform(-1, 1, size=(window_size, hash_size))

    signatures_bool = np.dot(data, hash_function) > 0

    # A single projected vector yields one bare signature string; callers
    # that pass one window use the result directly as a lookup key.
    if signatures_bool.ndim == 1:
        return ''.join('1' if bit else '0' for bit in signatures_bool)

    signatures = [
        ''.join('1' if bit else '0' for bit in row)
        for row in signatures_bool
    ]
    return signatures, hash_function
def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None,
                                        hash_function=None):
    """Project ``data`` onto cumulative-sum weights and pack the signs into ints.

    A bit is set where the dot product of ``data`` with the matching column of
    ``hash_function`` is strictly positive. The bits of each projected row are
    packed most-significant-bit first (zero-padded on the right to a whole
    number of bytes) and combined into one integer per row.

    Args:
        data: Array-like passed as the left operand of ``np.dot``.
        window_size: Length of each generated weight vector. Only used when
            ``hash_function`` is None.
        hash_size: Number of generated weight vectors. Only used when
            ``hash_function`` is None.
        hash_function: Weight matrix to reuse. When None, ``hash_size``
            vectors are built as the cumulative sum of ``window_size`` uniform
            draws from [-1, 1) and stacked as columns.

    Returns:
        A tuple ``(signatures, hash_function)``. ``signatures`` is a list with
        one int per projected row; a one-dimensional projection is treated as
        a single row and yields a one-element list.

    Raises:
        ValueError: If ``hash_function`` is None and ``window_size`` or
            ``hash_size`` is missing.
    """
    if hash_function is None:
        if window_size is None or hash_size is None:
            raise ValueError(
                "window_size and hash_size are required when hash_function is not given"
            )
        hash_function = np.array([
            np.cumsum(np.random.uniform(-1, 1, window_size))
            for _ in range(hash_size)
        ]).transpose()

    signatures_bool = np.atleast_2d(np.dot(data, hash_function) > 0)

    # Pack along the last axis so bits never spill from one row into the next.
    # Without an axis, packbits flattens the array, which only lines up with
    # the rows when hash_size is exactly 8.
    packed = np.packbits(signatures_bool, axis=-1)
    signatures = [int.from_bytes(row.tobytes(), "big") for row in packed]
    return signatures, hash_function
lsh_function
=
calculate_signatures_cumsum_weights
@app.route('/', methods=['GET'])
def index():
    """Respond to GET requests on the root URL with the fixed text "hi"."""
    greeting = "hi"
    return greeting
...
...
@@ -34,6 +61,11 @@ def read_data():
}
response
=
orjson
.
dumps
(
response
)
print
(
'Data read: '
+
str
(
time
()
-
t0
))
query
=
data
[
10000
:
11200
]
print
(
query
)
loc
,
dist
=
_ucrdtw
.
ucrdtw
(
data
,
query
,
0.05
,
True
)
print
(
data
[
loc
:
loc
+
120
])
print
(
'found query: '
+
str
(
loc
)
+
'['
+
str
(
time
()
-
t0
)
+
']'
)
return
response
@
app
.
route
(
'/create-windows'
,
methods
=
[
'POST'
])
...
...
@@ -63,6 +95,7 @@ def create_tables():
hash_size
=
int
(
raw_data
[
'parameters'
][
"hashsize"
])
table_size
=
int
(
raw_data
[
'parameters'
][
"tablesize"
])
t0
=
time
()
hash_functions
,
tables
=
lsh
(
data
,
window_size
,
hash_size
,
table_size
)
response
=
{}
...
...
@@ -71,40 +104,26 @@ def create_tables():
"hash"
:
hash_functions
[
table_index
],
"entries"
:
tables
[
table_index
]
}
response
=
orjson
.
dumps
(
response
)
response
=
jsonify
(
response
)
print
(
'done: '
+
str
(
time
()
-
t0
))
return
response
def lsh(data, window_size, hash_size, table_size):
    """Build ``table_size`` signature tables for ``data`` using ``lsh_function``.

    For every table a fresh hash function is obtained from ``lsh_function``
    and each returned signature is mapped to the list of positions at which it
    occurred in the signature list. Timing information is printed along the
    way.

    Args:
        data: Array handed to ``lsh_function``; its ``shape`` is printed.
            Presumably one row per window - TODO confirm against the caller.
        window_size: Forwarded to ``lsh_function``.
        hash_size: Forwarded to ``lsh_function``.
        table_size: Number of tables to build.

    Returns:
        A tuple ``(hash_functions, tables)``. ``hash_functions`` holds each
        table's hash function converted with ``tolist()``; ``tables`` holds
        one ``dict`` per table mapping signature -> list of positions.
    """
    t0 = time()
    print('Starting: ' + str(time() - t0))
    hash_functions = []
    print('Init time: ' + str(time() - t0))
    tables = []
    print(data.shape)
    for _ in range(table_size):
        t1 = time()
        signatures, hash_function = lsh_function(
            data, window_size=window_size, hash_size=hash_size)
        # Keep every position that shares a signature. A plain
        # {signature: position} mapping would retain only the last one, while
        # the lookups in this file extend a list with, and test membership
        # in, each entry.
        buckets = defaultdict(list)
        for position, signature in enumerate(signatures):
            buckets[signature].append(position)
        tables.append(dict(buckets))
        hash_functions.append(hash_function.tolist())
        print(time() - t1)
    print('Creation time: ' + str(time() - t0))
    return hash_functions, tables
def calculate_signatures_random_weights(data, window_size=None, hash_size=None,
                                        hash_function=None):
    """Project ``data`` onto a weight matrix and encode the signs as bit strings.

    Each signature character is '1' where the dot product of ``data`` with the
    matching column of ``hash_function`` is strictly positive, else '0'.

    Args:
        data: Array-like passed as the left operand of ``np.dot``.
        window_size: Row count of the generated weight matrix. Only used when
            ``hash_function`` is None.
        hash_size: Column count of the generated weight matrix. Only used when
            ``hash_function`` is None.
        hash_function: Weight matrix to reuse. When None, one is drawn
            uniformly from [-100, 100) with shape ``(window_size, hash_size)``.

    Returns:
        If the projection is one-dimensional: the signature string alone.
        Otherwise: a tuple ``(signatures, hash_function)`` where ``signatures``
        is a list with one string per row of the projection.

    Raises:
        ValueError: If ``hash_function`` is None and ``window_size`` or
            ``hash_size`` is missing.
    """
    if hash_function is None:
        # Fail early with a clear message instead of an obscure NumPy error.
        if window_size is None or hash_size is None:
            raise ValueError(
                "window_size and hash_size are required when hash_function is not given"
            )
        hash_function = np.random.uniform(-100, 100, size=(window_size, hash_size))

    signatures_bool = np.dot(data, hash_function) > 0

    # A single projected vector yields one bare signature string.
    if signatures_bool.ndim == 1:
        return ''.join('1' if bit else '0' for bit in signatures_bool)

    signatures = [
        ''.join('1' if bit else '0' for bit in row)
        for row in signatures_bool
    ]
    return signatures, hash_function
@
app
.
route
(
'/similarity'
,
methods
=
[
'POST'
])
def
similarity
():
t0
=
time
()
...
...
@@ -115,7 +134,7 @@ def similarity():
output
=
defaultdict
(
list
)
for
t
in
tables
.
values
():
signature
=
calculate_signatures_random_weights
(
window
,
hash_function
=
t
[
"hash"
])
signature
=
lsh_function
(
window
,
hash_function
=
t
[
"hash"
])
neighbours
.
extend
(
t
[
"entries"
][
signature
])
neighbours_with_frequency
=
dict
(
Counter
(
neighbours
))
for
index
,
frequency
in
neighbours_with_frequency
.
items
():
...
...
@@ -143,7 +162,7 @@ def update():
for
t
in
tables
.
values
():
valid
=
True
signature
=
calculate_signatures_random_weights
(
window
,
hash_function
=
t
[
'hash'
])
signature
=
lsh_function
(
window
,
hash_function
=
t
[
'hash'
])
neighbours
=
t
[
"entries"
][
signature
]
for
index
in
correct_indices
:
if
index
not
in
neighbours
:
...
...
@@ -160,11 +179,11 @@ def update():
entries
=
defaultdict
(
list
)
t1
=
time
()
while
True
:
correct_signatures
,
hash_function
=
calculate_signatures_random_weights
(
data
[
correct_indices
],
window_size
=
window_size
,
hash_size
=
hash_size
)
incorrect_signatures
,
_
=
calculate_signatures_random_weights
(
data
[
incorrect_indices
],
hash_function
=
hash_function
)
correct_signatures
,
hash_function
=
lsh_function
(
data
[
correct_indices
],
window_size
=
window_size
,
hash_size
=
hash_size
)
incorrect_signatures
,
_
=
lsh_function
(
data
[
incorrect_indices
],
hash_function
=
hash_function
)
if
correct_signatures
.
count
(
correct_signatures
[
0
])
==
len
(
correct_signatures
)
and
incorrect_signatures
.
count
(
correct_signatures
[
0
])
==
0
:
break
signatures
,
_
=
calculate_signatures_random_weights
(
data
,
hash_function
=
hash_function
)
signatures
,
_
=
lsh_function
(
data
,
hash_function
=
hash_function
)
for
i
in
range
(
len
(
signatures
)):
entries
[
signatures
[
i
]].
append
(
i
)
print
(
str
(
index
)
+
": "
+
str
(
time
()
-
t1
))
...
...
Flaskserver/processed-data.npy
View file @
bcdac582
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment