Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vig
Sublinear Algorithms for VA
pseudo
Commits
2750c150
Commit
2750c150
authored
Sep 30, 2020
by
Kruyff,D.L.W. (Dylan)
Browse files
Trying simple hashing with low memory use (not working)
Former-commit-id:
bb598b63
parent
bcdac582
Changes
5
Hide whitespace changes
Inline
Side-by-side
AngularApp/prototype/src/app/cache.service.ts
View file @
2750c150
...
...
@@ -17,9 +17,9 @@ export class CacheService {
private
_sliderValue
;
private
_queryWindow
;
public
windowSize
=
1
20
;
public
nrOfTables
=
20
;
public
hashSize
=
8
;
public
windowSize
=
2
0
0
;
public
nrOfTables
=
5
;
public
hashSize
=
2
;
public
stepSize
=
200
;
public
querySelectionMode
=
true
;
...
...
Flaskserver/.idea/workspace.xml
View file @
2750c150
...
...
@@ -126,13 +126,17 @@
<option
name=
"oldMeFiltersMigrated"
value=
"true"
/>
</component>
<component
name=
"WindowStateProjectService"
>
<state
x=
"686"
y=
"355"
width=
"6
10
"
height=
"403"
key=
"#com.intellij.fileTypes.FileTypeChooser"
timestamp=
"160
0727680781
"
>
<state
x=
"686"
y=
"355"
width=
"6
64
"
height=
"403"
key=
"#com.intellij.fileTypes.FileTypeChooser"
timestamp=
"160
1384380854
"
>
<screen
x=
"72"
y=
"27"
width=
"1848"
height=
"1053"
/>
</state>
<state
x=
"686"
y=
"355"
width=
"6
10
"
height=
"403"
key=
"#com.intellij.fileTypes.FileTypeChooser/72.27.1848.1053@72.27.1848.1053"
timestamp=
"160
0727680781
"
/>
<state
x=
"7
79
"
y=
"
311
"
width=
"
424
"
height=
"
491
"
key=
"
FileChooser
Dialog
Impl
"
timestamp=
"160
0726193087
"
>
<state
x=
"686"
y=
"355"
width=
"6
64
"
height=
"403"
key=
"#com.intellij.fileTypes.FileTypeChooser/72.27.1848.1053@72.27.1848.1053"
timestamp=
"160
1384380854
"
/>
<state
x=
"7
21
"
y=
"
422
"
width=
"
1200
"
height=
"
800
"
key=
"
DiffContext
Dialog"
timestamp=
"160
1242420342
"
>
<screen
x=
"72"
y=
"27"
width=
"1848"
height=
"1053"
/>
</state>
<state
x=
"779"
y=
"311"
width=
"424"
height=
"491"
key=
"FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053"
timestamp=
"1600726193087"
/>
<state
x=
"721"
y=
"422"
width=
"1200"
height=
"800"
key=
"DiffContextDialog/72.27.1848.1053@72.27.1848.1053"
timestamp=
"1601242420342"
/>
<state
x=
"779"
y=
"311"
width=
"424"
height=
"491"
key=
"FileChooserDialogImpl"
timestamp=
"1601285087193"
>
<screen
x=
"72"
y=
"27"
width=
"1848"
height=
"1053"
/>
</state>
<state
x=
"779"
y=
"311"
width=
"424"
height=
"491"
key=
"FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053"
timestamp=
"1601285087193"
/>
</component>
</project>
\ No newline at end of file
Flaskserver/__pycache__/main.cpython-38.pyc
View file @
2750c150
No preview for this file type
Flaskserver/main.py
View file @
2750c150
...
...
@@ -13,8 +13,9 @@ import bigwig
import
bbi
from
bitarray
import
bitarray
import
_ucrdtw
from
scipy.sparse
import
dia_matrix
reload
=
Fals
e
reload
=
Tru
e
app
=
Flask
(
__name__
)
CORS
(
app
)
...
...
@@ -35,14 +36,38 @@ def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None,
signatures_int
=
np
.
packbits
(
signatures_bool
)
return
signatures_int
.
tolist
(),
hash_function
def
calculate_signatures_
cumsum_weights
(
data
,
window_size
=
None
,
hash_size
=
None
,
hash_function
=
None
):
def
calculate_signatures_
new
(
data
,
window_size
=
None
,
hash_size
=
None
,
hash_function
=
None
):
if
hash_function
is
None
:
hash_function
=
np
.
array
([
np
.
cumsum
(
np
.
random
.
uniform
(
-
1
,
1
,
window_size
))
for
_
in
range
(
hash_size
)]).
transpose
()
signatures_bool
=
np
.
dot
(
data
,
hash_function
)
>
0
signatures_int
=
np
.
packbits
(
signatures_bool
)
if
len
(
data
)
==
len
(
np
.
array
(
hash_function
)[:,
0
]):
signatures_bool
=
np
.
dot
(
data
,
hash_function
)
>
0
output
=
signatures_bool
.
astype
(
int
)[
0
]
print
(
output
)
return
output
print
(
'starting hashing'
)
t0
=
time
()
all_signatures
=
[]
batch_size
=
20
data
=
data
.
transpose
()
temp
=
np
.
zeros
((
batch_size
,
window_size
+
batch_size
-
1
))
for
h
in
range
(
hash_size
):
for
i
in
range
(
batch_size
):
temp
[
i
,
i
:
i
+
window_size
]
=
hash_function
[:,
h
]
print
(
'first: '
+
str
(
time
()
-
t0
))
signatures_bool
=
[
np
.
dot
(
temp
,
data
[
i
:
i
+
window_size
+
batch_size
-
1
])
>
0
for
i
in
range
(
0
,
len
(
data
)
-
window_size
,
batch_size
)]
# signatures_bool = []
# for i in range(0, len(data) - window_size, batch_size):
# if i % 1000000 == 0:
# print(i)
# signatures_bool.append(np.dot(temp, data[i:i + window_size + batch_size - 1]) > 0)
print
(
'second: '
+
str
(
time
()
-
t0
))
all_signatures
.
append
(
np
.
array
(
signatures_bool
).
flatten
().
astype
(
int
))
print
(
'done'
)
signatures_int
=
np
.
packbits
(
np
.
stack
(
np
.
array
(
all_signatures
),
axis
=
1
),
axis
=
0
).
flatten
()
return
signatures_int
.
tolist
(),
hash_function
lsh_function
=
calculate_signatures_cumsum_weights
lsh_function
=
calculate_signatures_new
@
app
.
route
(
'/'
,
methods
=
[
'GET'
])
def
index
():
...
...
@@ -61,11 +86,10 @@ def read_data():
}
response
=
orjson
.
dumps
(
response
)
print
(
'Data read: '
+
str
(
time
()
-
t0
))
query
=
data
[
10000
:
11200
]
print
(
query
)
loc
,
dist
=
_ucrdtw
.
ucrdtw
(
data
,
query
,
0.05
,
True
)
print
(
data
[
loc
:
loc
+
120
])
print
(
'found query: '
+
str
(
loc
)
+
'['
+
str
(
time
()
-
t0
)
+
']'
)
# query = data[12000:24000]
# loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True)
# print(data[loc:loc+120])
# print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
return
response
@
app
.
route
(
'/create-windows'
,
methods
=
[
'POST'
])
...
...
@@ -74,14 +98,10 @@ def create_windows():
if
reload
:
raw_data
=
request
.
json
window_size
=
int
(
raw_data
[
'parameters'
][
"windowsize"
])
data
=
bigwig
.
chunk
(
'test.bigWig'
,
12000
,
int
(
12000
/
window_size
),
int
(
12000
/
6
),
[
'chr1'
],
verbose
=
True
,
)
chromsize
=
bbi
.
chromsizes
(
'test.bigWig'
)[
'chr1'
]
step_size
=
chromsize
/
10000
data
=
bigwig
.
get
(
'test.bigWig'
,
'chr1'
,
0
,
chromsize
,
20000000
)
data
=
(
data
-
np
.
min
(
data
))
/
np
.
ptp
(
data
)
print
(
data
.
shape
)
np
.
save
(
'processed-data'
,
data
)
print
(
'Windows created: '
+
str
(
time
()
-
t0
))
...
...
@@ -116,7 +136,10 @@ def lsh(data, window_size, hash_size, table_size):
for
index
in
range
(
table_size
):
signatures
,
hash_function
=
lsh_function
(
data
,
window_size
=
window_size
,
hash_size
=
hash_size
)
table
=
{
k
:
v
for
v
,
k
in
enumerate
(
signatures
)}
print
(
'creating dictionary'
)
table
=
defaultdict
(
list
)
for
v
,
k
in
enumerate
(
signatures
):
table
[
k
].
append
(
v
)
tables
.
append
(
table
)
tables_hash_function
.
append
(
hash_function
.
tolist
())
...
...
@@ -135,7 +158,7 @@ def similarity():
for
t
in
tables
.
values
():
signature
=
lsh_function
(
window
,
hash_function
=
t
[
"hash"
])
neighbours
.
extend
(
t
[
"entries"
][
signature
])
neighbours
.
extend
(
t
[
"entries"
][
str
(
signature
)
])
neighbours_with_frequency
=
dict
(
Counter
(
neighbours
))
for
index
,
frequency
in
neighbours_with_frequency
.
items
():
output
[
str
(
frequency
)].
append
(
index
)
...
...
@@ -208,7 +231,7 @@ def query():
raw_data
=
orjson
.
loads
(
request
.
data
)
window
=
raw_data
[
'window'
]
if
isinstance
(
window
,
int
):
output
=
np
.
load
(
'processed-data.npy'
)[
window
]
output
=
np
.
load
(
'processed-data.npy'
)[
window
:
window
+
12000
]
response
=
orjson
.
dumps
(
output
.
tolist
())
print
(
"Query done: "
+
str
(
time
()
-
t0
))
return
response
...
...
Flaskserver/processed-data.npy
View file @
2750c150
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment