Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vig
Sublinear Algorithms for VA
pseudo
Commits
d746d449
Commit
d746d449
authored
Sep 24, 2020
by
Kruyff,D.L.W. (Dylan)
Browse files
Extracted lsh method from table creation
Former-commit-id:
95e90860
parent
1270bdb5
Changes
3
Hide whitespace changes
Inline
Side-by-side
Flaskserver/.idea/workspace.xml
View file @
d746d449
...
...
@@ -20,7 +20,6 @@
</component>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"556080ba-825c-4b55-a92a-867a4df4fb32"
name=
"Default Changelist"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/.idea/workspace.xml"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/.idea/workspace.xml"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/main.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/main.py"
afterDir=
"false"
/>
</list>
...
...
Flaskserver/__pycache__/main.cpython-38.pyc
View file @
d746d449
No preview for this file type
Flaskserver/main.py
View file @
d746d449
...
...
@@ -57,36 +57,130 @@ def create_windows():
@app.route('/create-tables', methods=['POST'])
def create_tables():
    """Build the LSH index over the preprocessed window data.

    Request JSON: {'parameters': {'windowsize', 'hashsize', 'tablesize'}}.
    Loads the windows from 'processed-data.npy' and returns orjson-encoded
    bytes mapping each table index (as a string) to
    {'hash': projection matrix as nested lists, 'entries': signature -> [window indices]}.
    """
    t0 = time()
    data = np.load('processed-data.npy')
    raw_data = orjson.loads(request.data)
    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])
    print('Starting: ' + str(time() - t0))
    # The random projection matrices are drawn inside lsh(); the previous
    # list comprehension that pre-built table_size uniform(-100, 100)
    # matrices here was never used and only wasted time and memory.
    hash_functions, tables = lsh(data, window_size, hash_size, table_size)
    response = {}
    for table_index in range(table_size):
        response[str(table_index)] = {
            "hash": hash_functions[table_index],
            "entries": tables[table_index]
        }
    response = orjson.dumps(response)
    return response
def lsh(data, window_size, hash_size, table_size):
    """Create `table_size` random-hyperplane LSH tables over `data`.

    Each table maps a bit-string signature to the list of row indices of
    `data` that hash into that bucket.

    Returns:
        (hash functions, tables): the per-table projection matrices as
        nested Python lists, and the per-table defaultdict(signature -> [indices]).
    """
    t0 = time()
    print('Starting: ' + str(time() - t0))
    tables_hash_function = []
    print('Init time: ' + str(time() - t0))
    tables = []
    for _ in range(table_size):
        t1 = time()
        table = defaultdict(list)
        # Draw a fresh random hash function and compute every window's signature.
        # (The previous code also indexed tables_hash_function[index] before
        # anything had been appended — an IndexError on the first iteration —
        # and the signatures it computed were immediately overwritten; that
        # dead/broken code is removed.)
        signatures, hash_function = calculate_signatures_random_weights(
            data, window_size=window_size, hash_size=hash_size)
        for i in range(len(signatures)):
            table[signatures[i]].append(i)
        tables.append(table)
        tables_hash_function.append(hash_function.tolist())
        print(time() - t1)
    print('Creation time: ' + str(time() - t0))
    # Return the list of per-table hash functions directly; the former
    # np.array(...).tolist() round-trip was discarded on the next line.
    return tables_hash_function, tables
def calculate_signatures_random_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Hash `data` with a random-hyperplane LSH function.

    When `hash_function` is None a fresh (window_size, hash_size) matrix of
    uniform(-100, 100) weights is drawn; otherwise the given one is reused.

    Returns:
        - For a single window (1-D projection): just its signature string.
        - For a batch (2-D projection): (list of signature strings, hash_function).
    """
    if hash_function is None:
        hash_function = np.random.uniform(-100, 100, size=(window_size, hash_size))

    above_plane = np.dot(data, hash_function) > 0

    def _bitstring(flags):
        # One character per hyperplane: '1' if the point is on the positive side.
        return ''.join('1' if flag else '0' for flag in flags)

    if above_plane.ndim == 1:
        # Single query window: callers expect just the signature, no tuple.
        return _bitstring(above_plane)
    return [_bitstring(row) for row in above_plane], hash_function
@app.route('/similarity', methods=['POST'])
def similarity():
    """Find windows that collide with the query in the LSH tables.

    Request JSON: {'query': window values, 'tables': {index: {'hash', 'entries'}}}.
    Returns orjson bytes mapping a collision count (as a string) to the list
    of window indices that collided with the query in that many tables.
    """
    t0 = time()
    payload = orjson.loads(request.data)
    query_window = payload['query']
    lsh_tables = payload["tables"]
    candidates = []
    grouped = defaultdict(list)
    # Collect the query's bucket from every table.
    for table in lsh_tables.values():
        bucket_key = calculate_signatures_random_weights(query_window, hash_function=table["hash"])
        candidates.extend(table["entries"][bucket_key])
    # Group candidate indices by how many tables voted for them.
    for window_index, votes in dict(Counter(candidates)).items():
        grouped[str(votes)].append(window_index)
    response = orjson.dumps(grouped)
    print("Similarity done: " + str(time() - t0))
    return response
@app.route('/update', methods=['POST'])
def update():
    """Refit the LSH tables so they agree with the user's labels.

    Request JSON: {'labelData': {index: bool}, 'tables', 'query',
    'parameters': {'windowsize', 'hashsize', 'tablesize'}}.

    Keeps every existing table whose query bucket contains all positively
    labelled indices and no negatively labelled ones, then rejection-samples
    fresh hash functions until the table count is back to table_size.
    Returns the new tables via flask.jsonify.
    """
    t0 = time()
    raw_data = orjson.loads(request.data)
    data = np.load('processed-data.npy')
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]
    window = raw_data["query"]
    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])
    new_tables = []
    correct_indices = [int(index) for index, value in label_data.items() if value is True]
    incorrect_indices = [int(index) for index, value in label_data.items() if value is False]
    # Keep only the tables that are already consistent with the labels.
    for t in tables.values():
        valid = True
        signature = calculate_signatures_random_weights(window, hash_function=t['hash'])
        neighbours = t["entries"][signature]
        for index in correct_indices:
            if index not in neighbours:
                valid = False
                break
        for index in incorrect_indices:
            if index in neighbours:
                valid = False
                break
        if valid:
            new_tables.append(t)
    # Re-sample replacement hash functions for the discarded tables.
    for index in range(table_size - len(new_tables)):
        entries = defaultdict(list)
        t1 = time()
        while True:
            # NOTE(review): assumes correct_indices is non-empty — an all-negative
            # labelling would raise IndexError below; confirm against callers.
            correct_signatures, hash_function = calculate_signatures_random_weights(
                data[correct_indices], window_size=window_size, hash_size=hash_size)
            incorrect_signatures, _ = calculate_signatures_random_weights(
                data[incorrect_indices], hash_function=hash_function)
            # Accept only a hash function that sends all positives to one bucket
            # and no negative into that bucket.
            if correct_signatures.count(correct_signatures[0]) == len(correct_signatures) \
                    and incorrect_signatures.count(correct_signatures[0]) == 0:
                break
        signatures, _ = calculate_signatures_random_weights(data, hash_function=hash_function)
        for i in range(len(signatures)):
            entries[signatures[i]].append(i)
        print(str(index) + ": " + str(time() - t1))
        new_tables.append({"hash": hash_function.tolist(), "entries": entries})
    print('Update time: ' + str(time() - t0))
    # Build the response from new_tables only. The previous tail was garbled:
    # it referenced undefined `hash_functions`/indexed `tables` by int, left a
    # dict literal unclosed, and serialized twice (orjson.dumps then jsonify).
    response = {}
    for table_index in range(len(new_tables)):
        response[table_index] = {
            "hash": new_tables[table_index]["hash"],
            "entries": new_tables[table_index]["entries"]
        }
    response = jsonify(response)
    return response
@
app
.
route
(
'/query'
,
methods
=
[
'POST'
])
...
...
@@ -115,27 +209,6 @@ def window():
print
(
"Query done: "
+
str
(
time
()
-
t0
))
return
response
@app.route('/similarity', methods=['POST'])
def similarity():
    """Look up the query window in every LSH table and rank collisions.

    Request JSON: {'query': window values, 'tables': {index: {'hash', 'entries'}}}.
    Returns orjson bytes mapping a collision frequency (stringified) to the
    window indices seen in the query's bucket that many times.
    """
    t0 = time()
    body = orjson.loads(request.data)
    query = body['query']
    hash_tables = body["tables"]
    matches = []
    by_frequency = defaultdict(list)
    for table in hash_tables.values():
        # Recompute the query's bucket key for this table's hash function.
        hyperplane_side = np.dot(query, table["hash"]) > 0
        bucket = ''.join(['1' if bit else '0' for bit in hyperplane_side])
        matches.extend(table["entries"][bucket])
    for window_index, frequency in dict(Counter(matches)).items():
        by_frequency[str(frequency)].append(window_index)
    result = orjson.dumps(by_frequency)
    print("Similarity done: " + str(time() - t0))
    return result
@
app
.
route
(
'/average-progress'
,
methods
=
[
'POST'
])
def
average_progress
():
t0
=
time
()
...
...
@@ -192,76 +265,4 @@ def average_table():
print
(
"Average calculated: "
+
str
(
time
()
-
t1
))
response
=
orjson
.
dumps
(
output
)
print
(
"Averages calculated: "
+
str
(
time
()
-
t0
))
return
response
@app.route('/update', methods=['POST'])
def update():
    """Refit the LSH tables so they agree with the user's labels.

    Request JSON: {'labelData': {index: bool}, 'tables', 'query',
    'parameters': {'windowsize', 'hashsize', 'tablesize'}}.

    Keeps every table whose query bucket contains all positively labelled
    indices and none of the negatively labelled ones, then rejection-samples
    fresh hash functions to bring the table count back to table_size.
    Returns the resulting tables via flask.jsonify.
    """
    t0 = time()
    print("Start")
    raw_data = orjson.loads(request.data)
    print("Data loaded: " + str(time() - t0))
    data = np.load('processed-data.npy')
    label_data = raw_data["labelData"]
    tables = raw_data["tables"]
    window = raw_data["query"]
    window_size = int(raw_data['parameters']["windowsize"])
    hash_size = int(raw_data['parameters']["hashsize"])
    table_size = int(raw_data['parameters']["tablesize"])
    new_tables = []
    # Split the labelled indices by their boolean label.
    correct_indices = [int(index) for index, value in label_data.items() if value is True]
    incorrect_indices = [int(index) for index, value in label_data.items() if value is False]
    print("Initialized: " + str(time() - t0))
    # Keep only the existing tables already consistent with the labels.
    for t in tables.values():
        valid = True
        # Bucket key of the query under this table's hash function.
        signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
        neighbours = t["entries"][signature]
        for index in correct_indices:
            if index not in neighbours:
                valid = False
                break
        for index in incorrect_indices:
            if index in neighbours:
                valid = False
                break
        if valid:
            new_tables.append(t)
    print("Filtered good tables: " + str(time() - t0))
    # Replace each discarded table with a freshly sampled one.
    for index in range(table_size - len(new_tables)):
        entries = defaultdict(list)
        t1 = time()
        while True:
            # Rejection sampling: draw a Gaussian projection matrix and keep it
            # only if all positives share one bucket that contains no negative.
            # NOTE(review): assumes correct_indices is non-empty — an
            # all-negative labelling would hit IndexError below; confirm.
            hash_function = np.random.randn(window_size, hash_size)
            correct_signatures = [''.join((np.dot(data[i], hash_function) > 0).astype('int').astype('str'))
                                  for i in correct_indices]
            incorrect_signatures = [''.join((np.dot(data[i], hash_function) > 0).astype('int').astype('str'))
                                    for i in incorrect_indices]
            if correct_signatures.count(correct_signatures[0]) == len(correct_signatures) and incorrect_signatures.count(correct_signatures[0]) == 0:
                break
        print("first: " + str(time() - t1))
        t2 = time()
        # Hash every window with the accepted function and fill the buckets.
        signatures_bool = np.dot(data, hash_function) > 0
        signatures = [''.join(['1' if x else '0' for x in lst]) for lst in signatures_bool]
        for i in range(len(signatures)):
            entries[signatures[i]].append(i)
        print("second: " + str(time() - t2))
        new_tables.append({"hash": hash_function.tolist(), "entries": entries})
    print('Update time: ' + str(time() - t0))
    # Serialize the kept + regenerated tables, keyed by integer position.
    response = {}
    for table_index in range(len(new_tables)):
        response[table_index] = {
            "hash": new_tables[table_index]["hash"],
            "entries": new_tables[table_index]["entries"]
        }
    response = jsonify(response)
    return response
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment