vig / Sublinear Algorithms for VA / pseudo

Commit be2e7008, authored Jul 27, 2021 by Yuncong Yu

    Extract config files; refactor cache files; reformat code.

Parent: 8104396f
Changes: 6 files
.gitignore

@@ -7,8 +7,7 @@ __pycache__
 *.pyd
 *.pyo
 *.obj
-201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01.h5
-201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5
+EGR/
 AngularApp/prototype/node_modules
 backend/cache/
 clion-log.txt
backend/src/main.py

 import logging
+import yaml
 from pathlib import Path
 from time import time, perf_counter
-from typing import Dict, List
+from typing import Any, Dict, List, Union
 import numpy as np
 import orjson
@@ -12,21 +13,38 @@ from src import preprocessing
 from src import pseudo
 
-# Config
-path_preprocessed_data_npy = 'cache/processed-data.npy'
-reload = False
+
+# Read config
+def read_config(path_global_config_yml: Union[Path, str] = Path("../config.yml")) -> Dict[str, Any]:
+    with open(path_global_config_yml, "r") as fp:
+        config = yaml.load(fp, yaml.FullLoader)
+    # reload = False
+    reload = config["reload"]
+    path_config_yml = Path(config["path_config_yml"])
+    with open(path_config_yml, "r") as fp:
+        config = yaml.load(fp, yaml.FullLoader)
+    return config
+
+
+config = read_config()
+# path_preprocessed_data_npy = 'cache/preprocessed-data.npy'
+path_data_hdf = Path(config["path_data_hdf"])
+path_meta_json = Path(config["path_meta_json"])
+path_preprocessed_data_npy = Path(config["dir_cache"]) / "preprocessed_data.npy"
+channel_names = config["channels"]
+dir_in_hdf = config["dir_in_hdf"]
 
 logging.basicConfig(level=logging.INFO)
 
 app = Flask(__name__)
 CORS(app)
 
-@app.route('/', methods=['GET'])
+@app.route("/", methods=["GET"])
 def index():
     return "hi"
 
-@app.route('/read-data', methods=['GET'])
+@app.route("/read-data", methods=["GET"])
 def read_data():
     """
     Load raw data.
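The new read_config resolves configuration in two stages: the repo-level config.yml holds only global flags plus a pointer to a dataset-specific file, which is then loaded in its place. A minimal standalone sketch of the same lookup (using yaml.safe_load, which suffices for these plain-data files; file names follow the config files added at the bottom of this commit):

# Two-stage config resolution as introduced in this commit (sketch).
from pathlib import Path
import yaml

with open(Path("../config.yml"), "r") as fp:
    global_config = yaml.safe_load(fp)   # e.g. {"reload": False, "path_config_yml": "..."}

# The global file only points at the dataset-specific config, which replaces it.
with open(Path(global_config["path_config_yml"]), "r") as fp:
    config = yaml.safe_load(fp)          # path_data_hdf, path_meta_json, channels, ...

print(config["channels"])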
@@ -43,19 +61,22 @@ def read_data():
     """
-    logging.info('Loading data ...')
+    logging.info("Loading data ...")
     time_start = perf_counter()
 
-    # response = preprocessing.read_weather_data()
-    response = preprocessing.read_egr_data()
+    # response = preprocessing.read_egr_data()
+    response = preprocessing.read_data(path_data_hdf=path_data_hdf,
+                                       path_meta_json=path_meta_json,
+                                       channel_names=channel_names,
+                                       dir_in_hdf=dir_in_hdf)
     response = orjson.dumps(response)
 
-    logging.info(f'Completed loading data with {perf_counter() - time_start:.2f} second(s).')
+    logging.info(f"Completed loading data with {perf_counter() - time_start:.2f} second(s).")
     return response
 
-@app.route('/create-windows', methods=['POST'])
+@app.route("/create-windows", methods=["POST"])
 def create_windows():
     """
     Creates windows to transform the local pattern search problem to time series indexing.
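With the endpoint now parameterized by the config, a client call looks like this (sketch; host and port are assumptions, not part of this commit):

# Hypothetical client for GET /read-data (assumes the Flask app on localhost:5000).
import requests

channels = requests.get("http://localhost:5000/read-data").json()
for channel in channels:             # [{"index": [...], "values": [...], "name": ...}, ...]
    print(channel["name"], len(channel["values"]))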
@@ -73,21 +94,21 @@ def create_windows():
     """
-    logging.info('Creating window ...')
+    logging.info("Creating window ...")
    time_start = perf_counter()
 
     if not Path(path_preprocessed_data_npy).is_file():
         raw_data = request.json
-        window_size = int(raw_data['parameters']["windowsize"])
+        window_size = int(raw_data["parameters"]["windowsize"])
         # preprocessing.create_eeg_windows(window_size, 5)
         preprocessing.create_egr_windows(window_size)
 
-    logging.info(f'Completed windows with {perf_counter() - time_start:.2f} second(s).')
-    return '1'
+    logging.info(f"Completed windows with {perf_counter() - time_start:.2f} second(s).")
+    return "1"
 
-@app.route('/initialize', methods=['POST'])
+@app.route("/initialize", methods=["POST"])
 def initialize():
     """
     Conduct the initial LSH.
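preprocessing.create_egr_windows itself is outside this hunk; as a generic illustration of the windowing step (fixed-length, possibly overlapping windows that turn local pattern search into an indexing problem), here is a sketch that is not the project's implementation:

# Generic sliding-window sketch (illustration only, not create_egr_windows).
import numpy as np

def make_windows(series: np.ndarray, window_size: int, stride: int = 1) -> np.ndarray:
    """Cut a 1-D series into overlapping windows of length window_size."""
    n_windows = (len(series) - window_size) // stride + 1
    return np.stack([series[i * stride:i * stride + window_size] for i in range(n_windows)])

print(make_windows(np.arange(10.0), window_size=4, stride=2).shape)  # (4, 4)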
@@ -137,12 +158,12 @@ def initialize():
     lsh_data = pseudo.lsh(data_windowized, query)
     response = orjson.dumps(lsh_data)
 
-    logging.info(f'Completed the initial LSH with {perf_counter() - time_start:.2f} second(s)')
+    logging.info(f"Completed the initial LSH with {perf_counter() - time_start:.2f} second(s)")
     return response
 
-@app.route('/get-lsh-parameters', methods=['POST'])
+@app.route("/get-lsh-parameters", methods=["POST"])
 def get_lsh_parameters():
     """
     Calculates LSH parameters based on the dataset
@@ -163,11 +184,11 @@ def get_lsh_parameters():
     parameters = pseudo.get_lsh_parameters(data, window_size)
     response = orjson.dumps(parameters)
 
-    print('Parameter calculation done: ' + str(time() - t0))
+    print("Parameter calculation done: " + str(time() - t0))
     return response
 
-@app.route('/update', methods=['POST'])
+@app.route("/update", methods=["POST"])
 def update():
     """
     Does LSH and returns a bunch of useful information
@@ -203,11 +224,11 @@ def update():
     lsh_data = pseudo.lsh(data, query, parameters=parameters, weights=weights)
     response = orjson.dumps(lsh_data)
 
-    print('LSH done: ' + str(time() - t0))
+    print("LSH done: " + str(time() - t0))
     return response
 
-@app.route('/weights', methods=['POST'])
+@app.route("/weights", methods=["POST"])
 def weights():
     """
     Calculates new weights for LSH algorithm
@@ -234,7 +255,7 @@ def weights():
     return response
 
-@app.route('/query', methods=['POST'])
+@app.route("/query", methods=["POST"])
 def query():
     """
     Calculate the query based on the given indices.
@@ -251,9 +272,9 @@ def query():
     raw_data = orjson.loads(request.data)
     # print(raw_data)
-    start_index = raw_data['start_index']
-    query_size = raw_data['query_size']
-    window_indices = raw_data['indices']
+    start_index = raw_data["start_index"]
+    query_size = raw_data["query_size"]
+    window_indices = raw_data["indices"]
 
     if start_index is not None:
         # preprocessing.create_weather_windows(query_size)
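For reference, request bodies that exercise both branches of /query (all field values hypothetical; the endpoint reads a JSON body via orjson.loads(request.data)):

# Hypothetical POST bodies for /query.
import requests

# Branch 1: start_index is set, so the query is built from an explicit slice.
requests.post("http://localhost:5000/query",
              json={"start_index": 120, "query_size": 600, "indices": None})

# Branch 2: start_index is None, so the query is built from window indices.
requests.post("http://localhost:5000/query",
              json={"start_index": None, "query_size": 600, "indices": [3, 17, 42]})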
@@ -269,7 +290,7 @@ def query():
     return response
 
-@app.route('/window', methods=['POST'])
+@app.route("/window", methods=["POST"])
 def window():
     """
     Returns values of windows on given indices
@@ -282,7 +303,7 @@ def window():
     """
     t0 = time()
     raw_data = orjson.loads(request.data)
-    indices = raw_data['indices']
+    indices = raw_data["indices"]
     output = np.load(path_preprocessed_data_npy)[indices]
@@ -291,7 +312,7 @@ def window():
     return response
 
-@app.route('/table-info', methods=['POST'])
+@app.route("/table-info", methods=["POST"])
 def table_info():
     """
     Returns additional information on given table
@@ -311,7 +332,7 @@ def table_info():
     """
     t0 = time()
     raw_data = orjson.loads(request.data)
-    table = raw_data['table']
+    table = raw_data["table"]
 
     data = np.load(path_preprocessed_data_npy)
     response = pseudo.table_info(data, table)
backend/src/preprocessing.py

 import json
 import logging
 import os.path
 from pathlib import Path
-from typing import Union
+from typing import Any, Dict, List, Union
 
 # from libs import bigwig
 # import bbi
@@ -12,7 +13,7 @@ import tables
 from sklearn.preprocessing import minmax_scale
 
 logging.basicConfig(level=logging.INFO)
 
-data_path = "cache/processed-data.npy"
+processed_data_path = "cache/preprocessed-data.npy"
 
 # def read_data():
@@ -31,7 +32,7 @@ data_path = "cache/processed-data.npy"
 def create_peax_windows_12kb(window_size):
     data = bigwig.chunk("test.bigWig", 12000, int(12000 / window_size), int(12000 / 6), ["chr1"], verbose=True,)
     data = np.reshape(data, (len(data), 1, len(data[0])))
-    np.save(data_path, data)
+    np.save(processed_data_path, data)
     return "1"
@@ -45,7 +46,7 @@ def create_peax_windows_12kb_mts(window_size):
     data = np.concatenate((data, data2), axis=1)
     data = np.concatenate((data, data3), axis=1)
-    np.save(data_path, data)
+    np.save(processed_data_path, data)
     return "1"
@@ -181,7 +182,7 @@ def read_egr_data():
     # Config
-    path_data_original: Union[Path, str] = "../data/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
+    path_data_original: Union[Path, str] = "../data/egr/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
     channel_names = ["time", "ACM_Egrrate_demand_managed", "ACM_Egrrate_feedback_filt", "ACM_Egr_enable"]
 
     # Load data
@@ -203,9 +204,9 @@ def read_egr_data():
 def create_egr_windows(window_size):
     """Create windows for EGR dataset."""
     # Config
-    path_data_original_hdf = "../data/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
+    path_data_original_hdf = "../data/egr/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
     path_data_cached_npy = f"cache/egr_cached_{window_size}.npy"
-    path_data_preprocessed_npy = f"cache/processed-data.npy"
+    path_data_preprocessed_npy = f"cache/preprocessed_data.npy"
 
     # Created cached data
     if not Path(path_data_cached_npy).is_file():
@@ -217,3 +218,47 @@ def create_egr_windows(window_size):
     np.save(path_data_preprocessed_npy, data)
     return "1"
+
+
+def read_data(path_data_hdf: Union[Path, str],
+              path_meta_json: Union[Path, str],
+              channel_names: List[str],
+              dir_in_hdf: str) -> List[Dict[str, Any]]:
+    """Read named channels in the given data file.
+
+    Parameters
+    ----------
+    path_data_hdf : Path | str
+        Path of the HDF file with data values. Rows correspond to time steps and columns to channels.
+    path_meta_json : Path | str
+        Path of the JSON file with meta information.
+    channel_names : List[str]
+        Names of channels.
+
+    Returns
+    -------
+    List[Dict]
+        Response with loaded data. It has the shape {"index": [0, 1, ..., n_time_steps-1], "values": 1D-array, "name": str}[].
+    """
+    # Load data
+    logging.info(f"Loading data from {path_data_hdf}")
+    with tables.open_file(path_data_hdf) as fp:
+        data: np.ndarray = getattr(fp.root, dir_in_hdf)[:, :]
+    logging.info(f"Completed loading data with {data.shape[1] - 1} channels and {data.shape[0]} time steps.")
+
+    # Load channel names
+    with open(path_meta_json, "r") as fp:
+        meta = json.load(fp)
+    channel_names_in_file = ["time"] + meta["short_names"][1:]
+
+    # Sort channels
+    sorted_indices = [channel_names_in_file.index(channel_name) for channel_name in channel_names]
+    data = data[:, sorted_indices]
+
+    # Create response
+    response = [{"index": list(range(0, len(data))), "values": channel.tolist(), "name": channel_name}
+                for channel, channel_name in zip(data.T, channel_names)]
+
+    return response
+
+# def create_windows()
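The response contract of the new read_data can be seen with a hand-built toy array (constructed here without any HDF or JSON files; this mirrors the list comprehension above):

# Toy demonstration of the read_data response shape.
import numpy as np

data = np.array([[0.0, 1.0], [0.5, 2.0], [1.0, 3.0]])   # 3 time steps, 2 channels
channel_names = ["time", "ACM_Egrrate_demand_managed"]

response = [{"index": list(range(len(data))), "values": channel.tolist(), "name": name}
            for channel, name in zip(data.T, channel_names)]
print(response[0])   # {'index': [0, 1, 2], 'values': [0.0, 0.5, 1.0], 'name': 'time'}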
backend/src/pseudo.py
@@ -18,8 +18,8 @@ def get_lsh_parameters(data, window_size):
     """
     if (not os.path.isfile('cache/parameters-' + str(window_size) + '.npy')):
         parameters = preprocess(data)
-        np.save('cache/parameters-' + str(window_size), [float(parameters[0]), float(parameters[1]), float(parameters[2])])
-    return np.load('cache/parameters-' + str(window_size) + '.npy').tolist()
+        np.save('cache/parameters_' + str(window_size), [float(parameters[0]), float(parameters[1]), float(parameters[2])])
+    return np.load('cache/parameters_' + str(window_size) + '.npy').tolist()
 
 def lsh(data, query, parameters=None, weights=None):
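Note that the cache-file rename (hyphen to underscore) only touches the save and load calls, while the os.path.isfile guard above them still checks the hyphenated name. A sketch of the same caching pattern with one shared path variable (a possible follow-up, not code from this commit; assumes preprocess from pseudo.py is in scope):

# Sketch: single path variable for guard, save, and load (not from this commit).
import os
import numpy as np

def cached_lsh_parameters(data, window_size):
    path = 'cache/parameters_' + str(window_size) + '.npy'   # one name everywhere
    if not os.path.isfile(path):
        parameters = preprocess(data)                        # preprocess() as defined in pseudo.py
        np.save(path, [float(parameters[0]), float(parameters[1]), float(parameters[2])])
    return np.load(path).tolist()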
@@ -269,7 +269,7 @@ def query(data, window_indices):
 def debug_test_lsh():
-    data = np.load('cache/processed-data.npy')
+    data = np.load('cache/preprocessed_data.npy')
     # data = np.repeat(data, repeats=7, axis=1)
     print(data.shape)
     data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))
config.yml  (new file, 0 → 100644)

+---
+# Global configurations
+reload: false
+# Specific configuration file
+path_config_yml: D:\Projects\pseudo\experiments\configs\config_egr.yml
\ No newline at end of file
experiments/configs/config_egr.yml  (new file, 0 → 100644)

+---
+dataset: egr
+path_data_hdf: D:\Projects\pseudo\data\EGR\201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5
+path_meta_json: D:\Projects\pseudo\data\EGR\metadata.json
+channels:
+  - time
+  - ACM_Egrrate_demand_managed\ETKC:1
+  - ACM_Egrrate_feedback_filt\ETKC:1
+  - ACM_Egr_enable\ETKC:1
+dir_in_hdf: resampled
+dir_cache: cache\
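Taken together, the two new YAML files drive the refactored backend. The sketch below traces how the dataset-specific keys are consumed by the code added in this commit (the Windows paths above are machine-specific, so this is illustrative only):

# Illustrative trace of how config_egr.yml keys feed the new code paths.
import yaml
from pathlib import Path

with open("experiments/configs/config_egr.yml", "r") as fp:
    cfg = yaml.safe_load(fp)

path_data_hdf = Path(cfg["path_data_hdf"])                    # -> tables.open_file(...)
dir_in_hdf = cfg["dir_in_hdf"]                                # -> getattr(fp.root, dir_in_hdf)
cache_npy = Path(cfg["dir_cache"]) / "preprocessed_data.npy"  # -> np.load(...) in the endpoints
print(path_data_hdf.name, dir_in_hdf, cache_npy)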