Commit be2e7008 authored by Yuncong Yu

Extract config files; refactor cache files; reformat code.

parent 8104396f
@@ -7,8 +7,7 @@ __pycache__
*.pyd
*.pyo
*.obj
201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01.h5
201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5
EGR/
AngularApp/prototype/node_modules
backend/cache/
clion-log.txt
import logging
import yaml
from pathlib import Path
from time import time, perf_counter
from typing import Dict, List
from typing import Any, Dict, List, Union
import numpy as np
import orjson
@@ -12,21 +13,38 @@ from src import preprocessing
from src import pseudo
# Config
path_preprocessed_data_npy = 'cache/processed-data.npy'
reload = False
# Read config
def read_config(path_global_config_yml: Union[Path, str] = Path("../config.yml")) -> Dict[str, Any]:
    with open(path_global_config_yml, "r") as fp:
        config = yaml.load(fp, yaml.FullLoader)
    # reload = False
    reload = config["reload"]
    path_config_yml = Path(config["path_config_yml"])
    with open(path_config_yml, "r") as fp:
        config = yaml.load(fp, yaml.FullLoader)
    return config


config = read_config()
# path_preprocessed_data_npy = 'cache/preprocessed-data.npy'
path_data_hdf = Path(config["path_data_hdf"])
path_meta_json = Path(config["path_meta_json"])
path_preprocessed_data_npy = Path(config["dir_cache"]) / "preprocessed_data.npy"
channel_names = config["channels"]
dir_in_hdf = config["dir_in_hdf"]
logging.basicConfig(level=logging.INFO)
app = Flask(__name__)
CORS(app)
@app.route('/', methods=['GET'])
@app.route("/", methods=["GET"])
def index():
return "hi"
@app.route('/read-data', methods=['GET'])
@app.route("/read-data", methods=["GET"])
def read_data():
"""
Load raw data.
@@ -43,19 +61,22 @@ def read_data():
"""
logging.info('Loading data ...')
logging.info("Loading data ...")
time_start = perf_counter()
# response = preprocessing.read_weather_data()
response = preprocessing.read_egr_data()
# response = preprocessing.read_egr_data()
response = preprocessing.read_data(
path_data_hdf=path_data_hdf, path_meta_json=path_meta_json, channel_names=channel_names, dir_in_hdf=dir_in_hdf
)
response = orjson.dumps(response)
logging.info(f'Completed loading data with {perf_counter() - time_start:.2f} second(s).')
logging.info(f"Completed loading data with {perf_counter() - time_start:.2f} second(s).")
return response
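For reference, the body returned here (built by preprocessing.read_data below and serialized with orjson) is a list of per-channel records. A truncated, illustrative example of what a client receives; the values are made up:

response_example = [
    {"index": [0, 1, 2], "values": [0.0, 0.01, 0.02], "name": "time"},
    {"index": [0, 1, 2], "values": [0.12, 0.13, 0.11], "name": "ACM_Egrrate_demand_managed"},
]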
@app.route('/create-windows', methods=['POST'])
@app.route("/create-windows", methods=["POST"])
def create_windows():
"""
Creates windows to transform the local pattern search problem into a time series indexing problem.
@@ -73,21 +94,21 @@ def create_windows():
"""
logging.info('Creating window ...')
logging.info("Creating window ...")
time_start = perf_counter()
if not Path(path_preprocessed_data_npy).is_file():
raw_data = request.json
window_size = int(raw_data['parameters']["windowsize"])
window_size = int(raw_data["parameters"]["windowsize"])
# preprocessing.create_eeg_windows(window_size, 5)
preprocessing.create_egr_windows(window_size)
logging.info(f'Completed windows with {perf_counter() - time_start:.2f} second(s).')
logging.info(f"Completed windows with {perf_counter() - time_start:.2f} second(s).")
return '1'
return "1"
@app.route('/initialize', methods=['POST'])
@app.route("/initialize", methods=["POST"])
def initialize():
"""
Conduct the initial LSH.
@@ -137,12 +158,12 @@ def initialize():
lsh_data = pseudo.lsh(data_windowized, query)
response = orjson.dumps(lsh_data)
logging.info(f'Completed the initial LSH with {perf_counter() - time_start:2f} second(s)')
logging.info(f"Completed the initial LSH with {perf_counter() - time_start:2f} second(s)")
return response
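pseudo.lsh, called above with the windowed data and the query, is implemented elsewhere in the repo. Purely for intuition, a toy random-projection LSH that buckets flattened windows by their sign patterns could look like the sketch below; toy_lsh_buckets is a hypothetical helper, not the project's algorithm.

import numpy as np

def toy_lsh_buckets(windows: np.ndarray, n_planes: int = 8, seed: int = 0) -> np.ndarray:
    # Hash each flattened window by the signs of random projections;
    # similar windows tend to land in the same integer bucket.
    rng = np.random.default_rng(seed)
    flat = windows.reshape(len(windows), -1)
    planes = rng.standard_normal((flat.shape[1], n_planes))
    bits = (flat @ planes > 0).astype(int)
    return bits @ (1 << np.arange(n_planes))  # integer bucket ids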
@app.route('/get-lsh-parameters', methods=['POST'])
@app.route("/get-lsh-parameters", methods=["POST"])
def get_lsh_parameters():
"""
Calculates LSH parameters based on the dataset
@@ -163,11 +184,11 @@ def get_lsh_parameters():
parameters = pseudo.get_lsh_parameters(data, window_size)
response = orjson.dumps(parameters)
print('Parameter calculation done: ' + str(time() - t0))
print("Parameter calculation done: " + str(time() - t0))
return response
@app.route('/update', methods=['POST'])
@app.route("/update", methods=["POST"])
def update():
"""
Runs LSH and returns the matches together with related information
@@ -203,11 +224,11 @@ def update():
lsh_data = pseudo.lsh(data, query, parameters=parameters, weights=weights)
response = orjson.dumps(lsh_data)
print('LSH done: ' + str(time() - t0))
print("LSH done: " + str(time() - t0))
return response
@app.route('/weights', methods=['POST'])
@app.route("/weights", methods=["POST"])
def weights():
"""
Calculates new weights for the LSH algorithm
@@ -234,7 +255,7 @@ def weights():
return response
@app.route('/query', methods=['POST'])
@app.route("/query", methods=["POST"])
def query():
"""
Calculate the query based on the given indices.
@@ -251,9 +272,9 @@ def query():
raw_data = orjson.loads(request.data)
# print(raw_data)
start_index = raw_data['start_index']
query_size = raw_data['query_size']
window_indices = raw_data['indices']
start_index = raw_data["start_index"]
query_size = raw_data["query_size"]
window_indices = raw_data["indices"]
if start_index is not None:
# preprocessing.create_weather_windows(query_size)
@@ -269,7 +290,7 @@
return response
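The two branches above build the query either from a raw slice (start_index / query_size) or from previously selected windows (indices). A toy reading of that logic; build_query is a hypothetical helper, and the aggregation over selected windows is an assumption here, not taken from pseudo.query:

import numpy as np

def build_query(data: np.ndarray, start_index=None, query_size=None, window_indices=None) -> np.ndarray:
    if start_index is not None:
        # Slice the query directly out of the raw series.
        return data[int(start_index):int(start_index) + int(query_size)]
    # Assumption: combine previously matched windows, e.g. by averaging.
    return np.mean(data[window_indices], axis=0)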
@app.route('/window', methods=['POST'])
@app.route("/window", methods=["POST"])
def window():
"""
Returns the values of the windows at the given indices
@@ -282,7 +303,7 @@ def window():
"""
t0 = time()
raw_data = orjson.loads(request.data)
indices = raw_data['indices']
indices = raw_data["indices"]
output = np.load(path_preprocessed_data_npy)[indices]
@@ -291,7 +312,7 @@ def window():
return response
@app.route('/table-info', methods=['POST'])
@app.route("/table-info", methods=["POST"])
def table_info():
"""
Returns additional information about the given table
@@ -311,7 +332,7 @@ def table_info():
"""
t0 = time()
raw_data = orjson.loads(request.data)
table = raw_data['table']
table = raw_data["table"]
data = np.load(path_preprocessed_data_npy)
response = pseudo.table_info(data, table)
import json
import logging
import os.path
from pathlib import Path
from typing import Union
from typing import Any, Dict, List, Union
# from libs import bigwig
# import bbi
@@ -12,7 +13,7 @@ import tables
from sklearn.preprocessing import minmax_scale
logging.basicConfig(level=logging.INFO)
data_path = "cache/processed-data.npy"
processed_data_path = "cache/preprocessed-data.npy"
# def read_data():
@@ -31,7 +32,7 @@ data_path = "cache/processed-data.npy"
def create_peax_windows_12kb(window_size):
data = bigwig.chunk("test.bigWig", 12000, int(12000 / window_size), int(12000 / 6), ["chr1"], verbose=True,)
data = np.reshape(data, (len(data), 1, len(data[0])))
np.save(data_path, data)
np.save(processed_data_path, data)
return "1"
@@ -45,7 +46,7 @@ def create_peax_windows_12kb_mts(window_size):
data = np.concatenate((data, data2), axis=1)
data = np.concatenate((data, data3), axis=1)
np.save(data_path, data)
np.save(processed_data_path, data)
return "1"
@@ -181,7 +182,7 @@ def read_egr_data():
# Config
path_data_original: Union[
Path, str
] = "../data/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
] = "../data/egr/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
channel_names = ["time", "ACM_Egrrate_demand_managed", "ACM_Egrrate_feedback_filt", "ACM_Egr_enable"]
# Load data
@@ -203,9 +204,9 @@ def read_egr_data():
def create_egr_windows(window_size):
"""Create windows for EGR dataset."""
# Config
path_data_original_hdf = "../data/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
path_data_original_hdf = "../data/egr/201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5"
path_data_cached_npy = f"cache/egr_cached_{window_size}.npy"
path_data_preprocessed_npy = f"cache/processed-data.npy"
path_data_preprocessed_npy = f"cache/preprocessed_data.npy"
# Create cached data
if not Path(path_data_cached_npy).is_file():
@@ -217,3 +218,47 @@ def create_egr_windows(window_size):
np.save(path_data_preprocessed_npy, data)
return "1"
def read_data(path_data_hdf: Union[Path, str], path_meta_json: Union[Path, str], channel_names: List[str], dir_in_hdf: str) -> List[Dict[str, Any]]:
    """Read named channels in the given data file.

    Parameters
    ----------
    path_data_hdf : Path | str
        Path of the HDF file with data values. Rows correspond to time steps and columns to channels.
    path_meta_json : Path | str
        Path of the JSON file with meta information.
    channel_names : List[str]
        Names of the channels to load, in the desired order.
    dir_in_hdf : str
        Name of the node under the HDF root that holds the data array.

    Returns
    -------
    List[Dict[str, Any]]
        Response with the loaded data: a list of dicts of the form
        {"index": [0, 1, ..., n_time_steps - 1], "values": 1-D array, "name": str}.
    """
    # Load data
    logging.info(f"Loading data from {path_data_hdf}")
    with tables.open_file(path_data_hdf) as fp:
        data: np.ndarray = getattr(fp.root, dir_in_hdf)[:, :]
    logging.info(f"Completed loading data with {data.shape[1] - 1} channels and {data.shape[0]} time steps.")

    # Load channel names
    with open(path_meta_json, "r") as fp:
        meta = json.load(fp)
    channel_names_in_file = ["time"] + meta["short_names"][1:]

    # Sort channels into the requested order
    sorted_indices = [channel_names_in_file.index(channel_name) for channel_name in channel_names]
    data = data[:, sorted_indices]

    # Create response
    response = [
        {"index": list(range(0, len(data))), "values": channel.tolist(), "name": channel_name}
        for channel, channel_name in zip(data.T, channel_names)
    ]
    return response
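Wired up with the config values extracted at the top of the app, a call mirrors the /read-data route above:

response = read_data(
    path_data_hdf=Path(config["path_data_hdf"]),
    path_meta_json=Path(config["path_meta_json"]),
    channel_names=config["channels"],
    dir_in_hdf=config["dir_in_hdf"],
)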
# def create_windows()
@@ -18,8 +18,8 @@ def get_lsh_parameters(data, window_size):
"""
if not os.path.isfile('cache/parameters_' + str(window_size) + '.npy'):
parameters = preprocess(data)
np.save('cache/parameters-' + str(window_size), [float(parameters[0]), float(parameters[1]), float(parameters[2])])
return np.load('cache/parameters-' + str(window_size) + '.npy').tolist()
np.save('cache/parameters_' + str(window_size), [float(parameters[0]), float(parameters[1]), float(parameters[2])])
return np.load('cache/parameters_' + str(window_size) + '.npy').tolist()
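A pathlib-based equivalent would keep the check, save, and load paths from drifting apart. A sketch with the same behavior; get_lsh_parameters_cached is a hypothetical name, and preprocess is the repo's existing helper:

from pathlib import Path

def get_lsh_parameters_cached(data, window_size):
    cache_file = Path(f"cache/parameters_{window_size}.npy")
    if not cache_file.is_file():
        p = preprocess(data)
        # np.save appends ".npy" only when the suffix is missing, so the path stays consistent.
        np.save(cache_file, [float(p[0]), float(p[1]), float(p[2])])
    return np.load(cache_file).tolist()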
def lsh(data, query, parameters=None, weights=None):
@@ -269,7 +269,7 @@ def query(data, window_indices):
def debug_test_lsh():
data = np.load('cache/processed-data.npy')
data = np.load('cache/preprocessed_data.npy')
# data = np.repeat(data, repeats=7, axis=1)
print(data.shape)
data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))
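One caveat worth flagging in this debug helper: np.reshape reinterprets the flat buffer and does not move values between axes, so if the intent is to reorder axes rather than reinterpret memory, an explicit axis swap is needed:

data = np.transpose(data, (0, 2, 1))  # (n_windows, n_channels, window_len) -> (n_windows, window_len, n_channels)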
---
# Global configurations
reload: false
# Specific configuration file
path_config_yml: D:\Projects\pseudo\experiments\configs\config_egr.yml
\ No newline at end of file
---
dataset: egr
path_data_hdf: D:\Projects\pseudo\data\EGR\201207_IAVHeKu_212-SM-9221_WMA4ID41_DS18_TestV_10_EU5FM_800m_0C_freie_Fahrt_nrm_01_compressed.h5
path_meta_json: D:\Projects\pseudo\data\EGR\metadata.json
channels:
- time
- ACM_Egrrate_demand_managed\ETKC:1
- ACM_Egrrate_feedback_filt\ETKC:1
- ACM_Egr_enable\ETKC:1
dir_in_hdf: resampled
dir_cache: cache\
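With these two files, read_config in the app first loads the global config.yml, follows path_config_yml to this dataset-specific file, and returns its mapping. For example (illustrative):

config = read_config(Path("../config.yml"))
assert config["dataset"] == "egr"
assert config["dir_in_hdf"] == "resampled"
path_cache = Path(config["dir_cache"]) / "preprocessed_data.npy"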