Commit 0bc4db2d authored by Kruyff,D.L.W. (Dylan)'s avatar Kruyff,D.L.W. (Dylan)
Browse files

Roughly stable version for demo


Former-commit-id: 33ad2457
parent 4b03da69

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.
...@@ -8,7 +8,7 @@ export interface RawData { ...@@ -8,7 +8,7 @@ export interface RawData {
export interface LshData { export interface LshData {
candidates: number[]; candidates: number[];
distances: number[]; distances: number[];
hash_functions: number[][][][]; hash_functions: number[];// number[][][][];
parameters?: number[]; parameters?: number[];
} }
......
...@@ -44,7 +44,9 @@ export class LabelingWindowComponent implements OnInit { ...@@ -44,7 +44,9 @@ export class LabelingWindowComponent implements OnInit {
} }
async showSamples() { async showSamples() {
this.topk = this.state.lshData.candidates.slice(0, this.k); this.topk = this.state.lshData.candidates
.filter((candidate) => this.state.labels[candidate] !== true)
.slice(0, this.k);
this.subplots = []; this.subplots = [];
const values = await this.state.getWindow(this.topk); const values = await this.state.getWindow(this.topk);
this.topk.forEach((index, i) => { this.topk.forEach((index, i) => {
......
...@@ -125,6 +125,8 @@ export class ProgressViewComponent implements OnInit { ...@@ -125,6 +125,8 @@ export class ProgressViewComponent implements OnInit {
const width = +svg.attr('width'); const width = +svg.attr('width');
const height = +svg.attr('height'); const height = +svg.attr('height');
svg.selectAll('*').remove();
const simulation = d3.forceSimulation() const simulation = d3.forceSimulation()
.force('link', d3.forceLink().id((d: any) => d.id)) .force('link', d3.forceLink().id((d: any) => d.id))
.force('charge', d3.forceManyBody().strength(100)) // Gravity force .force('charge', d3.forceManyBody().strength(100)) // Gravity force
......
...@@ -86,7 +86,7 @@ export class StateService { ...@@ -86,7 +86,7 @@ export class StateService {
} }
public createTable() { public createTable() {
const indices: number[] = this.lshData.distances.map((x) => x > 500 ? 100 : Math.floor(x / 5)); const indices: number[] = this.lshData.distances.map((x) => x > 500 ? 100 : Math.floor(x / 10));
const table = {}; const table = {};
this.lshData.candidates.forEach((candidate, index) => { this.lshData.candidates.forEach((candidate, index) => {
if (table[indices[index]] === undefined) if (table[indices[index]] === undefined)
......
This diff is collapsed.
...@@ -17,7 +17,7 @@ import cooler ...@@ -17,7 +17,7 @@ import cooler
import numpy as np import numpy as np
import os import os
import pandas as pd import pandas as pd
import utils from sklearn.preprocessing import MinMaxScaler
TILE_SIZE = 1024 TILE_SIZE = 1024
...@@ -26,6 +26,13 @@ TILESET_INFO = {"filetype": "bigwig", "datatype": "vector"} ...@@ -26,6 +26,13 @@ TILESET_INFO = {"filetype": "bigwig", "datatype": "vector"}
FILE_EXT = {"bigwig", "bw"} FILE_EXT = {"bigwig", "bw"}
def normalize_data(data, percentile: float = 99.9):
cutoff = np.percentile(data, (0, percentile))
data_norm = np.copy(data)
data_norm[np.where(data_norm < cutoff[0])] = cutoff[0]
data_norm[np.where(data_norm > cutoff[1])] = cutoff[1]
return MinMaxScaler().fit_transform(data_norm)
def is_bigwig(filepath=None, filetype=None): def is_bigwig(filepath=None, filetype=None):
if filepath is None: if filepath is None:
...@@ -286,7 +293,7 @@ def chunk( ...@@ -286,7 +293,7 @@ def chunk(
) )
if normalize: if normalize:
values[start:end] = utils.normalize( values[start:end] = normalize_data(
values[start:end], percentile=percentile values[start:end], percentile=percentile
) )
......
...@@ -81,7 +81,7 @@ def initialize(): ...@@ -81,7 +81,7 @@ def initialize():
query = np.reshape(query, (len(query), 1)) query = np.reshape(query, (len(query), 1))
# query = np.repeat(query, repeats=1, axis=1) # query = np.repeat(query, repeats=1, axis=1)
r, a, sd = preprocess() r, a, sd = preprocess(data)
candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
response = { response = {
...@@ -168,12 +168,12 @@ def table_info(): ...@@ -168,12 +168,12 @@ def table_info():
print("Averages calculated: " + str(time() - t0)) print("Averages calculated: " + str(time() - t0))
return response return response
def preprocess(): def preprocess(data):
return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037 # return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037
data = np.load('processed-data.npy') # data = np.load('processed-data.npy')
data = np.array(data, dtype='double') data = np.array(data, dtype='double')
data = np.reshape(data, (int(len(data) / 1), 1, len(data[0]))) # data = np.reshape(data, (int(len(data) / 1), 1, len(data[0])))
data = np.repeat(data, repeats=1, axis=1) # data = np.repeat(data, repeats=1, axis=1)
subset = [] subset = []
t0 = time() t0 = time()
...@@ -200,7 +200,8 @@ def preprocess(): ...@@ -200,7 +200,8 @@ def preprocess():
continue continue
e = np.linalg.norm(data[index_1] - data[index_2]) e = np.linalg.norm(data[index_1] - data[index_2])
eq_distances.append(e) eq_distances.append(e)
d = dtw.dtw(data[index_1], data[index_2], dist_method="Euclidean", window_type="sakoechiba", window_args={"window_size": 120}).distance d = _ucrdtw.ucrdtw(data[index_1], data[index_2], 0.05, False)[1]
# d = dtw.dtw(data[index_1], data[index_2], dist_method="Euclidean", window_type="sakoechiba", window_args={"window_size": 120}).distance
dtw_distances.append(d) dtw_distances.append(d)
ratios = np.array(dtw_distances)/np.array(eq_distances) ratios = np.array(dtw_distances)/np.array(eq_distances)
...@@ -214,20 +215,20 @@ def preprocess(): ...@@ -214,20 +215,20 @@ def preprocess():
# theta = mean_eq + -2.58 * sd_eq # theta = mean_eq + -2.58 * sd_eq
r = theta / ((a-sd)*math.sqrt(120)) r = theta / ((a-sd)*math.sqrt(120))
# r = theta / (math.sqrt(120)) # r = theta / (math.sqrt(120))
print('Mean: ' + mean_dtw) print('Mean: ' + str(mean_dtw))
print('Stdev: ' + sd_dtw) print('Stdev: ' + str(sd_dtw))
print('Ratio mean: ' + a) print('Ratio mean: ' + str(a))
print('Ratio stdev: ' + sd) print('Ratio stdev: ' + str(sd))
print('Theta: ' + theta) print('Theta: ' + str(theta))
print('r: ' + r) print('r: ' + str(r))
print('Preprocessing time: ' + str(time() - t0)) print('Preprocessing time: ' + str(time() - t0))
return r, a, sd return r, a, sd
def debug_test_lsh(): def debug_test_lsh():
r, a, sd = preprocess() data = np.load('processed-data.npy')
r, a, sd = preprocess(data)
create_windows() create_windows()
query_n = 80503 query_n = 80503
data = np.load('processed-data.npy')
query = data[query_n] # performDBA(data[[80503, 11514]]) query = data[query_n] # performDBA(data[[80503, 11514]])
query = np.reshape(query, (len(data[0]), 1)) query = np.reshape(query, (len(data[0]), 1))
data= np.array(data, dtype='double') data= np.array(data, dtype='double')
...@@ -256,4 +257,4 @@ def debug_test_lsh(): ...@@ -256,4 +257,4 @@ def debug_test_lsh():
accuracy += 1 accuracy += 1
print(accuracy) print(accuracy)
debug_test_lsh() # debug_test_lsh()
\ No newline at end of file \ No newline at end of file
...@@ -11,6 +11,7 @@ dependencies: ...@@ -11,6 +11,7 @@ dependencies:
- flask_cors - flask_cors
- orjson - orjson
- dask[dataframe] - dask[dataframe]
- jupyter
- pip - pip
- pip: - pip:
- pybbi - pybbi
......
This diff is collapsed.
'''
/*******************************************************************************
* Copyright (C) 2018 Francois Petitjean
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
'''
from __future__ import division
import numpy as np
from functools import reduce
__author__ ="Francois Petitjean"
def performDBA(series, n_iterations=10):
    """Compute the DTW Barycenter Average (DBA) of a set of sequences.

    Starts from an approximate medoid of *series* and iteratively refines
    it with DBA update passes.

    Parameters:
        series: sequence of 1-D numpy arrays (lengths may differ).
        n_iterations: number of refinement passes (default 10).

    Returns:
        1-D numpy array: the averaged sequence (length of the chosen medoid).
    """
    max_length = max(map(len, series))
    # Scratch matrices shared by every DTW computation to avoid
    # re-allocating them for each pair of sequences.
    cost_mat = np.zeros((max_length, max_length))
    delta_mat = np.zeros((max_length, max_length))
    path_mat = np.zeros((max_length, max_length), dtype=np.int8)

    medoid_ind = approximate_medoid_index(series, cost_mat, delta_mat)
    center = series[medoid_ind]
    for _ in range(n_iterations):
        center = DBA_update(center, series, cost_mat, path_mat, delta_mat)
    return center
def approximate_medoid_index(series, cost_mat, delta_mat):
    """Pick the index of an approximate medoid of *series*.

    For small sets every sequence is considered; otherwise 50 candidates
    are sampled without replacement. The candidate minimizing the sum of
    squared DTW distances to the whole set wins (first minimum on ties).
    """
    if len(series) <= 50:
        candidate_ids = range(len(series))
    else:
        candidate_ids = np.random.choice(range(len(series)), 50, replace=False)
    return min(
        candidate_ids,
        key=lambda idx: sum_of_squares(series[idx], series, cost_mat, delta_mat),
    )
def sum_of_squares(s, series, cost_mat, delta_mat):
    """Sum of squared DTW distances from candidate *s* to every sequence."""
    return sum(squared_DTW(s, t, cost_mat, delta_mat) for t in series)
def DTW(s, t, cost_mat, delta_mat):
    """DTW distance between *s* and *t* (square root of squared_DTW)."""
    squared = squared_DTW(s, t, cost_mat, delta_mat)
    return np.sqrt(squared)
def squared_DTW(s, t, cost_mat, delta_mat):
    """Return the squared DTW distance between 1-D sequences *s* and *t*.

    *cost_mat* and *delta_mat* are caller-provided scratch matrices of at
    least len(s) x len(t); delta_mat is filled in place with pointwise
    squared differences, then cost_mat with the cumulative DTW costs.
    """
    s_len = len(s)
    t_len = len(t)
    fill_delta_mat_dtw(s, t, delta_mat)
    # Seed cell, then first column and first row (single predecessor each).
    cost_mat[0, 0] = delta_mat[0, 0]
    for i in range(1, s_len):
        cost_mat[i, 0] = cost_mat[i - 1, 0] + delta_mat[i, 0]
    for j in range(1, t_len):
        cost_mat[0, j] = cost_mat[0, j - 1] + delta_mat[0, j]
    # Interior cells: cheapest of the three predecessors
    # (diagonal, left, top) plus the local cost.
    for i in range(1, s_len):
        for j in range(1, t_len):
            cost_mat[i, j] = delta_mat[i, j] + min(
                cost_mat[i - 1, j - 1], cost_mat[i, j - 1], cost_mat[i - 1, j]
            )
    return cost_mat[s_len - 1, t_len - 1]
def fill_delta_mat_dtw(center, s, delta_mat):
    """Write squared pairwise differences of *center* vs *s* into delta_mat.

    Only the top-left len(center) x len(s) region of *delta_mat* is
    overwritten; the rest of the scratch matrix is left untouched.
    """
    rows, cols = len(center), len(s)
    delta_mat[:rows, :cols] = np.subtract.outer(center, s) ** 2
def DBA_update(center, series, cost_mat, path_mat, delta_mat):
    """One DBA refinement pass: re-average *center* over *series*.

    For each sequence, computes the DTW alignment against the current
    center (recording the chosen predecessor of every cell in path_mat),
    then walks the optimal path backwards, accumulating each aligned
    sample into the center position it maps to. Returns the element-wise
    mean of the accumulated samples.

    cost_mat, path_mat and delta_mat are caller-provided scratch matrices
    of at least len(center) x max sequence length.
    """
    # Backtracking moves indexed by path code: 0=diagonal, 1=left, 2=top.
    options_argmin = [(-1, -1), (0, -1), (-1, 0)]
    updated_center = np.zeros(center.shape)
    # Count of samples accumulated per center position (for the final mean).
    n_elements = np.array(np.zeros(center.shape), dtype=int)
    center_length = len(center)
    for s in series:
        s_len = len(s)
        fill_delta_mat_dtw(center, s, delta_mat)
        # Seed cell; -1 marks the backtracking terminator.
        cost_mat[0, 0] = delta_mat[0, 0]
        path_mat[0, 0] = -1
        # First column: only the "top" predecessor exists.
        for i in range(1, center_length):
            cost_mat[i, 0] = cost_mat[i-1, 0]+delta_mat[i, 0]
            path_mat[i, 0] = 2
        # First row: only the "left" predecessor exists.
        for j in range(1, s_len):
            cost_mat[0, j] = cost_mat[0, j-1]+delta_mat[0, j]
            path_mat[0, j] = 1
        # Interior cells: pick the cheapest predecessor and remember which.
        for i in range(1, center_length):
            for j in range(1, s_len):
                diag,left,top =cost_mat[i-1, j-1], cost_mat[i, j-1], cost_mat[i-1, j]
                if(diag <=left):
                    if(diag<=top):
                        res = diag
                        path_mat[i,j] = 0
                    else:
                        res = top
                        path_mat[i,j] = 2
                else:
                    if(left<=top):
                        res = left
                        path_mat[i,j] = 1
                    else:
                        res = top
                        path_mat[i,j] = 2
                cost_mat[i, j] = res+delta_mat[i, j]
        # Backtrack from the end of the alignment, crediting each sample
        # s[j] to the center position i it aligns with.
        i = center_length-1
        j = s_len-1
        while(path_mat[i, j] != -1):
            updated_center[i] += s[j]
            n_elements[i] += 1
            move = options_argmin[path_mat[i, j]]
            i += move[0]
            j += move[1]
        # The path must terminate exactly at the (0, 0) seed cell.
        assert(i == 0 and j == 0)
        updated_center[i] += s[j]
        n_elements[i] += 1
    return np.divide(updated_center, n_elements)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment