Commit 4944fbae authored by Kruyff,D.L.W. (Dylan)
Browse files

Make prototype compatible for larger data size

parent 12614e4f
...@@ -15,7 +15,10 @@ export class ApiService { ...@@ -15,7 +15,10 @@ export class ApiService {
// Read input data // Read input data
async readFile(): Promise<RawData> { async readFile(): Promise<RawData> {
const response = await fetch('http://127.0.0.1:5000/read-data'); const response = await fetch('http://127.0.0.1:5000/read-data');
return await response.json(); const temp = await response.json();
const index = JSON.parse(temp.index);
const values = JSON.parse(temp.values).map(Number);
return {index, values};
} }
// Split data into windows and normalize // Split data into windows and normalize
......
<app-overview-window></app-overview-window> <div style="display: flex; justify-content: space-between;">
<mat-tab-group animationDuration="0ms"> <div style="width: 80%;">
<mat-tab label="Query"> <app-overview-window></app-overview-window>
</div>
<div style="width: 20%;">
<app-query-window></app-query-window> <app-query-window></app-query-window>
</mat-tab> </div>
</div>
<mat-tab-group animationDuration="0ms" (selectedTabChange)="changeTab($event)">
<mat-tab label="Samples"> <mat-tab label="Samples">
<app-labeling-window></app-labeling-window> <app-labeling-window></app-labeling-window>
</mat-tab> </mat-tab>
......
import { Component } from '@angular/core'; import { Component } from '@angular/core';
import {CacheService} from './cache.service';
@Component({ @Component({
selector: 'app-root', selector: 'app-root',
templateUrl: './app.component.html', templateUrl: './app.component.html',
}) })
export class AppComponent { export class AppComponent {
/** Injects the shared cache service; the parameter property stores it as `this.service`. */
constructor(private service: CacheService) {}
/**
 * Records the selected tab in the cache service.
 *
 * Bound in the template to the tab group's `selectedTabChange` output.
 *
 * @param tab Event object; only its `index` property is read, so it is
 *            typed structurally instead of as an implicit `any`.
 */
changeTab(tab: { index: number }): void {
  this.service.currentTab = tab.index;
}
} }
...@@ -8,13 +8,14 @@ export class CacheService { ...@@ -8,13 +8,14 @@ export class CacheService {
public rawValues: number[]; public rawValues: number[];
public rawIndices: string[]; public rawIndices: string[];
public _windows: number[][]; private _currentTab: number;
private _windows: number[][];
private _query = undefined; private _query = undefined;
public _labels = {}; private _labels = {};
public _tables; private _tables;
public _windowSimilarity; private _windowSimilarity;
public windowSize = 20; public windowSize = 60;
public nrOfTables = 10; public nrOfTables = 10;
public hashSize = 10; public hashSize = 10;
...@@ -23,6 +24,7 @@ export class CacheService { ...@@ -23,6 +24,7 @@ export class CacheService {
public onNewQuery: EventEmitter<void> = new EventEmitter<void>(); public onNewQuery: EventEmitter<void> = new EventEmitter<void>();
public onNewTables: EventEmitter<void> = new EventEmitter<void>(); public onNewTables: EventEmitter<void> = new EventEmitter<void>();
public onNewWindows: EventEmitter<void> = new EventEmitter<void>(); public onNewWindows: EventEmitter<void> = new EventEmitter<void>();
public onNewTab: EventEmitter<void> = new EventEmitter<void>();
public initialized: Promise<void>; public initialized: Promise<void>;
...@@ -45,16 +47,19 @@ export class CacheService { ...@@ -45,16 +47,19 @@ export class CacheService {
async getRawData(): Promise<void> { async getRawData(): Promise<void> {
const rawData: RawData = await this.api.readFile(); const rawData: RawData = await this.api.readFile();
console.log(rawData);
this.rawIndices = rawData.index; this.rawIndices = rawData.index;
this.rawValues = rawData.values; this.rawValues = rawData.values;
} }
async getWindows(): Promise<void> { async getWindows(): Promise<void> {
this.windows = await this.api.createWindows(this.rawValues, this.parameters); this.windows = await this.api.createWindows(this.rawValues, this.parameters);
console.log(this.windows);
} }
async createTables(): Promise<void> { async createTables(): Promise<void> {
this.tables = await this.api.createTables(this.windows, this.parameters); this.tables = await this.api.createTables(this.windows, this.parameters);
console.log(this.tables);
} }
async getSimilarWindows(window): Promise<any> { async getSimilarWindows(window): Promise<any> {
...@@ -111,6 +116,16 @@ export class CacheService { ...@@ -111,6 +116,16 @@ export class CacheService {
return this._windowSimilarity; return this._windowSimilarity;
} }
/**
 * Stores the selected tab index and then emits `onNewTab`.
 *
 * The event is emitted on every assignment, including when the value is
 * the same as the one already stored.
 */
public set currentTab(v: number) {
  this._currentTab = v;
  this.onNewTab.emit();
}

/** Returns the tab index most recently assigned through the setter. */
public get currentTab(): number {
  return this._currentTab;
}
public get parameters(): {[parameter: string]: any} { public get parameters(): {[parameter: string]: any} {
return { return {
windowsize: this.windowSize, windowsize: this.windowSize,
......
<plotly-plot *ngIf="showPlot" [data]="data" [layout]="layout" (plotly_click)="clicked($event)"></plotly-plot> <div style="overflow: auto">
<plotly-plot *ngIf="showPlot" [data]="data" [layout]="layout" (plotly_click)="clicked($event)"></plotly-plot>
</div>
...@@ -8,9 +8,9 @@ import {throwError} from 'rxjs'; ...@@ -8,9 +8,9 @@ import {throwError} from 'rxjs';
styleUrls: ['./overview-window.component.css'] styleUrls: ['./overview-window.component.css']
}) })
export class OverviewWindowComponent implements OnInit { export class OverviewWindowComponent implements OnInit {
public defaultColors: string[] = []; public defaultColors: string[];
public defaultSizes: number[] = []; public defaultSizes: number[];
public defaultOpacity: number[] = []; public defaultOpacity: number[];
public showPlot = false; public showPlot = false;
public data; public data;
...@@ -29,15 +29,14 @@ export class OverviewWindowComponent implements OnInit { ...@@ -29,15 +29,14 @@ export class OverviewWindowComponent implements OnInit {
async initializePlot() { async initializePlot() {
this.service.query = undefined; this.service.query = undefined;
for (const _ of this.service.rawValues) { const size = this.service.rawValues.length;
this.defaultColors.push('#a3a7e4'); this.defaultColors = Array(size).fill('#a3a7e4');
this.defaultSizes.push(5); this.defaultSizes = Array(size).fill(5);
this.defaultOpacity.push(1); this.defaultOpacity = Array(size).fill(1);
}
this.data = [{ this.data = [{
x: this.service.rawIndices, x: this.service.rawIndices,
y: this.service.rawValues, y: this.service.rawValues,
type: 'scatter', type: 'scattergl',
mode: 'markers', mode: 'markers',
marker: { marker: {
size: this.defaultSizes.slice(), size: this.defaultSizes.slice(),
...@@ -47,15 +46,20 @@ export class OverviewWindowComponent implements OnInit { ...@@ -47,15 +46,20 @@ export class OverviewWindowComponent implements OnInit {
hovermode: 'closest', hovermode: 'closest',
autosize: true, autosize: true,
margin: { margin: {
l: 0, l: 40,
r: 0, r: 0,
b: 40, b: 40,
t: 0, t: 0,
pad: 4 pad: 4
}, },
height: 200, height: 200,
xaxis: {
showticklabels: false,
// rangeslider: {}
},
}; };
this.showPlot = true; this.showPlot = true;
console.log("showing plot");
} }
async clicked(clickData) { async clicked(clickData) {
...@@ -73,15 +77,31 @@ export class OverviewWindowComponent implements OnInit { ...@@ -73,15 +77,31 @@ export class OverviewWindowComponent implements OnInit {
const sizes: number[] = []; const sizes: number[] = [];
const opacity: number[] = []; const opacity: number[] = [];
// Similarity
const windowSimilarity = await this.service.getSimilarWindows(this.service.windows[this.service.query]); const windowSimilarity = await this.service.getSimilarWindows(this.service.windows[this.service.query]);
for (const frequency in windowSimilarity){ for (const frequency in windowSimilarity){
for (const index of windowSimilarity[frequency]) { for (const index of windowSimilarity[frequency]) {
colors[index] = this.getColor(Number(frequency) / this.service.nrOfTables); colors[index] = this.getColor(Number(frequency) / this.service.nrOfTables);
sizes[index] = (Number(frequency) / this.service.nrOfTables) * 10; sizes[index] = 5;
opacity[index] = Number(frequency) / this.service.nrOfTables; opacity[index] = Math.max(Number(frequency) / this.service.nrOfTables, 0.5);
}
} }
// Labeled
for (const index in this.service.labels) {
colors[Number(index)] = this.service.labels[index] ? '#4caf50' : '#f44336';
sizes[Number(index)] = 10;
opacity[Number(index)] = 1;
} }
// Query
colors[this.service.query] = '#cf00ff';
sizes[this.service.query] = 10;
opacity[this.service.query] = 1;
this.data[0].marker.color = colors; this.data[0].marker.color = colors;
this.data[0].marker.size = sizes;
this.data[0].marker.opacity = opacity;
} }
public getColor(value: number) { public getColor(value: number) {
......
.query-container {
margin: auto;
border: 2px solid black;
width: 80%;
display: flex;
justify-content: center;
}
.query-contents {
margin: auto;
}
<div *ngIf="!query"> <div class="query-container">
<div *ngIf="!query">
Select a point in the data to start the similarity search. Select a point in the data to start the similarity search.
</div> </div>
<div *ngIf="query"> <div *ngIf="query" class="query-contents">
<span style="display: flex; justify-content: center"><b>Current query</b></span>
<plotly-plot [data]="plot.data" [layout]="plot.layout"></plotly-plot> <plotly-plot [data]="plot.data" [layout]="plot.layout"></plotly-plot>
</div>
</div> </div>
...@@ -34,7 +34,7 @@ export class QueryWindowComponent implements OnInit { ...@@ -34,7 +34,7 @@ export class QueryWindowComponent implements OnInit {
hovermode: 'closest', hovermode: 'closest',
autosize: true, autosize: true,
margin: { margin: {
l: 30, l: 50,
r: 30, r: 30,
t: 30, t: 30,
pad: 4 pad: 4
......
...@@ -26,7 +26,7 @@ export class TableOverviewComponent implements OnInit { ...@@ -26,7 +26,7 @@ export class TableOverviewComponent implements OnInit {
{ {
data: [{ data: [{
x: Object.keys(table.entries).map((hash: string) => { x: Object.keys(table.entries).map((hash: string) => {
return hash; return Number('0b' + hash);
} }
), ),
y: Object.values(table.entries).map((values: number[]) => values.length / this.service.windows.length), y: Object.values(table.entries).map((values: number[]) => values.length / this.service.windows.length),
......
This diff is collapsed.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
...@@ -5,6 +5,13 @@ import numpy as np ...@@ -5,6 +5,13 @@ import numpy as np
from flask_cors import CORS from flask_cors import CORS
from collections import defaultdict, Counter from collections import defaultdict, Counter
from time import time from time import time
import dask.dataframe as dd
import os.path
import json
from sklearn import preprocessing
from functools import partial
from itertools import groupby
from multiprocessing import Pool
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
...@@ -15,34 +22,58 @@ def index(): ...@@ -15,34 +22,58 @@ def index():
@app.route('/read-data', methods=['GET']) @app.route('/read-data', methods=['GET'])
def read_data(): def read_data():
df = pd.read_csv("DailyDelhiClimateTrain.csv", index_col=0) filename = 'processed-data.pkl'
df.index = pd.to_datetime(df.index) if (not os.path.isfile(filename)):
df.sort_index(inplace=True) print("start")
meantemp = df.loc[:, 'meantemp'].copy() df = dd.read_csv("NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't'])
print("read file")
df = df.loc[df['number_sta'] == 14066001]
print("split rows")
df = df.compute()
df.to_pickle(filename)
print("to_pandas")
df = pd.read_pickle(filename)
df.dropna(subset=['t'], inplace=True)
response = { response = {
"index": meantemp.index.values.astype(str).tolist(), "index": json.dumps(df.loc[:, 'date'].values.astype(str).tolist()),
"values": meantemp.values.tolist() "values": json.dumps(df.loc[:, 't'].values.astype(str).tolist())
} }
print("response ready")
response = jsonify(response) response = jsonify(response)
return response return response
# @app.route('/read-data', methods=['GET'])
# def read_data():
# df = pd.read_csv("1.csv", index_col=3)
# df.index = pd.to_datetime(df.index)
# df.sort_index(inplace=True)
# meantemp = df.loc[:, 7].copy()
# response = {
# "index": meantemp.index.values.astype(str).tolist(),
# "values": meantemp.values.tolist()
# }
# response = jsonify(response)
# return response
@app.route('/create-windows', methods=['POST']) @app.route('/create-windows', methods=['POST'])
def create_windows(): def create_windows():
raw_data = request.json raw_data = request.json
values = raw_data["values"] values = raw_data["values"]
window_size = int(raw_data['parameters']["windowsize"]) window_size = int(raw_data['parameters']["windowsize"])
data = [] data = [values[i:i+window_size] for i in range(len(values) - window_size)]
for index in range(len(values) - window_size): data = preprocessing.minmax_scale(data, (-1, 1), axis=1)
window = values[index:index + window_size] response = jsonify(data.tolist())
norm = np.linalg.norm(window)
if norm == 0:
data.append(window)
else:
data.append((window / norm).tolist())
response = jsonify(data)
return response return response
def fill_table(data, hash_functions, index):
    """Build one hash table by bucketing every window under its bit signature.

    Args:
        data: numpy array with one window per row.
        hash_functions: sequence of projection matrices; only
            ``hash_functions[index]`` is used.
        index: position of the projection matrix to use for this table.

    Returns:
        A ``defaultdict(list)`` mapping each signature string (one '0'/'1'
        character per projection column) to the row indices of ``data`` that
        produced it, in ascending order.
    """
    table = defaultdict(list)
    # Nothing to bucket. This also covers np.array([]), which is 1-D and
    # could not be multiplied by the projection matrix below.
    if data.shape[0] == 0:
        return table
    # Project all windows with a single matrix product instead of one dot
    # product per row, then keep only the sign of every projection.
    bits = (np.dot(data, hash_functions[index]) > 0).astype('int').astype('str')
    for window_index, row in enumerate(bits):
        table[''.join(row)].append(window_index)
    return table
@app.route('/create-tables', methods=['POST']) @app.route('/create-tables', methods=['POST'])
def create_tables(): def create_tables():
t0 = time() t0 = time()
...@@ -52,15 +83,17 @@ def create_tables(): ...@@ -52,15 +83,17 @@ def create_tables():
hash_size = int(raw_data['parameters']["hashsize"]) hash_size = int(raw_data['parameters']["hashsize"])
table_size = int(raw_data['parameters']["tablesize"]) table_size = int(raw_data['parameters']["tablesize"])
data = np.array(data) data = np.array(data)
tables = [defaultdict(list) for _ in range(table_size)] tables_hash_function = [np.random.uniform(-1, 1, size=(window_size, hash_size)) for _ in range(table_size)]
tables_hash_function = [np.random.randn(window_size, hash_size) for _ in range(table_size)] print('Init time: ' + str(time() - t0))
for table_index in range(table_size): try:
table = tables[table_index] pool = Pool()
hash_function = tables_hash_function[table_index] func = partial(fill_table, data, tables_hash_function)
for window_index in range(data.shape[0]): print('Starting pool: ' + str(time() - t0))
signature = (np.dot(data[window_index], hash_function) > 0).astype('int') tables = pool.map(func, range(table_size))
table[str(signature)].append(window_index) finally:
pool.close()
pool.join()
print('Creation time: ' + str(time() - t0)) print('Creation time: ' + str(time() - t0))
hash_functions = np.array(tables_hash_function).tolist() hash_functions = np.array(tables_hash_function).tolist()
...@@ -83,8 +116,8 @@ def query(): ...@@ -83,8 +116,8 @@ def query():
output = {} output = {}
for t in tables.values(): for t in tables.values():
signature = (np.dot(window, t["hash"]) > 0).astype('int') signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
neighbours.extend(t["entries"][str(signature)]) neighbours.extend(t["entries"][signature])
neighbours_with_frequency = dict(Counter(neighbours)) neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items(): for index, frequency in neighbours_with_frequency.items():
if not frequency in output: if not frequency in output:
...@@ -93,6 +126,25 @@ def query(): ...@@ -93,6 +126,25 @@ def query():
response = jsonify(output) response = jsonify(output)
return response return response
def _signature(window, hash_function):
    """Return the '0'/'1' string of projection signs for a single window."""
    return ''.join((np.dot(window, hash_function) > 0).astype('int').astype('str'))


def create_valid_table(data, window_size, hash_size, correct_indices, incorrect_indices, index):
    """Draw random hash functions until one agrees with the labels, then build its table.

    A hash function is accepted when every window in ``correct_indices`` gets
    the same signature as the first correct window and no window in
    ``incorrect_indices`` gets that signature.

    Args:
        data: numpy array with one window per row.
        window_size: number of rows of the random projection matrix.
        hash_size: number of columns of the projection matrix, i.e. the
            number of bits in each signature.
        correct_indices: row indices that must share one bucket. Must be
            non-empty; an empty sequence raises ``IndexError``.
        incorrect_indices: row indices that must not land in that bucket.
        index: unused; it is the per-task argument supplied when this
            function is mapped over a range by a worker pool.

    Returns:
        dict with ``"hash"`` (the accepted projection matrix as nested lists)
        and ``"entries"`` (``defaultdict(list)`` mapping signature -> row
        indices of ``data``).

    Note:
        The search loop has no attempt limit, so it does not terminate when
        no hash function can satisfy the labels.
    """
    # Use a generator private to this call. Worker processes created by
    # forking share the parent's global numpy random state, which would make
    # different workers draw identical hash functions.
    rng = np.random.RandomState()
    while True:
        hash_function = rng.randn(window_size, hash_size)
        target = _signature(data[correct_indices[0]], hash_function)
        all_correct_match = all(
            _signature(data[i], hash_function) == target for i in correct_indices)
        no_incorrect_match = all(
            _signature(data[i], hash_function) != target for i in incorrect_indices)
        if all_correct_match and no_incorrect_match:
            break
    entries = defaultdict(list)
    for window_index in range(data.shape[0]):
        entries[_signature(data[window_index], hash_function)].append(window_index)
    return {
        "hash": hash_function.tolist(),
        "entries": entries
    }
@app.route('/update', methods=['POST']) @app.route('/update', methods=['POST'])
def update(): def update():
t0 = time() t0 = time()
...@@ -115,8 +167,8 @@ def update(): ...@@ -115,8 +167,8 @@ def update():
for t in tables.values(): for t in tables.values():
valid = True valid = True
signature = (np.dot(window, t["hash"]) > 0).astype('int') signature = ''.join((np.dot(window, t["hash"]) > 0).astype('int').astype('str'))
neighbours = t["entries"][str(signature)] neighbours = t["entries"][signature]
for index in correct_indices: for index in correct_indices:
if index not in neighbours: if index not in neighbours:
valid = False valid = False
...@@ -128,21 +180,15 @@ def update(): ...@@ -128,21 +180,15 @@ def update():
if valid: if valid:
new_tables.append(t) new_tables.append(t)
for i in range(table_size - len(new_tables)): try:
entries = defaultdict(list) pool = Pool()
while True: func = partial(create_valid_table, data, window_size, hash_size, correct_indices, incorrect_indices)
hash_function = np.random.randn(window_size, hash_size) print('Starting pool: ' + str(time() - t0))
correct_signatures = [str((np.dot(data[index], hash_function) > 0).astype('int')) for index in correct_indices] new_tables.extend(pool.map(func, range(table_size - len(new_tables))))
incorrect_signatures = [str((np.dot(data[index], hash_function) > 0).astype('int')) for index in incorrect_indices] finally:
if correct_signatures.count(correct_signatures[0]) == len(correct_signatures) and incorrect_signatures.count(correct_signatures[0]) == 0: pool.close()
break pool.join()
for window_index in range(data.shape[0]):
signature = (np.dot(data[window_index], hash_function) > 0).astype('int')
entries[str(signature)].append(window_index)
new_tables.append({
"hash": hash_function.tolist(),
"entries": entries
})
print('Update time: ' + str(time() - t0)) print('Update time: ' + str(time() - t0))
response = {} response = {}
for table_index in range(len(new_tables)): for table_index in range(len(new_tables)):
......