diff --git a/AngularApp/prototype/src/app/api.service.ts b/AngularApp/prototype/src/app/api.service.ts index 80573ff403c5852ee394d0e328258e02dd98b58a..9b23e33d4dd0a60e37a46dac73ab03b314525498 100644 --- a/AngularApp/prototype/src/app/api.service.ts +++ b/AngularApp/prototype/src/app/api.service.ts @@ -7,11 +7,12 @@ export interface RawData { export interface LshData { candidates: number[][][]; - tables: {[bucket: string]: number[]}[]; + distances: number[][][]; average_candidates: number[]; average_distances: number[]; + tables: {[bucket: string]: number[]}[]; + average_table: {[bucket: string]: number[]}; samples: number[]; - distances: number[][][]; hash_functions: number[][]; parameters?: number[]; } @@ -25,23 +26,37 @@ export interface TableInfoData { distances: number[][]; } +export interface Parameters { + windowsize: number; + hashsize: number; + tablesize: number; + stepsize: number; +} + @Injectable({ providedIn: 'root' }) +/** + * This service acts as the interface between the client and server side. + */ export class ApiService { constructor() { } - // Read input data + /** + * Read input data. The format is a list of channels, where each channel is an object of type RawData + */ async readFile(): Promise<RawData[]> { - const response = await fetch('http://127.0.0.1:5000/read-mts-data'); + const response = await fetch('http://127.0.0.1:5000/read-data'); return await response.json(); } - // Split data into windows and normalize - async createWindows(parameters): Promise<any> { + /** + * Split the data into windows (server side) + */ + async createWindows(parameters: Parameters): Promise<any> { const postData = {parameters}; - const response = await fetch('http://127.0.0.1:5000/create-mts-windows', { + await fetch('http://127.0.0.1:5000/create-windows', { method: 'POST', headers: { 'Accept': 'application/json', @@ -51,32 +66,39 @@ export class ApiService { }); } - // Calculate parameters for LSH + find candidates using LSH - async lshInitial(query): Promise<LshData> { - const response = await fetch('http://127.0.0.1:5000/initialize', { + /** + * Get weights which will be applied to the LSH hash functions + */ + async getWeights(query: number[][], labels: {[index: number]: boolean}, weights: number[], hash_functions: number[][]): Promise<number[]> { + const response = await fetch('http://127.0.0.1:5000/weights', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: new Blob( [ JSON.stringify({query}) ], { type: 'text/plain' } ) + body: new Blob( [ JSON.stringify({query, labels, weights, hash_functions}) ], { type: 'text/plain' } ) }); return await response.json(); } - async getWeights(query: number[][], labels: {[index: number]: boolean}, weights: number[], hash_functions: number[][]): Promise<number[]> { - const response = await fetch('http://127.0.0.1:5000/weights', { + /** + * Do the first iteration of LSH and return important information + */ + async lshInitial(query: number[][]): Promise<LshData> { + const response = await fetch('http://127.0.0.1:5000/initialize', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: new Blob( [ JSON.stringify({query, labels, weights, hash_functions}) ], { type: 'text/plain' } ) + body: new Blob( [ JSON.stringify({query}) ], { type: 'text/plain' } ) }); return await response.json(); } - // Find candidates using LSH with weights + /** + * Do another iteration of LSH, with weights, and return important information + */ async lshUpdate(query, 
weights, parameters): Promise<LshData> { const response = await fetch('http://127.0.0.1:5000/update', { method: 'POST', @@ -89,20 +111,24 @@ export class ApiService { return await response.json(); } - // Get query window based on windows labeled correct - async getQueryWindow(window): Promise<number[][]> { + /** + * Get query window based on windows labeled correct + */ + async getQueryWindow(indices: number | {[index: number]: boolean}): Promise<number[][]> { const response = await fetch('http://127.0.0.1:5000/query', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: JSON.stringify({window}) + body: JSON.stringify({indices}) }); return await response.json(); } - // Get data of a window by indices + /** + * Get data of a window by indices + */ async getWindowByIndices(indices: number[]): Promise<number[][][]> { const response = await fetch('http://127.0.0.1:5000/window', { method: 'POST', @@ -115,14 +141,17 @@ export class ApiService { return await response.json(); } - async getTableInfo(windows): Promise<TableInfoData> { + /** + * Get additional information for a given table + */ + async getTableInfo(table: number[][]): Promise<TableInfoData> { const response = await fetch('http://127.0.0.1:5000/table-info', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: JSON.stringify({windows}) + body: JSON.stringify({table}) }); return await response.json(); } diff --git a/AngularApp/prototype/src/app/progress-view/progress-view.component.ts b/AngularApp/prototype/src/app/progress-view/progress-view.component.ts index 69e5c65704952f30924327f815b012c798da163c..c379c9e575757414d791a7589be984ab6e096a2a 100644 --- a/AngularApp/prototype/src/app/progress-view/progress-view.component.ts +++ b/AngularApp/prototype/src/app/progress-view/progress-view.component.ts @@ -1,6 +1,7 @@ import {Component, OnInit, ViewChild} from '@angular/core'; import {StateService} from '../state.service'; import * as d3 from 'd3'; +import {TableInfoData} from '../api.service'; @Component({ selector: 'app-progress-view', @@ -21,12 +22,14 @@ export class ProgressViewComponent implements OnInit { constructor(private state: StateService) { } ngOnInit(): void { - this.state.onNewTableInfo.subscribe(() => { this.showgraph(); }); - this.state.onNewTableInfo.subscribe(() => { this.showHistogram(); }); + this.state.onNewLshData.subscribe(() => { + this.showgraph(); + this.showHistogram(); + }); } showHistogram() { - const table = this.state._averageTable; + const table = this.state.lshData.average_table; this.hist = { data: [{ x: Object.keys(table), @@ -162,7 +165,7 @@ export class ProgressViewComponent implements OnInit { d3.selectAll('circle').transition().style('stroke', undefined); d3.select('#node-' + v.value).transition().style('stroke', 'black').style('stroke-width', 20); const data = this.hist; - data.data[0].marker.line.width = Object.keys(this.state._averageTable).map((key) => { + data.data[0].marker.line.width = Object.keys(this.state.lshData.average_table).map((key) => { return Number(key) === v.value ? 
4 : 0; }); this.hist = data; @@ -177,102 +180,15 @@ export class ProgressViewComponent implements OnInit { } public get table() { - return this.state._averageTable; + return this.state.lshData.average_table; } async showgraph() { - const nodes = []; - const links = []; - const keys = Object.keys(this.table); - this.hoverPlot(this.state.tableInfo.prototypes); - const distances = this.state.tableInfo.distances; - - // for (const key in this.table) { - // const size = this.table[key].length; - // nodes.push({id: key, group: Number(key), size: size}); - // } - // for (const key in this.table) { - // for (const key2 in this.table) { - // if (key === key2) { - // continue; - // } - // links.push({source: key, target: key2, value: 0.001 * (100 - 5 * distances[keys.indexOf(key)][keys.indexOf(key2)])}); - // } - // } - // const graph = {nodes, links}; - // - // const svg = d3.select('#visual'); - // const width = +svg.attr('width'); - // const height = +svg.attr('height'); - // - // svg.selectAll('*').remove(); - // - // const simulation = d3.forceSimulation() - // .force('link', d3.forceLink().id((d: any) => d.id)) - // .force('charge', d3.forceManyBody().strength(100)) // Gravity force - // .force('collide', d3.forceCollide().radius(25).iterations(3)) // Repulsion force - // .force('center', d3.forceCenter(width / 2, height / 2)); // Position force - // - // const link = svg.append('g') - // .selectAll('line') - // .data(graph.links) - // .enter().append('line') - // .attr('stroke', 'grey') - // .attr('stroke-width', (d: any) => d.value); - // - // const node = svg.append('g') - // .selectAll('circle') - // .data(graph.nodes) - // .enter().append('circle') - // .attr('r', (d: any) => 5 * Math.log(d.size) / Math.log(10)) - // .attr('fill', (d: any) => this.getColor(d.group / graph.nodes.length)) - // .attr('id', (d: any) => 'node-' + d.group) - // .on('mouseover', (d: any) => {this.sliderValue = d.group; }) - // .call(d3.drag() - // .on('start', dragstarted) - // .on('drag', dragged) - // .on('end', dragended)); - // - // simulation - // .nodes(graph.nodes as any) - // .on('tick', ticked); - // - // simulation.force<any>('link') - // .links(graph.links); - // - // function ticked() { - // link - // .attr('x1', (d: any) => d.source.x) - // .attr('y1', (d: any) => d.source.y) - // .attr('x2', (d: any) => d.target.x) - // .attr('y2', (d: any) => d.target.y); - // - // node - // .attr('cx', (d: any) => d.x) - // .attr('cy', (d: any) => d.y); - // } - // - // function dragstarted(d) { - // if (!d3.event.active) { - // simulation.alphaTarget(0.1).restart(); - // } - // d.fx = d.x; - // d.fy = d.y; - // } - // - // function dragged(d) { - // d.fx = d3.event.x; - // d.fy = d3.event.y; - // } - // - // function dragended(d) { - // if (!d3.event.active) { - // simulation.alphaTarget(0); - // } - // d.fx = null; - // d.fy = null; - // } + const tableInfo: TableInfoData = await this.state.getTableInfo(Object.values(this.state.lshData.average_table)); + this.hoverPlot(tableInfo.prototypes); + const distances = tableInfo.distances; } + getColor(value) { const hue=((1-value)*120).toString(10); return ["hsl(",hue,",100%,50%)"].join(""); diff --git a/AngularApp/prototype/src/app/state.service.ts b/AngularApp/prototype/src/app/state.service.ts index c711c42c6b0c594d7201ead864fd80b8f573522e..4745c9eb1c88d392cd5ed8a1cdcdc731b1f181b2 100644 --- a/AngularApp/prototype/src/app/state.service.ts +++ b/AngularApp/prototype/src/app/state.service.ts @@ -1,38 +1,42 @@ import {EventEmitter, Injectable} from '@angular/core'; 
-import {ApiService, LshData, RawData, TableInfoData} from './api.service'; +import {ApiService, LshData, Parameters, RawData, TableInfoData} from './api.service'; @Injectable({ providedIn: 'root' }) +/** + * This service acts as the state of the entire application. Components can subscribe to EventEmitters within this state to update their + * contents. + */ export class StateService { - public loadingProgress: number = 0; - + /** + * These are all LSH specific variables. The variables can be accessed using the getters and setters + */ private _rawData: RawData[]; private _lshData: LshData; - private _tableInfo: TableInfoData; private _queryWindow: number[][]; - private _table: {[bucket: string]: number[]}[]; - public _averageTable: {[bucket: string]: number[]}; private _weights: number[]; - - private _currentTab: number; private _labels = {}; - private _sliderValue; private _lshParameters: number[]; - - private states = []; - public windowSize = 120; public nrOfTables = 5; public hashSize = 5; public stepSize = 200; + + /** + * These are all GUI variables + */ + public loadingProgress = 0; public querySelectionMode = true; + private _currentTab: number; + private _sliderValue; + /** + * These are all EventEmitters. Subscribe to these if you want to be informed about an update in state. + */ public onNewData: EventEmitter<void> = new EventEmitter<void>(); public onNewWindows: EventEmitter<void> = new EventEmitter<void>(); public onNewQuery: EventEmitter<void> = new EventEmitter<void>(); - public onNewTable: EventEmitter<void> = new EventEmitter<void>(); - public onNewTableInfo: EventEmitter<void> = new EventEmitter<void>(); public onNewLshData: EventEmitter<void> = new EventEmitter<void>(); public onNewLabels: EventEmitter<void> = new EventEmitter<void>(); @@ -43,6 +47,9 @@ export class StateService { this.initialize(); } + /** + * This function initializes the application. It retrieves the raw data and creates windows. + */ async initialize(): Promise<void> { this.loadingProgress = 0; await this.getRawData(); @@ -51,72 +58,73 @@ export class StateService { this.loadingProgress = 100; } + /** + * This function resets the application. 
It re-creates the windows + */ async reset(): Promise<void> { this.loadingProgress = 50; await this.createWindows(); this.loadingProgress = 100; } + /** + * This function retrieves the raw data + */ async getRawData(): Promise<void> { this.rawData = await this.api.readFile(); } + /** + * This function creates the windows on the server side + */ async createWindows(): Promise<void> { await this.api.createWindows(this.parameters); this.onNewWindows.emit(); } + /** + * This function performs the first iteration of LSH + */ async lshInitial(): Promise<void> { + this._weights = Array(this._queryWindow.length).fill(1); this.lshData = await this.api.lshInitial(this._queryWindow); - console.log('data loaded'); this._lshParameters = this.lshData.parameters; - this._weights = [1, 1, 1]; - this.createTable(); } + /** + * This function performs every other iteration of LSH + */ async update(labels, hashFunctions): Promise<void> { this._weights = await this.api.getWeights(this._queryWindow, labels, this._weights, hashFunctions); - console.log(this._weights); this.lshData = await this.api.lshUpdate(this._queryWindow, this._weights, this._lshParameters); - this.createTable(); } + /** + * This function retrieves additional information given a table + */ async getTableInfo(table: number[][]): Promise<TableInfoData> { - // console.log(this.tableInfo); return await this.api.getTableInfo(table); } + /** + * This function retrieves the query + */ async getQueryWindow(windowIndex: number | {[index: number]: boolean}): Promise<number[][]> { this.queryWindow = await this.api.getQueryWindow(windowIndex); console.log(this.queryWindow); return this._queryWindow; } + /** + * This function retrieves the window given the window index + */ async getWindow(indices: number[]): Promise<number[][][]> { return await this.api.getWindowByIndices(indices); } - async createTable() { - console.log('setting table param'); - this.table = this.lshData.tables; - console.log('table param set'); - const averageTable = {}; - const length = this.lshData.average_distances.length; - const median = this.lshData.average_distances[Math.ceil(length / 2)]; - const stepsize = median / 10; - const indices: number[] = this.lshData.average_distances.map((x) => x > median * 2 ? 
19 : Math.floor(x / stepsize)); - this.lshData.average_candidates.forEach((candidate: number, index: number) => { - if (averageTable[indices[index]] === undefined) - { - averageTable[indices[index]] = []; - } - averageTable[indices[index]].push(candidate); - }); - this._averageTable = averageTable; - console.log('table created'); - this.tableInfo = await this.getTableInfo(Object.values(this._averageTable)); - } - + /** + * These are all setters and getters + */ public set rawData(v: RawData[]) { this._rawData = v; console.log(this._rawData); @@ -137,26 +145,6 @@ export class StateService { return this._lshData; } - public set tableInfo(v: TableInfoData) { - this._tableInfo = v; - this.onNewTableInfo.emit(); - } - - public get tableInfo(): TableInfoData { - return this._tableInfo; - } - - public set table(v: {[bucket: string]: number[]}[]) { - console.log(v); - this._table = v; - console.log('emitting onNewTable'); - this.onNewTable.emit(); - } - - public get table(): {[bucket: string]: number[]}[] { - return this._table; - } - public set labels(v) { this._labels = v; this.onNewLabels.emit(); @@ -197,7 +185,7 @@ export class StateService { return this._lshParameters; } - public get parameters(): {[parameter: string]: number} { + public get parameters(): Parameters { return { windowsize: this.windowSize, hashsize: this.hashSize, diff --git a/AngularApp/prototype/src/app/table-overview/table-overview.component.ts b/AngularApp/prototype/src/app/table-overview/table-overview.component.ts index e7894e5394489d10b1004e75e382919550a8150c..0aef1ba5026c9e5e1dcc771b4162214f9d365f5c 100644 --- a/AngularApp/prototype/src/app/table-overview/table-overview.component.ts +++ b/AngularApp/prototype/src/app/table-overview/table-overview.component.ts @@ -15,7 +15,7 @@ export class TableOverviewComponent implements OnInit { constructor(private state: StateService) { } ngOnInit(): void { - this.state.onNewTable.subscribe(() => { + this.state.onNewLshData.subscribe(() => { this.createHistograms(); this.createPrototypes(); }); @@ -132,7 +132,7 @@ export class TableOverviewComponent implements OnInit { console.log('creating table histograms'); this.subplots = []; this.averages = []; - const tables = this.state.table; + const tables = this.state.lshData.tables; console.log('start of table histograms'); tables.forEach((table, index) => { console.log(index); @@ -181,7 +181,7 @@ export class TableOverviewComponent implements OnInit { // } public get tables() { - return this.state.table; + return this.state.lshData.tables; } public get visible() { diff --git a/Flaskserver/.idea/workspace.xml b/Flaskserver/.idea/workspace.xml index 884596810c8bc96a3ed3d93e243bdc0adc09d6d4..25b40fe7986889f3e28b65f4b07fff584e190874 100644 --- a/Flaskserver/.idea/workspace.xml +++ b/Flaskserver/.idea/workspace.xml @@ -20,18 +20,23 @@ </component> <component name="ChangeListManager"> <list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment=""> - <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/api.service.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/api.service.ts" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" 
beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" afterDir="false" /> <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/state.service.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/state.service.ts" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/pseudo.py" beforeDir="false" afterPath="$PROJECT_DIR$/pseudo.py" afterDir="false" /> </list> <option name="SHOW_DIALOG" value="false" /> <option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> <option name="LAST_RESOLUTION" value="IGNORE" /> </component> + <component name="FileTemplateManagerImpl"> + <option name="RECENT_TEMPLATES"> + <list> + <option value="Python Script" /> + </list> + </option> + </component> <component name="Git.Settings"> <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." /> </component> @@ -50,6 +55,10 @@ <property name="nodejs_npm_path_reset_for_default_project" value="true" /> </component> <component name="RecentsManager"> + <key name="MoveFile.RECENT_KEYS"> + <recent name="$PROJECT_DIR$/data" /> + <recent name="$PROJECT_DIR$/libs" /> + </key> <key name="CopyFile.RECENT_KEYS"> <recent name="$PROJECT_DIR$" /> </key> @@ -144,6 +153,10 @@ <screen x="72" y="27" width="1848" height="1053" /> </state> <state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053" timestamp="1606260652750" /> + <state x="687" y="162" width="618" height="783" key="find.popup" timestamp="1606586473850"> + <screen x="72" y="27" width="1848" height="1053" /> + </state> + <state x="687" y="162" width="618" height="783" key="find.popup/72.27.1848.1053@72.27.1848.1053" timestamp="1606586473850" /> <state x="659" y="259" width="672" height="678" key="search.everywhere.popup" timestamp="1604929652702"> <screen x="72" y="27" width="1848" height="1053" /> </state> diff --git a/Flaskserver/__pycache__/DBA.cpython-38.pyc b/Flaskserver/__pycache__/DBA.cpython-38.pyc deleted file mode 100644 index ee756e972d4a8aac20fcbf3c7ac617a69c472ca0..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/DBA.cpython-38.pyc and /dev/null differ diff --git a/Flaskserver/__pycache__/DBA.cpython-39.pyc b/Flaskserver/__pycache__/DBA.cpython-39.pyc deleted file mode 100644 index 368c5be97531cb8f6473dc6bef50b905d96221c0..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/DBA.cpython-39.pyc and /dev/null differ diff --git a/Flaskserver/__pycache__/bigwig.cpython-38.pyc b/Flaskserver/__pycache__/bigwig.cpython-38.pyc deleted file mode 100644 index 00c1792d524e86a06d07155df2a61697b8296f77..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/bigwig.cpython-38.pyc and /dev/null differ diff --git a/Flaskserver/__pycache__/main.cpython-38.pyc b/Flaskserver/__pycache__/main.cpython-38.pyc index 0a689a6143fa33130b1d62a5e9afc3f056bdce18..d7e1394f0ec8adca440344b7cccf0ad1393d6745 100644 Binary files a/Flaskserver/__pycache__/main.cpython-38.pyc and b/Flaskserver/__pycache__/main.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/preprocessing.cpython-38.pyc b/Flaskserver/__pycache__/preprocessing.cpython-38.pyc new 
file mode 100644 index 0000000000000000000000000000000000000000..a7235ecbfce8b71c221b4345ca9ee868b8370c8b Binary files /dev/null and b/Flaskserver/__pycache__/preprocessing.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/pseudo.cpython-38.pyc b/Flaskserver/__pycache__/pseudo.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59cff614cd0a683799a6b40d1e3c54bddacc3b8a Binary files /dev/null and b/Flaskserver/__pycache__/pseudo.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/utils.cpython-38.pyc b/Flaskserver/__pycache__/utils.cpython-38.pyc deleted file mode 100644 index 53aa8e25f8cca63b270439f74b880d04a1d5e23d..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/utils.cpython-38.pyc and /dev/null differ diff --git a/Flaskserver/.gitattributes b/Flaskserver/data/.gitattributes similarity index 100% rename from Flaskserver/.gitattributes rename to Flaskserver/data/.gitattributes diff --git a/Flaskserver/21.csv b/Flaskserver/data/21.csv similarity index 100% rename from Flaskserver/21.csv rename to Flaskserver/data/21.csv diff --git a/Flaskserver/NW_Ground_Stations_2016.csv b/Flaskserver/data/NW_Ground_Stations_2016.csv similarity index 100% rename from Flaskserver/NW_Ground_Stations_2016.csv rename to Flaskserver/data/NW_Ground_Stations_2016.csv diff --git a/Flaskserver/chip_w-3000_r-25.h5 b/Flaskserver/data/chip_w-3000_r-25.h5 similarity index 100% rename from Flaskserver/chip_w-3000_r-25.h5 rename to Flaskserver/data/chip_w-3000_r-25.h5 diff --git a/Flaskserver/data.pkl b/Flaskserver/data/data.pkl similarity index 100% rename from Flaskserver/data.pkl rename to Flaskserver/data/data.pkl diff --git a/Flaskserver/parameters.npy b/Flaskserver/data/parameters.npy similarity index 100% rename from Flaskserver/parameters.npy rename to Flaskserver/data/parameters.npy diff --git a/Flaskserver/processed-data b/Flaskserver/data/processed-data similarity index 100% rename from Flaskserver/processed-data rename to Flaskserver/data/processed-data diff --git a/Flaskserver/processed-data.npy b/Flaskserver/data/processed-data.npy similarity index 100% rename from Flaskserver/processed-data.npy rename to Flaskserver/data/processed-data.npy diff --git a/Flaskserver/query b/Flaskserver/data/query similarity index 100% rename from Flaskserver/query rename to Flaskserver/data/query diff --git a/Flaskserver/test.bigWig b/Flaskserver/data/test.bigWig similarity index 100% rename from Flaskserver/test.bigWig rename to Flaskserver/data/test.bigWig diff --git a/Flaskserver/DBA.py b/Flaskserver/libs/DBA.py similarity index 100% rename from Flaskserver/DBA.py rename to Flaskserver/libs/DBA.py diff --git a/Flaskserver/DBA_multivariate.py b/Flaskserver/libs/DBA_multivariate.py similarity index 100% rename from Flaskserver/DBA_multivariate.py rename to Flaskserver/libs/DBA_multivariate.py diff --git a/Flaskserver/__pycache__/DBA_multivariate.cpython-38.pyc b/Flaskserver/libs/__pycache__/DBA_multivariate.cpython-38.pyc similarity index 97% rename from Flaskserver/__pycache__/DBA_multivariate.cpython-38.pyc rename to Flaskserver/libs/__pycache__/DBA_multivariate.cpython-38.pyc index eaac5b3ae6a624df5de796f13360e27285d379c7..c05779e917b5cab3b52eb89a19ee2a3f8aa6e2ff 100644 Binary files a/Flaskserver/__pycache__/DBA_multivariate.cpython-38.pyc and b/Flaskserver/libs/__pycache__/DBA_multivariate.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/bigwig.cpython-39.pyc b/Flaskserver/libs/__pycache__/bigwig.cpython-38.pyc similarity index 
71% rename from Flaskserver/__pycache__/bigwig.cpython-39.pyc rename to Flaskserver/libs/__pycache__/bigwig.cpython-38.pyc index d7b5cc2d9c3f2b82802447276d67013e31df8328..53394d44f75209bebbfc7b6452b784f4de06c190 100644 Binary files a/Flaskserver/__pycache__/bigwig.cpython-39.pyc and b/Flaskserver/libs/__pycache__/bigwig.cpython-38.pyc differ diff --git a/Flaskserver/bigwig.py b/Flaskserver/libs/bigwig.py similarity index 100% rename from Flaskserver/bigwig.py rename to Flaskserver/libs/bigwig.py diff --git a/Flaskserver/setup.py b/Flaskserver/libs/setup.py similarity index 100% rename from Flaskserver/setup.py rename to Flaskserver/libs/setup.py diff --git a/Flaskserver/utils.py b/Flaskserver/libs/utils.py similarity index 100% rename from Flaskserver/utils.py rename to Flaskserver/libs/utils.py diff --git a/Flaskserver/main.py b/Flaskserver/main.py index 713ce99f4b44d3aa934e26bb66bb0bad72c60fed..fbfcdbad80d524ffdbb862f77a8c30b5b50ef5b9 100644 --- a/Flaskserver/main.py +++ b/Flaskserver/main.py @@ -2,24 +2,12 @@ from flask import Flask, request import numpy as np from flask_cors import CORS from time import time -import pandas as pd import orjson -import bigwig -import bbi -import _ucrdtw -import _lsh -import math -import dask.dataframe as dd import os.path -from random import sample -from DBA_multivariate import performDBA -from tslearn.metrics import dtw -from sklearn import preprocessing -from collections import defaultdict -from dtaidistance import dtw_ndim -from scipy.spatial.distance import euclidean +import pseudo +import preprocessing -from fastdtw import fastdtw +data_path = 'data/processed-data.npy' reload = False @@ -30,478 +18,222 @@ CORS(app) def index(): return "hi" + +""" +Returns raw data + +Output: [{ + index: 1d array [x] + values: 1d array [x] +}] +""" @app.route('/read-data', methods=['GET']) def read_data(): t0 = time() - size = bbi.chromsizes('test.bigWig')['chr1'] - bins = 100000 - data = bigwig.get('test.bigWig', 'chr1', 0, size, bins) - print(data.shape) - response = [ - { - "index": list(range(0, size, int(size/(bins)))), - "values": data.tolist() - }, - { - "index": list(range(0, size, int(size / (bins)))), - "values": data.tolist() - }, - { - "index": list(range(0, size, int(size / (bins)))), - "values": data.tolist() - } - ] + response = preprocessing.read_mts_data() response = orjson.dumps(response) print('Data read: ' + str(time()-t0)) return response -@app.route('/read-mts-data', methods=['GET']) -def read_mts_data(): - filename = 'data.pkl' - if (not os.path.isfile(filename)): - print("start") - df = dd.read_csv("NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td']) - print("read file") - df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0) - print("split rows") - df = df.compute() - df.to_pickle(filename) - print("to_pandas") - df = pd.read_pickle(filename) - df.dropna(subset=['t'], inplace=True) - response = [ - { - "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), - "values": df.loc[df['number_sta'] == 14066001].loc[:, 't'].values.tolist() - }, - { - "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), - "values": df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].values.tolist() - }, - { - "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), - "values": df.loc[df['number_sta'] == 14066001].loc[:, 'td'].values.tolist() - } - ] - print("response ready") - 
response = orjson.dumps(response) - return response -@app.route('/create-mts-windows', methods=['POST']) -def create_mts_windows(): - t0 = time() - if (not os.path.isfile('processed-data.npy')): - filename = 'data.pkl' - df = pd.read_pickle(filename) - channels = list() - channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 't'].fillna(0).values.tolist()) - channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].fillna(0).values.tolist()) - channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'td'].fillna(0).values.tolist()) - print("Data read: " + str(time()-t0)) - # raw_data = request.json - window_size = 120 #int(raw_data['parameters']["windowsize"]) - print("Processing: " + str(time()-t0)) - data = [([values[i:i+window_size] for values in channels]) for i in range(0, len(channels[0]) - window_size, 1)] - print("Raw windows: " + str(time()-t0)) - windows = [] - for i in range(len(data)): - if i % 5000 == 0: - print(i) - windows.append(preprocessing.minmax_scale(data[i], (-1, 1), axis=1)) - print("Preprocessed: " + str(time()-t0)) - np.save('processed-data', windows) - # data = np.load('processed-data.npy') - # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) - # r, a, sd = preprocess(data, 11.5) - # np.save('parameters', np.array([r, a, sd])) - print("Sending response: " + str(time()-t0)) - return '1' +""" +Creates windows +Input: { + parameters: { + windowsize: int + } +} +Output: '1' +""" @app.route('/create-windows', methods=['POST']) def create_windows(): t0 = time() - if (not os.path.isfile('processed-data.npy')): - # raw_data = request.json - # window_size = int(raw_data['parameters']["windowsize"]) - window_size = 120 - data = bigwig.chunk( - 'test.bigWig', - 12000, - int(12000 / window_size), - int(12000 / 6), - ['chr1'], - verbose=True, - ) - data = np.reshape(data, (len(data), 1, len(data[0]))) - data2 = np.copy(data) - np.random.shuffle(data2) - data3 = np.copy(data) - np.random.shuffle(data3) - - data = np.concatenate((data, data2), axis=1) - data = np.concatenate((data, data3), axis=1) - # data = np.repeat(data, repeats=3, axis=1) - np.save('processed-data', data) - print('Windows created: ' + str(time()-t0)) - return '1' - -@app.route('/create-test-windows', methods=['POST']) -def create_test_windows(): - t0 = time() - if (not os.path.isfile('processed-data.npy')): - datafile = '21.csv' - - data = pd.read_csv(datafile, header=None) - - # and convert it to numpy array: - npdata = np.array(data) - print('data loaded') - window_data = [npdata[i:i + 120, 0:5] for i in range(0, npdata.shape[0] - 120, int(120 / 8))] - del npdata - print('data created') - np_window_data = np.repeat(window_data, repeats=3, axis=0) - print(np_window_data.shape) - del window_data - data = np.reshape(np_window_data, (len(np_window_data), 5, len(np_window_data[0]))) - print(data.shape) - np.save('processed-data', data) + if (not os.path.isfile(data_path)): + raw_data = request.json + window_size = int(raw_data['parameters']["windowsize"]) + preprocessing.create_eeg_windows(window_size, 5) print('Windows created: ' + str(time()-t0)) return '1' +""" +Does the first iteration of LSH and returns the candidates, distances, tables, samples, and parameters described below + +Input: { + query: 2d array [d][t] +} + +Output: { + hash_functions: 3d array [k][l][d] + candidates: 3d array [k][l][i] + distances: 3d array [k][l][i] + average_candidates: 1d array [i] + average_distances: 1d array [i] + tables: [{ + bucket: 1d array + }] + average_table: { + bucket: 1d array + } + samples: 1d array + parameters: 1d array
+} +""" @app.route('/initialize', methods=['POST']) def initialize(): t0 = time() raw_data = orjson.loads(request.data) - data = np.load('processed-data.npy') + data = np.load(data_path) data = np.swapaxes(data, 1, 2) - # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) query = raw_data["query"] query = np.swapaxes(query, 0, 1) - # query = np.reshape(query, (len(query[0]), len(query))) - parameters = preprocess(data) # parameters = np.load('parameters.npy') - r = parameters[0] - a = parameters[1] - sd = parameters[2] - - candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) - print(distances) - - dict = defaultdict(int) - for l in range(len(candidates)): - for k in range(len(candidates[0])): - for i in range(len(candidates[0][0])): - dict[candidates[l][k][i]] += distances[l][k][i] - sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} - average_candidates = list(sorted_dict.keys()) - average_distances = list(sorted_dict.values()) - - tables = [] - samples_set = set() - candidates = candidates.tolist() - for l in range(len(candidates)): - for k in range(len(candidates[0])): - samples_set.update(candidates[l][k][0:5]) - dict = defaultdict(list) - length = len(distances[l][k]) - median = distances[l][k][math.ceil(length/2)] - stepsize = median / 10 - indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), distances[l][k])) - for i in range(len(candidates[0][0])): - dict[str(indices[i])].append(candidates[l][k][i]) - tables.append(dict) - - samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist() - - - response = { - "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(), - "candidates": candidates, - "tables": tables, - "distances": distances.tolist(), - "samples": list(samples), - "average_candidates": np.array(average_candidates).tolist(), - "average_distances": np.array(average_distances).tolist(), - "parameters": [float(r), float(a), float(sd)] - } - response = orjson.dumps(response) - print('LSH done: ' + str(time()-t0)) - return response -@app.route('/weights', methods=['POST']) -def weights(): - alpha = 0.2 - raw_data = orjson.loads(request.data) - labels = raw_data["labels"] - hash_functions = raw_data["hash_functions"] - query = raw_data["query"] - old_weights = raw_data["weights"] - data = np.load('processed-data.npy') - all_good_windows = data[[[int(index) for index, value in labels.items() if value is True]]] - - good_distances = np.zeros(len(query)) - for window in all_good_windows: - for i in range(len(all_good_windows[0])): - good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1] - if len(all_good_windows) != 0: - good_distances = np.square(good_distances) - good_distances /= np.sum(good_distances) - good_distances = np.ones(len(query)) - good_distances - good_distances /= np.sum(good_distances) - good_distances *= len(all_good_windows[0]) - good_distances = np.sqrt(good_distances) - - if len(hash_functions) != 0: - summed_hash_functions = np.sum(hash_functions, axis=0) - summed_hash_functions = np.square(summed_hash_functions) - normalized_hash_functions = summed_hash_functions / np.sum(summed_hash_functions) - normalized_hash_functions *= len(hash_functions[0]) - - if len(hash_functions) + len(all_good_windows) == 0: - print("no update") - new_weights = old_weights - elif len(hash_functions) == 0: - print("only windows") - new_weights = alpha * np.array(old_weights) + (1 - alpha) * good_distances - elif 
len(all_good_windows) == 0: - print("only tables") - new_weights = alpha * np.array(old_weights) + (1 - alpha) * normalized_hash_functions - else: - print("tables & windows") - new_weights = alpha * np.array(old_weights) + 0.5 * (1-alpha) * good_distances + 0.5 * (1-alpha) * normalized_hash_functions - - print(new_weights) - - response = orjson.dumps(new_weights.tolist()) + lsh_data = pseudo.lsh(data, query) + + response = orjson.dumps(lsh_data) + print('LSH done: ' + str(time()-t0)) return response +""" +Does a weighted iteration of LSH and returns the same information as /initialize (without the parameters) + +Input: { + query: 2d array [d][t] +} + +Output: { + hash_functions: 3d array [k][l][d] + candidates: 3d array [k][l][i] + distances: 3d array [k][l][i] + average_candidates: 1d array [i] + average_distances: 1d array [i] + tables: [{ + bucket: 1d array + }] + average_table: { + bucket: 1d array + } + samples: 1d array +} +""" @app.route('/update', methods=['POST']) def update(): t0 = time() raw_data = orjson.loads(request.data) - data = np.load('processed-data.npy') + data = np.load(data_path) data = np.swapaxes(data, 1, 2) - # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) query = raw_data["query"] query = np.swapaxes(query, 0, 1) - # query = np.reshape(query, (len(query[0]), len(query))) weights = raw_data["weights"] parameters = raw_data["parameters"] - candidates, distances, hf = _lsh.lsh(data, query, parameters[0], parameters[1], parameters[2], weights) - dict = defaultdict(int) - for l in range(len(candidates)): - for k in range(len(candidates[0])): - for i in range(len(candidates[0][0])): - dict[candidates[l][k][i]] += distances[l][k][i] - sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} - average_candidates = list(sorted_dict.keys()) - average_distances = list(sorted_dict.values()) - - tables = [] - samples_set = set() - candidates = candidates.tolist() - for l in range(len(candidates)): - for k in range(len(candidates[0])): - samples_set.update(candidates[l][k][0:5]) - dict = defaultdict(list) - length = len(distances[l][k]) - median = distances[l][k][math.ceil(length/2)] - stepsize = median / 10 - indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), distances[l][k])) - for i in range(len(candidates[0][0])): - dict[str(indices[i])].append(candidates[l][k][i]) - tables.append(dict) - - samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist() - - response = { - "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(), - "candidates": candidates, - "tables": tables, - "samples": list(samples), - "average_candidates": np.array(average_candidates).tolist(), - "average_distances": np.array(average_distances).tolist(), - "distances": distances.tolist(), - } - response = orjson.dumps(response) + lsh_data = pseudo.lsh(data, query, parameters=parameters, weights=weights) + + response = orjson.dumps(lsh_data) print('LSH done: ' + str(time()-t0)) return response + +""" +Calculates new weights for LSH algorithm + +Input: { + labels: {index: boolean}
+ hash_functions: 2d array [?][d] + query: 2d array [d][t] + weights: 1d array [d] +} + +Output: 1d array [d] +""" +@app.route('/weights', methods=['POST']) +def weights(): + raw_data = orjson.loads(request.data) + labels = raw_data["labels"] + hash_functions = raw_data["hash_functions"] + query = raw_data["query"] + old_weights = raw_data["weights"] + data = np.load(data_path) + + new_weights = pseudo.weights(data, query, old_weights, labels, hash_functions) + + response = orjson.dumps(new_weights) + return response + + +""" +Calculates query based on given indices + +Input: { + indices: 1d array [?] +} + +Output: 2d array [d][t] +""" @app.route('/query', methods=['POST']) def query(): t0 = time() raw_data = orjson.loads(request.data) - windowIndices = raw_data['window'] - if isinstance(windowIndices, int): - output = np.load('processed-data.npy')[windowIndices] - response = orjson.dumps(output.tolist()) - print("Query done: " + str(time() - t0)) - return response - else: - indices = [int(index) for index, value in windowIndices.items() if value is True] - data = np.load('processed-data.npy')[indices] - output = performDBA(data) - response = orjson.dumps(output.tolist()) - print("Query done: " + str(time()-t0)) - return response + window_indices = raw_data['indices'] + data = np.load(data_path) + + response = pseudo.query(data, window_indices) + + response = orjson.dumps(response) + print("Query done: " + str(time() - t0)) + return response + + +""" +Returns values of windows on given indices +Input: { + indices: 1d array [x] +} + +Output: 3d array [x][d][t] +""" @app.route('/window', methods=['POST']) def window(): t0 = time() raw_data = orjson.loads(request.data) indices = raw_data['indices'] - output = np.load('processed-data.npy')[indices] + + output = np.load(data_path)[indices] + response = orjson.dumps(output.tolist()) - print("Query done: " + str(time() - t0)) + print("Window(s) done: " + str(time() - t0)) return response + +""" +Returns additional information on given table + +Input: { + table: 2d array [x][?] 
+} + +Output: { + prototypes: { + average: 1d array [t] + max: 1d array [t] + min: 1d array [t] + } + distances: 2d array [x][x] +} +""" @app.route('/table-info', methods=['POST']) def table_info(): t0 = time() raw_data = orjson.loads(request.data) - all_windows = raw_data['windows'] - data = np.load('processed-data.npy') - prototypes = [] - for windows in all_windows: - actual_windows = data[windows] - average_values = np.average(actual_windows, 0) - std_values = np.std(actual_windows, 0) - max_values = average_values + std_values - min_values = average_values - std_values - prototypes.append({ - 'average': average_values.tolist(), - 'max': max_values.tolist(), - 'min': min_values.tolist() - }) - # distances = [[dtw(np.array(v["average"]), np.array(w["average"]), global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05 * 120)) for j, w in enumerate(prototypes)] for i, v in enumerate(prototypes)] - response = orjson.dumps({'prototypes': prototypes, 'distances': []}) - print("Averages calculated: " + str(time() - t0)) - return response - -def preprocess(data, r=10.0): - # return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037 - # data = np.load('processed-data.npy') - # data = np.reshape(data, (59999, 20, 120)) - # data = np.repeat(data, repeats=1, axis=1) - subset = [] - t0 = time() - - i = 0 - while i < len(data): - if i % 999 == 0: - print(r) - print(str(i) + ':' + str(len(subset))) - - state = 1 - for s in subset: - if np.linalg.norm(data[i] - data[s]) < r: - state = 0 - break - if state == 1: - subset.append(i) - - i = i + 1 - if i == 10000 and len(subset) < 10: - r = r / 2 - subset = [] - i = 0 - if len(subset) > 200: - r = r + r / 2 - subset = [] - i = 0 - - # subset = sample(list(range(len(data))), 200) - print("r = " + str(r)) - dtw_distances = [] - eq_distances = [] - for i, index_1 in enumerate(subset): - print(i) - for j, index_2 in enumerate(subset): - if index_1 == index_2: - continue - e = np.linalg.norm(data[index_1] - data[index_2]) - if (math.isnan(e) or e == 0): - eq_distances.append(0.0001) - dtw_distances.append(0.0001) - continue - eq_distances.append(e) - d = 0 - # d, _ = fastdtw(data[index_1], data[index_2], dist=euclidean) - d = dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) - # d = _ucrdtw.ucrdtw(data[index_1], data[index_2], 0.05, False)[1] - # d = dtw.dtw(data[index_1], data[index_2], dist_method="Euclidean", window_type="sakoechiba", window_args={"window_size": 120}).distance - dtw_distances.append(d) - - ratios = np.array(dtw_distances)/np.array(eq_distances) - mean_dtw = np.mean(dtw_distances) - sd_dtw = np.std(dtw_distances) - mean_eq = np.mean(eq_distances) - sd_eq = np.std(eq_distances) - a = np.mean(ratios) - sd = np.std(ratios) - theta = mean_dtw + -2.58 * sd_dtw - # theta = mean_eq + -2.58 * sd_eq - r = theta / ((a-sd)*math.sqrt(120)) - if r < 0: - r = mean_dtw / 100 - # r = theta / (math.sqrt(120)) - print('Mean: ' + str(mean_dtw)) - print('Stdev: ' + str(sd_dtw)) - print('Ratio mean: ' + str(a)) - print('Ratio stdev: ' + str(sd)) - print('Theta: ' + str(theta)) - print('r: ' + str(r)) - print('Preprocessing time: ' + str(time() - t0)) - return r, a, sd - -def debug_test_lsh(): - data = np.load('processed-data.npy') - # data = np.repeat(data, repeats=7, axis=1) - print(data.shape) - data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) - - r, a, sd = preprocess(data, 11.25) - create_windows() - query_n = 1234 - t0 = time() - query = data[query_n] - data = 
data.astype('double') - dict = defaultdict(int) - candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) - print("Calculated approximate in: " + str(time()-t0)) - for l in range(len(candidates)): - for k in range(len(candidates[0])): - for i in range(len(candidates[0][0])): - dict[candidates[l][k][i]] += distances[l][k][i] - sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} - candidates = list(sorted_dict.keys()) - - print(candidates[0:20]) + table = raw_data['table'] + data = np.load(data_path) - t0 = time() - # distances = [dtw_ndim.distance_fast(window, query) for window in data] - distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data] - topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k]) - print("Calculated exact dtw in: " + str(time()-t0)) - print(topk_dtw[0:20]) + response = pseudo.table_info(data, table) - t0 = time() - l2distances = [np.linalg.norm(window - query) for window in data] - print("Calculated exact l2 in: " + str(time()-t0)) - - # # distances_ed = [distance.euclidean(query, window) for window in data] - # # topk_ed = sorted(range(len(distances_ed)), key=lambda k: distances_ed[k]) - # - accuracy = 0 - for index in topk_dtw[0:20]: - if index in candidates: - accuracy += 1 - print(accuracy) - -# read_mts_data() -# create_mts_windows() -# debug_test_lsh() \ No newline at end of file + print("Averages calculated: " + str(time() - t0)) + return response \ No newline at end of file diff --git a/Flaskserver/preprocessing.py b/Flaskserver/preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..fe38926877a790c2944f2962afd2aad93f7ee8c0 --- /dev/null +++ b/Flaskserver/preprocessing.py @@ -0,0 +1,120 @@ +import numpy as np +import pandas as pd +from libs import bigwig +import bbi +import dask.dataframe as dd +import os.path +from sklearn import preprocessing + +data_path = 'data/processed-data.npy' + +def read_data(): + size = bbi.chromsizes('data/test.bigWig')['chr1'] + bins = 100000 + data = bigwig.get('data/test.bigWig', 'chr1', 0, size, bins) + print(data.shape) + response = [ + { + "index": list(range(0, size, int(size/(bins)))), + "values": data.tolist() + }, + { + "index": list(range(0, size, int(size / (bins)))), + "values": data.tolist() + }, + { + "index": list(range(0, size, int(size / (bins)))), + "values": data.tolist() + } + ] + return response + +def read_mts_data(): + filename = 'data/data.pkl' + if (not os.path.isfile(filename)): + print("start") + df = dd.read_csv("data/NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td']) + print("read file") + df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0) + print("split rows") + df = df.compute() + df.to_pickle(filename) + print("to_pandas") + df = pd.read_pickle(filename) + df.dropna(subset=['t'], inplace=True) + response = [ + { + "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), + "values": df.loc[df['number_sta'] == 14066001].loc[:, 't'].values.tolist() + }, + { + "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), + "values": df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].values.tolist() + }, + { + "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), + "values": df.loc[df['number_sta'] == 14066001].loc[:, 'td'].values.tolist() + } + ] + return response + +def create_peax_windows_12kb(window_size): + data = bigwig.chunk( + 'data/test.bigWig', + 12000, + int(12000 / window_size), + int(12000 / 6), + ['chr1'], + verbose=True, + ) + data = np.reshape(data, (len(data), 1, len(data[0]))) + np.save(data_path, data) + return '1' + +def create_peax_windows_12kb_mts(window_size): + data = bigwig.chunk( + 'data/test.bigWig', + 12000, + int(12000 / window_size), + int(12000 / 6), + ['chr1'], + verbose=True, + ) + data = np.reshape(data, (len(data), 1, len(data[0]))) + data2 = np.copy(data) + np.random.shuffle(data2) + data3 = np.copy(data) + np.random.shuffle(data3) + + data = np.concatenate((data, data2), axis=1) + data = np.concatenate((data, data3), axis=1) + np.save(data_path, data) + return '1' + +def create_eeg_windows(window_size, nr_of_channels): + datafile = 'data/21.csv' + data = pd.read_csv(datafile, header=None) + npdata = np.array(data) + window_data = [npdata[i:i + window_size, 0:nr_of_channels] for i in range(0, npdata.shape[0] - window_size, int(window_size / 8))] + del npdata + np_window_data = np.repeat(window_data, repeats=3, axis=0) + del window_data + data = np.reshape(np_window_data, (len(np_window_data), nr_of_channels, len(np_window_data[0]))) + np.save(data_path, data) + return '1' + +def create_weather_windows(window_size): + filename = 'data/data.pkl' + df = pd.read_pickle(filename) + channels = list() + channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 't'].fillna(0).values.tolist()) + channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].fillna(0).values.tolist()) + channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'td'].fillna(0).values.tolist()) + data = [([values[i:i+window_size] for values in channels]) for i in range(0, len(channels[0]) - window_size, 1)] + windows = [] + for i in range(len(data)): + if i % 5000 == 0: + print(i) + windows.append(preprocessing.minmax_scale(data[i], (-1, 1), axis=1)) + np.save(data_path, windows) + return '1' \ No newline at end of file diff --git a/Flaskserver/pseudo.py b/Flaskserver/pseudo.py new file mode 100644 index 0000000000000000000000000000000000000000..9adf51dbafa0847f4b849ca9bbbedc3c205436cf --- /dev/null +++ b/Flaskserver/pseudo.py @@ -0,0 +1,243 @@ +import numpy as np +from time import time +import _ucrdtw +import _lsh +import math +from libs.DBA_multivariate import performDBA +from tslearn.metrics import dtw +from collections import defaultdict + +def lsh(data, query, parameters = None, weights = None): + if parameters is None: + parameters = preprocess(data) + r = parameters[0] + a = parameters[1] + sd = parameters[2] + + if weights is None: + candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) + else: + candidates, distances, hf = _lsh.lsh(data, query, r, a, sd, weights) + + dict = defaultdict(int) + for l in range(len(candidates)): + for k in range(len(candidates[0])): + for i in range(len(candidates[0][0])): + dict[candidates[l][k][i]] += distances[l][k][i] + sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} + average_candidates = np.array(list(sorted_dict.keys())).tolist() + average_distances = np.array(list(sorted_dict.values())).tolist() + + tables = [] + samples_set = set() + candidates = candidates.tolist() + for l in range(len(candidates)): + for k in range(len(candidates[0])): + samples_set.update(candidates[l][k][0:5]) + dict = defaultdict(list) + length = len(distances[l][k]) + median = distances[l][k][math.ceil(length/2)] + stepsize = median / 10 + indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), distances[l][k])) + for i in
range(len(candidates[0][0])): + dict[str(indices[i])].append(candidates[l][k][i]) + tables.append(dict) + + length = len(average_distances) + median = average_distances[math.ceil(length/2)] + stepsize = median / 10 + indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), average_distances)) + average_table = defaultdict(list) + for i in range(len(average_candidates)): + average_table[str(indices[i])].append(average_candidates[i]) + + samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist() + + + response = { + "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(), + "candidates": candidates, + "distances": distances.tolist(), + "average_candidates": average_candidates, + "average_distances": average_distances, + "tables": tables, + "average_table": average_table, + "samples": list(samples), + "parameters": [float(r), float(a), float(sd)] + } + return response + +def preprocess(data, r=10.0): + subset = [] + t0 = time() + + i = 0 + while i < len(data): + if i % 999 == 0: + print(r) + print(str(i) + ':' + str(len(subset))) + + state = 1 + for s in subset: + if np.linalg.norm(data[i] - data[s]) < r: + state = 0 + break + if state == 1: + subset.append(i) + + i = i + 1 + if i == 10000 and len(subset) < 10: + r = r / 2 + subset = [] + i = 0 + if len(subset) > 200: + r = r + r / 2 + subset = [] + i = 0 + + # subset = sample(list(range(len(data))), 200) + print("r = " + str(r)) + dtw_distances = [] + eq_distances = [] + for i, index_1 in enumerate(subset): + print(i) + for j, index_2 in enumerate(subset): + if index_1 == index_2: + continue + e = np.linalg.norm(data[index_1] - data[index_2]) + if (math.isnan(e) or e == 0): + eq_distances.append(0.0001) + dtw_distances.append(0.0001) + continue + eq_distances.append(e) + d = dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) + dtw_distances.append(d) + + ratios = np.array(dtw_distances)/np.array(eq_distances) + mean_dtw = np.mean(dtw_distances) + sd_dtw = np.std(dtw_distances) + mean_eq = np.mean(eq_distances) + sd_eq = np.std(eq_distances) + a = np.mean(ratios) + sd = np.std(ratios) + theta = mean_dtw + -2.58 * sd_dtw + # theta = mean_eq + -2.58 * sd_eq + r = theta / ((a-sd)*math.sqrt(120)) + if r < 0: + r = mean_dtw / 100 + # r = theta / (math.sqrt(120)) + print('Mean: ' + str(mean_dtw)) + print('Stdev: ' + str(sd_dtw)) + print('Ratio mean: ' + str(a)) + print('Ratio stdev: ' + str(sd)) + print('Theta: ' + str(theta)) + print('r: ' + str(r)) + print('Preprocessing time: ' + str(time() - t0)) + return r, a, sd + +def weights(data, query, old_weights, labels, hash_functions): + alpha = 0.2 + all_good_windows = data[[[int(index) for index, value in labels.items() if value is True]]] + + good_distances = np.zeros(len(query)) + for window in all_good_windows: + for i in range(len(all_good_windows[0])): + good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1] + if len(all_good_windows) != 0: + good_distances = np.square(good_distances) + if np.sum(good_distances) != 0: + good_distances /= np.sum(good_distances) + good_distances = np.ones(len(query)) - good_distances + good_distances /= np.sum(good_distances) + good_distances *= len(all_good_windows[0]) + good_distances = np.sqrt(good_distances) + + if len(hash_functions) != 0: + summed_hash_functions = np.sum(hash_functions, axis=0) + summed_hash_functions = np.square(summed_hash_functions) + normalized_hash_functions = 
summed_hash_functions / np.sum(summed_hash_functions) + normalized_hash_functions *= len(hash_functions[0]) + + if len(hash_functions) + len(all_good_windows) == 0: + print("no update") + new_weights = old_weights + elif len(hash_functions) == 0: + print("only windows") + new_weights = alpha * np.array(old_weights) + (1 - alpha) * good_distances + elif len(all_good_windows) == 0: + print("only tables") + new_weights = alpha * np.array(old_weights) + (1 - alpha) * normalized_hash_functions + else: + print("tables & windows") + new_weights = alpha * np.array(old_weights) + 0.5 * (1-alpha) * good_distances + 0.5 * (1-alpha) * normalized_hash_functions + + print(new_weights) + return new_weights.tolist() + +def table_info(data, table): + prototypes = [] + for cluster in table: + windows = data[cluster] + average_values = np.average(windows, 0) + std_values = np.std(windows, 0) + max_values = average_values + std_values + min_values = average_values - std_values + prototypes.append({ + 'average': average_values.tolist(), + 'max': max_values.tolist(), + 'min': min_values.tolist() + }) + # distances = [[dtw(np.array(v["average"]), np.array(w["average"]), global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05 * 120)) for j, w in enumerate(prototypes)] for i, v in enumerate(prototypes)] + return {'prototypes': prototypes, 'distances': []} + +def query(data, window_indices): + if isinstance(window_indices, int): + output = data[window_indices] + else: + indices = [int(index) for index, value in window_indices.items() if value is True] + indices_windows = data[indices] + output = performDBA(indices_windows) + return output.tolist() + +def debug_test_lsh(): + data = np.load('data/processed-data.npy') + # data = np.repeat(data, repeats=7, axis=1) + print(data.shape) + data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) + + r, a, sd = preprocess(data, 11.25) + query_n = 1234 + t0 = time() + query = data[query_n] + data = data.astype('double') + dict = defaultdict(int) + candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) + print("Calculated approximate in: " + str(time()-t0)) + for l in range(len(candidates)): + for k in range(len(candidates[0])): + for i in range(len(candidates[0][0])): + dict[candidates[l][k][i]] += distances[l][k][i] + sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} + candidates = list(sorted_dict.keys()) + + print(candidates[0:20]) + + t0 = time() + # distances = [dtw_ndim.distance_fast(window, query) for window in data] + distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data] + topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k]) + print("Calculated exact dtw in: " + str(time()-t0)) + print(topk_dtw[0:20]) + + t0 = time() + l2distances = [np.linalg.norm(window - query) for window in data] + print("Calculated exact l2 in: " + str(time()-t0)) + + # # distances_ed = [distance.euclidean(query, window) for window in data] + # # topk_ed = sorted(range(len(distances_ed)), key=lambda k: distances_ed[k]) + # + accuracy = 0 + for index in topk_dtw[0:20]: + if index in candidates: + accuracy += 1 + print(accuracy) diff --git a/Flaskserver/topk.npy b/Flaskserver/topk.npy deleted file mode 100644 index cd3b20f8532b120360e57d68e3a2fed81fa534bc..0000000000000000000000000000000000000000 --- a/Flaskserver/topk.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76c9d862591f8291da412257fb4eff58ec5f567d7c7b14a46de3d5269958c863
-size 997096
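
The endpoints above are driven by the Angular ApiService in a fixed order: create-windows once, then query and initialize, followed by repeated weights/update rounds. A minimal sketch of that round trip in Python using requests -- hypothetical client code, not part of this change -- assuming the Flask server from main.py runs on 127.0.0.1:5000 and the payload shapes match the docstrings in main.py:

import requests

BASE = 'http://127.0.0.1:5000'

# Server-side window creation; a no-op once data/processed-data.npy exists.
requests.post(BASE + '/create-windows', json={'parameters': {'windowsize': 120}})

# Fetch one window as the query (2d array [d][t]), then run the first LSH pass.
query = requests.post(BASE + '/query', json={'indices': 0}).json()
lsh_data = requests.post(BASE + '/initialize', json={'query': query}).json()

# Mark the best average candidate as correct, re-weight, and iterate.
labels = {str(lsh_data['average_candidates'][0]): True}
weights = requests.post(BASE + '/weights', json={
    'query': query,
    'labels': labels,
    'weights': [1.0] * len(query),
    'hash_functions': lsh_data['hash_functions'],
}).json()
lsh_data = requests.post(BASE + '/update', json={
    'query': query,
    'weights': weights,
    'parameters': lsh_data['parameters'],
}).json()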
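Inside pseudo.lsh, the average_candidates/average_distances pair is produced by summing each candidate's distance over all l x k hash tables and sorting the totals ascending. Extracted as a standalone sketch (the helper name rank_candidates is mine):

from collections import defaultdict

def rank_candidates(candidates, distances):
    """Sum each candidate's distance over every (l, k) table, then rank ascending."""
    totals = defaultdict(int)
    for l in range(len(candidates)):
        for k in range(len(candidates[0])):
            for i in range(len(candidates[0][0])):
                totals[candidates[l][k][i]] += distances[l][k][i]
    ranked = sorted(totals.items(), key=lambda item: item[1])
    return [c for c, _ in ranked], [d for _, d in ranked]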
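The tables and average_table structures come from the same distance-binning scheme, which appears twice inside pseudo.lsh; factored out, it reads as below. This is a sketch mirroring the committed logic; it assumes a non-empty distance list sorted ascending whose middle element is non-zero, as the sorted LSH output provides:

import math
from collections import defaultdict

def bucket_by_distance(candidates, distances):
    """Bin candidates into 20 buckets of width median/10; >2*median clamps to bucket 19."""
    median = distances[math.ceil(len(distances) / 2)]  # middle element of the sorted list
    stepsize = median / 10
    table = defaultdict(list)
    for candidate, distance in zip(candidates, distances):
        bucket = 19 if distance > median * 2 else math.floor(distance / stepsize)
        table[str(bucket)].append(candidate)
    return table

The progress view's histogram is driven by exactly these bucket keys (Object.keys(average_table)).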
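pseudo.weights blends the previous per-channel weights with up to two feedback signals, using a fixed alpha of 0.2: one derived from windows labeled good (per-channel DTW distances, squared, inverted and renormalized) and one from the selected hash functions (squared column sums, normalized). The four branches condense to the sketch below (blend_weights and the two signal names are mine; not a drop-in replacement):

import numpy as np

ALPHA = 0.2  # share kept from the previous iteration's weights

def blend_weights(old, window_signal=None, table_signal=None, alpha=ALPHA):
    """Condensed form of the four branches in pseudo.weights; the two signals are
    the per-channel vectors derived from labeled windows and from hash functions."""
    old = np.asarray(old, dtype=float)
    if window_signal is None and table_signal is None:
        return old                                        # no feedback: keep weights
    if table_signal is None:
        return alpha * old + (1 - alpha) * window_signal  # labeled windows only
    if window_signal is None:
        return alpha * old + (1 - alpha) * table_signal   # hash functions only
    return alpha * old + 0.5 * (1 - alpha) * (window_signal + table_signal)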
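Lastly, pseudo.preprocess estimates the LSH parameters from a greedily thinned subset of windows: a and sd are the mean and standard deviation of the DTW-to-Euclidean distance ratios, and the radius r comes from a lower bound on the DTW distances (mean - 2.58 * stdev). The closing arithmetic, isolated under the assumption that the pairwise DTW and Euclidean distance lists have already been collected (window length 120, as hard-coded in the source):

import math
import numpy as np

def lsh_parameters(dtw_distances, eq_distances, window_size=120):
    """r, a, sd as derived at the end of pseudo.preprocess (sketch)."""
    ratios = np.array(dtw_distances) / np.array(eq_distances)
    a, sd = np.mean(ratios), np.std(ratios)
    theta = np.mean(dtw_distances) - 2.58 * np.std(dtw_distances)
    r = theta / ((a - sd) * math.sqrt(window_size))
    if r < 0:  # the bound can go negative; the source falls back to mean/100
        r = np.mean(dtw_distances) / 100
    return r, a, sd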