diff --git a/AngularApp/prototype/src/app/api.service.ts b/AngularApp/prototype/src/app/api.service.ts index e54f93dad0dc5b2cd16a1441c32fa5a1eec16a58..de3bedb569c63ce572aa63a1a1dd56cccba1eed4 100644 --- a/AngularApp/prototype/src/app/api.service.ts +++ b/AngularApp/prototype/src/app/api.service.ts @@ -6,9 +6,11 @@ export interface RawData { } export interface LshData { - candidates: {[bucket: string]: number[]}[]; + candidates: number[][][]; + tables: {[bucket: string]: number[]}[]; average_candidates: number[]; average_distances: number[]; + samples: number[]; distances: number[][][]; hash_functions: number[][][][]; parameters?: number[]; @@ -62,6 +64,18 @@ export class ApiService { return await response.json(); } + async getWeights(query: number[][], labels: {[index: number]: boolean}, weights: number[]): Promise<number[]> { + const response = await fetch('http://127.0.0.1:5000/weights', { + method: 'POST', + headers: { + 'Accept': 'application/json', + 'Content-Type': 'application/json' + }, + body: new Blob( [ JSON.stringify({query, labels, weights}) ], { type: 'text/plain' } ) + }); + return await response.json(); + } + // Find candidates using LSH with weights async lshUpdate(query, weights, parameters): Promise<LshData> { const response = await fetch('http://127.0.0.1:5000/update', { diff --git a/AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts b/AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts index 0638fff1d5e23aef9759ccd0b38a79acf09b1e3f..1e3304b02c84b9736d835ede099868a88ba4b548 100644 --- a/AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts +++ b/AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts @@ -10,7 +10,7 @@ export class LabelingWindowComponent implements OnInit { public topk: number[]; public subplots = []; public labels: boolean[] = []; - private k = 12; + private k = 5; constructor(private state: StateService) { } @@ -21,7 +21,7 @@ export class LabelingWindowComponent implements OnInit { async train() { this.state.labels = Object.assign({}, this.state.labels, this.labels); await this.state.getQueryWindow(this.state.labels); - await this.state.update(); + await this.state.update(Object.assign({}, this.labels)); } async updateQuery() { @@ -44,9 +44,14 @@ export class LabelingWindowComponent implements OnInit { } async showSamples() { - this.topk = this.state.lshData.average_candidates - .filter((candidate) => this.state.labels[candidate] !== true) - .slice(0, this.k); + // const allowedCandidates = this.state.lshData.average_candidates.filter((candidate) => this.state.labels[candidate] !== true); + // this.topk = allowedCandidates.slice(0, this.k); + // for (let i = 0; i < this.k; i++) { + // this.topk.push(allowedCandidates[Math.floor(Math.random() * allowedCandidates.length)]); + // } + this.labels = []; + this.topk = this.state.lshData.samples; + this.subplots = []; const values: number[][][] = await this.state.getWindow(this.topk); for (const idx in this.topk) { diff --git a/AngularApp/prototype/src/app/overview-window/overview-window.component.ts b/AngularApp/prototype/src/app/overview-window/overview-window.component.ts index 77e1cf247b4c5c51107701150e8f2f255f5f514b..6d0d25d20d9f7c5b63d6307e1b8751dea09fe0a1 100644 --- a/AngularApp/prototype/src/app/overview-window/overview-window.component.ts +++ b/AngularApp/prototype/src/app/overview-window/overview-window.component.ts @@ -148,7 +148,7 @@ export class OverviewWindowComponent implements OnInit { }; 
console.log(this.config); console.log("showing plot"); - // await this.debugClicked(); + await this.debugClicked(); } async updatePlot() { @@ -256,7 +256,7 @@ export class OverviewWindowComponent implements OnInit { } async debugClicked() { - const index = 1234; + const index = 6713; await this.state.getQueryWindow(index); await this.state.lshInitial(); } diff --git a/AngularApp/prototype/src/app/state.service.ts b/AngularApp/prototype/src/app/state.service.ts index 495a99b794da44158328a072fbd844b6eab5460e..2b9673f992618a5e44c2af30f8cb86e6a7821517 100644 --- a/AngularApp/prototype/src/app/state.service.ts +++ b/AngularApp/prototype/src/app/state.service.ts @@ -13,6 +13,7 @@ export class StateService { private _queryWindow: number[][]; private _table: {[bucket: string]: number[]}[]; public _averageTable: {[bucket: string]: number[]}; + private _weights: number[]; private _currentTab: number; private _labels = {}; @@ -69,18 +70,20 @@ export class StateService { this.lshData = await this.api.lshInitial(this._queryWindow); console.log('data loaded'); this._lshParameters = this.lshData.parameters; + this._weights = [1, 1, 1]; this.createTable(); } - async update(): Promise<void> { - this.lshData = await this.api.lshUpdate(this._queryWindow, [], this._lshParameters); + async update(labels): Promise<void> { + this._weights = await this.api.getWeights(this._queryWindow, labels, this._weights); + console.log(this._weights); + this.lshData = await this.api.lshUpdate(this._queryWindow, this._weights, this._lshParameters); this.createTable(); } - async getTableInfo(): Promise<TableInfoData> { - this.tableInfo = await this.api.getTableInfo(Object.values(this._averageTable)); - console.log(this.tableInfo); - return this.tableInfo; + async getTableInfo(table: number[][]): Promise<TableInfoData> { + // console.log(this.tableInfo); + return await this.api.getTableInfo(table); } async getQueryWindow(windowIndex: number | {[index: number]: boolean}): Promise<number[][]> { @@ -93,9 +96,9 @@ export class StateService { return await this.api.getWindowByIndices(indices); } - public createTable() { + async createTable() { console.log('setting table param'); - this.table = this.lshData.candidates; + this.table = this.lshData.tables; console.log('table param set'); const averageTable = {}; const length = this.lshData.average_distances.length; @@ -111,7 +114,7 @@ export class StateService { }); this._averageTable = averageTable; console.log('table created'); - this.getTableInfo(); + this.tableInfo = await this.getTableInfo(Object.values(this._averageTable)); } public set rawData(v: RawData[]) { diff --git a/AngularApp/prototype/src/app/table-overview/table-overview.component.html b/AngularApp/prototype/src/app/table-overview/table-overview.component.html index 5b1e6ac5556e3d96096b52e2e8cd3e4534deb057..bb66931827c3fc3c571a7c1d57b3649741003408 100644 --- a/AngularApp/prototype/src/app/table-overview/table-overview.component.html +++ b/AngularApp/prototype/src/app/table-overview/table-overview.component.html @@ -6,7 +6,7 @@ </div> <div class="plots"> <div class="subplot" *ngFor="let subplot of averages"> - <plotly-plot class="plotly-plot" [data]="subplot.data" [layout]="subplot.layout"></plotly-plot> + <plotly-plot class="plotly-plot" [data]="subplot" [layout]="layout"></plotly-plot> <!-- <div class="button-holder">--> <!-- <button class="query-button" (click)="setQuery(subplot.data)">⇄</button>--> <!-- </div>--> diff --git a/AngularApp/prototype/src/app/table-overview/table-overview.component.ts 
b/AngularApp/prototype/src/app/table-overview/table-overview.component.ts index 0ba53144d34a8947226af15b1b34b2c9a8ae9c40..f8474a2de1f6f15eb83ab730c9e6567d3abdc3ad 100644 --- a/AngularApp/prototype/src/app/table-overview/table-overview.component.ts +++ b/AngularApp/prototype/src/app/table-overview/table-overview.component.ts @@ -9,65 +9,106 @@ import {StateService} from '../state.service'; export class TableOverviewComponent implements OnInit { public subplots; public averages; + public layout; constructor(private state: StateService) { } ngOnInit(): void { - this.state.onNewTable.subscribe(() => this.createHistograms()); - // this.state.onNewTableInfo.subscribe(() => this.createPrototypes()); + this.state.onNewTable.subscribe(() => { + this.createHistograms(); + this.createPrototypes(); + }); } - createPrototypes(): void { - this.averages = this.state.tableInfo.prototypes.map(prototype => { - return { - data: [ - { - x: [...Array(prototype.average.length).keys()], - y: prototype.average, - type: 'line', - }, - { - x: [...Array(prototype.average.length).keys()], - y: prototype.max, - type: 'scatter', - fill: null, - mode: 'lines', - line: { - color: 'rgb(55, 128, 191)', - width: 3 - } - }, - { - x: [...Array(prototype.average.length).keys()], - y: prototype.min, - type: 'scatter', - fill: 'tonexty', - mode: 'lines', - line: { - color: 'rgb(55, 128, 191)', - width: 3 - } + async createPrototypes(): Promise<void> { + const representatives: number[][] = []; + this.state.lshData.candidates.forEach((grouphash) => { + grouphash.forEach((candidates) => { + representatives.push(candidates.slice(0, 20)); + }); + }); + const prototypes = await this.state.getTableInfo(representatives); + const subplots = []; + this.averages = prototypes.prototypes.map((prototype) => { + const channelData = []; + prototype.max.forEach((channel, index) => { + channelData.push({ + x: [...Array(channel.length).keys()], + y: channel, + xaxis: 'x', + yaxis: `y${index + 2}`, + type: 'scatter', + fill: null, + mode: 'lines', + line: { + color: 'rgb(55, 128, 191)', + width: 3 } - ], - layout: { - showlegend: false, - hovermode: 'closest', - autosize: true, - margin: { - l: 10, - r: 10, - t: 10, - b: 10, - pad: 4 - }, - xaxis: { - showticklabels: false - }, - yaxis: { - showticklabels: false - }, - height: 100, - width: screen.width * 0.1, - } + }); + }); + prototype.min.forEach((channel, index) => { + channelData.push({ + x: [...Array(channel.length).keys()], + y: channel, + xaxis: 'x', + yaxis: `y${index + 2}`, + type: 'scatter', + fill: 'tonexty', + mode: 'lines', + line: { + color: 'rgb(55, 128, 191)', + width: 3 + } + }); + }); + prototype.average.forEach((channel, index) => { + channelData.push({ + x: [...Array(channel.length).keys()], + y: channel, + xaxis: 'x', + yaxis: `y${index + 2}`, + type: 'line', + line: { + color: 'red', + width: 3 + } + }); + }); + return channelData; + }); + for (let index = 0; index < this.state.queryWindow.length; index++) { + subplots.push([`xy${index + 2}`]); + } + this.layout = { + grid: { + rows: this.state.queryWindow.length, + columns: 1, + subplots: subplots, + }, + showlegend: false, + hovermode: 'closest', + autosize: true, + margin: { + l: 10, + r: 10, + t: 30, + pad: 4 + }, + xaxis: { + showgrid: false, + zeroline: false, + showticklabels: false, + }, + yaxis: { + zeroline: false, + showticklabels: false, + }, + height: 300, + width: screen.width * 0.1, + }; + this.state.queryWindow.forEach((channel: number[], index: number) => { + this.layout[`yaxis${index + 2}`] = { + zeroline: 
false,
+        showticklabels: false,
+      };
+    });
+  }
diff --git a/Flaskserver/_lsh.cpython-38-x86_64-linux-gnu.so b/Flaskserver/_lsh.cpython-38-x86_64-linux-gnu.so
index 1ce4cbdb210c48ccba6ee862db8d0b002716780d..7c4f653f514557fd1480603746e2815bd9b585ac 100755
Binary files a/Flaskserver/_lsh.cpython-38-x86_64-linux-gnu.so and b/Flaskserver/_lsh.cpython-38-x86_64-linux-gnu.so differ
diff --git a/Flaskserver/main.py b/Flaskserver/main.py
index 4083079ca98e18d650cfa34a0292de2322b2f109..a313dc388cdb4f7210d6b9d206754940a58eb7da 100644
--- a/Flaskserver/main.py
+++ b/Flaskserver/main.py
@@ -175,15 +175,19 @@ def initialize():
     t0 = time()
     raw_data = orjson.loads(request.data)
     data = np.load('processed-data.npy')
-    data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))
+    data = np.swapaxes(data, 1, 2)
+    # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))
     query = raw_data["query"]
-    query = np.reshape(query, (len(query[0]), len(query)))
-    parameters = np.load('parameters.npy')
+    query = np.swapaxes(query, 0, 1)
+    # query = np.reshape(query, (len(query[0]), len(query)))
+    parameters = preprocess(data)
+    # parameters = np.load('parameters.npy')
     r = parameters[0]
     a = parameters[1]
     sd = parameters[2]
     candidates, distances, hf = _lsh.lsh(data, query, r, a, sd)
+    print(distances)
     dict = defaultdict(int)
     for l in range(len(candidates)):
@@ -195,9 +199,11 @@ def initialize():
     average_distances = list(sorted_dict.values())
     tables = []
+    samples_set = set()
     candidates = candidates.tolist()
     for l in range(len(candidates)):
         for k in range(len(candidates[0])):
+            samples_set.update(candidates[l][k][0:5])
             dict = defaultdict(list)
             length = len(distances[l][k])
             median = distances[l][k][math.ceil(length/2)]
@@ -207,13 +213,17 @@
                 dict[str(indices[i])].append(candidates[l][k][i])
             tables.append(dict)
+    samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist()
+
     response = {
         "hash_functions": hf.tolist(),
-        "candidates": tables,
+        "candidates": candidates,
+        "tables": tables,
+        "distances": distances.tolist(),
+        "samples": list(samples),
         "average_candidates": np.array(average_candidates).tolist(),
         "average_distances": np.array(average_distances).tolist(),
-        "distances": distances.tolist(),
         "parameters": [float(r), float(a), float(sd)]
     }
     response = orjson.dumps(response)
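Review note on the reshape-to-swapaxes change in initialize() (and in update() below): np.reshape only re-chops the flat buffer, so reshaping (n, channels, time) into (n, time, channels) silently scrambles the window contents, while swapaxes performs the intended transpose. A minimal self-contained check, with shapes assumed from this diff:

import numpy as np

windows = np.arange(2 * 3 * 4).reshape(2, 3, 4)   # (n, channels, time)
wrong = np.reshape(windows, (2, 4, 3))            # same bytes, new chopping
right = np.swapaxes(windows, 1, 2)                # true (n, time, channels)

assert right[0, 0, 1] == windows[0, 1, 0]         # transposed as intended
assert not np.array_equal(wrong, right)           # reshape is not a transpose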
@@ -222,12 +232,40 @@
 @app.route('/weights', methods=['POST'])
 def weights():
+    alpha = 0.2
     raw_data = orjson.loads(request.data)
-    parameters = raw_data["labels"]
+    labels = raw_data["labels"]
+    query = raw_data["query"]
+    old_weights = raw_data["weights"]
+    data = np.load('processed-data.npy')
+    all_good_windows = data[[[int(index) for index, value in labels.items() if value is True]]]
+    all_bad_windows = data[[[int(index) for index, value in labels.items() if value is False]]]
+
+    good_distances = np.zeros(len(query))
+    for window in all_good_windows:
+        for i in range(len(all_good_windows[0])):
+            good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1]
+    if len(all_good_windows) != 0:
+        good_distances /= np.sum(good_distances)
+        good_distances = np.ones(len(query)) - good_distances
+        good_distances /= np.sum(good_distances)
+        good_distances *= len(all_good_windows[0])
+        good_distances = np.sqrt(good_distances)
+        good_distances = alpha * np.array(old_weights) + (1-alpha) * good_distances
+
+    # bad_distances = np.zeros(len(query))
+    # for window in all_bad_windows:
+    #     for i in range(len(all_bad_windows[0])):
+    #         bad_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1]
+    # if len(all_bad_windows) != 0:
+    #     bad_distances /= np.sum(bad_distances)
+    #     bad_distances = np.ones(len(query)) - bad_distances
+
+    print(good_distances)
 
     # Calculate weights
-    response = weights
+    response = orjson.dumps(good_distances.tolist())
     return response
 
@@ -236,13 +274,15 @@ def update():
     t0 = time()
     raw_data = orjson.loads(request.data)
     data = np.load('processed-data.npy')
-    data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))
+    data = np.swapaxes(data, 1, 2)
+    # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))
     query = raw_data["query"]
-    query = np.reshape(query, (len(query[0]), len(query)))
+    query = np.swapaxes(query, 0, 1)
+    # query = np.reshape(query, (len(query[0]), len(query)))
     weights = raw_data["weights"]
     parameters = raw_data["parameters"]
-    candidates, distances, hf = _lsh.lsh(data, query, parameters[0], parameters[1], parameters[2])
+    candidates, distances, hf = _lsh.lsh(data, query, parameters[0], parameters[1], parameters[2], weights)
     dict = defaultdict(int)
     for l in range(len(candidates)):
         for k in range(len(candidates[0])):
@@ -253,9 +293,11 @@
     average_distances = list(sorted_dict.values())
     tables = []
+    samples_set = set()
     candidates = candidates.tolist()
     for l in range(len(candidates)):
         for k in range(len(candidates[0])):
+            samples_set.update(candidates[l][k][0:5])
             dict = defaultdict(list)
             length = len(distances[l][k])
             median = distances[l][k][math.ceil(length/2)]
@@ -265,9 +307,13 @@
                 dict[str(indices[i])].append(candidates[l][k][i])
             tables.append(dict)
+    samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist()
+
     response = {
         "hash_functions": hf.tolist(),
-        "candidates": tables,
+        "candidates": candidates,
+        "tables": tables,
+        "samples": list(samples),
         "average_candidates": np.array(average_candidates).tolist(),
         "average_distances": np.array(average_distances).tolist(),
         "distances": distances.tolist(),
@@ -322,8 +368,8 @@ def table_info():
         'max': max_values.tolist(),
         'min': min_values.tolist()
     })
-    distances = [[dtw(np.array(v["average"]), np.array(w["average"]), global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05 * 120)) for j, w in enumerate(prototypes)] for i, v in enumerate(prototypes)]
-    response = orjson.dumps({'prototypes': prototypes, 'distances': distances})
+    # distances = [[dtw(np.array(v["average"]), np.array(w["average"]), global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05 * 120)) for j, w in enumerate(prototypes)] for i, v in enumerate(prototypes)]
+    response = orjson.dumps({'prototypes': prototypes, 'distances': []})
     print("Averages calculated: " + str(time() - t0))
     return response
diff --git a/Flaskserver/processed-data.npy b/Flaskserver/processed-data.npy
index 1605f90d42d30125534e5f1c7f77e97f9facb3e1..9c7f6cabbd615eefbc8dc2a2c297eec9a98bffeb 100644
Binary files a/Flaskserver/processed-data.npy and b/Flaskserver/processed-data.npy differ
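Review note on the /weights endpoint above: the new weights are an exponential moving average (alpha = 0.2) over inverted, normalised per-channel DTW distances to the positively labelled windows, so channels that track the query closely end up weighted more heavily. A self-contained sketch of that rule, with the channel distance abstracted out (the server uses _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1]; the placeholder below exists only to keep the sketch runnable):

import numpy as np

def update_weights(query, good_windows, old_weights, alpha=0.2,
                   channel_dist=lambda a, b: float(np.abs(a - b).sum())):
    """query: (channels, time); good_windows: iterable of same-shape windows."""
    d = np.zeros(len(query))
    for window in good_windows:
        for i in range(len(query)):
            d[i] += channel_dist(query[i], window[i])  # distance per channel
    if len(good_windows) != 0:
        d /= d.sum()                   # share of total distance per channel
        d = 1.0 - d                    # closer channels get more weight
        d /= d.sum()
        d *= len(query)                # keep the mean weight at 1
        d = np.sqrt(d)
        d = alpha * np.asarray(old_weights) + (1 - alpha) * d  # smooth update
    return d.tolist()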
diff --git a/experiments/EEG data test.ipynb b/experiments/EEG data test.ipynb
index febfeef58cd37aac72307907e553de7e6afed5bf..f2454028fa2548906c5507e816bd016ab4d93b2e 100644
--- a/experiments/EEG data test.ipynb
+++ b/experiments/EEG data test.ipynb
@@ -2,9 +2,18 @@
 "cells": [
  {
   "cell_type": "code",
-  "execution_count": 1,
+  "execution_count": 26,
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "The autoreload extension is already loaded. To reload it, use:\n",
+     "  %reload_ext autoreload\n"
+    ]
+   }
+  ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
 },
 {
  "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 1,
  "metadata": {},
  "outputs": [
   {
@@ -39,26 +48,30 @@
 },
 {
  "cell_type": "code",
-  "execution_count": 4,
+  "execution_count": 2,
  "metadata": {},
  "outputs": [
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-     "(59999, 20, 120)\n"
+     "(59999, 120, 40)\n"
    ]
   }
  ],
  "source": [
-  "window_data = [npdata[i:i+120, 0:20] for i in range(0, npdata.shape[0]-120, int(120/8))]\n",
+  "window_data = [npdata[i:i+120, 0:40] for i in range(0, npdata.shape[0]-120, int(120/8))]\n",
+  "del npdata\n",
   "data = np.reshape(window_data, (len(window_data), len(window_data[0][0]), len(window_data[0])))\n",
+  "del window_data\n",
+  "data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))\n",
+  "# data = np.concatenate((data, data))\n",
   "print(data.shape)"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 3,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -67,9 +80,293 @@
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 4,
  "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "Preprocessing:\n",
+     [... ~120 lines of per-window preprocessing progress ("1730", "0:0", ..., "59940:145", "r = 1730") and a 0-144 counter elided ...]
+     "Mean: 16672.21312363323\n",
+     "Stdev: 7180.272654591725\n",
+     "Ratio mean: 0.9379277278060563\n",
+     "Ratio stdev: 0.15076175892196642\n",
+     "Theta: -1852.8903252134187\n",
+     "r: 166.7221312363323\n",
+     "Preprocessing time: 14.979660749435425\n",
+     "Preprocessing done. Took 14.98 seconds (0.2 minutes).\n"
+    ]
+   }
+  ],
  "source": [
   "import sys\n",
   "from time import time\n",
@@ -83,10 +380,37 @@
   "\n",
   "print('Preprocessing:')\n",
   "t0 = time()\n",
-  "r,a,sd = preprocess(data, 20)\n",
+  "r,a,sd = preprocess(data, 1730)\n",
   "print('Preprocessing done. Took {:.2f} seconds ({:.1f} minutes).'.format(time() - t0, (time() - t0) / 60))\n",
-  "\n",
-  "for i, target in enumerate(targets[0:1]):\n",
+  "\n"
  ]
 },
+{
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "doing lsh\n",
+    "Target #0 done! Took 3.11 seconds (0.1 minutes).\n",
+    "doing lsh\n",
+    "Target #1 done! Took 2.91 seconds (0.0 minutes).\n",
+    "doing lsh\n",
+    "Target #2 done! Took 2.80 seconds (0.0 minutes).\n",
+    "doing lsh\n",
+    "Target #3 done! Took 2.82 seconds (0.0 minutes).\n",
+    "doing lsh\n",
+    "Target #4 done! Took 2.85 seconds (0.0 minutes).\n",
+    "Done! Took 14.50 seconds (0.2 minutes).\n"
+   ]
+  }
+ ],
+ "source": [
+  "t0 = time()\n",
+  "for i, target in enumerate(targets):\n",
   "    t1 = time()\n",
   "    query = data[target]\n",
   "    print('doing lsh')\n",
@@ -100,17 +424,34 @@
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 6,
  "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "Target #0 done! Took 5.99 seconds (0.1 minutes).\n",
+     "Target #1 done! Took 5.71 seconds (0.1 minutes).\n",
+     "Target #2 done! Took 5.76 seconds (0.1 minutes).\n",
+     "Target #3 done! Took 5.65 seconds (0.1 minutes).\n",
+     "Target #4 done! Took 5.84 seconds (0.1 minutes).\n",
+     "Done! 
Took 28.96 seconds (0.5 minutes).\n", + "[11975, 18529, 3579, 2144, 18528, 11974, 4602, 11976, 9108, 8084]\n" + ] + } + ], "source": [ "from scipy.spatial.distance import cdist\n", "from tslearn.metrics import dtw\n", "from time import time\n", "\n", "t0 = time()\n", - "target = data[targets[0]]\n", - "dtw_distances = [dtw(window, target, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data]\n", + "for i, target in enumerate(targets):\n", + " t1 = time()\n", + " query = data[target]\n", + " dtw_distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05)) for window in data]\n", + " print('Target #{} done! Took {:.2f} seconds ({:.1f} minutes).'.format(i, time() - t1, (time() - t1) / 60))\n", "dtw_candidates = sorted(range(len(dtw_distances)), key=lambda k: dtw_distances[k])\n", "print('Done! Took {:.2f} seconds ({:.1f} minutes).'.format(time() - t0, (time() - t0) / 60))\n", "print(dtw_candidates[0:10])" @@ -118,9 +459,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[11975, 18529, 4602, 2144, 1325, 14433, 5421, 9108, 5217, 3579]\n" + ] + } + ], "source": [ "from collections import defaultdict\n", "\n", @@ -136,10 +485,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20\n", + "16\n", + "[11975, 18529, 3579, 2144, 18528, 11974, 4602, 11976, 9108, 8084, 4807, 1325, 14433, 5422, 9312, 5421, 4603, 18938, 3578, 9928]\n", + "[11975, 18529, 4602, 2144, 1325, 14433, 5421, 9108, 5217, 3579, 18528, 8084, 4807, 3578, 4603, 59898, 9312, 15662, 4601, 11974]\n" + ] + } + ], "source": [ + "accuracy = 0\n", + "for index in dtw_candidates[0:20]:\n", + " if index in candidates:\n", + " accuracy += 1\n", + "print(accuracy)\n", "accuracy = 0\n", "for index in dtw_candidates[0:20]:\n", " if index in candidates[0:20]:\n", @@ -149,6 +514,23 @@ "print(candidates[0:20])\n" ] }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18218\n" + ] + } + ], + "source": [ + "print(len(candidates))" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/experiments/Update test.ipynb b/experiments/Update test.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9c4e40b88dc61e886cec59a60b83ac1890d77128 --- /dev/null +++ b/experiments/Update test.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dev-laptop/Dylan/locality-sensitive-hashing-visual-analytics/experiments/generator.py:35: RuntimeWarning: divide by zero encountered in double_scalars\n", + " slope = delta / float(length)\n", + "/home/dev-laptop/Dylan/locality-sensitive-hashing-visual-analytics/experiments/generator.py:35: RuntimeWarning: divide by zero encountered in double_scalars\n", + " slope = delta / float(length)\n", + "/home/dev-laptop/Dylan/locality-sensitive-hashing-visual-analytics/experiments/generator.py:35: RuntimeWarning: divide by zero 
encountered in double_scalars\n",
+    " slope = delta / float(length)\n",
+    [the same generator.py:35 RuntimeWarning repeated three more times, elided]
+   ]
+  },
+  {
+   "ename": "OSError",
+   "evalue": "Failed to interpret file 'samples.pkl' as a pickle",
+   "output_type": "error",
+   "traceback": [
+    [ANSI colour escape codes stripped from the frames below]
+    "EOFError                                  Traceback (most recent call last)",
+    "~/miniconda3/envs/pseudo/lib/python3.8/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding)",
+    "    446         try:",
+    "--> 447             return pickle.load(fid, **pickle_kwargs)",
+    "    448         except Exception:",
+    "EOFError: Ran out of input",
+    "",
+    "During handling of the above exception, another exception occurred:",
+    "",
+    "OSError                                   Traceback (most recent call last)",
+    "<ipython-input-1-ce70ae319d2e> in <module>",
+    "      3 from generator import create_new",
+    "----> 5 create_new()",
+    "~/Dylan/locality-sensitive-hashing-visual-analytics/experiments/generator.py in create_new()",
+    "    479     if mode == 'single':",
+    "--> 480         generator_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, usuage, category, n_plot)",
+    "    481     elif mode == 'pair':",
+    "~/Dylan/locality-sensitive-hashing-visual-analytics/experiments/generator.py in generator_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, usuage, category, n_plot)",
+    "    292     save_file = 'samples.pkl'",
+    "    293     np.save(save_file, np.array(samples))",
+    "--> 294     samples = np.load(save_file, allow_pickle=True)",
+    "~/miniconda3/envs/pseudo/lib/python3.8/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding)",
+    "--> 449             raise IOError(",
+    "    450                 \"Failed to interpret file %s as a pickle\" % repr(file))",
+    "OSError: Failed to interpret file 'samples.pkl' as a pickle"
+   ]
+  }
+ ],
+ "source": [
+  "import pandas as pd\n",
+ 
"import numpy as np\n", + "from generator import create_new\n", + "\n", + "create_new()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "window_data = [npdata[i:i+20] for i in range(0, npdata.shape[0]-20, int(20/4))]\n", + "del npdata\n", + "data = np.reshape(window_data, (len(window_data), len(window_data[0][0]), len(window_data[0])))\n", + "del window_data\n", + "data = np.reshape(data, (len(data), len(data[0][0]), len(data[0])))\n", + "# data = np.concatenate((data, data))\n", + "print(data.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "targets = [43895, 33430, 42575, 1060, 11975]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from time import time\n", + "\n", + "sys.path.insert(0, '../Flaskserver')\n", + "import importlib\n", + "from main import preprocess\n", + "import _lsh\n", + "\n", + "topk_dtw = []\n", + "\n", + "print('Preprocessing:')\n", + "t0 = time()\n", + "r,a,sd = preprocess(data, 1730)\n", + "print('Preprocessing done. Took {:.2f} seconds ({:.1f} minutes).'.format(time() - t0, (time() - t0) / 60))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sys.path.insert(0, '../Flaskserver')\n", + "import _lsh\n", + "\n", + "t0 = time()\n", + "for i, target in enumerate(targets[0:1]):\n", + " t1 = time()\n", + " query = data[target]\n", + " print('doing lsh')\n", + " lsh_candidates, lsh_distances, _ = _lsh.lsh(data, query, r, a, sd)\n", + "# topk_dtw.append(candidates)\n", + " print('Target #{} done! Took {:.2f} seconds ({:.1f} minutes).'.format(i, time() - t1, (time() - t1) / 60))\n", + " \n", + "# print(candidates[0:10])\n", + "print('Done! Took {:.2f} seconds ({:.1f} minutes).'.format(time() - t0, (time() - t0) / 60))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.spatial.distance import cdist\n", + "from tslearn.metrics import dtw\n", + "from time import time\n", + "\n", + "t0 = time()\n", + "for i, target in enumerate(targets):\n", + " t1 = time()\n", + " query = data[target]\n", + " dtw_distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05)) for window in data]\n", + " print('Target #{} done! Took {:.2f} seconds ({:.1f} minutes).'.format(i, time() - t1, (time() - t1) / 60))\n", + "dtw_candidates = sorted(range(len(dtw_distances)), key=lambda k: dtw_distances[k])\n", + "print('Done! 
Took {:.2f} seconds ({:.1f} minutes).'.format(time() - t0, (time() - t0) / 60))\n",
+    "print(dtw_candidates[0:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import defaultdict\n",
+    "\n",
+    "scores = defaultdict(int)\n",
+    "for l in range(len(lsh_candidates)):\n",
+    "    for k in range(len(lsh_candidates[0])):\n",
+    "        for i in range(len(lsh_candidates[0][0])):\n",
+    "            scores[lsh_candidates[l][k][i]] += lsh_distances[l][k][i]\n",
+    "sorted_scores = {k: v for k, v in sorted(scores.items(), key=lambda item: item[1])}\n",
+    "candidates = list(sorted_scores.keys())\n",
+    "print(candidates[0:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "accuracy = 0\n",
+    "for index in dtw_candidates[0:20]:\n",
+    "    if index in candidates:\n",
+    "        accuracy += 1\n",
+    "print(accuracy)\n",
+    "accuracy = 0\n",
+    "for index in dtw_candidates[0:20]:\n",
+    "    if index in candidates[0:20]:\n",
+    "        accuracy += 1\n",
+    "print(accuracy)\n",
+    "print(dtw_candidates[0:20])\n",
+    "print(candidates[0:20])\n"
+   ]
+  },
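+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical helper, not part of the original notebook: report the\n",
+    "# overlap between the DTW top-k and the LSH candidates as recall in\n",
+    "# [0, 1] instead of a raw count.\n",
+    "def recall_at_k(ground_truth, retrieved, k=20):\n",
+    "    return len(set(ground_truth[:k]) & set(retrieved[:k])) / k\n",
+    "\n",
+    "print(recall_at_k(dtw_candidates, candidates))"
+   ]
+  },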
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(len(candidates))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/experiments/__pycache__/generator.cpython-38.pyc b/experiments/__pycache__/generator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bfb74bd787d589ff27318590017d9897dc7b0755
Binary files /dev/null and b/experiments/__pycache__/generator.cpython-38.pyc differ
diff --git a/experiments/generator.py b/experiments/generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..15cb65660a333b0009ecfbea3827b13573bcac45
--- /dev/null
+++ b/experiments/generator.py
@@ -0,0 +1,484 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jul 25 09:28:32 2018
+
+@author: dt3t6ux
+"""
+
+import os
+import datetime
+import pickle
+import copy
+import numpy as np
+import matplotlib.pyplot as plt
+from random import sample
+from sklearn.preprocessing import MinMaxScaler
+#from itertools import izip
+
+
+#%%
+def gen_seg(tspan, seg_0, length, seg_type, delta, f_gauss, f_sin):
+    """
+    generate a single segment
+    input:
+        - tspan: length of the whole time series
+        - seg_0: last value of the last segment
+        - length: length of the segment
+        - seg_type: type of the segment
+        - delta: range of the segment
+    output:
+        - seg: values in the segment
+        - parameter: slope for linear segment, time constant for pt1 and None for step
+    """
+    # linear segment
+    if seg_type == 0:
+        slope = delta / float(length)
+        seg = np.linspace(seg_0, seg_0+delta, length+1)[1:]
+        parameter = slope
+
+    # pt1 segment
+    elif seg_type == 1:
+        # time constant
+        T = np.random.uniform(low=1., high=tspan/float(10))
+        seg = np.array([seg_0 + delta*(1-np.exp(-1./T*k)) for k in range(length)])
+        parameter = T
+
+    # step
+    elif seg_type == 2:
+        seg = np.full(length, seg_0+delta)
+        parameter = seg_0+delta
+
+    # gaussian distributed noise
+    seg += np.random.normal(0, scale=f_gauss*abs(delta), size=length)
+
+    # sinusoidal noises
+    n_sin = 5
+    omegas = np.random.uniform(low=20*np.pi/tspan, high=np.pi, size=n_sin)  # at least 10 waves, at most with Nyquist rate (2 Hz)
+    phases = np.random.uniform(high=2*np.pi, size=n_sin)
+    for i_sin in range(n_sin):
+        seg += np.random.normal(0, f_sin*abs(delta)) * np.sin(omegas[i_sin]*np.array(range(length))+phases[i_sin])
+
+    # return
+    return seg, parameter
+
+
+#%%
+def gen_ts(tspan, N, bs_local, m_local, seg_types, deltas, f_gauss, f_sin):
+    """
+    generate a single time series
+    """
+    # initialize time series
+    ts = np.zeros((tspan + 1, N))
+
+    #% initialize the parameter of each segment
+    parameters = []
+    for i in range(N):
+        parameters.append(np.zeros(m_local[i], dtype=np.float32))
+
+    # loop to generate data in i-th channel
+    for i in range(N):
+
+        #% loop to generate data in j-th segment of i-th channel
+        for j in range(m_local[i]):
+
+            # indices
+            b_j_lf = bs_local[i][j]
+            b_j_rt = bs_local[i][j+1]
+
+            # prepare parameter for gen_seg
+            seg_0 = ts[b_j_lf, i]
+            length = b_j_rt - b_j_lf
+
+            # run gen_seg
+            seg, parameters[i][j] = gen_seg(tspan, seg_0, length, seg_types[i][j], deltas[i][j], f_gauss, f_sin)
+
+            # integrate seg in ts
+            ts[b_j_lf+1:b_j_rt+1, i] = seg
+
+        # if the first segment is a step (seg_type == 2), extend it back to t = 0
+        if seg_types[i][0]==2:
+            ts[0, i] = ts[1, i]
+
+    return ts, parameters
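+
+# Hypothetical smoke test, not part of the original module: build one
+# 3-channel series with fixed boundaries at t = 40 and t = 80 (one linear,
+# one PT1 and one step segment per channel) and check the output shape.
+if __name__ == '__main__':
+    demo_ts, demo_parameters = gen_ts(tspan=120, N=3,
+                                      bs_local=[[0, 40, 80, 120]] * 3,
+                                      m_local=[3, 3, 3],
+                                      seg_types=[[0, 1, 2]] * 3,
+                                      deltas=[[1.0, -0.5, 0.3]] * 3,
+                                      f_gauss=0.01, f_sin=0.01)
+    print(demo_ts.shape)  # (121, 3)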
+
+#%%
+def gen_samples_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin):
+    """
+    generate data with single time series per sample
+    return:
+        samples with
+        - ts: values in the time series; axis 0 - sample, axis 1 - time, axis 2 - channel
+        - bs_global: array with global boundaries; axis 0 - sample, axis 1 - channel
+        - bs_local: array with local boundaries; axis 0 - sample, axis 1 - channel
+        - seg_types: array with segment types; axis 0 - sample, axis 1 - channel
+        - parameters: array with time constant / slope / step; axis 0 - sample, axis 1 - channel
+        - scaler
+    """
+    # initialize output data
+    samples = []
+
+    #% loop to generate each sample
+    for i_sample in range(n_samples):
+
+        #% global boundaries
+
+        # randomly initialize number of global boundaries m_global
+        m_global = int(np.random.uniform(m_range[0], m_range[1]))
+
+        # randomly pick boundaries from the pool, length of segment at least l_min
+        bs_global = np.zeros(m_global+1, dtype=int)
+        sample_pool = np.arange(l_min, tspan-l_min+1)
+        for i_m in range(1, m_global):
+            bs_global[i_m] = np.random.choice(sample_pool)
+            sample_pool = np.setdiff1d(sample_pool, np.arange(bs_global[i_m]-l_min+1, bs_global[i_m]+l_min))
+        bs_global[-1] = tspan
+        bs_global.sort()
+
+        #% local boundaries
+
+        # initialize local boundaries
+        bs_local = []
+
+        # dropout
+        bs_local_drop = [bs_global.copy() for i in range(N)]
+        # loop to apply dropout to each global boundary
+        for i_m in range(1, m_global):
+
+            # choose one channel to be forced to implement the boundary
+            channel_forced = np.random.randint(N)
+            # mark channels to dropout for this boundary i_m as -1
+            for i in range(N):
+                if i == channel_forced:
+                    continue
+                else:
+                    if np.random.random() < dropout:
+                        bs_local_drop[i][i_m] = -1
+        for i in range(N):
+            bs_local_drop[i] = bs_local_drop[i][bs_local_drop[i] != -1]
+
+        # dispersion
+        for i in range(N):
+
+            bs_local.append(bs_local_drop[i] + np.random.normal(0, sigma_bs, size=len(bs_local_drop[i])))
+            scaler_t = MinMaxScaler(feature_range=(0, tspan))
+            bs_local[i] = sorted(np.rint(scaler_t.fit_transform(bs_local[i].reshape(-1, 1)).ravel()).astype(int))
+
+        #% number of boundaries in each channel
+
+        # initialize segment number of each channel
+        m_local = np.zeros(N)
+
+        # loop for boundary number in each channel
+        for i in range(N):
+            m_local[i] = len(bs_local[i]) - 1
+
+        m_local = list(map(int, m_local))
+
+        #% segment type of each segment
+
+        # initialize segment types
+        seg_types = []
+
+        # loop for segment types in each channel
+        for i in range(N):
+            seg_types.append(np.random.randint(3, size=m_local[i]))
+
+        #% delta of each segment
+
+        # initialize the delta of each channel
+        deltas = []
+
+        for i in range(N):
+            deltas.append(np.random.uniform(low=-1., high=1., size=m_local[i]))
+
+        # generate i_sample-th sample
+        ts, parameters = gen_ts(tspan, N, bs_local, m_local, seg_types, deltas, f_gauss, f_sin)
+
+        # normalize values in the time series to [0, 1]
+        scaler = MinMaxScaler()
+        ts = scaler.fit_transform(ts)
+
+        # delete start and end time point as boundaries
+        bs_global = bs_global[1:-1]
+        bs_local = [bs_local[i][1:-1] for i in range(len(bs_local))]
+
+        samples.append({'ts':ts.astype(np.float32), 'bs_global':bs_global, 'bs_local':bs_local, 'seg_types':seg_types, 'deltas':deltas, 'parameters':parameters, 'scaler':scaler})
+
+    return samples
+
+
+#%%
+def gen_samples_pair(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, mutate_bs_local, mutate_seg_types, mutate_deltas):
+    """
+    generate data with paired time series per sample
+    return:
+        samples with
+        - ts: values in the time series; axis 0 - sample, axis 1 - time, axis 2 - channel
+        - bs_global: array with global boundaries; axis 0 - sample, axis 1 - channel
+        - bs_local: array with local boundaries; axis 0 - sample, axis 1 - channel
+        - seg_types: array with segment types; axis 0 - sample, axis 1 - channel
+        - parameters: array with time constant / slope / step; axis 0 - sample, axis 1 - channel
+        - scaler
+    """
+    # generate measurement data
+    samples_measurement = gen_samples_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin)
+
+    # initialize simulation data
+    samples_simulation = []
+
+    # loop to generate simulation data
+    for i_sample in range(n_samples):
+
+        # extract bs_global
+        bs_global = samples_measurement[i_sample]['bs_global']
+
+        # mutate bs_local
+        bs_local = copy.deepcopy(samples_measurement[i_sample]['bs_local'])
+        for i in range(len(bs_local)):
+            bs_local[i].insert(0, 0)
+            bs_local[i].append(tspan)
+        bs_local = [bs_i + np.random.normal(0, mutate_bs_local, size=len(bs_i)) for bs_i in bs_local]
+        scaler_t = MinMaxScaler(feature_range=(0, tspan))
+        bs_local = [sorted(np.rint(scaler_t.fit_transform(bs_local_i.reshape(-1, 1)).ravel()).astype(int)) for bs_local_i in bs_local]
+        m_local = np.array(list(map(len, bs_local))) - 1
+
+        # mutate seg_types
+        seg_types = samples_measurement[i_sample]['seg_types']
+        for i in range(N):
+            m_i = len(seg_types[i])
+            for j in range(m_i):
+                if np.random.random() < mutate_seg_types:
+                    seg_types[i][j] = np.random.randint(3)
+
+        # mutate deltas
+        deltas = samples_measurement[i_sample]['deltas']
+        for i in range(N):
+            m_i = len(deltas[i])
+            for j in range(m_i):
+                deltas[i][j] += np.random.uniform(low=-mutate_deltas*deltas[i][j], high=mutate_deltas*deltas[i][j])
+
+        # generate i_sample-th sample for simulation
+        ts, parameters = gen_ts(tspan, N, bs_local, m_local, seg_types, deltas, f_gauss, f_sin)
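+
+        # (The measurement's MinMaxScaler is reused below on purpose: both
+        # series of a pair are mapped with the same transform, so their
+        # values remain directly comparable after normalization.)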
+        # normalize values to [0, 1]
+        scaler = samples_measurement[i_sample]['scaler']
+        ts = scaler.transform(ts)
+
+        # delete start and end time point as boundaries
+        bs_local = [bs_local[i][1:-1] for i in range(len(bs_local))]
+
+        # assemble data for simulation in a variable samples_simulation
+        samples_simulation.append({'ts':ts.astype(np.float32), 'bs_global':bs_global, 'bs_local':bs_local, 'seg_types':seg_types, 'deltas':deltas, 'parameters':parameters, 'scaler':scaler})
+
+    # integrate simulation in samples
+    samples = [{'measurement': sample_measurement_i, 'simulation': sample_simulation_i} for sample_measurement_i, sample_simulation_i in zip(samples_measurement, samples_simulation)]
+
+    return samples
+
+#%%
+def generator_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, usuage, category, n_plot):
+    """
+    generate and save data with single time series per sample
+    output:
+        - samples.npy
+    """
+
+    # generate data
+    samples = gen_samples_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin)
+
+    # save data
+    # for test
+#    save_folder = os.path.join('..', 'data', 'single_'+usuage, category, category+'_' + 'mixed' + '_' + str(N) + '_' + datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S"))
+    # np.save appends '.npy' to file names without that suffix, so save and
+    # load under the .npy name; a plain 'samples.pkl' would stay empty.
+    save_file = 'samples.npy'
+    np.save(save_file, np.array(samples))
+    samples = np.load(save_file, allow_pickle=True)
+
+    # plot n_plot samples
+    for i_plot in n_plot:
+        t = np.arange(tspan+1)
+
+        sample_i = samples[i_plot]
+        ts = sample_i['ts']
+        bs_global = sample_i['bs_global']
+        bs_local = sample_i['bs_local']
+
+        fig = plt.figure()
+
+        for i in range(N):
+
+            # plot i-th channel of the time series
+            plt.plot(t, ts[:,i], label='Channel '+str(i+1))
+
+            # plot local boundaries in i-th channel
+            b_values_i = ts[bs_local[i], i]
+            plt.scatter(bs_local[i], b_values_i)
+
+        # plot global boundaries
+        for b_global in bs_global:
+            plt.axvline(x=b_global, color='y', ls='--')
+
+        fig.legend(loc=9, bbox_to_anchor=(0.5, 1), ncol=N)
+        plt.xlabel('Time')
+        plt.ylabel('Values')
+        plt.title('Sample '+str(i_plot), pad=40)
+        plt.grid(True)
+        plt.show()
+        plt.close()
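+
+# Hypothetical convenience helper, not part of the original script: load the
+# generated samples back and stack the raw series into one array for the
+# experiment notebooks (shape: (n_samples, tspan+1, N)).
+def load_generated_samples(save_file='samples.npy'):
+    samples = np.load(save_file, allow_pickle=True)
+    return np.stack([sample_i['ts'] for sample_i in samples])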
+
+#%% generate and save data for comparator
+def generator_pair(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, mutate_bs_local, mutate_seg_types, mutate_deltas, usuage, category, n_plot):
+    """
+    generate and save data for comparator
+    output:
+        - samples.npy
+    """
+
+    # generate data
+    samples = gen_samples_pair(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, mutate_bs_local, mutate_seg_types, mutate_deltas)
+
+    # save data
+    # for test
+    # save_folder = os.path.join('..', '..', 'data', 'pair_'+usuage, category, category+'_' + 'mixed' + '_' + str(N) + '_' + datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S"))
+    # os.mkdir(save_folder)
+    np.save('samples', np.array(samples))
+    save_file = 'samples.pkl'
+    # with open(save_file, 'w') as f:
+    #     pickle.dump(samples, f)
+    #
+    # # load data
+    # with open(save_file, 'r') as f:
+    #     samples = pickle.load(f)
+
+    # plot n_plot samples
+    t = np.arange(tspan+1)
+    for i_plot in n_plot:
+
+        fig = plt.figure()
+
+        sample_i = samples[i_plot]
+        sample_measurement_i = sample_i['measurement']
+        sample_simulation_i = sample_i['simulation']
+
+        bs_global = sample_measurement_i['bs_global']
+
+        ts_measurement = sample_measurement_i['ts']
+        bs_local_measurement = sample_measurement_i['bs_local']
+
+        ts_simulation = sample_simulation_i['ts']
+        bs_local_simulation = sample_simulation_i['bs_local']
+
+        for i in range(N):
+
+            ax = plt.subplot(N, 1, i+1)
+
+            # plot i-th channel of the time series
+            plt.plot(t, ts_measurement[:,i])
+            plt.plot(t, ts_simulation[:,i])
+
+            # plot global boundaries
+#            for b_global in bs_global:
+#                plt.axvline(x=b_global, color='y', ls='--')
+
+            # values of time series at boundaries in i-th channel
+            b_values_measurement_i = ts_measurement[bs_local_measurement[i], i]
+            b_values_simulation_i = ts_simulation[bs_local_simulation[i], i]
+
+            # plot local boundaries in i-th channel
+            plt.scatter(bs_local_measurement[i], b_values_measurement_i)
+            plt.scatter(bs_local_simulation[i], b_values_simulation_i)
+
+            if i < N-1:
+                plt.setp(ax.get_xticklabels(), visible=False)
+            plt.ylabel('Channel '+str(i+1))
+            plt.grid(True)
+
+        fig.legend(('measurement', 'simulation'), loc=9, bbox_to_anchor=(0.5, 1), ncol=2)
+        plt.xlabel('time')
+        plt.suptitle('Sample '+str(i_plot), y=1.05)
+        plt.show()
+        plt.close()
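+
+# Example call (hypothetical, for illustration only; the values follow the
+# 'pair' defaults noted in the config comments of create_new below):
+#   generator_pair(n_samples=10, tspan=120, N=3, m_range=(3, 5), l_min=10,
+#                  dropout=0.2, sigma_bs=0.5, f_gauss=0.01, f_sin=0.01,
+#                  mutate_bs_local=2, mutate_seg_types=0.1, mutate_deltas=0.2,
+#                  usuage='compare', category='debug', n_plot=range(3))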
+
+#%% main program
+def create_new():
+
+    #% config (only modify this part for generator)
+
+    # number of time series to generate
+    n_samples = int(1e1)
+
+    # duration of time
+    # t_begin = 0, t_end = tspan; default 99 (0 ~ 99s)
+    tspan = 120
+
+    # number of channels
+    N = 3
+
+    # range of global segment number m_global
+    # HACK: large l_min and m_range at the same time may cause conflict
+    # default (3, 10) for single_analyse_cnn, (3, 6) for single_compare_cnn, (3, 5) for other pair
+    m_range = (3, 6)
+
+    # minimum length of a segment
+    # HACK: large l_min and m_range at the same time may cause conflict
+    # default 5 for single_analyse_cnn, 10 for pair
+    l_min = 10
+
+    # rate of dropout
+    # neglect of boundaries, not dropout for NN training; default 0.2
+    dropout = 0.2
+
+    # standard deviation of boundary dispersion
+    # default 0.5
+    sigma_bs = 0.5
+
+    # factor for the amplitude of sinusoidal noises
+    # default 0.01
+    f_sin = 0.01
+
+    # factor for the amplitude of gaussian distributed noises
+    # default 0.01
+    f_gauss = 0.01
+
+    # factor for displacements of local boundaries
+    # default 2
+    mutate_bs_local = 2
+
+    # probability for mutation of segment types
+    # default 0.1 (10%)
+    mutate_seg_types = 0.1
+
+    # factor for range change of segments
+    # default 0.2 (20%)
+    mutate_deltas = 0.2
+
+    # mode of generator
+    # 'single': one time series per sample for segmentation;
+    # 'pair': two similar time series per sample for comparison.
+    mode = 'single'
+
+    # usage of data
+    # used to generate path to save the generated data
+    # 'analyse_cnn': data used to train the CNN for segmentation; mode must be 'single', default n_samples = 1e6
+    # 'analyse': data used to test the performance of the time series analyser; mode must be 'pair'
+    # 'compare_cnn': data used to train the CNN for identification of segment types; mode must be 'single', default n_sample =
+    # 'compare': data used to test the performance of time series comparator and interpreter
+    usuage = 'analyse_cnn'
+
+    # data category
+    # 'train_raw', 'train', 'valid', 'test_raw', 'test', 'debug'; used in the directory and file name if needed
+    category = 'debug'
+
+    # number of samples to plot after generation of all samples
+    # used to get a quick picture of the generated data; default range(3)
+    n_plot = range(10)
+
+    #% generation process
+
+    if mode == 'single':
+        generator_single(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, usuage, category, n_plot)
+    elif mode == 'pair':
+        generator_pair(n_samples, tspan, N, m_range, l_min, dropout, sigma_bs, f_gauss, f_sin, mutate_bs_local, mutate_seg_types, mutate_deltas, usuage, category, n_plot)
+    else:
+        raise ValueError("mode unknown, it can only be 'single' or 'pair'")
diff --git a/experiments/samples.pkl b/experiments/samples.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/experiments/samples.pkl.npy b/experiments/samples.pkl.npy
new file mode 100644
index 0000000000000000000000000000000000000000..2c9e45e1dc75939c0448fa97ba763f6fbc23992b
Binary files /dev/null and b/experiments/samples.pkl.npy differ