diff --git a/AngularApp/prototype/src/app/api.service.ts b/AngularApp/prototype/src/app/api.service.ts index 80573ff403c5852ee394d0e328258e02dd98b58a..9b23e33d4dd0a60e37a46dac73ab03b314525498 100644 --- a/AngularApp/prototype/src/app/api.service.ts +++ b/AngularApp/prototype/src/app/api.service.ts @@ -7,11 +7,12 @@ export interface RawData { export interface LshData { candidates: number[][][]; - tables: {[bucket: string]: number[]}[]; + distances: number[][][]; average_candidates: number[]; average_distances: number[]; + tables: {[bucket: string]: number[]}[]; + average_table: {[bucket: string]: number[]}; samples: number[]; - distances: number[][][]; hash_functions: number[][]; parameters?: number[]; } @@ -25,23 +26,37 @@ export interface TableInfoData { distances: number[][]; } +export interface Parameters { + windowsize: number; + hashsize: number; + tablesize: number; + stepsize: number; +} + @Injectable({ providedIn: 'root' }) +/** + * This service acts as the interface between the client and server side. + */ export class ApiService { constructor() { } - // Read input data + /** + * Read input data. The format is a list of channels, where each channel is an object of type RawData + */ async readFile(): Promise<RawData[]> { - const response = await fetch('http://127.0.0.1:5000/read-mts-data'); + const response = await fetch('http://127.0.0.1:5000/read-data'); return await response.json(); } - // Split data into windows and normalize - async createWindows(parameters): Promise<any> { + /** + * Split the data into windows (server side) + */ + async createWindows(parameters: Parameters): Promise<any> { const postData = {parameters}; - const response = await fetch('http://127.0.0.1:5000/create-mts-windows', { + await fetch('http://127.0.0.1:5000/create-windows', { method: 'POST', headers: { 'Accept': 'application/json', @@ -51,32 +66,39 @@ export class ApiService { }); } - // Calculate parameters for LSH + find candidates using LSH - async lshInitial(query): Promise<LshData> { - const response = await fetch('http://127.0.0.1:5000/initialize', { + /** + * Get weights which will be applied to the LSH hash functions + */ + async getWeights(query: number[][], labels: {[index: number]: boolean}, weights: number[], hash_functions: number[][]): Promise<number[]> { + const response = await fetch('http://127.0.0.1:5000/weights', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: new Blob( [ JSON.stringify({query}) ], { type: 'text/plain' } ) + body: new Blob( [ JSON.stringify({query, labels, weights, hash_functions}) ], { type: 'text/plain' } ) }); return await response.json(); } - async getWeights(query: number[][], labels: {[index: number]: boolean}, weights: number[], hash_functions: number[][]): Promise<number[]> { - const response = await fetch('http://127.0.0.1:5000/weights', { + /** + * Do the first iteration of LSH and return important information + */ + async lshInitial(query: number[][]): Promise<LshData> { + const response = await fetch('http://127.0.0.1:5000/initialize', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: new Blob( [ JSON.stringify({query, labels, weights, hash_functions}) ], { type: 'text/plain' } ) + body: new Blob( [ JSON.stringify({query}) ], { type: 'text/plain' } ) }); return await response.json(); } - // Find candidates using LSH with weights + /** + * Do another iteration of LSH, with weights, and return important information + */ async lshUpdate(query, 
weights, parameters): Promise<LshData> { const response = await fetch('http://127.0.0.1:5000/update', { method: 'POST', @@ -89,20 +111,24 @@ export class ApiService { return await response.json(); } - // Get query window based on windows labeled correct - async getQueryWindow(window): Promise<number[][]> { + /** + * Get query window based on windows labeled correct + */ + async getQueryWindow(indices: number | {[index: number]: boolean}): Promise<number[][]> { const response = await fetch('http://127.0.0.1:5000/query', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: JSON.stringify({window}) + body: JSON.stringify({indices}) }); return await response.json(); } - // Get data of a window by indices + /** + * Get data of a window by indices + */ async getWindowByIndices(indices: number[]): Promise<number[][][]> { const response = await fetch('http://127.0.0.1:5000/window', { method: 'POST', @@ -115,14 +141,17 @@ export class ApiService { return await response.json(); } - async getTableInfo(windows): Promise<TableInfoData> { + /** + * Get additional information for a given table + */ + async getTableInfo(table: number[][]): Promise<TableInfoData> { const response = await fetch('http://127.0.0.1:5000/table-info', { method: 'POST', headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' }, - body: JSON.stringify({windows}) + body: JSON.stringify({table}) }); return await response.json(); } diff --git a/AngularApp/prototype/src/app/progress-view/progress-view.component.ts b/AngularApp/prototype/src/app/progress-view/progress-view.component.ts index 69e5c65704952f30924327f815b012c798da163c..c379c9e575757414d791a7589be984ab6e096a2a 100644 --- a/AngularApp/prototype/src/app/progress-view/progress-view.component.ts +++ b/AngularApp/prototype/src/app/progress-view/progress-view.component.ts @@ -1,6 +1,7 @@ import {Component, OnInit, ViewChild} from '@angular/core'; import {StateService} from '../state.service'; import * as d3 from 'd3'; +import {TableInfoData} from '../api.service'; @Component({ selector: 'app-progress-view', @@ -21,12 +22,14 @@ export class ProgressViewComponent implements OnInit { constructor(private state: StateService) { } ngOnInit(): void { - this.state.onNewTableInfo.subscribe(() => { this.showgraph(); }); - this.state.onNewTableInfo.subscribe(() => { this.showHistogram(); }); + this.state.onNewLshData.subscribe(() => { + this.showgraph(); + this.showHistogram(); + }); } showHistogram() { - const table = this.state._averageTable; + const table = this.state.lshData.average_table; this.hist = { data: [{ x: Object.keys(table), @@ -162,7 +165,7 @@ export class ProgressViewComponent implements OnInit { d3.selectAll('circle').transition().style('stroke', undefined); d3.select('#node-' + v.value).transition().style('stroke', 'black').style('stroke-width', 20); const data = this.hist; - data.data[0].marker.line.width = Object.keys(this.state._averageTable).map((key) => { + data.data[0].marker.line.width = Object.keys(this.state.lshData.average_table).map((key) => { return Number(key) === v.value ? 
4 : 0; }); this.hist = data; @@ -177,102 +180,15 @@ export class ProgressViewComponent implements OnInit { } public get table() { - return this.state._averageTable; + return this.state.lshData.average_table; } async showgraph() { - const nodes = []; - const links = []; - const keys = Object.keys(this.table); - this.hoverPlot(this.state.tableInfo.prototypes); - const distances = this.state.tableInfo.distances; - - // for (const key in this.table) { - // const size = this.table[key].length; - // nodes.push({id: key, group: Number(key), size: size}); - // } - // for (const key in this.table) { - // for (const key2 in this.table) { - // if (key === key2) { - // continue; - // } - // links.push({source: key, target: key2, value: 0.001 * (100 - 5 * distances[keys.indexOf(key)][keys.indexOf(key2)])}); - // } - // } - // const graph = {nodes, links}; - // - // const svg = d3.select('#visual'); - // const width = +svg.attr('width'); - // const height = +svg.attr('height'); - // - // svg.selectAll('*').remove(); - // - // const simulation = d3.forceSimulation() - // .force('link', d3.forceLink().id((d: any) => d.id)) - // .force('charge', d3.forceManyBody().strength(100)) // Gravity force - // .force('collide', d3.forceCollide().radius(25).iterations(3)) // Repulsion force - // .force('center', d3.forceCenter(width / 2, height / 2)); // Position force - // - // const link = svg.append('g') - // .selectAll('line') - // .data(graph.links) - // .enter().append('line') - // .attr('stroke', 'grey') - // .attr('stroke-width', (d: any) => d.value); - // - // const node = svg.append('g') - // .selectAll('circle') - // .data(graph.nodes) - // .enter().append('circle') - // .attr('r', (d: any) => 5 * Math.log(d.size) / Math.log(10)) - // .attr('fill', (d: any) => this.getColor(d.group / graph.nodes.length)) - // .attr('id', (d: any) => 'node-' + d.group) - // .on('mouseover', (d: any) => {this.sliderValue = d.group; }) - // .call(d3.drag() - // .on('start', dragstarted) - // .on('drag', dragged) - // .on('end', dragended)); - // - // simulation - // .nodes(graph.nodes as any) - // .on('tick', ticked); - // - // simulation.force<any>('link') - // .links(graph.links); - // - // function ticked() { - // link - // .attr('x1', (d: any) => d.source.x) - // .attr('y1', (d: any) => d.source.y) - // .attr('x2', (d: any) => d.target.x) - // .attr('y2', (d: any) => d.target.y); - // - // node - // .attr('cx', (d: any) => d.x) - // .attr('cy', (d: any) => d.y); - // } - // - // function dragstarted(d) { - // if (!d3.event.active) { - // simulation.alphaTarget(0.1).restart(); - // } - // d.fx = d.x; - // d.fy = d.y; - // } - // - // function dragged(d) { - // d.fx = d3.event.x; - // d.fy = d3.event.y; - // } - // - // function dragended(d) { - // if (!d3.event.active) { - // simulation.alphaTarget(0); - // } - // d.fx = null; - // d.fy = null; - // } + const tableInfo: TableInfoData = await this.state.getTableInfo(Object.values(this.state.lshData.average_table)); + this.hoverPlot(tableInfo.prototypes); + const distances = tableInfo.distances; } + getColor(value) { const hue=((1-value)*120).toString(10); return ["hsl(",hue,",100%,50%)"].join(""); diff --git a/AngularApp/prototype/src/app/state.service.ts b/AngularApp/prototype/src/app/state.service.ts index c711c42c6b0c594d7201ead864fd80b8f573522e..4745c9eb1c88d392cd5ed8a1cdcdc731b1f181b2 100644 --- a/AngularApp/prototype/src/app/state.service.ts +++ b/AngularApp/prototype/src/app/state.service.ts @@ -1,38 +1,42 @@ import {EventEmitter, Injectable} from '@angular/core'; 
-import {ApiService, LshData, RawData, TableInfoData} from './api.service'; +import {ApiService, LshData, Parameters, RawData, TableInfoData} from './api.service'; @Injectable({ providedIn: 'root' }) +/** + * This service acts as the state of the entire application. Components can subscribe to EventEmitters within this state to update their + * contents. + */ export class StateService { - public loadingProgress: number = 0; - + /** + * These are all LSH specific variables. The variables can be accessed using the getters and setters + */ private _rawData: RawData[]; private _lshData: LshData; - private _tableInfo: TableInfoData; private _queryWindow: number[][]; - private _table: {[bucket: string]: number[]}[]; - public _averageTable: {[bucket: string]: number[]}; private _weights: number[]; - - private _currentTab: number; private _labels = {}; - private _sliderValue; private _lshParameters: number[]; - - private states = []; - public windowSize = 120; public nrOfTables = 5; public hashSize = 5; public stepSize = 200; + + /** + * These are all GUI variables + */ + public loadingProgress = 0; public querySelectionMode = true; + private _currentTab: number; + private _sliderValue; + /** + * These are all EventEmitters. Subscribe to these if you want to be informed about an update in state. + */ public onNewData: EventEmitter<void> = new EventEmitter<void>(); public onNewWindows: EventEmitter<void> = new EventEmitter<void>(); public onNewQuery: EventEmitter<void> = new EventEmitter<void>(); - public onNewTable: EventEmitter<void> = new EventEmitter<void>(); - public onNewTableInfo: EventEmitter<void> = new EventEmitter<void>(); public onNewLshData: EventEmitter<void> = new EventEmitter<void>(); public onNewLabels: EventEmitter<void> = new EventEmitter<void>(); @@ -43,6 +47,9 @@ export class StateService { this.initialize(); } + /** + * This function initializes the application. It retrieves the raw data and creates windows. + */ async initialize(): Promise<void> { this.loadingProgress = 0; await this.getRawData(); @@ -51,72 +58,73 @@ export class StateService { this.loadingProgress = 100; } + /** + * This function resets the application. 
It re-creates the windows + */ async reset(): Promise<void> { this.loadingProgress = 50; await this.createWindows(); this.loadingProgress = 100; } + /** + * This function retrieves the raw data + */ async getRawData(): Promise<void> { this.rawData = await this.api.readFile(); } + /** + * This function creates the windows on the server side + */ async createWindows(): Promise<void> { await this.api.createWindows(this.parameters); this.onNewWindows.emit(); } + /** + * This function performs the first iteration of LSH + */ async lshInitial(): Promise<void> { + this._weights = Array(this._queryWindow.length).fill(1); this.lshData = await this.api.lshInitial(this._queryWindow); - console.log('data loaded'); this._lshParameters = this.lshData.parameters; - this._weights = [1, 1, 1]; - this.createTable(); } + /** + * This function performs every other iteration of LSH + */ async update(labels, hashFunctions): Promise<void> { this._weights = await this.api.getWeights(this._queryWindow, labels, this._weights, hashFunctions); - console.log(this._weights); this.lshData = await this.api.lshUpdate(this._queryWindow, this._weights, this._lshParameters); - this.createTable(); } + /** + * This function retrieves additional information given a table + */ async getTableInfo(table: number[][]): Promise<TableInfoData> { - // console.log(this.tableInfo); return await this.api.getTableInfo(table); } + /** + * This function retrieves the query + */ async getQueryWindow(windowIndex: number | {[index: number]: boolean}): Promise<number[][]> { this.queryWindow = await this.api.getQueryWindow(windowIndex); console.log(this.queryWindow); return this._queryWindow; } + /** + * This function retrieves the window given the window index + */ async getWindow(indices: number[]): Promise<number[][][]> { return await this.api.getWindowByIndices(indices); } - async createTable() { - console.log('setting table param'); - this.table = this.lshData.tables; - console.log('table param set'); - const averageTable = {}; - const length = this.lshData.average_distances.length; - const median = this.lshData.average_distances[Math.ceil(length / 2)]; - const stepsize = median / 10; - const indices: number[] = this.lshData.average_distances.map((x) => x > median * 2 ? 
19 : Math.floor(x / stepsize)); - this.lshData.average_candidates.forEach((candidate: number, index: number) => { - if (averageTable[indices[index]] === undefined) - { - averageTable[indices[index]] = []; - } - averageTable[indices[index]].push(candidate); - }); - this._averageTable = averageTable; - console.log('table created'); - this.tableInfo = await this.getTableInfo(Object.values(this._averageTable)); - } - + /** + * These are all setters and getters + */ public set rawData(v: RawData[]) { this._rawData = v; console.log(this._rawData); @@ -137,26 +145,6 @@ export class StateService { return this._lshData; } - public set tableInfo(v: TableInfoData) { - this._tableInfo = v; - this.onNewTableInfo.emit(); - } - - public get tableInfo(): TableInfoData { - return this._tableInfo; - } - - public set table(v: {[bucket: string]: number[]}[]) { - console.log(v); - this._table = v; - console.log('emitting onNewTable'); - this.onNewTable.emit(); - } - - public get table(): {[bucket: string]: number[]}[] { - return this._table; - } - public set labels(v) { this._labels = v; this.onNewLabels.emit(); @@ -197,7 +185,7 @@ export class StateService { return this._lshParameters; } - public get parameters(): {[parameter: string]: number} { + public get parameters(): Parameters { return { windowsize: this.windowSize, hashsize: this.hashSize, diff --git a/AngularApp/prototype/src/app/table-overview/table-overview.component.ts b/AngularApp/prototype/src/app/table-overview/table-overview.component.ts index e7894e5394489d10b1004e75e382919550a8150c..0aef1ba5026c9e5e1dcc771b4162214f9d365f5c 100644 --- a/AngularApp/prototype/src/app/table-overview/table-overview.component.ts +++ b/AngularApp/prototype/src/app/table-overview/table-overview.component.ts @@ -15,7 +15,7 @@ export class TableOverviewComponent implements OnInit { constructor(private state: StateService) { } ngOnInit(): void { - this.state.onNewTable.subscribe(() => { + this.state.onNewLshData.subscribe(() => { this.createHistograms(); this.createPrototypes(); }); @@ -132,7 +132,7 @@ export class TableOverviewComponent implements OnInit { console.log('creating table histograms'); this.subplots = []; this.averages = []; - const tables = this.state.table; + const tables = this.state.lshData.tables; console.log('start of table histograms'); tables.forEach((table, index) => { console.log(index); @@ -181,7 +181,7 @@ export class TableOverviewComponent implements OnInit { // } public get tables() { - return this.state.table; + return this.state.lshData.tables; } public get visible() { diff --git a/Flaskserver/.idea/workspace.xml b/Flaskserver/.idea/workspace.xml index 884596810c8bc96a3ed3d93e243bdc0adc09d6d4..25b40fe7986889f3e28b65f4b07fff584e190874 100644 --- a/Flaskserver/.idea/workspace.xml +++ b/Flaskserver/.idea/workspace.xml @@ -20,18 +20,23 @@ </component> <component name="ChangeListManager"> <list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment=""> - <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/api.service.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/api.service.ts" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/labeling-window/labeling-window.component.ts" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" 
beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/overview-window/overview-window.component.ts" afterDir="false" /> <change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/state.service.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/state.service.ts" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/pseudo.py" beforeDir="false" afterPath="$PROJECT_DIR$/pseudo.py" afterDir="false" /> </list> <option name="SHOW_DIALOG" value="false" /> <option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> <option name="LAST_RESOLUTION" value="IGNORE" /> </component> + <component name="FileTemplateManagerImpl"> + <option name="RECENT_TEMPLATES"> + <list> + <option value="Python Script" /> + </list> + </option> + </component> <component name="Git.Settings"> <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." /> </component> @@ -50,6 +55,10 @@ <property name="nodejs_npm_path_reset_for_default_project" value="true" /> </component> <component name="RecentsManager"> + <key name="MoveFile.RECENT_KEYS"> + <recent name="$PROJECT_DIR$/data" /> + <recent name="$PROJECT_DIR$/libs" /> + </key> <key name="CopyFile.RECENT_KEYS"> <recent name="$PROJECT_DIR$" /> </key> @@ -144,6 +153,10 @@ <screen x="72" y="27" width="1848" height="1053" /> </state> <state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053" timestamp="1606260652750" /> + <state x="687" y="162" width="618" height="783" key="find.popup" timestamp="1606586473850"> + <screen x="72" y="27" width="1848" height="1053" /> + </state> + <state x="687" y="162" width="618" height="783" key="find.popup/72.27.1848.1053@72.27.1848.1053" timestamp="1606586473850" /> <state x="659" y="259" width="672" height="678" key="search.everywhere.popup" timestamp="1604929652702"> <screen x="72" y="27" width="1848" height="1053" /> </state> diff --git a/Flaskserver/__pycache__/DBA.cpython-38.pyc b/Flaskserver/__pycache__/DBA.cpython-38.pyc deleted file mode 100644 index ee756e972d4a8aac20fcbf3c7ac617a69c472ca0..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/DBA.cpython-38.pyc and /dev/null differ diff --git a/Flaskserver/__pycache__/DBA.cpython-39.pyc b/Flaskserver/__pycache__/DBA.cpython-39.pyc deleted file mode 100644 index 368c5be97531cb8f6473dc6bef50b905d96221c0..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/DBA.cpython-39.pyc and /dev/null differ diff --git a/Flaskserver/__pycache__/bigwig.cpython-38.pyc b/Flaskserver/__pycache__/bigwig.cpython-38.pyc deleted file mode 100644 index 00c1792d524e86a06d07155df2a61697b8296f77..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/bigwig.cpython-38.pyc and /dev/null differ diff --git a/Flaskserver/__pycache__/main.cpython-38.pyc b/Flaskserver/__pycache__/main.cpython-38.pyc index 0a689a6143fa33130b1d62a5e9afc3f056bdce18..d7e1394f0ec8adca440344b7cccf0ad1393d6745 100644 Binary files a/Flaskserver/__pycache__/main.cpython-38.pyc and b/Flaskserver/__pycache__/main.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/preprocessing.cpython-38.pyc b/Flaskserver/__pycache__/preprocessing.cpython-38.pyc new 
file mode 100644 index 0000000000000000000000000000000000000000..a7235ecbfce8b71c221b4345ca9ee868b8370c8b Binary files /dev/null and b/Flaskserver/__pycache__/preprocessing.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/pseudo.cpython-38.pyc b/Flaskserver/__pycache__/pseudo.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59cff614cd0a683799a6b40d1e3c54bddacc3b8a Binary files /dev/null and b/Flaskserver/__pycache__/pseudo.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/utils.cpython-38.pyc b/Flaskserver/__pycache__/utils.cpython-38.pyc deleted file mode 100644 index 53aa8e25f8cca63b270439f74b880d04a1d5e23d..0000000000000000000000000000000000000000 Binary files a/Flaskserver/__pycache__/utils.cpython-38.pyc and /dev/null differ diff --git a/Flaskserver/.gitattributes b/Flaskserver/data/.gitattributes similarity index 100% rename from Flaskserver/.gitattributes rename to Flaskserver/data/.gitattributes diff --git a/Flaskserver/21.csv b/Flaskserver/data/21.csv similarity index 100% rename from Flaskserver/21.csv rename to Flaskserver/data/21.csv diff --git a/Flaskserver/NW_Ground_Stations_2016.csv b/Flaskserver/data/NW_Ground_Stations_2016.csv similarity index 100% rename from Flaskserver/NW_Ground_Stations_2016.csv rename to Flaskserver/data/NW_Ground_Stations_2016.csv diff --git a/Flaskserver/chip_w-3000_r-25.h5 b/Flaskserver/data/chip_w-3000_r-25.h5 similarity index 100% rename from Flaskserver/chip_w-3000_r-25.h5 rename to Flaskserver/data/chip_w-3000_r-25.h5 diff --git a/Flaskserver/data.pkl b/Flaskserver/data/data.pkl similarity index 100% rename from Flaskserver/data.pkl rename to Flaskserver/data/data.pkl diff --git a/Flaskserver/parameters.npy b/Flaskserver/data/parameters.npy similarity index 100% rename from Flaskserver/parameters.npy rename to Flaskserver/data/parameters.npy diff --git a/Flaskserver/processed-data b/Flaskserver/data/processed-data similarity index 100% rename from Flaskserver/processed-data rename to Flaskserver/data/processed-data diff --git a/Flaskserver/processed-data.npy b/Flaskserver/data/processed-data.npy similarity index 100% rename from Flaskserver/processed-data.npy rename to Flaskserver/data/processed-data.npy diff --git a/Flaskserver/query b/Flaskserver/data/query similarity index 100% rename from Flaskserver/query rename to Flaskserver/data/query diff --git a/Flaskserver/test.bigWig b/Flaskserver/data/test.bigWig similarity index 100% rename from Flaskserver/test.bigWig rename to Flaskserver/data/test.bigWig diff --git a/Flaskserver/DBA.py b/Flaskserver/libs/DBA.py similarity index 100% rename from Flaskserver/DBA.py rename to Flaskserver/libs/DBA.py diff --git a/Flaskserver/DBA_multivariate.py b/Flaskserver/libs/DBA_multivariate.py similarity index 100% rename from Flaskserver/DBA_multivariate.py rename to Flaskserver/libs/DBA_multivariate.py diff --git a/Flaskserver/__pycache__/DBA_multivariate.cpython-38.pyc b/Flaskserver/libs/__pycache__/DBA_multivariate.cpython-38.pyc similarity index 97% rename from Flaskserver/__pycache__/DBA_multivariate.cpython-38.pyc rename to Flaskserver/libs/__pycache__/DBA_multivariate.cpython-38.pyc index eaac5b3ae6a624df5de796f13360e27285d379c7..c05779e917b5cab3b52eb89a19ee2a3f8aa6e2ff 100644 Binary files a/Flaskserver/__pycache__/DBA_multivariate.cpython-38.pyc and b/Flaskserver/libs/__pycache__/DBA_multivariate.cpython-38.pyc differ diff --git a/Flaskserver/__pycache__/bigwig.cpython-39.pyc b/Flaskserver/libs/__pycache__/bigwig.cpython-38.pyc similarity index 
71% rename from Flaskserver/__pycache__/bigwig.cpython-39.pyc rename to Flaskserver/libs/__pycache__/bigwig.cpython-38.pyc index d7b5cc2d9c3f2b82802447276d67013e31df8328..53394d44f75209bebbfc7b6452b784f4de06c190 100644 Binary files a/Flaskserver/__pycache__/bigwig.cpython-39.pyc and b/Flaskserver/libs/__pycache__/bigwig.cpython-38.pyc differ diff --git a/Flaskserver/bigwig.py b/Flaskserver/libs/bigwig.py similarity index 100% rename from Flaskserver/bigwig.py rename to Flaskserver/libs/bigwig.py diff --git a/Flaskserver/setup.py b/Flaskserver/libs/setup.py similarity index 100% rename from Flaskserver/setup.py rename to Flaskserver/libs/setup.py diff --git a/Flaskserver/utils.py b/Flaskserver/libs/utils.py similarity index 100% rename from Flaskserver/utils.py rename to Flaskserver/libs/utils.py diff --git a/Flaskserver/main.py b/Flaskserver/main.py index 713ce99f4b44d3aa934e26bb66bb0bad72c60fed..fbfcdbad80d524ffdbb862f77a8c30b5b50ef5b9 100644 --- a/Flaskserver/main.py +++ b/Flaskserver/main.py @@ -2,24 +2,12 @@ from flask import Flask, request import numpy as np from flask_cors import CORS from time import time -import pandas as pd import orjson -import bigwig -import bbi -import _ucrdtw -import _lsh -import math -import dask.dataframe as dd import os.path -from random import sample -from DBA_multivariate import performDBA -from tslearn.metrics import dtw -from sklearn import preprocessing -from collections import defaultdict -from dtaidistance import dtw_ndim -from scipy.spatial.distance import euclidean +import pseudo +import preprocessing -from fastdtw import fastdtw +data_path = 'data/processed-data.npy' reload = False @@ -30,478 +18,222 @@ CORS(app) def index(): return "hi" + +""" +Returns raw data + +Output: [{ + index: 1d array [x] + values: 1d array [x] +}] +""" @app.route('/read-data', methods=['GET']) def read_data(): t0 = time() - size = bbi.chromsizes('test.bigWig')['chr1'] - bins = 100000 - data = bigwig.get('test.bigWig', 'chr1', 0, size, bins) - print(data.shape) - response = [ - { - "index": list(range(0, size, int(size/(bins)))), - "values": data.tolist() - }, - { - "index": list(range(0, size, int(size / (bins)))), - "values": data.tolist() - }, - { - "index": list(range(0, size, int(size / (bins)))), - "values": data.tolist() - } - ] + response = preprocessing.read_mts_data() response = orjson.dumps(response) print('Data read: ' + str(time()-t0)) return response -@app.route('/read-mts-data', methods=['GET']) -def read_mts_data(): - filename = 'data.pkl' - if (not os.path.isfile(filename)): - print("start") - df = dd.read_csv("NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td']) - print("read file") - df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0) - print("split rows") - df = df.compute() - df.to_pickle(filename) - print("to_pandas") - df = pd.read_pickle(filename) - df.dropna(subset=['t'], inplace=True) - response = [ - { - "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), - "values": df.loc[df['number_sta'] == 14066001].loc[:, 't'].values.tolist() - }, - { - "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), - "values": df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].values.tolist() - }, - { - "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), - "values": df.loc[df['number_sta'] == 14066001].loc[:, 'td'].values.tolist() - } - ] - print("response ready") - 
response = orjson.dumps(response) - return response -@app.route('/create-mts-windows', methods=['POST']) -def create_mts_windows(): - t0 = time() - if (not os.path.isfile('processed-data.npy')): - filename = 'data.pkl' - df = pd.read_pickle(filename) - channels = list() - channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 't'].fillna(0).values.tolist()) - channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].fillna(0).values.tolist()) - channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'td'].fillna(0).values.tolist()) - print("Data read: " + str(time()-t0)) - # raw_data = request.json - window_size = 120 #int(raw_data['parameters']["windowsize"]) - print("Processing: " + str(time()-t0)) - data = [([values[i:i+window_size] for values in channels]) for i in range(0, len(channels[0]) - window_size, 1)] - print("Raw windows: " + str(time()-t0)) - windows = [] - for i in range(len(data)): - if i % 5000 == 0: - print(i) - windows.append(preprocessing.minmax_scale(data[i], (-1, 1), axis=1)) - print("Preprocessed: " + str(time()-t0)) - np.save('processed-data', windows) - # data = np.load('processed-data.npy') - # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) - # r, a, sd = preprocess(data, 11.5) - # np.save('parameters', np.array([r, a, sd])) - print("Sending response: " + str(time()-t0)) - return '1' +""" +Creates windows +Input: { + parameters: { + windowsize: int + } +} +Output: '1' +""" @app.route('/create-windows', methods=['POST']) def create_windows(): t0 = time() - if (not os.path.isfile('processed-data.npy')): - # raw_data = request.json - # window_size = int(raw_data['parameters']["windowsize"]) - window_size = 120 - data = bigwig.chunk( - 'test.bigWig', - 12000, - int(12000 / window_size), - int(12000 / 6), - ['chr1'], - verbose=True, - ) - data = np.reshape(data, (len(data), 1, len(data[0]))) - data2 = np.copy(data) - np.random.shuffle(data2) - data3 = np.copy(data) - np.random.shuffle(data3) - - data = np.concatenate((data, data2), axis=1) - data = np.concatenate((data, data3), axis=1) - # data = np.repeat(data, repeats=3, axis=1) - np.save('processed-data', data) - print('Windows created: ' + str(time()-t0)) - return '1' - -@app.route('/create-test-windows', methods=['POST']) -def create_test_windows(): - t0 = time() - if (not os.path.isfile('processed-data.npy')): - datafile = '21.csv' - - data = pd.read_csv(datafile, header=None) - - # and convert it to numpy array: - npdata = np.array(data) - print('data loaded') - window_data = [npdata[i:i + 120, 0:5] for i in range(0, npdata.shape[0] - 120, int(120 / 8))] - del npdata - print('data created') - np_window_data = np.repeat(window_data, repeats=3, axis=0) - print(np_window_data.shape) - del window_data - data = np.reshape(np_window_data, (len(np_window_data), 5, len(np_window_data[0]))) - print(data.shape) - np.save('processed-data', data) + if (not os.path.isfile(data_path)): + raw_data = request.json + window_size = int(raw_data['parameters']["windowsize"]) + preprocessing.create_eeg_windows(window_size, 5) print('Windows created: ' + str(time()-t0)) return '1' +""" +Does the first iteration of LSH and returns the candidates, distances, tables, samples, and parameters described below + +Input: { + query: 2d array [d][t] +} + +Output: { + hash_functions: 3d array [k][l][d] + candidates: 3d array [k][l][i] + distances: 3d array [k][l][i] + average_candidates: 1d array [i] + average_distances: 1d array [i] + tables: [{ + bucket: 1d array + }] + average_table: { + bucket: 1d array + } + samples: 1d array + parameters: 1d array
+} +""" @app.route('/initialize', methods=['POST']) def initialize(): t0 = time() raw_data = orjson.loads(request.data) - data = np.load('processed-data.npy') + data = np.load(data_path) data = np.swapaxes(data, 1, 2) - # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) query = raw_data["query"] query = np.swapaxes(query, 0, 1) - # query = np.reshape(query, (len(query[0]), len(query))) - parameters = preprocess(data) # parameters = np.load('parameters.npy') - r = parameters[0] - a = parameters[1] - sd = parameters[2] - - candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) - print(distances) - - dict = defaultdict(int) - for l in range(len(candidates)): - for k in range(len(candidates[0])): - for i in range(len(candidates[0][0])): - dict[candidates[l][k][i]] += distances[l][k][i] - sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} - average_candidates = list(sorted_dict.keys()) - average_distances = list(sorted_dict.values()) - - tables = [] - samples_set = set() - candidates = candidates.tolist() - for l in range(len(candidates)): - for k in range(len(candidates[0])): - samples_set.update(candidates[l][k][0:5]) - dict = defaultdict(list) - length = len(distances[l][k]) - median = distances[l][k][math.ceil(length/2)] - stepsize = median / 10 - indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), distances[l][k])) - for i in range(len(candidates[0][0])): - dict[str(indices[i])].append(candidates[l][k][i]) - tables.append(dict) - - samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist() - - - response = { - "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(), - "candidates": candidates, - "tables": tables, - "distances": distances.tolist(), - "samples": list(samples), - "average_candidates": np.array(average_candidates).tolist(), - "average_distances": np.array(average_distances).tolist(), - "parameters": [float(r), float(a), float(sd)] - } - response = orjson.dumps(response) - print('LSH done: ' + str(time()-t0)) - return response -@app.route('/weights', methods=['POST']) -def weights(): - alpha = 0.2 - raw_data = orjson.loads(request.data) - labels = raw_data["labels"] - hash_functions = raw_data["hash_functions"] - query = raw_data["query"] - old_weights = raw_data["weights"] - data = np.load('processed-data.npy') - all_good_windows = data[[[int(index) for index, value in labels.items() if value is True]]] - - good_distances = np.zeros(len(query)) - for window in all_good_windows: - for i in range(len(all_good_windows[0])): - good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1] - if len(all_good_windows) != 0: - good_distances = np.square(good_distances) - good_distances /= np.sum(good_distances) - good_distances = np.ones(len(query)) - good_distances - good_distances /= np.sum(good_distances) - good_distances *= len(all_good_windows[0]) - good_distances = np.sqrt(good_distances) - - if len(hash_functions) != 0: - summed_hash_functions = np.sum(hash_functions, axis=0) - summed_hash_functions = np.square(summed_hash_functions) - normalized_hash_functions = summed_hash_functions / np.sum(summed_hash_functions) - normalized_hash_functions *= len(hash_functions[0]) - - if len(hash_functions) + len(all_good_windows) == 0: - print("no update") - new_weights = old_weights - elif len(hash_functions) == 0: - print("only windows") - new_weights = alpha * np.array(old_weights) + (1 - alpha) * good_distances - elif 
len(all_good_windows) == 0: - print("only tables") - new_weights = alpha * np.array(old_weights) + (1 - alpha) * normalized_hash_functions - else: - print("tables & windows") - new_weights = alpha * np.array(old_weights) + 0.5 * (1-alpha) * good_distances + 0.5 * (1-alpha) * normalized_hash_functions - - print(new_weights) - - response = orjson.dumps(new_weights.tolist()) + lsh_data = pseudo.lsh(data, query) + + response = orjson.dumps(lsh_data) + print('LSH done: ' + str(time()-t0)) return response +""" +Does a weighted iteration of LSH and returns the same information as /initialize (without the parameters) + +Input: { + query: 2d array [d][t] +} + +Output: { + hash_functions: 3d array [k][l][d] + candidates: 3d array [k][l][i] + distances: 3d array [k][l][i] + average_candidates: 1d array [i] + average_distances: 1d array [i] + tables: [{ + bucket: 1d array + }] + average_table: { + bucket: 1d array + } + samples: 1d array +} +""" @app.route('/update', methods=['POST']) def update(): t0 = time() raw_data = orjson.loads(request.data) - data = np.load('processed-data.npy') + data = np.load(data_path) data = np.swapaxes(data, 1, 2) - # data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) query = raw_data["query"] query = np.swapaxes(query, 0, 1) - # query = np.reshape(query, (len(query[0]), len(query))) weights = raw_data["weights"] parameters = raw_data["parameters"] - candidates, distances, hf = _lsh.lsh(data, query, parameters[0], parameters[1], parameters[2], weights) - dict = defaultdict(int) - for l in range(len(candidates)): - for k in range(len(candidates[0])): - for i in range(len(candidates[0][0])): - dict[candidates[l][k][i]] += distances[l][k][i] - sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} - average_candidates = list(sorted_dict.keys()) - average_distances = list(sorted_dict.values()) - - tables = [] - samples_set = set() - candidates = candidates.tolist() - for l in range(len(candidates)): - for k in range(len(candidates[0])): - samples_set.update(candidates[l][k][0:5]) - dict = defaultdict(list) - length = len(distances[l][k]) - median = distances[l][k][math.ceil(length/2)] - stepsize = median / 10 - indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), distances[l][k])) - for i in range(len(candidates[0][0])): - dict[str(indices[i])].append(candidates[l][k][i]) - tables.append(dict) - - samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist() - - response = { - "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(), - "candidates": candidates, - "tables": tables, - "samples": list(samples), - "average_candidates": np.array(average_candidates).tolist(), - "average_distances": np.array(average_distances).tolist(), - "distances": distances.tolist(), - } - response = orjson.dumps(response) + lsh_data = pseudo.lsh(data, query, parameters=parameters, weights=weights) + + response = orjson.dumps(lsh_data) print('LSH done: ' + str(time()-t0)) return response + +""" +Calculates new weights for LSH algorithm + +Input: { + labels: {index: boolean}
+ hash_functions: 2d array [?][d] + query: 2d array [d][t] + weights: 1d array [d] +} + +Output: 1d array [d] +""" +@app.route('/weights', methods=['POST']) +def weights(): + raw_data = orjson.loads(request.data) + labels = raw_data["labels"] + hash_functions = raw_data["hash_functions"] + query = raw_data["query"] + old_weights = raw_data["weights"] + data = np.load(data_path) + + new_weights = pseudo.weights(data, query, old_weights, labels, hash_functions) + + response = orjson.dumps(new_weights) + return response + + +""" +Calculates query based on given indices + +Input: { + indices: 1d array [?] +} + +Output: 2d array [d][t] +""" @app.route('/query', methods=['POST']) def query(): t0 = time() raw_data = orjson.loads(request.data) - windowIndices = raw_data['window'] - if isinstance(windowIndices, int): - output = np.load('processed-data.npy')[windowIndices] - response = orjson.dumps(output.tolist()) - print("Query done: " + str(time() - t0)) - return response - else: - indices = [int(index) for index, value in windowIndices.items() if value is True] - data = np.load('processed-data.npy')[indices] - output = performDBA(data) - response = orjson.dumps(output.tolist()) - print("Query done: " + str(time()-t0)) - return response + window_indices = raw_data['indices'] + data = np.load(data_path) + + response = pseudo.query(data, window_indices) + + response = orjson.dumps(response) + print("Query done: " + str(time() - t0)) + return response + + +""" +Returns values of windows on given indices +Input: { + indices: 1d array [x] +} + +Output: 3d array [x][d][t] +""" @app.route('/window', methods=['POST']) def window(): t0 = time() raw_data = orjson.loads(request.data) indices = raw_data['indices'] - output = np.load('processed-data.npy')[indices] + + output = np.load(data_path)[indices] + response = orjson.dumps(output.tolist()) - print("Query done: " + str(time() - t0)) + print("Window(s) done: " + str(time() - t0)) return response + +""" +Returns additional information on given table + +Input: { + table: 2d array [x][?] 
+} + +Output: { + prototypes: { + average: 1d array [t] + max: 1d array [t] + min: 1d array [t] + } + distances: 2d array [x][x] +} +""" @app.route('/table-info', methods=['POST']) def table_info(): t0 = time() raw_data = orjson.loads(request.data) - all_windows = raw_data['windows'] - data = np.load('processed-data.npy') - prototypes = [] - for windows in all_windows: - actual_windows = data[windows] - average_values = np.average(actual_windows, 0) - std_values = np.std(actual_windows, 0) - max_values = average_values + std_values - min_values = average_values - std_values - prototypes.append({ - 'average': average_values.tolist(), - 'max': max_values.tolist(), - 'min': min_values.tolist() - }) - # distances = [[dtw(np.array(v["average"]), np.array(w["average"]), global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05 * 120)) for j, w in enumerate(prototypes)] for i, v in enumerate(prototypes)] - response = orjson.dumps({'prototypes': prototypes, 'distances': []}) - print("Averages calculated: " + str(time() - t0)) - return response - -def preprocess(data, r=10.0): - # return 0.10882589134534404, 3.1202154563478928, 0.9705780396843037 - # data = np.load('processed-data.npy') - # data = np.reshape(data, (59999, 20, 120)) - # data = np.repeat(data, repeats=1, axis=1) - subset = [] - t0 = time() - - i = 0 - while i < len(data): - if i % 999 == 0: - print(r) - print(str(i) + ':' + str(len(subset))) - - state = 1 - for s in subset: - if np.linalg.norm(data[i] - data[s]) < r: - state = 0 - break - if state == 1: - subset.append(i) - - i = i + 1 - if i == 10000 and len(subset) < 10: - r = r / 2 - subset = [] - i = 0 - if len(subset) > 200: - r = r + r / 2 - subset = [] - i = 0 - - # subset = sample(list(range(len(data))), 200) - print("r = " + str(r)) - dtw_distances = [] - eq_distances = [] - for i, index_1 in enumerate(subset): - print(i) - for j, index_2 in enumerate(subset): - if index_1 == index_2: - continue - e = np.linalg.norm(data[index_1] - data[index_2]) - if (math.isnan(e) or e == 0): - eq_distances.append(0.0001) - dtw_distances.append(0.0001) - continue - eq_distances.append(e) - d = 0 - # d, _ = fastdtw(data[index_1], data[index_2], dist=euclidean) - d = dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) - # d = _ucrdtw.ucrdtw(data[index_1], data[index_2], 0.05, False)[1] - # d = dtw.dtw(data[index_1], data[index_2], dist_method="Euclidean", window_type="sakoechiba", window_args={"window_size": 120}).distance - dtw_distances.append(d) - - ratios = np.array(dtw_distances)/np.array(eq_distances) - mean_dtw = np.mean(dtw_distances) - sd_dtw = np.std(dtw_distances) - mean_eq = np.mean(eq_distances) - sd_eq = np.std(eq_distances) - a = np.mean(ratios) - sd = np.std(ratios) - theta = mean_dtw + -2.58 * sd_dtw - # theta = mean_eq + -2.58 * sd_eq - r = theta / ((a-sd)*math.sqrt(120)) - if r < 0: - r = mean_dtw / 100 - # r = theta / (math.sqrt(120)) - print('Mean: ' + str(mean_dtw)) - print('Stdev: ' + str(sd_dtw)) - print('Ratio mean: ' + str(a)) - print('Ratio stdev: ' + str(sd)) - print('Theta: ' + str(theta)) - print('r: ' + str(r)) - print('Preprocessing time: ' + str(time() - t0)) - return r, a, sd - -def debug_test_lsh(): - data = np.load('processed-data.npy') - # data = np.repeat(data, repeats=7, axis=1) - print(data.shape) - data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) - - r, a, sd = preprocess(data, 11.25) - create_windows() - query_n = 1234 - t0 = time() - query = data[query_n] - data = 
data.astype('double') - dict = defaultdict(int) - candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) - print("Calculated approximate in: " + str(time()-t0)) - for l in range(len(candidates)): - for k in range(len(candidates[0])): - for i in range(len(candidates[0][0])): - dict[candidates[l][k][i]] += distances[l][k][i] - sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} - candidates = list(sorted_dict.keys()) - - print(candidates[0:20]) + table = raw_data['table'] + data = np.load(data_path) - t0 = time() - # distances = [dtw_ndim.distance_fast(window, query) for window in data] - distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data] - topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k]) - print("Calculated exact dtw in: " + str(time()-t0)) - print(topk_dtw[0:20]) + response = pseudo.table_info(data, table) - t0 = time() - l2distances = [np.linalg.norm(window - query) for window in data] - print("Calculated exact l2 in: " + str(time()-t0)) - - # # distances_ed = [distance.euclidean(query, window) for window in data] - # # topk_ed = sorted(range(len(distances_ed)), key=lambda k: distances_ed[k]) - # - accuracy = 0 - for index in topk_dtw[0:20]: - if index in candidates: - accuracy += 1 - print(accuracy) - -# read_mts_data() -# create_mts_windows() -# debug_test_lsh() \ No newline at end of file + print("Averages calculated: " + str(time() - t0)) + return response \ No newline at end of file diff --git a/Flaskserver/preprocessing.py b/Flaskserver/preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..fe38926877a790c2944f2962afd2aad93f7ee8c0 --- /dev/null +++ b/Flaskserver/preprocessing.py @@ -0,0 +1,120 @@ +import numpy as np +import pandas as pd +from libs import bigwig +import bbi +import dask.dataframe as dd +import os.path +from sklearn import preprocessing + +data_path = 'data/processed-data.npy' + +def read_data(): + size = bbi.chromsizes('data/test.bigWig')['chr1'] + bins = 100000 + data = bigwig.get('data/test.bigWig', 'chr1', 0, size, bins) + print(data.shape) + response = [ + { + "index": list(range(0, size, int(size/(bins)))), + "values": data.tolist() + }, + { + "index": list(range(0, size, int(size / (bins)))), + "values": data.tolist() + }, + { + "index": list(range(0, size, int(size / (bins)))), + "values": data.tolist() + } + ] + return response + +def read_mts_data(): + filename = 'data/data.pkl' + if (not os.path.isfile(filename)): + print("start") + df = dd.read_csv("data/NW_Ground_Stations_2016.csv", usecols=['number_sta', 'date', 't', 'hu', 'td']) + print("read file") + df = df.loc[df['number_sta'].isin([14066001, 14137001, 14216001, 14372001, 22092001, 22113006, 22135001])].fillna(0) + print("split rows") + df = df.compute() + df.to_pickle(filename) + print("to_pandas") + df = pd.read_pickle(filename) + df.dropna(subset=['t'], inplace=True) + response = [ + { + "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), + "values": df.loc[df['number_sta'] == 14066001].loc[:, 't'].values.tolist() + }, + { + "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), + "values": df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].values.tolist() + }, + { + "index": df.loc[df['number_sta'] == 14066001].loc[:, 'date'].values.astype(str).tolist(), + "values": df.loc[df['number_sta'] == 14066001].loc[:, 'td'].values.tolist() + } + ] + return response + +def create_peax_windows_12kb(window_size): + data = bigwig.chunk( + 'data/test.bigWig', + 12000, + int(12000 / window_size), + int(12000 / 6), + ['chr1'], + verbose=True, + ) + data = np.reshape(data, (len(data), 1, len(data[0]))) + np.save(data_path, data) + return '1' + +def create_peax_windows_12kb_mts(window_size): + data = bigwig.chunk( + 'data/test.bigWig', + 12000, + int(12000 / window_size), + int(12000 / 6), + ['chr1'], + verbose=True, + ) + data = np.reshape(data, (len(data), 1, len(data[0]))) + data2 = np.copy(data) + np.random.shuffle(data2) + data3 = np.copy(data) + np.random.shuffle(data3) + + data = np.concatenate((data, data2), axis=1) + data = np.concatenate((data, data3), axis=1) + np.save(data_path, data) + return '1' + +def create_eeg_windows(window_size, nr_of_channels): + datafile = 'data/21.csv' + data = pd.read_csv(datafile, header=None) + npdata = np.array(data) + window_data = [npdata[i:i + window_size, 0:nr_of_channels] for i in range(0, npdata.shape[0] - window_size, int(window_size / 8))] + del npdata + np_window_data = np.repeat(window_data, repeats=3, axis=0) + del window_data + data = np.reshape(np_window_data, (len(np_window_data), nr_of_channels, len(np_window_data[0]))) + np.save(data_path, data) + return '1' + +def create_weather_windows(window_size): + filename = 'data/data.pkl' + df = pd.read_pickle(filename) + channels = list() + channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 't'].fillna(0).values.tolist()) + channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'hu'].fillna(0).values.tolist()) + channels.append(df.loc[df['number_sta'] == 14066001].loc[:, 'td'].fillna(0).values.tolist()) + data = [([values[i:i+window_size] for values in channels]) for i in range(0, len(channels[0]) - window_size, 1)] + windows = [] + for i in range(len(data)): + if i % 5000 == 0: + print(i) + windows.append(preprocessing.minmax_scale(data[i], (-1, 1), axis=1)) + np.save(data_path, windows) + return '1' \ No newline at end of file diff --git a/Flaskserver/pseudo.py b/Flaskserver/pseudo.py new file mode 100644 index 0000000000000000000000000000000000000000..9adf51dbafa0847f4b849ca9bbbedc3c205436cf --- /dev/null +++ b/Flaskserver/pseudo.py @@ -0,0 +1,243 @@ +import numpy as np +from time import time +import _ucrdtw +import _lsh +import math +from libs.DBA_multivariate import performDBA +from tslearn.metrics import dtw +from collections import defaultdict + +def lsh(data, query, parameters = None, weights = None): + if parameters is None: + parameters = preprocess(data) + r = parameters[0] + a = parameters[1] + sd = parameters[2] + + if weights is None: + candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) + else: + candidates, distances, hf = _lsh.lsh(data, query, r, a, sd, weights) + + dict = defaultdict(int) + for l in range(len(candidates)): + for k in range(len(candidates[0])): + for i in range(len(candidates[0][0])): + dict[candidates[l][k][i]] += distances[l][k][i] + sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} + average_candidates = np.array(list(sorted_dict.keys())).tolist() + average_distances = np.array(list(sorted_dict.values())).tolist() + + tables = [] + samples_set = set() + candidates = candidates.tolist() + for l in range(len(candidates)): + for k in range(len(candidates[0])): + samples_set.update(candidates[l][k][0:5]) + dict = defaultdict(list) + length = len(distances[l][k]) + median = distances[l][k][math.ceil(length/2)] + stepsize = median / 10 + indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), distances[l][k])) + for i in
range(len(candidates[0][0])): + dict[str(indices[i])].append(candidates[l][k][i]) + tables.append(dict) + + length = len(average_distances) + median = average_distances[math.ceil(length/2)] + stepsize = median / 10 + indices = list(map(lambda x: 19 if x > median * 2 else math.floor(x / stepsize), average_distances)) + average_table = defaultdict(list) + for i in range(len(average_candidates)): + average_table[str(indices[i])].append(average_candidates[i]) + + samples = np.array(list(filter(lambda x: x in samples_set, average_candidates))).tolist() + + + response = { + "hash_functions": hf.reshape((len(candidates) * len(candidates[0]), len(query[0]))).tolist(), + "candidates": candidates, + "distances": distances.tolist(), + "average_candidates": average_candidates, + "average_distances": average_distances, + "tables": tables, + "average_table": average_table, + "samples": list(samples), + "parameters": [float(r), float(a), float(sd)] + } + return response + +def preprocess(data, r=10.0): + subset = [] + t0 = time() + + i = 0 + while i < len(data): + if i % 999 == 0: + print(r) + print(str(i) + ':' + str(len(subset))) + + state = 1 + for s in subset: + if np.linalg.norm(data[i] - data[s]) < r: + state = 0 + break + if state == 1: + subset.append(i) + + i = i + 1 + if i == 10000 and len(subset) < 10: + r = r / 2 + subset = [] + i = 0 + if len(subset) > 200: + r = r + r / 2 + subset = [] + i = 0 + + # subset = sample(list(range(len(data))), 200) + print("r = " + str(r)) + dtw_distances = [] + eq_distances = [] + for i, index_1 in enumerate(subset): + print(i) + for j, index_2 in enumerate(subset): + if index_1 == index_2: + continue + e = np.linalg.norm(data[index_1] - data[index_2]) + if (math.isnan(e) or e == 0): + eq_distances.append(0.0001) + dtw_distances.append(0.0001) + continue + eq_distances.append(e) + d = dtw(data[index_1], data[index_2], global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) + dtw_distances.append(d) + + ratios = np.array(dtw_distances)/np.array(eq_distances) + mean_dtw = np.mean(dtw_distances) + sd_dtw = np.std(dtw_distances) + mean_eq = np.mean(eq_distances) + sd_eq = np.std(eq_distances) + a = np.mean(ratios) + sd = np.std(ratios) + theta = mean_dtw + -2.58 * sd_dtw + # theta = mean_eq + -2.58 * sd_eq + r = theta / ((a-sd)*math.sqrt(120)) + if r < 0: + r = mean_dtw / 100 + # r = theta / (math.sqrt(120)) + print('Mean: ' + str(mean_dtw)) + print('Stdev: ' + str(sd_dtw)) + print('Ratio mean: ' + str(a)) + print('Ratio stdev: ' + str(sd)) + print('Theta: ' + str(theta)) + print('r: ' + str(r)) + print('Preprocessing time: ' + str(time() - t0)) + return r, a, sd + +def weights(data, query, old_weights, labels, hash_functions): + alpha = 0.2 + all_good_windows = data[[[int(index) for index, value in labels.items() if value is True]]] + + good_distances = np.zeros(len(query)) + for window in all_good_windows: + for i in range(len(all_good_windows[0])): + good_distances[i] += _ucrdtw.ucrdtw(query[i], window[i], 0.05, False)[1] + if len(all_good_windows) != 0: + good_distances = np.square(good_distances) + if np.sum(good_distances) != 0: + good_distances /= np.sum(good_distances) + good_distances = np.ones(len(query)) - good_distances + good_distances /= np.sum(good_distances) + good_distances *= len(all_good_windows[0]) + good_distances = np.sqrt(good_distances) + + if len(hash_functions) != 0: + summed_hash_functions = np.sum(hash_functions, axis=0) + summed_hash_functions = np.square(summed_hash_functions) + normalized_hash_functions = 
summed_hash_functions / np.sum(summed_hash_functions) + normalized_hash_functions *= len(hash_functions[0]) + + if len(hash_functions) + len(all_good_windows) == 0: + print("no update") + new_weights = old_weights + elif len(hash_functions) == 0: + print("only windows") + new_weights = alpha * np.array(old_weights) + (1 - alpha) * good_distances + elif len(all_good_windows) == 0: + print("only tables") + new_weights = alpha * np.array(old_weights) + (1 - alpha) * normalized_hash_functions + else: + print("tables & windows") + new_weights = alpha * np.array(old_weights) + 0.5 * (1-alpha) * good_distances + 0.5 * (1-alpha) * normalized_hash_functions + + print(new_weights) + return new_weights.tolist() + +def table_info(data, table): + prototypes = [] + for cluster in table: + windows = data[cluster] + average_values = np.average(windows, 0) + std_values = np.std(windows, 0) + max_values = average_values + std_values + min_values = average_values - std_values + prototypes.append({ + 'average': average_values.tolist(), + 'max': max_values.tolist(), + 'min': min_values.tolist() + }) + # distances = [[dtw(np.array(v["average"]), np.array(w["average"]), global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05 * 120)) for j, w in enumerate(prototypes)] for i, v in enumerate(prototypes)] + return {'prototypes': prototypes, 'distances': []} + +def query(data, window_indices): + if isinstance(window_indices, int): + output = data[window_indices] + else: + indices = [int(index) for index, value in window_indices.items() if value is True] + indices_windows = data[indices] + output = performDBA(indices_windows) + return output.tolist() + +def debug_test_lsh(): + data = np.load('data/processed-data.npy') + # data = np.repeat(data, repeats=7, axis=1) + print(data.shape) + data = np.reshape(data, (len(data), len(data[0][0]), len(data[0]))) + + r, a, sd = preprocess(data, 11.25) + query_n = 1234 + t0 = time() + query = data[query_n] + data = data.astype('double') + dict = defaultdict(int) + candidates, distances, hf = _lsh.lsh(data, query, r, a, sd) + print("Calculated approximate in: " + str(time()-t0)) + for l in range(len(candidates)): + for k in range(len(candidates[0])): + for i in range(len(candidates[0][0])): + dict[candidates[l][k][i]] += distances[l][k][i] + sorted_dict = {k: v for k, v in sorted(dict.items(), key=lambda item: item[1])} + candidates = list(sorted_dict.keys()) + + print(candidates[0:20]) + + t0 = time() + # distances = [dtw_ndim.distance_fast(window, query) for window in data] + distances = [dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=int(0.05*120)) for window in data] + topk_dtw = sorted(range(len(distances)), key=lambda k: distances[k]) + print("Calculated exact dtw in: " + str(time()-t0)) + print(topk_dtw[0:20]) + + t0 = time() + l2distances = [np.linalg.norm(window - query) for window in data] + print("Calculated exact l2 in: " + str(time()-t0)) + + # # distances_ed = [distance.euclidean(query, window) for window in data] + # # topk_ed = sorted(range(len(distances_ed)), key=lambda k: distances_ed[k]) + # + accuracy = 0 + for index in topk_dtw[0:20]: + if index in candidates: + accuracy += 1 + print(accuracy) diff --git a/Flaskserver/topk.npy b/Flaskserver/topk.npy deleted file mode 100644 index cd3b20f8532b120360e57d68e3a2fed81fa534bc..0000000000000000000000000000000000000000 --- a/Flaskserver/topk.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76c9d862591f8291da412257fb4eff58ec5f567d7c7b14a46de3d5269958c863
-size 997096
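
The endpoints above are driven by the Angular ApiService in a fixed order: create-windows once, then query and initialize, followed by repeated weights/update rounds. A minimal sketch of that round trip in Python using requests -- hypothetical client code, not part of this change -- assuming the Flask server from main.py runs on 127.0.0.1:5000 and the payload shapes match the docstrings in main.py:

import requests

BASE = 'http://127.0.0.1:5000'

# Server-side window creation; a no-op once data/processed-data.npy exists.
requests.post(BASE + '/create-windows', json={'parameters': {'windowsize': 120}})

# Fetch one window as the query (2d array [d][t]), then run the first LSH pass.
query = requests.post(BASE + '/query', json={'indices': 0}).json()
lsh_data = requests.post(BASE + '/initialize', json={'query': query}).json()

# Mark the best average candidate as correct, re-weight, and iterate.
labels = {str(lsh_data['average_candidates'][0]): True}
weights = requests.post(BASE + '/weights', json={
    'query': query,
    'labels': labels,
    'weights': [1.0] * len(query),
    'hash_functions': lsh_data['hash_functions'],
}).json()
lsh_data = requests.post(BASE + '/update', json={
    'query': query,
    'weights': weights,
    'parameters': lsh_data['parameters'],
}).json()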
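Inside pseudo.lsh, the average_candidates/average_distances pair is produced by summing each candidate's distance over all l x k hash tables and sorting the totals ascending. Extracted as a standalone sketch (the helper name rank_candidates is mine):

from collections import defaultdict

def rank_candidates(candidates, distances):
    """Sum each candidate's distance over every (l, k) table, then rank ascending."""
    totals = defaultdict(int)
    for l in range(len(candidates)):
        for k in range(len(candidates[0])):
            for i in range(len(candidates[0][0])):
                totals[candidates[l][k][i]] += distances[l][k][i]
    ranked = sorted(totals.items(), key=lambda item: item[1])
    return [c for c, _ in ranked], [d for _, d in ranked]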
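The tables and average_table structures come from the same distance-binning scheme, which appears twice inside pseudo.lsh; factored out, it reads as below. This is a sketch mirroring the committed logic; it assumes a non-empty distance list sorted ascending whose middle element is non-zero, as the sorted LSH output provides:

import math
from collections import defaultdict

def bucket_by_distance(candidates, distances):
    """Bin candidates into 20 buckets of width median/10; >2*median clamps to bucket 19."""
    median = distances[math.ceil(len(distances) / 2)]  # middle element of the sorted list
    stepsize = median / 10
    table = defaultdict(list)
    for candidate, distance in zip(candidates, distances):
        bucket = 19 if distance > median * 2 else math.floor(distance / stepsize)
        table[str(bucket)].append(candidate)
    return table

The progress view's histogram is driven by exactly these bucket keys (Object.keys(average_table)).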
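pseudo.weights blends the previous per-channel weights with up to two feedback signals, using a fixed alpha of 0.2: one derived from windows labeled good (per-channel DTW distances, squared, inverted and renormalized) and one from the selected hash functions (squared column sums, normalized). The four branches condense to the sketch below (blend_weights and the two signal names are mine; not a drop-in replacement):

import numpy as np

ALPHA = 0.2  # share kept from the previous iteration's weights

def blend_weights(old, window_signal=None, table_signal=None, alpha=ALPHA):
    """Condensed form of the four branches in pseudo.weights; the two signals are
    the per-channel vectors derived from labeled windows and from hash functions."""
    old = np.asarray(old, dtype=float)
    if window_signal is None and table_signal is None:
        return old                                        # no feedback: keep weights
    if table_signal is None:
        return alpha * old + (1 - alpha) * window_signal  # labeled windows only
    if window_signal is None:
        return alpha * old + (1 - alpha) * table_signal   # hash functions only
    return alpha * old + 0.5 * (1 - alpha) * (window_signal + table_signal)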
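Lastly, pseudo.preprocess estimates the LSH parameters from a greedily thinned subset of windows: a and sd are the mean and standard deviation of the DTW-to-Euclidean distance ratios, and the radius r comes from a lower bound on the DTW distances (mean - 2.58 * stdev). The closing arithmetic, isolated under the assumption that the pairwise DTW and Euclidean distance lists have already been collected (window length 120, as hard-coded in the source):

import math
import numpy as np

def lsh_parameters(dtw_distances, eq_distances, window_size=120):
    """r, a, sd as derived at the end of pseudo.preprocess (sketch)."""
    ratios = np.array(dtw_distances) / np.array(eq_distances)
    a, sd = np.mean(ratios), np.std(ratios)
    theta = np.mean(dtw_distances) - 2.58 * np.std(dtw_distances)
    r = theta / ((a - sd) * math.sqrt(window_size))
    if r < 0:  # the bound can go negative; the source falls back to mean/100
        r = np.mean(dtw_distances) / 100
    return r, a, sd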