{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSH vs. DTW nearest-window comparison\n",
    "\n",
    "Compares the candidate ranking returned by the project's LSH search (`_lsh.lsh`) with a brute-force DTW ranking (`tslearn.metrics.dtw`) for a few query windows cut from `data/21.csv`.\n",
    "\n",
    "**Previously recorded run** (saved outputs were cleared because they no longer matched the cell sources; re-run all cells to refresh):\n",
    "\n",
    "- raw data shape `(900096, 74)`, windowed data shape `(59999, 120, 40)`\n",
    "- preprocessing: 14.98 s\n",
    "- LSH query: 2.80-3.11 s per target (14.50 s for 5 targets)\n",
    "- DTW scan: 5.65-5.99 s per target (28.96 s for 5 targets)\n",
    "- target 11975: all 20 of the DTW top-20 windows were among the 18218 LSH candidates, 16 of them within the LSH top-20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "import sys\n",
    "from collections import defaultdict\n",
    "from time import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.spatial.distance import cdist\n",
    "from tslearn.metrics import dtw\n",
    "\n",
    "# `main` and `_lsh` are project modules; ../Flaskserver is put first on the\n",
    "# import path so they can be found (presumably that is where they live).\n",
    "sys.path.insert(0, '../Flaskserver')\n",
    "from main import preprocess\n",
    "import _lsh"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATAFILE = 'data/21.csv'\n",
    "\n",
    "WINDOW_SIZE = 120               # rows per window\n",
    "WINDOW_STEP = WINDOW_SIZE // 8  # distance between consecutive window starts (15 rows)\n",
    "N_CHANNELS = 40                 # only the first 40 CSV columns are used\n",
    "\n",
    "TARGETS = [43895, 33430, 42575, 1060, 11975]  # window indices used as queries\n",
    "TOP_K = 20                                    # ranking depth compared between DTW and LSH\n",
    "\n",
    "# Second positional argument of preprocess(); its meaning is defined by the\n",
    "# project module, the previously saved log echoed it as 'r = 1730'.\n",
    "PREPROCESS_R = 1730\n",
    "\n",
    "# NOTE(review): int(0.05) evaluates to 0. It is kept so the DTW reference ranking\n",
    "# is unchanged, but a band of 5% of the window, int(0.05 * WINDOW_SIZE) == 6,\n",
    "# may have been intended - confirm before relying on the DTW ranking.\n",
    "SAKOE_CHIBA_RADIUS = int(0.05)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def took(start):\n",
    "    \"\"\"Format the wall-clock time elapsed since `start` (a `time()` value).\"\"\"\n",
    "    elapsed = time() - start\n",
    "    return 'Took {:.2f} seconds ({:.1f} minutes).'.format(elapsed, elapsed / 60)\n",
    "\n",
    "\n",
    "def rank_lsh_candidates(lsh_candidates, lsh_distances):\n",
    "    \"\"\"Return the LSH candidate indices ordered by summed distance, smallest first.\n",
    "\n",
    "    Both arguments are triple-nested sequences with matching layout, indexed as\n",
    "    [l][k][i]. A candidate index that occurs more than once gets the sum of all\n",
    "    of its distances. Ties keep first-seen order because the sort is stable.\n",
    "    \"\"\"\n",
    "    totals = defaultdict(float)\n",
    "    for candidate_groups, distance_groups in zip(lsh_candidates, lsh_distances):\n",
    "        for candidate_row, distance_row in zip(candidate_groups, distance_groups):\n",
    "            for candidate, distance in zip(candidate_row, distance_row):\n",
    "                totals[int(candidate)] += distance\n",
    "    return sorted(totals, key=totals.get)\n",
    "\n",
    "\n",
    "def count_hits(reference, pool):\n",
    "    \"\"\"Count how many entries of `reference` also occur in `pool`.\"\"\"\n",
    "    pool = set(pool)  # constant-time membership instead of scanning a list\n",
    "    return sum(1 for index in reference if index in pool)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data and cut it into windows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "samples = pd.read_csv(DATAFILE, header=None).to_numpy()\n",
    "print(samples.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The exclusive range end means a window starting exactly at\n",
    "# len(samples) - WINDOW_SIZE is not produced (unchanged from the earlier version).\n",
    "window_starts = range(0, samples.shape[0] - WINDOW_SIZE, WINDOW_STEP)\n",
    "data = np.stack([samples[start:start + WINDOW_SIZE, :N_CHANNELS] for start in window_starts])\n",
    "\n",
    "# Drop the raw array to save memory; re-run the loading cell before re-running this one.\n",
    "del samples\n",
    "\n",
    "assert data.shape[1:] == (WINDOW_SIZE, N_CHANNELS), data.shape\n",
    "print(data.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LSH preprocessing and queries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Preprocessing:')\n",
    "t0 = time()\n",
    "r, a, sd = preprocess(data, PREPROCESS_R)\n",
    "print('Preprocessing done. ' + took(t0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the result of every target instead of only the last one.\n",
    "lsh_results = {}\n",
    "\n",
    "t0 = time()\n",
    "for i, target in enumerate(TARGETS):\n",
    "    t1 = time()\n",
    "    print('doing lsh')\n",
    "    lsh_candidates, lsh_distances, _ = _lsh.lsh(data, data[target], r, a, sd)\n",
    "    lsh_results[target] = (lsh_candidates, lsh_distances)\n",
    "    print('Target #{} done! '.format(i) + took(t1))\n",
    "print('Done! ' + took(t0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Brute-force DTW reference ranking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every target: all window indices ordered by DTW distance to the target window.\n",
    "dtw_rankings = {}\n",
    "\n",
    "t0 = time()\n",
    "for i, target in enumerate(TARGETS):\n",
    "    t1 = time()\n",
    "    query = data[target]\n",
    "    dtw_distances = [\n",
    "        dtw(window, query, global_constraint='sakoe_chiba', sakoe_chiba_radius=SAKOE_CHIBA_RADIUS)\n",
    "        for window in data\n",
    "    ]\n",
    "    print('Target #{} done! '.format(i) + took(t1))\n",
    "    dtw_rankings[target] = sorted(range(len(dtw_distances)), key=dtw_distances.__getitem__)\n",
    "print('Done! ' + took(t0))\n",
    "print(dtw_rankings[TARGETS[-1]][:10])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare the LSH ranking with the DTW ranking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lsh_rankings = {target: rank_lsh_candidates(*lsh_results[target]) for target in TARGETS}\n",
    "\n",
    "for target in TARGETS:\n",
    "    dtw_top = dtw_rankings[target][:TOP_K]\n",
    "    lsh_ranked = lsh_rankings[target]\n",
    "    print('Target {}: {} LSH candidates'.format(target, len(lsh_ranked)))\n",
    "    print('  DTW top-{} found among all LSH candidates: {}'.format(TOP_K, count_hits(dtw_top, lsh_ranked)))\n",
    "    print('  DTW top-{} found in the LSH top-{}: {}'.format(TOP_K, TOP_K, count_hits(dtw_top, lsh_ranked[:TOP_K])))\n",
    "    print('  DTW:', dtw_top)\n",
    "    print('  LSH:', lsh_ranked[:TOP_K])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}