{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "import random\n",
    "import sys\n",
    "from collections import defaultdict\n",
    "from time import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Make the modules in ../Flaskserver importable (presumably where `pseudo` and `_lsh` live).\n",
    "sys.path.insert(0, '../Flaskserver')\n",
    "from pseudo import preprocess\n",
    "import _lsh\n",
    "\n",
    "datafile = 'data/21.csv'  # not read anywhere in this notebook; the data below is synthetic\n",
    "\n",
    "SEED = 0        # seeds both `random` and numpy so a fresh run draws the same data and targets\n",
    "N = 100\n",
    "T = 100\n",
    "M = 100000\n",
    "N_QUERIES = 10  # number of entries of `data` sampled as queries\n",
    "\n",
    "random.seed(SEED)\n",
    "rng = np.random.default_rng(SEED)\n",
    "\n",
    "# Draw uniform [0, 1) samples of shape (M, T, N) directly as float32. Building the default\n",
    "# float64 array first and converting it afterwards would temporarily need an extra array\n",
    "# twice the size of the final one.\n",
    "data = rng.random(size=(M, T, N), dtype=np.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We sample a number of subwindows which will be used as queries for the search algorithms."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[23484, 81670, 8609, 31051, 42832, 14303, 85653, 67886, 17319, 93188]\n"
     ]
    }
   ],
   "source": [
    "# Indices into the first axis of `data`, drawn without replacement.\n",
    "targets = random.sample(range(len(data)), N_QUERIES)\n",
    "print(targets)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## PSEUDo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For the LSH algorithm some preprocessing is done to find the right LSH parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessing:\n",
      "r = 100\n",
      "smaller\n",
      "r = 50.0\n",
      "smaller\n",
      "r = 25.0\n",
      "bigger\n",
      "r = 37.5\n",
      "bigger\n",
      "r = 43.75\n",
      "smaller\n",
      "r = 40.625\n",
      "bigger\n",
      "r = 42.1875\n",
      "smaller\n",
      "r = 41.40625\n",
      "smaller\n",
      "r = 41.015625\n",
      "Mean: 41.028657241907524\n",
      "Stdev: 0.16600796717920388\n",
      "Ratio mean: 0.9930308931432998\n",
      "Ratio stdev: 0.004133342888573115\n",
      "Theta: 40.60035668658518\n",
      "r: 3.7478993980502606\n",
      "Preprocessing time: 11.971427917480469\n",
      "Preprocessing done. Took 11.97 seconds (0.2 minutes).\n"
     ]
    }
   ],
   "source": [
    "print('Preprocessing:')\n",
    "t0 = time()\n",
    "# r, a and sd are passed unchanged to every _lsh.lsh call below, so they must not be rebound.\n",
    "r, a, sd = preprocess(data, data.shape[2])\n",
    "# Measure once, before printing, so the stored and the printed duration are the same number.\n",
    "pseudo_preprocess_time = time() - t0\n",
    "print('Preprocessing done. Took {:.2f} seconds ({:.1f} minutes).'.format(pseudo_preprocess_time, pseudo_preprocess_time / 60))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we run the LSH algorithm for all targets and calculate the most similar subwindows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rank_candidates(lsh_candidates, lsh_distances):\n",
    "    \"\"\"Return the candidate ids ordered by ascending summed distance.\n",
    "\n",
    "    Both arguments are three-level nested sequences that are walked in parallel;\n",
    "    the distances of every occurrence of an id are added up before sorting.\n",
    "    \"\"\"\n",
    "    # defaultdict(int) starts every id at 0, so the first += simply stores its distance.\n",
    "    totals = defaultdict(int)\n",
    "    for ids_l, dists_l in zip(lsh_candidates, lsh_distances):\n",
    "        for ids_k, dists_k in zip(ids_l, dists_l):\n",
    "            for candidate, distance in zip(ids_k, dists_k):\n",
    "                totals[candidate] += distance\n",
    "    # sorted() is stable: ids with equal totals keep the order in which they were first seen.\n",
    "    return sorted(totals, key=totals.get)\n",
    "\n",
    "\n",
    "def run_lsh_queries(data, targets, r, a, sd, measure):\n",
    "    \"\"\"Query _lsh.lsh once per target and rank the candidates it returns.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : array indexed by the values in `targets`; also passed whole to _lsh.lsh.\n",
    "    targets : iterable of indices into `data`; data[target] is used as the query.\n",
    "    r, a, sd : values returned by preprocess(), forwarded unchanged on every call.\n",
    "    measure : last positional argument of _lsh.lsh (0 or 1 in this notebook).\n",
    "        NOTE(review): presumably selects the distance measure (0 = DTW, 1 = ED,\n",
    "        judging by the `_ed` result names) - confirm against _lsh.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (all_candidates, query_times) : two lists with one entry per target, holding the\n",
    "        ranked candidate ids and the wall-clock seconds spent on that target.\n",
    "    \"\"\"\n",
    "    all_candidates = []\n",
    "    query_times = []\n",
    "    for i, target in enumerate(targets):\n",
    "        t1 = time()\n",
    "        query = data[target]\n",
    "        print('doing lsh')\n",
    "        lsh_candidates, lsh_distances, _ = _lsh.lsh(data, query, r, a, sd, measure)\n",
    "        candidates = rank_candidates(lsh_candidates, lsh_distances)\n",
    "        # One measurement per target, used for both the stored and the printed value.\n",
    "        elapsed = time() - t1\n",
    "        query_times.append(elapsed)\n",
    "        print('Target #{} done! Took {:.2f} seconds ({:.1f} minutes).'.format(i, elapsed, elapsed / 60))\n",
    "        all_candidates.append(candidates)\n",
    "    return all_candidates, query_times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "doing lsh\n",
      "Target #0 done! Took 14.48 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #1 done! Took 14.31 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #2 done! Took 14.30 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #3 done! Took 14.17 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #4 done! Took 14.30 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #5 done! Took 14.31 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #6 done! Took 14.18 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #7 done! Took 13.75 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #8 done! Took 13.80 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #9 done! Took 14.17 seconds (0.2 minutes).\n",
      "Done! Took 141.76 seconds (2.4 minutes).\n"
     ]
    }
   ],
   "source": [
    "t0 = time()\n",
    "all_lsh_candidates, total_lsh_times = run_lsh_queries(data, targets, r, a, sd, 0)\n",
    "elapsed = time() - t0\n",
    "print('Done! Took {:.2f} seconds ({:.1f} minutes).'.format(elapsed, elapsed / 60))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same queries are run again with the last argument of `_lsh.lsh` set to 1 instead of 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "doing lsh\n",
      "Target #0 done! Took 9.85 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #1 done! Took 9.73 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #2 done! Took 9.58 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #3 done! Took 9.60 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #4 done! Took 9.84 seconds (0.2 minutes).\n",
      "doing lsh\n",
      "Target #5 done! Took 9.74 seconds (0.2 minutes).\n",
      "doing lsh\n"
     ]
    }
   ],
   "source": [
    "t0 = time()\n",
    "all_lsh_candidates_ed, total_lsh_times_ed = run_lsh_queries(data, targets, r, a, sd, 1)\n",
    "elapsed = time() - t0\n",
    "print('Done! Took {:.2f} seconds ({:.1f} minutes).'.format(elapsed, elapsed / 60))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}