Commit 5defad76 authored by Kruyff,D.L.W. (Dylan)'s avatar Kruyff,D.L.W. (Dylan)
Browse files

Restructured project

parent 6f6c3f19
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment=""> <list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment="" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
...@@ -16,7 +14,7 @@ ...@@ -16,7 +14,7 @@
<entry file="file://$PROJECT_DIR$/main.py"> <entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="369"> <state relative-caret-position="369">
<caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" /> <caret line="198" column="15" lean-forward="true" selection-start-line="198" selection-start-column="15" selection-end-line="198" selection-end-column="15" />
<folding> <folding>
<element signature="e#0#41#0" expanded="true" /> <element signature="e#0#41#0" expanded="true" />
</folding> </folding>
...@@ -75,7 +73,7 @@ ...@@ -75,7 +73,7 @@
</list> </list>
</option> </option>
</component> </component>
<component name="ProjectFrameBounds" extendedState="6"> <component name="ProjectFrameBounds" extendedState="7">
<option name="x" value="12" /> <option name="x" value="12" />
<option name="y" value="-36" /> <option name="y" value="-36" />
<option name="width" value="1890" /> <option name="width" value="1890" />
...@@ -205,15 +203,15 @@ ...@@ -205,15 +203,15 @@
<workItem from="1594589515579" duration="1044000" /> <workItem from="1594589515579" duration="1044000" />
<workItem from="1594719112139" duration="10388000" /> <workItem from="1594719112139" duration="10388000" />
<workItem from="1595247298901" duration="17719000" /> <workItem from="1595247298901" duration="17719000" />
<workItem from="1597658111794" duration="32577000" /> <workItem from="1597658111794" duration="33720000" />
</task> </task>
<servers /> <servers />
</component> </component>
<component name="TimeTrackingManager"> <component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="67045000" /> <option name="totallyTimeSpent" value="68188000" />
</component> </component>
<component name="ToolWindowManager"> <component name="ToolWindowManager">
<frame x="-7" y="-7" width="1295" height="695" extended-state="6" /> <frame x="-7" y="-7" width="1295" height="695" extended-state="7" />
<editor active="true" /> <editor active="true" />
<layout> <layout>
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.26354083" /> <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.26354083" />
...@@ -270,7 +268,7 @@ ...@@ -270,7 +268,7 @@
<entry file="file://$PROJECT_DIR$/main.py"> <entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="369"> <state relative-caret-position="369">
<caret line="198" column="19" lean-forward="true" selection-start-line="198" selection-start-column="19" selection-end-line="198" selection-end-column="19" /> <caret line="198" column="15" lean-forward="true" selection-start-line="198" selection-start-column="15" selection-end-line="198" selection-end-column="15" />
<folding> <folding>
<element signature="e#0#41#0" expanded="true" /> <element signature="e#0#41#0" expanded="true" />
</folding> </folding>
......
%% Cell type:markdown id: tags:
# Analysing Time Series using Sax
%% Cell type:markdown id: tags:
This notebook contains a prototype approach for analyzing time series data.
The main goals of these applications are to:
1. Provide an interactive view of the data (zoom in/out, show info on hover)
2. Cluster *windows* (consecutive series of data-points) based on similarity
3. Visualise the clusters in some way
The applications are to be used in a large-scale data context. Thus the clustering and retrieval of similar windows should be fast. There are algorithms that compare the raw data of all windows to each other (e.g. DTW), but this has a time complexity of O(n^2), which is far too expensive for big data.
A faster way to cluster windows is to map each window to a fixed-length symbol. If the length of the symbol is fixed, we can assign each window to a bucket. A cluster would then be all the elements within the bucket. When we receive a (new) window, we can simply compute the corresponding symbol and find all similar windows.
One example of this is Symbolic Aggregate approXimation (SAX). SAX is used to transform a sequence of rational numbers (i.e., a time series) into a sequence of letters (i.e., a string).
%% Cell type:markdown id: tags:
We start off with a simple dataset containing weather information from 2013 to 2017. The code below shows a snippet of the dataset.
%% Cell type:code id: tags:
``` python
import pandas as pd
# Load the climate CSV, using its first column as the row index.
df = pd.read_csv("DailyDelhiClimateTrain.csv", index_col=0)
# Turn the index labels into datetimes, then order the rows by index.
df.index = pd.to_datetime(df.index)
df = df.sort_index()
# Show the first rows as a quick preview.
df.head()
```
%% Output
meantemp humidity wind_speed meanpressure
date
2013-01-01 10.000000 84.500000 0.000000 1015.666667
2013-01-02 7.400000 92.000000 2.980000 1017.800000
2013-01-03 7.166667 87.000000 4.633333 1018.666667
2013-01-04 8.666667 71.333333 1.233333 1017.166667
2013-01-05 6.000000 86.833333 3.700000 1016.500000
%% Cell type:markdown id: tags:
To keep things simple, we will only use the mean temperature variable (meantemp) for our testing data.
%% Cell type:code id: tags: