Commit 9cabbe5f authored by Kruyff,D.L.W. (Dylan)
Browse files

Slight speed up on table creation


Former-commit-id: 6375d5da
parent d746d449
......@@ -19,7 +19,7 @@ export class CacheService {
public windowSize = 120;
public nrOfTables = 20;
public hashSize = 4;
public hashSize = 8;
public stepSize = 200;
public querySelectionMode = true;
......@@ -69,6 +69,7 @@ export class CacheService {
/**
 * Asks the API to create tables for `this.parameters`, stores the awaited
 * result in `this.tables`, and logs that result to the console.
 */
async createTables(): Promise<void> {
this.tables = await this.api.createTables(this.parameters);
console.log(this.tables);
}
async getSimilarWindows(): Promise<any> {
......
......@@ -10,7 +10,7 @@ export class LabelingWindowComponent implements OnInit {
public topk;
public subplots = [];
public labels: boolean[] = [];
private k = 5;
private k = 12;
constructor(private service: CacheService) { }
......
......@@ -20,8 +20,10 @@
</component>
<component name="ChangeListManager">
<list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/processed-data.npy" beforeDir="false" afterPath="$PROJECT_DIR$/processed-data.npy" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......
......@@ -11,12 +11,39 @@ import orjson
import dask.dataframe as dd
import bigwig
import bbi
from bitarray import bitarray
import _ucrdtw
# NOTE(review): `reload` is not referenced in the visible code - confirm it is still needed.
reload = False
# Flask application object; the handlers below register on it via @app.route.
app = Flask(__name__)
# Presumably flask-cors, enabling cross-origin calls from the browser front end - TODO confirm.
CORS(app)
def calculate_signatures_random_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Turn windows into bit-string signatures via a random projection.

    Args:
        data: A single window (1-D) or a stack of windows (2-D, one per row).
        window_size: Number of rows of the generated weight matrix; only
            read when ``hash_function`` is None.
        hash_size: Number of columns of the generated weight matrix (one
            bit per column); only read when ``hash_function`` is None.
        hash_function: Existing weights to project with. When None, weights
            are drawn uniformly from [-1, 1).

    Returns:
        For a 1-D projection result: the signature string alone.
        Otherwise: ``(signatures, hash_function)`` where ``signatures`` is a
        list with one '0'/'1' string per row.
    """
    if hash_function is None:
        hash_function = np.random.uniform(-1, 1, size=(window_size, hash_size))

    def to_bit_string(bits):
        # One character per projection: '1' when it is strictly positive.
        return ''.join('1' if bit else '0' for bit in bits)

    projection_positive = np.dot(data, hash_function) > 0
    if projection_positive.ndim == 1:
        return to_bit_string(projection_positive)

    bit_strings = [to_bit_string(row) for row in projection_positive]
    return bit_strings, hash_function
def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Hash windows into integer signatures using cumulative-sum random weights.

    Each column of a generated hash function is the running sum of
    ``window_size`` samples drawn uniformly from [-1, 1). The sign of the
    projection onto each column contributes one bit, and the bits of one
    window are packed (first bit most significant, zero-padded to a whole
    number of bytes) into a single integer.

    Args:
        data: A single window (1-D) or a stack of windows (2-D, one per row).
        window_size: Length of each weight column; only read when
            ``hash_function`` is None.
        hash_size: Number of weight columns, i.e. bits per signature; only
            read when ``hash_function`` is None.
        hash_function: Existing weights (array or nested list) to project
            with. When None, a new one is generated.

    Returns:
        For a single 1-D window: the integer signature alone, mirroring
        ``calculate_signatures_random_weights`` so the result can be used
        directly as a table key.
        For 2-D input: ``(signatures, hash_function)`` with one integer per
        row of ``data``.
    """
    if hash_function is None:
        hash_function = np.array(
            [np.cumsum(np.random.uniform(-1, 1, window_size)) for _ in range(hash_size)]
        ).transpose()
    signatures_bool = np.dot(data, hash_function) > 0
    # Pack along the last axis so every window keeps its own bits. The default
    # (axis=None) flattens the matrix first, which only yields one value per
    # window when hash_size happens to be exactly 8.
    packed = np.packbits(signatures_bool, axis=-1)
    if packed.ndim == 1:
        return int.from_bytes(packed.tobytes(), 'big')
    # Fold each row's bytes into one hashable int so hash sizes above 8 work too.
    signatures = [int.from_bytes(row.tobytes(), 'big') for row in packed]
    return signatures, hash_function
# Signature function used by lsh(), similarity() and update(); rebind this
# name to switch every caller to a different hashing scheme.
lsh_function = calculate_signatures_cumsum_weights
@app.route('/', methods=['GET'])
def index():
    """Answer GET requests on the root URL with a fixed greeting."""
    greeting = "hi"
    return greeting
......@@ -34,6 +61,11 @@ def read_data():
}
response = orjson.dumps(response)
print('Data read: ' + str(time()-t0))
query = data[10000:11200]
print(query)
loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True)
print(data[loc:loc+120])
print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
return response
@app.route('/create-windows', methods=['POST'])
......@@ -63,6 +95,7 @@ def create_tables():
hash_size = int(raw_data['parameters']["hashsize"])
table_size = int(raw_data['parameters']["tablesize"])
t0 = time()
hash_functions, tables = lsh(data, window_size, hash_size, table_size)
response = {}
......@@ -71,40 +104,26 @@ def create_tables():
"hash": hash_functions[table_index],
"entries": tables[table_index]
}
response = orjson.dumps(response)
response = jsonify(response)
print('done: ' + str(time()-t0))
return response
def lsh(data, window_size, hash_size, table_size):
    """Build ``table_size`` hash tables over the rows of ``data``.

    For every table a fresh hash function is obtained from ``lsh_function``
    and each row index of ``data`` is filed under that row's signature.

    Args:
        data: Array of windows, one per row (its ``shape`` is printed).
        window_size: Forwarded to ``lsh_function`` to size the hash function.
        hash_size: Forwarded to ``lsh_function``; number of bits per signature.
        table_size: Number of tables to build.

    Returns:
        ``(hash_functions, tables)``: ``hash_functions[i]`` is the i-th hash
        function as a nested list, and ``tables[i]`` maps each signature to
        the list of row indices that produced it.
    """
    t0 = time()
    print('Starting: ' + str(time() - t0))
    tables_hash_function = []
    print('Init time: ' + str(time() - t0))
    tables = []
    print(data.shape)
    for _ in range(table_size):
        t1 = time()
        signatures, hash_function = lsh_function(data, window_size=window_size, hash_size=hash_size)
        # Keep every row index per signature. A plain {signature: index}
        # mapping would retain only the last colliding row, while the lookup
        # code extends a neighbour list from each bucket.
        table = defaultdict(list)
        for row_index, signature in enumerate(signatures):
            table[signature].append(row_index)
        tables.append(dict(table))
        tables_hash_function.append(hash_function.tolist())
        print(time() - t1)
    print('Creation time: ' + str(time() - t0))
    hash_functions = tables_hash_function
    return hash_functions, tables
def calculate_signatures_random_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Compute '0'/'1' signature strings from the sign of a random projection.

    When ``hash_function`` is None a ``(window_size, hash_size)`` weight
    matrix is drawn uniformly from [-100, 100). A 1-D projection result
    yields the signature string alone; any other result yields
    ``(list_of_signature_strings, hash_function)``.
    """
    if hash_function is None:
        hash_function = np.random.uniform(-100, 100, size=(window_size, hash_size))
    sign_bits = np.dot(data, hash_function) > 0
    if sign_bits.ndim != 1:
        per_row = []
        for row in sign_bits:
            per_row.append(''.join('1' if bit else '0' for bit in row))
        return per_row, hash_function
    return ''.join('1' if bit else '0' for bit in sign_bits)
@app.route('/similarity', methods=['POST'])
def similarity():
t0 = time()
......@@ -115,7 +134,7 @@ def similarity():
output = defaultdict(list)
for t in tables.values():
signature = calculate_signatures_random_weights(window, hash_function=t["hash"])
signature = lsh_function(window, hash_function=t["hash"])
neighbours.extend(t["entries"][signature])
neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items():
......@@ -143,7 +162,7 @@ def update():
for t in tables.values():
valid = True
signature = calculate_signatures_random_weights(window, hash_function=t['hash'])
signature = lsh_function(window, hash_function=t['hash'])
neighbours = t["entries"][signature]
for index in correct_indices:
if index not in neighbours:
......@@ -160,11 +179,11 @@ def update():
entries = defaultdict(list)
t1 = time()
while True:
correct_signatures, hash_function = calculate_signatures_random_weights(data[correct_indices], window_size=window_size, hash_size=hash_size)
incorrect_signatures, _ = calculate_signatures_random_weights(data[incorrect_indices], hash_function=hash_function)
correct_signatures, hash_function = lsh_function(data[correct_indices], window_size=window_size, hash_size=hash_size)
incorrect_signatures, _ = lsh_function(data[incorrect_indices], hash_function=hash_function)
if correct_signatures.count(correct_signatures[0]) == len(correct_signatures) and incorrect_signatures.count(correct_signatures[0]) == 0:
break
signatures, _ = calculate_signatures_random_weights(data, hash_function=hash_function)
signatures, _ = lsh_function(data, hash_function=hash_function)
for i in range(len(signatures)):
entries[signatures[i]].append(i)
print(str(index) + ": " + str(time() - t1))
......
No preview for this file type
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment