Next Tuesday, 25 January, at around 21:30, we'll be upgrading to GitLab version 14.7.

Commit 9cabbe5f authored by Kruyff,D.L.W. (Dylan)'s avatar Kruyff,D.L.W. (Dylan)
Browse files

Slight speed up on table creation


Former-commit-id: 6375d5da
parent d746d449
......@@ -19,7 +19,7 @@ export class CacheService {
public windowSize = 120;
public nrOfTables = 20;
public hashSize = 4;
public hashSize = 8;
public stepSize = 200;
public querySelectionMode = true;
......@@ -69,6 +69,7 @@ export class CacheService {
/**
 * Asks the API to create tables for the current parameters, stores the
 * result on this service and logs it to the console.
 */
async createTables(): Promise<void> {
  const createdTables = await this.api.createTables(this.parameters);
  this.tables = createdTables;
  console.log(this.tables);
}
async getSimilarWindows(): Promise<any> {
......
......@@ -10,7 +10,7 @@ export class LabelingWindowComponent implements OnInit {
public topk;
public subplots = [];
public labels: boolean[] = [];
private k = 5;
private k = 12;
constructor(private service: CacheService) { }
......
......@@ -20,8 +20,10 @@
</component>
<component name="ChangeListManager">
<list default="true" id="556080ba-825c-4b55-a92a-867a4df4fb32" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts" beforeDir="false" afterPath="$PROJECT_DIR$/../AngularApp/prototype/src/app/cache.service.ts" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/processed-data.npy" beforeDir="false" afterPath="$PROJECT_DIR$/processed-data.npy" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......
......@@ -11,12 +11,39 @@ import orjson
import dask.dataframe as dd
import bigwig
import bbi
from bitarray import bitarray
import _ucrdtw
reload = False
app = Flask(__name__)
CORS(app)
def calculate_signatures_random_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Hash `data` into '0'/'1' bit-string signatures via a random projection.

    When `hash_function` is None a new projection matrix of shape
    (window_size, hash_size) is drawn uniformly from [-1, 1). Each bit of a
    signature is '1' where the projection of the input is strictly positive.

    Returns:
        * a single bit-string when the projection result is 1-D (note: the
          hash function is NOT returned in this case);
        * otherwise a tuple ``(list_of_bit_strings, hash_function)``.
    """
    def as_bitstring(flags):
        # One character per projected component.
        return ''.join('1' if flag else '0' for flag in flags)

    if hash_function is None:
        hash_function = np.random.uniform(-1, 1, size=(window_size, hash_size))

    is_positive = np.dot(data, hash_function) > 0

    if is_positive.ndim == 1:
        return as_bitstring(is_positive)

    return [as_bitstring(row) for row in is_positive], hash_function
def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Hash `data` into integer signatures using cumulative-sum random weights.

    When `hash_function` is None a new (window_size, hash_size) matrix is
    generated; each column is the cumulative sum of `window_size` samples
    drawn uniformly from [-1, 1).

    Each input row is projected onto the hash function, the sign of every
    projected component becomes one bit (1 where strictly positive), and the
    bits of a row are packed big-endian into a single integer. Rows whose bit
    count is not a multiple of 8 are zero-padded on the right by
    `np.packbits`.

    The bits are packed per row. Packing the flattened matrix only lines up
    with row boundaries when `hash_size` is exactly 8; for any other size it
    mixes bits of neighbouring rows and yields the wrong number of
    signatures. For `hash_size == 8` the result is unchanged.

    Returns:
        tuple ``(signatures, hash_function)`` where `signatures` is a list
        with one Python int per input row (a 1-D input counts as one row).
    """
    if hash_function is None:
        hash_function = np.array(
            [np.cumsum(np.random.uniform(-1, 1, window_size)) for _ in range(hash_size)]
        ).transpose()

    # Treat a single window (1-D projection) as a one-row matrix.
    signatures_bool = np.atleast_2d(np.dot(data, hash_function) > 0)
    packed = np.packbits(signatures_bool, axis=-1)

    if packed.shape[1] == 1:
        # Fast path: at most 8 bits per row, one byte is the whole signature.
        signatures_int = packed[:, 0].tolist()
    else:
        # More than 8 bits per row: join the row's bytes into one integer.
        signatures_int = [int.from_bytes(row.tobytes(), 'big') for row in packed]

    return signatures_int, hash_function
# Signature function called through the name `lsh_function` by the table-building and query code in this file.
lsh_function = calculate_signatures_cumsum_weights
@app.route('/', methods=['GET'])
def index():
    """Answer GET requests on the root path with a fixed greeting string."""
    greeting = "hi"
    return greeting
......@@ -34,6 +61,11 @@ def read_data():
}
response = orjson.dumps(response)
print('Data read: ' + str(time()-t0))
query = data[10000:11200]
print(query)
loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True)
print(data[loc:loc+120])
print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
return response
@app.route('/create-windows', methods=['POST'])
......@@ -63,6 +95,7 @@ def create_tables():
hash_size = int(raw_data['parameters']["hashsize"])
table_size = int(raw_data['parameters']["tablesize"])
t0 = time()
hash_functions, tables = lsh(data, window_size, hash_size, table_size)
response = {}
......@@ -71,40 +104,26 @@ def create_tables():
"hash": hash_functions[table_index],
"entries": tables[table_index]
}
response = orjson.dumps(response)
response = jsonify(response)
print('done: ' + str(time()-t0))
return response
def lsh(data, window_size, hash_size, table_size):
    """Build `table_size` hash tables over the rows of `data`.

    For every table a fresh hash function is generated by the module-level
    `lsh_function`, every row of `data` is hashed, and row indices are grouped
    by signature.

    Each signature maps to the list of ALL row indices that produced it, so
    rows that collide stay together in one bucket. Signatures are computed
    once per table, by `lsh_function` only.

    Returns:
        tuple ``(hash_functions, tables)``: `hash_functions[i]` is the i-th
        hash function converted with ``.tolist()``; `tables[i]` is a dict
        mapping signature -> list of row indices.
    """
    t0 = time()
    print('Starting: ' + str(time() - t0))
    hash_functions = []
    print('Init time: ' + str(time() - t0))
    tables = []
    print(data.shape)
    for _ in range(table_size):
        t1 = time()
        signatures, hash_function = lsh_function(data, window_size=window_size, hash_size=hash_size)
        buckets = defaultdict(list)
        for row_index, signature in enumerate(signatures):
            buckets[signature].append(row_index)
        # Store a plain dict so later lookups cannot silently create buckets.
        tables.append(dict(buckets))
        hash_functions.append(hash_function.tolist())
        print(time() - t1)
    print('Creation time: ' + str(time() - t0))
    return hash_functions, tables
def calculate_signatures_random_weights(data, window_size=None, hash_size=None, hash_function=None):
    """Compute '0'/'1' bit-string signatures of `data` by random projection.

    If no `hash_function` is supplied, a (window_size, hash_size) matrix is
    sampled uniformly from [-100, 100). A bit is '1' where the corresponding
    projected value is strictly greater than zero.

    Returns:
        * for a 1-D projection result: just the bit-string (no hash function);
        * otherwise: ``(list_of_bit_strings, hash_function)``.
    """
    if hash_function is None:
        hash_function = np.random.uniform(-100, 100, size=(window_size, hash_size))

    def to_bits(flags):
        # Render a sequence of booleans as a compact '0'/'1' string.
        return ''.join(map(lambda flag: '1' if flag else '0', flags))

    positive = np.dot(data, hash_function) > 0
    if positive.ndim != 1:
        return list(map(to_bits, positive)), hash_function
    return to_bits(positive)
@app.route('/similarity', methods=['POST'])
def similarity():
t0 = time()
......@@ -115,7 +134,7 @@ def similarity():
output = defaultdict(list)
for t in tables.values():
signature = calculate_signatures_random_weights(window, hash_function=t["hash"])
signature = lsh_function(window, hash_function=t["hash"])
neighbours.extend(t["entries"][signature])
neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items():
......@@ -143,7 +162,7 @@ def update():
for t in tables.values():
valid = True
signature = calculate_signatures_random_weights(window, hash_function=t['hash'])
signature = lsh_function(window, hash_function=t['hash'])
neighbours = t["entries"][signature]
for index in correct_indices:
if index not in neighbours:
......@@ -160,11 +179,11 @@ def update():
entries = defaultdict(list)
t1 = time()
while True:
correct_signatures, hash_function = calculate_signatures_random_weights(data[correct_indices], window_size=window_size, hash_size=hash_size)
incorrect_signatures, _ = calculate_signatures_random_weights(data[incorrect_indices], hash_function=hash_function)
correct_signatures, hash_function = lsh_function(data[correct_indices], window_size=window_size, hash_size=hash_size)
incorrect_signatures, _ = lsh_function(data[incorrect_indices], hash_function=hash_function)
if correct_signatures.count(correct_signatures[0]) == len(correct_signatures) and incorrect_signatures.count(correct_signatures[0]) == 0:
break
signatures, _ = calculate_signatures_random_weights(data, hash_function=hash_function)
signatures, _ = lsh_function(data, hash_function=hash_function)
for i in range(len(signatures)):
entries[signatures[i]].append(i)
print(str(index) + ": " + str(time() - t1))
......
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment