Commit 2750c150 authored by Kruyff,D.L.W. (Dylan)
Browse files

Trying simple hashing with low memory use (not working)


Former-commit-id: bb598b63
parent bcdac582
...@@ -17,9 +17,9 @@ export class CacheService { ...@@ -17,9 +17,9 @@ export class CacheService {
private _sliderValue; private _sliderValue;
private _queryWindow; private _queryWindow;
public windowSize = 120; public windowSize = 200;
public nrOfTables = 20; public nrOfTables = 5;
public hashSize = 8; public hashSize = 2;
public stepSize = 200; public stepSize = 200;
public querySelectionMode = true; public querySelectionMode = true;
......
...@@ -126,13 +126,17 @@ ...@@ -126,13 +126,17 @@
<option name="oldMeFiltersMigrated" value="true" /> <option name="oldMeFiltersMigrated" value="true" />
</component> </component>
<component name="WindowStateProjectService"> <component name="WindowStateProjectService">
<state x="686" y="355" width="610" height="403" key="#com.intellij.fileTypes.FileTypeChooser" timestamp="1600727680781"> <state x="686" y="355" width="664" height="403" key="#com.intellij.fileTypes.FileTypeChooser" timestamp="1601384380854">
<screen x="72" y="27" width="1848" height="1053" /> <screen x="72" y="27" width="1848" height="1053" />
</state> </state>
<state x="686" y="355" width="610" height="403" key="#com.intellij.fileTypes.FileTypeChooser/72.27.1848.1053@72.27.1848.1053" timestamp="1600727680781" /> <state x="686" y="355" width="664" height="403" key="#com.intellij.fileTypes.FileTypeChooser/72.27.1848.1053@72.27.1848.1053" timestamp="1601384380854" />
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl" timestamp="1600726193087"> <state x="721" y="422" width="1200" height="800" key="DiffContextDialog" timestamp="1601242420342">
<screen x="72" y="27" width="1848" height="1053" /> <screen x="72" y="27" width="1848" height="1053" />
</state> </state>
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053" timestamp="1600726193087" /> <state x="721" y="422" width="1200" height="800" key="DiffContextDialog/72.27.1848.1053@72.27.1848.1053" timestamp="1601242420342" />
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl" timestamp="1601285087193">
<screen x="72" y="27" width="1848" height="1053" />
</state>
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053" timestamp="1601285087193" />
</component> </component>
</project> </project>
\ No newline at end of file
...@@ -13,8 +13,9 @@ import bigwig ...@@ -13,8 +13,9 @@ import bigwig
import bbi import bbi
from bitarray import bitarray from bitarray import bitarray
import _ucrdtw import _ucrdtw
from scipy.sparse import dia_matrix
reload = False reload = True
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
...@@ -35,14 +36,38 @@ def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None, ...@@ -35,14 +36,38 @@ def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None,
signatures_int = np.packbits(signatures_bool) signatures_int = np.packbits(signatures_bool)
return signatures_int.tolist(), hash_function return signatures_int.tolist(), hash_function
def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None, hash_function=None): def calculate_signatures_new(data, window_size=None, hash_size=None, hash_function=None):
if hash_function is None: if hash_function is None:
hash_function = np.array([np.cumsum(np.random.uniform(-1, 1, window_size)) for _ in range(hash_size)]).transpose() hash_function = np.array([np.cumsum(np.random.uniform(-1, 1, window_size)) for _ in range(hash_size)]).transpose()
signatures_bool = np.dot(data, hash_function) > 0 if len(data) == len(np.array(hash_function)[:, 0]):
signatures_int = np.packbits(signatures_bool) signatures_bool = np.dot(data, hash_function) > 0
output = signatures_bool.astype(int)[0]
print(output)
return output
print('starting hashing')
t0 = time()
all_signatures = []
batch_size = 20
data = data.transpose()
temp = np.zeros((batch_size, window_size + batch_size - 1))
for h in range(hash_size):
for i in range(batch_size):
temp[i, i:i + window_size] = hash_function[:, h]
print('first: ' + str(time() - t0))
signatures_bool = [np.dot(temp, data[i:i + window_size + batch_size - 1]) > 0 for i in range(0, len(data) - window_size, batch_size)]
# signatures_bool = []
# for i in range(0, len(data) - window_size, batch_size):
# if i % 1000000 == 0:
# print(i)
# signatures_bool.append(np.dot(temp, data[i:i + window_size + batch_size - 1]) > 0)
print('second: ' + str(time() - t0))
all_signatures.append(np.array(signatures_bool).flatten().astype(int))
print('done')
signatures_int = np.packbits(np.stack(np.array(all_signatures), axis=1), axis=0).flatten()
return signatures_int.tolist(), hash_function return signatures_int.tolist(), hash_function
lsh_function = calculate_signatures_cumsum_weights
lsh_function = calculate_signatures_new
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
def index(): def index():
...@@ -61,11 +86,10 @@ def read_data(): ...@@ -61,11 +86,10 @@ def read_data():
} }
response = orjson.dumps(response) response = orjson.dumps(response)
print('Data read: ' + str(time()-t0)) print('Data read: ' + str(time()-t0))
query = data[10000:11200] # query = data[12000:24000]
print(query) # loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True)
loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True) # print(data[loc:loc+120])
print(data[loc:loc+120]) # print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
return response return response
@app.route('/create-windows', methods=['POST']) @app.route('/create-windows', methods=['POST'])
...@@ -74,14 +98,10 @@ def create_windows(): ...@@ -74,14 +98,10 @@ def create_windows():
if reload: if reload:
raw_data = request.json raw_data = request.json
window_size = int(raw_data['parameters']["windowsize"]) window_size = int(raw_data['parameters']["windowsize"])
data = bigwig.chunk( chromsize = bbi.chromsizes('test.bigWig')['chr1']
'test.bigWig', step_size = chromsize / 10000
12000, data = bigwig.get('test.bigWig', 'chr1', 0, chromsize, 20000000)
int(12000 / window_size), data = (data - np.min(data))/np.ptp(data)
int(12000 / 6),
['chr1'],
verbose=True,
)
print(data.shape) print(data.shape)
np.save('processed-data', data) np.save('processed-data', data)
print('Windows created: ' + str(time()-t0)) print('Windows created: ' + str(time()-t0))
...@@ -116,7 +136,10 @@ def lsh(data, window_size, hash_size, table_size): ...@@ -116,7 +136,10 @@ def lsh(data, window_size, hash_size, table_size):
for index in range(table_size): for index in range(table_size):
signatures, hash_function = lsh_function(data, window_size=window_size, hash_size=hash_size) signatures, hash_function = lsh_function(data, window_size=window_size, hash_size=hash_size)
table = {k: v for v, k in enumerate(signatures)} print('creating dictionary')
table = defaultdict(list)
for v, k in enumerate(signatures):
table[k].append(v)
tables.append(table) tables.append(table)
tables_hash_function.append(hash_function.tolist()) tables_hash_function.append(hash_function.tolist())
...@@ -135,7 +158,7 @@ def similarity(): ...@@ -135,7 +158,7 @@ def similarity():
for t in tables.values(): for t in tables.values():
signature = lsh_function(window, hash_function=t["hash"]) signature = lsh_function(window, hash_function=t["hash"])
neighbours.extend(t["entries"][signature]) neighbours.extend(t["entries"][str(signature)])
neighbours_with_frequency = dict(Counter(neighbours)) neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items(): for index, frequency in neighbours_with_frequency.items():
output[str(frequency)].append(index) output[str(frequency)].append(index)
...@@ -208,7 +231,7 @@ def query(): ...@@ -208,7 +231,7 @@ def query():
raw_data = orjson.loads(request.data) raw_data = orjson.loads(request.data)
window = raw_data['window'] window = raw_data['window']
if isinstance(window, int): if isinstance(window, int):
output = np.load('processed-data.npy')[window] output = np.load('processed-data.npy')[window:window+12000]
response = orjson.dumps(output.tolist()) response = orjson.dumps(output.tolist())
print("Query done: " + str(time() - t0)) print("Query done: " + str(time() - t0))
return response return response
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment