Next Tuesday, 25 January, around 21:30, we'll be upgrading to GitLab version 14.7.

Commit a224e6da authored by Kruyff, D.L.W. (Dylan)
Browse files

Trying simple hashing with low memory use (not working)


Former-commit-id: bb598b63
parent 9cabbe5f
......@@ -17,9 +17,9 @@ export class CacheService {
private _sliderValue;
private _queryWindow;
public windowSize = 120;
public nrOfTables = 20;
public hashSize = 8;
public windowSize = 200;
public nrOfTables = 5;
public hashSize = 2;
public stepSize = 200;
public querySelectionMode = true;
......
......@@ -126,13 +126,17 @@
<option name="oldMeFiltersMigrated" value="true" />
</component>
<component name="WindowStateProjectService">
<state x="686" y="355" width="610" height="403" key="#com.intellij.fileTypes.FileTypeChooser" timestamp="1600727680781">
<state x="686" y="355" width="664" height="403" key="#com.intellij.fileTypes.FileTypeChooser" timestamp="1601384380854">
<screen x="72" y="27" width="1848" height="1053" />
</state>
<state x="686" y="355" width="610" height="403" key="#com.intellij.fileTypes.FileTypeChooser/72.27.1848.1053@72.27.1848.1053" timestamp="1600727680781" />
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl" timestamp="1600726193087">
<state x="686" y="355" width="664" height="403" key="#com.intellij.fileTypes.FileTypeChooser/72.27.1848.1053@72.27.1848.1053" timestamp="1601384380854" />
<state x="721" y="422" width="1200" height="800" key="DiffContextDialog" timestamp="1601242420342">
<screen x="72" y="27" width="1848" height="1053" />
</state>
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053" timestamp="1600726193087" />
<state x="721" y="422" width="1200" height="800" key="DiffContextDialog/72.27.1848.1053@72.27.1848.1053" timestamp="1601242420342" />
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl" timestamp="1601285087193">
<screen x="72" y="27" width="1848" height="1053" />
</state>
<state x="779" y="311" width="424" height="491" key="FileChooserDialogImpl/72.27.1848.1053@72.27.1848.1053" timestamp="1601285087193" />
</component>
</project>
\ No newline at end of file
......@@ -13,8 +13,9 @@ import bigwig
import bbi
from bitarray import bitarray
import _ucrdtw
from scipy.sparse import dia_matrix
reload = False
reload = True
app = Flask(__name__)
CORS(app)
......@@ -35,14 +36,38 @@ def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None,
signatures_int = np.packbits(signatures_bool)
return signatures_int.tolist(), hash_function
def calculate_signatures_cumsum_weights(data, window_size=None, hash_size=None, hash_function=None):
def calculate_signatures_new(data, window_size=None, hash_size=None, hash_function=None):
    """Hash one window, or every sliding window of ``data``, by projection sign.

    Each column of ``hash_function`` is one projection vector; a signature bit
    is 1 when the dot product of a window with that column is positive.

    Args:
        data: Sequence of samples. If its length equals the number of rows of
            ``hash_function`` it is treated as a single query window;
            otherwise every contiguous window of that length is hashed.
            NOTE(review): the sliding path assumes 1-D data - confirm.
        window_size: Samples per window. Only used to draw a new hash
            function; otherwise the value is taken from ``hash_function``.
        hash_size: Number of projection vectors (bits per signature). Only
            used to draw a new hash function.
        hash_function: Optional array-like of shape (window_size, hash_size).
            When ``None`` a new one is drawn, each column being the
            cumulative sum of ``window_size`` uniform(-1, 1) samples.

    Returns:
        Single-window case: the first element of the 0/1 projection-sign
        array (no tuple).
        Sliding case: ``(signatures, hash_function)`` where ``signatures`` is
        a list with one packed byte per window (hash bits in the most
        significant positions) when ``hash_size <= 8``; larger hash sizes
        yield ``ceil(hash_size / 8)`` consecutive bytes per window.
    """
    if hash_function is None:
        hash_function = np.array(
            [np.cumsum(np.random.uniform(-1, 1, window_size)) for _ in range(hash_size)]
        ).transpose()
    # Accept plain lists too; an ndarray passes through as the same object.
    hash_function = np.asarray(hash_function)
    window_size, hash_size = hash_function.shape

    if len(data) == window_size:
        signatures_bool = np.dot(data, hash_function) > 0
        # NOTE(review): only the first projection bit is returned here, while
        # the sliding path packs all bits per window - confirm which key
        # format the lookup tables are expected to use.
        output = signatures_bool.astype(int)[0]
        print(output)
        return output

    print('starting hashing')
    t0 = time()
    batch_size = 20
    data = np.asarray(data).transpose()
    n_windows = len(data) - window_size + 1
    # A batch of `batch_size` consecutive windows spans this many samples.
    span = window_size + batch_size - 1
    # Row r holds one projection vector shifted right by r samples, so a
    # single matrix product hashes a whole batch without materialising every
    # window.
    temp = np.zeros((batch_size, span))
    all_signatures = []
    for h in range(hash_size):
        for row in range(batch_size):
            temp[row, row:row + window_size] = hash_function[:, h]
        print('first: ' + str(time() - t0))
        batches = []
        for start in range(0, n_windows, batch_size):
            chunk = data[start:start + span]
            # The last chunk may be short: keep only the rows whose window
            # fits completely inside it (their trailing columns are zero, so
            # truncating the columns does not change the product).
            valid = len(chunk) - window_size + 1
            batches.append(np.dot(temp[:valid, :len(chunk)], chunk) > 0)
        print('second: ' + str(time() - t0))
        if batches:
            all_signatures.append(np.concatenate(batches).ravel())
        else:
            # Data shorter than one window: no signatures for this bit.
            all_signatures.append(np.zeros(0, dtype=bool))
    print('done')
    # Shape (n_windows, hash_size); pack along axis 1 so each window's bits
    # form its own byte and list position i corresponds to window i.
    bit_matrix = np.stack(all_signatures, axis=1)
    signatures_int = np.packbits(bit_matrix, axis=1).flatten()
    return signatures_int.tolist(), hash_function
lsh_function = calculate_signatures_cumsum_weights
lsh_function = calculate_signatures_new
@app.route('/', methods=['GET'])
def index():
......@@ -61,11 +86,10 @@ def read_data():
}
response = orjson.dumps(response)
print('Data read: ' + str(time()-t0))
query = data[10000:11200]
print(query)
loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True)
print(data[loc:loc+120])
print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
# query = data[12000:24000]
# loc, dist = _ucrdtw.ucrdtw(data, query, 0.05, True)
# print(data[loc:loc+120])
# print('found query: ' + str(loc) + '[' + str(time()-t0) + ']')
return response
@app.route('/create-windows', methods=['POST'])
......@@ -74,14 +98,10 @@ def create_windows():
if reload:
raw_data = request.json
window_size = int(raw_data['parameters']["windowsize"])
data = bigwig.chunk(
'test.bigWig',
12000,
int(12000 / window_size),
int(12000 / 6),
['chr1'],
verbose=True,
)
chromsize = bbi.chromsizes('test.bigWig')['chr1']
step_size = chromsize / 10000
data = bigwig.get('test.bigWig', 'chr1', 0, chromsize, 20000000)
data = (data - np.min(data))/np.ptp(data)
print(data.shape)
np.save('processed-data', data)
print('Windows created: ' + str(time()-t0))
......@@ -116,7 +136,10 @@ def lsh(data, window_size, hash_size, table_size):
for index in range(table_size):
signatures, hash_function = lsh_function(data, window_size=window_size, hash_size=hash_size)
table = {k: v for v, k in enumerate(signatures)}
print('creating dictionary')
table = defaultdict(list)
for v, k in enumerate(signatures):
table[k].append(v)
tables.append(table)
tables_hash_function.append(hash_function.tolist())
......@@ -135,7 +158,7 @@ def similarity():
for t in tables.values():
signature = lsh_function(window, hash_function=t["hash"])
neighbours.extend(t["entries"][signature])
neighbours.extend(t["entries"][str(signature)])
neighbours_with_frequency = dict(Counter(neighbours))
for index, frequency in neighbours_with_frequency.items():
output[str(frequency)].append(index)
......@@ -208,7 +231,7 @@ def query():
raw_data = orjson.loads(request.data)
window = raw_data['window']
if isinstance(window, int):
output = np.load('processed-data.npy')[window]
output = np.load('processed-data.npy')[window:window+12000]
response = orjson.dumps(output.tolist())
print("Query done: " + str(time() - t0))
return response
......
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment