Skip to content
Snippets Groups Projects
Commit cce736a6 authored by IsolatedSushi's avatar IsolatedSushi
Browse files

Merge branch 'Fix-Services' of...

Merge branch 'Fix-Services' of https://git.science.uu.nl/vig/provee/dummy-vue-grpc into Fix-Services
parents a014051c 66d255d0
No related branches found
No related tags found
1 merge request: !13 Major update
import numpy as np
import faiss
from tabulate import tabulate
fileName = "word2vec_reddit_300_10000.txt"

# Read in data: one entry per line, "<word> <v1> <v2> ... <vN>", with the
# fields separated by single spaces.
# The encoding is given explicitly so that decoding of non-ASCII words does not
# depend on the platform's default locale.
with open(fileName, encoding="utf-8") as f:
    # rstrip() removes the newline and any trailing blanks so the last field
    # is a clean number; whitespace-only lines are skipped entirely.
    content = [line.rstrip().split(' ') for line in f if line.strip()]

if not content:
    # Fail with a clear message instead of an IndexError further down.
    raise ValueError(fileName + " contains no embedding rows")

# ids[i] is the word stored on row i; wordToID is the reverse lookup
# (if a word occurs more than once, the last row wins).
ids = [row[0] for row in content]
wordToID = {word: position for position, word in enumerate(ids)}

# Everything after the word is a vector component.
db = [[np.float32(value) for value in row[1:]] for row in content]
# An explicit dtype makes rows of unequal length fail here, loudly.
allData = np.array(db, dtype=np.float32)

index = faiss.IndexFlatL2(allData.shape[1])  # build the index
index.add(allData)  # add vectors to the index
def printClosest(indices, distances):
    """Print a Word/Distance table for the first row of a search result.

    indices   -- 2-D array of row numbers into ``ids`` (as returned by
                 ``index.search``); only ``indices[0]`` is used.
    distances -- 2-D array of matching distances; only ``distances[0]``
                 is used.

    Returns None; the table is written to standard output.
    """
    # faiss fills a result row with label -1 when it finds fewer than k
    # neighbours. ids[-1] would silently display the last word of the
    # vocabulary for those slots, so they are dropped here.
    rows = [
        (ids[position], distance)
        for position, distance in zip(indices[0], distances[0])
        if position >= 0
    ]
    tableForm = tabulate(rows, headers=['Word', 'Distance'])
    print(tableForm)
def knn(word, k):
    """Print the k entries nearest to the vector stored for ``word``.

    When ``word`` is not a key of ``wordToID`` a notice is printed and
    nothing is searched. Returns None in both cases.
    """
    row = wordToID.get(word)
    if row is None:
        print(word, "not in the set")
        return
    knnVector(allData[row], k)
def knnVector(vector, k):
    """Search the index for the k nearest entries to ``vector`` and print them.

    vector -- one query vector (a 1-D sequence of numbers with the same
              length as the indexed vectors).
    k      -- number of neighbours to request from the index.

    Returns None; results are printed via ``printClosest``.
    """
    # The index expects a 2-D float32 matrix with one query per row. Coercing
    # here lets callers pass plain lists or float64 arrays as well.
    query = np.asarray([vector], dtype=np.float32)
    distances, neighbours = index.search(query, k)
    printClosest(neighbours, distances)
# Example: knn("woman", 5) prints the 5 entries nearest to "woman".
def knnSemantic(word1, word2, word3, k):
    """Print the k entries nearest to vec(word1) - vec(word2) + vec(word3).

    All three words must be keys of ``wordToID``; otherwise a notice is
    printed and no search is run. Returns None in both cases.
    """
    terms = (word1, word2, word3)
    if not all(term in wordToID for term in terms):
        print("a word is not in the set")
        return
    first, second, third = (allData[wordToID[term]] for term in terms)
    knnVector(first - second + third, k)
# Demo query: print the 10 entries nearest to
# vec("king") - vec("man") + vec("woman").
knnSemantic(word1="king", word2="man", word3="woman", k=10)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment