diff --git a/backend/README.md b/backend/README.md index 0bf4cc9068ad572fa049da77c557bace18937b52..06e4afbbe434979d8eac692b0506f9fc5a8aa65f 100644 --- a/backend/README.md +++ b/backend/README.md @@ -1,5 +1,16 @@ # provee-backend +## Local development +Run the gateway in `\webSocketGateway` using `npm start`. +Run the projector in `\gRPCProjector\node` or `\gRPCProjector\python` using `npm start` or `python3 grpcServer.py` respectively. + + +## Deploy with kubernetes + +With minikube you can deploy the deployment.yaml file. +Then run `minikube tunnel` to expose the external IP + + ## Development `Dockerfile.dev` defines a development container with `nodemon`. It requires to attach the source code directory as volume for automatic reloading of code changes. diff --git a/backend/deploymentNew.yaml b/backend/deploymentNew.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41a12ba2b194fd66d495e584cef6e3e20101e41b --- /dev/null +++ b/backend/deploymentNew.yaml @@ -0,0 +1,132 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: proveespace +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: proveegatewaydeployment + namespace: proveespace +spec: + replicas: 1 + selector: + matchLabels: + app: gateway + strategy: {} + template: + metadata: + labels: + app: gateway + spec: + containers: + - image: isolatedsushi/gateway + name: gateway + env: + - name: KNN_TARGET + value: "proveeknnservice:50052" + - name: PROJECTOR_TARGET + value: "proveeprojectorservice:50051" + ports: + - containerPort: 9898 + name: gateway + resources: {} +status: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: proveegatewayservice + namespace: proveespace +spec: + selector: + app: gateway + ports: + - name: grpc + protocol: TCP + port: 9898 + targetPort: 9898 + type: LoadBalancer + + + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: proveeprojectordeployment + namespace: proveespace +spec: + replicas: 1 + selector: + matchLabels: + app: grpcprojector + strategy: {} + template: + metadata: + labels: + app: grpcprojector + spec: + containers: + - name: grpcprojector + image: isolatedsushi/grpcprojector + ports: + - containerPort: 50051 + resources: {} +status: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: proveeprojectorservice + namespace: proveespace +spec: + selector: + app: grpcprojector + ports: + - name: grpcprojector + protocol: TCP + port: 50051 + targetPort: 50051 + type: LoadBalancer + + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: proveeknndeployment + namespace: proveespace +spec: + replicas: 1 + selector: + matchLabels: + app: grpcknn + strategy: {} + template: + metadata: + labels: + app: grpcknn + spec: + containers: + - name: grpcknn + image: isolatedsushi/grpcknn + ports: + - containerPort: 50052 + resources: {} +status: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: proveeknnservice + namespace: proveespace +spec: + selector: + app: grpcknn + ports: + - name: grpcknn + protocol: TCP + port: 50052 + targetPort: 50052 + type: LoadBalancer \ No newline at end of file diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 0c3c1dfa2878aadf63fdb092519ca3ff24f66b01..b919d8e7a9e151cf6d87babc7bf8120cb8fbbbf8 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,24 +1,23 @@ version: '3.8' services: - envoy: - image: envoyproxy/envoy:v1.15.0 - ports: - - "8080:8080" - volumes: - - ./envoy/envoy.yaml:/etc/envoy/envoy.yaml:ro gateway: build: - context: ./gateway - dockerfile: Dockerfile.dev - ports: - - "9090:9090" - volumes: - - ./gateway:/app + context: ./webSocketGateway + dockerfile: Dockerfile environment: - - PROJECTOR_URL=http://projector:8090 - projector: + - PROJECTOR_TARGET=grpcprojector:50051 + - KNN_TARGET=grpcknn:50052 + ports: + - "9898:9898" + grpcprojector: build: - context: ./projector - dockerfile: Dockerfile.dev - volumes: - - ./projector:/app \ No newline at end of file + context: ./gRPCProjector/python + dockerfile: Dockerfile + ports: + - "50051:50051" + grpcknn: + build: + context: ./grpcKNN + dockerfile: Dockerfile + ports: + - "50052:50052" diff --git a/backend/gRPCProjector/node/src/grpcServer.js b/backend/gRPCProjector/node/src/grpcServer.js index 01e333448f90ab7d30d8883ed8de408e47863494..ee5c2de8f8440d859696802195895fdb1f2631e8 100644 --- a/backend/gRPCProjector/node/src/grpcServer.js +++ b/backend/gRPCProjector/node/src/grpcServer.js @@ -60,6 +60,10 @@ function echoPoint(call, hdvector){ var response = {id : pointID, x: hdvector[0], y: hdvector[1]}; call.write(response); pointID += 1; + + if(pointID%1000==0){ + console.log("Processed " + pointID + " points") + } } //In case of no points diff --git a/backend/gRPCProjector/python/Dockerfile b/backend/gRPCProjector/python/Dockerfile index a824e44acfe1d055d23a906976283040dc5b0b18..efb307d71aa986878d72e31bc487c61f712d343e 100644 --- a/backend/gRPCProjector/python/Dockerfile +++ b/backend/gRPCProjector/python/Dockerfile @@ -3,6 +3,5 @@ WORKDIR /code COPY requirements.txt . RUN pip install -r requirements.txt COPY /. . -EXPOSE 50051 ENV PYTHONUNBUFFERED=1 CMD ["python3","-u", "./grpcServer.py"] \ No newline at end of file diff --git a/backend/gRPCProjector/python/__pycache__/projector_pb2.cpython-38.pyc b/backend/gRPCProjector/python/__pycache__/projector_pb2.cpython-38.pyc index 3cbd6b4a5c76bdd4ba014460a1c79fa1730ea8d8..f75695b668d0271b578b691d105712252c1b70c1 100644 Binary files a/backend/gRPCProjector/python/__pycache__/projector_pb2.cpython-38.pyc and b/backend/gRPCProjector/python/__pycache__/projector_pb2.cpython-38.pyc differ diff --git a/backend/gRPCProjector/python/oldGenerated/projector_pb2.py b/backend/gRPCProjector/python/oldGenerated/projector_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..3f34f7013e3463ae66d6eccbf6a8ba4069907ad1 --- /dev/null +++ b/backend/gRPCProjector/python/oldGenerated/projector_pb2.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: projector.proto + +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='projector.proto', + package='provee', + syntax='proto3', + serialized_options=b'\n\017nl.uuvig.proveeB\022ProveProjectorGRCPP\001\242\002\006PROVEE', + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n\x0fprojector.proto\x12\x06provee\x1a\x1bgoogle/protobuf/empty.proto\")\n\x05Point\x12\n\n\x02id\x18\x01 \x01(\x05\x12\t\n\x01x\x18\x02 \x01(\x02\x12\t\n\x01y\x18\x03 \x01(\x02\"D\n\x0bTrainingSet\x12\x0f\n\x07modelid\x18\x01 \x01(\t\x12$\n\x04rows\x18\x02 \x03(\x0b\x32\x16.provee.TrainingSetRow\"2\n\x0eTrainingSetRow\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x08hdvector\x18\x02 \x03(\x01\x42\x02\x10\x01\x32\xf9\x01\n\tProjector\x12\x37\n\x05start\x12\x16.google.protobuf.Empty\x1a\x16.google.protobuf.Empty\x12\x36\n\x04stop\x12\x16.google.protobuf.Empty\x1a\x16.google.protobuf.Empty\x12\x42\n\x13getProjectionPoints\x12\x16.provee.TrainingSetRow\x1a\r.provee.Point\"\x00(\x01\x30\x01\x12\x37\n\ngetUpdates\x12\x16.google.protobuf.Empty\x1a\r.provee.Point\"\x00\x30\x01\x42\x30\n\x0fnl.uuvig.proveeB\x12ProveProjectorGRCPP\x01\xa2\x02\x06PROVEEb\x06proto3' + , + dependencies=[google_dot_protobuf_dot_empty__pb2.DESCRIPTOR,]) + + + + +_POINT = _descriptor.Descriptor( + name='Point', + full_name='provee.Point', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='id', full_name='provee.Point.id', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='x', full_name='provee.Point.x', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='y', full_name='provee.Point.y', index=2, + number=3, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=56, + serialized_end=97, +) + + +_TRAININGSET = _descriptor.Descriptor( + name='TrainingSet', + full_name='provee.TrainingSet', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='modelid', full_name='provee.TrainingSet.modelid', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='rows', full_name='provee.TrainingSet.rows', index=1, + number=2, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=99, + serialized_end=167, +) + + +_TRAININGSETROW = _descriptor.Descriptor( + name='TrainingSetRow', + full_name='provee.TrainingSetRow', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='id', full_name='provee.TrainingSetRow.id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='hdvector', full_name='provee.TrainingSetRow.hdvector', index=1, + number=2, type=1, cpp_type=5, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=b'\020\001', file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=169, + serialized_end=219, +) + +_TRAININGSET.fields_by_name['rows'].message_type = _TRAININGSETROW +DESCRIPTOR.message_types_by_name['Point'] = _POINT +DESCRIPTOR.message_types_by_name['TrainingSet'] = _TRAININGSET +DESCRIPTOR.message_types_by_name['TrainingSetRow'] = _TRAININGSETROW +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Point = _reflection.GeneratedProtocolMessageType('Point', (_message.Message,), { + 'DESCRIPTOR' : _POINT, + '__module__' : 'projector_pb2' + # @@protoc_insertion_point(class_scope:provee.Point) + }) +_sym_db.RegisterMessage(Point) + +TrainingSet = _reflection.GeneratedProtocolMessageType('TrainingSet', (_message.Message,), { + 'DESCRIPTOR' : _TRAININGSET, + '__module__' : 'projector_pb2' + # @@protoc_insertion_point(class_scope:provee.TrainingSet) + }) +_sym_db.RegisterMessage(TrainingSet) + +TrainingSetRow = _reflection.GeneratedProtocolMessageType('TrainingSetRow', (_message.Message,), { + 'DESCRIPTOR' : _TRAININGSETROW, + '__module__' : 'projector_pb2' + # @@protoc_insertion_point(class_scope:provee.TrainingSetRow) + }) +_sym_db.RegisterMessage(TrainingSetRow) + + +DESCRIPTOR._options = None +_TRAININGSETROW.fields_by_name['hdvector']._options = None + +_PROJECTOR = _descriptor.ServiceDescriptor( + name='Projector', + full_name='provee.Projector', + file=DESCRIPTOR, + index=0, + serialized_options=None, + create_key=_descriptor._internal_create_key, + serialized_start=222, + serialized_end=471, + methods=[ + _descriptor.MethodDescriptor( + name='start', + full_name='provee.Projector.start', + index=0, + containing_service=None, + input_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), + _descriptor.MethodDescriptor( + name='stop', + full_name='provee.Projector.stop', + index=1, + containing_service=None, + input_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), + _descriptor.MethodDescriptor( + name='getProjectionPoints', + full_name='provee.Projector.getProjectionPoints', + index=2, + containing_service=None, + input_type=_TRAININGSETROW, + output_type=_POINT, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), + _descriptor.MethodDescriptor( + name='getUpdates', + full_name='provee.Projector.getUpdates', + index=3, + containing_service=None, + input_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + output_type=_POINT, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), +]) +_sym_db.RegisterServiceDescriptor(_PROJECTOR) + +DESCRIPTOR.services_by_name['Projector'] = _PROJECTOR + +# @@protoc_insertion_point(module_scope) diff --git a/backend/gRPCProjector/python/oldGenerated/projector_pb2_grpc.py b/backend/gRPCProjector/python/oldGenerated/projector_pb2_grpc.py new file mode 100644 index 0000000000000000000000000000000000000000..f36b3e05eee43d386a43cd7478846f73688d6141 --- /dev/null +++ b/backend/gRPCProjector/python/oldGenerated/projector_pb2_grpc.py @@ -0,0 +1,183 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +import projector_pb2 as projector__pb2 + + +class ProjectorStub(object): + """Interface exported by the server. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.start = channel.unary_unary( + '/provee.Projector/start', + request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + ) + self.stop = channel.unary_unary( + '/provee.Projector/stop', + request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + ) + self.getProjectionPoints = channel.stream_stream( + '/provee.Projector/getProjectionPoints', + request_serializer=projector__pb2.TrainingSetRow.SerializeToString, + response_deserializer=projector__pb2.Point.FromString, + ) + self.getUpdates = channel.unary_stream( + '/provee.Projector/getUpdates', + request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + response_deserializer=projector__pb2.Point.FromString, + ) + + +class ProjectorServicer(object): + """Interface exported by the server. + """ + + def start(self, request, context): + """A simple RPC. + + Start the Projector calculation + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def stop(self, request, context): + """Stop the Projector calculation + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def getProjectionPoints(self, request_iterator, context): + """A server-to-client streaming RPC. + + Obtains the Projection Points in 2D given the trainings values stored in a row format. Results are + streamed rather than returned at once (e.g. in a response message with a + repeated field). + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def getUpdates(self, request, context): + """A server-to-client streaming RPC. + + Obtains the Projection Points in 2D given the trainings values stored in a row format. Results are + streamed rather than returned at once (e.g. in a response message with a + repeated field). + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_ProjectorServicer_to_server(servicer, server): + rpc_method_handlers = { + 'start': grpc.unary_unary_rpc_method_handler( + servicer.start, + request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + 'stop': grpc.unary_unary_rpc_method_handler( + servicer.stop, + request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + 'getProjectionPoints': grpc.stream_stream_rpc_method_handler( + servicer.getProjectionPoints, + request_deserializer=projector__pb2.TrainingSetRow.FromString, + response_serializer=projector__pb2.Point.SerializeToString, + ), + 'getUpdates': grpc.unary_stream_rpc_method_handler( + servicer.getUpdates, + request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + response_serializer=projector__pb2.Point.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'provee.Projector', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class Projector(object): + """Interface exported by the server. + """ + + @staticmethod + def start(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/provee.Projector/start', + google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + google_dot_protobuf_dot_empty__pb2.Empty.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def stop(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/provee.Projector/stop', + google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + google_dot_protobuf_dot_empty__pb2.Empty.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def getProjectionPoints(request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.stream_stream(request_iterator, target, '/provee.Projector/getProjectionPoints', + projector__pb2.TrainingSetRow.SerializeToString, + projector__pb2.Point.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def getUpdates(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_stream(request, target, '/provee.Projector/getUpdates', + google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + projector__pb2.Point.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/gRPCProjector/python/protos/README.md b/backend/gRPCProjector/python/protos/README.md deleted file mode 100644 index 128e4b2fa1fce1c43af79e2956e931e79b266d90..0000000000000000000000000000000000000000 --- a/backend/gRPCProjector/python/protos/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# protos - -Contains all Protos (gRCP) files that are used to communicate between services in Provee \ No newline at end of file diff --git a/backend/grpcKNN/Dockerfile b/backend/grpcKNN/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..f0ce763951403bcc161003eccfd66ede16a19de6 --- /dev/null +++ b/backend/grpcKNN/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.8 +WORKDIR /code +COPY requirements.txt . +RUN pip install -r requirements.txt +COPY /. . +ENV PYTHONUNBUFFERED=1 +CMD ["python3","-u", "./knnServer.py"] \ No newline at end of file diff --git a/backend/grpcKNN/faissTest.py b/backend/grpcKNN/faissTest.py new file mode 100644 index 0000000000000000000000000000000000000000..91b5a4e4048e501d432b4243e61e49bc0783d594 --- /dev/null +++ b/backend/grpcKNN/faissTest.py @@ -0,0 +1,58 @@ +import numpy as np +import faiss +from tabulate import tabulate +fileName = "word2vec_reddit_300_10000.txt" + + +#Read in data +with open(fileName) as f: + content = f.readlines() + content = [x.split(' ') for x in content] + ids = [x[0] for x in content] + wordToID = {k: v for v,k in enumerate(ids)} + db = [x[1:] for x in content] + db = [[np.float32(y) for y in x] for x in db] + allData = np.array(db) + +index = faiss.IndexFlatL2(len(allData[0])) # build the index +index.add(allData) # add vectors to the index + + +def printClosest(indices, distances): + words = [ids[x] for x in indices[0]] + tableForm = tabulate(list(zip(words,distances[0])),headers=['Word','Distance']) + print(tableForm) + +def knn(word,k): + if word not in wordToID: + print(word, "not in the set") + return + + wordIndex = wordToID[word] + vector = allData[wordIndex] + knnVector(vector,k) + + + +def knnVector(vector,k): + D, I = index.search(np.asarray([vector]), k) # sanity check + printClosest(I, D) + +#knn("woman",5) + + + + +def knnSemantic(word1,word2,word3,k): + if word1 not in wordToID or word2 not in wordToID or word3 not in wordToID: + print("a word is not in the set") + return + vector1 = allData[wordToID[word1]] + vector2 = allData[wordToID[word2]] + vector3 = allData[wordToID[word3]] + + knnVector(vector1 - vector2 + vector3,k) + +knnSemantic("king","man","woman",10) + + diff --git a/backend/grpcKNN/knnServer.py b/backend/grpcKNN/knnServer.py new file mode 100644 index 0000000000000000000000000000000000000000..63298ca82d78c1ae3712983019f3c32430e35c8d --- /dev/null +++ b/backend/grpcKNN/knnServer.py @@ -0,0 +1,115 @@ +from concurrent import futures +import grpc +import knn_pb2_grpc as rpc +import faiss +import knn_pb2 as knn +import numpy as np +import re +import operator +from tabulate import tabulate + +class Data(): + def __init__(self): + self.allData = [] + self.wordToID = {} + self.ids = [] + self.index = None + +class KNNService(rpc.KNNServicer): + def __init__(self): + self.clientList = [] + self.pointID = 0 + + #Get client ID + def metaToID(self,context): + try: + metadict = dict(context.invocation_metadata()) + userid = int(metadict['id']) + return userid + except Exception as e: + print("Error in metaToID (forgot to add user id?)") + print(e) + + #Store all the data from a new point (can be improved ater) + def storePoint(self,message, context): + try: + dataObject = self.clientList[self.metaToID(context)] + hdvector = [np.float32(x) for x in message.hdvector] + + if not dataObject.index: + dataObject.index = faiss.IndexFlatL2(len(hdvector)) + dataObject.ids.append(message.id) + dataObject.allData.append(hdvector) + dataObject.wordToID[message.id] = len(dataObject.allData)-1 + dataObject.index.add(np.asarray([hdvector])) + except Exception as e: + print(e) + + #Supply client with ID so that it can give it later on with metadata to identify + def getIDfromServer(self, request,context): + clientID = len(self.clientList) + self.clientList.append(Data()) + print("New id: ",clientID) + return knn.ID(id=str(clientID)) + + def calculateFinalVector(self,dataObject,request): + ops = { "+": operator.add, "-": operator.sub } + + strippedWord = "".join(request.words.split()) + wordList = re.split('-|\+',strippedWord) + + wordList = [x.lower() for x in wordList] + print("Wordlist",wordList) + for word in wordList: + if word not in dataObject.wordToID: + print(word,"not in set yet?") + return knn.Neighbours() + + orderedOperators = ''.join(c for c in request.words if c in ["-","+"]) + firstID = dataObject.wordToID[wordList[0]] + resultedVector = np.array(dataObject.allData[firstID]) + + for i in range(1,len(wordList)): + nextID = dataObject.wordToID[wordList[i]] + resultedVector = ops[orderedOperators[i-1]](resultedVector, np.array(dataObject.allData[nextID])) + + return resultedVector + #Do simple linalg expression with 3 vector, improve later + def getKNNRequest(self,request,context): + dataObject = self.clientList[self.metaToID(context)] + + resultedVector = self.calculateFinalVector(dataObject,request) + neighbours = self.knnVector(resultedVector,10,dataObject) + return neighbours + + #Perform actual KNN + def knnVector(self,vector,k,dataObject): + print("vector",vector) + D, I = dataObject.index.search(np.asarray([vector]), k) + words = [dataObject.ids[x] for x in I[0]] + distances = D[0] + returnRows = [knn.Row(id=x,distance=y) for x,y in zip(words,distances)] + neighbours = knn.Neighbours(rows=returnRows) + return neighbours + + def sendProjectionPoints(self, request_iterator, context): + print("Connected") + for message in request_iterator: + self.storePoint(message,context) + + if self.pointID % 1000 == 0: + print("Received {} points!".format(self.pointID)) + self.pointID+= 1 + +def serveServer(): + port = '[::]:50052' + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + rpc.add_KNNServicer_to_server(KNNService(), server) + server.add_insecure_port(port) + server.start() + print("Listening on port: " + port) + server.wait_for_termination() + +if __name__ == '__main__': + print("Starting the KNN server") + serveServer() diff --git a/backend/grpcKNN/knn_pb2.py b/backend/grpcKNN/knn_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..4f830a62bf7d51dc5b433cf9f3c3ca68b929f89f --- /dev/null +++ b/backend/grpcKNN/knn_pb2.py @@ -0,0 +1,302 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: knn.proto + +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='knn.proto', + package='provee', + syntax='proto3', + serialized_options=b'\n\017nl.uuvig.proveeB\022ProveProjectorGRCPP\001\242\002\006PROVEE', + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n\tknn.proto\x12\x06provee\x1a\x1bgoogle/protobuf/empty.proto\"\x10\n\x02ID\x12\n\n\x02id\x18\x01 \x01(\t\"&\n\nknnRequest\x12\t\n\x01k\x18\x01 \x01(\x05\x12\r\n\x05words\x18\x02 \x01(\t\"\'\n\nNeighbours\x12\x19\n\x04rows\x18\x01 \x03(\x0b\x32\x0b.provee.Row\"#\n\x03Row\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08\x64istance\x18\x02 \x01(\x02\"2\n\x0eTrainingSetRow\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x08hdvector\x18\x02 \x03(\x01\x42\x02\x10\x01\x32\xc5\x01\n\x03KNN\x12J\n\x14sendProjectionPoints\x12\x16.provee.TrainingSetRow\x1a\x16.google.protobuf.Empty\"\x00(\x01\x12\x39\n\rgetKNNRequest\x12\x12.provee.knnRequest\x1a\x12.provee.Neighbours\"\x00\x12\x37\n\x0fgetIDfromServer\x12\x16.google.protobuf.Empty\x1a\n.provee.ID\"\x00\x42\x30\n\x0fnl.uuvig.proveeB\x12ProveProjectorGRCPP\x01\xa2\x02\x06PROVEEb\x06proto3' + , + dependencies=[google_dot_protobuf_dot_empty__pb2.DESCRIPTOR,]) + + + + +_ID = _descriptor.Descriptor( + name='ID', + full_name='provee.ID', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='id', full_name='provee.ID.id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=50, + serialized_end=66, +) + + +_KNNREQUEST = _descriptor.Descriptor( + name='knnRequest', + full_name='provee.knnRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='k', full_name='provee.knnRequest.k', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='words', full_name='provee.knnRequest.words', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=68, + serialized_end=106, +) + + +_NEIGHBOURS = _descriptor.Descriptor( + name='Neighbours', + full_name='provee.Neighbours', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='rows', full_name='provee.Neighbours.rows', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=108, + serialized_end=147, +) + + +_ROW = _descriptor.Descriptor( + name='Row', + full_name='provee.Row', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='id', full_name='provee.Row.id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='distance', full_name='provee.Row.distance', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=149, + serialized_end=184, +) + + +_TRAININGSETROW = _descriptor.Descriptor( + name='TrainingSetRow', + full_name='provee.TrainingSetRow', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='id', full_name='provee.TrainingSetRow.id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='hdvector', full_name='provee.TrainingSetRow.hdvector', index=1, + number=2, type=1, cpp_type=5, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=b'\020\001', file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=186, + serialized_end=236, +) + +_NEIGHBOURS.fields_by_name['rows'].message_type = _ROW +DESCRIPTOR.message_types_by_name['ID'] = _ID +DESCRIPTOR.message_types_by_name['knnRequest'] = _KNNREQUEST +DESCRIPTOR.message_types_by_name['Neighbours'] = _NEIGHBOURS +DESCRIPTOR.message_types_by_name['Row'] = _ROW +DESCRIPTOR.message_types_by_name['TrainingSetRow'] = _TRAININGSETROW +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +ID = _reflection.GeneratedProtocolMessageType('ID', (_message.Message,), { + 'DESCRIPTOR' : _ID, + '__module__' : 'knn_pb2' + # @@protoc_insertion_point(class_scope:provee.ID) + }) +_sym_db.RegisterMessage(ID) + +knnRequest = _reflection.GeneratedProtocolMessageType('knnRequest', (_message.Message,), { + 'DESCRIPTOR' : _KNNREQUEST, + '__module__' : 'knn_pb2' + # @@protoc_insertion_point(class_scope:provee.knnRequest) + }) +_sym_db.RegisterMessage(knnRequest) + +Neighbours = _reflection.GeneratedProtocolMessageType('Neighbours', (_message.Message,), { + 'DESCRIPTOR' : _NEIGHBOURS, + '__module__' : 'knn_pb2' + # @@protoc_insertion_point(class_scope:provee.Neighbours) + }) +_sym_db.RegisterMessage(Neighbours) + +Row = _reflection.GeneratedProtocolMessageType('Row', (_message.Message,), { + 'DESCRIPTOR' : _ROW, + '__module__' : 'knn_pb2' + # @@protoc_insertion_point(class_scope:provee.Row) + }) +_sym_db.RegisterMessage(Row) + +TrainingSetRow = _reflection.GeneratedProtocolMessageType('TrainingSetRow', (_message.Message,), { + 'DESCRIPTOR' : _TRAININGSETROW, + '__module__' : 'knn_pb2' + # @@protoc_insertion_point(class_scope:provee.TrainingSetRow) + }) +_sym_db.RegisterMessage(TrainingSetRow) + + +DESCRIPTOR._options = None +_TRAININGSETROW.fields_by_name['hdvector']._options = None + +_KNN = _descriptor.ServiceDescriptor( + name='KNN', + full_name='provee.KNN', + file=DESCRIPTOR, + index=0, + serialized_options=None, + create_key=_descriptor._internal_create_key, + serialized_start=239, + serialized_end=436, + methods=[ + _descriptor.MethodDescriptor( + name='sendProjectionPoints', + full_name='provee.KNN.sendProjectionPoints', + index=0, + containing_service=None, + input_type=_TRAININGSETROW, + output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), + _descriptor.MethodDescriptor( + name='getKNNRequest', + full_name='provee.KNN.getKNNRequest', + index=1, + containing_service=None, + input_type=_KNNREQUEST, + output_type=_NEIGHBOURS, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), + _descriptor.MethodDescriptor( + name='getIDfromServer', + full_name='provee.KNN.getIDfromServer', + index=2, + containing_service=None, + input_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + output_type=_ID, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), +]) +_sym_db.RegisterServiceDescriptor(_KNN) + +DESCRIPTOR.services_by_name['KNN'] = _KNN + +# @@protoc_insertion_point(module_scope) diff --git a/backend/grpcKNN/knn_pb2_grpc.py b/backend/grpcKNN/knn_pb2_grpc.py new file mode 100644 index 0000000000000000000000000000000000000000..84f4bb8724a48c0b8abed6482c9d66448a3e28b8 --- /dev/null +++ b/backend/grpcKNN/knn_pb2_grpc.py @@ -0,0 +1,136 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +import knn_pb2 as knn__pb2 + + +class KNNStub(object): + """Interface exported by the server. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.sendProjectionPoints = channel.stream_unary( + '/provee.KNN/sendProjectionPoints', + request_serializer=knn__pb2.TrainingSetRow.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + ) + self.getKNNRequest = channel.unary_unary( + '/provee.KNN/getKNNRequest', + request_serializer=knn__pb2.knnRequest.SerializeToString, + response_deserializer=knn__pb2.Neighbours.FromString, + ) + self.getIDfromServer = channel.unary_unary( + '/provee.KNN/getIDfromServer', + request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + response_deserializer=knn__pb2.ID.FromString, + ) + + +class KNNServicer(object): + """Interface exported by the server. + """ + + def sendProjectionPoints(self, request_iterator, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def getKNNRequest(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def getIDfromServer(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_KNNServicer_to_server(servicer, server): + rpc_method_handlers = { + 'sendProjectionPoints': grpc.stream_unary_rpc_method_handler( + servicer.sendProjectionPoints, + request_deserializer=knn__pb2.TrainingSetRow.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + 'getKNNRequest': grpc.unary_unary_rpc_method_handler( + servicer.getKNNRequest, + request_deserializer=knn__pb2.knnRequest.FromString, + response_serializer=knn__pb2.Neighbours.SerializeToString, + ), + 'getIDfromServer': grpc.unary_unary_rpc_method_handler( + servicer.getIDfromServer, + request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + response_serializer=knn__pb2.ID.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'provee.KNN', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class KNN(object): + """Interface exported by the server. + """ + + @staticmethod + def sendProjectionPoints(request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.stream_unary(request_iterator, target, '/provee.KNN/sendProjectionPoints', + knn__pb2.TrainingSetRow.SerializeToString, + google_dot_protobuf_dot_empty__pb2.Empty.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def getKNNRequest(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/provee.KNN/getKNNRequest', + knn__pb2.knnRequest.SerializeToString, + knn__pb2.Neighbours.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def getIDfromServer(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/provee.KNN/getIDfromServer', + google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + knn__pb2.ID.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/grpcKNN/requirements.txt b/backend/grpcKNN/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5d065e06c399475d7d96565cee5fbb0dad3365a --- /dev/null +++ b/backend/grpcKNN/requirements.txt @@ -0,0 +1,8 @@ +grpcio==1.34.0 +grpcio-tools==1.34.0 +protobuf==3.14.0 +six==1.15.0 +faiss-cpu==1.7.0 +numpy==1.19.2 +tabulate==0.8.7 +regex==2020.7.14 \ No newline at end of file diff --git a/backend/webSocketGateway/protos/v3/knn.proto b/backend/webSocketGateway/protos/v3/knn.proto new file mode 100644 index 0000000000000000000000000000000000000000..2bdc51b9b654be823d2db65e7ee50e7b4dacf903 --- /dev/null +++ b/backend/webSocketGateway/protos/v3/knn.proto @@ -0,0 +1,48 @@ +syntax = "proto3"; + +package provee; + +import "google/protobuf/empty.proto"; + +option java_multiple_files = true; +option java_package = "nl.uuvig.provee"; +option java_outer_classname = "ProveProjectorGRCP"; +option objc_class_prefix = "PROVEE"; + + +// Interface exported by the server. +service KNN { + + + rpc sendProjectionPoints(stream TrainingSetRow) returns (google.protobuf.Empty) {} + rpc getKNNRequest(knnRequest) returns (Neighbours) {} + rpc getIDfromServer(google.protobuf.Empty) returns (ID) {} +} + +message ID{ + string id = 1; +} + +message knnRequest{ + int32 k = 1; + string words = 2; +} + +message Neighbours { + repeated Row rows = 1; +} + +message Row { + string id = 1; + float distance = 2; +} + +// Needs documentation +message TrainingSetRow { + // The id of the row, e.g., row index. + string id = 1; + + // The hd vector of the item. + repeated double hdvector = 2 [packed=true]; +} + diff --git a/backend/webSocketGateway/protos/v3/old/knn.proto b/backend/webSocketGateway/protos/v3/old/knn.proto new file mode 100644 index 0000000000000000000000000000000000000000..2bdc51b9b654be823d2db65e7ee50e7b4dacf903 --- /dev/null +++ b/backend/webSocketGateway/protos/v3/old/knn.proto @@ -0,0 +1,48 @@ +syntax = "proto3"; + +package provee; + +import "google/protobuf/empty.proto"; + +option java_multiple_files = true; +option java_package = "nl.uuvig.provee"; +option java_outer_classname = "ProveProjectorGRCP"; +option objc_class_prefix = "PROVEE"; + + +// Interface exported by the server. +service KNN { + + + rpc sendProjectionPoints(stream TrainingSetRow) returns (google.protobuf.Empty) {} + rpc getKNNRequest(knnRequest) returns (Neighbours) {} + rpc getIDfromServer(google.protobuf.Empty) returns (ID) {} +} + +message ID{ + string id = 1; +} + +message knnRequest{ + int32 k = 1; + string words = 2; +} + +message Neighbours { + repeated Row rows = 1; +} + +message Row { + string id = 1; + float distance = 2; +} + +// Needs documentation +message TrainingSetRow { + // The id of the row, e.g., row index. + string id = 1; + + // The hd vector of the item. + repeated double hdvector = 2 [packed=true]; +} + diff --git a/backend/gRPCProjector/python/protos/v3/projector.proto b/backend/webSocketGateway/protos/v3/old/projector.proto similarity index 100% rename from backend/gRPCProjector/python/protos/v3/projector.proto rename to backend/webSocketGateway/protos/v3/old/projector.proto diff --git a/backend/webSocketGateway/src/webSocketGateway.js b/backend/webSocketGateway/src/webSocketGateway.js index bacf19e0c830415ddc4872eadd8b92cf8fdbc828..d8aec6e5220ff600da9a1e4291d81672359490d7 100644 --- a/backend/webSocketGateway/src/webSocketGateway.js +++ b/backend/webSocketGateway/src/webSocketGateway.js @@ -1,42 +1,47 @@ const WebSocket = require('ws'); +var grpc = require('@grpc/grpc-js'); +var protoLoader = require('@grpc/proto-loader'); +const { callErrorFromStatus } = require('@grpc/grpc-js/build/src/call'); + +//Setup webserver const webSocketHost = "0.0.0.0" const webSocketPort = 9898 +var projectorTarget = process.env.PROJECTOR_TARGET ||'127.0.0.1:50051'; //var target = "proveeprojectorservice:50051" +var knnTarget = process.env.KNN_TARGET || '127.0.0.1:50052'; //"proveeknnservice:50052" const wsServer = new WebSocket.Server({ host: webSocketHost, port: webSocketPort }); -const http = require("http"); - -var PROTO_PATH = __dirname + '/../protos/v3/projector.proto'; -var grpc = require('@grpc/grpc-js'); -var protoLoader = require('@grpc/proto-loader'); -const { callErrorFromStatus } = require('@grpc/grpc-js/build/src/call'); -var packageDefinition = protoLoader.loadSync( - PROTO_PATH, - { - keepCase: true, - longs: String, - enums: String, - defaults: true, - oneofs: true - }); -var target = '127.0.0.1:50051'; +console.log("Projector target " +projectorTarget); +console.log("KNN target: " + knnTarget) +var projectorPackage = getGRPCPackage( __dirname + '/../protos/v3/projector.proto'); +var KNNPackage = getGRPCPackage(__dirname + '/../protos/v3/knn.proto'); var count = 0; var pointID = 0; -class ProjectionRequest { - constructor(ws, client, calls, lineAmount) { - this.index = count; + +class ProjectionRequest{ + constructor(ws){ + this.index = 0; count += 1; this.ws = ws; - this.client = client; - this.calls = calls; - this.lineAmount = lineAmount + this.projectorConn = null; + this.knnConn = null; + this.lineAmount = 0; } } +class GrpcConnection{ + constructor(client){ + this.client = client + this.calls = null + this.id = null + } +} + + wsServer.on('connection', function connection(ws) { console.log(`Client connected with websocket`); - var currConnection = new ProjectionRequest(ws, null, null, 0); + var currConnection = new ProjectionRequest(ws); ws.on('message', function incoming(message) { parseMessage(message, currConnection); }); @@ -49,18 +54,33 @@ wsServer.on('connection', function connection(ws) { }) }); +function getGRPCPackage(PROTO_PATH){ + var packageDefinition = protoLoader.loadSync( + PROTO_PATH, + { + keepCase: true, + longs: String, + enums: String, + defaults: true, + oneofs: true + }); + + return packageDefinition; +} -function getGRPCCLient() { - let pkg = grpc.loadPackageDefinition(packageDefinition); - let projector = pkg.provee["Projector"]; - client = new projector(target, grpc.credentials.createInsecure()); - return client + +function getGRPCClient(_target,package,name){ + let pkg = grpc.loadPackageDefinition(package); + let clientClass = pkg.provee[name]; + client = new clientClass(_target, grpc.credentials.createInsecure()); + let connection = new GrpcConnection(client); + return connection; } //Connects to grpc after first message -function getConnection(ws, client) { +function getProjectorConnection(ws,client){ call = client.getProjectionPoints(); - console.log(`Connecting to grpc Server at: ${target}`) + console.log(`Connecting to grpc Server`) call.on("data", (response) => { sendDataStreamToClient(JSON.stringify(response), ws) @@ -81,6 +101,47 @@ function getConnection(ws, client) { } +function getKNNConnection(connection){ + connection.knnConn.client.getIDfromServer({},function(error,response){ + if(!response){ + return; + } + console.log(error); + connection.knnConn.id = response["id"]; + const meta = new grpc.Metadata(); + meta.add('id',connection.knnConn.id); + let call = connection.knnConn.client.sendProjectionPoints(meta,function(error,response){ + console.log("finished points"); + console.log(error); + }); + + connection.knnConn.calls=[call] + }); + +} +function sendError(ws,message){ + ws.send(JSON.stringify({type: "error", message: message})); + } + + +function KNNNeighbourRequest(connection,words,k){ + console.log("Requesting " + words); + const meta = new grpc.Metadata(); + meta.add('id',connection.knnConn.id); + connection.knnConn.client.getKNNRequest({k:k,words: words},meta,function(err,response){ + if(response){ + console.log("got response"); + sendNeighbour(response,connection.ws); + } + else{ + sendError(connection.ws,"No valid request: " + words) + return; + } + + console.log(err); + }); +} + //Parse the message from browser function parseMessage(message, connection) { const jsonMessage = JSON.parse(message); @@ -95,28 +156,48 @@ function parseMessage(message, connection) { case "setLineCount": connection.lineAmount = parseInt(jsonMessage["amount"]); break; - case "setProjectorAmount": - var amount = parseInt(jsonMessage["amount"]); - connection.client = getGRPCCLient(); - var allCalls = [] - - //Create that amount of grpc connections - for (var i = 0; i < amount; i += 1) { - var grpcConnection = getConnection(connection.ws, connection.client); - allCalls.push(grpcConnection) + case "getKNNNeighbours": + var k = parseInt(jsonMessage["amount"]); + var words = jsonMessage["word"] + + if(!connection.knnConn){ + console.log("No KNN microservice"); + sendError(connection.ws,"No KNN microservice") + break; } - - connection.calls = allCalls; + KNNNeighbourRequest(connection,words,k); + break; + case "getKNN": + console.log("setup KNN"); + connection.knnConn = getGRPCClient(knnTarget,KNNPackage,"KNN"); + getKNNConnection(connection) break; + case "setProjectorAmount": + var amount = parseInt(jsonMessage["amount"]); + SetupKProjectors(amount,connection) + break; default: console.log("Error! Unknown request:" + jsonMessage["type"]); } } + +function SetupKProjectors(amount,connection){ + connection.projectorConn = getGRPCClient(projectorTarget,projectorPackage,"Projector"); + var allCalls = [] + + //Create that amount of grpc connections + for(var i = 0; i< amount; i+=1){ + var grpcConnection = getProjectorConnection(connection.ws,connection.projectorConn.client); + console.log("test") + allCalls.push(grpcConnection) + } + connection.projectorConn.calls = allCalls; +} + function generatePoint() { const id = pointID++; const x = Math.floor(Math.random() * 100); const y = Math.floor(Math.random() * 100); - return { id, x, y }; } @@ -160,12 +241,10 @@ function sendDataStreamToClient(data, ws) { for (var i = 0; i < list.length; i++) { try { var response = list[i]; - ws.send(response); + ws.send(JSON.stringify({type: "point", point: response})); } catch (e) { console.log("error"); console.log(e); } } -} - - +} \ No newline at end of file diff --git a/frontend/.idea/vcs.xml b/frontend/.idea/vcs.xml new file mode 100644 index 0000000000000000000000000000000000000000..6c0b8635858dc7ad44b93df54b762707ce49eefc --- /dev/null +++ b/frontend/.idea/vcs.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="$PROJECT_DIR$/.." vcs="Git" /> + </component> +</project> \ No newline at end of file diff --git a/frontend/.idea/workspace.xml b/frontend/.idea/workspace.xml new file mode 100644 index 0000000000000000000000000000000000000000..4d8361b75d6f8ca2970cf879805eebc7a470c7e6 --- /dev/null +++ b/frontend/.idea/workspace.xml @@ -0,0 +1,62 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="BranchesTreeState"> + <expand> + <path> + <item name="ROOT" type="e8cecc67:BranchNodeDescriptor" /> + <item name="LOCAL_ROOT" type="e8cecc67:BranchNodeDescriptor" /> + </path> + <path> + <item name="ROOT" type="e8cecc67:BranchNodeDescriptor" /> + <item name="REMOTE_ROOT" type="e8cecc67:BranchNodeDescriptor" /> + </path> + <path> + <item name="ROOT" type="e8cecc67:BranchNodeDescriptor" /> + <item name="REMOTE_ROOT" type="e8cecc67:BranchNodeDescriptor" /> + <item name="GROUP_NODE:origin" type="e8cecc67:BranchNodeDescriptor" /> + </path> + </expand> + <select /> + </component> + <component name="ChangeListManager"> + <list default="true" id="61321bbf-d1d1-412e-a2a1-23c0d3dab7e9" name="Default Changelist" comment="" /> + <option name="SHOW_DIALOG" value="false" /> + <option name="HIGHLIGHT_CONFLICTS" value="true" /> + <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> + <option name="LAST_RESOLUTION" value="IGNORE" /> + </component> + <component name="Git.Settings"> + <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." /> + </component> + <component name="ProjectId" id="1oDFUgY2aaYL8kkjAENi72U5TKf" /> + <component name="ProjectLevelVcsManager" settingsEditedManually="true" /> + <component name="ProjectViewState"> + <option name="hideEmptyMiddlePackages" value="true" /> + <option name="showLibraryContents" value="true" /> + </component> + <component name="PropertiesComponent"> + <property name="RunOnceActivity.OpenProjectViewOnStart" value="true" /> + </component> + <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" /> + <component name="TaskManager"> + <task active="true" id="Default" summary="Default task"> + <changelist id="61321bbf-d1d1-412e-a2a1-23c0d3dab7e9" name="Default Changelist" comment="" /> + <created>1612813272491</created> + <option name="number" value="Default" /> + <option name="presentableId" value="Default" /> + <updated>1612813272491</updated> + </task> + <servers /> + </component> + <component name="Vcs.Log.Tabs.Properties"> + <option name="TAB_STATES"> + <map> + <entry key="MAIN"> + <value> + <State /> + </value> + </entry> + </map> + </option> + </component> +</project> \ No newline at end of file diff --git a/frontend/src/components/webSocketStreaming.vue b/frontend/src/components/webSocketStreaming.vue index 94cf18ff472db136ab2c161122e8f67248a6c4fd..7ae779bcb673caf22efd8e1f540bcb5d2cdd0ef2 100644 --- a/frontend/src/components/webSocketStreaming.vue +++ b/frontend/src/components/webSocketStreaming.vue @@ -35,7 +35,7 @@ ></v-file-input> <v-btn color="primary" text @click="submitFiles">SUBMIT</v-btn> - + <v-checkbox v-model="KNNcheckbox" :label="`use KNN`"></v-checkbox> <v-spacer></v-spacer> </v-layout> <v-layout row> @@ -51,6 +51,9 @@ value="1" onblur /> + <h3>KNN word:</h3> + <v-text-field v-model="KNNword" clearable></v-text-field> + <v-btn color="primary" text @click="submitWord">SUBMIT</v-btn> <v-spacer></v-spacer> </v-layout> @@ -69,6 +72,8 @@ export default { launched: false, file: null, connection: null, + KNNcheckbox: true, + KNNword: "King", }; }, @@ -88,9 +93,25 @@ export default { }; this.connection.onmessage = function (event) { - const pointJSON = JSON.parse(event.data); - if (pointJSON) { - self.$emit("newPoint", pointJSON); + const jsonMessage = JSON.parse(event.data); + switch (jsonMessage["type"]) { + case "error": + var error = jsonMessage["message"]; + console.log(error); + break; + case "point": + var point = JSON.parse(jsonMessage["point"]); + if (point) { + self.$emit("newPoint", point); + } + break; + case "neighbour": + var neighbour = jsonMessage["neighbour"]; + var distance = jsonMessage["distance"]; + console.log("Word: " + neighbour + " Distance: " + distance); + break; + default: + console.log("No implementation for " + jsonMessage["type"]); } }; @@ -125,7 +146,7 @@ export default { }); }, - parseData(file, lineCount) { + parseData(lineCount) { let self = this; //Send necessary information @@ -135,6 +156,7 @@ export default { amount: this.projectorAmount, }) ); + self.connection.send( JSON.stringify({ type: "setLineCount", @@ -142,6 +164,19 @@ export default { }) ); + if (this.KNNcheckbox) { + self.connection.send( + JSON.stringify({ + type: "getKNN", + }) + ); + console.log("Send knn"); + } + + this.uploadFileStream(lineCount) + return; + }, + uploadFileStream(lineCount) { console.log("Filesize: ", this.file.size); console.log(lineCount + " lines"); var rowBuffer = []; @@ -149,7 +184,7 @@ export default { Papa.parse(this.file, { worker: true, delimiter: " ", - fastMode: true, + fastMode: false, step: function (row) { rowBuffer.push(row.data); if (rowBuffer.length % 100 == 0) { @@ -179,9 +214,29 @@ export default { console.log("All done!"); }, }); - return; }, - + + submitWord() { + if (this.KNNword == "") { + console.log("No word entered"); + return; + } + if (this.KNNword.includes(" ")) { + console.log( + "Dont use spaces inbetween words, for example use King,man,woman" + ); + return; + } + this.connection.send( + JSON.stringify({ + type: "getKNNNeighbours", + k: "10", + word: this.KNNword, + }) + ); + console.log("Send request for: " + this.KNNword); + }, + submitFiles() { //Start up normal connection if (!this.file) { @@ -191,7 +246,7 @@ export default { } //First get the lineCount, (we need that beforehand to know where to distribute the points) this.getLineCountEstimate(this.file, (lineCount) => { - this.parseData(this.file, lineCount); + this.parseData(lineCount); }); }, }, diff --git a/protos/build.sh b/protos/build.sh index cd978c8b4ab906fa3256e48639a28f0c90ad3959..40fedf74c664ca83ad545b3ee7e3ae90a6129ab0 100755 --- a/protos/build.sh +++ b/protos/build.sh @@ -1,18 +1,28 @@ #!/bin/bash - +echo "test" DIR="$(dirname "$0")" +PROTOS_DIRECTORY="$PWD/protos" +PROJECTOR_PROTO_FILE="$PROTOS_DIRECTORY/projector.proto" +KNN_PROTO_FILE="$PROTOS_DIRECTORY/knn.proto" -PROJECTOR_PROTO_FILE="$DIR/projector.proto" - -FRONTEND_GENERATED_DIR="$DIR/../frontend/src/generated" -BACKEND_GATEWAY_GENERATED_DIR="$DIR/../backend/gateway/src/generated" +BACKEND_PROJECTOR_GENERATED_DIR="$PWD/../backend/gRPCProjector/python" +BACKEND_KNN_GENERATED_DIR="$PWD/../backend/grpcKNN" +BACKEND_GATEWAY_PROTO_DIR="$PWD/../backend/webSocketGateway/protos/v3" +#Projector +python -m grpc_tools.protoc \ + -I "$PROTOS_DIRECTORY" \ + --python_out="$BACKEND_PROJECTOR_GENERATED_DIR" \ + --grpc_python_out="$BACKEND_PROJECTOR_GENERATED_DIR" \ + "$PROJECTOR_PROTO_FILE" -protoc \ - "$PROJECTOR_PROTO_FILE" \ - --js_out=import_style=commonjs:"$FRONTEND_GENERATED_DIR" \ - --grpc-web_out=import_style=commonjs,mode=grpcwebtext:"$FRONTEND_GENERATED_DIR" +#KNN +python -m grpc_tools.protoc \ + -I "$PROTOS_DIRECTORY" \ + --python_out="$BACKEND_KNN_GENERATED_DIR" \ + --grpc_python_out="$BACKEND_KNN_GENERATED_DIR" \ + "$KNN_PROTO_FILE" -grpc_tools_node_protoc \ - "$PROJECTOR_PROTO_FILE" \ - --js_out=import_style=commonjs:"$BACKEND_GATEWAY_GENERATED_DIR" \ - --grpc_out=grpc_js:"$BACKEND_GATEWAY_GENERATED_DIR" \ No newline at end of file +#Copy for the gateway +echo "$BACKEND_GATEWAY_PROTO_DIR" +cp -fr "$PROJECTOR_PROTO_FILE" "$BACKEND_GATEWAY_PROTO_DIR" +cp -fr "$KNN_PROTO_FILE" "$BACKEND_GATEWAY_PROTO_DIR" \ No newline at end of file diff --git a/protos/protos/knn.proto b/protos/protos/knn.proto new file mode 100644 index 0000000000000000000000000000000000000000..2bdc51b9b654be823d2db65e7ee50e7b4dacf903 --- /dev/null +++ b/protos/protos/knn.proto @@ -0,0 +1,48 @@ +syntax = "proto3"; + +package provee; + +import "google/protobuf/empty.proto"; + +option java_multiple_files = true; +option java_package = "nl.uuvig.provee"; +option java_outer_classname = "ProveProjectorGRCP"; +option objc_class_prefix = "PROVEE"; + + +// Interface exported by the server. +service KNN { + + + rpc sendProjectionPoints(stream TrainingSetRow) returns (google.protobuf.Empty) {} + rpc getKNNRequest(knnRequest) returns (Neighbours) {} + rpc getIDfromServer(google.protobuf.Empty) returns (ID) {} +} + +message ID{ + string id = 1; +} + +message knnRequest{ + int32 k = 1; + string words = 2; +} + +message Neighbours { + repeated Row rows = 1; +} + +message Row { + string id = 1; + float distance = 2; +} + +// Needs documentation +message TrainingSetRow { + // The id of the row, e.g., row index. + string id = 1; + + // The hd vector of the item. + repeated double hdvector = 2 [packed=true]; +} + diff --git a/protos/projector.proto b/protos/protos/projector.proto similarity index 94% rename from protos/projector.proto rename to protos/protos/projector.proto index a2a1dab64955b20ba82e8482070f4131dc483881..de425c04e965f933bfdeac12364f6048b8374035 100644 --- a/protos/projector.proto +++ b/protos/protos/projector.proto @@ -25,7 +25,7 @@ service Projector { // Obtains the Projection Points in 2D given the trainings values stored in a row format. Results are // streamed rather than returned at once (e.g. in a response message with a // repeated field). - rpc getProjectionPoints(TrainingSet) returns (stream Point) {} + rpc getProjectionPoints(stream TrainingSetRow) returns (stream Point) {} // A server-to-client streaming RPC. // @@ -50,8 +50,8 @@ service Projector { // Points are represented as x-y pairs message Point { int32 id = 1; - int32 x = 2; - int32 y = 3; + float x = 2; + float y = 3; } // Needs description