diff --git a/README.md b/README.md index ccf838159..b59d2c33d 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,48 @@ Delete package if no more required: $ pip uninstall pyspla ``` +## Example of usage + +This example demonstrates basic library primitives usage and shows how to implement a simple breadth-first search +algorithm using `spla` primitives in a few lines of code and run it on your GPU using the OpenCL backend for acceleration. + +```python +from pyspla import * + +def bfs(s: int, A: Matrix): + v = Vector(A.n_rows, INT) # to store depths + + front = Vector.from_lists([s], [1], A.n_rows, INT) # front of new vertices to study + front_size = 1 # current front size + depth = Scalar(INT, 0) # depth of search + count = 0 # num of reached vertices + + while front_size > 0: # while have something to study + depth += 1 + count += front_size + v.assign(front, depth, op_assign=INT.SECOND, op_select=INT.NQZERO) # assign depths + front = front.vxm(v, A, op_mult=INT.LAND, op_add=INT.LOR, op_select=INT.EQZERO) # do traversal + front_size = front.reduce(op_reduce=INT.PLUS).get() # update front count to end algorithm + + return v, count, depth.get() +``` + +Create an adjacency matrix for a simple graph containing 4 vertices and 5 edges. + +```python +I = [0, 1, 2, 2, 3] +J = [1, 2, 0, 3, 2] +V = [1, 1, 1, 1, 1] +A = Matrix.from_lists(I, J, V, shape=(4, 4), dtype=INT) +``` + +Run the bfs algorithm starting from vertex 0 with the graph adjacency matrix created earlier. Note that `spla` will +automatically select a GPU-based acceleration backend for computations. 
+ + ```python +v, c, d = bfs(0, A) +``` + ## Performance ### Comparison on a Nvidia GPU diff --git a/python/example.py b/python/example.py index 010d05438..86beca930 100644 --- a/python/example.py +++ b/python/example.py @@ -1,16 +1,31 @@ from pyspla import * -M = Matrix.from_lists([0, 1, 2, 2], [1, 2, 0, 4], [1, 2, 3, 4], (3, 5), INT) -print(M) -N = Matrix.from_lists([0, 1, 2, 3], [1, 2, 0, 3], [2, 3, 4, 5], (4, 5), INT) -print(N) +def bfs(s: int, A: Matrix): + v = Vector(A.n_rows, INT) # depth assigned to each reached vertex -mask = Matrix.dense((3, 4), INT, fill_value=1) -print(mask) + front = Vector.from_lists([s], [1], A.n_rows, INT) # front of new vertices to study, initially only s + front_size = 1 # current front size + depth = Scalar(INT, 0) # current depth of the search + count = 0 # number of reached vertices -R = M.mxmT(mask, N, INT.MULT, INT.PLUS, INT.GTZERO) -print(R) + while front_size > 0: # loop while there is something to study + depth += 1 + count += front_size + v.assign(front, depth, op_assign=INT.SECOND, op_select=INT.NQZERO) # assign the current depth to the front vertices + front = front.vxm(v, A, op_mult=INT.LAND, op_add=INT.LOR, op_select=INT.EQZERO) # do traversal, v acts as the mask + front_size = front.reduce(op_reduce=INT.PLUS).get() # new front size, 0 ends the algorithm -R = M.mxmT(mask, N, INT.MULT, INT.PLUS, INT.EQZERO) -print(R) \ No newline at end of file + return v, count, depth.get() + + +I = [0, 1, 2, 2, 3] +J = [1, 2, 0, 3, 2] +V = [1, 1, 1, 1, 1] +A = Matrix.from_lists(I, J, V, shape=(4, 4), dtype=INT) +print(A) + +v, c, d = bfs(0, A) +print(v) +print(c) +print(d) diff --git a/python/pyspla/__init__.py b/python/pyspla/__init__.py index c4e700341..bb7bbbeae 100644 --- a/python/pyspla/__init__.py +++ b/python/pyspla/__init__.py @@ -57,6 +57,75 @@ it to speed-up some operations. All GPU communication, data transformations and transfers done internally automatically without any efforts from user perspective. 
+Example of usage +---------------- + +This example demonstrates basic library primitives usage and shows how to implement a simple breadth-first search +algorithm using `spla` primitives in a few lines of code and run it on your GPU using the OpenCL backend for acceleration. + +Import the `spla` package into your python script. +>>> from pyspla import * + +Create an adjacency matrix of a graph using lists of row-column indices and values. +>>> I = [0, 1, 2, 2, 3] +>>> J = [1, 2, 0, 3, 2] +>>> V = [1, 1, 1, 1, 1] +>>> A = Matrix.from_lists(I, J, V, shape=(4, 4), dtype=INT) +>>> print(A) +' + 0 1 2 3 + 0| . 1 . .| 0 + 1| . . 1 .| 1 + 2| 1 . . 1| 2 + 3| . . 1 .| 3 + 0 1 2 3 +' + +The following function implements a single-source breadth-first search algorithm through a masked matrix-vector product. +The algorithm accepts a starting vertex and an adjacency matrix of a graph. It traverses the graph using `vxm` and assigns +depths to reached vertices. The mask is used to update only unvisited vertices, reducing the number of required computations. +>>> def bfs(s: int, A: Matrix): +>>> v = Vector(A.n_rows, INT) # to store depths +>>> +>>> front = Vector.from_lists([s], [1], A.n_rows, INT) # front of new vertices to study +>>> front_size = 1 # current front size +>>> depth = Scalar(INT, 0) # depth of search +>>> count = 0 # num of reached vertices +>>> +>>> while front_size > 0: # while have something to study +>>> depth += 1 +>>> count += front_size +>>> v.assign(front, depth, op_assign=INT.SECOND, op_select=INT.NQZERO) # assign depths +>>> front = front.vxm(v, A, op_mult=INT.LAND, op_add=INT.LOR, op_select=INT.EQZERO) # do traversal +>>> front_size = front.reduce(op_reduce=INT.PLUS).get() # update front count to end algorithm +>>> +>>> return v, count, depth.get() + +Run the bfs algorithm starting from vertex 0 with the graph adjacency matrix created earlier. Note that `spla` will +automatically select a GPU-based acceleration backend for computations. 
+>>> v, c, d = bfs(0, A) + +Output the result vector with distances of reached vertices. +>>> print(v) +' + 0| 1 + 1| 2 + 2| 3 + 3| 4 +' + +Total number of reached vertices. +>>> print(c) +' + 4 +' + +Maximum depth of a discovered vertex. +>>> print(d) +' + 4 +' + Performance ----------- @@ -131,6 +200,15 @@ operations, binary operations used for reductions and products, select operations used for filtration and mask application. +Math operations +--------------- + +Library provides a set of high-level linear algebra operations over matrices and vectors with +parametrization by binary, unary and select `ops`. Implementations are available for +masked `mxmT` matrix-matrix, `mxv` matrix-vector, `vxm` vector-matrix products, matrix and +vector reductions, assignment, and so on. Most operations have both CPU and GPU implementations. +Thus, you will have GPU performance in computations out of the box. + Details ------- diff --git a/python/pyspla/bridge.py b/python/pyspla/bridge.py index 9b12d9084..53f921a3e 100644 --- a/python/pyspla/bridge.py +++ b/python/pyspla/bridge.py @@ -101,6 +101,18 @@ class SplaNotImplemented(SplaError): class FormatMatrix(enum.Enum): """ Mapping for spla supported matrix storage formats enumeration. 
+ + | Name | Memory type | Description | + |:---------|--------------:|:------------------------------------------------------------------| + |`CPU_LIL` | RAM (host) | List of lists, storing adjacency lists per vertex | + |`CPU_DOK` | RAM (host) | Dictionary of keys, effectively hash map of row,column to value | + |`CPU_COO` | RAM (host) | Lists of coordinates, storing rows, columns and values separately | + |`CPU_CSR` | RAM (host) | Compressed sparse rows format | + |`CPU_CSC` | RAM (host) | Compressed sparse columns format | + |`ACC_COO` | VRAM (device) | List of coordinates, but implemented for GPU/ACC usage | + |`ACC_CSR` | VRAM (device) | CSR, but implemented for GPU/ACC usage | + |`ACC_CSC` | VRAM (device) | CSC, but implemented for GPU/ACC usage | + """ CPU_LIL = 0 @@ -117,6 +129,14 @@ class FormatMatrix(enum.Enum): class FormatVector(enum.Enum): """ Mapping for spla supported vector storage formats enumeration. + + | Name | Memory type | Description | + |:-----------|--------------:|:---------------------------------------------------------------| + |`CPU_DOK` | RAM (host) | Dictionary of keys, hash map of index to value | + |`CPU_DENSE` | RAM (host) | Dense array of values with direct indexing | + |`CPU_COO` | RAM (host) | List of coordinates, storing rows and values separately | + |`ACC_DENSE` | VRAM (device) | Dense array, but implemented for GPU/ACC usage | + |`ACC_COO` | VRAM (device) | List of coordinates, but implemented for GPU/ACC usage | """ CPU_DOK = 0 diff --git a/python/pyspla/scalar.py b/python/pyspla/scalar.py index 8eeca475e..4efd5d1cc 100644 --- a/python/pyspla/scalar.py +++ b/python/pyspla/scalar.py @@ -29,7 +29,7 @@ import ctypes from .bridge import backend, check -from .type import INT +from .type import INT, FLOAT from .object import Object @@ -66,6 +66,12 @@ def __init__(self, dtype=INT, value=None, hnd=None, label=None): """ Creates new scalar of desired type or retains existing C object. 
+ >>> s = Scalar(INT, 10) + >>> print(s) + ' + 10 + ' + :param dtype: optional: Type. default: INT. Type of the scalar value. @@ -94,6 +100,12 @@ def __init__(self, dtype=INT, value=None, hnd=None, label=None): def dtype(self): """ Returns the type of stored value in the scalar. + + >>> s = Scalar(INT) + >>> print(s.dtype) + ' + + ' """ return self._dtype @@ -102,6 +114,12 @@ def dtype(self): def shape(self): """ 2-tuple shape of the storage. For scalar object it is always 1 by 1. + + >>> s = Scalar(INT) + >>> print(s.shape) + ' + (1, 1) + ' """ return 1, 1 @@ -110,14 +128,53 @@ def shape(self): def n_vals(self): """ Number of stored values in the scalar. Always 1. + + >>> s = Scalar(INT) + >>> print(s.n_vals) + ' + 1 + ' """ return 1 + @classmethod + def from_value(cls, value): + """ + Create a scalar from a python value, inferring the type: float gives FLOAT, bool and int give INT. + + >>> s = Scalar.from_value(0.5) + >>> print(s.dtype) + ' + + ' + + :param value: float, int or bool. + Value to create scalar from. + + :return: Scalar with value. Raises TypeError if the type cannot be inferred. + """ + + if isinstance(value, float): + return Scalar(dtype=FLOAT, value=value) + elif isinstance(value, bool): # bool is a subclass of int, so it must be tested before int + return Scalar(dtype=INT, value=value) + elif isinstance(value, int): + return Scalar(dtype=INT, value=value) + else: + raise TypeError("cannot infer type") + def set(self, value=None): """ Set the value stored in the scalar. If no value passed the default value is set. + >>> s = Scalar(INT) + >>> s.set(10) + >>> print(s) + ' + 10 + ' + :param value: optional: Any. default: None. Optional value to store in scalar. """ @@ -128,6 +185,12 @@ def get(self): """ Read the value stored in the scalar. + >>> s = Scalar(INT, 10) + >>> print(s.get()) + ' + 10 + ' + :return: Value from scalar. 
""" @@ -140,3 +203,54 @@ def __str__(self): def __iter__(self): return iter([self.get()]) + + def __add__(self, other): + """Return a new scalar of this dtype holding the sum of this value and `other` (a Scalar or a plain value).""" + return Scalar(dtype=self.dtype, value=self.get() + Scalar._value(other)) + + def __sub__(self, other): + """Return a new scalar of this dtype holding this value minus `other` (a Scalar or a plain value).""" + return Scalar(dtype=self.dtype, value=self.get() - Scalar._value(other)) + + def __mul__(self, other): + """Return a new scalar of this dtype holding the product of this value and `other` (a Scalar or a plain value).""" + return Scalar(dtype=self.dtype, value=self.get() * Scalar._value(other)) + + def __truediv__(self, other): + """Return a new scalar of this dtype holding this value divided by `other` (a Scalar or a plain value).""" + return Scalar(dtype=self.dtype, value=self.get() / Scalar._value(other)) + + def __floordiv__(self, other): + """Return a new scalar of this dtype holding this value floor-divided by `other` (a Scalar or a plain value).""" + return Scalar(dtype=self.dtype, value=self.get() // Scalar._value(other)) + + def __iadd__(self, other): + """Add `other` to the stored value in place and return this scalar.""" + self.set(self.get() + Scalar._value(other)) + return self + + def __isub__(self, other): + """Subtract `other` from the stored value in place and return this scalar.""" + self.set(self.get() - Scalar._value(other)) + return self + + def __imul__(self, other): + """Multiply the stored value by `other` in place and return this scalar.""" + self.set(self.get() * Scalar._value(other)) + return self + + def __itruediv__(self, other): + """Divide the stored value by `other` in place and return this scalar.""" + self.set(self.get() / Scalar._value(other)) + return self + + # Python 3 dispatches the true-division assignment operator to `__itruediv__`; the Python 2 name is kept as an alias. + __idiv__ = __itruediv__ + + @classmethod + def _value(cls, other): + """Unwrap `other`: return the stored value if it is a Scalar, otherwise return `other` unchanged.""" + if isinstance(other, Scalar): + return other.get() + else: + return other