Source code for teili.tools.sorting

# -*- coding: utf-8 -*-
"""To understand the structure of in the rasterplots but also in the learned weight matrices, we need to sort the weight matrices according to some similarity measure, such as euclidean and jaccard distance.
However, the sorting algorithm is completely agnostic to the similarity measure. It connects each node with maximum two edges and constructs a directed graph.
This is similar to the travelling salesman problem.

Example:
    In order to use this class you need to initialize it
    either without a filename:

    >>> from teili.tools.sorting import SortMatrix
    >>> import numpy as np
    >>> matrix = np.random.randint((49, 49))
    >>> obj = SortMatrix(nrows=49, matrix=matrix)
    >>> print(obj.matrix)
    >>> print(obj.permutation)
    >>> print(ob.sorted_matrix)

    or instead of using a matrix you can also specify a
    path to a stored matrix:

    >>> filename = '/path/to/your/matrix.npy'
    >>> obj = SortMatrix(nrows=49, filename=filename)
"""
# @Author: Moritz Milde
# @Date:   2018-06-05 11:09:20

import numpy as np
import warnings


[docs]class SortMatrix(): """Class which can sort your matrix based on similarity Attributes: filename (str, optional): path/to/matrix/name.npy. matrix (ndarray, optional): matrix as provided by load_matrix. ncols (int, optional): number of columns of the 2d array. nrows (int, required): number of rows of the 2d array. permutation (list): List of indices which are more similar to each other in (euclidean, jaccard) distance. similarity_matrix (ndarray, optional): Matrix containing similarities. sorted_matrix (TYPE): Sorted matrix according to permutation. recurrent_matrix (boolean, optional): Indicates whether it is a recurrent matrix or not. similarity_metric (str, optional): Indicates metric for calculating similarity. max_val (float): Maximum value of the provided matrix """ def __init__(self, nrows, ncols=None, filename=None, matrix=None, axis=0, target_indices=None, rec_matrix=False, similarity_metric='euclidean'): """Summary Args: nrows (int, required): number of rows of the 2d array. ncols (int, optional): number of columns of the 2d array. filename (str, optional): path/to/matrix/name.npy. matrix (ndarray, optional): Instead of providing filename and location one can also pass the matrix to sort directly to the class. axis (int, optional): Axis along which similarity should be computed. target_indices (list of lists, optional): Must be provided only when connections between neurons are sparse. Each row contains the postsynaptic targets of a neuron whose index is represented by the number of the row. rec_matrix (boolean, optional): Informs whether it is the matrix represents recurrent connections or not. If it is a recurrent matrix, both dimensions will be sorted according to permutation. similarity_metric (str, optional): Metric used to calculate similarity between arrays. """ self.nrows = nrows self.ncols = ncols self.axis = axis self.recurrent_matrix = rec_matrix self.similarity_metric = similarity_metric if self.ncols is None: warnings.warn('Unspecified ncols. Matrix is assumed to be squared') self.ncols = self.nrows self.filename = filename # Load reshaped matrix. Dimensions are specified via nrwos, ncols if matrix is None: self.matrix = self.load_matrix() elif matrix is not None: if target_indices is None: self.matrix = np.reshape(matrix, (self.nrows, self.ncols)) elif target_indices is not None: # Fill un-connected inputs with zero filled_matrix = np.zeros((self.nrows, self.ncols)) for i in range(self.nrows): filled_matrix[i, target_indices[i][:]] = matrix[i][:] self.matrix = filled_matrix self.max_val = np.max(self.matrix) # Compute similarity along specified axis self.similarity_matrix = self.get_similarity_matrix(axis=axis) # Get permutation along specified axis self.permutation = self.get_permutation(axis=axis) # Sort matrix self.sorted_matrix = self.sort_matrix()
[docs] def load_matrix(self): """Load matrix from .npy file Returns: ndarray: loaded matrix from file. """ # if self.filename is not None: try: matrix = np.load(self.filename) except TypeError: raise TypeError('Invalid filename. Please specify a valid path and filename.') self.matrix = matrix.reshape((self.nrows, self.ncols)) return self.matrix
[docs] def compute_distance(self, x, y, similarity_metric, threshold=.8): """This function returns the distance of any to vectors x and y Args: x (ndarray, required): 1d vector. y (ndarray, required): 1d vector. similarity_metric (str, required): Metric used to measure similarity Returns: ndarray: Element-wise distance of two 1d vectors. """ if similarity_metric == 'euclidean': dist = np.linalg.norm(x - y) elif similarity_metric == 'jaccard': x_elements = np.where(x > self.max_val*threshold)[0] y_elements = np.where(y > self.max_val*threshold)[0] intersection = len(list(set(x_elements).intersection(y_elements))) union = (len(x_elements) + len(y_elements)) - intersection dist = (1 - float(intersection) / union) else: raise ValueError return dist
[docs] def get_similarity_matrix(self, axis=0): """This function computes a similarity matrix of a given matrix. Args: axis (int, optional): Axis along which similarity should be computed. Returns: ndarray: Matrix containing similarities. """ self.similarity_matrix = np.zeros( (np.size(self.matrix, axis), np.size(self.matrix, axis))) * np.nan # Loop through both dimensions of the specified matrix # and compute the distance between every pairs of vectors for index_i in range(np.size(self.matrix, axis)): if axis == 0: reference_vector = self.matrix[index_i, :] else: reference_vector = self.matrix[:, index_i] for index_j in range(np.size(self.matrix, axis)): if index_i != index_j: if axis == 0: comparison_vector = self.matrix[index_j, :] else: comparison_vector = self.matrix[:, index_j] self.similarity_matrix[index_i, index_j] = self.compute_distance( reference_vector, comparison_vector, self.similarity_metric) else: self.similarity_matrix[index_i, index_j] = np.inf return self.similarity_matrix
[docs] def get_permutation(self, axis=0): """To sort a given matrix according to its similarity we need to construct permutation indices, which are used to sort the matrix. First we find the most similar entry in the similarity matrix. This function allows each node in the similarity graph to be only used twice, i.e. each node has maximally two edges connected to it. The vector 'degree' keeps track of this. To prevent a loop closure in the similarity graph a proxy vector called 'partner' is used to set the distance between the two ends of the similarity graph to infinity. Args: axis (int, optional): Axis along which similarity should be computed. Returns: list: Vector of permuted indices. """ similarity_matrix = np.array(self.similarity_matrix) steps = [] degree = np.zeros((np.size(similarity_matrix, 0))) # diagonal was already set to infinity partner = np.arange(np.size(similarity_matrix, 0)) while(len(steps) <= np.size(similarity_matrix, 0) - 1): # Get the index (tuple) of the most similar entry in the similarity # matrix ind_nearest = np.unravel_index( np.argmin(similarity_matrix, axis=None), similarity_matrix.shape) # Define the two to be connected nodes vertexA = ind_nearest[0] # we gonna draw an edge between A and B vertexB = ind_nearest[1] # Increase the (connectivity) degree of these nodes by one degree[vertexA] += 1 degree[vertexB] += 1 # Draw an edge between the nodes steps.append(ind_nearest) # the bag of edges # don't use anything more than twice ind2exclude = np.where(degree >= 2) similarity_matrix[ind2exclude, :] = np.inf similarity_matrix[:, ind2exclude] = np.inf # To prevent loop closure identify the two ends of the graph endA = partner[vertexA] endB = partner[vertexB] # Re-asign the partners of the ends partner[endA] = endB partner[endB] = endA # Set the distance between the new ends of the graph to infinity similarity_matrix[endA, endB] = np.inf similarity_matrix[endB, endA] = np.inf # Break condition that stops the construction of the graph as soon # as every node except the two end points are connected twice if np.sum(degree) >= np.size(similarity_matrix, 0) * 2 - 2: break # Now that the graph is defined in steps, we can construct the # permutation indices self.permutation = [] # Pick one end of the graph and define as start node start_ind = np.where(degree == 1) start_node = start_ind[0][0] self.permutation.append(start_node) while(len(steps) > 0): # Find the corresponding node which is the end node for the current # edge corresponding_tuple = [ item for item in steps if start_node in item] if corresponding_tuple[0][0] == start_node: end_node = corresponding_tuple[0][1] else: end_node = corresponding_tuple[0][0] # Remove nodes from steps steps = [index_i for index_i in steps if index_i != corresponding_tuple[0]] # Add end node of the edge to the list of permutation self.permutation.append(end_node) # Set the end_node of the edge to be the start_node of the next # edge start_node = end_node return self.permutation
[docs] def sort_matrix(self): """This function returns the sorted matrix given the permutation indices. Returns: ndarray: Sorted matrix according to similarity. """ if len(self.permutation) == 0: self.permutation = self.get_permutation(axis=self.axis) tmp_matrix = np.array(self.matrix) if self.recurrent_matrix: # First sort each row for row in range(len(self.permutation)): tmp_matrix[row] = tmp_matrix[row][self.permutation] # Second sort each column self.sorted_matrix = tmp_matrix[self.permutation] else: if self.axis == 0: self.sorted_matrix = tmp_matrix[self.permutation, :] else: self.sorted_matrix = tmp_matrix[:, self.permutation] return self.sorted_matrix