# Numpy snippets

This notebook is inspired by the following page: https://numpy.org/doc/stable/user/numpy-for-matlab-users.html

"Open

"Open

## Import directives

In [None]:
%matplotlib inline
#%matplotlib notebook

from IPython.display import display

In [None]:
import numpy as np
import math

## Create arrays

In [None]:
np.array([1, 2, 3])

In [None]:
np.array([[1, 2, 3],[4, 5, 6]])

### Special matrices

In [None]:
np.zeros(3)

In [None]:
np.zeros((3, 4))

In [None]:
np.ones(3)

In [None]:
np.ones((3, 4))

In [None]:
np.eye(3)

### Arange

In [None]:
np.arange(10)

In [None]:
np.arange(10, 20)

In [None]:
np.arange(10, 20, 2)

### Linspace

In [None]:
np.linspace(0., 2., 5)

### Meshgrid

In [None]:
# Build the two 2-D coordinate grids spanned by x = [1, 2, 3] and y = [4, 5, 6].
xx, yy = np.meshgrid([1, 2, 3], [4, 5, 6])

# Show both grids, separated by one empty line.
print(xx, yy, sep="\n\n")

### Random

#### Uniform distribution in [0, 1]

In [None]:
np.random.rand(3)

In [None]:
np.random.rand(3, 4)

#### Poisson distribution

In [None]:
np.random.poisson(10, size=[3, 4])

#### Multivariate normal distribution

In [None]:
# Parameters of a 2-D Gaussian: zero mean, unit variances, covariance 0.3.
num_points = 10
mu = np.zeros(2)
cov = np.array([[1.0, 0.3], [0.3, 1.0]])

# Draw `num_points` samples from that distribution.
np.random.multivariate_normal(mean=mu, cov=cov, size=num_points)

## Print options

In [None]:
np.get_printoptions()

### Print large arrays

In [None]:
default_threshold = np.get_printoptions()["threshold"]
default_threshold

Arrays with more than ``default_threshold`` elements are truncated.

In [None]:
max_size = math.ceil(math.sqrt(default_threshold))
max_size

In [None]:
# Build a square array with (max_size + 1)**2 elements, i.e. more than the
# print threshold, so that its repr is summarized.
# NOTE(review): randint(1, ...) samples integers from [0, 1), so every element
# is 0 -- pass a larger upper bound if varied values are wanted.
a = np.random.randint(1, size=[max_size + 1, max_size + 1])
a

Print the full array (set threshold to infinity):

In [None]:
np.set_printoptions(threshold=np.inf)
a

Go back to the default threshold:

In [None]:
np.set_printoptions(threshold=default_threshold)
a

## Dimension and shape

In [None]:
a = np.array([[1, 2, 3],[4, 5, 6]])

Number of dimensions:

In [None]:
a.ndim

Number of elements:

In [None]:
a.size

Number of elements per dimension:

In [None]:
a.shape

## Convert

In [None]:
l = [[1, 2, 3],[4, 5, 6]]
a = np.array([[1, 2, 3],[4, 5, 6]])

### Python list to Numpy array

In [None]:
np.array(l)

### Numpy array to Python list

In [None]:
a.tolist()

## Copy

### np.copy()

In [None]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a

In [None]:
b = a.copy()
b

In [None]:
a[0,0] = 10
print(a)
print(b)

### np.astype()

In [None]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a

In [None]:
b = a.astype('float64', copy=True)
b

In [None]:
a[0,0] = 10
print(a)
print(b)

## Access elements

In [None]:
a = np.arange(6)

In [None]:
a

In [None]:
a[0]

In [None]:
a[-1]

### Slices

In [None]:
a[1:4]

In [None]:
a = np.array([[1, 2, 3, 4, 5, 6],
 [10, 20, 30, 40, 50, 60],
 [100, 200, 300, 400, 500, 600]])
a

In [None]:
a[0,1]

In [None]:
a[1, :]

In [None]:
a[1, ::2]

In [None]:
a[:, 1]

In [None]:
a[0:2, 2:4]

In [None]:
a[1:, 1:]

In [None]:
a[:-1, :-1]

### Ellipsis

"The ellipsis is used to slice high-dimensional data structures.

It's designed to mean at this point, insert as many full slices (:) to extend the multi-dimensional slice to all dimensions."

https://stackoverflow.com/questions/118370/how-do-you-use-the-ellipsis-slicing-syntax-in-python

In [None]:
a = np.arange(2**3).reshape(2, 2, 2)
a

To select all first elements in the last (3rd) dimension

In [None]:
a[..., 0]

is equivalent to

In [None]:
a[:, :, 0]

To select all first elements in the first (1st) dimension

In [None]:
a[0, ...]

is equivalent to

In [None]:
a[0, :, :]

## Filter

In [None]:
a = np.array([[1, 2, 3, 4, 5, 6],
 [10, 20, 30, 40, 50, 60],
 [100, 200, 300, 400, 500, 600]])
a

### Boolean matrix whose i,jth element is (a_ij > 5)

In [None]:
(a>5)

### Find the indices where (a > 5)

In [None]:
np.nonzero(a>5)

### Set or copy a with elements greater than 5 zeroed out

In [None]:
a * (a<=5)

In [None]:
a[a>5] = 0
a

### Select indices satisfying multiple conditions

#### Short version

In [None]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a

In [None]:
a[(a > -10) & (a < 10)] = 0
a

In [None]:
a[(a < -10) | (a > 10)] = 1
a

#### Detailed version

In [None]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a

In [None]:
m1 = (a > -10)
m2 = (a < 10)

print(m1)
print(m2)
print(m1 & m2)

a[m1 & m2] = 0
a

## Concatenate

### Append 1D arrays

In [None]:
a = np.array([])
a = np.append(a, 3)
a

#### Performance test

It's probably not a good idea to use `np.append` too often, as it makes a copy of the array each time it is called...

In [None]:
%%timeit

a = np.array([])
for i in range(10000):
 a = np.append(a, i)

Lists use a different data structure that makes them more efficient for repeated additions...

In [None]:
%%timeit

l = []
for i in range(10000):
 l.append(i)

a = np.array(l)

In this case, the better option is probably the following:

In [None]:
%%timeit

a = np.array([i for i in range(10000)])

### Concatenate 1D arrays

In [None]:
a = np.zeros(3)
b = np.ones(3)
print("a:", a)
print("b:", b)

In [None]:
np.concatenate([a, b])

In [None]:
np.hstack([a, b])

### Concatenate 2D arrays

In [None]:
a = np.zeros([2, 3])
b = np.ones([2, 3])

In [None]:
a

In [None]:
b

#### On the first dimension

Using vstack:

In [None]:
np.vstack([a, b])

In [None]:
np.vstack([a, b]).shape

Using concatenate:

In [None]:
np.concatenate([a, b], axis=0)

In [None]:
np.concatenate([a, b], axis=0).shape

#### On the second dimension

Using hstack:

In [None]:
np.hstack([a, b])

In [None]:
np.hstack([a, b]).shape

Using concatenate:

In [None]:
np.concatenate([a, b], axis=1)

In [None]:
np.concatenate([a, b], axis=1).shape

### Join a sequence of arrays along a *new* axis

The `axis` parameter specifies the index of the new axis in the dimensions
of the result.

In [None]:
a = np.zeros([2, 3])
b = np.ones([2, 3])

In [None]:
a

In [None]:
b

#### Along axis 0

In [None]:
np.stack([a, b], axis=0)

In [None]:
np.stack([a, b], axis=0).shape

#### Along axis 1

In [None]:
np.stack([a, b], axis=1)

In [None]:
np.stack([a, b], axis=1).shape

#### Along axis 2

In [None]:
np.stack([a, b], axis=2)

In [None]:
np.stack([a, b], axis=2).shape

#### Tile

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6]])
np.tile(a, (2, 3))

## Reshape or transpose

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6]])

### Transpose

In [None]:
a.T

### Flatten

In [None]:
a.flatten()

### Reshape

In [None]:
a = np.arange(6)
a

#### Row vector to column vector

In [None]:
a.reshape([-1, 1])

#### Vector to matrix

In [None]:
a.reshape([2, 3])

In [None]:
a.reshape([3, 2])

## Repeat

In [None]:
a = np.arange(3)
a

In [None]:
np.repeat(a, 5)

In [None]:
a = np.arange(3).reshape([-1, 1])
a

In [None]:
np.repeat(a, 5, axis=0)

In [None]:
a = np.array([[1, 3, 5],[2, 4, 6]])
a

In [None]:
np.repeat(a, 5, axis=0)

## Sort

### Return the indices that would sort an array

In [None]:
a = np.array([8, 5, 1])
a

In [None]:
a.argsort()

### Sort an array by the $n^{\text{th}}$ column

Ref.: https://stackoverflow.com/questions/2828059/sorting-arrays-in-numpy-by-column

In [None]:
a = np.array([[4, 4, 2],
 [8, 5, 1],
 [7, 0, 0],
 [3, 1, 1],
 [3, 0, 5]])
a

In [None]:
n = 0 # the column sorted by

In [None]:
a[a[:,n].argsort()]

In [None]:
n = 1 # the column sorted by

In [None]:
a[a[:,n].argsort()]

In [None]:
n = 2 # the column sorted by

In [None]:
a[a[:,n].argsort()]

## Aggregation / reduction

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Change the `axis` value in the following functions to aggregate along a given axis.

In [None]:
np.sum(a, axis=None)

In [None]:
np.cumsum(a, axis=None)

In [None]:
np.diff(a.ravel())

In [None]:
np.mean(a, axis=None)

In [None]:
np.var(a, axis=None)

In [None]:
np.std(a, axis=None)

In [None]:
np.median(a, axis=None)

In [None]:
np.min(a, axis=None)

In [None]:
np.max(a, axis=None)

In [None]:
np.prod(a, axis=None)

In [None]:
np.cumprod(a, axis=None)

## Compute the histogram of a set of data (with a specific binning)

In [None]:
a = np.array([1, 1, 3, 2, 2, 2])
a

All but the last (righthand-most) bin is half-open. In other words,
if `bins` is:

 [1, 2, 3, 4]

then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which
*includes* 4.

In [None]:
bins = np.array([1, 2, 3, 4])
bins

In [None]:
hist, bins_ = np.histogram(a, bins=bins)
hist

## Linear algebra

### Dot product of two arrays

In [None]:
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])

In [None]:
np.dot(a, b)

In [None]:
a.dot(b)

### Compute the (multiplicative) inverse of a matrix

In [None]:
a = np.random.normal(size=(3, 3))
a

In [None]:
np.linalg.inv(a)

### Compute the eigenvalues and right eigenvectors of a square array

In [None]:
a = np.random.normal(size=(3, 3))
a

In [None]:
np.linalg.eig(a)

### Singular Value Decomposition

In [None]:
a = np.random.normal(size=(3, 3))
a

In [None]:
# np.linalg.svd returns (U, s, Vh): the third value is the conjugate transpose
# of V, so the variable named `V` here holds Vh and a == U @ np.diag(s) @ V.
U, s, V = np.linalg.svd(a)
print(U, s, V)

### Solve a linear matrix equation, or system of linear scalar equations

In [None]:
# Solve a @ x = b, i.e. the system 3*x0 + x1 = 9 and x0 + 2*x1 = 8.
a, b = np.array([[3, 1], [1, 2]]), np.array([9, 8])
np.linalg.solve(a, b)

### Diagonals

Extract the diagonal:

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.diag(a)

Make a diagonal matrix:

In [None]:
d = np.array([1, 2, 3])
np.diag(d)

### Trace

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.trace(a)

### Upper and lower triangles of an array

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [None]:
np.triu(a)

In [None]:
np.tril(a)

## Data types

See https://numpy.org/doc/stable/user/basics.types.html

### Get type

In [None]:
a = np.arange(-2., 2., 0.5)
a.dtype

### Size in memory (in bytes)

In [None]:
a = np.arange(-2., 2., 0.5)

Per item:

In [None]:
a.itemsize

Full array:

In [None]:
a.nbytes

### Init

In [None]:
a = np.zeros(3)
a.dtype

In [None]:
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError), whereas `bool` works on
# every NumPy version and yields the same boolean dtype.
a = np.zeros(3, dtype=bool)
a.dtype

In [None]:
# Use the builtin `int` as dtype: the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError). `int` selects NumPy's
# default integer type, exactly as the old alias did.
a = np.zeros(3, dtype=int)
a.dtype

In [None]:
a = np.zeros(3, dtype=np.int8)
a.dtype

In [None]:
a = np.zeros(3, dtype=np.uint8)
a.dtype

### Conversions

In [None]:
a = np.arange(-2., 2., 0.5)
a

In [None]:
# Convert to booleans (non-zero -> True). The builtin `bool` replaces the
# `np.bool` alias, which was removed in NumPy 1.24.
a.astype(bool)

In [None]:
# Convert to the default integer type (values are truncated toward zero).
# The builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
a.astype(int)

In [None]:
a.astype(np.int8)

In [None]:
a.astype(np.uint8)

## Masked arrays

### Without masked array

In [None]:
a = np.array([[np.nan, 2, 3], [1, np.nan, 6]])
a

In [None]:
a.min()

In [None]:
np.nanmin(a)

In [None]:
a.max()

In [None]:
np.nanmax(a)

In [None]:
a.mean()

In [None]:
np.nanmean(a)

In [None]:
a.shape

### With masked array

In [None]:
ma = np.ma.masked_where(np.isnan(a), a)
ma

In [None]:
ma.min()

In [None]:
ma.max()

In [None]:
ma.mean()

In [None]:
ma.shape