import numpy as np


# from a list
l = [10.0, 12.5, 15.0, 17.5, 20.0]
np.array(l)

array([10. , 12.5, 15. , 17.5, 20. ])


# fast but the values can be anything
np.empty(4)

array([1.27880790e-316, 0.00000000e+000, 6.91986808e-310, 1.57378525e-316])


# slower than np.empty but the values are all 0.
np.zeros([2, 6])

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])


# multidimensional array
a = np.ones([2, 3, 4])
print(a.shape, a.size, a.dtype)
a

(2, 3, 4) 24 float64

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])


# like range but produce 1D numpy array
np.arange(4)

array([0, 1, 2, 3])


# np.arange can produce arrays of floats
np.arange(4.)

array([0., 1., 2., 3.])


# another convenient function to generate 1D arrays
np.linspace(10, 20, 5)

array([10. , 12.5, 15. , 17.5, 20. ])


a = np.linspace(10, 20 ,5)
list(a)

[10.0, 12.5, 15.0, 17.5, 20.0]


# Or even better
a.tolist()

[10.0, 12.5, 15.0, 17.5, 20.0]


n = 1000
# we use the ipython magic command %timeit
%timeit list(range(n))

15.6 µs ± 1.59 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


%%capture timeit_python
# to capture the result of the command timeit in the variable timeit_python
# Pure Python
%timeit list(range(n))


%%capture timeit_numpy
# numpy
%timeit np.arange(n)


def compute_time_in_second(timeit_result):
    string = timeit_result.stdout
    print(string)
    for line in string.split('\n'):
        words = line.split(' ')
        if len(words) > 1:
            time = float(words[0])
            unit = words[1]
    if unit == 'ms':
        time *= 1e-3
    elif unit == 'us':
        time *= 1e-6
    elif unit == 'ns':
        time *= 1e-9
    return time

def compare_times(string, timeit_python, timeit_numpy):
    time_python = compute_time_in_second(timeit_python)
    time_numpy = compute_time_in_second(timeit_numpy)

    print(string + ': ratio times (Python / NumPy): ', 
          time_python/time_numpy)


compare_times('Creation of object', timeit_python, timeit_numpy)

12.7 us +- 1.14 us per loop (mean +- std. dev. of 7 runs, 100000 loops each)

1.31 us +- 112 ns per loop (mean +- std. dev. of 7 runs, 1000000 loops each)

Creation of object: ratio times (Python / NumPy):  9.694656488549617


n = 200000
python_r_1 = range(n)
python_r_2 = range(n)

numpy_a_1 = np.arange(n)
numpy_a_2 = np.arange(n)


%%capture timeit_python
%%timeit
# Regular Python
[(x + y) for x, y in zip(python_r_1, python_r_2)]


print(timeit_python)

16.6 ms +- 220 us per loop (mean +- std. dev. of 7 runs, 100 loops each)


%%capture timeit_numpy
%%timeit
#Numpy
numpy_a_1 + numpy_a_2


print(timeit_numpy)

246 us +- 16.7 us per loop (mean +- std. dev. of 7 runs, 1000 loops each)


compare_times('Additions', timeit_python, timeit_numpy)

16.6 ms +- 220 us per loop (mean +- std. dev. of 7 runs, 100 loops each)

246 us +- 16.7 us per loop (mean +- std. dev. of 7 runs, 1000 loops each)

Additions: ratio times (Python / NumPy):  67.47967479674797


A = np.random.random([4, 5])
A

array([[0.89925962, 0.31519992, 0.17170063, 0.06102236, 0.6055506 ],
       [0.43365108, 0.67461267, 0.34962124, 0.75648088, 0.53096922],
       [0.65643503, 0.4723704 , 0.77202087, 0.50192904, 0.14067726],
       [0.80709755, 0.2314217 , 0.65465368, 0.28459125, 0.54727527]])


# Get the element from second line, first column
A[1, 0]

0.4336510750584107


# Get the first two lines
A[:2]

array([[0.89925962, 0.31519992, 0.17170063, 0.06102236, 0.6055506 ],
       [0.43365108, 0.67461267, 0.34962124, 0.75648088, 0.53096922]])


# Get the last column
A[:, -1]

array([0.6055506 , 0.53096922, 0.14067726, 0.54727527])


# Get the first two lines and the columns with an even index
A[:2, ::2]

array([[0.89925962, 0.17170063, 0.6055506 ],
       [0.43365108, 0.34962124, 0.53096922]])


cond = A > 0.5
print(cond)
print(A[cond])

[[ True False False False  True]
 [False  True False  True  True]
 [ True False  True  True False]
 [ True False  True False  True]]
[0.89925962 0.6055506  0.67461267 0.75648088 0.53096922 0.65643503
 0.77202087 0.50192904 0.80709755 0.65465368 0.54727527]


# Selecting only particular columns
print(A)
A[:, [0, 1, 4]]

[[0.89925962 0.31519992 0.17170063 0.06102236 0.6055506 ]
 [0.43365108 0.67461267 0.34962124 0.75648088 0.53096922]
 [0.65643503 0.4723704  0.77202087 0.50192904 0.14067726]
 [0.80709755 0.2314217  0.65465368 0.28459125 0.54727527]]

array([[0.89925962, 0.31519992, 0.6055506 ],
       [0.43365108, 0.67461267, 0.53096922],
       [0.65643503, 0.4723704 , 0.14067726],
       [0.80709755, 0.2314217 , 0.54727527]])


(A+5)**2

array([[34.80126403, 28.25135024, 26.7464874 , 25.61394735, 31.42219749],
       [29.52456401, 32.20122896, 28.61844741, 33.13707212, 30.59162046],
       [31.99525724, 29.94683782, 33.31622493, 30.27122313, 26.42656267],
       [33.72238198, 27.36777304, 31.97510827, 27.92690466, 30.77226288]])


np.exp(A) # With numpy arrays, use the functions from numpy !

array([[2.45778274, 1.37053329, 1.18732233, 1.06292268, 1.83226077],
       [1.54288042, 1.9632724 , 1.41853016, 2.13076459, 1.70057974],
       [1.92790714, 1.60379132, 2.16413527, 1.65190478, 1.15105309],
       [2.24139301, 1.26039064, 1.92447592, 1.3292186 , 1.72853679]])


A[:, 0] = 0.
print(A)

[[0.         0.31519992 0.17170063 0.06102236 0.6055506 ]
 [0.         0.67461267 0.34962124 0.75648088 0.53096922]
 [0.         0.4723704  0.77202087 0.50192904 0.14067726]
 [0.         0.2314217  0.65465368 0.28459125 0.54727527]]


# BONUS: Safe element-wise inverse with masks
cond = (A != 0)
A[cond] = 1./A[cond]
print(A)

[[ 0.          3.17258959  5.82409047 16.387435    1.65138967]
 [ 0.          1.48233207  2.86023812  1.32191048  1.88334836]
 [ 0.          2.11698277  1.29530177  1.99231351  7.10846954]
 [ 0.          4.32111589  1.5275252   3.51381149  1.82723405]]


print([s for s in dir(A) if not s.startswith('__')])

['T', 'all', 'any', 'argmax', 'argmin', 'argpartition', 'argsort', 'astype', 'base', 'byteswap', 'choose', 'clip', 'compress', 'conj', 'conjugate', 'copy', 'ctypes', 'cumprod', 'cumsum', 'data', 'diagonal', 'dot', 'dtype', 'dump', 'dumps', 'fill', 'flags', 'flat', 'flatten', 'getfield', 'imag', 'item', 'itemset', 'itemsize', 'max', 'mean', 'min', 'nbytes', 'ndim', 'newbyteorder', 'nonzero', 'partition', 'prod', 'ptp', 'put', 'ravel', 'real', 'repeat', 'reshape', 'resize', 'round', 'searchsorted', 'setfield', 'setflags', 'shape', 'size', 'sort', 'squeeze', 'std', 'strides', 'sum', 'swapaxes', 'take', 'tobytes', 'tofile', 'tolist', 'tostring', 'trace', 'transpose', 'var', 'view']


# Ex1: Get the mean through different dimensions
print(A)
print('Mean value', A.mean())
print('Mean line', A.mean(axis=0))
print('Mean column', A.mean(axis=1))

[[ 0.          3.17258959  5.82409047 16.387435    1.65138967]
 [ 0.          1.48233207  2.86023812  1.32191048  1.88334836]
 [ 0.          2.11698277  1.29530177  1.99231351  7.10846954]
 [ 0.          4.32111589  1.5275252   3.51381149  1.82723405]]
Mean value 2.9143043986324475
Mean line [0.         2.77325508 2.87678889 5.80386762 3.1176104 ]
Mean column [5.40710095 1.50956581 2.50261352 2.23793733]


# Ex2: Convert a 2D array in 1D keeping all elements
print(A, A.shape)
A_flat = A.flatten()
print(A_flat, A_flat.shape)

[[ 0.          3.17258959  5.82409047 16.387435    1.65138967]
 [ 0.          1.48233207  2.86023812  1.32191048  1.88334836]
 [ 0.          2.11698277  1.29530177  1.99231351  7.10846954]
 [ 0.          4.32111589  1.5275252   3.51381149  1.82723405]] (4, 5)
[ 0.          3.17258959  5.82409047 16.387435    1.65138967  0.
  1.48233207  2.86023812  1.32191048  1.88334836  0.          2.11698277
  1.29530177  1.99231351  7.10846954  0.          4.32111589  1.5275252
  3.51381149  1.82723405] (20,)


b = np.linspace(0, 10, 11)
c = b @ b
# before 3.5:
# c = b.dot(b)
print(b)
print(c)

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
385.0


dtypes = np.dtype([('country', 'S20'), ('density', 'i4'), 
                  ('area', 'i4'), ('population', 'i4')])
x = np.array([('Netherlands', 393, 41526, 16928800),
              ('Belgium', 337, 30510, 11007020),
              ('United Kingdom', 256, 243610, 62262000),
              ('Germany', 233, 357021, 81799600)], 
             dtype=dtypes)
arr = np.array(x, dtype=dtypes)
arr.sort(order='density')
print(arr)

[(b'Germany', 233, 357021, 81799600)
 (b'United Kingdom', 256, 243610, 62262000)
 (b'Belgium', 337,  30510, 11007020)
 (b'Netherlands', 393,  41526, 16928800)]


A = np.random.random([5,5])
print(A)
np.linalg.det(A)

[[0.47138506 0.41353868 0.09441948 0.225147   0.82335198]
 [0.04490952 0.14682972 0.31792846 0.22918746 0.73823443]
 [0.50485749 0.99705961 0.51896582 0.93318595 0.11375617]
 [0.37148317 0.0477689  0.29061475 0.41826056 0.47950005]
 [0.70324502 0.82838271 0.92172528 0.79532669 0.56698101]]

0.06968780805887545


squared_subA = A[1:3, 1:3]
print(squared_subA)
np.linalg.inv(squared_subA)

[[0.14682972 0.31792846]
 [0.99705961 0.51896582]]

array([[-2.15522717,  1.32033369],
       [ 4.14071576, -0.6097731 ]])


from scipy.sparse import csr_matrix
print(csr_matrix([[1, 2, 0], [0, 0, 3], [4, 0, 5]]))

  (0, 0)	1
  (0, 1)	2
  (1, 2)	3
  (2, 0)	4
  (2, 2)	5


# scalar and one-dimensional
a = np.array([1, 2, 3])
print(a)
b = 2
print(b)
c = a + b
print(c)

[1 2 3]
2
[3 4 5]


# scalar and two-dimensional
A = np.array([[1, 2, 3], [1, 2, 3]])
print(A)
b = 2
print(b)
C = A + b
print(C)

[[1 2 3]
 [1 2 3]]
2
[[3 4 5]
 [3 4 5]]


# one-dimensional and two-dimensional
A = np.array([[1, 2, 3], [1, 2, 3]])
print(A)
b = np.array([1, 2, 3])
print(b)
C = A + b
print(C)

[[1 2 3]
 [1 2 3]]
[1 2 3]
[[2 4 6]
 [2 4 6]]


# Vetorize a function
def myfunc(a, b):
    "Return a-b if a>b, otherwise return a+b"
    if a > b:
        return a - b
    else:
        return a + b

vfunc = np.vectorize(myfunc)
vfunc([1, 2, 3, 4], 2)

array([3, 4, 1, 2])


# Vectorize a problem

def dist_loop(X,Y):
    """
    Naive loop
    """
    res=0
    for x,y in zip(X,Y):
        res += (x-y)**2
    
    return np.sqrt(res)

     
def dist(x,y):   
    return np.sqrt(np.sum((x-y)**2))

a = np.random.random(1000)
b = np.random.random(1000)
%time dist_a_b = dist(a,b)
%time dist_a_b_loop = dist_loop(a,b)
assert np.isclose(dist_a_b, dist_a_b_loop)

CPU times: user 95 µs, sys: 0 ns, total: 95 µs
Wall time: 78.7 µs
CPU times: user 619 µs, sys: 0 ns, total: 619 µs
Wall time: 608 µs

	Matlab	Numpy
element wise	`.*`	`*`
dot product	`*`	`@`

Python training UGA 2017¶

Python scientific ecosystem¶

A short introduction to Numpy, Scipy and Pandas¶

Python scientific ecosystem¶

A short introduction on NumPy¶

NumPy efficiency¶

Manipulating NumPy arrays¶

Access elements¶

Indexes and slices¶

Using a mask to select elements validating a condition:¶

Perform array manipulations¶

Apply arithmetic operations to whole arrays (element-wise):¶

Apply functions element-wise:¶

Setting parts of arrays¶

Attributes and methods of `np.ndarray` (see the doc)¶

Remark: dot product¶

For Matlab users¶

To finish: `dtypes` and sub-packages¶

NumPy and SciPy sub-packages:¶

SciPy or NumPy ?¶

Broadcasting¶

Limitations of Broadcasting¶

Vectorization¶

Python training UGA 2017¶

Python scientific ecosystem¶

A short introduction to Numpy, Scipy and Pandas¶

Python scientific ecosystem¶

A short introduction on NumPy¶

NumPy efficiency¶

Manipulating NumPy arrays¶

Access elements¶

Indexes and slices¶

Using a mask to select elements validating a condition:¶

Perform array manipulations¶

Apply arithmetic operations to whole arrays (element-wise):¶

Apply functions element-wise:¶

Setting parts of arrays¶

Attributes and methods of np.ndarray (see the doc)¶

Remark: dot product¶

For Matlab users¶

To finish: dtypes and sub-packages¶

NumPy and SciPy sub-packages:¶

SciPy or NumPy ?¶

Broadcasting¶

Limitations of Broadcasting¶

Vectorization¶

Attributes and methods of `np.ndarray` (see the doc)¶

To finish: `dtypes` and sub-packages¶