import numpy as np


# from a list
l = [10.0, 12.5, 15.0, 17.5, 20.0]
np.array(l)

array([10. , 12.5, 15. , 17.5, 20. ])


# Uninitialized array: fast, but the values are arbitrary (whatever was in memory)
np.empty(4)

array([1.75274491e-316, 6.94225492e-310, 6.94224527e-310, 6.94225376e-310])


# Filled with zeros (slower than np.empty)
np.zeros([2, 6])

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])


# Three-dimensional array of ones: 2 blocks of 3 rows by 4 columns
a = np.ones((2, 3, 4))
# Shape, total number of elements and element type
print(a.shape, a.size, a.dtype)
a

(2, 3, 4) 24 float64

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])


# Like range but produces a 1D numpy array
np.arange(4)

array([0, 1, 2, 3])


# Start, stop and step can be given explicitly (the stop value is excluded)
np.arange(2., 4., 0.1)

array([2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3. , 3.1, 3.2,
       3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9])


# Equally-spaced elements between start and end (included)
np.linspace(10, 20, 5)

array([10. , 12.5, 15. , 17.5, 20. ])


# Convert a NumPy array back to a plain Python list
a = np.linspace(start=10, stop=20, num=5)
a.tolist()

[10.0, 12.5, 15.0, 17.5, 20.0]


A = np.random.random([4, 5])
A

array([[0.74931905, 0.4399789 , 0.96017188, 0.88886798, 0.28382067],
       [0.4532329 , 0.99181478, 0.07017858, 0.4993961 , 0.1678844 ],
       [0.59791893, 0.50793759, 0.77954852, 0.05390075, 0.984206  ],
       [0.93149267, 0.02959492, 0.60720976, 0.92916837, 0.24923606]])


# Get the element in the second row, first column
A[1, 0]

0.45323290450951004


# Get the first two rows
A[:2]

array([[0.74931905, 0.4399789 , 0.96017188, 0.88886798, 0.28382067],
       [0.4532329 , 0.99181478, 0.07017858, 0.4993961 , 0.1678844 ]])


# Get the last column
A[:, -1]

array([0.28382067, 0.1678844 , 0.984206  , 0.24923606])


# Get the first two rows and the columns with an even index
A[:2, ::2]

array([[0.74931905, 0.96017188, 0.28382067],
       [0.4532329 , 0.07017858, 0.1678844 ]])


cond = A > 0.5
print(cond)
print(A[cond])

[[ True False  True  True False]
 [False  True False False False]
 [ True  True  True False  True]
 [ True False  True  True False]]
[0.74931905 0.96017188 0.88886798 0.99181478 0.59791893 0.50793759
 0.77954852 0.984206   0.93149267 0.60720976 0.92916837]


# Selecting only particular columns
print(A)
A[:, [0, 1, 4]]

[[0.74931905 0.4399789  0.96017188 0.88886798 0.28382067]
 [0.4532329  0.99181478 0.07017858 0.4993961  0.1678844 ]
 [0.59791893 0.50793759 0.77954852 0.05390075 0.984206  ]
 [0.93149267 0.02959492 0.60720976 0.92916837 0.24923606]]

array([[0.74931905, 0.4399789 , 0.28382067],
       [0.4532329 , 0.99181478, 0.1678844 ],
       [0.59791893, 0.50793759, 0.984206  ],
       [0.93149267, 0.02959492, 0.24923606]])


(A+5)**2

array([[33.05466955, 29.59337041, 35.52364888, 34.67876614, 27.91876089],
       [29.73774911, 35.90184432, 25.7067108 , 30.24335742, 26.70702917],
       [31.3366963 , 30.33737648, 33.4031811 , 25.54191284, 35.81072142],
       [35.18260526, 25.29682501, 31.44080124, 35.15503757, 27.55447916]])


np.exp(A) # With numpy arrays, use the functions from numpy !

array([[2.11555894, 1.55267445, 2.61214542, 2.43237461, 1.32819473],
       [1.57339059, 2.6961229 , 1.07269972, 1.6477259 , 1.18279987],
       [1.81833078, 1.66186022, 2.1804876 , 1.05537986, 2.67568654],
       [2.53829518, 1.0300372 , 1.8353033 , 2.53240228, 1.28304487]])


n = 1000


%%capture timeit_python
# to capture the result of the command timeit in the variable timeit_python
# Pure Python
%timeit list(range(n))


%%capture timeit_numpy
# numpy
%timeit np.arange(n)


compare_times('Creation of object', timeit_python, timeit_numpy)

14.1 us +- 1.04 us per loop (mean +- std. dev. of 7 runs, 100000 loops each)

1.63 us +- 94.5 ns per loop (mean +- std. dev. of 7 runs, 1000000 loops each)

Creation of object: ratio times (Python / NumPy):  8.650306748466258


# Operands for the addition benchmark: two Python ranges and
# two NumPy arrays, each holding the integers 0 .. n-1
n = 200000
python_r_1, python_r_2 = range(n), range(n)
numpy_a_1, numpy_a_2 = np.arange(n), np.arange(n)


%%capture timeit_python
%%timeit
# Regular Python
[(x + y) for x, y in zip(python_r_1, python_r_2)]


%%capture timeit_numpy
%%timeit
#Numpy
numpy_a_1 + numpy_a_2


compare_times('Additions', timeit_python, timeit_numpy)

20.1 ms +- 885 us per loop (mean +- std. dev. of 7 runs, 10 loops each)

276 us +- 25.5 us per loop (mean +- std. dev. of 7 runs, 1000 loops each)

Additions: ratio times (Python / NumPy):  72.82608695652175


A[:, 0] = 0.
print(A)

[[0.         0.4399789  0.96017188 0.88886798 0.28382067]
 [0.         0.99181478 0.07017858 0.4993961  0.1678844 ]
 [0.         0.50793759 0.77954852 0.05390075 0.984206  ]
 [0.         0.02959492 0.60720976 0.92916837 0.24923606]]


# BONUS: safe element-wise inverse using a mask.
# Only the entries selected by the mask (the non-zero ones) are inverted
# in place; the zeros are left untouched, so nothing is divided by zero.
cond = A != 0
np.divide(1., A, out=A, where=cond)
print(A)

[[ 0.          2.27283627  1.04148019  1.12502646  3.52335153]
 [ 0.          1.00825277 14.24936289  2.00241854  5.95647958]
 [ 0.          1.96874581  1.28279379 18.55261602  1.01604746]
 [ 0.         33.78958815  1.64687736  1.07623121  4.01226058]]


print([s for s in dir(A) if not s.startswith('__')])

['T', 'all', 'any', 'argmax', 'argmin', 'argpartition', 'argsort', 'astype', 'base', 'byteswap', 'choose', 'clip', 'compress', 'conj', 'conjugate', 'copy', 'ctypes', 'cumprod', 'cumsum', 'data', 'diagonal', 'dot', 'dtype', 'dump', 'dumps', 'fill', 'flags', 'flat', 'flatten', 'getfield', 'imag', 'item', 'itemset', 'itemsize', 'max', 'mean', 'min', 'nbytes', 'ndim', 'newbyteorder', 'nonzero', 'partition', 'prod', 'ptp', 'put', 'ravel', 'real', 'repeat', 'reshape', 'resize', 'round', 'searchsorted', 'setfield', 'setflags', 'shape', 'size', 'sort', 'squeeze', 'std', 'strides', 'sum', 'swapaxes', 'take', 'tobytes', 'tofile', 'tolist', 'tostring', 'trace', 'transpose', 'var', 'view']


print(A)
print('Mean value', A.mean())
print('Mean line', A.mean(axis=0))
print('Mean column', A.mean(axis=1))

[[ 0.          2.27283627  1.04148019  1.12502646  3.52335153]
 [ 0.          1.00825277 14.24936289  2.00241854  5.95647958]
 [ 0.          1.96874581  1.28279379 18.55261602  1.01604746]
 [ 0.         33.78958815  1.64687736  1.07623121  4.01226058]]
Mean value 4.7262184313320255
Mean line [0.         9.75985575 4.55512856 5.68907306 3.62703479]
Mean column [1.59253889 4.64330276 4.56404062 8.10499146]


print(A, A.shape)
A_flat = A.flatten()
print(A_flat, A_flat.shape)

[[ 0.          2.27283627  1.04148019  1.12502646  3.52335153]
 [ 0.          1.00825277 14.24936289  2.00241854  5.95647958]
 [ 0.          1.96874581  1.28279379 18.55261602  1.01604746]
 [ 0.         33.78958815  1.64687736  1.07623121  4.01226058]] (4, 5)
[ 0.          2.27283627  1.04148019  1.12502646  3.52335153  0.
  1.00825277 14.24936289  2.00241854  5.95647958  0.          1.96874581
  1.28279379 18.55261602  1.01604746  0.         33.78958815  1.64687736
  1.07623121  4.01226058] (20,)


new_A = A_flat.reshape((4, 5))
print(new_A, new_A.shape)

[[ 0.          2.27283627  1.04148019  1.12502646  3.52335153]
 [ 0.          1.00825277 14.24936289  2.00241854  5.95647958]
 [ 0.          1.96874581  1.28279379 18.55261602  1.01604746]
 [ 0.         33.78958815  1.64687736  1.07623121  4.01226058]] (4, 5)


# For two 1-D arrays, `@` computes the dot product: c = sum(b[i] * b[i])
b = np.linspace(0, 10, num=11)
c = b @ b

print(b, c, sep='\n')

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
385.0


# Identity matrix whose rows from index 5 onwards are zeroed out:
# multiplying by it keeps the first five components of b and
# sets the remaining ones to zero
I = np.identity(11)
I[5:] = 0.
print(I, b)

I @ b

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]

array([0., 1., 2., 3., 4., 0., 0., 0., 0., 0., 0.])


# Structured array: each record holds a country name (byte string of at
# most 20 characters) followed by three 32-bit integers.
dtypes = np.dtype([
    ('country', 'S20'),
    ('density', 'i4'),
    ('area', 'i4'),
    ('population', 'i4'),
])
x = np.array(
    [
        ('Netherlands', 393, 41526, 16928800),
        ('Belgium', 337, 30510, 11007020),
        ('United Kingdom', 256, 243610, 62262000),
        ('Germany', 233, 357021, 81799600),
    ],
    dtype=dtypes,
)
# Sorted copy ordered by the 'density' field; x itself keeps its order
arr = np.sort(x, order='density')
print(arr)

[(b'Germany', 233, 357021, 81799600)
 (b'United Kingdom', 256, 243610, 62262000)
 (b'Belgium', 337,  30510, 11007020)
 (b'Netherlands', 393,  41526, 16928800)]


# Load the weather records into a structured array: the field names are
# taken from the file's header line and one dtype is given per column.
meteo_data = np.genfromtxt(
    '../TP/TP1_MeteoData/data/synop-2016.csv',
    delimiter=',',
    names=True,
    dtype=('f8', 'S25', 'f8', 'f8', 'i4', 'f8', 'S20'),
)
meteo_data

array([(7761., b'2016-01-01T01:00:00+01:00', 2. , 283.75, 94, 0.2, b'AJACCIO'),
       (7761., b'2016-01-01T04:00:00+01:00', 2.2, 283.95, 91, 0.2, b'AJACCIO'),
       (7761., b'2016-01-01T07:00:00+01:00', 1.7, 284.05, 88, 0.2, b'AJACCIO'),
       ...,
       (7761., b'2016-12-31T16:00:00+01:00', 2.2, 287.75, 61, 0. , b'AJACCIO'),
       (7761., b'2016-12-31T19:00:00+01:00', 1.9, 284.05, 79, 0. , b'AJACCIO'),
       (7761., b'2016-12-31T22:00:00+01:00', 2.5, 283.05, 79, 0. , b'AJACCIO')],
      dtype=[('ID_OMM_station', '<f8'), ('Date', 'S25'), ('Average_wind_10_mn', '<f8'), ('Temperature', '<f8'), ('Humidity', '<i4'), ('Rainfall_3_last_hours', '<f8'), ('Station', 'S20')])


A = np.random.random([5,5])
print(A)
np.linalg.det(A)

[[0.97686707 0.19570122 0.36140422 0.06750466 0.18070765]
 [0.53024102 0.68681885 0.67799432 0.14903517 0.96101573]
 [0.41638762 0.08302575 0.37110877 0.04414894 0.49869079]
 [0.86026592 0.16876345 0.23845197 0.60002328 0.68178478]
 [0.9029329  0.68171618 0.35792988 0.09063473 0.78865979]]

0.046431202254328265


square_subA = A[1:3, 1:3]
print(square_subA)
np.linalg.inv(square_subA)

[[0.68681885 0.67799432]
 [0.08302575 0.37110877]]

array([[ 1.8686853 , -3.41398029],
       [-0.41806881,  3.4584154 ]])


from scipy.sparse import csr_matrix
print(csr_matrix([[1, 2, 0], [0, 0, 3], [4, 0, 5]]))

  (0, 0)	1
  (0, 1)	2
  (1, 2)	3
  (2, 0)	4
  (2, 2)	5


import pandas as pd

filename = "../TP/TP1_MeteoData/data/synop-2016.csv"

# The first line of the CSV file holds the column names.
df = pd.read_csv(filename, sep=',', encoding="utf-8", header=0)

# Offset subtracted from the stored temperatures before printing
# (presumably the file stores kelvins and this converts to degrees
# Celsius -- TODO confirm against the data source).
KELVIN_OFFSET = 273.15

# Max temperature
print(df['Temperature'].max() - KELVIN_OFFSET)

# Mean temperature
print(df['Temperature'].mean() - KELVIN_OFFSET)

# Total rainfall
print(df['Rainfall 3 last hours'].sum())

# August max temperature: rows and column are selected in a single .loc
# call rather than by chained indexing.
in_august = df['Date'].str.startswith('2016-08')
print(df.loc[in_august, 'Temperature'].max() - KELVIN_OFFSET)

32.60000000000002
16.268433652530803
2334.7
30.5

	Matlab	NumPy
element-wise	`.*`	`*`
matrix/dot product	`*`	`@`

Python training UGA 2017¶

Python scientific ecosystem¶

A short introduction to NumPy, SciPy and Pandas¶

Python scientific ecosystem¶

A short introduction on NumPy¶

Array creation¶

Generate sequences¶

Manipulating NumPy arrays¶

Access elements¶

Indexes and slices¶

Using a mask to select elements validating a condition:¶

Perform array manipulations¶

Apply arithmetic operations to whole arrays (element-wise):¶

Apply functions element-wise:¶

NumPy efficiency¶

Array creation:¶

Array operations:¶

Setting parts of arrays¶

Attributes and methods of `np.ndarray` (see the doc)¶

Example 1: Get the mean through different dimensions¶

Example 2: Manipulate the shape of arrays while keeping all elements¶

Remark: matrix/dot product¶

For Matlab users¶

To finish: `dtypes` and sub-packages¶

NumPy and SciPy sub-packages:¶

Matrix inversion (only on square matrices !)¶

SciPy or NumPy ?¶

Introduction to Pandas: Python Data Analysis Library¶

Some Solutions of Practical 1 with Pandas¶

Python training UGA 2017¶

Python scientific ecosystem¶

A short introduction to NumPy, SciPy and Pandas¶

Python scientific ecosystem¶

A short introduction on NumPy¶

Array creation¶

Generate sequences¶

Manipulating NumPy arrays¶

Access elements¶

Indexes and slices¶

Using a mask to select elements validating a condition:¶

Perform array manipulations¶

Apply arithmetic operations to whole arrays (element-wise):¶

Apply functions element-wise:¶

NumPy efficiency¶

Array creation:¶

Array operations:¶

Setting parts of arrays¶

Attributes and methods of np.ndarray (see the doc)¶

Example 1: Get the mean through different dimensions¶

Example 2: Manipulate the shape of arrays while keeping all elements¶

Remark: matrix/dot product¶

For Matlab users¶

To finish: dtypes and sub-packages¶

NumPy and SciPy sub-packages:¶

Matrix inversion (only on square matrices !)¶

SciPy or NumPy ?¶

Introduction to Pandas: Python Data Analysis Library¶

Some Solutions of Practical 1 with Pandas¶

Attributes and methods of `np.ndarray` (see the doc)¶

To finish: `dtypes` and sub-packages¶