2.2. Advanced Numpy¶

Pauli Virtanen

chararray
maskedarray
matrix
chararray
masked_array
recarray
matrix
>>> import numpy as np

/*
 * C-level layout of an ndarray: a pointer to a raw block of memory plus
 * the metadata (data type descriptor, shape, strides) used to interpret it.
 */
typedef struct PyArrayObject {
        PyObject_HEAD

        /* Block of memory */
        char *data;             /* raw buffer, addressed byte by byte */

        /* Data type descriptor */
        PyArray_Descr *descr;   /* tells how one item of the buffer is to be read */

        /* Indexing scheme */
        int nd;                 /* presumably the number of axes, i.e. the length
                                   of the two arrays below -- confirm */
        npy_intp *dimensions;   /* extent along each axis (the shape) */
        npy_intp *strides;      /* step to the next item along each axis; the
                                   examples in this chapter count it in bytes */

        /* Other stuff */
        PyObject *base;         /* presumably the object reported by ``.base``,
                                   i.e. the owner of borrowed memory -- confirm */
        int flags;              /* presumably the bit field behind ``.flags``
                                   (OWNDATA, WRITEABLE, ...) -- confirm */
        PyObject *weakreflist;  /* NOTE(review): looks like weak-reference
                                   bookkeeping -- confirm */
} PyArrayObject;

>>> x = np.array([1, 2, 3], dtype=np.int32)
>>> x.data      
<... at ...>
>>> str(x.data)  
'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'

>>> x.__array_interface__['data'][0] 
64803824

__array_interface__
>>> x.__array_interface__  
{'data': (35828928, False),
 'descr': [('', '<i4')],
 'shape': (3,),
 'strides': None,
 'typestr': '<i4',
 'version': 3}

ndarrays
>>> x = np.array([1, 2, 3, 4])
>>> y = x[:-1]
>>> x[0] = 9
>>> y
array([9, 2, 3])

ndarray
>>> x = b'1234'      # The 'b' is for "bytes", necessary in Python 3

>>> y = np.frombuffer(x, dtype=np.int8)
>>> y.data      
<... at ...>
>>> y.base is x
True

>>> y.flags
  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : False
  ALIGNED : True
  UPDATEIFCOPY : False

owndata
writeable
dtype
>
<
|
>>> np.dtype(int).type      
<type 'numpy.int64'>
>>> np.dtype(int).itemsize
8
>>> np.dtype(int).byteorder
'='

.wav
.wav
"RIFF"
"WAVE"
"fmt "
"data"
data_size
.wav
>>> wav_header_dtype = np.dtype([
...     ("chunk_id", (bytes, 4)), # flexible-sized scalar type, item size 4
...     ("chunk_size", "<u4"),    # little-endian unsigned 32-bit integer
...     ("format", "S4"),         # 4-byte string
...     ("fmt_id", "S4"),
...     ("fmt_size", "<u4"),
...     ("audio_fmt", "<u2"),     #
...     ("num_channels", "<u2"),  # .. more of the same ...
...     ("sample_rate", "<u4"),   #
...     ("byte_rate", "<u4"),
...     ("block_align", "<u2"),
...     ("bits_per_sample", "<u2"),
...     ("data_id", ("S1", (2, 2))), # sub-array, just for fun!
...     ("data_size", "<u4"),
...     #
...     # the sound data itself cannot be represented here:
...     # it does not have a fixed size
...    ])

>>> wav_header_dtype['format']
dtype('S4')
>>> wav_header_dtype.fields     
dict_proxy({'block_align': (dtype('uint16'), 32), 'format': (dtype('S4'), 8), 'data_id': (dtype(('S1', (2, 2))), 36), 'fmt_id': (dtype('S4'), 12), 'byte_rate': (dtype('uint32'), 28), 'chunk_id': (dtype('S4'), 0), 'num_channels': (dtype('uint16'), 22), 'sample_rate': (dtype('uint32'), 24), 'bits_per_sample': (dtype('uint16'), 34), 'chunk_size': (dtype('uint32'), 4), 'fmt_size': (dtype('uint32'), 16), 'data_size': (dtype('uint32'), 40), 'audio_fmt': (dtype('uint16'), 20)})
>>> wav_header_dtype.fields['format']
(dtype('S4'), 8)

format
>>> wav_header_dtype = np.dtype(dict(
...   names=['format', 'sample_rate', 'data_id'],
...   offsets=[offset_1, offset_2, offset_3], # counted from start of structure in bytes
...   formats=list of dtypes for each of the fields,
... ))  

data_id
>>> f = open('data/test.wav', 'rb')
>>> wav_header = np.fromfile(f, dtype=wav_header_dtype, count=1)
>>> f.close()  
>>> print(wav_header)   
[ ('RIFF', 17402L, 'WAVE', 'fmt ', 16L, 1, 1, 16000L, 32000L, 2, 16, [['d', 'a'], ['t', 'a']], 17366L)]
>>> wav_header['sample_rate']
array([16000], dtype=uint32)

>>> wav_header['data_id']  
array([[['d', 'a'],
        ['t', 'a']]],
      dtype='|S1')
>>> wav_header.shape
(1,)
>>> wav_header['data_id'].shape
(1, 2, 2)

wavfile
audiolab
.astype(dtype)
.view(dtype)
>>> x = np.array([1, 2, 3, 4], dtype=np.float64)
>>> x
array([ 1.,  2.,  3.,  4.])
>>> y = x.astype(np.int8)
>>> y
array([1, 2, 3, 4], dtype=int8)
>>> y + 1
array([2, 3, 4, 5], dtype=int8)
>>> y + 256
array([257, 258, 259, 260], dtype=int16)
>>> y + 256.0
array([ 257.,  258.,  259.,  260.])
>>> y + np.array([256], dtype=np.int32)
array([257, 258, 259, 260], dtype=int32)

>>> y[:] = y + 1.5
>>> y
array([2, 3, 4, 5], dtype=int8)

0x01
0x02
0x03
0x04
>>> x = np.array([1, 2, 3, 4], dtype=np.uint8)
>>> x.dtype = "<i2"
>>> x
array([ 513, 1027], dtype=int16)
>>> 0x0201, 0x0403
(513, 1027)

0x01
0x02
0x03
0x04
>>> y = x.view("<i4")
>>> y
array([67305985], dtype=int32)
>>> 0x04030201
67305985

0x01
0x02
0x03
0x04
.view()
>>> x[1] = 5
>>> y
array([328193], dtype=int32)
>>> y.base is x
True

>>> x = np.zeros((10, 10, 4), dtype=np.int8)
>>> x[:, :, 0] = 1
>>> x[:, :, 1] = 2
>>> x[:, :, 2] = 3
>>> x[:, :, 3] = 4

>>> y = ...                     

>>> assert (y['r'] == 1).all()  
>>> assert (y['g'] == 2).all()  
>>> assert (y['b'] == 3).all()  
>>> assert (y['a'] == 4).all()  

>>> y = x.view([('r', 'i1'),
...             ('g', 'i1'),
...             ('b', 'i1'),
...             ('a', 'i1')]
...              )[:, :, 0]

>>> y = np.array([[1, 3], [2, 4]], dtype=np.uint8).transpose()
>>> x = y.copy()
>>> x
array([[1, 2],
       [3, 4]], dtype=uint8)
>>> y
array([[1, 2],
       [3, 4]], dtype=uint8)
>>> x.view(np.int16)
array([[ 513],
       [1027]], dtype=int16)
>>> 0x0201, 0x0403
(513, 1027)
>>> y.view(np.int16)
array([[ 769, 1026]], dtype=int16)

x[0,1]
>>> 0x0301, 0x0402
(769, 1026)

>>> x = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]], dtype=np.int8)
>>> str(x.data)  
'\x01\x02\x03\x04\x05\x06\x07\x08\t'

At which byte in ``x.data`` does the item ``x[1, 2]`` begin?

  >>> x.strides
  (3, 1)
  >>> byte_offset = 3*1 + 1*2   # to find x[1, 2]
  >>> x.flat[byte_offset]
  6
  >>> x[1, 2]
  6

- simple, **flexible**

>>> x = np.array([[1, 2, 3],
...               [4, 5, 6]], dtype=np.int16, order='C')
>>> x.strides
(6, 2)
>>> str(x.data)  
'\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00'

>>> y = np.array(x, order='F')
>>> y.strides
(2, 4)
>>> str(y.data)  
'\x01\x00\x04\x00\x02\x00\x05\x00\x03\x00\x06\x00'

.view()
>>> y = np.array([[1, 3], [2, 4]], dtype=np.uint8).transpose()
>>> x = y.copy()

>>> x.strides
(2, 1)
>>> y.strides
(1, 2)

>>> str(x.data)  
'\x01\x02\x03\x04'
>>> str(y.data)  
'\x01\x03\x02\x04'

.copy()
shape
strides
data
>>> x = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32)
>>> y = x[::-1]
>>> y
array([6, 5, 4, 3, 2, 1], dtype=int32)
>>> y.strides
(-4,)

>>> y = x[2:]
>>> y.__array_interface__['data'][0] - x.__array_interface__['data'][0]
8

>>> x = np.zeros((10, 10, 10), dtype=np.float64)
>>> x.strides
(800, 80, 8)
>>> x[::2,::3,::4].strides
(1600, 240, 32)

>>> x = np.zeros((10, 10, 10), dtype=np.float64)
>>> x.strides
(800, 80, 8)
>>> x.T.strides
(8, 80, 800)

>>> a = np.arange(6, dtype=np.int8).reshape(3, 2)
>>> b = a.T
>>> b.strides
(1, 2)

>>> str(a.data)  
'\x00\x01\x02\x03\x04\x05'
>>> b
array([[0, 2, 4],
       [1, 3, 5]], dtype=int8)
>>> c = b.reshape(3*2)
>>> c
array([0, 2, 4, 1, 3, 5], dtype=int8)

c
a
reshape
>>> from numpy.lib.stride_tricks import as_strided
>>> help(as_strided)    
as_strided(x, shape=None, strides=None)
   Make an ndarray from the given array with the given shape and strides

as_strided
>>> x = np.array([1, 2, 3, 4], dtype=np.int16)
>>> as_strided(x, strides=(2*2, ), shape=(2, ))
array([1, 3], dtype=int16)
>>> x[::2]
array([1, 3], dtype=int16)

array([1, 2, 3, 4], dtype=np.int8)

-> array([[1, 2, 3, 4],
          [1, 2, 3, 4],
          [1, 2, 3, 4]], dtype=np.int8)

as_strided
Hint: byte_offset = stride[0]*index[0] + stride[1]*index[1] + ...

>>> x = np.array([1, 2, 3, 4], dtype=np.int8)
>>> y = as_strided(x, strides=(0, 1), shape=(3, 4))
>>> y
array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]], dtype=int8)
>>> y.base.base is x
True

[1, 2, 3, 4]
[5, 6, 7]
>>> x = np.array([1, 2, 3, 4], dtype=np.int16)
>>> x2 = as_strided(x, strides=(0, 1*2), shape=(3, 4))
>>> x2
array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]], dtype=int16)

>>> y = np.array([5, 6, 7], dtype=np.int16)
>>> y2 = as_strided(y, strides=(1*2, 0), shape=(3, 4))
>>> y2
array([[5, 5, 5, 5],
       [6, 6, 6, 6],
       [7, 7, 7, 7]], dtype=int16)

>>> x2 * y2
array([[ 5, 10, 15, 20],
       [ 6, 12, 18, 24],
       [ 7, 14, 21, 28]], dtype=int16)

>>> x = np.array([1, 2, 3, 4], dtype=np.int16)
>>> y = np.array([5, 6, 7], dtype=np.int16)
>>> x[np.newaxis,:] * y[:,np.newaxis]
array([[ 5, 10, 15, 20],
       [ 6, 12, 18, 24],
       [ 7, 14, 21, 28]], dtype=int16)

>>> x = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]], dtype=np.int32)

>>> x_diag = as_strided(x, shape=(3,), strides=(???,)) 

[2, 6]
>>> x_diag = as_strided(x, shape=(3, ), strides=((3+1)*x.itemsize, ))
>>> x_diag
array([1, 5, 9], dtype=int32)

>>> as_strided(x[0, 1:], shape=(2, ), strides=((3+1)*x.itemsize, ))
array([2, 6], dtype=int32)

>>> as_strided(x[1:, 0], shape=(2, ), strides=((3+1)*x.itemsize, ))
array([4, 8], dtype=int32)

>>> y = np.diag(x, k=1)
>>> y
array([2, 6], dtype=int32)

>>> y.flags.owndata  
False

>>> x = np.arange(5*5*5*5).reshape(5, 5, 5, 5)
>>> s = 0
>>> for i in range(5):
...    for j in range(5):
...       s += x[j, i, j, i]

sum()
>>> y = as_strided(x, shape=(5, 5), strides=(TODO, TODO))   
>>> s2 = ...   
>>> assert s == s2   

>>> y = as_strided(x, shape=(5, 5), strides=((5*5*5 + 5)*x.itemsize,
...                                          (5*5 + 1)*x.itemsize))
>>> s2 = y.sum()

In [1]: x = np.zeros((20000,))

In [2]: y = np.zeros((20000*67,))[::67]

In [3]: x.shape, y.shape
((20000,), (20000,))

In [4]: %timeit x.sum()
100000 loops, best of 3: 0.180 ms per loop

In [5]: %timeit y.sum()
100000 loops, best of 3: 2.34 ms per loop

In [6]: x.strides, y.strides
((8,), (536,))

>>> a -= b    

>>> a -= b.copy()    

>>> x = np.array([[1, 2], [3, 4]])
>>> x -= x.transpose()
>>> x   
array([[ 0, -1],
       [ 4,  0]])

>>> y = np.array([[1, 2], [3, 4]])
>>> y -= y.T.copy()
>>> y
array([[ 0, -1],
       [ 1,  0]])

x
x.transpose()
x -= x.transpose()
x
x.transpose()
.base
.data
.astype()
.view()
as_strided
diag
np.add, np.subtract, scipy.special.*, ...

void ufunc_loop(void **args, int *dimensions, int *steps, void *data)
{
    /*
     * int8 output = elementwise_function(int8 input_1, int8 input_2)
     *
     * Inner loop of the ufunc: a single call processes a whole run of
     * elements.  Every operand is walked with its own byte step, so none
     * of the three buffers has to be contiguous.  ``data`` is not used.
     */
    char *in_a = (char*)args[0];
    char *in_b = (char*)args[1];
    char *out = (char*)args[2];
    int remaining;

    /* dimensions[0] is the number of elements to process */
    for (remaining = dimensions[0]; remaining > 0; --remaining) {
        *out = elementwise_function(*in_a, *in_b);

        /* steps[k] is how far (in bytes) operand k advances per element */
        in_a += steps[0];
        in_b += steps[1];
        out += steps[2];
    }
}

char types[3]

types[0] = NPY_BYTE   /* type of first input arg */
types[1] = NPY_BYTE   /* type of second input arg */
types[2] = NPY_BYTE   /* type of the output arg */

PyObject *python_ufunc = PyUFunc_FromFuncAndData(
    ufunc_loop,
    NULL,
    types,
    1, /* ntypes */
    2, /* num_inputs */
    1, /* num_outputs */
    identity_element,
    name,
    docstring,
    unused)

ufunc_loop
PyUFunc_f_f
float elementwise_func(float input_1)
PyUFunc_ff_f
float elementwise_func(float input_1, float input_2)
PyUFunc_d_d
double elementwise_func(double input_1)
PyUFunc_dd_d
double elementwise_func(double input_1, double input_2)
PyUFunc_D_D
elementwise_func(npy_cdouble *input, npy_cdouble* output)
PyUFunc_DD_D
elementwise_func(npy_cdouble *in1, npy_cdouble *in2, npy_cdouble* out)
elementwise_func
mandel(z0, c)
z = z0
for k in range(iterations):
    z = z*z + c

z.real**2 + z.imag**2 > 1000
PyUFunc_*
PyUFunc_f_f
float elementwise_func(float input_1)
PyUFunc_ff_f
float elementwise_func(float input_1, float input_2)
PyUFunc_d_d
double elementwise_func(double input_1)
PyUFunc_dd_d
double elementwise_func(double input_1, double input_2)
PyUFunc_D_D
elementwise_func(complex_double *input, complex_double* output)
PyUFunc_DD_D
elementwise_func(complex_double *in1, complex_double *in2, complex_double* out)
NPY_BOOL, NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT,
NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG, NPY_FLOAT, NPY_DOUBLE,
NPY_LONGDOUBLE, NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE, NPY_DATETIME,
NPY_TIMEDELTA, NPY_OBJECT, NPY_STRING, NPY_UNICODE, NPY_VOID

# The elementwise function
# ------------------------

cdef void mandel_single_point(double complex *z_in, 
                              double complex *c_in,
                              double complex *z_out) nogil:
    #
    # The Mandelbrot iteration
    #
    # Reads the start value from z_in[0] and the constant from c_in[0],
    # iterates z -> z*z + c, and stores the final z in z_out[0].
    #

    #
    # Some points of note:
    #
    # - It's *NOT* allowed to call any Python functions here.
    #
    #   The Ufunc loop runs with the Python Global Interpreter Lock released.
    #   Hence, the ``nogil``.
    #
    # - And so all local variables must be declared with ``cdef``
    #
    # - Note also that this function receives *pointers* to the data;
    #   passing pointers is the "traditional" solution to passing complex
    #   variables around.
    #

    cdef double complex z = z_in[0]
    cdef double complex c = c_in[0]
    cdef int k  # the integer we use in the for loop

    # Straightforward iteration: at most 100 steps, stopping early once
    # |z|**2 exceeds 1000 (both limits are hard-coded here).

    for k in range(100):
        z = z*z + c
        if z.real**2 + z.imag**2 > 1000:
            break

    # Return the answer for this point
    z_out[0] = z

# Boilerplate Cython definitions
#
# You don't really need to read this part, it just pulls in
# stuff from the Numpy C headers.
# ----------------------------------------------------------

cdef extern from "numpy/arrayobject.h":
    void import_array()
    ctypedef int npy_intp
    cdef enum NPY_TYPES:
        NPY_CDOUBLE

cdef extern from "numpy/ufuncobject.h":
    void import_ufunc()
    ctypedef void (*PyUFuncGenericFunction)(char**, npy_intp*, npy_intp*, void*)
    object PyUFunc_FromFuncAndData(PyUFuncGenericFunction* func, void** data,
        char* types, int ntypes, int nin, int nout,
        int identity, char* name, char* doc, int c)

    void PyUFunc_DD_D(char**, npy_intp*, npy_intp*, void*)

# Required module initialization
# ------------------------------

import_array()
import_ufunc()

# The actual ufunc declaration
# ----------------------------

cdef PyUFuncGenericFunction loop_func[1]
cdef char input_output_types[3]
cdef void *elementwise_funcs[1]

loop_func[0] = PyUFunc_DD_D

input_output_types[0] = NPY_CDOUBLE
input_output_types[1] = NPY_CDOUBLE
input_output_types[2] = NPY_CDOUBLE

elementwise_funcs[0] = <void*>mandel_single_point

mandel = PyUFunc_FromFuncAndData(
    loop_func,
    elementwise_funcs,
    input_output_types,
    1, # number of supported input types
    2, # number of input args
    1, # number of output args
    0, # `identity` element, never mind this
    "mandel", # function name
    "mandel(z, c) -> computes iterated z*z + c", # docstring
    0 # unused
    )

"""
Plot Mandelbrot
================

Plot the Mandelbrot set.

"""

import numpy as np
import mandel
x = np.linspace(-1.7, 0.6, 1000)
y = np.linspace(-1.4, 1.4, 1000)
c = x[None,:] + 1j*y[:,None]
z = mandel.mandel(c, c)

import matplotlib.pyplot as plt
plt.imshow(abs(z)**2 < 1000, extent=[-1.7, 0.6, -1.4, 1.4])
plt.gray()
plt.show()

cdef void mandel_single_point(double complex *z_in,
                              double complex *c_in,
                              double complex *z_out) nogil:
   ...

cdef void mandel_single_point_singleprec(float complex *z_in,
                                         float complex *c_in,
                                         float complex *z_out) nogil:
   ...

# Register two inner loops under one ufunc: a double-precision complex
# one and a single-precision complex one.  The three arrays below are
# parallel: entry i of `loop_funcs` / `elementwise_funcs` goes with the
# i-th group of three type codes in `input_output_types`.
cdef PyUFuncGenericFunction loop_funcs[2]
cdef char input_output_types[3*2]
cdef void *elementwise_funcs[1*2]

# Loop 0: double-precision complex (2 inputs, 1 output)
loop_funcs[0] = PyUFunc_DD_D
input_output_types[0] = NPY_CDOUBLE
input_output_types[1] = NPY_CDOUBLE
input_output_types[2] = NPY_CDOUBLE
elementwise_funcs[0] = <void*>mandel_single_point

# Loop 1: single-precision complex (2 inputs, 1 output)
# NOTE: PyUFunc_FF_F and NPY_CFLOAT are not declared in this snippet;
# they need their own entries in the ``cdef extern`` declarations.
loop_funcs[1] = PyUFunc_FF_F
input_output_types[3] = NPY_CFLOAT
input_output_types[4] = NPY_CFLOAT
input_output_types[5] = NPY_CFLOAT
elementwise_funcs[1] = <void*>mandel_single_point_singleprec

mandel = PyUFunc_FromFuncAndData(
    loop_funcs,  # the two-entry array filled in above
    elementwise_funcs,
    input_output_types,
    2, # number of supported input types   <----------------
    2, # number of input args
    1, # number of output args
    0, # `identity` element, never mind this
    "mandel", # function name
    "mandel(z, c) -> computes iterated z*z + c", # docstring
    0 # unused
    )

output = elementwise_function(input)
output
input
output
input
input shape = (n, n)
output shape = ()      i.e.  scalar

(n, n) -> ()

input_1 shape = (m, n)
input_2 shape = (n, p)
output shape  = (m, p)

(m, n), (n, p) -> (m, p)

PyUFunc_FromFuncAndDataAndSignature
>>> import numpy.core.umath_tests as ut
>>> ut.matrix_multiply.signature
'(m,n),(n,p)->(m,p)'

>>> x = np.ones((10, 2, 4))
>>> y = np.ones((10, 4, 5))
>>> ut.matrix_multiply(x, y).shape
(10, 2, 5)

(m,n),(n,p) -> (m,p)
void gufunc_loop(void **args, int *dimensions, int *steps, void *data)
{
    /*
     * Inner loop of a generalized ufunc with signature (m,n),(n,p)->(m,p).
     *
     * dimensions[0] is the length of the outer loop; steps[0..2] are the
     * byte increments that move each operand to its next matrix.
     */
    char *input_1 = (char*)args[0];  /* these are as previously */
    char *input_2 = (char*)args[1];
    char *output = (char*)args[2];

    /*
     * Strides for the core dimensions are added after the non-core steps,
     * operand by operand, in the order the dimensions appear in the
     * signature.
     */
    int input_1_stride_m = steps[3];
    int input_1_stride_n = steps[4];
    int input_2_stride_n = steps[5];
    int input_2_stride_p = steps[6];
    int output_stride_m = steps[7];   /* output is (m, p): first core axis is m */
    int output_stride_p = steps[8];

    int m = dimensions[1]; /* core dimensions are added after */
    int n = dimensions[2]; /* the main dimension; order as in */
    int p = dimensions[3]; /* signature */

    int i;

    for (i = 0; i < dimensions[0]; ++i) {
        matmul_for_strided_matrices(input_1, input_2, output,
                                    strides for each array...);

        input_1 += steps[0];
        input_2 += steps[1];
        output += steps[2];
    }
}

PyBufferProcs tp_as_buffer
>>> from PIL import Image
>>> data = np.zeros((200, 200, 4), dtype=np.uint8)
>>> data[:, :] = [255, 0, 0, 255] # Red
>>> # In PIL, RGBA images consist of 32-bit integers whose bytes are [RR,GG,BB,AA]
>>> data = data.view(np.int32).squeeze()
>>> img = Image.frombuffer("RGBA", (200, 200), data, "raw", "RGBA", 0, 1)
>>> img.save('test.png')

data
img
"""
From buffer
============

Show how to exchange data between numpy and a library that only knows
the buffer interface.
"""

import numpy as np
from PIL import Image

# Let's make a sample image, RGBA format.
# Each channel is an unsigned byte: uint8 is needed to hold 254/255,
# which do not fit into int8 (maximum 127).

x = np.zeros((200, 200, 4), dtype=np.uint8)

x[:, :, 0] = 254  # red
x[:, :, 3] = 255  # opaque

data = x.view(np.int32)  # Check that you understand why this is OK!

# Give the raw decoder its full argument set ("raw", mode, stride,
# orientation), as in the interactive PIL example of this chapter.
img = Image.frombuffer("RGBA", (200, 200), data, "raw", "RGBA", 0, 1)
img.save('test.png')

#
# Modify the original data, and save again.
#
# It turns out that PIL, which knows next to nothing about Numpy,
# happily shares the same data.
#

x[:, :, 1] = 254
img.save('test2.png')

>>> x = np.array([[1, 2], [3, 4]])
>>> x.__array_interface__   
{'data': (171694552, False),      # memory address of data, is readonly?
 'descr': [('', '<i4')],          # data type descriptor
 'typestr': '<i4',                # same, in another form
 'strides': None,                 # strides; or None if in C-order
 'shape': (2, 2),
 'version': 3,
}

>>> from PIL import Image
>>> img = Image.open('data/test.png')
>>> img.__array_interface__     
{'data': ...,
 'shape': (200, 200, 4),
 'typestr': '|u1'}
>>> x = np.asarray(img)
>>> x.shape
(200, 200, 4)

chararray
maskedarray
matrix
chararray
>>> x = np.array(['a', '  bbb', '  ccc']).view(np.chararray)
>>> x.lstrip(' ')       
chararray(['a', 'bbb', 'ccc'],
      dtype='...')
>>> x.upper()       
chararray(['A', '  BBB', '  CCC'],
      dtype='...')

.view()
masked_array
>>> x = np.array([1, 2, 3, -99, 5])

>>> mx = np.ma.masked_array(x, mask=[0, 0, 0, 1, 0])
>>> mx
masked_array(data = [1 2 3 -- 5],
             mask = [False False False  True False],
       fill_value = 999999)

>>> mx.mean()
2.75
>>> np.mean(mx)
2.75

np.dot
masked_array
>>> mx[1] = 9
>>> x
array([  1,   9,   3, -99,   5])

>>> mx[1] = np.ma.masked
>>> mx
masked_array(data = [1 -- 3 -- 5],
             mask = [False  True False  True False],
       fill_value = 999999)

>>> mx[1] = 9
>>> mx
masked_array(data = [1 9 3 -- 5],
             mask = [False False False  True False],
       fill_value = 999999)

>>> mx.mask
array([False, False, False,  True, False], dtype=bool)

>>> x2 = mx.filled(-1)
>>> x2
array([ 1,  9,  3, -1,  5])

>>> mx.mask = np.ma.nomask
>>> mx
masked_array(data = [1 9 3 -99 5],
             mask = [False False False False False],
       fill_value = 999999)

>>> np.ma.log(np.array([1, 2, -1, -2, 3, -5]))
masked_array(data = [0.0 0.6931471805599453 -- -- 1.0986122886681098 --],
             mask = [False False  True  True False  True],
       fill_value = 1e+20)

>>> data = np.loadtxt('data/populations.txt')
>>> populations = np.ma.masked_array(data[:,1:])
>>> year = data[:, 0]

>>> bad_years = (((year >= 1903) & (year <= 1910))
...            | ((year >= 1917) & (year <= 1918)))
>>> # '&' means 'and' and '|' means 'or'
>>> populations[bad_years, 0] = np.ma.masked
>>> populations[bad_years, 1] = np.ma.masked

>>> populations.mean(axis=0)
masked_array(data = [40472.72727272727 18627.272727272728 42400.0],
             mask = [False False False],
       fill_value = 1e+20)

>>> populations.std(axis=0)
masked_array(data = [21087.656489006717 15625.799814240254 3322.5062255844787],
             mask = [False False False],
       fill_value = 1e+20)

>>> plt.plot(year, populations, 'o-')   
[<matplotlib.lines.Line2D object at ...>, ...]

recarray
>>> arr = np.array([('a', 1), ('b', 2)], dtype=[('x', 'S1'), ('y', int)])
>>> arr2 = arr.view(np.recarray)
>>> arr2.x       
chararray(['a', 'b'],
      dtype='|S1')
>>> arr2.y
array([1, 2])

matrix
*
>>> np.matrix([[1, 0], [0, 1]]) * np.matrix([[1, 2], [3, 4]])
matrix([[1, 2],
        [3, 4]])

Title: numpy.random.permutation fails for non-integer arguments

I'm trying to generate random permutations, using numpy.random.permutation.

When calling numpy.random.permutation with non-integer arguments
it fails with a cryptic error message::

    >>> np.random.permutation(12)
    array([ 6, 11,  4, 10,  2,  8,  1,  7,  9,  3,  0,  5])
    >>> np.random.permutation(12.)
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "mtrand.pyx", line 3311, in mtrand.RandomState.permutation
      File "mtrand.pyx", line 3254, in mtrand.RandomState.shuffle
    TypeError: len() of unsized object

This also happens with long arguments, and so
np.random.permutation(X.shape[0]) where X is an array fails on 64
bit windows (where shape is a tuple of longs).

It would be great if it could cast to integer or at least raise a
proper error for non-integer types.

I'm using Numpy 1.4.1, built from the official tarball, on Windows
64 with Visual studio 2008, on Python.org 64-bit Python.

>>> print(np.__version__) 
1...

>>> print(np.__file__) 
/...

scipy-dev
scipy-dev
To: scipy-dev@scipy.org

Hi,

I'd like to edit Numpy/Scipy docstrings. My account is XXXXX

Cheers,
N. N.

# Clone numpy repository
git clone --origin svn http://projects.scipy.org/git/numpy.git numpy
cd numpy

# Create a feature branch
git checkout -b name-of-my-feature-branch  svn/trunk

<edit stuff>

git commit -a

git remote add github git@github.com:USERNAME/REPOSITORYNAME.git
git push github name-of-my-feature-branch

numpy-discussion
scipy-dev
.wav
chararray
maskedarray
matrix
chararray
masked_array
recarray
matrix

`PyUFunc_f_f`	`float elementwise_func(float input_1)`
`PyUFunc_ff_f`	`float elementwise_func(float input_1, float input_2)`
`PyUFunc_d_d`	`double elementwise_func(double input_1)`
`PyUFunc_dd_d`	`double elementwise_func(double input_1, double input_2)`
`PyUFunc_D_D`	`elementwise_func(npy_cdouble *input, npy_cdouble* output)`
`PyUFunc_DD_D`	`elementwise_func(npy_cdouble *in1, npy_cdouble *in2, npy_cdouble* out)`

`PyUFunc_f_f`	`float elementwise_func(float input_1)`
`PyUFunc_ff_f`	`float elementwise_func(float input_1, float input_2)`
`PyUFunc_d_d`	`double elementwise_func(double input_1)`
`PyUFunc_dd_d`	`double elementwise_func(double input_1, double input_2)`
`PyUFunc_D_D`	`elementwise_func(complex_double *input, complex_double* output)`
`PyUFunc_DD_D`	`elementwise_func(complex_double *in1, complex_double *in2, complex_double* out)`

type	scalar type of the data, one of: int8, int16, float64, et al. (fixed size) str, unicode, void (flexible size)
itemsize	size of the data block
byteorder	byte order: big-endian `>` / little-endian `<` / not applicable `|`
fields	sub-dtypes, if it’s a structured data type
shape	shape of the array, if it’s a sub-array

chunk_id	`"RIFF"`
chunk_size	4-byte unsigned little-endian integer
format	`"WAVE"`
fmt_id	`"fmt "`
fmt_size	4-byte unsigned little-endian integer
audio_fmt	2-byte unsigned little-endian integer
num_channels	2-byte unsigned little-endian integer
sample_rate	4-byte unsigned little-endian integer
byte_rate	4-byte unsigned little-endian integer
block_align	2-byte unsigned little-endian integer
bits_per_sample	2-byte unsigned little-endian integer
data_id	`"data"`
data_size	4-byte unsigned little-endian integer

Navigation

2.2. Advanced Numpy¶

2.2.1.3.1. The descriptor¶

2.2.1.3.2. Example: reading .wav files¶

2.2.1.3.3. Casting and re-interpretation/views¶

2.2.1.3.3.1. Casting¶

2.2.1.3.3.2. Re-interpretation / viewing¶

2.2.1.4.1. Main point¶

2.2.1.4.1.1. C and Fortran order¶

2.2.1.4.1.2. Slicing with integers¶

2.2.1.4.2. Example: fake dimensions with strides¶

2.2.1.4.3. Broadcasting¶

2.2.1.4.4. More tricks: diagonals¶

2.2.1.4.5. CPU cache effects¶

2.2.1.4.6. Example: inplace operations (caveat emptor)¶

2.2.2.1.1. Parts of an Ufunc¶

2.2.2.1.2. Making it easier¶

2.2.4.2.1. The mask¶

2.2.4.2.2. Domain-aware functions¶

2.2.6.2.1. Good bug report¶

Navigation

2.2.1.3.2. Example: reading `.wav` files¶