Issue
I have a numpy array of np.uint64
holding only 0
or 1
values, and I have to map 0
to np.float64(1.0)
, and 1
to np.float64(-1.0)
.
Since the interpreter doesn't know that it only has to convert 0
and 1
, it uses a costly general algorithm, so I tried using a two-element lookup array holding the results, with the uint64
values as indices into that array to avoid any conversion, but it is even slower.
import numpy as np
import timeit
# Benchmark input: 10,000 random bits (each 0 or 1) stored as np.uint64.
random_bit = np.random.randint(low=0, high=2, size=10_000, dtype=np.uint64)
def np_cast(random_bit):
    """Map 0 -> 1.0 and 1 -> -1.0 using an explicit ``np.float64`` conversion.

    Parameters
    ----------
    random_bit : numpy.ndarray
        Array holding only the values 0 and 1 (``np.uint64`` in the benchmark).

    Returns
    -------
    numpy.ndarray
        ``1.0 - 2.0 * x`` evaluated element-wise on the converted input.
    """
    # Convert first, then apply the affine map 1 - 2x (0 -> 1.0, 1 -> -1.0).
    vectorized_result = 1.0 - 2.0 * np.float64(random_bit)
    return vectorized_result
def product(random_bit):
    """Map 0 -> 1.0 and 1 -> -1.0 by multiplying the array with a float directly.

    Parameters
    ----------
    random_bit : numpy.ndarray
        Array holding only the values 0 and 1 (``np.uint64`` in the benchmark).

    Returns
    -------
    numpy.ndarray
        ``1.0 - 2.0 * random_bit`` evaluated element-wise.
    """
    # No explicit cast: the multiplication by the float 2.0 does the conversion.
    mapped_result = 1.0 - 2.0 * random_bit
    return mapped_result
# Lookup table: index 0 -> 1.0, index 1 -> -1.0.
np_one_minus_one = np.array([1.0, -1.0], dtype=np.float64)


def _array(random_bit):
    """Map 0 -> 1.0 and 1 -> -1.0 by indexing the ``np_one_minus_one`` table.

    Parameters
    ----------
    random_bit : numpy.ndarray
        Integer array holding only the values 0 and 1; its elements are used
        as indices into the two-element lookup table.

    Returns
    -------
    numpy.ndarray
        The table entries selected by ``random_bit``.
    """
    mapped_result = np_one_minus_one[random_bit]
    return mapped_result
# Pre-built float64 scalars so the arithmetic below uses float64 operands.
one = np.float64(1)
minus_two = np.float64(-2)


def astype(random_bit):
    """Map 0 -> 1.0 and 1 -> -1.0 using ``ndarray.astype`` plus float64 scalars.

    Parameters
    ----------
    random_bit : numpy.ndarray
        Array holding only the values 0 and 1 (``np.uint64`` in the benchmark).

    Returns
    -------
    numpy.ndarray
        ``one + minus_two * x`` evaluated element-wise on the float64 copy.
    """
    mapped_result = one + minus_two * random_bit.astype(np.float64)
    return mapped_result
# Candidate implementations to compare, timed in this order.
function_list = [np_cast, product, _array, astype]
print("start benchmark")
for function in function_list:
    # timeit.timeit returns the total elapsed seconds for all `number` calls.
    _time = timeit.timeit(lambda: function(random_bit), number=100000)
    print(f"{function.__name__}: {_time:.3f} seconds")
I get these times:
np_cast: 178.604 seconds
product: 172.939 seconds
_array: 239.305 seconds
astype: 186.031 seconds
Solution
You can do this ~4x faster by using numba. For the general N-d case this could be:
import numba as nb
@nb.vectorize
def numba_if(random_bit):
    """Scalar kernel: return -1.0 for a truthy (non-zero) input, else 1.0.

    Wrapped with ``nb.vectorize`` so it can be called on whole arrays.
    """
    return -1.0 if random_bit else 1.0
@nb.vectorize
def numba_product(random_bit):
    """Scalar kernel: return ``1.0 - 2.0 * random_bit`` (0 -> 1.0, 1 -> -1.0).

    Wrapped with ``nb.vectorize`` so it can be called on whole arrays.
    """
    return 1.0 - 2.0 * random_bit
Or in your specific 1d case you can use explicit loops to make it faster:
import numpy as np
@nb.njit
def numba_if_loop(random_bit):
    """Map a 1-d array of 0/1 values to 1.0/-1.0 with an explicit branch per element.

    Parameters
    ----------
    random_bit : numpy.ndarray
        One-dimensional array; a non-zero element maps to -1.0, zero to 1.0.

    Returns
    -------
    numpy.ndarray
        float64 array shaped like the input.
    """
    assert random_bit.ndim == 1
    # Output buffer shaped like the input but with float64 elements.
    result = np.empty_like(random_bit, dtype=np.float64)
    for i in range(random_bit.size):
        result[i] = -1.0 if random_bit[i] else 1.0
    return result
@nb.njit
def numba_product_loop(random_bit):
    """Map a 1-d array of 0/1 values to 1.0/-1.0 via ``1.0 - 2.0 * x`` per element.

    Parameters
    ----------
    random_bit : numpy.ndarray
        One-dimensional array of values to map.

    Returns
    -------
    numpy.ndarray
        float64 array shaped like the input.
    """
    assert random_bit.ndim == 1
    # Output buffer shaped like the input but with float64 elements.
    result = np.empty_like(random_bit, dtype=np.float64)
    for i in range(random_bit.size):
        result[i] = 1.0 - 2.0 * random_bit[i]
    return result
Timings (mason
is lambda x:(1-2*x.astype(np.int8)).astype(float)
from the comments):
# IPython session: time the NumPy implementations from the question.
%timeit np_cast(random_bit)
%timeit product(random_bit)
%timeit _array(random_bit)
%timeit astype(random_bit)
# `mason` must be defined first:
#   mason = lambda x: (1 - 2 * x.astype(np.int8)).astype(float)
%timeit mason(random_bit)
# Check that every numba variant matches np_cast. This also calls each numba
# function once before it is timed (presumably so that JIT compilation is
# not part of the measurement - TODO confirm).
assert np.array_equal(np_cast(random_bit), numba_if(random_bit))
assert np.array_equal(np_cast(random_bit), numba_product(random_bit))
assert np.array_equal(np_cast(random_bit), numba_if_loop(random_bit))
assert np.array_equal(np_cast(random_bit), numba_product_loop(random_bit))
# Time the numba implementations.
%timeit numba_if(random_bit)
%timeit numba_product(random_bit)
%timeit numba_if_loop(random_bit)
%timeit numba_product_loop(random_bit)
Output:
6.58 µs ± 218 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
7.58 µs ± 251 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
11 µs ± 9.34 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
7.32 µs ± 674 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
6.86 µs ± 153 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
1.89 µs ± 25.8 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
2.07 µs ± 13.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
1.6 µs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
1.78 µs ± 5.31 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
Answered By - Nin17
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.