Issue
I have a PyTorch tensor that contains the labels of some samples.
I want to split each label into n_groups
groups, introducing new virtual labels.
For example, for the labels:
labels = torch.as_tensor([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=torch.long)
One possible solution to subdivide each label into n_groups=2
is the following:
subdivided_labels = [0, 3, 0, 1, 4, 1, 2, 5, 2]
The constraints are the following:
- There is no assumption about the order of the initial labels
- The labels should be well distributed among the groups
- The label ranking should be consistent among different groups, i.e., the first label
0
in the first group should be the first label also in any other group. - The following should always be true
torch.equal(labels, subdivided_labels % num_classes)
- It is possible that the number of groups is greater than the number of samples for a given label
The following tests should pass for the desired algorithm:
@pytest.mark.parametrize(
"labels",
(
torch.randint(100, size=(50,)),
torch.arange(100),
torch.ones(100),
torch.randint(100, size=(50,)).repeat(4),
torch.arange(100).repeat(4),
torch.ones(100).repeat(4),
torch.randint(100, size=(50,)).repeat_interleave(4),
torch.arange(100).repeat_interleave(4),
torch.ones(100).repeat_interleave(4),
),
)
@pytest.mark.parametrize("n_groups", (1, 2, 3, 4, 5, 50, 150))
def test_subdivide_labels(labels, n_groups):
subdivided_labels = subdivide_labels(labels, n_groups=n_groups, num_classes=100)
assert torch.equal(labels, subdivided_labels % 100)
@pytest.mark.parametrize(
"labels, n_groups, n_classes, expected_result",
(
(
torch.tensor([0, 0, 1, 1, 2, 2]),
2,
3,
torch.tensor([0, 3, 1, 4, 2, 5]),
),
(
torch.tensor([0, 0, 1, 1, 2, 2]),
2,
10,
torch.tensor([0, 10, 1, 11, 2, 12]),
),
(
torch.tensor([0, 0, 1, 1, 2, 2]),
1,
10,
torch.tensor([0, 0, 1, 1, 2, 2]),
),
(
torch.tensor([0, 0, 2, 2, 1, 1]),
2,
3,
torch.tensor([0, 3, 2, 5, 1, 4]),
),
(
torch.tensor([0, 0, 2, 2, 1, 1]),
30,
3,
torch.tensor([0, 3, 2, 5, 1, 4]),
),
),
)
def test_subdivide_labels_with_gt(labels, n_groups, n_classes, expected_result):
subdivided_labels = subdivide_labels(labels, n_groups=n_groups, num_classes=n_classes)
assert torch.equal(expected_result, subdivided_labels)
assert torch.equal(labels, subdivided_labels % n_classes)
I have a non-vectorized solution:
import torch
def subdivide_labels(labels: torch.Tensor, n_groups: int, num_classes: int) -> torch.Tensor:
"""Divide each label in groups introducing virtual labels.
Args:
labels: the tensor containing the labels, each label should be in [0, num_classes)
n_groups: the number of groups to create for each label
num_classes: the number of classes
Returns:
a tensor with the same shape of labels, but with each label partitioned in n_groups virtual labels
"""
unique, counts = labels.unique(
sorted=True,
return_counts=True,
return_inverse=False,
)
virtual_labels = labels.clone().detach()
max_range = num_classes * (torch.arange(counts.max()) % n_groups)
for value, count in zip(unique, counts):
virtual_labels[labels == value] = max_range[:count] + value
return virtual_labels
labels = torch.as_tensor([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=torch.long)
subdivide_labels(labels, n_groups=2, num_classes=3)
tensor([0, 3, 0, 1, 4, 1, 2, 5, 2])
Is it possible to vectorize this algorithm? Alternatively, are there any faster algorithms to perform the same operation?
Solution
A variation of OP's approach can be vectorized with a grouped cumcount (numpy
implementation by @divakar). All tests pass, but the output is slightly different since argsort
has no 'stable' option in pytorch
, AFAIK.
def vector_labels(labels, n_groups, num_classes):
counts = torch.unique(labels, return_counts=True)[1]
idx = counts.cumsum(0)
id_arr = torch.ones(idx[-1], dtype=torch.long)
id_arr[0] = 0
id_arr[idx[:-1]] = -counts[:-1] + 1
rng = id_arr.cumsum(0)[labels.argsort().argsort()] % n_groups
maxr = torch.arange(n_groups) * num_classes
return maxr[rng] + labels
labels = torch.arange(100).repeat_interleave(4)
%timeit vector_labels(labels, 2, 100)
%timeit subdivide_labels(labels, 2, 100)
Output
10000 loops, best of 5: 117 µs per loop
1000 loops, best of 5: 1.6 ms per loop
This is far from the fastest algorithm. For example a trivial O(n) approach, but only CPU and needs numba
to be fast with Python.
import numpy as np
import numba as nb
@nb.njit
def numpy_labels(labels, n_groups, num_classes):
lookup = np.zeros(labels.max() + 1, np.intp)
res = np.empty_like(labels)
for i in range(len(labels)):
res[i] = num_classes * lookup[labels[i]] + labels[i]
lookup[labels[i]] = lookup[labels[i]] + 1 if lookup[labels[i]] < n_groups-1 else 0
return res
numpy_labels(labels.numpy(), 20, 100) # compile run
%timeit torch.from_numpy(numpy_labels(labels.numpy(), 20, 100))
Output
100000 loops, best of 5: 3.63 µs per loop
Answered By - Michael Szczesny
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.