Alright, let's put the concepts from this chapter into practice. The best way to solidify your understanding of tensor manipulations is by working through examples. We'll cover indexing, reshaping, combining, broadcasting, data types, and moving tensors between devices. Make sure you have PyTorch imported.
import torch
import numpy as np
# Check if CUDA is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
Indexing and slicing are fundamental for accessing and modifying parts of your tensors. Let's try selecting specific data points.
Task 1: Create a 2D tensor and select the element in the second row, third column.
# Create a sample 2D tensor (3 rows, 4 columns)
data = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
tensor_2d = torch.tensor(data)
print("Original Tensor:\n", tensor_2d)
# Select the element at row index 1, column index 2
element = tensor_2d[1, 2]
print("\nElement at [1, 2]:", element)
print("Value:", element.item()) # Use .item() to get the Python number
Task 2: Select the entire second row of tensor_2d.
# Select the row at index 1
row_1 = tensor_2d[1]
print("\nSecond row (index 1):\n", row_1)
# Alternative using slicing (selects row 1, all columns)
row_1_alt = tensor_2d[1, :]
print("\nSecond row (alternative):\n", row_1_alt)
Task 3: Select the third column of tensor_2d.
# Select all rows, column index 2
col_2 = tensor_2d[:, 2]
print("\nThird column (index 2):\n", col_2)
Task 4: Create a boolean mask to select all elements in tensor_2d greater than 7, then use the mask to extract these elements.
# Create the boolean mask
mask = tensor_2d > 7
print("\nBoolean mask (tensor > 7):\n", mask)
# Apply the mask
selected_elements = tensor_2d[mask]
print("\nElements greater than 7:\n", selected_elements)
These exercises show how standard Python indexing combines with NumPy-like slicing and boolean masking to provide flexible data access.
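As an extra illustration (not one of the tasks above), the same indexing syntax also works on the left-hand side of an assignment, which is a common way to modify a tensor conditionally. The variable name modified below is introduced only for this example.
# Boolean-mask assignment: clamp every value greater than 7 to zero
modified = tensor_2d.clone()   # work on a copy so the original tensor stays intact
modified[modified > 7] = 0     # the mask selects the positions to overwrite in place
print("\nCopy with values > 7 set to 0:\n", modified)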
Changing a tensor's shape without altering its data is common, especially when preparing inputs for different neural network layers.
Task 1: Create a 1D tensor with 12 elements and reshape it into a 3x4 tensor.
tensor_1d = torch.arange(12) # Creates a tensor with values 0 to 11
print("\nOriginal 1D Tensor:", tensor_1d)
# Reshape using reshape()
reshaped_tensor = tensor_1d.reshape(3, 4)
print("\nReshaped to 3x4:\n", reshaped_tensor)
# Reshape using view() - note view works on contiguous tensors
# arange creates a contiguous tensor, so view works here.
view_tensor = tensor_1d.view(3, 4)
print("\nViewed as 3x4:\n", view_tensor)
Remember that view requires the tensor to be contiguous in memory and shares the underlying data, while reshape may return either a view or a copy depending on contiguity.
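As a quick, optional check of this behavior, transposing a tensor typically makes it non-contiguous, at which point view fails while reshape (or contiguous followed by view) still works. The names transposed and flattened are introduced just for this illustration.
# Transposing produces a non-contiguous view of the same data
transposed = reshaped_tensor.t()
print("\nIs the transposed tensor contiguous?", transposed.is_contiguous())
# transposed.view(12) would raise a RuntimeError here because the data is no longer contiguous
flattened = transposed.reshape(12)                 # reshape copies the data when it has to
flattened_alt = transposed.contiguous().view(12)   # or make it contiguous first, then view
print("Flattened with reshape:", flattened)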
Task 2: Given the reshaped_tensor (3x4), use permute to swap its dimensions, resulting in a 4x3 tensor.
# Original 3x4 tensor
print("\nOriginal 3x4 Tensor:\n", reshaped_tensor)
# Swap dimensions 0 and 1
permuted_tensor = reshaped_tensor.permute(1, 0)
print("\nPermuted to 4x3:\n", permuted_tensor)
print("Original shape:", reshaped_tensor.shape)
print("Permuted shape:", permuted_tensor.shape)
permute is useful for tasks like changing image dimension order (e.g., from Channels x Height x Width to Height x Width x Channels).
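For instance, using a randomly filled stand-in for an image (the tensor image_chw below is hypothetical, not part of the tasks), moving the channel dimension to the end looks like this:
# Reorder an image-like tensor from C x H x W to H x W x C
image_chw = torch.randn(3, 32, 32)       # 3 channels, 32x32 pixels
image_hwc = image_chw.permute(1, 2, 0)   # move channels to the last dimension
print("\nCHW shape:", image_chw.shape)
print("HWC shape:", image_hwc.shape)     # torch.Size([32, 32, 3])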
Combining tensors or breaking them apart is often needed when dealing with batches or different feature sets.
Task 1: Create two 2x3 tensors and concatenate them along dimension 0 (rows).
tensor_a = torch.tensor([[1, 2, 3], [4, 5, 6]])
tensor_b = torch.tensor([[7, 8, 9], [10, 11, 12]])
print("\nTensor A:\n", tensor_a)
print("Tensor B:\n", tensor_b)
# Concatenate along dimension 0 (stacking rows)
concatenated_rows = torch.cat((tensor_a, tensor_b), dim=0)
print("\nConcatenated along rows (dim=0):\n", concatenated_rows)
print("Shape:", concatenated_rows.shape) # Should be 4x3
Task 2: Concatenate tensor_a and tensor_b along dimension 1 (columns).
# Concatenate along dimension 1 (joining columns)
concatenated_cols = torch.cat((tensor_a, tensor_b), dim=1)
print("\nConcatenated along columns (dim=1):\n", concatenated_cols)
print("Shape:", concatenated_cols.shape) # Should be 2x6
Task 3: Use stack to combine tensor_a and tensor_b into a new tensor with shape 2x2x3.
# Stack tensors - creates a new dimension (dim=0 by default)
stacked_tensor = torch.stack((tensor_a, tensor_b), dim=0)
print("\nStacked tensors (dim=0):\n", stacked_tensor)
print("Shape:", stacked_tensor.shape) # Should be 2x2x3
# Stack along dimension 1
stacked_tensor_dim1 = torch.stack((tensor_a, tensor_b), dim=1)
print("\nStacked tensors (dim=1):\n", stacked_tensor_dim1)
print("Shape:", stacked_tensor_dim1.shape) # Should be 2x2x3
Notice how stack adds a new dimension, while cat joins along an existing one.
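One way to see the relationship (an optional check, not part of the tasks) is that stacking is equivalent to giving each tensor a new leading dimension with unsqueeze and then concatenating along it:
# stack == cat of unsqueezed tensors
stack_via_cat = torch.cat((tensor_a.unsqueeze(0), tensor_b.unsqueeze(0)), dim=0)
print("\nstack matches cat of unsqueezed tensors:", torch.equal(stacked_tensor, stack_via_cat))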
Task 4: Create a 6x4 tensor and split it into three equal chunks along dimension 0.
tensor_to_split = torch.arange(24).reshape(6, 4)
print("\nTensor to split (6x4):\n", tensor_to_split)
# Split into 3 chunks along dimension 0
chunks = torch.chunk(tensor_to_split, chunks=3, dim=0)
print("\nSplit into 3 chunks:")
for i, chunk in enumerate(chunks):
print(f"Chunk {i} (shape {chunk.shape}):\n", chunk)
Broadcasting simplifies operations between tensors of different shapes.
Task 1: Create a 3x3 tensor and a 1x3 tensor (row vector). Add them together.
matrix = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
row_vector = torch.tensor([[10, 20, 30]]) # Shape 1x3
print("\nMatrix (3x3):\n", matrix)
print("Row Vector (1x3):\n", row_vector)
# Broadcasting addition: row_vector is expanded to match matrix shape
result = matrix + row_vector
print("\nMatrix + Row Vector (Broadcasting):\n", result)
PyTorch automatically expanded the row_vector (shape 1x3) to shape 3x3 by duplicating rows, allowing element-wise addition with the matrix.
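If you want to see the implied expansion explicitly, torch.broadcast_to makes it visible without allocating copies of the data (this is an extra illustration; expanded_row is introduced only here):
# Make the implicit expansion explicit (the result is a view, no data is copied)
expanded_row = torch.broadcast_to(row_vector, (3, 3))
print("\nRow vector broadcast to 3x3:\n", expanded_row)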
Task 2: Create a 3x3 tensor and a 3x1 tensor (column vector). Add them together.
col_vector = torch.tensor([[100], [200], [300]]) # Shape 3x1
print("\nMatrix (3x3):\n", matrix)
print("Column Vector (3x1):\n", col_vector)
# Broadcasting addition: col_vector is expanded to match matrix shape
result_col = matrix + col_vector
print("\nMatrix + Column Vector (Broadcasting):\n", result_col)
Here, the col_vector (shape 3x1) was broadcast across columns to match the 3x3 shape of the matrix.
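Broadcasting only works when the trailing dimensions are compatible (equal, or one of them is 1); otherwise PyTorch raises an error. A brief demonstration using the 3x3 matrix from above (bad_vector is a throwaway name for this example):
# Incompatible shapes: a length-2 vector cannot broadcast against a 3x3 matrix
bad_vector = torch.tensor([1, 2])   # shape (2,)
try:
    matrix + bad_vector
except RuntimeError as e:
    print("\nBroadcasting error:", e)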
Managing data types is important for memory efficiency and numerical stability.
Task 1: Create a tensor of integers and check its dtype. Then, cast it to a floating-point tensor.
int_tensor = torch.tensor([1, 2, 3, 4])
print("\nInteger Tensor:", int_tensor)
print("Data Type:", int_tensor.dtype)
# Cast to float32
float_tensor = int_tensor.to(torch.float32)
# Alternative: float_tensor = int_tensor.float()
print("\nConverted to Float Tensor:", float_tensor)
print("Data Type:", float_tensor.dtype)
Task 2: Create a floating-point tensor and cast it to an integer tensor. Observe any changes.
float_tensor_orig = torch.tensor([1.1, 2.7, 3.5, 4.9])
print("\nOriginal Float Tensor:", float_tensor_orig)
print("Data Type:", float_tensor_orig.dtype)
# Cast to int32
int_tensor_cast = float_tensor_orig.to(torch.int32)
# Alternative: int_tensor_cast = float_tensor_orig.int()
print("\nConverted to Integer Tensor:", int_tensor_cast)
print("Data Type:", int_tensor_cast.dtype)
Note that casting from float to int truncates the decimal part. Be mindful of potential precision loss.
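If you want conventional rounding rather than truncation, round before casting. A short comparison (the names truncated and rounded are introduced just for this illustration):
# Truncation vs. rounding before the cast
truncated = float_tensor_orig.to(torch.int32)              # 1.1 -> 1, 2.7 -> 2, ...
rounded = torch.round(float_tensor_orig).to(torch.int32)   # 1.1 -> 1, 2.7 -> 3, ...
print("\nTruncated:", truncated)
print("Rounded then cast:", rounded)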
Moving tensors to the appropriate device (CPU or GPU) is necessary for leveraging hardware acceleration.
Task 1: Create a tensor and check its default device. Then, move it to the GPU (if available) and back to the CPU.
# Create tensor (defaults to CPU unless specified otherwise)
cpu_tensor = torch.randn(2, 2)
print(f"\nTensor on CPU: {cpu_tensor.device}\n", cpu_tensor)
# Move to the configured device (GPU if available, otherwise CPU)
device_tensor = cpu_tensor.to(device)
print(f"\nTensor moved to {device_tensor.device}:\n", device_tensor)
# Move back to CPU explicitly
cpu_tensor_again = device_tensor.to("cpu")
print(f"\nTensor moved back to CPU: {cpu_tensor_again.device}\n", cpu_tensor_again)
# Perform an operation - requires tensors on the same device
if device_tensor.device != cpu_tensor.device:
    print("\nAdding tensors on different devices would cause an error.")
    # This would fail: cpu_tensor + device_tensor
    # Correct way: keep both operands on the same device
    result_on_device = device_tensor + device_tensor
    print(f"Result of operation on {result_on_device.device}:\n", result_on_device)
else:
    print("\nBoth tensors are on the CPU, addition is fine.")
    result_on_cpu = cpu_tensor + cpu_tensor_again
    print(f"Result of operation on {result_on_cpu.device}:\n", result_on_cpu)
Remember that operations between tensors generally require them to be on the same device. Explicitly moving tensors with .to(device) is a common pattern in PyTorch code, especially when preparing data and models for GPU training.
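As a rough sketch of how this looks in practice (the model and batch below are hypothetical, chosen only to show the pattern), both the model's parameters and each batch of input data are moved to the same device before the forward pass:
# Sketch: move model and data to the same device before computing
model = torch.nn.Linear(4, 2).to(device)   # move the model's parameters to the device
batch = torch.randn(8, 4).to(device)       # move the input batch to the same device
output = model(batch)                      # the forward pass runs on that device
print("\nOutput device:", output.device)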
This practical session covered essential techniques for manipulating tensors effectively. As you build more complex models, proficiency with indexing, reshaping, combining tensors, understanding broadcasting, managing data types, and controlling device placement will become increasingly important.