Project Setup¶

In [1]:
from pathlib import Path
import sys

sys.path.append(str(Path().resolve().parent))
from TSC_Python import TSC, TSCResult, TSC_MLX
In [2]:
import mlx.core as mx
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm, trange
from matplotlib.colors import ListedColormap, BoundaryNorm
import ipywidgets as widgets

File Location¶

In [3]:
ROOT = Path().resolve().parent
MAT_DIR = ROOT / "MAT Files"
GT_DIR = ROOT / "GT Files"

Datasets - Files & Keys¶

In [4]:
DATASETS = {
    "Pavia": {
        "data_file": "Pavia.mat",
        "gt_file":   "Pavia_gt.mat",
        "data_key":  "pavia",
        "gt_key":    "pavia_gt",
    },
    "PaviaUni": {
        "data_file": "PaviaUni.mat",
        "gt_file":   "PaviaU_gt.mat",
        "data_key":  "paviaU",
        "gt_key":    "paviaU_gt",
    },
    "Salinas": {
        "data_file":  "Salinas_corrected.mat",
        "gt_file":    "Salinas_gt.mat",
        "data_key":   "salinas_corrected",
        "gt_key":     "salinas_gt",
    },
    "Indian pines":{
        "data_file":  "Indian_pines.mat",
        "gt_file":    "Indian_pines_gt.mat",
        "data_key":   "indian_pines",
        "gt_key":     "indian_pines_gt",
    }
}
In [5]:
DEFAULT_DS = "Pavia"

ds = widgets.Dropdown(options=list(DATASETS.keys()),
                      value=DEFAULT_DS,
                      description="Dataset:")
out = widgets.Output()

display(ds, out)
Dropdown(description='Dataset:', options=('Pavia', 'PaviaUni', 'Salinas', 'Indian pines'), value='Pavia')
Output()
In [6]:
keys = DATASETS[ds.value]
In [7]:
keys
Out[7]:
{'data_file': 'Pavia.mat',
 'gt_file': 'Pavia_gt.mat',
 'data_key': 'pavia',
 'gt_key': 'pavia_gt'}

Hyperspectral Image Cube¶

In [8]:
def load_ds(name: str):
    cfg = DATASETS[name]
    data_mat = sio.loadmat(MAT_DIR / cfg["data_file"])
    gt_mat = sio.loadmat(GT_DIR / cfg["gt_file"])
    globals()["data_cube"] = data_mat[cfg["data_key"]]
    globals()["gt_data"] = gt_mat[cfg["gt_key"]]
    with out:
        out.clear_output(wait=True)
        print(f"Loaded {name}: X {data_cube.shape}, GT {gt_data.shape}")
In [9]:
def _on_change(change):
    if change["name"] == "value":
        load_ds(change["new"])
In [10]:
ds.observe(_on_change, names="value")
In [11]:
load_ds(ds.value)

Ground Truth Data¶

In [12]:
gt_data
Out[12]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], shape=(1096, 715), dtype=uint8)
In [13]:
gt_data.shape
Out[13]:
(1096, 715)
In [14]:
gt_labels = np.sort(np.unique(gt_data))
In [15]:
num_labels = len(gt_labels)
In [16]:
bg_indices = gt_data == 0
In [17]:
filter = mx.ones([data_cube.shape[0], data_cube.shape[1]])
In [18]:
mask = mx.where(bg_indices, 0.0, filter)
In [19]:
mask3d = mask[..., None]
In [20]:
masked_cube = mx.multiply(data_cube, mask3d)
In [21]:
masked_cube_reshaped = mx.reshape(masked_cube, (masked_cube.shape[0]*masked_cube.shape[1], masked_cube.shape[2]))
In [22]:
cube_np = np.array(masked_cube_reshaped)
nonzero_mask = np.any(cube_np != 0, axis=1)
filtered_np = cube_np[nonzero_mask]
filtered_pixels = mx.array(filtered_np)

Fit the Model¶

In [23]:
model = TSC(
    n_clusters = num_labels-1,
    verbose = 2,
    random_state=10
)
In [24]:
%%time
model.fit(filtered_pixels.T)
[TSC] Converting input to NumPy array with dtype=float64, shape=(102, 148152)
Running TSC with n_clusters=9, max_nz=15, max_chunksize=1024, n_init=20, max_iter=100
[TSC] Building affinity matrix | N=148152, q=15, chunksize=1024, n_chunks=145, backend=CPU
TSC affinity:   0%|          | 0/145 [00:00<?, ?it/s]
[TSC] Affinity construction finished | nnz=3472838
[TSC] Computing spectral embedding | nev=9, mode=fixed K=9
[TSC] Using sparse eigsh
[TSC] Running k-means on embedding with shape=(148152, 9), n_clusters=9, n_init=20, max_iter=100
Finished TSC with n_clusters_=9
CPU times: user 13min 6s, sys: 2min 13s, total: 15min 19s
Wall time: 11min 41s
Out[24]:
<TSC_Python.tsc.TSC at 0x166451c10>
In [25]:
labels = model.labels_
In [26]:
for k in range(1, num_labels):
    ilist = np.nonzero(labels == k)[0]
    print(f"Cluster {k}: {ilist}")
Cluster 1: [     0      1      2 ... 104696 104808 104918]
Cluster 2: [    68   1896   1897 ... 144861 144891 144892]
Cluster 3: [    62     63     64 ... 148140 148144 148145]
Cluster 4: [   137    138    139 ... 148149 148150 148151]
Cluster 5: [   136    322    467 ... 148141 148142 148143]
Cluster 6: [   134    203    320 ... 145606 145705 145762]
Cluster 7: [    67   4445   4614 ... 144717 144718 144742]
Cluster 8: [  2179   2631   2633 ... 144896 144897 144898]
Cluster 9: [   323    324    468 ... 147519 147520 147521]
In [ ]:
 
In [27]:
label_px = gt_data > 0 # Returns a boolean map with background pixels stored as False

clustermap = np.zeros_like(gt_data, dtype=np.int32) # Create a zero filled 2D array size of gt_data

clustermap[label_px] = labels # Fills the foreground pixels
In [ ]:
 
In [28]:
def plot_gt_vs_clusters(gt_data, clustermap, title_left="Ground Truth", title_right="TSC Clustering Results"):
    gt_max = int(np.max(gt_data))
    km_max = int(np.max(clustermap))
    n_classes = max(gt_max, km_max)

    # color 0 = background (black); then categorical colors for 1..n_classes
    base = plt.cm.tab20.colors  # plenty of distinct colors
    palette = ['black'] + [base[i % len(base)] for i in range(n_classes)]
    cmap = ListedColormap(palette)
    norm = BoundaryNorm(np.arange(n_classes + 2) - 0.5, cmap.N)  # hard bins at integers

    fig, axes = plt.subplots(1, 2, figsize=(9, 7), constrained_layout=True)

    im0 = axes[0].imshow(gt_data, cmap=cmap, norm=norm, interpolation='nearest')
    axes[0].set_title(title_left, fontsize=16)
    axes[0].axis('equal'); axes[0].set_xticks([]); axes[0].set_yticks([])

    im1 = axes[1].imshow(clustermap, cmap=cmap, norm=norm, interpolation='nearest')
    axes[1].set_title(title_right, fontsize=16)
    axes[1].axis('equal'); axes[1].set_xticks([]); axes[1].set_yticks([])

    # one colorbar per panel with integer ticks (0..n_classes)
    ticks = np.arange(0, n_classes + 1, 1)
    cbar0 = fig.colorbar(im0, ax=axes[0], ticks=ticks, orientation='horizontal', fraction=0.046, pad=0.05)
    cbar1 = fig.colorbar(im1, ax=axes[1], ticks=ticks, orientation='horizontal', fraction=0.046, pad=0.05)

    plt.show()
In [29]:
plot_gt_vs_clusters(gt_data, clustermap)
No description has been provided for this image
In [30]:
model_gpu = TSC_MLX(
    n_clusters = num_labels-1,
    use_gpu = True,
    verbose = 2,
    random_state=10
)
In [31]:
%%time
model_gpu.fit(filtered_pixels.T)
[TSC] Converting input to MLX array on gpu with dtype=mlx.core.float32, shape=(102, 148152)
Running TSC with n_clusters=9, max_nz=15, max_chunksize=1024, n_init=20, max_iter=100
[TSC] Building affinity matrix | N = 148152, q=15, chunksize=1024n_chunks=145, backend=GPU
TSC affinity:   0%|          | 0/145 [00:00<?, ?it/s]
[TSC] Affinity construction finished | nnz=3472818
[TSC] Computing spectral embedding | nev=9, mode=fixed K=9
[TSC] Running k-means on embedding with shape=(148152, 9), n_clusters=9, n_init=20, max_iter=100
Finished TSC with n_clusters_=9
CPU times: user 9min 43s, sys: 30.6 s, total: 10min 14s
Wall time: 11min 10s
Out[31]:
<TSC_Python.tsc_mlx.TSC_MLX at 0x37bc04bc0>
In [32]:
labels_gpu = model_gpu.labels_
In [33]:
for k in range(1, num_labels):
    ilist = np.nonzero(labels_gpu == k)[0]
    print(f"Cluster {k}: {ilist}")
Cluster 1: [  2179   2631   2633 ... 144896 144897 144898]
Cluster 2: [     0      1      2 ... 104696 104808 104918]
Cluster 3: [    62     63     64 ... 148140 148144 148145]
Cluster 4: [   134    203    320 ... 145606 145705 145762]
Cluster 5: [   137    138    139 ... 148149 148150 148151]
Cluster 6: [   136    322    467 ... 148141 148142 148143]
Cluster 7: [   323    324    468 ... 147519 147520 147521]
Cluster 8: [    67   4445   4614 ... 144717 144718 144742]
Cluster 9: [    68   1896   1897 ... 144861 144891 144892]
In [34]:
label_px = gt_data > 0 # Returns a boolean map with background pixels stored as False

clustermap_gpu = np.zeros_like(gt_data, dtype=np.int32) # Create a zero filled 2D array size of gt_data

clustermap_gpu[label_px] = labels_gpu # Fills the foreground pixels
In [35]:
plot_gt_vs_clusters(gt_data, clustermap_gpu)
No description has been provided for this image
In [ ]: