# UMAP on the Fashion MNIST Digits dataset using Datashader¶

This is a simple example of using UMAP on the Fashion-MNIST dataset. The goal of this example is largely to demonstrate the use of datashader as an effective tool for visualising UMAP results. In particular datashader allows visualisation of very large datasets where overplotting can be a serious problem. It supports coloring by categorical variables (as shown in this example), or by continuous variables, or by density (as is common in datashader examples).

import umap
import numpy as np
import pandas as pd
import requests
import os
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(context="paper", style="white")

if not os.path.isfile("fashion-mnist.csv"):
csv_data = requests.get("https://www.openml.org/data/get_csv/18238735/phpnBqZGZ")
with open("fashion-mnist.csv", "w") as f:
f.write(csv_data.text)

data = source_df.iloc[:, :784].values.astype(np.float32)
target = source_df["class"].values

pal = [
"#9e0142",
"#d8434e",
"#f67a49",
"#fdbf6f",
"#feeda1",
"#f1f9a9",
"#bfe5a0",
"#74c7a5",
"#378ebb",
"#5e4fa2",
]
color_key = {str(d): c for d, c in enumerate(pal)}

reducer = umap.UMAP(random_state=42)
embedding = reducer.fit_transform(data)

df = pd.DataFrame(embedding, columns=("x", "y"))
df["class"] = pd.Series([str(x) for x in target], dtype="category")

cvs = ds.Canvas(plot_width=400, plot_height=400)
agg = cvs.points(df, "x", "y", ds.count_cat("class"))

utils.export_image(img, filename="fashion-mnist", background="black")

fig, ax = plt.subplots(figsize=(6, 6))
plt.imshow(image)
plt.setp(ax, xticks=[], yticks=[])
plt.title(
"Fashion MNIST data embedded\n"
"into two dimensions by UMAP\n"