Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About ArticlesStock — AI & Technology Journalist
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    A Coding Tutorial on Datashader for Rendering Huge Datasets with High-Performance Python Visual Analytics

    Naveed Ahmad · By Naveed Ahmad · 26/04/2026 · Updated: 26/04/2026 · No Comments · 3 Mins Read
    blog 74


    # Notebook-style bootstrap: pip-install the listed packages into the running
    # interpreter before the third-party imports below are attempted.
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                           "datashader", "colorcet", "numba", "scipy"])

    import time
    import warnings

    import numpy as np
    import pandas as pd
    import datashader as ds
    import datashader.transfer_functions as tf
    from datashader import reductions as rd
    import colorcet as cc
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors  # the module is spelled "colors"
    from matplotlib.gridspec import GridSpec
    from scipy.stats import multivariate_normal

    # Suppress every Python warning for the remainder of the script.
    warnings.filterwarnings("ignore")


    print("Datashader version:", ds.__version__)
    
    
    def present(img, title="", ax=None, figsize=(6, 5)):
        """Draw a shaded image on a Matplotlib axes.

        Args:
            img: Object exposing ``to_pil()``; its result is passed to
                ``ax.imshow``.
            title: Text set as the axes title.
            ax: Axes to draw on. When ``None``, a new figure is created with
                ``plt.subplots`` and displayed before returning.
            figsize: Size of the new figure; only used when *ax* is ``None``.

        Returns:
            None. The function only draws (and, in standalone mode, shows).
        """
        standalone = ax is None
        if standalone:
            _fig, ax = plt.subplots(figsize=figsize)
        rgba = img.to_pil()
        # Valid imshow options are origin="upper"/"lower" and the ``aspect``
        # keyword; the previous values were not accepted by Matplotlib.
        ax.imshow(rgba, origin="upper", aspect="auto")
        ax.set_title(title, fontsize=11, fontweight="bold")
        ax.axis("off")
        if standalone:
            plt.tight_layout()
            plt.show()
    
    
    print("\n=== SECTION 1: Core Pipeline ===")


    # Seeded generator so every run draws the same synthetic data.
    rng = np.random.default_rng(42)
    N = 2_000_000


    # Three normally distributed groups of N // 3 samples each are concatenated
    # per coordinate, so the frame holds 3 * (N // 3) rows rather than exactly N.
    x = np.concatenate([rng.normal(-1, 0.5, N // 3),
                        rng.normal( 1, 0.5, N // 3),
                        rng.normal( 0, 1.5, N // 3)])
    y = np.concatenate([rng.normal(-1, 0.5, N // 3),
                        rng.normal( 1, 0.5, N // 3),
                        rng.normal( 0, 0.5, N // 3)])
    df_base = pd.DataFrame({"x": x, "y": y})


    canvas = ds.Canvas(plot_width=600, plot_height=500,
                       x_range=(-4, 4), y_range=(-4, 4))


    # Aggregate the points onto the canvas grid, counting points per pixel.
    agg = canvas.points(df_base, "x", "y", agg=rd.count())


    # Shade the same aggregate three ways to compare the `how` normalisations.
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    combos = [
        ("Linear / blues",  tf.shade(agg, cmap=cc.blues, how="linear")),
        ("Log    / fire",   tf.shade(agg, cmap=cc.fire,  how="log")),
        ("Eq-hist / bmy",   tf.shade(agg, cmap=cc.bmy,   how="eq_hist")),
    ]
    for ax, (title, img) in zip(axes, combos):
        present(img, title, ax=ax)
    plt.suptitle("Part 1 – 2 M points: Linear vs Log vs Eq-Hist normalisation",
                 fontsize=13, fontweight="bold")
    plt.tight_layout()
    plt.show()
    
    
    print("\n=== SECTION 2: Reduction Types ===")


    # Use the real row count: the frame was built from three N // 3 chunks.
    n_actual = len(df_base)
    df_base["value"] = rng.exponential(scale=2, size=n_actual)
    df_base["label"] = pd.Categorical(
        rng.choice(["A", "B", "C"], size=n_actual),
        categories=["A", "B", "C"],
    )


    canvas2 = ds.Canvas(plot_width=400, plot_height=350,
                        x_range=(-4, 4), y_range=(-4, 4))


    # (panel title, reduction, colormap). A colormap of None marks the
    # categorical reduction, which is shaded with a color key instead.
    reductions_cfg = [
        ("count()",          rd.count(),            cc.kbc),
        ("sum(value)",       rd.sum("value"),       cc.CET_L3),
        ("mean(value)",      rd.mean("value"),      cc.CET_D4),
        ("std(value)",       rd.std("value"),       cc.CET_L16),
        ("min(value)",       rd.min("value"),       cc.CET_L17),
        ("max(value)",       rd.max("value"),       cc.bgyw),
        ("var(value)",       rd.var("value"),       cc.CET_L18),
        ("count_cat(label)", rd.count_cat("label"), None),
    ]


    fig, axes = plt.subplots(2, 4, figsize=(18, 9))
    axes = axes.flat


    # One panel per reduction, in the order listed above.
    for ax, (panel_title, agg_fn, cmap) in zip(axes, reductions_cfg):
        agg_r = canvas2.points(df_base, "x", "y", agg=agg_fn)
        if cmap is None:
            img = tf.shade(agg_r, color_key={"A": "#e41a1c", "B": "#377eb8", "C": "#4daf4a"})
        else:
            img = tf.shade(agg_r, cmap=cmap, how="eq_hist")
        present(img, panel_title, ax=ax)


    plt.suptitle("Part 2 – All Reduction Types on 2 M points", fontsize=14, fontweight="bold")
    plt.tight_layout()
    plt.show()
    
    
    print("\n=== SECTION 3: Categorical Visualisation ===")


    N_cat = 500_000
    classes = ["Cluster A", "Cluster B", "Cluster C", "Cluster D"]
    facilities = [(-2, -2), (-2, 2), (2, -2), (2, 2)]  # one (x, y) centre per class
    colours = {"Cluster A": "#e41a1c", "Cluster B": "#377eb8",
               "Cluster C": "#4daf4a", "Cluster D": "#ff7f00"}


    # Build one frame per class: normally distributed points around the class
    # centre, with an equal share of N_cat for each class.
    frames = []
    for cat, (cx, cy) in zip(classes, facilities):
        n = N_cat // len(classes)
        frames.append(pd.DataFrame({
            "x":    rng.normal(cx, 0.8, n),
            "y":    rng.normal(cy, 0.8, n),
            "cat":  pd.Categorical([cat] * n, categories=classes),
        }))
    df_cat = pd.concat(frames, ignore_index=True)


    canvas3 = ds.Canvas(plot_width=500, plot_height=500,
                        x_range=(-5, 5), y_range=(-5, 5))
    # Per-pixel counts split by category, so shading can colour by class.
    agg_cat = canvas3.points(df_cat, "x", "y", agg=rd.count_cat("cat"))


    fig, axes = plt.subplots(1, 3, figsize=(16, 5))


    img_raw = tf.shade(agg_cat, color_key=colours)
    present(img_raw, "Raw (no spread)", ax=axes[0])


    img_sp1 = tf.spread(tf.shade(agg_cat, color_key=colours), px=1)
    present(img_sp1, "Spread px=1", ax=axes[1])


    img_bg = tf.set_background(tf.shade(agg_cat, color_key=colours), color="black")
    present(img_bg, "Black background", ax=axes[2])


    # Empty plots exist only to supply one coloured legend entry per class.
    for cat, col in colours.items():
        axes[2].plot([], [], "o", color=col, label=cat, markersize=8)
    axes[2].legend(loc="lower right", fontsize=8, framealpha=0.6)


    plt.suptitle("Part 3 – Categorical Rendering (500 k points)", fontsize=13, fontweight="bold")
    plt.tight_layout()
    plt.show()



    Source link

    Naveed Ahmad

    Naveed Ahmad is a technology journalist and AI writer at ArticlesStock, covering artificial intelligence, machine learning, and emerging tech policy. Read his latest articles.

    Related Posts

    OpenAI says hackers stole some information after newest code safety concern

    14/05/2026

    Cerebras raises $5.5B, kicking off 2026’s IPO season with a bang

    14/05/2026

    Khosla Ventures is betting $10M on Ian Crosby, whose final startup, Bench, imploded

    14/05/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.