Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About ArticlesStock — AI & Technology Journalist
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    A Coding Tutorial on Datashader for Rendering Huge Datasets with High-Performance Python Visual Analytics

    By Naveed Ahmad · 26/04/2026 · No Comments · 3 Mins Read
    blog 74


    # Notebook-style bootstrap: install the third-party packages the tutorial
    # needs using the same interpreter that is running this script.
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                           "datashader", "colorcet", "numba", "scipy"])

    import time
    import warnings

    import colorcet as cc
    import datashader as ds
    import datashader.transfer_functions as tf
    import matplotlib.colors as mcolors  # the matplotlib module is spelled "colors"
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from datashader import reductions as rd
    from matplotlib.gridspec import GridSpec
    from scipy.stats import multivariate_normal

    # Suppress every warning category for the remainder of the run so the
    # tutorial output stays uncluttered.
    warnings.filterwarnings("ignore")

    print("Datashader version:", ds.__version__)
    
    
    def present(img, title="", ax=None, figsize=(6, 5)):
        """Draw a shaded Datashader image on a Matplotlib axes.

        Args:
            img: Object exposing ``to_pil()`` (e.g. the result of ``tf.shade``).
            title: Text placed above the image.
            ax: Axes to draw on. When ``None`` a new figure is created with
                ``figsize`` and shown immediately after drawing.
            figsize: Figure size used only when ``ax`` is ``None``.
        """
        standalone = ax is None
        if standalone:
            _, ax = plt.subplots(figsize=figsize)
        rgba = img.to_pil()
        # origin="upper" puts row 0 at the top; aspect="auto" lets the image
        # fill the axes rather than forcing square pixels.
        ax.imshow(rgba, origin="upper", aspect="auto")
        ax.set_title(title, fontsize=11, fontweight="bold")
        ax.axis("off")
        if standalone:
            # Only finalise and display figures this function created itself;
            # callers that pass an axes manage their own figure.
            plt.tight_layout()
            plt.show()
    
    
    print("\n=== SECTION 1: Core Pipeline ===")

    # Seeded generator so the synthetic data is reproducible between runs.
    rng = np.random.default_rng(42)
    N = 2_000_000

    # Three Gaussian blobs of N // 3 samples each: two tight clusters centred
    # at (-1, -1) and (1, 1), plus one centred at the origin that is wider in x.
    x = np.concatenate([rng.normal(-1, 0.5, N // 3),
                        rng.normal(1, 0.5, N // 3),
                        rng.normal(0, 1.5, N // 3)])
    y = np.concatenate([rng.normal(-1, 0.5, N // 3),
                        rng.normal(1, 0.5, N // 3),
                        rng.normal(0, 0.5, N // 3)])
    df_base = pd.DataFrame({"x": x, "y": y})

    # The canvas fixes the output resolution and the data-space window.
    canvas = ds.Canvas(plot_width=600, plot_height=500,
                       x_range=(-4, 4), y_range=(-4, 4))

    # Aggregate the points onto the canvas grid, counting points per pixel.
    agg = canvas.points(df_base, "x", "y", agg=rd.count())

    # Shade the same aggregate three ways to compare normalisation modes.
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    combos = [
        ("Linear / blues", tf.shade(agg, cmap=cc.blues, how="linear")),
        ("Log    / fire", tf.shade(agg, cmap=cc.fire, how="log")),
        ("Eq-hist / bmy", tf.shade(agg, cmap=cc.bmy, how="eq_hist")),
    ]
    for ax, (title, img) in zip(axes, combos):
        present(img, title, ax=ax)
    plt.suptitle("Part 1 – 2 M points: Linear vs Log vs Eq-Hist normalisation",
                 fontsize=13, fontweight="bold")
    plt.tight_layout()
    plt.show()
    
    
    print("\n=== SECTION 2: Reduction Types ===")

    # Attach a numeric column and a categorical column to the existing frame
    # so value-based and category-based reductions have something to work on.
    n_actual = len(df_base)
    df_base["value"] = rng.exponential(scale=2, size=n_actual)
    df_base["label"] = pd.Categorical(
        rng.choice(["A", "B", "C"], size=n_actual),
        categories=["A", "B", "C"],
    )

    canvas2 = ds.Canvas(plot_width=400, plot_height=350,
                        x_range=(-4, 4), y_range=(-4, 4))

    # (panel title, reduction, colormap). A colormap of None marks the
    # categorical reduction, which is shaded with a colour key instead.
    reductions_cfg = [
        ("count()",          rd.count(),             cc.kbc),
        ("sum(value)",       rd.sum("value"),        cc.CET_L3),
        ("mean(value)",      rd.mean("value"),       cc.CET_D4),
        ("std(value)",       rd.std("value"),        cc.CET_L16),
        ("min(value)",       rd.min("value"),        cc.CET_L17),
        ("max(value)",       rd.max("value"),        cc.bgyw),
        ("var(value)",       rd.var("value"),        cc.CET_L18),
        ("count_cat(label)", rd.count_cat("label"),  None),
    ]

    # 2 x 4 grid: one panel per configured reduction.
    fig, axes = plt.subplots(2, 4, figsize=(18, 9))
    axes = axes.flat

    for ax, (name, agg_fn, cmap) in zip(axes, reductions_cfg):
        agg_r = canvas2.points(df_base, "x", "y", agg=agg_fn)
        if cmap is None:
            img = tf.shade(agg_r, color_key={"A": "#e41a1c", "B": "#377eb8", "C": "#4daf4a"})
        else:
            img = tf.shade(agg_r, cmap=cmap, how="eq_hist")
        present(img, name, ax=ax)

    plt.suptitle("Part 2 – All Reduction Types on 2 M points", fontsize=14, fontweight="bold")
    plt.tight_layout()
    plt.show()
    
    
    print("\n=== SECTION 3: Categorical Visualisation ===")

    N_cat = 500_000
    classes = ["Cluster A", "Cluster B", "Cluster C", "Cluster D"]
    # Cluster centres, paired positionally with `classes`.
    facilities = [(-2, -2), (-2, 2), (2, -2), (2, 2)]
    colours = {"Cluster A": "#e41a1c", "Cluster B": "#377eb8",
               "Cluster C": "#4daf4a", "Cluster D": "#ff7f00"}

    # One Gaussian blob per category, each holding an equal share of N_cat.
    frames = []
    for cat, (cx, cy) in zip(classes, facilities):
        n = N_cat // len(classes)
        frames.append(pd.DataFrame({
            "x": rng.normal(cx, 0.8, n),
            "y": rng.normal(cy, 0.8, n),
            # Declare the full category list on every frame so the
            # concatenated column keeps one consistent categorical dtype.
            "cat": pd.Categorical([cat] * n, categories=classes),
        }))
    df_cat = pd.concat(frames, ignore_index=True)

    canvas3 = ds.Canvas(plot_width=500, plot_height=500,
                        x_range=(-5, 5), y_range=(-5, 5))
    # Per-pixel counts broken down by category.
    agg_cat = canvas3.points(df_cat, "x", "y", agg=rd.count_cat("cat"))

    fig, axes = plt.subplots(1, 3, figsize=(16, 5))

    # Panel 1: plain categorical shading.
    img_raw = tf.shade(agg_cat, color_key=colours)
    present(img_raw, "Raw (no spread)", ax=axes[0])

    # Panel 2: the same image with each pixel spread by one pixel.
    img_sp1 = tf.spread(tf.shade(agg_cat, color_key=colours), px=1)
    present(img_sp1, "Spread px=1", ax=axes[1])

    # Panel 3: the same image composited over a black background.
    img_bg = tf.set_background(tf.shade(agg_cat, color_key=colours), color="black")
    present(img_bg, "Black background", ax=axes[2])

    # Empty plots add no marks; they only register legend entries, one per
    # category colour.
    for cat, col in colours.items():
        axes[2].plot([], [], "o", color=col, label=cat, markersize=8)
    axes[2].legend(loc="lower right", fontsize=8, framealpha=0.6)

    plt.suptitle("Part 3 – Categorical Rendering (500 k points)", fontsize=13, fontweight="bold")
    plt.tight_layout()
    plt.show()



    Source link

    Naveed Ahmad

    Naveed Ahmad is a technology journalist and AI writer at ArticlesStock, covering artificial intelligence, machine learning, and emerging tech policy. Read his latest articles.

    Related Posts

    RAG Without Vectors: How PageIndex Retrieves by Reasoning

    26/04/2026

    India’s Snabbit seeks fresh funding at a $400M valuation, sources say

    26/04/2026

    xAI Launches grok-voice-think-fast-1.0: Topping τ-voice Bench at 67.3%, Outperforming Gemini, GPT Realtime, and More

    26/04/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.