    Why Gradient Descent Zigzags and How Momentum Fixes It

By Naveed Ahmad · 05/05/2026 · 2 min read


import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

# loss(), path_gd, path_mom_good, path_mom_large and STEPS are assumed to come
# from the optimisation runs earlier in the post; a sketch of how they could be
# produced is given after this block.

PLOT_STEPS = 55

x_ = np.linspace(-5, 5, 500)
y_ = np.linspace(-2.2, 2.2, 500)
X, Y = np.meshgrid(x_, y_)
Z    = loss(X, Y)

fig = plt.figure(figsize=(16, 10), facecolor="#FAFAF8")
gs  = GridSpec(2, 3, figure=fig, hspace=0.45, wspace=0.38,
               left=0.07, right=0.97, top=0.88, bottom=0.08)

COLORS = {
    "gd":        "#E05C4B",
    "mom_good":  "#3A7CA5",
    "mom_large": "#F4A536",
    "contour":   "#D4C9B8",
    "minima":    "#2A9D5C",
    "start":     "#444444",
}

PANEL_TITLES = [
    "Vanilla Gradient Descent\nOscillates, slow  (185 steps to converge)",
    "Momentum  β = 0.90\nSmooth, fast  (159 steps to converge)",
    "Momentum  β = 0.99 (too large)\nOvershoots -- never converges",
]

paths_plot = [
    path_gd[:PLOT_STEPS+1],
    path_mom_good[:PLOT_STEPS+1],
    path_mom_large[:PLOT_STEPS+1],
]
colours = [COLORS["gd"], COLORS["mom_good"], COLORS["mom_large"]]

# top row: trajectory panels
for col, (path, colour, title) in enumerate(zip(paths_plot, colours, PANEL_TITLES)):
    ax = fig.add_subplot(gs[0, col])
    ax.set_facecolor("#F5F3EE")

    levels = np.geomspace(0.005, 3.5, 28)
    ax.contour(X, Y, Z, levels=levels, colors=COLORS["contour"],
               linewidths=0.7, alpha=0.9)

    ax.plot(path[:, 0], path[:, 1], color=colour, lw=1.8, alpha=0.85, zorder=3)
    ax.scatter(path[:, 0], path[:, 1], color=colour, s=18, zorder=4, alpha=0.6)

    ax.scatter(*path[0],  marker="o", s=90,  color=COLORS["start"],  zorder=5, label="start")
    ax.scatter(*path[-1], marker="*", s=120, color=COLORS["minima"], zorder=5, label="end")
    ax.scatter(0, 0, marker="+", s=200, color=COLORS["minima"], linewidths=2.5, zorder=6)

    ax.set_xlim(-5, 5)
    ax.set_ylim(-2.2, 2.2)
    ax.set_title(title, fontsize=9.5, fontweight="bold", color="#222", pad=7, loc="left")
    ax.set_xlabel("θ₁  (slow direction)", fontsize=8, color="#666")
    ax.set_ylabel("θ₂  (fast direction)", fontsize=8, color="#666")
    ax.tick_params(labelsize=7, colors="#888")
    for spine in ax.spines.values():
        spine.set_edgecolor("#CCCCCC")

# bottom-left: loss curves (full 300 steps)
ax_loss = fig.add_subplot(gs[1, :2])
ax_loss.set_facecolor("#F5F3EE")

full_paths  = [path_gd, path_mom_good, path_mom_large]
full_labels = ["Vanilla GD  (185 steps)", "Momentum β=0.90  (159 steps)", "Momentum β=0.99  (diverges)"]

for path, colour, label in zip(full_paths, colours, full_labels):
    losses = [loss(*p) for p in path]
    steps_range = np.arange(len(path))
    ax_loss.plot(steps_range, losses, color=colour, lw=2, label=label, alpha=0.9)

ax_loss.axhline(0.001, color="#999", lw=1, ls="--", alpha=0.6)
ax_loss.text(305, 0.001, "convergence\nthreshold", fontsize=7, color="#888", va="center")

ax_loss.set_yscale("log")
ax_loss.set_xlim(0, STEPS)
ax_loss.set_title("Loss vs. Optimisation Step  (log scale, 300 steps)",
                  fontsize=10.5, fontweight="bold", color="#222", loc="left")
ax_loss.set_xlabel("Step", fontsize=9, color="#666")
ax_loss.set_ylabel("Loss  f(θ)", fontsize=9, color="#666")
ax_loss.legend(fontsize=8.5, framealpha=0.6)
ax_loss.tick_params(labelsize=8, colors="#888")
for spine in ax_loss.spines.values():
    spine.set_edgecolor("#CCCCCC")

# bottom-right: annotation panel
ax_ann = fig.add_subplot(gs[1, 2])
ax_ann.set_facecolor("#F5F3EE")
ax_ann.axis("off")

annotation = (
    "Update rules\n\n"
    "Vanilla GD\n"
    "  θ ← θ − α·∇L(θ)\n\n"
    "Momentum GD\n"
    "  v ← β·v + (1−β)·∇L(θ)\n"
    "  θ ← θ − α·v\n\n"
    "Key intuition\n"
    "  v accumulates past gradients.\n"
    "  Vertical oscillations cancel out.\n"
    "  Horizontal steps compound.\n\n"
    "Hyperparameter β\n"
    "  β → 0  :  behaves like GD\n"
    "  β = 0.9:  typical sweet spot\n"
    "  β → 1  :  overshoots / diverges"
)
ax_ann.text(0.05, 0.97, annotation, transform=ax_ann.transAxes,
            fontsize=8.8, va="top", ha="left",
            fontfamily="monospace", color="#333", linespacing=1.7)

fig.suptitle("Momentum in Gradient Descent",
             fontsize=16, fontweight="bold", color="#111", y=0.95)

plt.savefig("momentum_explainer.png", dpi=150, bbox_inches="tight",
            facecolor=fig.get_facecolor())
plt.show()
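
The plotting code assumes that loss, STEPS and the three trajectories (path_gd, path_mom_good, path_mom_large) were produced earlier in the post. A minimal sketch of how they could be generated with the update rules shown in the annotation panel follows; the quadratic loss, learning rate and starting point here are illustrative assumptions, not necessarily the values behind the 185/159-step counts quoted in the panel titles.

import numpy as np

# Assumed ill-conditioned quadratic bowl: shallow along θ1, steep along θ2,
# which is exactly the shape that makes plain gradient descent zigzag.
def loss(x, y):
    return 0.02 * x**2 + 1.0 * y**2

def grad(theta):
    x, y = theta
    return np.array([0.04 * x, 2.0 * y])

STEPS = 300                       # steps shown in the loss panel
ALPHA = 0.95                      # learning rate (assumed value)
START = np.array([-4.5, 1.8])     # starting point (assumed value)

def run(beta, alpha=ALPHA, steps=STEPS):
    """beta = 0 is vanilla GD; beta > 0 is momentum in the EMA form."""
    theta, v = START.copy(), np.zeros(2)
    path = [theta.copy()]
    for _ in range(steps):
        v = beta * v + (1.0 - beta) * grad(theta)   # v ← β·v + (1−β)·∇L(θ)
        theta = theta - alpha * v                   # θ ← θ − α·v
        path.append(theta.copy())
    return np.array(path)

path_gd        = run(beta=0.0)     # oscillates along θ2, crawls along θ1
path_mom_good  = run(beta=0.90)    # oscillations cancel, progress compounds
path_mom_large = run(beta=0.99)    # velocity too persistent: overshoots

With beta = 0 the velocity is just the current gradient, so one helper covers all three curves; raising beta toward 1 makes the velocity average over more past gradients, which smooths the zigzag until it eventually overshoots.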




    Naveed Ahmad

Naveed Ahmad is a technology journalist and AI writer at ArticlesStock, covering artificial intelligence, machine learning, and emerging tech policy.
