Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About us
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels

    Naveed Ahmad | By Naveed Ahmad | 17/03/2026 | Updated: 17/03/2026 | No Comments | 3 Mins Read
    blog banner23 51


    # --- Particle simulation: build initial state, run the Warp kernels, plot ---
    #
    # Relies on names defined earlier in the file (not visible in this chunk):
    # n_particles, steps, dt, gravity, damping, bounce, radius, machine, and
    # the kernels init_particles_kernel / simulate_particles_kernel.
    # `machine` holds the Warp device to run on; Warp's keyword for it is
    # `device`, so it is passed as `device=machine` throughout.

    # Initial host-side state: particles evenly spaced in angle on an ellipse
    # centred at (0, 0.7), with velocity perpendicular to the angle direction.
    angles = np.linspace(0.0, 2.0 * np.pi, n_particles, endpoint=False, dtype=np.float32)
    px0_np = 0.4 * np.cos(angles).astype(np.float32)
    py0_np = (0.7 + 0.15 * np.sin(angles)).astype(np.float32)
    vx0_np = (-0.8 * np.sin(angles)).astype(np.float32)
    vy0_np = (0.8 * np.cos(angles)).astype(np.float32)

    # Copy the initial state into Warp arrays on the target device.
    px0_wp = wp.array(px0_np, dtype=wp.float32, device=machine)
    py0_wp = wp.array(py0_np, dtype=wp.float32, device=machine)
    vx0_wp = wp.array(vx0_np, dtype=wp.float32, device=machine)
    vy0_wp = wp.array(vy0_np, dtype=wp.float32, device=machine)

    # Flat history buffers with one slot per (step, particle) pair, including
    # step 0. They are reshaped to (steps + 1, n_particles) after the run.
    # wp.empty leaves the contents uninitialised; the kernels must fill them.
    state_size = (steps + 1) * n_particles
    px_wp = wp.empty(state_size, dtype=wp.float32, device=machine)
    py_wp = wp.empty(state_size, dtype=wp.float32, device=machine)
    vx_wp = wp.empty(state_size, dtype=wp.float32, device=machine)
    vy_wp = wp.empty(state_size, dtype=wp.float32, device=machine)

    # One thread per particle writes the initial state into the history buffers.
    wp.launch(
        kernel=init_particles_kernel,
        dim=n_particles,
        inputs=[n_particles, px0_wp, py0_wp, vx0_wp, vy0_wp],
        outputs=[px_wp, py_wp, vx_wp, vy_wp],
        device=machine,
    )

    # One thread per (step, particle) pair, all launched in a single grid.
    # NOTE(review): the kernel body is not visible here -- confirm that a
    # thread never reads state written by another thread of this same launch.
    wp.launch(
        kernel=simulate_particles_kernel,
        dim=steps * n_particles,
        inputs=[n_particles, dt, gravity, damping, bounce, radius],
        outputs=[px_wp, py_wp, vx_wp, vy_wp],
        device=machine,
    )
    # Wait for the device to finish before reading results back on the host.
    wp.synchronize()

    # Bring positions back to NumPy as (steps + 1, n_particles) trajectories.
    px_traj = px_wp.numpy().reshape(steps + 1, n_particles)
    py_traj = py_wp.numpy().reshape(steps + 1, n_particles)

    # Plot 16 evenly spaced particles rather than all of them.
    sample_ids = np.linspace(0, n_particles - 1, 16, dtype=int)
    plt.figure(figsize=(8, 6))
    for idx in sample_ids:
        plt.plot(px_traj[:, idx], py_traj[:, idx], linewidth=1.5)
    # Dashed reference line at y = radius.
    plt.axhline(radius, linestyle="--")
    plt.xlim(-1.05, 1.05)
    plt.ylim(0.0, 1.25)
    plt.title(f"Warp particle trajectories on {machine}")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()
    
    
    # --- Differentiable projectile: gradient descent on the launch velocity ---
    #
    # Relies on names defined earlier in the file (not visible in this chunk):
    # machine, and the kernels init_projectile_kernel, projectile_step_kernel,
    # projectile_loss_kernel. `machine` is the Warp device and is passed via
    # Warp's `device` keyword.

    # Simulation settings and the target point.
    proj_steps = 180
    proj_dt = np.float32(0.025)
    proj_g = np.float32(-9.8)
    target_x = np.float32(3.8)
    target_y = np.float32(0.0)

    # Starting guess for the launch velocity and the optimiser settings.
    vx_value = np.float32(2.0)
    vy_value = np.float32(6.5)
    lr = 0.08
    iters = 60

    # Per-iteration records, plotted after the loop.
    loss_history = []
    vx_history = []
    vy_history = []

    for it in range(iters):
        # Current velocity guess as one-element arrays that track gradients.
        init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=machine, requires_grad=True)
        init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=machine, requires_grad=True)

        # Fresh zeroed history and loss buffers are allocated every iteration.
        x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine, requires_grad=True)
        y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine, requires_grad=True)
        vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine, requires_grad=True)
        vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine, requires_grad=True)
        loss_wp = wp.zeros(1, dtype=wp.float32, device=machine, requires_grad=True)

        # Record the forward pass (init -> step -> loss) so it can be replayed
        # backwards to obtain gradients.
        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel=init_projectile_kernel,
                dim=1,
                inputs=[],
                outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, init_vx_wp, init_vy_wp],
                device=machine,
            )
            # NOTE(review): all proj_steps threads run in one launch and the
            # kernel body is not visible here -- confirm each step does not
            # depend on values produced by another thread of the same launch.
            wp.launch(
                kernel=projectile_step_kernel,
                dim=proj_steps,
                inputs=[proj_dt, proj_g],
                outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
                device=machine,
            )
            wp.launch(
                kernel=projectile_loss_kernel,
                dim=1,
                inputs=[proj_steps, target_x, target_y],
                outputs=[x_hist_wp, y_hist_wp, loss_wp],
                device=machine,
            )

        # Back-propagate from the scalar loss, then wait for the device.
        tape.backward(loss=loss_wp)
        wp.synchronize()

        # Read the loss and its gradients w.r.t. the launch velocity.
        current_loss = float(loss_wp.numpy()[0])
        grad_vx = float(init_vx_wp.grad.numpy()[0])
        grad_vy = float(init_vy_wp.grad.numpy()[0])

        # Plain gradient-descent update, kept in float32.
        vx_value = np.float32(vx_value - lr * grad_vx)
        vy_value = np.float32(vy_value - lr * grad_vy)

        # The loss is from before the update; the velocities are from after it.
        loss_history.append(current_loss)
        vx_history.append(float(vx_value))
        vy_history.append(float(vy_value))

        # Log every 10th iteration and the last one.
        if it % 10 == 0 or it == iters - 1:
            print(f"iter={it:02d} loss={current_loss:.6f} vx={vx_value:.4f} vy={vy_value:.4f} grad=({grad_vx:.4f}, {grad_vy:.4f})")
    
    
    # --- Replay the projectile with the optimised velocity and report results ---
    #
    # Relies on names set earlier in the file: vx_value, vy_value, proj_steps,
    # proj_dt, proj_g, target_x, target_y, loss_history, vx_history, vy_history,
    # machine, and the kernels init_projectile_kernel / projectile_step_kernel.
    # `machine` is the Warp device and is passed via Warp's `device` keyword.

    # No tape and no requires_grad here: this run only needs the trajectory.
    final_init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=machine)
    final_init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=machine)
    x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine)
    y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine)
    vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine)
    vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=machine)

    wp.launch(
        kernel=init_projectile_kernel,
        dim=1,
        inputs=[],
        outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, final_init_vx_wp, final_init_vy_wp],
        device=machine,
    )
    wp.launch(
        kernel=projectile_step_kernel,
        dim=proj_steps,
        inputs=[proj_dt, proj_g],
        outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
        device=machine,
    )
    # Wait for the device before copying the path back to the host.
    wp.synchronize()

    x_path = x_hist_wp.numpy()
    y_path = y_hist_wp.numpy()

    # Three side-by-side panels: loss curve, velocity history, final trajectory.
    fig = plt.figure(figsize=(15, 4))

    ax1 = fig.add_subplot(1, 3, 1)
    ax1.plot(loss_history)
    ax1.set_title("Optimization loss")
    ax1.set_xlabel("Iteration")
    ax1.set_ylabel("Squared distance")

    ax2 = fig.add_subplot(1, 3, 2)
    ax2.plot(vx_history, label="vx")
    ax2.plot(vy_history, label="vy")
    ax2.set_title("Realized preliminary velocity")
    ax2.set_xlabel("Iteration")
    ax2.legend()

    ax3 = fig.add_subplot(1, 3, 3)
    ax3.plot(x_path, y_path, linewidth=2)
    ax3.scatter([target_x], [target_y], s=80, marker="x")
    ax3.set_title("Differentiable projectile trajectory")
    ax3.set_xlabel("x")
    ax3.set_ylabel("y")
    # Keep the ground visible and leave headroom above the highest point.
    ax3.set_ylim(-0.1, max(1.0, float(np.max(y_path)) + 0.3))

    plt.tight_layout()
    plt.show()

    # Euclidean distance between the last simulated point and the target.
    final_dx = float(x_path[-1] - target_x)
    final_dy = float(y_path[-1] - target_y)
    final_dist = math.sqrt(final_dx * final_dx + final_dy * final_dy)
    print(f"Ultimate goal miss distance: {final_dist:.6f}")
    print(f"Optimized preliminary velocity: vx={vx_value:.6f}, vy={vy_value:.6f}")



    Source link

    Naveed Ahmad

    Related Posts

    Another deep tech chip startup becomes a unicorn: Frore hits $1.64B

    17/03/2026

    The MacBook Neo is ‘the most repairable MacBook’ in years, according to iFixit

    17/03/2026

    Nvidia’s DLSS 5 uses generative AI to boost photorealism in video games, with ambitions beyond gaming

    17/03/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.