    Articles Stock
    A Coding Guide to LLM Post-Training with TRL, from Supervised Fine-Tuning to DPO and GRPO Reasoning

    By Naveed Ahmad · 02/05/2026


    import subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U",
       "torchao>=0.16",
       "trl>=0.20",
       "transformers>=4.45",
       "datasets",
       "peft>=0.13",
       "accelerate",
       "bitsandbytes",
    ])
    
    
    import sys as _sys
    # Drop any stale torchao/peft modules so the freshly installed versions load.
    for _m in [m for m in list(_sys.modules) if m.startswith(("torchao", "peft"))]:
       _sys.modules.pop(_m, None)
    try:
       import torchao
    except Exception:
       # Fall back to a stub module so downstream imports don't crash.
       import types
       _fake = types.ModuleType("torchao")
       _fake.__version__ = "0.16.1"
       _sys.modules["torchao"] = _fake
    
    
    import os, re, gc, torch, warnings
    warnings.filterwarnings("ignore")
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["WANDB_DISABLED"] = "true"
    os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
    
    
    from datasets import load_dataset, Dataset
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from peft import LoraConfig
    
    
    print(f"torch={torch.__version__}  cuda={torch.cuda.is_available()}")
    if torch.cuda.is_available():
       print(f"GPU: {torch.cuda.get_device_name(0)}  "
             f"({torch.cuda.get_device_properties(0).total_memory/1e9:.1f} GB)")
    
    
    MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
    DEVICE     = "cuda" if torch.cuda.is_available() else "cpu"
    BF16_OK    = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    
    
    LORA_CFG = LoraConfig(
       r=8, lora_alpha=16, lora_dropout=0.05, bias="none",
       target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
       task_type="CAUSAL_LM",
    )
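The same LoRA adapter config is reused across the training stages. Before the supervised fine-tuning stage, raw instruction data has to be reshaped into the chat-message format that TRL's SFTTrainer consumes through the tokenizer's chat template. A minimal sketch, assuming hypothetical raw field names `question` and `answer` (not from any specific dataset):

```python
def to_chat_example(row):
    """Convert a raw instruction/response pair (hypothetical fields
    'question'/'answer') into the messages format that SFTTrainer
    can render with the tokenizer's chat template."""
    return {
        "messages": [
            {"role": "user", "content": row["question"]},
            {"role": "assistant", "content": row["answer"]},
        ]
    }

# Usage sketch: sft_ds = raw_ds.map(to_chat_example, remove_columns=raw_ds.column_names)
```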
    
    
    def cleanup():
       """Free VRAM between training stages (Colab T4 memory is tight)."""
       gc.collect()
       if torch.cuda.is_available():
           torch.cuda.empty_cache()
    
    
    def chat_generate(model, tokenizer, prompt, max_new_tokens=120):
       """Helper: format as chat, generate, and decode just the assistant turn."""
       msgs = [{"role": "user", "content": prompt}]
       ids = tokenizer.apply_chat_template(
           msgs, return_tensors="pt", add_generation_prompt=True
       ).to(model.device)
       with torch.no_grad():
           out = model.generate(
               ids, max_new_tokens=max_new_tokens,
               do_sample=True, temperature=0.7, top_p=0.9,
               pad_token_id=tokenizer.eos_token_id,
           )
       return tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True)
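For the DPO stage, TRL's DPOTrainer expects each example as a `prompt`/`chosen`/`rejected` triple. A minimal sketch of a pair builder, assuming hypothetical source column names `good` and `bad` for the preferred and dispreferred completions:

```python
def to_preference_pair(row):
    """Build the prompt/chosen/rejected triple that DPOTrainer expects.
    The source fields 'good' and 'bad' are illustrative assumptions."""
    return {
        "prompt": row["prompt"],
        "chosen": row["good"],    # preferred completion
        "rejected": row["bad"],   # dispreferred completion
    }

# Usage sketch: dpo_ds = pref_ds.map(to_preference_pair, remove_columns=pref_ds.column_names)
```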



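Finally, the GRPO stage needs a programmatic reward rather than labeled preferences. A minimal sketch of a format reward that scores completions for a `<think>…</think>` reasoning pattern followed by a final answer; the tag convention is an assumption, and the function follows the `(completions, **kwargs) -> list of floats` shape that TRL's GRPOTrainer accepts for reward functions on plain-text completions:

```python
import re

# Hypothetical format reward: 1.0 if the completion wraps its reasoning
# in <think>...</think> and then emits a final answer, else 0.0.
_THINK_PATTERN = re.compile(r"<think>.*?</think>\s*\S", re.DOTALL)

def format_reward(completions, **kwargs):
    """Score each plain-text completion for the reasoning-tag format."""
    return [1.0 if _THINK_PATTERN.search(c) else 0.0 for c in completions]
```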

    Naveed Ahmad is a technology journalist and AI writer at ArticlesStock, covering artificial intelligence, machine learning, and emerging tech policy.
