import subprocess, sys
def pip(*pkgs):
    """Install the given packages with pip under the running interpreter.

    Executes ``<sys.executable> -m pip install -q <pkgs...>``.

    Args:
        *pkgs: Requirement strings passed through to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    command = [sys.executable, "-m", "pip", "install", "-q"]
    command.extend(pkgs)
    subprocess.check_call(command)
# Install BudouX at runtime, before the `import budoux` statement further down.
pip("budoux")
import html
import json
import os
import random
import re
import tempfile
import textwrap
import time
from pathlib import Path

import budoux
from IPython.display import HTML, Markdown, display
from IPython.display import display as show  # keep the legacy `show` name bound
# Report the BudouX version; fall back to a plain "installed" marker when the
# package does not expose a `__version__` attribute.
print(f"✅ BudouX version: {getattr(budoux, '__version__', 'installed')}")
def header(title):
    """Render ``title`` as a level-2 Markdown heading in the notebook output.

    Args:
        title: Heading text, placed after the ``## `` Markdown prefix.
    """
    # `display` is IPython's rich-output function; `Markdown` wraps the text so
    # the front end renders it as a heading instead of a plain string.
    display(Markdown(f"## {title}"))
# Section heading for the default-parser demo.
header("1️⃣ Default parsers — Japanese / Chinese (Simplified & Traditional) / Thai")
# One sample sentence and the matching default BudouX parser per language,
# keyed by the human-readable label used when the results are printed.
samples = {
    "Japanese (ja)": (
        "今日は天気です。BudouXは機械学習を用いた改行整形ツールです。",
        budoux.load_default_japanese_parser(),
    ),
    "Simplified Chinese (zh-hans)": (
        "今天是晴天。BudouX 是一个使用机器学习的换行整理工具。",
        budoux.load_default_simplified_chinese_parser(),
    ),
    "Traditional Chinese (zh-hant)": (
        "今天是晴天。BudouX 是一個使用機器學習的換行整理工具。",
        budoux.load_default_traditional_chinese_parser(),
    ),
    "Thai (th)": (
        "วันนี้อากาศดีมากและฉันอยากออกไปเดินเล่นที่สวนสาธารณะ",
        budoux.load_default_thai_parser(),
    ),
}
# Split each sample with its language-specific parser and print the raw text
# next to the resulting phrases (joined with " | ") and the phrase count.
for title, (text, parser) in samples.items():
    chunks = parser.parse(text)
    print(f"\n• {title}")
    print(f"  raw   : {text}")
    print(f"  parsed: {' | '.join(chunks)}  ({len(chunks)} phrases)")
