Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About us
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    [In-Depth Guide] The Complete CTGAN + SDV Pipeline for High-Fidelity Synthetic Data

    Naveed Ahmad | By Naveed Ahmad | 14/02/2026 | Updated: 14/02/2026 | No Comments | 1 Min Read
    blog banner23 22


    # The report objects below are given the metadata as a plain dict.
    metadata_dict = metadata.to_dict()


    # Build and score the diagnostic report for the real vs. synthetic tables.
    diagnostic = DiagnosticReport()
    diagnostic.generate(
        real_data=actual,
        synthetic_data=synthetic_sdv,
        metadata=metadata_dict,
        verbose=True,
    )
    print("Diagnostic score:", diagnostic.get_score())


    # Build and score the quality report on the same pair of tables.
    quality = QualityReport()
    quality.generate(
        real_data=actual,
        synthetic_data=synthetic_sdv,
        metadata=metadata_dict,
        verbose=True,
    )
    print("Quality score:", quality.get_score())


    def show_report_details(report, title):
        """Print a per-property breakdown of a generated report.

        Args:
            report: A report object exposing ``get_properties()`` and
                ``get_details(property_name=...)``.
            title: Label printed in the section header.
        """
        print(f"\n===== {title} details =====")
        props = report.get_properties()
        # Iterating a DataFrame yields its column labels, not its rows, so when
        # get_properties() returns a table with a "Property" column, read the
        # property names from that column. Any other iterable is used as-is.
        if "Property" in getattr(props, "columns", ()):
            prop_names = list(props["Property"])
        else:
            prop_names = list(props)
        for prop in prop_names:
            print(f"\n--- {prop} ---")
            details = report.get_details(property_name=prop)
            # Best effort: preview the first rows when the details object
            # supports .head(); otherwise fall back to showing it whole.
            # NOTE(review): `show` is not defined in the visible snippet;
            # presumably IPython's `display` -- confirm.
            try:
                show(details.head(10))
            except Exception:
                show(details)


    show_report_details(diagnostic, "DiagnosticReport")
    show_report_details(quality, "QualityReport")
    
    
    # Hold out 25% of the real rows as a test set, stratified on the target
    # column, with a fixed seed so the split is reproducible.
    train_real, test_real = train_test_split(
        actual,
        test_size=0.25,
        random_state=42,
        stratify=actual[target_col],
    )
    
    
    def make_pipeline(cat_cols, num_cols):
        """Build a preprocessing + logistic-regression classification pipeline.

        Args:
            cat_cols: Column names to one-hot encode.
            num_cols: Column names passed through unchanged.

        Returns:
            A ``Pipeline`` with a ``"pre"`` ColumnTransformer step followed by
            a ``"clf"`` LogisticRegression step.
        """
        pre = ColumnTransformer(
            transformers=[
                # handle_unknown="ignore": categories unseen during fit do not
                # raise at predict time.
                ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
                ("num", "passthrough", num_cols),
            ],
            # Columns not listed above (e.g. the target) are discarded.
            remainder="drop",
        )
        clf = LogisticRegression(max_iter=200)
        return Pipeline([("pre", pre), ("clf", clf)])
    
    
    # Train on synthetic rows only, then evaluate on the held-out real rows.
    pipe_syn = make_pipeline(categorical_cols, numerical_cols)
    pipe_syn.fit(synthetic_sdv.drop(columns=[target_col]), synthetic_sdv[target_col])


    # Column 1 of predict_proba is the probability of the classifier's second
    # class. y_true marks labels containing ">" as the positive class.
    # NOTE(review): presumably the target has two labels and the ">" label
    # sorts second -- confirm against pipe_syn.classes_.
    proba_syn = pipe_syn.predict_proba(test_real.drop(columns=[target_col]))[:, 1]
    y_true = (test_real[target_col].astype(str).str.contains(">", regex=False)).astype(int)
    auc_syn = roc_auc_score(y_true, proba_syn)
    print("Synthetic-train -> Real-test AUC:", auc_syn)


    # Baseline: same pipeline trained on the real training split.
    pipe_real = make_pipeline(categorical_cols, numerical_cols)
    pipe_real.fit(train_real.drop(columns=[target_col]), train_real[target_col])


    proba_real = pipe_real.predict_proba(test_real.drop(columns=[target_col]))[:, 1]
    auc_real = roc_auc_score(y_true, proba_real)
    print("Real-train -> Real-test AUC:", auc_real)
    
    
    # Persist the fitted synthesizer to disk.
    model_path = "ctgan_sdv_synth.pkl"
    synth.save(model_path)
    print("Saved synthesizer to:", model_path)


    # Reload it from the saved file to confirm the round trip works.
    from sdv.utils import load_synthesizer
    synth_loaded = load_synthesizer(model_path)


    # Draw 1000 rows from the reloaded synthesizer and preview them.
    # NOTE(review): `show` is not defined in the visible snippet; presumably
    # IPython's `display` -- confirm.
    synthetic_loaded = synth_loaded.sample(1000)
    print("Loaded synthesizer sample:")
    show(synthetic_loaded.head())



    Source link

    Naveed Ahmad

    Related Posts

    Why top talent is walking away from OpenAI and xAI

    14/02/2026

    Indian pharmacy chain giant exposed customer data and internal systems

    14/02/2026

    Airbnb plans to bake in AI features for search, discovery and support

    14/02/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.