Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About us
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    Google ADK Multi-Agent Pipeline Tutorial: Data Loading, Statistical Testing, Visualization, and Report Generation in Python

    Naveed AhmadBy Naveed Ahmad14/04/2026Updated:14/04/2026No Comments5 Mins Read
    blog 35


    def describe_dataset(dataset_name: str, tool_context: ToolContext) -> dict:
       print(f"📊 Describing dataset: {dataset_name}")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       numeric_cols = df.select_dtypes(embody=[np.number]).columns.tolist()
       categorical_cols = df.select_dtypes(embody=['object', 'category']).columns.tolist()
      
       end result = {
           "standing": "success",
           "dataset": dataset_name,
           "overview": {
               "total_rows": int(len(df)),
               "total_columns": int(len(df.columns)),
               "numeric_columns": numeric_cols,
               "categorical_columns": categorical_cols,
               "memory_mb": spherical(float(df.memory_usage(deep=True).sum() / 1024 / 1024), 2),
               "duplicate_rows": int(df.duplicated().sum()),
               "missing_total": int(df.isnull().sum().sum())
           }
       }
      
       if numeric_cols:
           stats_dict = {}
           for col in numeric_cols:
               col_data = df[col].dropna()
               if len(col_data) > 0:
                   stats_dict[col] = {
                       "depend": int(len(col_data)),
                       "imply": spherical(float(col_data.imply()), 3),
                       "std": spherical(float(col_data.std()), 3),
                       "min": spherical(float(col_data.min()), 3),
                       "25%": spherical(float(col_data.quantile(0.25)), 3),
                       "50%": spherical(float(col_data.median()), 3),
                       "75%": spherical(float(col_data.quantile(0.75)), 3),
                       "max": spherical(float(col_data.max()), 3),
                       "skewness": spherical(float(col_data.skew()), 3),
                       "lacking": int(df[col].isnull().sum())
                   }
           end result["numeric_summary"] = stats_dict
      
       if categorical_cols:
           cat_dict = {}
           for col in categorical_cols[:10]:
               vc = df[col].value_counts()
               cat_dict[col] = {
                   "unique_values": int(df[col].nunique()),
                   "top_values": {str(okay): int(v) for okay, v in vc.head(5).gadgets()},
                   "lacking": int(df[col].isnull().sum())
               }
           end result["categorical_summary"] = cat_dict
      
       DATA_STORE.log_analysis("describe", dataset_name, "Statistics generated")
       return make_serializable(end result)
    
    
    
    
    def correlation_analysis(dataset_name: str, methodology: str = "pearson", tool_context: ToolContext = None) -> dict:
       print(f"📊 Correlation evaluation: {dataset_name} ({methodology})")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       numeric_df = df.select_dtypes(embody=[np.number])
      
       if numeric_df.form[1] < 2:
           return {"standing": "error", "message": "Want not less than 2 numeric columns"}
      
       corr_matrix = numeric_df.corr(methodology=methodology)
      
       strong_corrs = []
       for i in vary(len(corr_matrix.columns)):
           for j in vary(i + 1, len(corr_matrix.columns)):
               col1, col2 = corr_matrix.columns[i], corr_matrix.columns[j]
               val = corr_matrix.iloc[i, j]
               if abs(val) > 0.5:
                   strong_corrs.append({
                       "var1": col1,
                       "var2": col2,
                       "correlation": spherical(float(val), 3),
                       "energy": "robust" if abs(val) > 0.7 else "average"
                   })
      
       strong_corrs.kind(key=lambda x: abs(x["correlation"]), reverse=True)
      
       corr_dict = {}
       for col in corr_matrix.columns:
           corr_dict[col] = {okay: spherical(float(v), 3) for okay, v in corr_matrix[col].gadgets()}
      
       DATA_STORE.log_analysis("correlation", dataset_name, f"{methodology} correlation")
      
       return make_serializable({
           "standing": "success",
           "methodology": methodology,
           "correlation_matrix": corr_dict,
           "strong_correlations": strong_corrs[:10],
           "perception": f"Discovered {len(strong_corrs)} pairs with |correlation| > 0.5"
       })
    
    
    
    
    def hypothesis_test(dataset_name: str, test_type: str, column1: str,
                      column2: str = None, group_column: str = None,
                      tool_context: ToolContext = None) -> dict:
       print(f"📊 Speculation check: {test_type} on {dataset_name}")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       if column1 not in df.columns:
           return {"standing": "error", "message": f"Column '{column1}' not discovered"}
      
       attempt:
           if test_type == "normality":
               knowledge = df[column1].dropna()
               if len(knowledge) > 5000:
                   knowledge = knowledge.pattern(5000)
               stat, p = stats.shapiro(knowledge)
              
               return make_serializable({
                   "standing": "success",
                   "check": "Shapiro-Wilk Normality Check",
                   "column": column1,
                   "statistic": spherical(float(stat), 4),
                   "p_value": spherical(float(p), 6),
                   "is_normal": bool(p > 0.05),
                   "interpretation": "Knowledge seems usually distributed" if p > 0.05 else "Knowledge is NOT usually distributed"
               })
              
           elif test_type == "ttest":
               if group_column is None:
                   return {"standing": "error", "message": "group_column required for t-test"}
              
               teams = df[group_column].dropna().distinctive()
               if len(teams) != 2:
                   return {"standing": "error", "message": f"T-test wants precisely 2 teams, discovered {len(teams)}: {record(teams)}"}
              
               g1 = df[df[group_column] == teams[0]][column1].dropna()
               g2 = df[df[group_column] == teams[1]][column1].dropna()
              
               stat, p = stats.ttest_ind(g1, g2)
              
               return make_serializable({
                   "standing": "success",
                   "check": "Unbiased Samples T-Check",
                   "evaluating": column1,
                   "group1": {"title": str(teams[0]), "imply": spherical(float(g1.imply()), 3), "n": int(len(g1))},
                   "group2": {"title": str(teams[1]), "imply": spherical(float(g2.imply()), 3), "n": int(len(g2))},
                   "t_statistic": spherical(float(stat), 4),
                   "p_value": spherical(float(p), 6),
                   "vital": bool(p < 0.05),
                   "interpretation": "Important distinction" if p < 0.05 else "No vital distinction"
               })
              
           elif test_type == "anova":
               if group_column is None:
                   return {"standing": "error", "message": "group_column required for ANOVA"}
              
               groups_data = [grp[column1].dropna().values for _, grp in df.groupby(group_column)]
               group_names = record(df[group_column].distinctive())
              
               stat, p = stats.f_oneway(*groups_data)
              
               group_stats = []
               for title in group_names:
                   grp_data = df[df[group_column] == title][column1].dropna()
                   group_stats.append({
                       "group": str(title),
                       "imply": spherical(float(grp_data.imply()), 3),
                       "std": spherical(float(grp_data.std()), 3),
                       "n": int(len(grp_data))
                   })
              
               return make_serializable({
                   "standing": "success",
                   "check": "One-Method ANOVA",
                   "evaluating": column1,
                   "throughout": group_column,
                   "n_groups": int(len(group_names)),
                   "group_statistics": group_stats,
                   "f_statistic": spherical(float(stat), 4),
                   "p_value": spherical(float(p), 6),
                   "vital": bool(p < 0.05),
                   "interpretation": "Important variations amongst teams" if p < 0.05 else "No vital variations"
               })
              
           elif test_type == "chi2":
               if column2 is None:
                   return {"standing": "error", "message": "column2 required for chi-square check"}
              
               contingency = pd.crosstab(df[column1], df[column2])
               chi2, p, dof, _ = stats.chi2_contingency(contingency)
              
               return make_serializable({
                   "standing": "success",
                   "check": "Chi-Sq. Check of Independence",
                   "variables": [column1, column2],
                   "chi2_statistic": spherical(float(chi2), 4),
                   "p_value": spherical(float(p), 6),
                   "degrees_of_freedom": int(dof),
                   "vital": bool(p < 0.05),
                   "interpretation": "Variables are dependent" if p < 0.05 else "Variables are impartial"
               })
              
           else:
               return {"standing": "error", "message": f"Unknown check: {test_type}. Use: normality, ttest, anova, chi2"}
              
       besides Exception as e:
           return {"standing": "error", "message": f"Check failed: {str(e)}"}
    
    
    
    
    def outlier_detection(dataset_name: str, column: str, methodology: str = "iqr",
                         tool_context: ToolContext = None) -> dict:
       print(f"📊 Outlier detection: {column} in {dataset_name}")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       if column not in df.columns:
           return {"standing": "error", "message": f"Column '{column}' not discovered"}
      
       knowledge = df[column].dropna()
      
       if methodology == "iqr":
           Q1 = float(knowledge.quantile(0.25))
           Q3 = float(knowledge.quantile(0.75))
           IQR = Q3 - Q1
           decrease = Q1 - 1.5 * IQR
           higher = Q3 + 1.5 * IQR
           outliers = knowledge[(data < lower) | (data > upper)]
          
           return make_serializable({
               "standing": "success",
               "methodology": "IQR (Interquartile Vary)",
               "column": column,
               "bounds": {"decrease": spherical(decrease, 3), "higher": spherical(higher, 3)},
               "iqr": spherical(IQR, 3),
               "total_values": int(len(knowledge)),
               "outlier_count": int(len(outliers)),
               "outlier_pct": spherical(float(len(outliers) / len(knowledge) * 100), 2),
               "outlier_examples": [round(float(x), 2) for x in outliers.head(10).tolist()]
           })
          
       elif methodology == "zscore":
           z = np.abs(stats.zscore(knowledge))
           outliers = knowledge[z > 3]
          
           return make_serializable({
               "standing": "success",
               "methodology": "Z-Rating (threshold: 3)",
               "column": column,
               "total_values": int(len(knowledge)),
               "outlier_count": int(len(outliers)),
               "outlier_pct": spherical(float(len(outliers) / len(knowledge) * 100), 2),
               "outlier_examples": [round(float(x), 2) for x in outliers.head(10).tolist()]
           })
      
       return {"standing": "error", "message": f"Unknown methodology: {methodology}. Use: iqr, zscore"}
    
    
    
    
    print("✅ Statistical evaluation instruments outlined!")



    Source link

    Naveed Ahmad

    Related Posts

    Google adds AI Skills to Chrome to help you save favorite workflows

    14/04/2026

    Anthropic Opposes the High AI Liability Bill That OpenAI Backed

    14/04/2026

    Google brings its Gemini Personal Intelligence feature to India

    14/04/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.