Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About us
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    Google ADK Multi-Agent Pipeline Tutorial: Data Loading, Statistical Testing, Visualization, and Report Generation in Python

    Naveed AhmadBy Naveed Ahmad14/04/2026Updated:14/04/2026No Comments5 Mins Read
    blog 35


    def describe_dataset(dataset_name: str, tool_context: ToolContext) -> dict:
       print(f"📊 Describing dataset: {dataset_name}")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       numeric_cols = df.select_dtypes(embody=[np.number]).columns.tolist()
       categorical_cols = df.select_dtypes(embody=['object', 'category']).columns.tolist()
      
       end result = {
           "standing": "success",
           "dataset": dataset_name,
           "overview": {
               "total_rows": int(len(df)),
               "total_columns": int(len(df.columns)),
               "numeric_columns": numeric_cols,
               "categorical_columns": categorical_cols,
               "memory_mb": spherical(float(df.memory_usage(deep=True).sum() / 1024 / 1024), 2),
               "duplicate_rows": int(df.duplicated().sum()),
               "missing_total": int(df.isnull().sum().sum())
           }
       }
      
       if numeric_cols:
           stats_dict = {}
           for col in numeric_cols:
               col_data = df[col].dropna()
               if len(col_data) > 0:
                   stats_dict[col] = {
                       "depend": int(len(col_data)),
                       "imply": spherical(float(col_data.imply()), 3),
                       "std": spherical(float(col_data.std()), 3),
                       "min": spherical(float(col_data.min()), 3),
                       "25%": spherical(float(col_data.quantile(0.25)), 3),
                       "50%": spherical(float(col_data.median()), 3),
                       "75%": spherical(float(col_data.quantile(0.75)), 3),
                       "max": spherical(float(col_data.max()), 3),
                       "skewness": spherical(float(col_data.skew()), 3),
                       "lacking": int(df[col].isnull().sum())
                   }
           end result["numeric_summary"] = stats_dict
      
       if categorical_cols:
           cat_dict = {}
           for col in categorical_cols[:10]:
               vc = df[col].value_counts()
               cat_dict[col] = {
                   "unique_values": int(df[col].nunique()),
                   "top_values": {str(okay): int(v) for okay, v in vc.head(5).gadgets()},
                   "lacking": int(df[col].isnull().sum())
               }
           end result["categorical_summary"] = cat_dict
      
       DATA_STORE.log_analysis("describe", dataset_name, "Statistics generated")
       return make_serializable(end result)
    
    
    
    
    def correlation_analysis(dataset_name: str, methodology: str = "pearson", tool_context: ToolContext = None) -> dict:
       print(f"📊 Correlation evaluation: {dataset_name} ({methodology})")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       numeric_df = df.select_dtypes(embody=[np.number])
      
       if numeric_df.form[1] < 2:
           return {"standing": "error", "message": "Want not less than 2 numeric columns"}
      
       corr_matrix = numeric_df.corr(methodology=methodology)
      
       strong_corrs = []
       for i in vary(len(corr_matrix.columns)):
           for j in vary(i + 1, len(corr_matrix.columns)):
               col1, col2 = corr_matrix.columns[i], corr_matrix.columns[j]
               val = corr_matrix.iloc[i, j]
               if abs(val) > 0.5:
                   strong_corrs.append({
                       "var1": col1,
                       "var2": col2,
                       "correlation": spherical(float(val), 3),
                       "energy": "robust" if abs(val) > 0.7 else "average"
                   })
      
       strong_corrs.kind(key=lambda x: abs(x["correlation"]), reverse=True)
      
       corr_dict = {}
       for col in corr_matrix.columns:
           corr_dict[col] = {okay: spherical(float(v), 3) for okay, v in corr_matrix[col].gadgets()}
      
       DATA_STORE.log_analysis("correlation", dataset_name, f"{methodology} correlation")
      
       return make_serializable({
           "standing": "success",
           "methodology": methodology,
           "correlation_matrix": corr_dict,
           "strong_correlations": strong_corrs[:10],
           "perception": f"Discovered {len(strong_corrs)} pairs with |correlation| > 0.5"
       })
    
    
    
    
    def hypothesis_test(dataset_name: str, test_type: str, column1: str,
                      column2: str = None, group_column: str = None,
                      tool_context: ToolContext = None) -> dict:
       print(f"📊 Speculation check: {test_type} on {dataset_name}")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       if column1 not in df.columns:
           return {"standing": "error", "message": f"Column '{column1}' not discovered"}
      
       attempt:
           if test_type == "normality":
               knowledge = df[column1].dropna()
               if len(knowledge) > 5000:
                   knowledge = knowledge.pattern(5000)
               stat, p = stats.shapiro(knowledge)
              
               return make_serializable({
                   "standing": "success",
                   "check": "Shapiro-Wilk Normality Check",
                   "column": column1,
                   "statistic": spherical(float(stat), 4),
                   "p_value": spherical(float(p), 6),
                   "is_normal": bool(p > 0.05),
                   "interpretation": "Knowledge seems usually distributed" if p > 0.05 else "Knowledge is NOT usually distributed"
               })
              
           elif test_type == "ttest":
               if group_column is None:
                   return {"standing": "error", "message": "group_column required for t-test"}
              
               teams = df[group_column].dropna().distinctive()
               if len(teams) != 2:
                   return {"standing": "error", "message": f"T-test wants precisely 2 teams, discovered {len(teams)}: {record(teams)}"}
              
               g1 = df[df[group_column] == teams[0]][column1].dropna()
               g2 = df[df[group_column] == teams[1]][column1].dropna()
              
               stat, p = stats.ttest_ind(g1, g2)
              
               return make_serializable({
                   "standing": "success",
                   "check": "Unbiased Samples T-Check",
                   "evaluating": column1,
                   "group1": {"title": str(teams[0]), "imply": spherical(float(g1.imply()), 3), "n": int(len(g1))},
                   "group2": {"title": str(teams[1]), "imply": spherical(float(g2.imply()), 3), "n": int(len(g2))},
                   "t_statistic": spherical(float(stat), 4),
                   "p_value": spherical(float(p), 6),
                   "vital": bool(p < 0.05),
                   "interpretation": "Important distinction" if p < 0.05 else "No vital distinction"
               })
              
           elif test_type == "anova":
               if group_column is None:
                   return {"standing": "error", "message": "group_column required for ANOVA"}
              
               groups_data = [grp[column1].dropna().values for _, grp in df.groupby(group_column)]
               group_names = record(df[group_column].distinctive())
              
               stat, p = stats.f_oneway(*groups_data)
              
               group_stats = []
               for title in group_names:
                   grp_data = df[df[group_column] == title][column1].dropna()
                   group_stats.append({
                       "group": str(title),
                       "imply": spherical(float(grp_data.imply()), 3),
                       "std": spherical(float(grp_data.std()), 3),
                       "n": int(len(grp_data))
                   })
              
               return make_serializable({
                   "standing": "success",
                   "check": "One-Method ANOVA",
                   "evaluating": column1,
                   "throughout": group_column,
                   "n_groups": int(len(group_names)),
                   "group_statistics": group_stats,
                   "f_statistic": spherical(float(stat), 4),
                   "p_value": spherical(float(p), 6),
                   "vital": bool(p < 0.05),
                   "interpretation": "Important variations amongst teams" if p < 0.05 else "No vital variations"
               })
              
           elif test_type == "chi2":
               if column2 is None:
                   return {"standing": "error", "message": "column2 required for chi-square check"}
              
               contingency = pd.crosstab(df[column1], df[column2])
               chi2, p, dof, _ = stats.chi2_contingency(contingency)
              
               return make_serializable({
                   "standing": "success",
                   "check": "Chi-Sq. Check of Independence",
                   "variables": [column1, column2],
                   "chi2_statistic": spherical(float(chi2), 4),
                   "p_value": spherical(float(p), 6),
                   "degrees_of_freedom": int(dof),
                   "vital": bool(p < 0.05),
                   "interpretation": "Variables are dependent" if p < 0.05 else "Variables are impartial"
               })
              
           else:
               return {"standing": "error", "message": f"Unknown check: {test_type}. Use: normality, ttest, anova, chi2"}
              
       besides Exception as e:
           return {"standing": "error", "message": f"Check failed: {str(e)}"}
    
    
    
    
    def outlier_detection(dataset_name: str, column: str, methodology: str = "iqr",
                         tool_context: ToolContext = None) -> dict:
       print(f"📊 Outlier detection: {column} in {dataset_name}")
      
       df = DATA_STORE.get_dataset(dataset_name)
       if df is None:
           return {"standing": "error", "message": f"Dataset '{dataset_name}' not discovered"}
      
       if column not in df.columns:
           return {"standing": "error", "message": f"Column '{column}' not discovered"}
      
       knowledge = df[column].dropna()
      
       if methodology == "iqr":
           Q1 = float(knowledge.quantile(0.25))
           Q3 = float(knowledge.quantile(0.75))
           IQR = Q3 - Q1
           decrease = Q1 - 1.5 * IQR
           higher = Q3 + 1.5 * IQR
           outliers = knowledge[(data < lower) | (data > upper)]
          
           return make_serializable({
               "standing": "success",
               "methodology": "IQR (Interquartile Vary)",
               "column": column,
               "bounds": {"decrease": spherical(decrease, 3), "higher": spherical(higher, 3)},
               "iqr": spherical(IQR, 3),
               "total_values": int(len(knowledge)),
               "outlier_count": int(len(outliers)),
               "outlier_pct": spherical(float(len(outliers) / len(knowledge) * 100), 2),
               "outlier_examples": [round(float(x), 2) for x in outliers.head(10).tolist()]
           })
          
       elif methodology == "zscore":
           z = np.abs(stats.zscore(knowledge))
           outliers = knowledge[z > 3]
          
           return make_serializable({
               "standing": "success",
               "methodology": "Z-Rating (threshold: 3)",
               "column": column,
               "total_values": int(len(knowledge)),
               "outlier_count": int(len(outliers)),
               "outlier_pct": spherical(float(len(outliers) / len(knowledge) * 100), 2),
               "outlier_examples": [round(float(x), 2) for x in outliers.head(10).tolist()]
           })
      
       return {"standing": "error", "message": f"Unknown methodology: {methodology}. Use: iqr, zscore"}
    
    
    
    
    print("✅ Statistical evaluation instruments outlined!")



    Source link

    Naveed Ahmad

    Related Posts

    Google adds AI Skills to Chrome to help you save favorite workflows

    14/04/2026

    Anthropic Opposes the High AI Liability Bill That OpenAI Backed

    14/04/2026

    Google brings its Gemini Personal Intelligence feature to India

    14/04/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.