"""
Medium test: Train SOFAM on 3,000 rows, evaluate on 10K val + 38K test.
Uses cached dataset (no re-download).
HA/GA: 3 generations, 3 population (fast tuning, representative evaluation).
"""
import sys
import os
import json
import random
import time

import numpy as np
import torch

os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Import dataset cache
from dataset_cache import load_cached, create_splits

# Import gqfam
import gqfam
from gqfam import (Config, FAM, MapField, QAgent, MetricsManager,
                   run_fam_with_parameters, train_artmap_step)

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, matthews_corrcoef, cohen_kappa_score)

# Override Config for medium test
Config.DATASET_MAX_ROWS = None # We manage data ourselves
Config.GENERATIONS = 3
Config.POPULATION_SIZE = 3
Config.HEURISTIC_GENERATION = 3
Config.HEURISTIC_POPULATION = 3
Config.MAX_NODES = 800
Config.BATCH_SIZE = 128
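
# Training-run budget implied by these settings: HA runs a
# HEURISTIC_GENERATION x HEURISTIC_POPULATION grid (3 x 3 = 9 runs), the GA
# evaluates GENERATIONS x POPULATION_SIZE individuals (3 x 3 = 9 runs), and the
# Q-style phase tries 6 perturbations; with the baseline and the optimized
# retrain, that is 26 full FAM trainings in total.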
print("=" * 60)
print("MEDIUM TEST: Train 3K, Val 10K, Test 38K")
print(f" Generations: {Config.GENERATIONS}, Population: {Config.POPULATION_SIZE}")
print("=" * 60)
# Load cached dataset
X_scaled, X_complement, y, le = load_cached()
# Create smart splits
splits = create_splits(X_complement, y, train_size=3000, random_seed=42)
device = Config.device
# Convert to tensors
X_train_t = torch.tensor(splits['X_train'], dtype=torch.float32, device=device)
y_train_t = torch.tensor(splits['y_train'], dtype=torch.long, device=device)
X_val_t = torch.tensor(splits['X_val'], dtype=torch.float32, device=device)
y_val_t = torch.tensor(splits['y_val'], dtype=torch.long) # Keep on CPU for metrics
X_test_t = torch.tensor(splits['X_test'], dtype=torch.float32, device=device)
y_test_t = torch.tensor(splits['y_test'], dtype=torch.long) # Keep on CPU for metrics
num_features = X_train_t.shape[1]
num_categories = len(np.unique(splits['y_train']))
print(f"\nFeatures: {num_features}, Classes: {num_categories}")
print(f"Device: {device}")
def evaluate_model(fam_model, map_field_model, X_data, y_true, label_encoder, device):
    """Evaluate a trained FAM model on the given data using FAM's prediction API.

    label_encoder is accepted for call-site symmetry with run_fam_with_parameters
    but is not needed here: the metrics operate on encoded labels directly.
    """
    predictions = []
    with torch.no_grad():
        for i in range(len(X_data)):
            sample = (X_data[i].to(device) if isinstance(X_data[i], torch.Tensor)
                      else torch.tensor(X_data[i], dtype=torch.float32, device=device))
            # Use FAM's find_matching_category with rho_a=0 (forced choice / prediction mode)
            J, _ = fam_model.find_matching_category(sample, 0.0)
            if J is not None:
                pred_output = map_field_model.predict(J)
                pred_label = np.argmax(pred_output) if isinstance(pred_output, np.ndarray) else pred_output
                predictions.append(pred_label)
            else:
                predictions.append(0)  # Fallback when no category resonates: class index 0
    y_pred = np.array(predictions)
    y_true_np = y_true.numpy() if isinstance(y_true, torch.Tensor) else y_true
    metrics = {
        "Accuracy": float(accuracy_score(y_true_np, y_pred)),
        "Precision": float(precision_score(y_true_np, y_pred, average='weighted', zero_division=0)),
        "Recall": float(recall_score(y_true_np, y_pred, average='weighted', zero_division=0)),
        "F1 Score": float(f1_score(y_true_np, y_pred, average='weighted', zero_division=0)),
        "MCC": float(matthews_corrcoef(y_true_np, y_pred)),
        "Cohen Kappa": float(cohen_kappa_score(y_true_np, y_pred)),
    }
    return metrics
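
# For reference, the standard Fuzzy ART quantities behind the prediction loop
# above (assumption: gqfam follows the usual Carpenter-Grossberg formulation,
# with alpha the choice parameter and |.| the L1 norm):
#   choice:  T_j = |min(x, w_j)| / (alpha + |w_j|)
#   match:   M_j = |min(x, w_j)| / |x|,   resonance iff M_j >= rho
# With rho_a = 0 every committed category passes the match test, so
# find_matching_category reduces to picking the best-choice node, and
# prediction never triggers match tracking or creates new categories.
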
def run_experiment():
    results = {"meta": {"train_size": 3000, "val_size": len(splits['y_val']),
                        "test_size": len(splits['y_test']), "seed": 42,
                        "generations": Config.GENERATIONS, "population": Config.POPULATION_SIZE}}

    # --- PHASE 1: Baseline ---
    print("\n--- PHASE 1: Baseline FAM ---")
    t0 = time.time()
    baseline_metrics, baseline_fam, baseline_mf = run_fam_with_parameters(
        X_train=X_train_t, y_train=y_train_t,
        X_validation=X_val_t, y_validation=y_val_t,
        num_features=num_features, num_categories=num_categories,
        learning_rate=Config.BASELINE_LR, vigilance=Config.BASELINE_VIG,
        label_encoder=le, device=device
    )
    t1 = time.time()
    print(f" Baseline Val: Acc={baseline_metrics['Accuracy']:.4f}, MCC={baseline_metrics['MCC']:.4f} ({t1-t0:.1f}s)")

    # Also test on the large test set
    print(" Evaluating baseline on test set (38K samples)...")
    baseline_test = evaluate_model(baseline_fam, baseline_mf, X_test_t, y_test_t, le, device)
    print(f" Baseline Test: Acc={baseline_test['Accuracy']:.4f}, MCC={baseline_test['MCC']:.4f}")
    results["Baseline"] = {"val": baseline_metrics, "test": baseline_test,
                           "nodes": baseline_fam.num_active_nodes if hasattr(baseline_fam, 'num_active_nodes') else "N/A",
                           "time_seconds": round(t1 - t0, 2)}
    # --- PHASE 2: Heuristic Search ---
    print("\n--- PHASE 2: Heuristic Agent (HA) ---")
    t0 = time.time()
    best_ha = {"accuracy": 0, "lr": Config.BASELINE_LR, "vig": Config.BASELINE_VIG}
    # Grid: learning rate varies across "generations", vigilance across "individuals"
    lr_values = np.linspace(Config.MIN_LEARNING_RATE, Config.MAX_LEARNING_RATE, Config.HEURISTIC_GENERATION)
    vig_values = np.linspace(Config.MIN_VIGILANCE, Config.MAX_VIGILANCE, Config.HEURISTIC_POPULATION)
    ha_all = []
    for gen, lr in enumerate(lr_values, 1):
        for ind, vig in enumerate(vig_values, 1):
            metrics, _fam, _mf = run_fam_with_parameters(
                X_train=X_train_t, y_train=y_train_t,
                X_validation=X_val_t, y_validation=y_val_t,
                num_features=num_features, num_categories=num_categories,
                learning_rate=lr, vigilance=vig,
                label_encoder=le, device=device
            )
            # Guard against a failed run (metrics is None) before indexing into it
            acc = metrics['Accuracy'] if metrics else 0
            mcc = metrics['MCC'] if metrics else 0
            if metrics and acc > best_ha['accuracy']:
                best_ha = {"accuracy": acc, "lr": lr, "vig": vig, "metrics": metrics}
            ha_all.append({"gen": gen, "ind": ind, "lr": round(lr, 5), "vig": round(vig, 5),
                           "accuracy": acc, "mcc": mcc})
            print(f" HA Gen {gen} Ind {ind}: LR={lr:.4f} VIG={vig:.4f} Acc={acc:.4f} MCC={mcc:.4f}")
    t1 = time.time()
    print(f" HA Best: LR={best_ha['lr']:.4f}, VIG={best_ha['vig']:.4f}, Acc={best_ha['accuracy']:.4f} ({t1-t0:.1f}s)")
    results["HAgent"] = {"best": best_ha, "all_candidates": ha_all, "time_seconds": round(t1 - t0, 2)}
    # --- PHASE 3: Genetic Algorithm ---
    print("\n--- PHASE 3: Genetic Agent (GA) ---")
    t0 = time.time()
    best_ga = {"accuracy": 0, "lr": Config.BASELINE_LR, "vig": Config.BASELINE_VIG}
    random.seed(42)

    # Initialize population uniformly at random within the configured bounds
    population = []
    for _ in range(Config.POPULATION_SIZE):
        lr = random.uniform(Config.MIN_LEARNING_RATE, Config.MAX_LEARNING_RATE)
        vig = random.uniform(Config.MIN_VIGILANCE, Config.MAX_VIGILANCE)
        population.append({"lr": lr, "vig": vig})

    ga_all = []
    for gen in range(1, Config.GENERATIONS + 1):
        gen_results = []
        for ind_idx, ind in enumerate(population):
            metrics, _fam, _mf = run_fam_with_parameters(
                X_train=X_train_t, y_train=y_train_t,
                X_validation=X_val_t, y_validation=y_val_t,
                num_features=num_features, num_categories=num_categories,
                learning_rate=ind['lr'], vigilance=ind['vig'],
                label_encoder=le, device=device
            )
            fitness = metrics['Accuracy'] if metrics else 0
            gen_results.append({"lr": ind['lr'], "vig": ind['vig'], "accuracy": fitness,
                                "mcc": metrics['MCC'] if metrics else 0})
            if metrics and fitness > best_ga['accuracy']:
                best_ga = {"accuracy": fitness, "lr": ind['lr'], "vig": ind['vig'], "metrics": metrics}
            print(f" GA Gen {gen} Ind {ind_idx+1}: LR={ind['lr']:.4f} VIG={ind['vig']:.4f} Acc={fitness:.4f}")
        ga_all.extend(gen_results)

        # Selection + mutation for the next generation: carry the elite over
        # unchanged, fill the rest with clamped Gaussian perturbations of it
        gen_results.sort(key=lambda x: x['accuracy'], reverse=True)
        elite = gen_results[0]
        new_pop = [{"lr": elite['lr'], "vig": elite['vig']}]
        while len(new_pop) < Config.POPULATION_SIZE:
            lr = elite['lr'] + random.gauss(0, 0.05)
            vig = elite['vig'] + random.gauss(0, 0.025)
            lr = max(Config.MIN_LEARNING_RATE, min(Config.MAX_LEARNING_RATE, lr))
            vig = max(Config.MIN_VIGILANCE, min(Config.MAX_VIGILANCE, vig))
            new_pop.append({"lr": lr, "vig": vig})
        population = new_pop
    t1 = time.time()
    print(f" GA Best: LR={best_ga['lr']:.4f}, VIG={best_ga['vig']:.4f}, Acc={best_ga['accuracy']:.4f} ({t1-t0:.1f}s)")
    results["GAgent"] = {"best": best_ga, "all_candidates": ga_all, "time_seconds": round(t1 - t0, 2)}
    # --- PHASE 4: Best optimized params ---
    # Take the better of HA and GA (ties go to HA)
    if best_ha['accuracy'] >= best_ga['accuracy']:
        opt_lr, opt_vig = best_ha['lr'], best_ha['vig']
        opt_source = "HAgent"
    else:
        opt_lr, opt_vig = best_ga['lr'], best_ga['vig']
        opt_source = "GAgent"
    print(f"\n--- PHASE 4: Optimized Run (from {opt_source}: LR={opt_lr:.4f}, VIG={opt_vig:.4f}) ---")
    t0 = time.time()
    opt_metrics, opt_fam, opt_mf = run_fam_with_parameters(
        X_train=X_train_t, y_train=y_train_t,
        X_validation=X_val_t, y_validation=y_val_t,
        num_features=num_features, num_categories=num_categories,
        learning_rate=opt_lr, vigilance=opt_vig,
        label_encoder=le, device=device
    )
    t1 = time.time()
    print(f" Optimized Val: Acc={opt_metrics['Accuracy']:.4f}, MCC={opt_metrics['MCC']:.4f} ({t1-t0:.1f}s)")

    # Test on the large test set
    print(" Evaluating optimized on test set (38K samples)...")
    opt_test = evaluate_model(opt_fam, opt_mf, X_test_t, y_test_t, le, device)
    print(f" Optimized Test: Acc={opt_test['Accuracy']:.4f}, MCC={opt_test['MCC']:.4f}")
    results["Optimized"] = {"source": opt_source, "lr": opt_lr, "vig": opt_vig,
                            "val": opt_metrics, "test": opt_test, "time_seconds": round(t1 - t0, 2)}
    # --- PHASE 5: QAgent refinement ---
    print("\n--- PHASE 5: Q-Agent Refinement ---")
    t0 = time.time()
    # The real QAgent in gqfam.py expects a dataset_processor exposing
    # get_training_data, get_validation_data, etc. Instead of wiring up that
    # wrapper, simulate QAgent-style refinement here: retrain with small
    # perturbations around the optimized parameters and keep the best.
    try:
        q_best = {"accuracy": opt_metrics['Accuracy'], "lr": opt_lr, "vig": opt_vig, "metrics": opt_metrics}
        # +/-5-10% steps on LR, +/-1-2% steps on vigilance, clamped to [0.65, 0.999]
        perturbations = [
            (opt_lr * 0.9, opt_vig),
            (opt_lr * 1.1, opt_vig),
            (opt_lr, min(0.999, opt_vig * 1.02)),
            (opt_lr, max(0.65, opt_vig * 0.98)),
            (opt_lr * 0.95, min(0.999, opt_vig * 1.01)),
            (opt_lr * 1.05, max(0.65, opt_vig * 0.99)),
        ]
        for q_lr, q_vig in perturbations:
            q_metrics, q_fam, q_mf = run_fam_with_parameters(
                X_train=X_train_t, y_train=y_train_t,
                X_validation=X_val_t, y_validation=y_val_t,
                num_features=num_features, num_categories=num_categories,
                learning_rate=q_lr, vigilance=q_vig,
                label_encoder=le, device=device
            )
            if q_metrics and q_metrics['Accuracy'] > q_best['accuracy']:
                q_best = {"accuracy": q_metrics['Accuracy'], "lr": q_lr, "vig": q_vig,
                          "metrics": q_metrics, "fam": q_fam, "mf": q_mf}
                print(f" QAgent improvement: LR={q_lr:.4f} VIG={q_vig:.4f} Acc={q_metrics['Accuracy']:.4f}")
        t1 = time.time()

        # If a perturbation improved on the optimized run, q_best holds its
        # trained 'fam'/'mf'; evaluate that model on the test set
        if q_best['accuracy'] > opt_metrics['Accuracy']:
            print(f" QAgent found improvement! Val Acc: {opt_metrics['Accuracy']:.4f} -> {q_best['accuracy']:.4f}")
            q_test = evaluate_model(q_best['fam'], q_best['mf'], X_test_t, y_test_t, le, device)
            print(f" QAgent Test: Acc={q_test['Accuracy']:.4f}, MCC={q_test['MCC']:.4f}")
        else:
            print(f" QAgent: no improvement over optimized ({opt_metrics['Accuracy']:.4f})")
            q_test = opt_test
            q_best['metrics'] = opt_metrics
        results["QAgent"] = {"val": q_best['metrics'], "test": q_test,
                             "lr": q_best['lr'], "vig": q_best['vig'],
                             "improved_over_optimized": q_best['accuracy'] > opt_metrics['Accuracy'],
                             "time_seconds": round(t1 - t0, 2)}
    except Exception as e:
        print(f" QAgent phase failed: {e}")
        import traceback
        traceback.print_exc()
        results["QAgent"] = {"error": str(e)}
    # --- SUMMARY ---
    print("\n" + "=" * 60)
    print("MEDIUM TEST RESULTS SUMMARY")
    print("=" * 60)
    print(f"{'Phase':<20} {'Val Acc':>8} {'Val MCC':>8} {'Test Acc':>9} {'Test MCC':>9}")
    print("-" * 56)
    for phase in ["Baseline", "Optimized", "QAgent"]:
        if phase in results and 'val' in results[phase]:
            v = results[phase]['val']
            t = results[phase].get('test', {})
            print(f"{phase:<20} {v.get('Accuracy',0):>8.4f} {v.get('MCC',0):>8.4f} "
                  f"{t.get('Accuracy',0):>9.4f} {t.get('MCC',0):>9.4f}")

    # Save results. json.dumps only invokes `default` for objects it cannot
    # serialize natively, so stringifying those is enough to drop any
    # non-serializable leftovers (tensors, model handles).
    clean_results = json.loads(json.dumps(results, default=str))
    outfile = f"Medium_Test_Results_{int(time.time())}.json"
    with open(outfile, 'w') as f:
        json.dump(clean_results, f, indent=2)
    print(f"\nResults saved to {outfile}")
    return results
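
# Usage (assumption: the dataset cache has already been populated via
# dataset_cache.py, e.g. by a prior full run):
#   python medium_test.py
# Results are written to Medium_Test_Results_<unix_timestamp>.json in this directory.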
if __name__ == "__main__":
run_experiment()