using DataFrames, CSV, MLJ, MLJDecisionTreeInterface
begin
    iowa_file_path = "data//home-data-for-ml-course//train.csv"
    home_data = CSV.read(iowa_file_path, DataFrame);
    y = home_data.SalePrice;
    feature_names = [:LotArea, :YearBuilt, Symbol("1stFlrSF"), Symbol("2ndFlrSF"), :FullBath, :BedroomAbvGr, :TotRmsAbvGrd];
    X = home_data[:, feature_names];

    # Split into validation and training data
    (Xtrain, Xtest), (ytrain, ytest) = partition((X, y), 0.8, rng=123, multi=true);
    
    # Specify Model
    Tree = @load DecisionTreeRegressor pkg=DecisionTree verbosity=0
    iowa_model = Tree()
    mach = machine(iowa_model, Xtrain, ytrain, scitype_check_level=0)
    # Fit Model
    fit!(mach, verbosity = 0)
    val_predictions = predict(mach, Xtest)
    val_mae = mean_absolute_error(val_predictions, ytest)
    println("Validation MAE when not specifying max_leaf_nodes: $(round(Int, val_mae))")

    # Using a better value for min_samples_leaf (DecisionTree.jl exposes no max_leaf_nodes
    # parameter); see the search sketch after this block for how such a value can be chosen
    iowa_model = Tree(min_samples_leaf=5, rng=1)
    mach = machine(iowa_model, Xtrain, ytrain, scitype_check_level=0)
    fit!(mach, verbosity = 0)
    val_predictions = predict(mach, Xtest)
    val_mae = mean_absolute_error(val_predictions, ytest)
    println("Validation MAE for best value of max_leaf_nodes: $(round(Int, val_mae))")
end
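
How was a good value found? One possibility (a sketch only; the helper function and candidate values below are illustrative and not part of the original exercise) is to loop over a few settings of min_samples_leaf and compare the validation MAE:

begin
    # Train a tree with a given min_samples_leaf and return the validation MAE.
    function leaf_mae(n, Xtr, Xva, ytr, yva)
        model = Tree(min_samples_leaf=n, rng=1)
        m = machine(model, Xtr, ytr, scitype_check_level=0)
        fit!(m, verbosity=0)
        mean_absolute_error(predict(m, Xva), yva)
    end
    for n in (1, 5, 25, 50)
        println("min_samples_leaf = $n: MAE = $(round(Int, leaf_mae(n, Xtrain, Xtest, ytrain, ytest)))")
    end
end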
Forest = @load RandomForestRegressor pkg=DecisionTree verbosity=0
RandomForestRegressor
forest = Forest()
RandomForestRegressor(
  max_depth = -1, 
  min_samples_leaf = 1, 
  min_samples_split = 2, 
  min_purity_increase = 0.0, 
  n_subfeatures = -1, 
  n_trees = 100, 
  sampling_fraction = 0.7, 
  feature_importance = :impurity, 
  rng = Random._GLOBAL_RNG())
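
The printout above lists the forest's default hyperparameters. Any of them can be overridden in the constructor; for example (the values here are purely illustrative):

forest_custom = Forest(n_trees=200, min_samples_leaf=5, rng=1)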
rf_model = machine(forest, Xtrain, ytrain, scitype_check_level=0)
untrained Machine; caches model-specific representations of data
  model: RandomForestRegressor(max_depth = -1, …)
  args: 
    1:	Source @654 ⏎ ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Count}}
    2:	Source @067 ⏎ AbstractVector{ScientificTypesBase.Count}
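
The Source entries show that both the features and the target carry the Count scientific type, because the CSV columns are integers; scitype_check_level=0 silences the resulting mismatch warning. An alternative, not used in this tutorial, would be to coerce the data to Continuous before constructing the machine, after which that keyword can be dropped:

Xcont = coerce(X, Count => Continuous)  # convert all integer columns to Float64
ycont = float.(y)                       # convert the target to Float64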
fit!(rf_model)
trained Machine; caches model-specific representations of data
  model: RandomForestRegressor(max_depth = -1, …)
  args: 
    1:	Source @654 ⏎ ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Count}}
    2:	Source @067 ⏎ AbstractVector{ScientificTypesBase.Count}
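
Because the model was built with feature_importance = :impurity, impurity-based importances should be recoverable from the trained machine, provided the installed MLJ and MLJDecisionTreeInterface versions support the feature_importances accessor (a hedged sketch, not output from the original run):

feature_importances(rf_model)  # vector of feature => importance pairs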
rf_val_predictions = predict(rf_model, Xtest)
292-element Vector{Float64}:
 178613.93
 134950.14
 234560.9
 155530.5
 305688.15
 310621.37
 244759.2
      ⋮
 156312.5
 135762.4
 150215.0
  99155.71
 255391.5
 154969.5
rf_val_mae = mean_absolute_error(rf_val_predictions, ytest)
22869.537739726016
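
This single holdout MAE depends on the particular 80/20 split chosen earlier. For a more robust estimate, the machine could be evaluated with cross-validation; the 5-fold setup below is illustrative and not part of the original tutorial:

evaluate!(rf_model, resampling=CV(nfolds=5, rng=123), measure=mae, verbosity=0)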

Built with Julia 1.9.1 and

CSV 0.10.9
DataFrames 1.5.0
MLJ 0.19.1
MLJDecisionTreeInterface 0.4.0

To run this tutorial locally, download [this file](/tutorials/randomforests01x03.jl) and open it with Pluto.jl.