I cannot re-use a workable code in 2021 to predict from LbfgsMaximumEntropy taining

DJamin 386 Reputation points

Hello this piece of code is not working anymore, it was running well in 2021.

Current setup :

Microsoft Visual Studio Professional 2022 (64-bit) - LTSC 17.4

Version 17.4.21

Windows 10 Entreprise LTSC 21H2

crash when use Training code at ligne 223 :

System.ArgumentOutOfRangeException: 'Could not find input column 'Label'

Nom du paramètre : inputSchema'

codes and input csv file :

////////////////////////////////////////////////

// Predict

////////////////////////////////////////////////

using Microsoft.ML.Data;

using Microsoft.ML;

using System;

using System.Collections.Generic;

using System.IO;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

namespace PredictCategorie

{

public class InputObject

{

 [LoadColumn(0)]

 public string CatSWM { get; set; }

 [LoadColumn(1)]

 public string ArticleFR { get; set; }

 [LoadColumn(2)]

 [ColumnName("Label")]

 public float IDCategorie { get; set; }

}

/////////////// from Example BEGIN

public class InputObjectDataView : IDataView

{

 private readonly IEnumerable<InputObject> _data;

 public IEnumerable<InputObject> Data

 {

 get

 {

 return _data;

 }

 }

 public DataViewSchema Schema { get; }

 public bool CanShuffle => false;

 public InputObjectDataView(IEnumerable<InputObject> data)

 {

 _data = data;

 var builder = new DataViewSchema.Builder();

 builder.AddColumn("CatSWM", TextDataViewType.Instance);

 builder.AddColumn("ArticleFR", TextDataViewType.Instance);

 builder.AddColumn("IDCategorie", NumberDataViewType.Single);

 Schema = builder.ToSchema();

 }

 public long? GetRowCount() => null;

 public DataViewRowCursor GetRowCursor(

 IEnumerable<DataViewSchema.Column> columnsNeeded,

 Random rand = null)

 => new Cursor(this, columnsNeeded.Any(c => c.Index == 0),

 columnsNeeded.Any(c => c.Index == 1), columnsNeeded.Any(c => c.Index == 2));

 public DataViewRowCursor[] GetRowCursorSet(

 IEnumerable<DataViewSchema.Column> columnsNeeded, int n,

 Random rand = null)

 => new[] { GetRowCursor(columnsNeeded, rand) };

 public class Cursor : DataViewRowCursor

 {

 private bool _disposed;

 private long _position;

 private readonly IEnumerator<InputObject> _enumerator;

 private readonly Delegate[] _getters;

 public override long Position => _position;

 public override long Batch => 0;

 public override DataViewSchema Schema { get; }

 public Cursor(InputObjectDataView parent, bool wantsCatSWM, bool wantsArticleFR, bool wantsIDCategorie)

 {

 Schema = parent.Schema;

 _position = -1;

 _enumerator = parent.Data.GetEnumerator();

 _getters = new Delegate[]

 {

 wantsCatSWM ?

 (ValueGetter<ReadOnlyMemory<char>>)

 Text2GetterImplementation : null,

 wantsArticleFR ?

 (ValueGetter<ReadOnlyMemory<char>>)

 Text1GetterImplementation : null,

 wantsIDCategorie ?

 (ValueGetter<float>)

 Text3GetterImplementation : null

 };

 }

 protected override void Dispose(bool disposing)

 {

 if (_disposed)

 return;

 if (disposing)

 {

 _enumerator.Dispose();

 _position = -1;

 }

 _disposed = true;

 base.Dispose(disposing);

 }

 private void Text2GetterImplementation(ref ReadOnlyMemory<char> value)

 => value = _enumerator.Current.CatSWM.AsMemory();

 private void Text1GetterImplementation(ref ReadOnlyMemory<char> value)

 => value = _enumerator.Current.ArticleFR.AsMemory();

 private void Text3GetterImplementation(ref float value)

 => value = _enumerator.Current.IDCategorie;

 private void IdGetterImplementation(ref DataViewRowId id)

 => id = new DataViewRowId((ulong)_position, 0);

 public override ValueGetter<TValue> GetGetter<TValue>(

 DataViewSchema.Column column)

 {

 if (!IsColumnActive(column))

 throw new ArgumentOutOfRangeException(nameof(column));

 return (ValueGetter<TValue>)_getters[column.Index];

 }

 public override ValueGetter<DataViewRowId> GetIdGetter()

 => IdGetterImplementation;

 public override bool IsColumnActive(DataViewSchema.Column column)

 => _getters[column.Index] != null;

 public override bool MoveNext()

 {

 if (_disposed)

 return false;

 if (_enumerator.MoveNext())

 {

 _position++;

 return true;

 }

 Dispose();

 return false;

 }

 }

}

/////////////// from Example END

class Program

{

 static void Main(string[] args)

 {

 Console.WriteLine("On va trouver la catégorie!");

 // path and file location definition

 string file_path = new DirectoryInfo(Environment.CurrentDirectory).Parent.Parent.Parent.Parent.FullName + @"\MLwork\categorie\";

 string file_name = "cat_TestOpenIndexOnly_classification_04.10.2024";

 // file to write

 string filename_out = $"{file_path}{file_name}_out.csv";

 if (File.Exists(filename_out))

 {

 File.Delete(filename_out);

 }

 using StreamWriter sw = File.CreateText(filename_out);

 // the 1st line

 sw.WriteLine("CatSWM;ArticleFR;IDcategorie;LibelleCategorie;SMP");

 // file to open

 Console.WriteLine("Read input file");

 string filename_in = $"{file_path}{file_name}.csv";

 string[] lines = System.IO.File.ReadAllLines(filename_in);

 // read file and store

 List<string> art = new List<string>();

 List<string> cat = new List<string>();

 int count = 0;

 foreach (string line in lines)

 {

 string[] line_elements = line.Split(';');

 if (line_elements[0] != "CatSWM")

 {

 art.Add(line_elements[0]);

 cat.Add(line_elements[1]);

 count++;

 }

 }

 // https://docs.microsoft.com/fr-fr/dotnet/machine-learning/how-to-guides/save-load-machine-learning-models-ml-net

 //Create MLContext

 MLContext mlContext = new MLContext();

 // Define data preparation and trained model schemas

 DataViewSchema dataPrepPipelineSchema, modelSchema;

 // Load data preparation pipeline

 Console.WriteLine("Load data preparation pipeline");

 string data_prep_name = "\\files\\data_preparation_pipeline_categorie";

 string data_prep_file = $"{file_path}{data_prep_name}.zip";

 ITransformer dataPrepPipeline = mlContext.Model.Load(data_prep_file, out dataPrepPipelineSchema);

 // Load Trained Model

 Console.WriteLine("Load Trained Model");

 //string model_name = "\\ML_categorie";

 string model_name = "\\files\\model_lbfgs";

 string model_file = $"{file_path}{model_name}.zip";

 ITransformer trainedModel = mlContext.Model.Load(model_file, out modelSchema);

 Console.WriteLine("Load IDataView");

 List<InputObject> categorieData = new List<InputObject>();

 for (int i = 0; i < count; i++)

 {

 categorieData.Add(new InputObject { CatSWM = cat[i], ArticleFR = art[i] });

 }

 var inputData = new InputObjectDataView(categorieData);

 // Predicted Data

 Console.WriteLine("Predict");

 IDataView predictions = trainedModel.Transform(inputData);

 float[] scoreColumn = predictions.GetColumn<float>("PredictedLabel").ToArray();

 /////////////////////////////

 // output

 Console.WriteLine("Write ouptput");

 string line_out = "";

 for (int i = 0; i < count; i++)

 {

 if (i % 500 == 0) { Console.WriteLine($"{i}/{count} DONE"); }

 line_out = $"{art[i]};{cat[i]};{scoreColumn[i]}";

 //Console.WriteLine(line_out);

 sw.WriteLine(line_out);

 }

 }

}

}

////////////////////////////////////////////////

// Train

////////////////////////////////////////////////

using Microsoft.ML.Data;

using Microsoft.ML.Trainers;

using Microsoft.ML;

using System;

using System.Collections.Generic;

using System.IO;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

namespace TrainCategorie

{

public class InputObject

{

 [LoadColumn(0)]

 public string CatSWM { get; set; }

 [LoadColumn(1)]

 public string ArticleFR { get; set; }

 [LoadColumn(2)]

 [ColumnName("Label")]

 public float IDCategorie { get; set; }

}

class Program

{

 static void Main(string[] args)

 {

 Console.WriteLine("On va entraîner la catégorie!!");

 // path and file location definition

 //string file_path = @"\\int.ofac.ch\OFAC\Collaborateurs\jamin\Visual Studio 2019\MLwork\categorie\files";

 string file_path = new DirectoryInfo(Environment.CurrentDirectory).Parent.Parent.Parent.Parent.FullName + @"\MLwork\categorie\files";

 string file_name = "cat_TrainOpenIndex_classification_04.10.2024";

 // file to open

 Console.WriteLine("Read input file");

 string filename_in = $"{file_path}\\{file_name}.csv";

 // https://docs.microsoft.com/fr-fr/dotnet/machine-learning/how-to-guides/load-data-ml-net

 //Create MLContext

 MLContext mlContext = new MLContext();

 // Load Trained Model

 Console.WriteLine("Load Pipeline");

 IDataView raw_data = mlContext.Data.LoadFromTextFile<InputObject>(filename_in, separatorChar: ';', hasHeader: true);

 //https://docs.microsoft.com/en-us/dotnet/machine-learning/how-to-guides/prepare-data-ml-net

 Console.WriteLine("Convert str->float");

 // 1

 // Define text transform estimator

 var textEstimator1 = mlContext.Transforms.Text.FeaturizeText("ArticleFR");

 var textEstimator2 = mlContext.Transforms.Text.FeaturizeText("CatSWM");

 // Fit data to estimator

 // Fitting generates a transformer that applies the operations of defined by estimator

 ITransformer textTransformer1 = textEstimator1.Fit(raw_data);

 ITransformer textTransformer2 = textEstimator2.Fit(raw_data);

 var fullTransformer = textTransformer1.Append(textTransformer2);

 // Transform data

 IDataView data = fullTransformer.Transform(raw_data);

 //https://docs.microsoft.com/fr-fr/dotnet/machine-learning/how-to-guides/train-machine-learning-model-ml-net

 //DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

 //IDataView trainData = dataSplit.TrainSet;

 //IDataView testData = dataSplit.TestSet;

 IDataView trainData = data;

 IDataView testData = data;

 // Define Data Prep Estimator

 // 1. Concatenate Size and Historical into a single feature vector output to a new column called Features

 // 2. Normalize Features vector

 Console.WriteLine("Concatenate Features");

 IEstimator<ITransformer> dataPrepEstimator =

 mlContext.Transforms.Concatenate("Features", "ArticleFR", "CatSWM")

 .Append(mlContext.Transforms.NormalizeMinMax("Features"));

 // Create data prep transformer

 ITransformer dataPrepTransformer = dataPrepEstimator.Fit(trainData);

 // Apply transforms to data

 Console.WriteLine("Prepare Train Data");

 IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

 Console.WriteLine("Prepare Test Data");

 IDataView transformedTestData = dataPrepTransformer.Transform(testData);

 // tuto trainer

 //Console.WriteLine("Train Sdca");

 //TrainSdca(mlContext, transformedTrainingData, transformedTestData);

 // Lbfgs needed Trainer

 Console.WriteLine("Train Lbfgs");

 TrainLbfgs(mlContext, transformedTrainingData, transformedTestData, file_path);

 // Save Data Prep transformer

 Console.WriteLine("Save Data Prep transformer");

 mlContext.Model.Save(dataPrepTransformer, trainData.Schema, $"{file_path}\\data_preparation_pipeline_categorie.zip");

 }

 private static void TrainSdca(MLContext mlContext, IDataView transformedTrainingData, IDataView transformedTestData)

 {

 // Define StochasticDualCoordinateAscent regression algorithm estimator

 Console.WriteLine("Build ML");

 var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

 // Build machine learning model

 Console.WriteLine("Train ML");

 var trainedModel = sdcaEstimator.Fit(transformedTrainingData);

 // extract model parameters

 //var trainedModelParameters = trainedModel.Model as LinearRegressionModelParameters;

 // Measure trained model performance

 // Use trained model to make inferences on test data

 IDataView testDataPredictions = trainedModel.Transform(transformedTestData);

 // Extract model metrics and get RSquared

 Console.WriteLine("Evaluate Test Data");

 RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);

 double rSquared = trainedModelMetrics.RSquared;

 Console.WriteLine($"rSquared={rSquared}");

 // save model

 //mlContext.Model.Save(trainedModel, data.Schema, $"{file_path}\\testmodel.zip");

 }

 private static void TrainLbfgs(MLContext mlContext, IDataView transformedTrainingData, IDataView transformedTestData, string file_path)

 {

 // https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.standardtrainerscatalog.lbfgslogisticregression?view=ml-dotnet

 // https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.standardtrainerscatalog.lbfgsmaximumentropy?view=ml-dotnet#Microsoft_ML_StandardTrainersCatalog_LbfgsMaximumEntropy_Microsoft_ML_MulticlassClassificationCatalog_MulticlassClassificationTrainers_System_String_System_String_System_String_System_Single_System_Single_System_Single_System_Int32_System_Boolean_

 Console.WriteLine("Build ML");

 var options = new LbfgsMaximumEntropyMulticlassTrainer.Options()

 {

 //LabelColumnName = "IDCategorie",

 //FeatureColumnName = "Features",

 HistorySize = 50,

 L1Regularization = 0.1f,

 NumberOfThreads = 1

 };

 // Define the trainer.

 var pipeline =

 // Convert the string labels into key types.

 mlContext.Transforms.Conversion.MapValueToKey("Label")

 // Apply LbfgsMaximumEntropy multiclass trainer.

 .Append(mlContext.MulticlassClassification.Trainers

 //.LbfgsMaximumEntropy(options));

 .LbfgsMaximumEntropy());

 // Train the model.

 Console.WriteLine("Train ML");

 var trainedModel = pipeline.Fit(transformedTrainingData);

 // Use trained model to make inferences on test data

 Console.WriteLine("transform trained model");

 IDataView testDataPredictions = trainedModel.Transform(transformedTestData);

 // Extract model metrics and get accuracy

 Console.WriteLine("Evaluate Test Data");

 var trainedModelMetrics = mlContext.MulticlassClassification.Evaluate(testDataPredictions);

 double accuracy = trainedModelMetrics.MicroAccuracy;

 Console.WriteLine($"accuracy={accuracy}");

 // Save Trained Model

 Console.WriteLine("Save Trained Model");

 mlContext.Model.Save(trainedModel, transformedTrainingData.Schema, $"{file_path}\\model_lbfgs.zip");

 }

}

}

////////////////////////////////////////////////

// csv file

////////////////////////////////////////////////

CatSWM;ArticleFR;IDCategorie

cat1;name1;1

;name2;2

cat3;nam3;3

0 comments No comments

Sign in to comment