Commit d12582ac authored by Duc Cao

Option to split training/dev/test sets

parent 1265036b
......@@ -17,6 +17,7 @@ public class CLIParameters {
// Command-line arguments; initialised to null here (where they are assigned is not visible in this hunk).
private String[] args = null;
// Container for the option definitions built by this class.
private Options options = new Options();
// Short name of the "split" flag (split data into training/dev/test sets).
public static final String OPTION_DATA_SPLIT = "s";
// Short name of the "labeled" flag (test files are labeled).
public static final String OPTION_DATA_TYPE_LABELED = "l";
// Short option name "c"; presumably selects a configuration -- usage not visible here, TODO confirm.
public static final String OPTION_CONFIG = "c";
// Short option name "u"; presumably the unlabeled counterpart of OPTION_DATA_TYPE_LABELED -- TODO confirm.
public static final String OPTION_DATA_TYPE_UNLABELED = "u";
......@@ -38,6 +39,7 @@ public class CLIParameters {
OptionGroup optionGroup = new OptionGroup();
Option split = new Option(OPTION_DATA_SPLIT, "split", false, "split data into training/dev/test sets.");
Option labeled = new Option(OPTION_DATA_TYPE_LABELED, "labeled", false, "if test files are labeled.");
// labeled.setArgName(ARGUMENT_ENCODING);
......@@ -65,6 +67,7 @@ public class CLIParameters {
Option jobNumber = new Option(OPTION_JOB_NUMBER, true, "job number (default is 1 -- no multi-threading)");
optionGroup.addOption(split);
optionGroup.addOption(labeled);
optionGroup.addOption(finalFile);
optionGroup.addOption(finalDir);
......
......@@ -15,6 +15,25 @@ public class TrainingLabel extends AProcess {
/**
 * Creates the process from the given extractor configuration, which is
 * passed unchanged to the superclass constructor.
 *
 * @param extractorConfig configuration forwarded to the superclass
 */
public TrainingLabel(SourceExtractorConfig extractorConfig) {
super(extractorConfig);
}
/**
 * Prepares the labeled input data: switches the process to labeled mode,
 * makes sure the labeled working directories exist, converts the original
 * annotations to SOURCE-PRIM / SOURCE-SEC and then runs the labeled
 * parsing step on the labeled input directory.
 *
 * <p>NOTE(review): no explicit training/dev/test partitioning is visible in
 * this method body; confirm whether it happens inside the helpers called
 * here or is still to be implemented.
 *
 * @throws IOException if one of the working directories cannot be created;
 *         presumably also propagated from the conversion/parsing helpers
 * @throws InterruptedException declared for the helper calls; presumably
 *         raised when an external tool run is interrupted -- TODO confirm
 */
public void splitTrainDevTest() throws IOException, InterruptedException {
    LABELED_MODE = true;

    // Fail fast with a clear message instead of silently ignoring a failed
    // mkdirs() and hitting an unrelated error later in the pipeline.
    requireLabeledDirectory(this.paths.DIR_INPUT_LABELED_CONVERTED);
    requireLabeledDirectory(this.paths.DIR_MERGED_FILES_LABELED);
    requireLabeledDirectory(this.paths.DIR_MODELS_LABELED);
    requireLabeledDirectory(this.paths.DIR_BRAT_AUTO_CONVERSION_WAPITI_LABELED);
    requireLabeledDirectory(this.paths.DIR_RESULT_FILES_LABELED);

    // Convert all input annotation files into SOURCE-PRIM and SOURCE-SEC.
    transformOriginalBratToBratSourcePrimAndSec();
    System.out.println("Transformation en SOURCE-PRIM et SOURCE-SEC réussie.");

    // Tag each file, run MaltParser on the same files, then finish with the
    // (B)IO conversion.
    parseDirForLabeled(this.paths.DIR_INPUT_LABELED);
    System.out.println("Tous les fichiers ont été transformés en (B)IO sans problème.");
}

/**
 * Creates {@code directory} (including missing parents) unless it already
 * exists as a directory.
 *
 * @param directory directory that must exist after the call
 * @throws IOException if the directory does not exist and cannot be created
 */
private static void requireLabeledDirectory(java.io.File directory) throws IOException {
    // mkdirs() also returns false when the directory already exists, so an
    // existing directory must not be reported as a failure.
    if (!directory.mkdirs() && !directory.isDirectory()) {
        throw new IOException("Unable to create directory: " + directory.getAbsolutePath());
    }
}
public void train(String modelSuffix, int jubNumber) throws IOException, InterruptedException {
if (!org.apache.commons.lang3.SystemUtils.IS_OS_LINUX) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment