package org.openimaj.hadoop.tools.sequencefile;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.shell.MoveCommands;
import org.apache.hadoop.io.Text;
import org.apache.tools.ant.taskdefs.optional.sos.SOSCmd;
import org.apache.tools.ant.taskdefs.optional.vss.MSVSSConstants;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineOptionsProvider;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ProxyOptionHandler;
import org.openimaj.hadoop.sequencefile.ExtractionState;
import org.openimaj.hadoop.sequencefile.NamingStrategy;
import org.openimaj.hadoop.sequencefile.SequenceFileUtility;
import org.openimaj.hadoop.sequencefile.TextBytesSequenceFileUtility;

/* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool.class */
public class SequenceFileTool {

    /** Operation mode selected on the command line with {@code --mode} / {@code -m}; the option is required. */
    @Option(name = "--mode", aliases = {"-m"}, required = true, handler = ProxyOptionHandler.class, usage = "Operation mode")
    private Mode mode;
    // Operation that execute() delegates to.
    // NOTE(review): never assigned anywhere in this file; presumably ProxyOptionHandler populates it
    // reflectively with the object returned by Mode#getOptions() - confirm against the handler.
    private ModeOp modeOp;

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$CreateMode.class */
    /**
     * Options and behaviour for the {@code CREATE} mode: appends the files found under each input
     * path/URI to a sequence file at the output location and optionally writes and/or prints the
     * map returned by the append operation.
     */
    private static class CreateMode extends ModeOp {

        // NOTE(review): the alias comes from an Ant constant, which looks like a decompiler
        // substitution for a string literal - confirm the intended alias text.
        @Option(name = "--recursive", aliases = {MSVSSConstants.FLAG_RECURSION}, required = false, usage = "Recurse into directories inside input directories")
        boolean recurse;

        @Option(name = "--key-name-strategy", aliases = {"-kns"}, required = false, usage = "Strategy for naming keys")
        KeyNameStrategy strategy;

        @Option(name = "--output", aliases = {"-o"}, required = false, usage = "Output directory (path or uri).")
        String outputPathOrUri;

        // NOTE(review): the alias comes from an Ant constant, which looks like a decompiler
        // substitution for a string literal - confirm the intended alias text.
        @Option(name = "--output-name", aliases = {SOSCmd.FLAG_USERNAME}, required = false, usage = "Output filename. Defaults to <uuid>.seq.")
        String outputName;

        @Option(name = "--write-map", aliases = {"-wm"}, required = false, usage = "Write uuid -> filename map to a file. File is saved in output directory as <name>-map.txt.")
        boolean writeFilename2IDMap;

        @Option(name = "--print-map", aliases = {"-pm"}, required = false, usage = "Print uuid -> filename map.")
        boolean printFilename2IDMap;

        @Option(name = "--filename-regex", aliases = {"-fnr"}, required = false, usage = "Regular expressions that file names must match to be added.")
        String filenameRegex;

        @Argument(usage = "input files", multiValued = true, required = true, metaVar = "input-paths-or-uris")
        List<String> inputs;

        /** Sets the defaults used when the corresponding options are not given. */
        private CreateMode() {
            super();
            this.recurse = false;
            this.strategy = KeyNameStrategy.FILENAME;
            this.outputPathOrUri = "./";
            this.writeFilename2IDMap = false;
            this.printFilename2IDMap = false;
            this.filenameRegex = null;
            this.inputs = null;
        }

        /**
         * Appends every input to the output sequence file, then writes and/or prints the
         * collected map if requested, and finally reports the created file on standard error.
         * The sequence file utility is closed even when appending or map output fails.
         *
         * @throws Exception if creating, appending to, or closing the sequence file fails
         */
        // Raw map types are kept deliberately: the generic signature of appendFiles/writePathMap
        // is not visible from this file.
        @SuppressWarnings({"rawtypes", "unchecked"})
        @Override // org.openimaj.hadoop.tools.sequencefile.SequenceFileTool.ModeOp
        public void execute() throws Exception {
            // Build the target in a local so that the option field is left untouched and a
            // repeated call does not append the output name a second time.
            String target = this.outputPathOrUri;
            if (this.outputName != null) {
                if (!target.endsWith("/")) {
                    target += "/";
                }
                target += this.outputName;
            }

            TextBytesSequenceFileUtility writer = new TextBytesSequenceFileUtility(target, false);
            LinkedHashMap pathMap = new LinkedHashMap();
            try {
                // The filter depends only on the options, so build it once rather than per input.
                RegexPathFilter filter = this.filenameRegex != null ? new RegexPathFilter(this.filenameRegex) : null;

                for (String input : this.inputs) {
                    URI inputUri = SequenceFileUtility.convertToURI(input);
                    FileSystem fileSystem = writer.getFileSystem(inputUri);
                    Path inputPath = writer.getPath(inputUri);
                    pathMap.putAll(writer.appendFiles(fileSystem, inputPath, this.recurse, filter, this.strategy.getKeyProvider()));
                }

                if (this.writeFilename2IDMap) {
                    writer.writePathMap(pathMap);
                }
                if (this.printFilename2IDMap) {
                    for (Object item : pathMap.entrySet()) {
                        Map.Entry entry = (Map.Entry) item;
                        System.out.println(entry.getValue() + " " + entry.getKey());
                    }
                }
            } finally {
                // Always release the writer, including when an append or map write throws.
                writer.close();
            }
            System.err.println("Created " + writer.getSequenceFilePath());
        }
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$ExtractMode.class */
    /**
     * Options and behaviour for the {@code EXTRACT} mode: exports records from the sequence
     * file(s) found at the input location, either to an output location or into a zip stream,
     * optionally restricted to a single key, a random subset, or a maximum count.
     */
    private static class ExtractMode extends ModeOp {

        @Option(name = "--output", aliases = {"-o"}, required = false, usage = "Output directory (path or uri).")
        String outputPathOrUri;

        @Option(name = "--key", aliases = {"-k"}, required = false, usage = "Key of file to extract. By default if this is not provided, all files are extracted.")
        String queryKey;

        @Option(name = "--offset", required = false, usage = "Offset from which to start extract")
        long offset;

        @Option(name = "--name-policy", aliases = {"-n"}, handler = ProxyOptionHandler.class, required = false, usage = "Select the naming policy of outputed files")
        NamingStrategy np;

        @Option(name = "--random-select", aliases = {"-r"}, required = false, usage = "Randomly select a subset of input of this size")
        int random;

        // Usage text corrected: the value is handed to ExtractionState.setMaxFileExtract, it is
        // not a random-selection size (the previous text duplicated --random-select).
        @Option(name = "--extract-max", aliases = {"-max"}, required = false, usage = "Maximum number of files to extract")
        int max;

        @Option(name = "--auto-extension", aliases = {"-ae"}, required = false, usage = "Automatically extract the filetype and append its appropriate extension")
        boolean autoExtension;

        @Argument(required = true, usage = "Sequence file", metaVar = "input-path-or-uri")
        private String inputPathOrUri;

        @Option(name = "-zip", required = false, usage = "Extract to zip")
        private boolean zipMode;

        /** Sets the defaults used when the corresponding options are not given. */
        private ExtractMode() {
            super();
            this.np = NamingStrategy.KEY;
            this.random = -1;
            this.max = -1;
            this.autoExtension = false;
            this.zipMode = false;
        }

        /**
         * Extracts records from every sequence file part found at the input location.
         *
         * @throws IOException if reading the sequence files or writing the output fails
         * @throws IllegalArgumentException if the offset is negative
         * @throws UnsupportedOperationException if a single key is requested together with zip output
         * @throws IllegalStateException if the total record count needed for random selection
         *         does not fit in an {@code int}
         */
        @Override // org.openimaj.hadoop.tools.sequencefile.SequenceFileTool.ModeOp
        public void execute() throws IOException {
            if (this.offset < 0) {
                throw new IllegalArgumentException("Offset cannot be less than 0.");
            }
            // Reject the unsupported combination before any output is opened, so that no
            // zip file is created as a side effect of a request that cannot succeed.
            if (this.zipMode && this.queryKey != null) {
                // NOTE(review): borrowing a Hadoop shell constant for the message looks like a
                // decompiler substitution for a string literal - confirm the intended text.
                throw new UnsupportedOperationException(MoveCommands.MoveToLocal.DESCRIPTION);
            }

            System.out.println("Getting file paths...");
            Path[] filePaths = SequenceFileUtility.getFilePaths(this.inputPathOrUri, "part");
            ExtractionState state = new ExtractionState();
            state.setMaxFileExtract(this.max);

            if (this.random >= 0) {
                System.out.println("Counting records");
                int total = countRecords(filePaths);
                System.out.println("Selecting random subset of " + this.random + " from " + total);
                state.setRandomSelection(this.random, total);
            }

            ZipOutputStream zipOut = this.zipMode ? SequenceFileUtility.openZipOutputStream(this.outputPathOrUri) : null;
            try {
                for (Path part : filePaths) {
                    System.out.println("Extracting from " + part.getName());
                    TextBytesSequenceFileUtility reader = new TextBytesSequenceFileUtility(part.toUri(), true);
                    if (this.queryKey != null) {
                        exportSingleKey(reader);
                    } else if (zipOut != null) {
                        reader.exportDataToZip(zipOut, this.np, state, this.autoExtension, this.offset);
                    } else {
                        reader.exportData(this.outputPathOrUri, this.np, state, this.autoExtension, this.offset);
                    }
                    if (state.isFinished()) {
                        break;
                    }
                }
            } finally {
                // Close the zip stream even when an export fails part-way through.
                if (zipOut != null) {
                    zipOut.close();
                }
            }
        }

        /** Sums the record counts of all given parts, refusing totals that do not fit in an int. */
        private int countRecords(Path[] filePaths) throws IOException {
            long total = 0;
            for (Path part : filePaths) {
                System.out.println("... Counting from file: " + part);
                total += new TextBytesSequenceFileUtility(part.toUri(), true).getNumberRecords();
            }
            if (total > Integer.MAX_VALUE) {
                // Previously the sum was silently truncated to int, corrupting the selection size.
                throw new IllegalStateException("Record count " + total + " exceeds the supported maximum of " + Integer.MAX_VALUE);
            }
            return (int) total;
        }

        /** Exports the record matching {@code queryKey} from one part, reporting a miss on standard error. */
        private void exportSingleKey(TextBytesSequenceFileUtility reader) throws IOException {
            if (reader.findAndExport(new Text(this.queryKey), this.outputPathOrUri, this.offset)) {
                return;
            }
            if (this.offset == 0) {
                System.err.format("Key '%s' was not found in the file.\n", this.queryKey);
            } else {
                System.err.format("Key '%s' was not found in the file after offset %d.\n", this.queryKey, Long.valueOf(this.offset));
            }
        }
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$InfoMode.class */
    /**
     * Options and behaviour for the {@code INFO} mode: prints selected properties of a single
     * sequence file to standard output.
     */
    private static class InfoMode extends ModeOp {

        @Option(name = "--options", aliases = {"-opts"}, required = false, usage = "Choose info type. Defaults to all.", multiValued = true)
        private List<InfoModeOptions> options;

        @Argument(required = true, usage = "Sequence file", metaVar = "input-path-or-uri")
        private String inputPathOrUri;

        private InfoMode() {
        }

        /**
         * Prints each requested piece of information; when no selection was made, every
         * {@link InfoModeOptions} value is treated as selected.
         *
         * @throws Exception if the sequence file cannot be opened or queried
         */
        @Override // org.openimaj.hadoop.tools.sequencefile.SequenceFileTool.ModeOp
        public void execute() throws Exception {
            final TextBytesSequenceFileUtility reader = new TextBytesSequenceFileUtility(this.inputPathOrUri, true);

            if (this.options == null) {
                // No explicit selection: fall back to every available option.
                final List<InfoModeOptions> everything = new ArrayList<InfoModeOptions>();
                final InfoModeOptions[] all = InfoModeOptions.values();
                for (int idx = 0; idx < all.length; idx++) {
                    everything.add(all[idx]);
                }
                this.options = everything;
            }

            final List<InfoModeOptions> wanted = this.options;
            final boolean showMetadata = wanted.contains(InfoModeOptions.METADATA);

            // The UUID line is only printed on its own when the metadata dump is not requested.
            if (!showMetadata && wanted.contains(InfoModeOptions.GUID)) {
                System.out.println("UUID: " + reader.getUUID());
            }

            if (showMetadata) {
                final Map<Text, Text> metadata = reader.getMetadata();
                System.out.println("Metadata:");
                for (final Map.Entry<Text, Text> pair : metadata.entrySet()) {
                    System.out.println(pair.getKey() + ": " + pair.getValue());
                }
            }

            if (wanted.contains(InfoModeOptions.NRECORDS)) {
                System.out.println("NRecords: " + reader.getNumberRecords());
            }

            if (wanted.contains(InfoModeOptions.COMPRESSION_CODEC)) {
                System.out.println("Compression codec: " + reader.getCompressionCodecClass());
            }

            if (wanted.contains(InfoModeOptions.COMPRESSION_TYPE)) {
                System.out.println("Compression type: " + reader.getCompressionType());
            }
        }
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$InfoModeOptions.class */
    /** Kinds of information that {@code InfoMode} can print; selected with {@code --options}. */
    enum InfoModeOptions {
        /** Print the file's UUID; InfoMode prints it as a separate line only when METADATA is not also selected. */
        GUID,
        /** Print every metadata key/value pair of the file. */
        METADATA,
        /** Print the number of records in the file. */
        NRECORDS,
        /** Print the compression codec class of the file. */
        COMPRESSION_CODEC,
        /** Print the compression type of the file. */
        COMPRESSION_TYPE
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$KeyNameStrategy.class */
    /** Selectable strategies for deriving record keys when creating a sequence file. */
    private enum KeyNameStrategy {
        MD5UUID,
        FILENAME,
        RELATIVEPATH;

        /**
         * Creates the key provider that corresponds to this strategy.
         *
         * @return a newly constructed key provider; a fresh instance is returned on every call
         */
        public SequenceFileUtility.KeyProvider<Text> getKeyProvider() {
            switch (this) {
                case MD5UUID:
                    return new SequenceFileUtility.MD5UUIDKeyProvider();
                case FILENAME:
                    return new SequenceFileUtility.FilenameKeyProvider();
                case RELATIVEPATH:
                    return new SequenceFileUtility.RelativePathFilenameKeyProvider();
                default:
                    // Unreachable while every constant has a case above.
                    throw new AssertionError("Unhandled key name strategy: " + this);
            }
        }
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$ListMode.class */
    /**
     * Options and behaviour for the {@code LIST} mode: prints per-record information for every
     * sequence file part found at the input location.
     */
    private static class ListMode extends ModeOp {

        @Option(name = "--print-offsets", aliases = {"-po"}, required = false, usage = "Also print the offset of each record")
        boolean printOffsets;

        @Option(name = "--options", aliases = {"-opts"}, required = false, usage = "Choose options to include per record in order.", multiValued = true)
        private final List<ListModeOptions> options;

        // Deliberately not final: a final field initialised with a literal is a compile-time
        // constant whose reads are inlined, so a value supplied on the command line would
        // never be observed.
        @Option(name = "--deliminator", aliases = {"-delim"}, required = false, usage = "Choose the per record options deliminator")
        private String delim;

        @Argument(required = true, usage = "Sequence file", metaVar = "input-path-or-uri")
        private String inputPathOrUri;

        /** Sets the defaults: no offsets, no per-record options, a single space as delimiter. */
        private ListMode() {
            super();
            this.printOffsets = false;
            this.options = new ArrayList<ListModeOptions>();
            this.delim = " ";
        }

        /**
         * Lists the records of each part. When per-record options were given they are
         * extracted using the configured delimiter; otherwise keys are listed, preceded by
         * their offsets when {@code --print-offsets} is set.
         *
         * @throws IOException if a sequence file cannot be read
         */
        @Override // org.openimaj.hadoop.tools.sequencefile.SequenceFileTool.ModeOp
        public void execute() throws IOException {
            // The list is always created by the constructor, so "no options requested" has to be
            // detected by emptiness; a null check alone made the key-listing branches unreachable.
            final boolean hasRecordOptions = this.options != null && !this.options.isEmpty();

            for (Path part : SequenceFileUtility.getFilePaths(this.inputPathOrUri, "part")) {
                System.err.println("Outputting from seqfile: " + part);
                TextBytesSequenceFileUtility reader = new TextBytesSequenceFileUtility(part.toUri(), true);
                if (hasRecordOptions) {
                    // Use the configured delimiter instead of a hard-coded space.
                    reader.extract(ListModeOptions.listOptionsToExtractPolicy(this.options), System.out, this.delim);
                } else if (this.printOffsets) {
                    for (Map.Entry<Text, Long> entry : reader.listKeysAndOffsets().entrySet()) {
                        System.out.format("%10d %s\n", entry.getValue(), entry.getKey().toString());
                    }
                } else {
                    for (Text key : reader.listKeys()) {
                        System.out.println(key.toString());
                    }
                }
            }
        }
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$Mode.class */
    /**
     * Operation modes of the tool. Each mode supplies its own options object through
     * {@link #getOptions()}.
     */
    enum Mode implements CmdLineOptionsProvider {
        INFO,
        CREATE,
        EXTRACT,
        LIST;

        /**
         * Creates the options holder for this mode.
         *
         * @return a newly constructed options object; a fresh instance is returned on every call
         */
        @Override // org.kohsuke.args4j.CmdLineOptionsProvider
        public Object getOptions() {
            switch (this) {
                case INFO:
                    return new InfoMode();
                case CREATE:
                    return new CreateMode();
                case EXTRACT:
                    return new ExtractMode();
                case LIST:
                    return new ListMode();
                default:
                    // Unreachable while every constant has a case above.
                    throw new AssertionError("Unhandled mode: " + this);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/openimaj/hadoop/tools/sequencefile/SequenceFileTool$ModeOp.class */
    /**
     * Base type of the per-mode option holders. Each concrete mode declares its own
     * command-line options and implements {@link #execute()}.
     */
    public static abstract class ModeOp {
        // Private constructor: only classes nested inside SequenceFileTool can extend this type.
        private ModeOp() {
        }

        /**
         * Runs the operation described by this object's options.
         *
         * @throws Exception if the operation fails
         */
        public abstract void execute() throws Exception;
    }

    /**
     * Runs the operation of the configured mode.
     *
     * @throws IllegalStateException if no mode operation has been configured, for example
     *         because the command line has not been parsed yet
     * @throws Exception if the mode operation itself fails
     */
    public void execute() throws Exception {
        if (this.modeOp == null) {
            // Fail with an explanation instead of a bare NullPointerException.
            throw new IllegalStateException("No operation mode has been configured; parse the command line (--mode) before calling execute()");
        }
        this.modeOp.execute();
    }

    /**
     * Command-line entry point: parses the arguments into a new tool instance and runs it.
     * When a {@link CmdLineException} is raised, the message and usage are printed to standard
     * error; if no mode had been selected, the options of every mode are printed as well.
     *
     * @param strArr the command-line arguments
     * @throws Exception if the selected operation fails with anything other than a
     *         {@link CmdLineException}
     */
    public static void main(String[] strArr) throws Exception {
        final SequenceFileTool tool = new SequenceFileTool();
        final CmdLineParser parser = new CmdLineParser(tool);
        try {
            parser.parseArgument(strArr);
            tool.execute();
        } catch (CmdLineException problem) {
            System.err.println(problem.getMessage());
            System.err.println("Usage: java -jar SequenceFileTool.jar [options...]");
            parser.printUsage(System.err);

            if (tool.mode != null) {
                // A mode was chosen, so the per-mode listing below is not printed.
                return;
            }

            final Mode[] allModes = Mode.values();
            for (int idx = 0; idx < allModes.length; idx++) {
                final Mode candidate = allModes[idx];
                System.err.println();
                System.err.println(candidate + " options: ");
                final CmdLineParser modeParser = new CmdLineParser(candidate.getOptions());
                modeParser.printUsage(System.err);
            }
        }
    }
}
