package org.openimaj.hadoop.tools.twitter.token.outputmode.sparsecsv;

import com.Ostermiller.util.CSVParser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.openimaj.hadoop.mapreduce.stage.StageProvider;
import org.openimaj.hadoop.mapreduce.stage.helper.SequenceFileTextStage;
import org.openimaj.hadoop.mapreduce.stage.helper.SimpleSequenceFileTextStage;
import org.openimaj.hadoop.tools.HadoopToolsUtil;
import org.openimaj.hadoop.tools.twitter.utils.WordDFIDF;
import org.openimaj.hadoop.tools.twitter.utils.WordDFIDFTimeSeries;
import org.openimaj.util.pair.IndependentPair;

/* loaded from: input_file:org/openimaj/hadoop/tools/twitter/token/outputmode/sparsecsv/Values.class */
/**
 * Provides the Hadoop stage that writes the "values" output of the sparse CSV
 * output mode, and a static reader that loads such an output back into memory.
 *
 * <p>Depending on {@code sortValueByTime} the stage uses either the
 * {@code MapValuesByTime}/{@code ReduceValuesByTime} pair or the
 * {@code MapValuesByWord}/{@code ReduceValuesByWord} pair.
 */
public class Values extends StageProvider {
    /** Configuration key under which the output location is handed to the job. */
    public static final String ARGS_KEY = "INDEX_ARGS";
    /** Configuration key of the boolean flag that records whether matlab output was requested. */
    public static final String MATLAB_OUT = "org.openimaj.hadoop.tools.twitter.token.outputmode.sparsecsv.matlab_out";

    private final String outputPath;
    private final int valueReduceSplit;
    private final boolean sortValueByTime;
    private final boolean matlabOutput;

    /**
     * @param str output location; published to the job under {@link #ARGS_KEY}
     * @param i   number of reduce tasks configured on the job
     * @param z   {@code true} to build the time-keyed stage, {@code false} for the word-keyed one
     * @param z2  {@code true} to also write the word and time indices via
     *            {@code WordIndex.writeToMatlab}/{@code TimeIndex.writeToMatlab} once the
     *            time-keyed stage has finished
     */
    public Values(String str, int i, boolean z, boolean z2) {
        this.outputPath = str;
        this.valueReduceSplit = i;
        this.sortValueByTime = z;
        this.matlabOutput = z2;
    }

    /**
     * Builds the stage matching the configured ordering.
     *
     * <p>NOTE(review): the name is a decompiler artifact (originally {@code stage}, merged
     * with its bridge method); it is kept unchanged so existing references still resolve.
     *
     * @return a stage whose output name is {@code "values"}
     */
    public SequenceFileTextStage<?, ?, ?, ?, ?, ?> m23stage() {
        if (this.sortValueByTime) {
            return new SequenceFileTextStage<Text, BytesWritable, LongWritable, BytesWritable, NullWritable, Text>() {
                public void setup(Job job) {
                    job.setNumReduceTasks(valueReduceSplit);
                    job.getConfiguration().setStrings(ARGS_KEY, outputPath);
                    job.getConfiguration().setBoolean(MATLAB_OUT, matlabOutput);
                }

                public Class<? extends Mapper<Text, BytesWritable, LongWritable, BytesWritable>> mapper() {
                    return MapValuesByTime.class;
                }

                public Class<? extends Reducer<LongWritable, BytesWritable, NullWritable, Text>> reducer() {
                    return ReduceValuesByTime.class;
                }

                public String outname() {
                    return "values";
                }

                public void finished(Job job) {
                    if (!matlabOutput) {
                        return;
                    }
                    try {
                        WordIndex.writeToMatlab(outputPath);
                        TimeIndex.writeToMatlab(outputPath);
                        System.out.println("Done writing the word and time index files to matlab");
                    } catch (IOException e) {
                        // Best effort: the job itself is done, so do not fail here,
                        // but report the cause instead of discarding it.
                        System.out.println("Failed to write the word and time index files");
                        System.err.println("Cause: " + e);
                    }
                }
            };
        }
        return new SimpleSequenceFileTextStage<Text, BytesWritable, NullWritable, Text>() {
            public void setup(Job job) {
                job.setNumReduceTasks(valueReduceSplit);
                job.getConfiguration().setStrings(ARGS_KEY, outputPath);
            }

            public Class<? extends Mapper<Text, BytesWritable, NullWritable, Text>> mapper() {
                return MapValuesByWord.class;
            }

            public Class<? extends Reducer<NullWritable, Text, NullWritable, Text>> reducer() {
                return ReduceValuesByWord.class;
            }

            public String outname() {
                return "values";
            }
        };
    }

    /**
     * Reads the CSV found under {@code str + "/values"} and groups its rows into one
     * time series per word.
     *
     * <p>Each row must hold at least six fields: a word index, a time index and four
     * {@code long} values that are passed, in file order, to the {@link WordDFIDF}
     * constructor after the resolved time. Reading stops at the end of the file or at
     * the first row with no fields. The underlying stream is closed before returning,
     * whether the read succeeds or fails.
     *
     * @param str             location that contains the {@code values} output
     * @param linkedHashMap   time index: maps a time to a pair whose second element is
     *                        the position used for that time in the CSV
     * @param linkedHashMap2  word index: maps a word to a pair whose second element is
     *                        the position used for that word in the CSV
     * @return the series keyed by word, in order of first appearance in the file
     * @throws IOException if the file cannot be located, opened or read
     */
    public static LinkedHashMap<String, WordDFIDFTimeSeries> readWordDFIDF(String str, LinkedHashMap<Long, IndependentPair<Long, Long>> linkedHashMap, LinkedHashMap<String, IndependentPair<Long, Long>> linkedHashMap2) throws IOException {
        // Invert both indices so that a position read from the CSV resolves directly.
        long[] timeByPosition = new long[linkedHashMap.size()];
        for (Map.Entry<Long, IndependentPair<Long, Long>> timeEntry : linkedHashMap.entrySet()) {
            timeByPosition[timeEntry.getValue().secondObject().intValue()] = timeEntry.getKey();
        }
        String[] wordByPosition = new String[linkedHashMap2.size()];
        for (Map.Entry<String, IndependentPair<Long, Long>> wordEntry : linkedHashMap2.entrySet()) {
            wordByPosition[wordEntry.getValue().secondObject().intValue()] = wordEntry.getKey();
        }

        LinkedHashMap<String, WordDFIDFTimeSeries> seriesByWord = new LinkedHashMap<>();
        Path valuesPath = HadoopToolsUtil.getInputPaths(str + "/values")[0];
        // try-with-resources: the reader (and the stream it wraps) used to leak, both on
        // normal completion and when a malformed row made parsing throw.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(HadoopToolsUtil.getFileSystem(valuesPath).open(valuesPath)))) {
            CSVParser parser = new CSVParser(reader);
            String[] line;
            while ((line = parser.getLine()) != null && line.length > 0) {
                int wordPosition = Integer.parseInt(line[0]);
                int timePosition = Integer.parseInt(line[1]);
                long first = Long.parseLong(line[2]);
                long second = Long.parseLong(line[3]);
                long third = Long.parseLong(line[4]);
                long fourth = Long.parseLong(line[5]);

                long time = timeByPosition[timePosition];
                WordDFIDF entry = new WordDFIDF(time, first, second, third, fourth);
                String word = wordByPosition[wordPosition];

                WordDFIDFTimeSeries series = seriesByWord.get(word);
                if (series == null) {
                    series = new WordDFIDFTimeSeries();
                    seriesByWord.put(word, series);
                }
                series.add(time, entry);
            }
        }
        return seriesByWord;
    }
}
