package org.openimaj.hadoop.tools.twitter.token.mode.dfidf;

import gnu.trove.procedure.TObjectIntProcedure;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.kohsuke.args4j.CmdLineException;
import org.openimaj.hadoop.mapreduce.stage.StageProvider;
import org.openimaj.hadoop.mapreduce.stage.helper.SimpleSequenceFileStage;
import org.openimaj.hadoop.tools.HadoopToolsUtil;
import org.openimaj.hadoop.tools.twitter.HadoopTwitterTokenToolOptions;
import org.openimaj.hadoop.tools.twitter.token.mode.TextEntryType;
import org.openimaj.hadoop.tools.twitter.token.mode.WritableEnumCounter;
import org.openimaj.hadoop.tools.twitter.token.mode.dfidf.TimeFrequencyHolder;
import org.openimaj.hadoop.tools.twitter.utils.TimeperiodTweetCountWordCount;
import org.openimaj.hadoop.tools.twitter.utils.TweetCountWordMap;
import org.openimaj.hadoop.tools.twitter.utils.WordDFIDF;
import org.openimaj.io.IOUtils;
import org.openimaj.io.wrappers.WriteableListBinary;

/* loaded from: input_file:org/openimaj/hadoop/tools/twitter/token/mode/dfidf/CountWordsAcrossTimeperiod.class */
public class CountWordsAcrossTimeperiod extends StageProvider {
    // Tool arguments; the stage's setup() copies them into the job configuration under "TOKEN_ARGS".
    private String[] nonHadoopArgs;
    // When true the stage uses the Reduce reducer; when false it uses NonCombinedTimesReducer
    // and fixes the number of reduce tasks to 26.
    private boolean combinedTimes;
    // Job-configuration key for the tool arguments. The code in this file passes the same
    // value as a string literal rather than referencing this constant.
    public static final String ARGS_KEY = "TOKEN_ARGS";
    // Sentinel timeperiod (-1): the reducers treat a record carrying this timeperiod as the
    // across-all-times total for a word rather than a real time slot.
    private static final LongWritable END_TIME = new LongWritable(-1);
    // Output name returned by the stage's outname().
    public static final String WORDCOUNT_DIR = "wordtimeperiodDFIDF";

    /* loaded from: input_file:org/openimaj/hadoop/tools/twitter/token/mode/dfidf/CountWordsAcrossTimeperiod$Map.class */
    public static class Map extends Mapper<LongWritable, BytesWritable, Text, BytesWritable> {
        private static HadoopTwitterTokenToolOptions options;

        protected static synchronized void loadOptions(Mapper<LongWritable, BytesWritable, Text, BytesWritable>.Context context) throws IOException {
            if (options == null) {
                try {
                    options = new HadoopTwitterTokenToolOptions(context.getConfiguration().getStrings("TOKEN_ARGS"));
                    options.prepare();
                } catch (Exception e) {
                    throw new IOException(e);
                } catch (CmdLineException e2) {
                    throw new IOException((Throwable) e2);
                }
            }
        }

        protected void setup(Mapper<LongWritable, BytesWritable, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
            loadOptions(context);
        }

        protected void map(final LongWritable longWritable, BytesWritable bytesWritable, final Mapper<LongWritable, BytesWritable, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
            final TweetCountWordMap read = IOUtils.read(new ByteArrayInputStream(bytesWritable.getBytes()), TweetCountWordMap.class);
            if (!read.getTweetWordMap().forEachEntry(new TObjectIntProcedure<String>() { // from class: org.openimaj.hadoop.tools.twitter.token.mode.dfidf.CountWordsAcrossTimeperiod.Map.1
                public boolean execute(String str, int i) {
                    TimeperiodTweetCountWordCount timeperiodTweetCountWordCount = new TimeperiodTweetCountWordCount(longWritable.get(), i, read.getNTweets());
                    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
                    try {
                        IOUtils.writeBinary(byteArrayOutputStream, timeperiodTweetCountWordCount);
                        context.write(new Text(str), new BytesWritable(byteArrayOutputStream.toByteArray()));
                        return true;
                    } catch (IOException e) {
                        return false;
                    } catch (InterruptedException e2) {
                        return false;
                    }
                }
            })) {
                throw new IOException("Couldn't write the TimeperiodTweetCountWordCount object");
            }
        }

        protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((LongWritable) obj, (BytesWritable) obj2, (Mapper<LongWritable, BytesWritable, Text, BytesWritable>.Context) context);
        }
    }

    /**
     * Reducer used when time periods were not combined upstream.
     *
     * <p>For each word it merges all records that share a timeperiod (summing tweet and word
     * counts), then walks the timeperiods in ascending order, looks up each period's
     * frequencies in the time index loaded during setup, and emits the word with a
     * serialized list of {@link WordDFIDF} entries. Records carrying the END_TIME sentinel
     * are accumulated separately and are not part of the emitted list.
     *
     * <p>The synthetic bridge overload of {@code reduce} that the decompiler emitted has been
     * dropped: the compiler generates it automatically.
     */
    public static class NonCombinedTimesReducer extends Reducer<Text, BytesWritable, Text, BytesWritable> {
        /** Tool options parsed from the job configuration. */
        private HadoopTwitterTokenToolOptions options;
        /** Global counters read from the global stats file under the time-count directory. */
        private WritableEnumCounter<TextEntryType> tgs;
        /** Per-timeperiod frequencies read from the time index. */
        private TimeFrequencyHolder timeIndex;

        /**
         * Parses the tool options, then loads the time index and the global stats counter
         * from the tool's output location.
         *
         * @param context the task context supplying the job configuration
         * @throws IOException if parsing or loading fails (original failure is the cause)
         */
        protected synchronized void loadOptions(Reducer<Text, BytesWritable, Text, BytesWritable>.Context context) throws IOException {
            try {
                this.options = new HadoopTwitterTokenToolOptions(context.getConfiguration().getStrings("TOKEN_ARGS"));
                this.options.prepare();
                Path outputPath = HadoopToolsUtil.getOutputPath(this.options);
                this.timeIndex = CountTweetsInTimeperiod.readTimeIndex(CountTweetsInTimeperiod.constructIndexPath(outputPath));
                Path path = new Path(new Path(outputPath, CountTweetsInTimeperiod.TIMECOUNT_DIR), CountTweetsInTimeperiod.GLOBAL_STATS_FILE);
                FileSystem fileSystem = HadoopToolsUtil.getFileSystem(path);
                InputStream statsStream = fileSystem.open(path);
                try {
                    this.tgs = IOUtils.read(statsStream, new WritableEnumCounter<TextEntryType>() {
                        @Override
                        public TextEntryType valueOf(String str) {
                            return TextEntryType.valueOf(str);
                        }
                    });
                } finally {
                    // The stream was previously left open; always release it.
                    statsStream.close();
                }
            } catch (CmdLineException e) {
                e.printStackTrace();
                throw new IOException(e);
            } catch (Exception e2) {
                e2.printStackTrace();
                throw new IOException(e2);
            }
        }

        /**
         * Loads options, time index and global stats before any key is reduced.
         *
         * @param context the task context
         * @throws IOException if loading fails
         * @throws InterruptedException declared by the overridden method
         */
        @Override
        protected void setup(Reducer<Text, BytesWritable, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
            loadOptions(context);
        }

        /**
         * Merges a word's per-timeperiod counts and emits its list of {@link WordDFIDF} values.
         *
         * @param text the word
         * @param iterable serialized {@link TimeperiodTweetCountWordCount} records for the word
         * @param context the task context used to emit the result
         * @throws IOException if a record cannot be read, a timeperiod is missing from the
         *         time index, or the result cannot be written
         * @throws InterruptedException if writing the result was interrupted
         */
        @Override
        protected void reduce(Text text, Iterable<BytesWritable> iterable, Reducer<Text, BytesWritable, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
            // Running total from END_TIME records; accumulated but not emitted by this reducer.
            TimeperiodTweetCountWordCount overall = null;
            TreeSet<Long> timeperiods = new TreeSet<Long>();
            HashMap<Long, TimeperiodTweetCountWordCount> countsByTime = new HashMap<Long, TimeperiodTweetCountWordCount>();
            System.out.println("STARTING WORD: " + text);
            for (BytesWritable value : iterable) {
                // Hadoop reuses the value buffer between iterations, so the backing array may
                // hold stale bytes past getLength(); restrict the read to the valid prefix.
                TimeperiodTweetCountWordCount instance = IOUtils.read(new ByteArrayInputStream(value.getBytes(), 0, value.getLength()), TimeperiodTweetCountWordCount.class);
                System.out.println("... FOUND TIME INSTANCE:" + instance.timeperiod);
                if (instance.timeperiod != CountWordsAcrossTimeperiod.END_TIME.get()) {
                    Long time = Long.valueOf(instance.timeperiod);
                    timeperiods.add(time);
                    TimeperiodTweetCountWordCount existing = countsByTime.get(time);
                    System.out.println("Instance tweet count: " + instance.tweetcount);
                    System.out.println("Instance word count: " + instance.wordcount);
                    if (existing == null) {
                        System.out.println("... time CREATED");
                        countsByTime.put(time, instance);
                    } else {
                        System.out.println("... incremented time CREATED");
                        existing.tweetcount += instance.tweetcount;
                        existing.wordcount += instance.wordcount;
                    }
                } else if (overall == null) {
                    overall = instance;
                    overall.tweetcount = this.tgs.getValue(TextEntryType.VALID);
                } else {
                    overall.wordcount += instance.wordcount;
                }
            }
            // Word count accumulated over the timeperiods visited so far, in ascending order.
            long cumulativeWordCount = 0;
            TreeSet<WordDFIDF> ordered = new TreeSet<WordDFIDF>();
            for (Long time : timeperiods) {
                TimeperiodTweetCountWordCount counts = countsByTime.get(time);
                TimeFrequencyHolder.TimeFrequency frequency = (TimeFrequencyHolder.TimeFrequency) this.timeIndex.get(time.longValue());
                if (frequency == null) {
                    // Fail with context instead of a bare NullPointerException.
                    throw new IOException("No time index entry for timeperiod " + time + " (word: " + text + ")");
                }
                long wordCount = counts.wordcount;
                cumulativeWordCount += wordCount;
                ordered.add(new WordDFIDF(counts.timeperiod, wordCount, frequency.periodFrequency, cumulativeWordCount, frequency.cumulativeFrequency));
            }
            ArrayList<WordDFIDF> result = new ArrayList<WordDFIDF>(ordered);
            context.write(text, new BytesWritable(IOUtils.serialize(new WriteableListBinary<WordDFIDF>(result) {
                @Override
                public void writeValue(WordDFIDF wordDFIDF, DataOutput dataOutput) throws IOException {
                    wordDFIDF.writeBinary(dataOutput);
                }
            })));
        }
    }

    /* loaded from: input_file:org/openimaj/hadoop/tools/twitter/token/mode/dfidf/CountWordsAcrossTimeperiod$Reduce.class */
    public static class Reduce extends Reducer<Text, BytesWritable, Text, BytesWritable> {
        protected void reduce(Text text, Iterable<BytesWritable> iterable, Reducer<Text, BytesWritable, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
            TimeperiodTweetCountWordCount timeperiodTweetCountWordCount = null;
            ArrayList<TimeperiodTweetCountWordCount> arrayList = new ArrayList();
            Iterator<BytesWritable> it = iterable.iterator();
            while (it.hasNext()) {
                TimeperiodTweetCountWordCount timeperiodTweetCountWordCount2 = (TimeperiodTweetCountWordCount) IOUtils.read(new ByteArrayInputStream(it.next().getBytes()), TimeperiodTweetCountWordCount.class);
                if (timeperiodTweetCountWordCount2.timeperiod == CountWordsAcrossTimeperiod.END_TIME.get()) {
                    timeperiodTweetCountWordCount = timeperiodTweetCountWordCount2;
                } else {
                    arrayList.add(timeperiodTweetCountWordCount2);
                }
            }
            long j = timeperiodTweetCountWordCount.tweetcount;
            long j2 = timeperiodTweetCountWordCount.wordcount;
            TreeSet treeSet = new TreeSet();
            for (TimeperiodTweetCountWordCount timeperiodTweetCountWordCount3 : arrayList) {
                treeSet.add(new WordDFIDF(timeperiodTweetCountWordCount3.timeperiod, timeperiodTweetCountWordCount3.wordcount, timeperiodTweetCountWordCount3.tweetcount, j2, j));
            }
            ArrayList arrayList2 = new ArrayList();
            arrayList2.addAll(treeSet);
            context.write(text, new BytesWritable(IOUtils.serialize(new WriteableListBinary<WordDFIDF>(arrayList2) { // from class: org.openimaj.hadoop.tools.twitter.token.mode.dfidf.CountWordsAcrossTimeperiod.Reduce.1
                /* JADX INFO: Access modifiers changed from: protected */
                public void writeValue(WordDFIDF wordDFIDF, DataOutput dataOutput) throws IOException {
                    wordDFIDF.writeBinary(dataOutput);
                }
            })));
        }

        protected /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((Text) obj, (Iterable<BytesWritable>) iterable, (Reducer<Text, BytesWritable, Text, BytesWritable>.Context) context);
        }
    }

    /**
     * Creates a stage provider that does not treat time periods as combined.
     *
     * @param strArr the tool arguments later placed in the job configuration
     */
    public CountWordsAcrossTimeperiod(String[] strArr) {
        this(strArr, false);
    }

    /**
     * Creates a stage provider with an explicit combined-times setting.
     *
     * @param strArr the tool arguments later placed in the job configuration
     * @param z whether time periods are combined; selects which reducer the stage uses
     */
    public CountWordsAcrossTimeperiod(String[] strArr, boolean z) {
        this.combinedTimes = z;
        this.nonHadoopArgs = strArr;
    }

    /**
     * Builds the sequence-file stage for this provider.
     *
     * <p>The stage stores the tool arguments in the job configuration, maps with
     * {@link Map}, reduces with {@link Reduce} when times are combined and with
     * {@link NonCombinedTimesReducer} otherwise, and writes under {@link #WORDCOUNT_DIR}.
     * In the non-combined case the number of reduce tasks is set to 26.
     *
     * <p>NOTE(review): the decompiler reports this method was renamed from {@code stage}
     * (merged with a bridge method); the generated name is kept here.
     *
     * @return a new stage instance bound to this provider's settings
     */
    public SimpleSequenceFileStage<LongWritable, BytesWritable, Text, BytesWritable> m8stage() {
        return new SimpleSequenceFileStage<LongWritable, BytesWritable, Text, BytesWritable>() {
            public void setup(Job job) {
                job.getConfiguration().setStrings("TOKEN_ARGS", CountWordsAcrossTimeperiod.this.nonHadoopArgs);
                if (!CountWordsAcrossTimeperiod.this.combinedTimes) {
                    job.setNumReduceTasks(26);
                }
            }

            public Class<? extends Mapper<LongWritable, BytesWritable, Text, BytesWritable>> mapper() {
                return Map.class;
            }

            public Class<? extends Reducer<Text, BytesWritable, Text, BytesWritable>> reducer() {
                if (CountWordsAcrossTimeperiod.this.combinedTimes) {
                    return Reduce.class;
                }
                return NonCombinedTimesReducer.class;
            }

            public String outname() {
                return CountWordsAcrossTimeperiod.WORDCOUNT_DIR;
            }
        };
    }
}
