package org.openimaj.hadoop.tools.downloader;

import java.io.IOException;
import java.net.URL;
import java.util.List;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
import org.openimaj.hadoop.tools.downloader.InputMode;
import org.openimaj.io.HttpUtils;
import org.openimaj.util.pair.IndependentPair;

/* loaded from: input_file:org/openimaj/hadoop/tools/downloader/DownloadMapper.class */
/**
 * Hadoop {@link Mapper} that treats every input line as a download request.
 *
 * <p>Each line is handed to the configured {@link InputMode.Parser}, which yields a key and a
 * list of candidate URLs. The candidates are tried in order; the bytes of the first one that can
 * be fetched are emitted as {@code (key, bytes)}. Outcomes are tallied in {@link Counters}, and
 * lines that could not be parsed or downloaded are optionally appended to a per-task
 * {@code failures-*} file in the task's work output directory.
 *
 * <p>The failure writer is a static field shared by all mapper instances in the JVM; every access
 * to it is guarded by the {@code DownloadMapper.class} monitor.
 */
public class DownloadMapper extends Mapper<LongWritable, Text, Text, BytesWritable> {
    private static final Logger logger = Logger.getLogger(DownloadMapper.class);

    /** Shared sink for failed input lines; {@code null} when failure logging is off or closed. */
    private static FSDataOutputStream failureWriter = null;

    private InputMode.Parser parser;
    /** Delay in milliseconds applied after each processed record; values {@code <= 0} disable it. */
    private long sleep;
    private boolean followRedirects;

    /** Job counters reporting the outcome of each input record. */
    public enum Counters {
        /** A candidate URL of the record was fetched and written to the output. */
        DOWNLOADED,
        /** None of the record's candidate URLs could be fetched. */
        FAILED,
        /** The parser threw while interpreting the record. */
        PARSE_ERROR
    }

    /**
     * Reads the downloader options from the job configuration and, when failure logging is
     * requested, opens the shared failures file (once per JVM).
     *
     * @param context the task context supplying the configuration and work output path
     * @throws IOException if the failures file cannot be created
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Mapper<LongWritable, Text, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
        HadoopDownloaderOptions options = new HadoopDownloaderOptions(context.getConfiguration().getStrings("hadoop.downloader.args"));
        options.prepare(false);
        this.parser = options.getInputParser();
        this.sleep = options.getSleep();
        this.followRedirects = options.followRedirects();

        synchronized (DownloadMapper.class) {
            // Create the writer only if it does not exist yet. The previous "!= null" test could
            // never succeed on a fresh JVM, so failures were silently never recorded.
            if (options.writeFailures() && failureWriter == null) {
                // Fifth '_'-separated field of the task id, minus its first character.
                String taskSuffix = context.getConfiguration().get("mapred.task.id").split("_")[4].substring(1);
                Path failureFile = FileOutputFormat.getWorkOutputPath(context).suffix("/failures-" + taskSuffix);
                failureWriter = failureFile.getFileSystem(context.getConfiguration()).create(failureFile);
            }
        }
    }

    /**
     * Closes the shared failures file, if one is open, then delegates to the superclass.
     *
     * @param context the task context
     * @throws IOException if closing the failures file fails
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void cleanup(Mapper<LongWritable, Text, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
        try {
            // Same monitor as setup() and writeFailure(), so a close cannot race with a write.
            synchronized (DownloadMapper.class) {
                if (failureWriter != null) {
                    try {
                        failureWriter.close();
                    } finally {
                        // Drop the reference even if close() threw, so nobody reuses a dead stream.
                        failureWriter = null;
                    }
                }
            }
        } finally {
            super.cleanup(context);
        }
    }

    /**
     * Processes one input line: parse it, try each candidate URL until one downloads, update the
     * counters, and finally pause for the configured sleep interval.
     *
     * <p>A record for which the parser returns {@code null} is skipped without counting or
     * pausing. If the thread is interrupted while writing a download to the output, the record is
     * counted as {@link Counters#FAILED}, the interrupt flag is restored, and the method returns
     * immediately.
     *
     * @param longWritable the input key (unused)
     * @param text the input line describing what to download
     * @param context the task context used for output and counters
     */
    @Override
    public void map(LongWritable longWritable, Text text, Mapper<LongWritable, Text, Text, BytesWritable>.Context context) {
        logger.info("Attempting to download: " + text);

        IndependentPair<String, List<URL>> parsed;
        try {
            parsed = this.parser.parse(text.toString());
        } catch (Exception e) {
            logger.info("Error parsing: " + text);
            logger.trace(e);
            context.getCounter(Counters.PARSE_ERROR).increment(1L);
            writeFailure(text, context);
            // Nothing to download for an unparseable line; just honour the delay and stop.
            pauseBetweenRecords();
            return;
        }

        if (parsed == null) {
            logger.trace("parser returned null; record skipped.");
            return;
        }

        boolean downloaded = false;
        try {
            for (URL url : parsed.secondObject()) {
                if (tryDownload(parsed.firstObject(), url, context)) {
                    logger.info("Dowloaded: " + url);
                    downloaded = true;
                    // Stop at the first success, but fall through so the delay below still applies.
                    break;
                }
                logger.trace("Not found; trying next");
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for the framework and record the line so it can be retried.
            Thread.currentThread().interrupt();
            logger.trace(e);
            logger.info("Failed to download: " + text);
            context.getCounter(Counters.FAILED).increment(1L);
            writeFailure(text, context);
            return;
        }

        if (downloaded) {
            context.getCounter(Counters.DOWNLOADED).increment(1L);
        } else {
            logger.info("Failed to download: " + text);
            context.getCounter(Counters.FAILED).increment(1L);
            writeFailure(text, context);
        }

        pauseBetweenRecords();
    }

    /** Sleeps for the configured interval, if any; an interruption merely cuts the wait short. */
    private void pauseBetweenRecords() {
        if (this.sleep <= 0) {
            return;
        }
        try {
            logger.trace("Waiting before continuing");
            Thread.sleep(this.sleep);
        } catch (InterruptedException ignored) {
            // Deliberately best-effort: the delay is only a politeness pause.
            logger.trace("Wait was interupted; ignoring");
        }
    }

    /**
     * Appends the given input line to the shared failures file, if one is open. Write errors are
     * logged and otherwise ignored so that bookkeeping problems never fail the task.
     *
     * <p>NOTE(review): records are written with {@code writeUTF}, which per the
     * {@code java.io.DataOutput} contract emits a 2-byte length prefix and is limited to 65535
     * encoded bytes, so the file is not plain text. Confirm that this is the format readers expect.
     *
     * @param text the input line that failed
     * @param context the task context (currently unused)
     */
    private static synchronized void writeFailure(Text text, Mapper<LongWritable, Text, Text, BytesWritable>.Context context) {
        if (failureWriter == null) {
            return;
        }
        try {
            failureWriter.writeUTF(text + "\n");
        } catch (IOException e) {
            logger.error(e);
        }
    }

    /**
     * Fetches one URL and, on success, emits its bytes under the given key.
     *
     * @param str the output key
     * @param url the URL to fetch
     * @param context the task context used to emit the result
     * @return {@code true} if the bytes were fetched and written; {@code false} if the fetch
     *         returned {@code null} or an {@link IOException} occurred while fetching or writing
     * @throws InterruptedException if writing to the context is interrupted
     */
    private boolean tryDownload(String str, URL url, Mapper<LongWritable, Text, Text, BytesWritable>.Context context) throws InterruptedException {
        try {
            byte[] payload = HttpUtils.readURLAsBytes(url, this.followRedirects);
            if (payload == null) {
                return false;
            }
            context.write(new Text(str), new BytesWritable(payload));
            return true;
        } catch (IOException e) {
            logger.trace(e);
            return false;
        }
    }
}
