package org.openimaj.picslurper;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.ProtocolException;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.Logger;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.MBFImage;
import org.openimaj.io.HttpUtils;
import org.openimaj.picslurper.output.OutputListener;
import org.openimaj.picslurper.output.WriteableImageOutput;
import org.openimaj.text.nlp.patterns.URLPatternProvider;
import org.openimaj.util.pair.IndependentPair;
import org.openimaj.web.scraping.SiteSpecificConsumer;
import org.openimaj.web.scraping.images.CommonHTMLConsumers;
import org.openimaj.web.scraping.images.FacebookConsumer;
import org.openimaj.web.scraping.images.ImgurConsumer;
import org.openimaj.web.scraping.images.InstagramConsumer;
import org.openimaj.web.scraping.images.OwlyImageConsumer;
import org.openimaj.web.scraping.images.TwipleConsumer;
import org.openimaj.web.scraping.images.TwitPicConsumer;
import org.openimaj.web.scraping.images.TwitterPhotoConsumer;
import org.openimaj.web.scraping.images.YfrogConsumer;
import twitter4j.Status;
import twitter4j.URLEntity;

/* loaded from: input_file:org/openimaj/picslurper/StatusConsumer.class */
public class StatusConsumer {
    /** Logger used by this class; shared by every instance. */
    public static Logger logger = Logger.getLogger(StatusConsumer.class);
    /** Pattern applied to the status text in {@code consume} to locate URLs. */
    static final Pattern urlPattern = new URLPatternProvider().pattern();
    /**
     * Consumers offered each URL, in list order, by {@code urlToImage} before the raw link is
     * fetched. Filled in by the static initializer of this class.
     */
    public static final List<SiteSpecificConsumer> siteSpecific = new ArrayList<>();
    /** When true, {@code consume} passes its result to {@code PicSlurperUtils.updateStats}. */
    private boolean outputStats;
    /** File handed to {@code PicSlurperUtils.updateStats} when {@link #outputStats} is set. */
    private File globalStats;
    /** Root directory for downloaded images; when null, {@code resolveURL} writes nothing. */
    private File outputLocation;
    /** URLs queued by {@code add} and drained by {@code processAll}. */
    private final Set<String> toProcess;
    /** Every URL accepted by {@code add} so far; used to reject repeats. */
    private final HashSet<String> previouslySeen;
    /** Listeners told about downloaded images and failed URLs; null after the no-arg constructor. */
    private List<OutputListener> outputModes;

    /* loaded from: input_file:org/openimaj/picslurper/StatusConsumer$LoggingStatus.class */
    /**
     * Holder for a list of strings. Nothing in the visible part of this file references it.
     */
    class LoggingStatus {
        /** Collected strings; starts out empty. */
        List<String> strings = new ArrayList<>();

        LoggingStatus() {
        }
    }

    /* loaded from: input_file:org/openimaj/picslurper/StatusConsumer$StatusConsumerRedirectStrategy.class */
    public static class StatusConsumerRedirectStrategy extends HttpUtils.MetaRefreshRedirectStrategy {
        private boolean wasRedirected = false;
        private URL redirection;

        public boolean isRedirected(HttpRequest httpRequest, HttpResponse httpResponse, HttpContext httpContext) throws ProtocolException {
            this.wasRedirected = super.isRedirected(httpRequest, httpResponse, httpContext);
            if (!this.wasRedirected) {
                return false;
            }
            try {
                this.redirection = getRedirect(httpRequest, httpResponse, httpContext).getURI().toURL();
                return false;
            } catch (MalformedURLException e) {
                this.wasRedirected = false;
                return false;
            }
        }

        public boolean wasRedirected() {
            return this.wasRedirected;
        }

        public URL redirection() {
            return this.redirection;
        }
    }

    /**
     * Creates a consumer with statistics and output settings.
     *
     * @param z whether {@code consume} should record its result via the global stats file
     * @param file the global statistics file
     * @param file2 root directory under which images are written
     * @param list listeners to notify; the reference is stored as given, not copied
     */
    public StatusConsumer(boolean z, File file, File file2, List<OutputListener> list) {
        this();
        outputModes = list;
        outputLocation = file2;
        globalStats = file;
        outputStats = z;
    }

    /**
     * Creates a consumer with empty URL bookkeeping. Statistics, output location and listeners
     * are left at their defaults (false / null).
     */
    public StatusConsumer() {
        this.toProcess = new HashSet<String>();
        this.previouslySeen = new HashSet<String>();
    }

    /**
     * Collects the URLs of a status, processes them and optionally records statistics.
     *
     * <p>URLs come from two places: the status' URL entities (expanded form preferred, plain
     * form as fallback) and every match of {@code urlPattern} in the status text. Each one is
     * passed to {@code add}, then {@code processAll} is run.
     *
     * @param status the status to consume
     * @return the consumption produced by {@code processAll}
     * @throws Exception declared broadly; {@code processAll} itself declares {@code IOException}
     */
    public StatusConsumption consume(Status status) throws Exception {
        // Source 1: URL entities attached to the status.
        if (status.getURLEntities() != null) {
            for (URLEntity entity : status.getURLEntities()) {
                String link = entity.getExpandedURL();
                if (link == null) {
                    link = entity.getURL();
                }
                if (link != null) {
                    add(link);
                }
            }
        }

        // Source 2: anything in the raw text that looks like a URL.
        String body = status.getText();
        if (body != null) {
            Matcher urls = urlPattern.matcher(body);
            while (urls.find()) {
                add(urls.group());
            }
        }

        StatusConsumption result = processAll(status);
        if (this.outputStats) {
            PicSlurperUtils.updateStats(this.globalStats, result, true);
        }
        return result;
    }

    /**
     * Drains the queue of pending URLs, resolving each one and notifying the listeners for
     * every URL that produced output.
     *
     * <p>For each queued URL the URL counter is incremented and {@code resolveURL} is called.
     * When that yields an output directory, the directory's {@code status.txt} and tweet record
     * are updated and every registered listener receives a {@code WriteableImageOutput}.
     *
     * <p>If the loop is aborted by an exception, the remaining queued URLs are discarded so
     * they cannot be attributed to whichever status is processed next.
     *
     * @param status the status the queued URLs belong to
     * @return counters accumulated while processing; {@code nTweets} starts at 1
     * @throws IOException if a queued string is not a valid URL (MalformedURLException) or the
     *         statistics/tweet update fails
     */
    public StatusConsumption processAll(Status status) throws IOException {
        StatusConsumption statusConsumption = new StatusConsumption();
        statusConsumption.nTweets = 1;
        statusConsumption.nURLs = 0;
        try {
            while (!this.toProcess.isEmpty()) {
                // A fresh iterator per pass: resolveURL can call add(), which modifies the set.
                Iterator<String> pending = this.toProcess.iterator();
                String next = pending.next();
                pending.remove();
                statusConsumption.nURLs++;

                URL url = new URL(next);
                File outputDir = resolveURL(url, statusConsumption);
                if (outputDir == null) {
                    continue;
                }
                PicSlurperUtils.updateStats(new File(outputDir, "status.txt"), statusConsumption);
                PicSlurperUtils.updateTweets(outputDir, status);

                // outputModes stays null when the no-arg constructor was used.
                if (this.outputModes != null) {
                    for (OutputListener listener : this.outputModes) {
                        listener.newImageDownloaded(new WriteableImageOutput(status, url, outputDir, statusConsumption));
                    }
                }
            }
        } finally {
            // No-op on normal completion; on failure it stops leftovers leaking into the next call.
            this.toProcess.clear();
        }
        return statusConsumption;
    }

    /**
     * Queues a URL for processing unless it overlaps with one seen before.
     *
     * <p>Two URLs overlap when either is a prefix of the other (which includes equality).
     *
     * @param str the URL to queue
     */
    public void add(String str) {
        for (String seen : this.previouslySeen) {
            // Equal strings are prefixes of each other, so no separate equals() test is needed.
            if (seen.startsWith(str) || str.startsWith(seen)) {
                logger.debug("URL not added, already exists: " + str);
                return;
            }
        }
        logger.debug("New URL added to list: " + str);
        this.previouslySeen.add(str);
        this.toProcess.add(str);
    }

    /**
     * Resolves a URL to images and writes them below the output location.
     *
     * <p>Images are stored as {@code image_N.png}; entries without a decoded image are fetched
     * again as raw bytes and stored as {@code image_N.gif}. {@code N} counts from 0 per URL.
     * The image counter and image URL list of {@code statusConsumption} are updated per file.
     *
     * @param url the URL to resolve
     * @param statusConsumption counters to update
     * @return the directory the images were written to, or null if the URL produced no images,
     *         no output location is configured, or writing failed
     */
    public File resolveURL(URL url, StatusConsumption statusConsumption) {
        // urlToImage runs first even without an output location: it may queue further URLs.
        List<IndependentPair<URL, MBFImage>> images = urlToImage(url);
        if (images == null || this.outputLocation == null) {
            return null;
        }
        try {
            File outputDir = urlToOutput(url, this.outputLocation);
            // NOTE(review): processAll already starts nTweets at 1, so this adds one more per
            // resolved URL — confirm that is the intended meaning of the counter.
            statusConsumption.nTweets++;
            int index = 0;
            for (IndependentPair<URL, MBFImage> entry : images) {
                URL imageUrl = entry.firstObject();
                MBFImage image = entry.secondObject();
                if (image == null) {
                    logger.debug("Downloading a raw GIF");
                    File target = new File(outputDir, String.format("image_%d.gif", index));
                    FileUtils.writeByteArrayToFile(target, HttpUtils.readURLAsBytes(imageUrl, false));
                } else {
                    logger.debug("Downloading a normal image");
                    ImageUtilities.write(image, new File(outputDir, String.format("image_%d.png", index)));
                }
                index++;
                statusConsumption.nImages++;
                statusConsumption.imageURLs.add(imageUrl);
            }
            return outputDir;
        } catch (IOException e) {
            // Route the failure through the class logger instead of dumping to stderr.
            logger.error("Failed to write images for " + url, e);
            return null;
        }
    }

    /**
     * Tries to turn a URL into an image.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>Each site specific consumer that accepts the URL is asked for image URLs. The first
     * non-empty answer is queued through {@code add} and null is returned.</li>
     * <li>Otherwise the URL is fetched directly. An intercepted redirect is queued through
     * {@code add} (unless it points back at the same URL) and null is returned.</li>
     * <li>Responses without a content type, or with one containing "text", are reported as
     * failed.</li>
     * <li>Anything else is returned as a single pair of the URL and the decoded image; for a
     * content type containing "gif" the image part is null.</li>
     * </ol>
     *
     * @param url the URL to resolve
     * @return a one-element list, or null when nothing could be returned for this URL directly
     */
    public List<IndependentPair<URL, MBFImage>> urlToImage(URL url) {
        logger.debug("Resolving URL: " + url);
        logger.debug("Attempting site specific consumers");
        for (SiteSpecificConsumer siteSpecificConsumer : siteSpecific) {
            if (!siteSpecificConsumer.canConsume(url)) {
                continue;
            }
            logger.debug("Site specific consumer: " + siteSpecificConsumer.getClass().getName() + " working on link");
            List<URL> found = siteSpecificConsumer.consume(url);
            if (found != null && !found.isEmpty()) {
                logger.debug("Site specific consumer returned non-null, adding the URLs");
                for (URL link : found) {
                    add(link.toString());
                }
                return null;
            }
        }
        try {
            logger.debug("Site specific consumers failed, trying the raw link");
            StatusConsumerRedirectStrategy redirects = new StatusConsumerRedirectStrategy();
            IndependentPair<HttpEntity, ByteArrayInputStream> response = HttpUtils.readURLAsByteArrayInputStream(url, 1000, 1000, redirects, "Mozilla/5.0 (Windows; U; Windows NT 6.0; ru; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)");
            if (redirects.wasRedirected()) {
                logger.debug("Redirect intercepted, adding redirection to list");
                String target = redirects.redirection().toString();
                if (!target.equals(url.toString())) {
                    add(target);
                }
                return null;
            }
            HttpEntity entity = response.firstObject();
            // A response may carry no Content-Type header at all; report that explicitly
            // instead of failing with a NullPointerException and a null reason.
            String contentType = entity.getContentType() == null ? null : entity.getContentType().getValue();
            if (contentType == null) {
                reportFailedURL(url, "missing content type");
                return null;
            }
            if (contentType.contains("text")) {
                reportFailedURL(url, "text content");
                return null;
            }
            // GIFs are not decoded here; the null image tells resolveURL to store the raw bytes.
            MBFImage image = contentType.contains("gif") ? null : ImageUtilities.readMBF(response.getSecondObject());
            List<IndependentPair<URL, MBFImage>> resolved = Arrays.asList(IndependentPair.pair(url, image));
            logger.debug("Link resolved, returning image.");
            return resolved;
        } catch (Throwable th) {
            // Deliberately broad: resolving a link is best effort and must not stop the caller.
            String reason = th.getMessage();
            reportFailedURL(url, reason != null ? reason : th.toString());
            return null;
        }
    }

    /**
     * Tells every registered listener that a URL could not be turned into an image.
     * Does nothing when no listener list is set.
     *
     * @param url the URL that failed
     * @param str the reason passed on to the listeners
     */
    private void reportFailedURL(URL url, String str) {
        if (this.outputModes == null) {
            return;
        }
        for (OutputListener listener : this.outputModes) {
            listener.failedURL(url, str);
        }
    }

    /**
     * Maps a URL to its output directory below {@code file} and makes sure that directory
     * exists.
     *
     * <p>The relative layout is {@code protocol/host/}, followed by the sanitized path (when
     * the path is non-empty) and the sanitized query (when present), each as its own level.
     *
     * @param url the URL to map
     * @param file the root output directory
     * @return the directory for this URL
     * @throws IOException if the directory could not be created
     */
    public static synchronized File urlToOutput(URL url, File file) throws IOException {
        StringBuilder relative = new StringBuilder();
        relative.append(url.getProtocol()).append(File.separator);
        relative.append(url.getHost()).append(File.separator);

        String path = url.getPath();
        if (!path.isEmpty()) {
            relative.append(sanitizeFilename(path)).append(File.separator);
        }
        String query = url.getQuery();
        if (query != null) {
            relative.append(sanitizeFilename(query)).append(File.separator);
        }

        File target = new File(file.getAbsolutePath() + File.separator + relative);
        // Covers both "does not exist yet" and "exists but is a plain file".
        if (!target.isDirectory()) {
            createURLOutDir(target);
        }
        return target;
    }

    /**
     * Characters replaced by {@link #sanitizeFilename(String)}: colon, backslash, slash,
     * asterisk, question mark, pipe and angle brackets. Compiled once instead of on each call.
     */
    private static final Pattern UNSAFE_FILENAME_CHARS = Pattern.compile("[:\\\\/*?|<>]");

    /**
     * Replaces every character that is unsafe in a file name with an underscore.
     *
     * @param str the text to sanitize; must not be null
     * @return {@code str} with each of {@code : \ / * ? | < >} replaced by {@code _}
     */
    public static String sanitizeFilename(String str) {
        return UNSAFE_FILENAME_CHARS.matcher(str).replaceAll("_");
    }

    /**
     * Creates {@code file} as a directory, first deleting whatever currently exists at that
     * path.
     *
     * @param file the directory to create
     * @throws IOException if an existing entry cannot be deleted or the directory cannot be made
     */
    static void createURLOutDir(File file) throws IOException {
        // delete() is only attempted when something is already there.
        boolean pathIsFree = !file.exists() || file.delete();
        if (pathIsFree && file.mkdirs()) {
            return;
        }
        throw new IOException("Couldn't create URL output: " + file.getAbsolutePath());
    }

    // Registers the built-in consumers. Order matters: urlToImage walks this list front to
    // back and stops at the first consumer that yields URLs.
    static {
        siteSpecific.addAll(Arrays.<SiteSpecificConsumer>asList(
                new InstagramConsumer(),
                new TwitterPhotoConsumer(),
                new TwitPicConsumer(),
                new ImgurConsumer(),
                new FacebookConsumer(),
                new YfrogConsumer(),
                new OwlyImageConsumer(),
                new TwipleConsumer(),
                CommonHTMLConsumers.FOTOLOG,
                CommonHTMLConsumers.PHOTONUI,
                CommonHTMLConsumers.PICS_LOCKERZ));
    }
}
