package org.openimaj.stream.functions;

import com.google.common.collect.Lists;
import java.io.ByteArrayInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.ProtocolException;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.Logger;
import org.openimaj.image.ImageUtilities;
import org.openimaj.io.HttpUtils;
import org.openimaj.util.pair.IndependentPair;
import org.openimaj.web.scraping.SiteSpecificConsumer;
import org.openimaj.web.scraping.images.CommonHTMLConsumers;
import org.openimaj.web.scraping.images.FacebookConsumer;
import org.openimaj.web.scraping.images.ImgurConsumer;
import org.openimaj.web.scraping.images.InstagramConsumer;
import org.openimaj.web.scraping.images.OwlyImageConsumer;
import org.openimaj.web.scraping.images.TmblrPhotoConsumer;
import org.openimaj.web.scraping.images.TwipleConsumer;
import org.openimaj.web.scraping.images.TwitPicConsumer;
import org.openimaj.web.scraping.images.TwitterPhotoConsumer;
import org.openimaj.web.scraping.images.YfrogConsumer;

/* loaded from: input_file:org/openimaj/stream/functions/ImageSiteURLExtractor.class */
/**
 * A {@link SiteSpecificURLExtractor} that resolves a URL to a list of image URLs.
 * <p>
 * Resolution first tries each registered {@link SiteSpecificConsumer}. If none of them
 * yields a non-empty result, the raw link is fetched: intercepted redirects are resolved
 * again from the start (up to {@link #MAX_REDIRECTS} hops), text responses are ignored,
 * and anything else is accepted if it is a GIF by content type or can be decoded by
 * {@link ImageUtilities#readMBF}.
 */
public class ImageSiteURLExtractor extends SiteSpecificURLExtractor {
    private static final Logger logger = Logger.getLogger(ImageSiteURLExtractor.class);

    /** Maximum number of intercepted redirects followed for one URL before giving up. */
    private static final int MAX_REDIRECTS = 10;

    /** Value used for both timeout arguments of the raw-link fetch (presumably milliseconds). */
    private static final int HTTP_TIMEOUT = 1000;

    /** User agent sent when fetching the raw link. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 6.0; ru; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)";

    /**
     * Redirect strategy that records a redirect instead of following it.
     * <p>
     * {@link #isRedirected} always returns {@code false}; the redirect target is stored so
     * that the extractor can run its own resolution (including the site specific consumers)
     * on the target URL.
     */
    private static class StatusConsumerRedirectStrategy extends HttpUtils.MetaRefreshRedirectStrategy {
        private boolean wasRedirected;
        private URL redirection;

        private StatusConsumerRedirectStrategy() {
            this.wasRedirected = false;
        }

        /**
         * Records whether the superclass detected a redirect and, if so, where it points.
         *
         * @return always {@code false}, so the redirect is never reported to the caller of
         *         this strategy as something to follow
         * @throws ProtocolException if the superclass check or redirect lookup fails
         */
        public boolean isRedirected(HttpRequest httpRequest, HttpResponse httpResponse, HttpContext httpContext) throws ProtocolException {
            this.wasRedirected = super.isRedirected(httpRequest, httpResponse, httpContext);
            if (this.wasRedirected) {
                try {
                    this.redirection = getRedirect(httpRequest, httpResponse, httpContext).getURI().toURL();
                } catch (MalformedURLException e) {
                    // A target that cannot be expressed as a URL is treated as "no redirect".
                    logger.debug("Redirect target is not a valid URL, ignoring redirect", e);
                    this.wasRedirected = false;
                }
            }
            return false;
        }

        /** @return {@code true} if the last checked response was a redirect with a usable target */
        public boolean wasRedirected() {
            return this.wasRedirected;
        }

        /** @return the most recently recorded redirect target; may be {@code null} if none was recorded */
        public URL redirection() {
            return this.redirection;
        }
    }

    /**
     * Registers the built-in image site consumers.
     *
     * @param z when {@code true}, a {@link TmblrPhotoConsumer} is registered as well
     */
    public ImageSiteURLExtractor(boolean z) {
        this.siteSpecific.addAll(Arrays.asList(new InstagramConsumer(), new TwitterPhotoConsumer(), new TwitPicConsumer(), new ImgurConsumer(), new FacebookConsumer(), new YfrogConsumer(), new OwlyImageConsumer(), new TwipleConsumer(), CommonHTMLConsumers.FOTOLOG, CommonHTMLConsumers.PHOTONUI, CommonHTMLConsumers.PICS_LOCKERZ));
        if (z) {
            this.siteSpecific.add(new TmblrPhotoConsumer());
        }
    }

    /** Registers all built-in consumers, including the {@link TmblrPhotoConsumer}. */
    public ImageSiteURLExtractor() {
        this(true);
    }

    /**
     * Resolves {@code url} to image URLs.
     *
     * @param url the URL to resolve
     * @return the URLs produced by the first site specific consumer with a non-empty result,
     *         or a single-element list holding the (possibly redirected) URL if it points at
     *         an image; {@code null} if the URL is text content, cannot be fetched or
     *         decoded, or redirects more than {@link #MAX_REDIRECTS} times
     */
    @Override // org.openimaj.stream.functions.SiteSpecificURLExtractor
    protected List<URL> processURLs(URL url) {
        return resolve(url, MAX_REDIRECTS);
    }

    /**
     * Performs the resolution described on {@link #processURLs(URL)} with a bounded
     * redirect budget, so that redirect loops cannot recurse without limit.
     *
     * @param url the URL to resolve
     * @param redirectsLeft number of further redirects that may still be followed
     * @return the resolved image URLs, or {@code null} if the URL is ignored
     */
    private List<URL> resolve(URL url, int redirectsLeft) {
        logger.debug("Resolving URL: " + url);
        logger.debug("Attempting site specific consumers");
        for (SiteSpecificConsumer siteSpecificConsumer : this.siteSpecific) {
            if (siteSpecificConsumer.canConsume(url)) {
                logger.debug("Site specific consumer: " + siteSpecificConsumer.getClass().getName() + " working on link");
                List<URL> consume = siteSpecificConsumer.consume(url);
                if (consume != null && !consume.isEmpty()) {
                    logger.debug("Site specific consumer returned non-null, returning the URLs");
                    return consume;
                }
            }
        }
        try {
            logger.debug("Site specific consumers failed, trying the raw link");
            StatusConsumerRedirectStrategy redirectStrategy = new StatusConsumerRedirectStrategy();
            IndependentPair<HttpEntity, ByteArrayInputStream> response = HttpUtils.readURLAsByteArrayInputStream(url, HTTP_TIMEOUT, HTTP_TIMEOUT, redirectStrategy, USER_AGENT);
            if (redirectStrategy.wasRedirected()) {
                logger.debug("Redirect intercepted, adding redirection to list");
                URL redirection = redirectStrategy.redirection();
                // A redirect to the very same URL is not followed; the fetched body is used instead.
                if (!redirection.toString().equals(url.toString())) {
                    if (redirectsLeft <= 0) {
                        logger.debug(url + " ignored -- too many redirects");
                        return null;
                    }
                    return resolve(redirection, redirectsLeft - 1);
                }
            }
            HttpEntity httpEntity = response.firstObject();
            ByteArrayInputStream content = response.getSecondObject();
            if (httpEntity == null || httpEntity.getContentType() == null) {
                logger.debug(url + " ignored -- no content type");
                return null;
            }
            String contentType = httpEntity.getContentType().getValue();
            if (contentType.contains("text")) {
                logger.debug(url + " ignored -- text content");
                return null;
            }
            if (contentType.contains("gif")) {
                // GIFs are accepted on content type alone, without a decode check.
                return Lists.newArrayList(url);
            }
            // Decoding is only a validity check: a failure throws and the URL is ignored below.
            ImageUtilities.readMBF(content);
            return Lists.newArrayList(url);
        } catch (Throwable th) {
            // Deliberately broad: resolution is best-effort and any failure means "not an image".
            logger.debug(url + " ignored -- exception", th);
            return null;
        }
    }
}
