package org.openimaj.tools.twitter.modes.preprocessing;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.openimaj.io.FileUtils;
import org.openimaj.text.nlp.language.LanguageDetector;
import org.openimaj.twitter.USMFStatus;

/* loaded from: input_file:org/openimaj/tools/twitter/modes/preprocessing/StopwordMode.class */
public class StopwordMode extends TwitterPreprocessingMode<List<String>> {
    private static final String STOPWORDS_KEY = "nostopwords";
    private static final String[][] STOPWORD_FILES = {new String[]{"en", "/org/openimaj/text/stopwords/stopwords-list.txt"}, new String[]{"en", "/org/openimaj/text/stopwords/en_stopwords.txt"}, new String[]{"en", "/org/openimaj/text/stopwords/en_dokuwiki_stopwords.txt"}, new String[]{"bg", "/org/openimaj/text/stopwords/bg_dokuwiki_stopwords.txt"}, new String[]{"da", "/org/openimaj/text/stopwords/da_dokuwiki_stopwords.txt"}, new String[]{"de", "/org/openimaj/text/stopwords/de_dokuwiki_stopwords.txt"}, new String[]{"el", "/org/openimaj/text/stopwords/el_dokuwiki_stopwords.txt"}, new String[]{"es", "/org/openimaj/text/stopwords/es_dokuwiki_stopwords.txt"}, new String[]{"fi", "/org/openimaj/text/stopwords/fi_dokuwiki_stopwords.txt"}, new String[]{"fr", "/org/openimaj/text/stopwords/fr_dokuwiki_stopwords.txt"}, new String[]{"it", "/org/openimaj/text/stopwords/it_dokuwiki_stopwords.txt"}, new String[]{"nl", "/org/openimaj/text/stopwords/nl_dokuwiki_stopwords.txt"}, new String[]{"pt", "/org/openimaj/text/stopwords/pt_dokuwiki_stopwords.txt"}, new String[]{"sv", "/org/openimaj/text/stopwords/sv_dokuwiki_stopwords.txt"}};
    private LanguageDetectionMode langMode = new LanguageDetectionMode();
    private TokeniseMode tokMode = new TokeniseMode();
    private HashMap<String, HashSet<String>> languageStopwords = loadStopwords();

    private HashMap<String, HashSet<String>> loadStopwords() {
        HashMap<String, HashSet<String>> hashMap = new HashMap<>();
        for (String[] strArr : STOPWORD_FILES) {
            try {
                HashSet<String> hashSet = new HashSet<>();
                for (String str : FileUtils.readlines(StopwordMode.class.getResourceAsStream(strArr[1]), "UTF-8")) {
                    if (!str.startsWith("#")) {
                        hashSet.add(str.toLowerCase().trim());
                    }
                }
                hashMap.put(strArr[0], hashSet);
            } catch (IOException e) {
            }
        }
        return hashMap;
    }

    /* JADX WARN: Can't rename method to resolve collision */
    @Override // org.openimaj.tools.twitter.modes.preprocessing.TwitterPreprocessingMode
    public List<String> process(USMFStatus uSMFStatus) {
        ArrayList arrayList = new ArrayList();
        try {
            String lowerCase = LanguageDetector.WeightedLocale.fromMap((Map) TwitterPreprocessingMode.results(uSMFStatus, this.langMode)).language.toLowerCase();
            Map map = (Map) TwitterPreprocessingMode.results(uSMFStatus, this.tokMode);
            if (this.languageStopwords.containsKey(lowerCase)) {
                HashSet hashSet = new HashSet();
                hashSet.addAll((Collection) map.get(TokeniseMode.TOKENS_PROTECTED));
                HashSet<String> hashSet2 = this.languageStopwords.get(lowerCase);
                for (String str : (List) map.get(TokeniseMode.TOKENS_ALL)) {
                    if (hashSet.contains(str)) {
                        arrayList.add(str);
                    } else if (!hashSet2.contains(str.toLowerCase())) {
                        arrayList.add(str);
                    }
                }
            } else {
                arrayList.addAll((Collection) map.get(TokeniseMode.TOKENS_ALL));
            }
        } catch (Exception e) {
        }
        uSMFStatus.addAnalysis(STOPWORDS_KEY, arrayList);
        return arrayList;
    }

    @Override // org.openimaj.tools.twitter.modes.preprocessing.TwitterPreprocessingMode
    public String getAnalysisKey() {
        return STOPWORDS_KEY;
    }
}
