package org.openimaj.text.nlp.namedentity;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:org/openimaj/text/nlp/namedentity/IgnoreTokenStripper.class */
/**
 * Decides whether a token should be ignored and filters token lists accordingly.
 *
 * <p>A token is ignored when it is one of the following:
 * <ul>
 *   <li>an exact (case-sensitive) entry of one of the bundled word lists,</li>
 *   <li>a string accepted by {@link Double#parseDouble(String)}, or</li>
 *   <li>a written-out English number from "one" up to "nine hundred and ninety-nine"
 *       (matched case-insensitively).</li>
 * </ul>
 */
public class IgnoreTokenStripper {
    private static final String UNITS = "one|two|three|four|five|six|seven|eight|nine";
    private static final String TENS = "twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety";
    private static final String TEENS =
            "ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen";

    /** Optional whitespace and/or a single hyphen between number words. */
    private static final String SEP = "\\s*-?\\s*";
    private static final String AND = SEP + "and" + SEP;

    /**
     * Numbers below one hundred: a unit, a teen, or a multiple of ten optionally followed by a
     * unit ("twenty", "twenty-one", "twenty one").
     */
    private static final String TO_NN =
            "(?:" + TEENS + "|(?:" + TENS + ")(?:" + SEP + "(?:" + UNITS + "))?|" + UNITS + ")";

    /**
     * Numbers below one thousand: anything matched by {@link #TO_NN}, or "&lt;unit&gt; hundred"
     * optionally followed by "and" (or just a separator) and a {@link #TO_NN} remainder.
     */
    private static final String TO_NNN =
            "(?:" + TO_NN
                    + "|(?:" + UNITS + ")" + SEP + "hundred(?:(?:" + AND + "|" + SEP + ")" + TO_NN + ")?)";

    // Groups, not character classes: the fragments contain alternations, and wrapping them in
    // [...] would turn the whole expression into "any run of these letters", which matches
    // ordinary words. CASE_INSENSITIVE replaces a locale-sensitive toLowerCase() on the input.
    private static final Pattern WRITTEN_NUMBERS =
            Pattern.compile(TO_NNN, Pattern.CASE_INSENSITIVE);

    private final HashSet<String> ignoreTokens = new HashSet<>();

    /** Languages for which word lists are bundled. */
    public enum Language {
        English
    }

    /**
     * Builds a stripper by loading every word list available for the given language.
     * Lists that are missing or unreadable are reported on {@code System.err} and skipped.
     *
     * @param language the language whose word lists should be loaded
     */
    public IgnoreTokenStripper(Language language) {
        for (InputStream stream : getListStreams(language)) {
            addToIgnoreSet(stream);
        }
    }

    /**
     * Reads the stream line by line and adds each trimmed line to the ignore set.
     * The stream is always closed, whether or not reading succeeds.
     *
     * @param inputStream word list to read, one token per line; {@code null} when the
     *                    classpath resource could not be found
     */
    private void addToIgnoreSet(InputStream inputStream) {
        if (inputStream == null) {
            // getResourceAsStream yields null for a missing resource; stay best-effort.
            System.err.println("Error: ignore-token resource not found");
            return;
        }
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                this.ignoreTokens.add(line.trim());
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /**
     * Opens the classpath word lists for the given language.
     *
     * @param language the requested language
     * @return the opened streams (individual entries may be {@code null} if a resource is
     *         missing); an empty list when the language has no word lists
     */
    private List<InputStream> getListStreams(Language language) {
        List<InputStream> streams = new ArrayList<>();
        if (language == Language.English) {
            streams.add(getClass().getResourceAsStream("/org/openimaj/text/stopwords/en_stopwords.txt"));
            streams.add(getClass().getResourceAsStream("/org/openimaj/text/stopwords/en_nouns.txt"));
            streams.add(getClass().getResourceAsStream("/org/openimaj/text/stopwords/en_countries.txt"));
        }
        return streams;
    }

    /**
     * Returns the tokens of {@code list} that are not ignore tokens, in their original order.
     *
     * @param list tokens to filter; must not be {@code null} or contain {@code null}
     * @return a new list holding only the tokens for which {@link #isIgnoreToken(String)}
     *         is {@code false}
     */
    public ArrayList<String> getNonStopWords(List<String> list) {
        ArrayList<String> kept = new ArrayList<>();
        for (String token : list) {
            if (!isIgnoreToken(token)) {
                kept.add(token);
            }
        }
        return kept;
    }

    /**
     * Tells whether a token should be ignored.
     *
     * @param str the token to test
     * @return {@code true} if the token is in the loaded word lists, parses as a
     *         {@code double}, or is a written-out number below one thousand
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public boolean isIgnoreToken(String str) {
        if (this.ignoreTokens.contains(str)) {
            return true;
        }
        try {
            Double.parseDouble(str);
            return true;
        } catch (NumberFormatException notNumeric) {
            // Not a numeric literal; fall back to the written-number check.
            return WRITTEN_NUMBERS.matcher(str).matches();
        }
    }
}
