package org.openimaj.text.nlp.namedentity;

import info.bliki.wiki.filter.PlainTextConverter;
import info.bliki.wiki.model.WikiModel;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.store.SimpleFSDirectory;
import org.openimaj.text.nlp.namedentity.NamedEntity;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/openimaj/text/nlp/namedentity/EntityExtractionResourceBuilder.class */
/**
 * Builds the two resources used for entity extraction from a directory of
 * seed files: a plain-text alias file and a Lucene index of per-entity
 * context.
 *
 * <p>The seed directory is expected to contain the files this class opens by
 * name ({@code wordnet_person_100007846.txt},
 * {@code wordnet_organization_108008335.txt},
 * {@code wordnet_location_100027167.txt}, {@code isCalled_stripped.tsv},
 * {@code means_stripped.tsv}, {@code created_stripped.tsv},
 * {@code hasWikipediaAnchorText_stripped.tsv} and
 * {@code hasWikipediaUrl_stripped.tsv}). Each line is split on whitespace and
 * the second/third columns are used.
 *
 * <p>The log writer is a static field shared by every instance.
 */
public class EntityExtractionResourceBuilder {
    /** File name of the alias file inside the output directory. */
    public static String DEFAULT_ALIAS_NAME = "AliasMapFile.txt";
    /** Directory name of the Lucene index inside the output directory. */
    public static String DEFAULT_CONTEXT_NAME = "YagoLucene";
    /** Directory name, relative to {@code user.home}, of the default output directory. */
    private static final String DEFAULT_ROOT_NAME = ".YagoEntityExtraction";
    private static final String WIKI_API_PREFIX = "http://en.wikipedia.org/w/api.php?format=xml&action=query&titles=";
    private static final String WIKI_API_SUFFIX = "&prop=revisions&rvprop=content";
    /** When true, progress messages are echoed to standard output. */
    private boolean verbose = true;
    /** When true, location entities are loaded in addition to people and organisations. */
    private boolean locations = false;
    /** Log sink opened by {@link #createLogging(String)}; {@code null} while logging is off. */
    private static BufferedWriter logOut;

    /**
     * Feeds every line of a reader to {@link #doWork(String)} and closes the
     * reader afterwards.
     */
    public abstract static class StreamLooper {
        BufferedReader reader;

        /**
         * @param bufferedReader source of lines; may be {@code null}, in which
         *                       case {@link #loop()} does nothing
         */
        public StreamLooper(BufferedReader bufferedReader) {
            this.reader = bufferedReader;
        }

        /**
         * Reads the stream to its end, handing each line to
         * {@link #doWork(String)}. An {@link IOException} stops the loop and
         * is printed; the reader is closed in every case.
         */
        public void loop() {
            if (this.reader == null) {
                // Callers pass null when the file could not be opened.
                return;
            }
            try {
                String line;
                while ((line = this.reader.readLine()) != null) {
                    doWork(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    this.reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        /**
         * Handles one line of input.
         *
         * @param str the line, without its terminator
         */
        protected abstract void doWork(String str);
    }

    /**
     * Builds the alias file at the default location.
     *
     * @param seedDirectoryPath directory holding the seed files
     */
    public void buildCandidateAliasFile(String seedDirectoryPath) {
        buildCandidateAliasFile(seedDirectoryPath, getDefaultAliasFilePath());
    }

    /**
     * Builds the alias file at the given location.
     *
     * @param seedDirectoryPath directory holding the seed files
     * @param destinationPath   path of the alias file to write
     */
    public void buildCandidateAliasFile(String seedDirectoryPath, String destinationPath) {
        writeAliasFile(getEntities(seedDirectoryPath), destinationPath, seedDirectoryPath);
    }

    /**
     * Builds the Lucene context index at the default location.
     *
     * @param seedDirectoryPath directory holding the seed files
     */
    public void buildContextLuceneIndex(String seedDirectoryPath) {
        buildContextLuceneIndex(seedDirectoryPath, getDefaultIndexDirectoryPath());
    }

    /**
     * Builds the Lucene context index in the given directory. An
     * {@link IOException} is printed rather than propagated.
     *
     * @param seedDirectoryPath directory holding the seed files
     * @param destinationPath   directory of the index to write
     */
    public void buildContextLuceneIndex(String seedDirectoryPath, String destinationPath) {
        try {
            buildIndex(getEntities(seedDirectoryPath), destinationPath, seedDirectoryPath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds both resources under the default root directory, writing progress
     * to {@code log.txt} there. Existing files in the default index directory
     * are deleted first.
     *
     * @param seedDirectoryPath directory holding the seed files
     */
    public void buildAll(String seedDirectoryPath) {
        validateFileStructure();
        createLogging(getDefaultRootPath() + File.separator + "log.txt");
        try {
            buildAll(seedDirectoryPath, getDefaultRootPath());
        } finally {
            // Always release the log, and clear the static so later print() calls
            // do not write to a closed writer.
            closeLogging();
        }
    }

    /**
     * Builds the alias file and the Lucene index under {@code destinationPath}.
     *
     * @param seedDirectoryPath directory holding the seed files
     * @param destinationPath   directory that receives both resources
     */
    public void buildAll(String seedDirectoryPath, String destinationPath) {
        print("Building All...");
        HashMap<String, YagoNamedEntity> entities = getEntities(seedDirectoryPath);
        writeAliasFile(entities, destinationPath + File.separator + DEFAULT_ALIAS_NAME, seedDirectoryPath);
        try {
            buildIndex(entities, destinationPath + File.separator + DEFAULT_CONTEXT_NAME, seedDirectoryPath);
        } catch (IOException e) {
            e.printStackTrace();
        }
        print("Done");
    }

    /** @return the default output directory, located under {@code user.home} */
    public static String getDefaultRootPath() {
        return System.getProperty("user.home") + File.separator + DEFAULT_ROOT_NAME;
    }

    /** @return the default alias file path */
    public static String getDefaultAliasFilePath() {
        return getDefaultRootPath() + File.separator + DEFAULT_ALIAS_NAME;
    }

    /** @return the default Lucene index directory path */
    public static String getDefaultIndexDirectoryPath() {
        return getDefaultRootPath() + File.separator + DEFAULT_CONTEXT_NAME;
    }

    /**
     * Derives a display alias from an entity root name: a {@code geoent_}
     * prefix and the segment after the last underscore are dropped,
     * underscores become spaces, and everything from the first {@code (} or
     * {@code ,} onwards is cut. The result is not trimmed.
     *
     * @param str the entity root name; must not be {@code null}
     * @return the derived alias
     */
    public static String getAliasFrom(String str) {
        String name = str;
        if (name.startsWith("geoent_")) {
            int firstUnderscore = name.indexOf('_');
            int lastUnderscore = name.lastIndexOf('_');
            // Without a second underscore there is no trailing segment to drop;
            // taking substring(first + 1, last) would throw in that case.
            name = lastUnderscore > firstUnderscore
                    ? name.substring(firstUnderscore + 1, lastUnderscore)
                    : name.substring(firstUnderscore + 1);
        }
        String alias = name.replace('_', ' ');
        int paren = alias.indexOf('(');
        if (paren >= 0) {
            alias = alias.substring(0, paren);
        }
        int comma = alias.indexOf(',');
        if (comma >= 0) {
            alias = alias.substring(0, comma);
        }
        return alias;
    }

    /**
     * Ensures the default root and index directories exist and empties the
     * index directory if it was already there.
     */
    private void validateFileStructure() {
        File rootDir = new File(getDefaultRootPath());
        if (!rootDir.isDirectory() && !rootDir.mkdirs()) {
            print("Could not create directory: " + rootDir);
        }
        File indexDir = new File(getDefaultIndexDirectoryPath());
        if (!indexDir.isDirectory()) {
            if (!indexDir.mkdirs()) {
                print("Could not create directory: " + indexDir);
            }
            return;
        }
        File[] staleFiles = indexDir.listFiles();
        if (staleFiles == null) {
            // listFiles() returns null on an I/O error.
            return;
        }
        for (File stale : staleFiles) {
            stale.delete();
        }
    }

    /**
     * Opens (truncating) the log file and installs it as the shared log sink.
     * On failure logging stays disabled.
     *
     * @param logPath path of the log file
     */
    private static void createLogging(String logPath) {
        closeLogging();
        try {
            logOut = new BufferedWriter(new FileWriter(logPath));
        } catch (IOException e) {
            logOut = null;
            e.printStackTrace();
        }
    }

    /** Flushes and closes the shared log sink, if one is open. */
    private static void closeLogging() {
        BufferedWriter writer = logOut;
        logOut = null;
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Populates entity context and writes one Lucene document per entity with
     * the fields {@code uri} (root name), {@code context} and {@code type}.
     * For entities with a Wikipedia URL the article text is fetched from the
     * Wikipedia API and appended to the context; a failed fetch only skips
     * that enrichment.
     *
     * @param entities          entities keyed by root name
     * @param indexPath         directory of the index to write
     * @param seedDirectoryPath directory holding the seed files
     * @throws IOException if the index cannot be opened or written
     */
    private void buildIndex(HashMap<String, YagoNamedEntity> entities, String indexPath, String seedDirectoryPath) throws IOException {
        print("Building Index...");
        setEntityContextValues(entities, seedDirectoryPath);
        print("Initializing Lucene objects...");
        String[] fieldNames = {"uri", "context", "type"};
        FieldType tokenizedType = new FieldType();
        tokenizedType.setIndexed(true);
        tokenizedType.setTokenized(true);
        tokenizedType.setStored(true);
        FieldType storedType = new FieldType();
        storedType.setStored(true);
        storedType.setIndexed(true);
        FieldType[] fieldTypes = {storedType, tokenizedType, storedType};
        QuickIndexer indexer = new QuickIndexer(new SimpleFSDirectory(new File(indexPath)));
        try {
            DocumentBuilder documentBuilder = null;
            try {
                documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            } catch (ParserConfigurationException e) {
                e.printStackTrace();
                print("No XML parser available; skipping Wikipedia context.");
            }
            WikiModel wikiModel = new WikiModel("http://www.mywiki.com/wiki/${image}", "http://www.mywiki.com/wiki/${title}");
            int processed = 0;
            print("Building Lucene Index...");
            for (YagoNamedEntity entity : entities.values()) {
                processed++;
                if (processed % 5000 == 0) {
                    print("Processed " + processed);
                }
                if (documentBuilder != null && entity.wikiURL != null) {
                    String wikiText = fetchWikiText(documentBuilder, entity.wikiURL);
                    if (wikiText != null) {
                        entity.addContext(wikiModel.render(new PlainTextConverter(), wikiText));
                    }
                }
                indexer.addDocumentFromFields(fieldNames, new String[]{entity.rootName, entity.getContext(), entity.type.toString()}, fieldTypes);
            }
        } finally {
            // Finalise even on failure so the indexer is not left open.
            indexer.finalise();
        }
    }

    /**
     * Fetches the content of the first {@code rev} element returned by the
     * Wikipedia API for the page named by the last path segment of
     * {@code wikiUrl}.
     *
     * @return the raw revision text, or {@code null} if the request failed or
     *         no revision was returned
     */
    private static String fetchWikiText(DocumentBuilder documentBuilder, String wikiUrl) {
        String title = wikiUrl.substring(wikiUrl.lastIndexOf("/") + 1);
        Document response;
        try {
            response = documentBuilder.parse(WIKI_API_PREFIX + title + WIKI_API_SUFFIX);
        } catch (IOException | SAXException e) {
            // Never fall back to a previously parsed document: that would attach
            // another entity's article to this one.
            e.printStackTrace();
            return null;
        }
        response.getDocumentElement().normalize();
        NodeList revisions = response.getElementsByTagName("rev");
        return revisions.getLength() > 0 ? revisions.item(0).getTextContent() : null;
    }

    /**
     * Adds context to the entities from {@code created_stripped.tsv},
     * {@code hasWikipediaAnchorText_stripped.tsv} and each entity's aliases,
     * sets {@code wikiURL} from {@code hasWikipediaUrl_stripped.tsv}, and
     * reports how many entities ended up with neither context nor URL.
     * Lines with fewer than three columns are skipped.
     */
    private void setEntityContextValues(final HashMap<String, YagoNamedEntity> entities, String seedDirectoryPath) {
        print("Setting Context Values...");
        // NOTE(review): splitting on whitespace truncates values that contain
        // spaces; confirm the *_stripped.tsv files never do.
        new StreamLooper(openReaderOrNull(seedDirectoryPath + File.separator + "created_stripped.tsv")) {
            @Override
            protected void doWork(String line) {
                String[] columns = line.split("\\s+");
                if (columns.length < 3) {
                    return;
                }
                YagoNamedEntity entity = entities.get(columns[1]);
                if (entity != null) {
                    entity.addContext(convertResource(columns[2]));
                }
            }
        }.loop();
        new StreamLooper(openReaderOrNull(seedDirectoryPath + File.separator + "hasWikipediaAnchorText_stripped.tsv")) {
            @Override
            protected void doWork(String line) {
                String[] columns = line.split("\\s+");
                if (columns.length < 3) {
                    return;
                }
                YagoNamedEntity entity = entities.get(columns[1]);
                if (entity != null) {
                    entity.addContext(convertLiteral(columns[2]));
                }
            }
        }.loop();
        new StreamLooper(openReaderOrNull(seedDirectoryPath + File.separator + "hasWikipediaUrl_stripped.tsv")) {
            @Override
            protected void doWork(String line) {
                String[] columns = line.split("\\s+");
                if (columns.length < 3) {
                    return;
                }
                YagoNamedEntity entity = entities.get(columns[1]);
                if (entity != null) {
                    entity.wikiURL = columns[2].replaceAll("\"", "");
                }
            }
        }.loop();
        print("Validating Context...");
        int withoutContext = 0;
        for (YagoNamedEntity entity : entities.values()) {
            Iterator<String> aliases = entity.aliasList.iterator();
            while (aliases.hasNext()) {
                entity.addContext(aliases.next());
            }
            String context = entity.getContext();
            if ((context == null || context.equals("")) && entity.wikiURL == null) {
                withoutContext++;
            }
        }
        print("No Context: " + withoutContext);
    }

    /**
     * Adds aliases to the entities from {@code isCalled_stripped.tsv}
     * (entity in column 2, alias in column 3) and {@code means_stripped.tsv}
     * (alias in column 2, entity in column 3), then adds the alias derived
     * from each root name. Lines with fewer than three columns are skipped.
     */
    private void setEntityAliasValues(final HashMap<String, YagoNamedEntity> entities, String seedDirectoryPath) {
        print("Setting Alias Values...");
        new StreamLooper(openReaderOrNull(seedDirectoryPath + File.separator + "isCalled_stripped.tsv")) {
            @Override
            protected void doWork(String line) {
                String[] columns = line.split("\\s+");
                if (columns.length < 3) {
                    return;
                }
                YagoNamedEntity entity = entities.get(columns[1]);
                if (entity != null) {
                    entity.addAlias(convertLiteral(columns[2]));
                }
            }
        }.loop();
        new StreamLooper(openReaderOrNull(seedDirectoryPath + File.separator + "means_stripped.tsv")) {
            @Override
            protected void doWork(String line) {
                String[] columns = line.split("\\s+");
                if (columns.length < 3) {
                    return;
                }
                YagoNamedEntity entity = entities.get(columns[2]);
                if (entity != null) {
                    entity.addAlias(convertLiteral(columns[1]));
                }
            }
        }.loop();
        print("Validating Aliases...");
        for (YagoNamedEntity entity : entities.values()) {
            entity.addAlias(getAliasFrom(entity.rootName));
        }
    }

    /**
     * Populates aliases and writes them to {@code aliasFilePath}: one
     * {@code +rootName} line per entity followed by one {@code .alias} line
     * per alias. Entities without aliases are omitted. The parent directory
     * is created if missing, and the writer is always closed so buffered
     * output reaches the file.
     */
    private void writeAliasFile(HashMap<String, YagoNamedEntity> entities, String aliasFilePath, String seedDirectoryPath) {
        setEntityAliasValues(entities, seedDirectoryPath);
        File parent = new File(aliasFilePath).getAbsoluteFile().getParentFile();
        if (parent != null && !parent.isDirectory()) {
            parent.mkdirs();
        }
        try (BufferedWriter out = openFileAsWriteStream(aliasFilePath)) {
            for (YagoNamedEntity entity : entities.values()) {
                if (entity.aliasList.size() > 0) {
                    out.write("+" + entity.rootName + "\n");
                    Iterator<String> aliases = entity.aliasList.iterator();
                    while (aliases.hasNext()) {
                        out.write("." + aliases.next() + "\n");
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads people and organisations (and locations when enabled) from the
     * seed directory.
     *
     * @return entities keyed by root name; a later file overwrites an earlier
     *         entry with the same name
     */
    private HashMap<String, YagoNamedEntity> getEntities(String seedDirectoryPath) {
        print("Getting Entities...");
        HashMap<String, YagoNamedEntity> entities = new HashMap<>();
        loadEntities(entities, seedDirectoryPath + File.separator + "wordnet_person_100007846.txt", NamedEntity.Type.Person, false);
        loadEntities(entities, seedDirectoryPath + File.separator + "wordnet_organization_108008335.txt", NamedEntity.Type.Organisation, true);
        if (this.locations) {
            loadEntities(entities, seedDirectoryPath + File.separator + "wordnet_location_100027167.txt", NamedEntity.Type.Location, false);
        }
        print("Total Entities: " + entities.size());
        return entities;
    }

    /**
     * Reads one entity file, taking the root name from the second column of
     * each line. Names starting with {@code Category:} are skipped, as are
     * {@code geoent_} names when {@code skipGeoEntities} is set and lines
     * with fewer than two columns.
     */
    private static void loadEntities(final HashMap<String, YagoNamedEntity> entities, String path, final NamedEntity.Type type, final boolean skipGeoEntities) {
        new StreamLooper(openReaderOrNull(path)) {
            @Override
            protected void doWork(String line) {
                String[] columns = line.split("\\s+");
                if (columns.length < 2) {
                    return;
                }
                String rootName = convertLiteral(columns[1]);
                if (rootName.startsWith("Category:")) {
                    return;
                }
                if (skipGeoEntities && rootName.startsWith("geoent_")) {
                    return;
                }
                entities.put(rootName, new YagoNamedEntity(rootName, type));
            }
        }.loop();
    }

    /**
     * Opens a file for reading, printing the failure and returning
     * {@code null} when it does not exist.
     */
    private static BufferedReader openReaderOrNull(String path) {
        try {
            return openFileAsReadStream(path);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * @param str path of the file to read
     * @return a buffered reader over the file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static BufferedReader openFileAsReadStream(String str) throws FileNotFoundException {
        return new BufferedReader(new FileReader(str));
    }

    /**
     * @param str path of the file to write; existing content is truncated
     * @return a buffered writer over the file
     * @throws IOException if the file cannot be opened
     */
    public static BufferedWriter openFileAsWriteStream(String str) throws IOException {
        return new BufferedWriter(new FileWriter(str));
    }

    /**
     * Unescapes Java-style escapes and strips one leading and one trailing
     * double quote.
     *
     * @param str the raw literal; {@code null} yields {@code null}
     * @return the cleaned literal
     */
    public static String convertLiteral(String str) {
        String literal = StringEscapeUtils.unescapeJava(str);
        if (literal == null) {
            return null;
        }
        if (literal.startsWith("\"")) {
            literal = literal.substring(1);
        }
        if (literal.endsWith("\"")) {
            literal = literal.substring(0, literal.length() - 1);
        }
        return literal;
    }

    /**
     * Unescapes Java-style escapes and replaces underscores with spaces.
     *
     * @param str the raw resource name; {@code null} yields {@code null}
     * @return the cleaned resource name
     */
    public static String convertResource(String str) {
        String resource = StringEscapeUtils.unescapeJava(str);
        return resource == null ? null : resource.replace('_', ' ');
    }

    /** Echoes a progress message to standard output (when verbose) and to the log. */
    private void print(String message) {
        if (this.verbose) {
            System.out.println(message);
        }
        log(message);
    }

    /** Appends a line to the shared log sink; a no-op while logging is off. */
    private void log(String message) {
        BufferedWriter writer = logOut;
        if (writer == null) {
            return;
        }
        try {
            writer.write(message + "\n");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the alias file at the default location.
     *
     * @param strArr {@code strArr[0]} is the seed directory
     */
    public static void main(String[] strArr) {
        if (strArr == null || strArr.length < 1) {
            System.err.println("Usage: EntityExtractionResourceBuilder <seed-directory>");
            return;
        }
        new EntityExtractionResourceBuilder().buildCandidateAliasFile(strArr[0]);
    }
}
