package com.nexwave.nquindexer;

import com.nexwave.nsidita.BlankRemover;
import com.nexwave.nsidita.DocFileInfo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.xalan.templates.Constants;
import org.apache.xml.serialize.OutputFormat;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:lib/docbook-xsl/extensions/webhelpindexer.jar:com/nexwave/nquindexer/SaxDocFileParser.class */
/**
 * SAX handler that walks an (X)HTML help page and collects its title, its short
 * description and the raw text that should be indexed.
 *
 * <p>Title and short description are stored on {@link #fileDesc}; the indexable text is
 * accumulated in {@link #strbf}. Text is collected only while inside the element whose
 * {@code id} attribute is {@code "content"} (until its outermost {@code div} closes), or
 * directly inside {@code meta}, {@code title} or {@code shortdesc} elements.
 *
 * <p>The handler keeps per-document state in instance fields, so one instance must not be
 * used to parse several documents at the same time.
 */
public class SaxDocFileParser extends DefaultHandler {
    /** Matches the {@code &nbsp;} entity, which is undefined once the DTD is stripped. */
    private static final String NBSP_ENTITY_REGEX = "\\x26nbsp\\x3B";
    /** Matches an XML declaration ({@code <?xml ... ?>}). */
    private static final String XML_DECL_REGEX = "\\x3C\\x3Fxml[^\\x3E]*\\x3F\\x3E";
    /** Matches a DOCTYPE declaration without an internal subset ({@code <!DOCTYPE ... >}). */
    private static final String DOCTYPE_REGEX = "\\x3C\\x21DOCTYPE[^\\x3E]*\\x3E";

    /** Description of the file currently being parsed (title, short description). */
    protected DocFileInfo fileDesc = null;
    /** Not used by this class. */
    protected String projectDir = null;
    /** Accumulates the indexable text of the current document. */
    protected StringBuffer strbf = null;
    /** Qualified name of the most recently opened element. */
    private String currentElName = "";
    /** Text of the title / short description / block element being captured, or null. */
    private StringBuffer tempVal = null;
    /** True while inside an element whose class is "shortdesc" or "summary". */
    private boolean shortdescBool = false;
    /** Nesting depth counter used to find the end of the short description element. */
    private int shortTagCpt = 0;
    /** True while inside the element with id="content". */
    private boolean addContent = false;
    /** True while the current element is meta, title or shortdesc. */
    private boolean addHeaderInfo = false;
    /** True while the current element carries accesskey "n", "p" or "h". */
    private boolean doNotIndex = false;
    /** Number of currently open div elements inside the content area. */
    private int divCount = 0;

    /**
     * Initialisation hook; this implementation does nothing.
     *
     * @param str ignored
     * @return always 0
     */
    public int init(String str) {
        return 0;
    }

    /**
     * Parses {@code file} and returns the information extracted from it.
     *
     * @param file the document to parse
     * @return a fresh {@link DocFileInfo} for {@code file}, populated by the parse
     */
    public DocFileInfo runExtractData(File file) {
        this.fileDesc = new DocFileInfo(file);
        this.strbf = new StringBuffer("");
        parseDocument(file);
        return this.fileDesc;
    }

    /**
     * Runs a non-validating SAX parse of {@code file} with this object as handler.
     *
     * <p>External general entities and external DTD loading are switched off, and the
     * DOCTYPE / XML declaration are stripped beforehand by {@link #RemoveValidationPI(File)}.
     * Parse and I/O failures are reported on the console and otherwise ignored.
     *
     * @param file the document to parse
     */
    public void parseDocument(File file) {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(false);

        // Allow direct calls that bypass runExtractData(): the handler callbacks
        // dereference both fields unconditionally.
        if (this.fileDesc == null) {
            this.fileDesc = new DocFileInfo(file);
        }
        if (this.strbf == null) {
            this.strbf = new StringBuffer("");
        }

        // Reset ALL per-document state. A previous parse that aborted half-way would
        // otherwise leave e.g. shortdescBool/shortTagCpt set and corrupt this document.
        this.addContent = false;
        this.addHeaderInfo = false;
        this.doNotIndex = false;
        this.shortdescBool = false;
        this.shortTagCpt = 0;
        this.divCount = 0;
        this.tempVal = null;
        this.currentElName = "";

        try {
            SAXParser parser = factory.newSAXParser();
            parser.getXMLReader().setFeature("http://xml.org/sax/features/external-general-entities", false);
            parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            System.out.println("Parsing: " + file);
            String cleaned = RemoveValidationPI(file);
            if (cleaned != null) {
                InputSource inputSource = new InputSource(new StringReader(cleaned));
                inputSource.setSystemId(file.toURI().toURL().toString());
                parser.parse(inputSource, this);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e2) {
            e2.printStackTrace();
        } catch (SAXParseException e3) {
            System.out.println("SaxParseException: The indexing file contains incorrect xml syntax.");
            e3.printStackTrace();
        } catch (SAXException e4) {
            System.out.println("SaxException. You may need to include Xerces in your classpath. See documentation for details");
            e4.printStackTrace();
        }
    }

    /**
     * Updates the capture state for a newly opened element.
     *
     * @param str        namespace URI (unused)
     * @param str2       local name (unused)
     * @param str3       qualified element name; all matching is done on this value
     * @param attributes attributes of the element
     */
    @Override
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        final String qName = str3;
        this.currentElName = qName;

        final boolean isMeta = qName.equalsIgnoreCase("meta");
        final boolean isTitle = qName.equalsIgnoreCase("title");
        final boolean isShortdesc = qName.equalsIgnoreCase("shortdesc");

        if (isMeta) {
            String metaName = attributes.getValue("name");
            String metaContent = attributes.getValue("content");
            // A meta element without a content attribute carries nothing to index;
            // skipping it avoids both an NPE and indexing the literal text "null".
            if (metaName != null && metaContent != null) {
                boolean isDescription = metaName.equalsIgnoreCase("description");
                if (isDescription || metaName.equalsIgnoreCase("keywords")) {
                    this.strbf.append(ShingleFilter.TOKEN_SEPARATOR + metaContent + ShingleFilter.TOKEN_SEPARATOR);
                }
                if (isDescription) {
                    this.fileDesc.setShortdesc(BlankRemover.rmWhiteSpace(metaContent.replace('\n', ' ')));
                }
            }
        }

        if (isTitle || isShortdesc) {
            this.tempVal = new StringBuffer();
        }
        this.addHeaderInfo = isMeta || isTitle || isShortdesc;

        if ("content".equals(attributes.getValue("id"))) {
            this.addContent = true;
        }

        if (this.addContent) {
            boolean isDiv = qName.equalsIgnoreCase("div");
            if (isDiv) {
                this.divCount++;
            }
            if (isDiv || qName.equalsIgnoreCase("p") || qName.equalsIgnoreCase("span")) {
                String cssClass = attributes.getValue(Constants.ATTRNAME_CLASS);
                if (cssClass != null && (cssClass.equalsIgnoreCase("shortdesc") || cssClass.equalsIgnoreCase("summary"))) {
                    this.shortdescBool = true;
                }
                this.tempVal = new StringBuffer();
                this.strbf.append(ShingleFilter.TOKEN_SEPARATOR);
            }
            if (this.shortdescBool) {
                // Count this element so endElement() can find the matching close tag.
                this.shortTagCpt++;
            }
            // Elements with these access keys are excluded from the index.
            String accessKey = attributes.getValue("accesskey");
            this.doNotIndex = "n".equals(accessKey) || "p".equals(accessKey) || "h".equals(accessKey);
        }

        // Keep the text of adjacent elements from running together.
        this.strbf.append(ShingleFilter.TOKEN_SEPARATOR);
    }

    /**
     * Appends character data to the index buffer (and to the pending title / short
     * description buffer, if any) when the current element is being captured.
     *
     * @param cArr character array supplied by the parser
     * @param i    start offset within {@code cArr}
     * @param i2   number of characters to read
     */
    @Override
    public void characters(char[] cArr, int i, int i2) throws SAXException {
        boolean capturing = this.addContent || this.addHeaderInfo;
        if (!capturing || this.doNotIndex || this.currentElName.equalsIgnoreCase(Constants.ELEMNAME_SCRIPT_STRING)) {
            return;
        }
        String text = new String(cArr, i, i2);
        this.strbf.append(text);
        if (this.tempVal != null) {
            this.tempVal.append(text);
        }
    }

    /**
     * Finalises the title or short description when its element closes and tracks the
     * end of the content area.
     *
     * @param str  namespace URI (unused)
     * @param str2 local name (unused)
     * @param str3 qualified element name
     */
    @Override
    public void endElement(String str, String str2, String str3) throws SAXException {
        final String qName = str3;

        if (qName.equalsIgnoreCase("title")) {
            // tempVal may already have been cleared by an inner element's end tag.
            if (this.tempVal != null) {
                this.fileDesc.setTitle(BlankRemover.rmWhiteSpace(this.tempVal.toString()));
            }
            this.tempVal = null;
        } else if (this.shortdescBool) {
            // NOTE(review): a title element nested in the short description is counted in
            // startElement() but not here; left unchanged to preserve existing behaviour.
            this.shortTagCpt--;
            if (this.shortTagCpt == 0) {
                if (this.tempVal != null) {
                    this.fileDesc.setShortdesc(BlankRemover.rmWhiteSpace(this.tempVal.toString().replace('\n', ' ')));
                }
                this.tempVal = null;
                this.shortdescBool = false;
            }
        }

        if (qName.equalsIgnoreCase("div") && this.addContent) {
            this.divCount--;
            if (this.divCount == 0) {
                // The outermost div of the content area has closed.
                this.addContent = false;
            }
        }
    }

    /** Processing instructions are ignored. */
    @Override
    public void processingInstruction(String str, String str2) throws SAXException {
    }

    /**
     * Performs no custom entity resolution.
     *
     * @return always {@code null}
     */
    @Override
    public InputSource resolveEntity(String str, String str2) throws SAXException, IOException {
        return null;
    }

    /**
     * Reads {@code file} and returns its text with the XML declaration and an
     * {@code <!DOCTYPE html PUBLIC ...>} declaration replaced by a newline and every
     * {@code &nbsp;} rewritten as {@code &#160;}.
     *
     * <p>A DOCTYPE that spans several lines is joined into one line before removal. If
     * the file ends before the DOCTYPE is closed, the text is returned with the
     * unterminated declaration left in place.
     *
     * @param file the document to read
     * @return the cleaned text, or {@code null} if the file could not be read
     */
    public String RemoveValidationPI(File file) {
        StringBuilder sb = new StringBuilder();
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, OutputFormat.Defaults.Encoding));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.replaceAll(NBSP_ENTITY_REGEX, "&#160;");
                boolean hasDoctype = line.contains("<!DOCTYPE html PUBLIC");
                if (hasDoctype) {
                    // Pull in following lines until the declaration's closing '>' is found.
                    int doctypeStart = line.indexOf("<!DOCTYPE");
                    int doctypeEnd = line.indexOf(">", doctypeStart);
                    while (doctypeEnd < 0) {
                        String next = reader.readLine();
                        if (next == null) {
                            // End of file inside the DOCTYPE: stop instead of concat(null).
                            break;
                        }
                        line = line.concat(next.replaceAll(NBSP_ENTITY_REGEX, "&#160;"));
                        doctypeEnd = line.indexOf(">", doctypeStart);
                    }
                }
                if (line.contains("<?xml version")) {
                    line = line.replaceAll(XML_DECL_REGEX, "\n");
                }
                if (hasDoctype) {
                    sb.append(line.replaceAll(DOCTYPE_REGEX, "\n"));
                } else {
                    sb.append(line + "\n");
                }
            }
            return sb.toString();
        } catch (IOException e) {
            // Report the failure; the caller treats null as "nothing to parse".
            e.printStackTrace();
            return null;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }
}
