/*
 * Decompiled with CFR 0.152.
 */
package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Checksum;
import edu.harvard.hul.ois.jhove.ChecksumInputStream;
import edu.harvard.hul.ois.jhove.ChecksumType;
import edu.harvard.hul.ois.jhove.Checksummer;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.ExternalSignature;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.JhoveBase;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.SignatureType;
import edu.harvard.hul.ois.jhove.SignatureUseType;
import edu.harvard.hul.ois.jhove.TextMDMetadata;
import edu.harvard.hul.ois.jhove.module.XmlModule;
import edu.harvard.hul.ois.jhove.module.html.Html3_2DocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01FrameDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01StrictDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01TransDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0FrameDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0StrictDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0TransDocDesc;
import edu.harvard.hul.ois.jhove.module.html.HtmlCharStream;
import edu.harvard.hul.ois.jhove.module.html.HtmlDocDesc;
import edu.harvard.hul.ois.jhove.module.html.HtmlMetadata;
import edu.harvard.hul.ois.jhove.module.html.JHDoctype;
import edu.harvard.hul.ois.jhove.module.html.JHElement;
import edu.harvard.hul.ois.jhove.module.html.JHOpenTag;
import edu.harvard.hul.ois.jhove.module.html.JHXmlDecl;
import edu.harvard.hul.ois.jhove.module.html.ParseException;
import edu.harvard.hul.ois.jhove.module.html.ParseHtml;
import edu.harvard.hul.ois.jhove.module.html.Token;
import edu.harvard.hul.ois.jhove.module.html.TokenMgrError;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

public class HtmlModule
extends ModuleBase {
    /* ---- Descriptive metadata handed to the ModuleBase constructor ---- */

    /** Module name. */
    private static final String NAME = "HTML-hul";
    /** Module release identifier. */
    private static final String RELEASE = "1.3";
    /** Release date as {year, month, day}. */
    private static final int[] DATE = new int[]{2006, 9, 5};
    /** Format names; the first entry is reported on every RepInfo. */
    private static final String[] FORMAT = new String[]{"HTML"};
    /** Human-readable list of the HTML/XHTML versions covered (entries are comma-separated). */
    private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict, HTML 4.0 Transitional, HTML 4.0 Frameset, HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset, XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset, XHTML 1.1";
    /** MIME types; the first entry is reported on every RepInfo. */
    private static final String[] MIMETYPE = new String[]{"text/html"};
    /** Human-readable description of the well-formedness criteria. */
    private static final String WELLFORMED = "An HTML file is well-formed if it meets the criteria defined in the HTML 3.2 specification (W3C Recommendation, 14-Jan-1997), the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998), the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999), the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, revised 1-Aug-2002), or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001)";
    /** Human-readable description of the validity criteria. */
    private static final String VALIDITY = "An HTML file is valid if it is well-formed and has a valid DOCTYPE declaration.";
    /** Human-readable summary of the representation information reported. */
    private static final String REPINFO = "Languages, title, META tags, frames, links, scripts, images, citations, defined terms, abbreviations, entities, Unicode entity blocks";
    /** Free-form note (none for this module). */
    private static final String NOTE = "";
    /** Copyright / licence statement. */
    private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /* ---- Per-parse state ---- */

    /** Checksumming wrapper around the input; assigned in parse() only when checksums are computed. */
    protected ChecksumInputStream _cstream;
    /** Buffered data stream the HTML parser reads from; assigned in parse(). */
    protected DataInputStream _dstream;
    /** Upper-cased, unquoted public identifier of the DOCTYPE; reset to null at the start of an HTML parse. */
    protected String _doctype;

    /*
     * Document type codes. checkDoctype() returns the HTML ones, and parse() uses a
     * non-zero code as an index into profileNames / versionNames, so the two arrays
     * must stay aligned with these values.
     */
    public static final int HTML_3_2 = 1;
    public static final int HTML_4_0_STRICT = 2;
    public static final int HTML_4_0_FRAMESET = 3;
    public static final int HTML_4_0_TRANSITIONAL = 4;
    public static final int HTML_4_01_STRICT = 5;
    public static final int HTML_4_01_FRAMESET = 6;
    public static final int HTML_4_01_TRANSITIONAL = 7;
    public static final int XHTML_1_0_STRICT = 8;
    public static final int XHTML_1_0_TRANSITIONAL = 9;
    public static final int XHTML_1_0_FRAMESET = 10;
    public static final int XHTML_1_1 = 11;

    /**
     * Profile name for each document type code (null when the version has no profile).
     * Index 9 is XHTML 1.0 Transitional and index 10 is XHTML 1.0 Frameset, matching
     * the constants above (the XHTML constants are not in the same order as the HTML ones).
     */
    private static final String[] profileNames = new String[]{
        null,            // 0: unrecognized
        null,            // HTML_3_2
        "Strict",        // HTML_4_0_STRICT
        "Frameset",      // HTML_4_0_FRAMESET
        "Transitional",  // HTML_4_0_TRANSITIONAL
        "Strict",        // HTML_4_01_STRICT
        "Frameset",      // HTML_4_01_FRAMESET
        "Transitional",  // HTML_4_01_TRANSITIONAL
        "Strict",        // XHTML_1_0_STRICT
        "Transitional",  // XHTML_1_0_TRANSITIONAL
        "Frameset",      // XHTML_1_0_FRAMESET
        null             // XHTML_1_1
    };
    /** Version name for each document type code (index 0, unrecognized, has none). */
    private static final String[] versionNames = new String[]{null, "HTML 3.2", "HTML 4.0", "HTML 4.0", "HTML 4.0", "HTML 4.01", "HTML 4.01", "HTML 4.01", "XHTML 1.0", "XHTML 1.0", "XHTML 1.0", "XHTML 1.1"};

    /** True once a "withtextmd=true" default parameter has been seen by parse(). */
    protected boolean _withTextMD = false;
    /** Text metadata collected during the current parse. */
    protected TextMDMetadata _textMD;

    /**
     * Creates the module and registers its descriptive metadata: the vendor
     * agent, the five HTML/XHTML specification documents, and the two file
     * extension signatures ({@code .html} and {@code .htm}).
     */
    public HtmlModule() {
        // NOTE(review): the trailing 'false' is an opaque flag of the ModuleBase constructor; confirm its meaning there.
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);

        // Vendor
        Agent vendor = new Agent("Harvard University Library", AgentType.EDUCATIONAL);
        vendor.setAddress("Office for Information Systems, 90 Mt. Auburn St., Cambridge, MA 02138");
        vendor.setTelephone("+1 (617) 495-3724");
        vendor.setEmail("jhove-support@hulmail.harvard.edu");
        this._vendor = vendor;

        // Publisher and authors shared by several specification entries
        Agent w3c = new Agent("World Wide Web Consortium", AgentType.NONPROFIT);
        w3c.setAddress("Massachusetts Institute of Technology, Computer Science and Artificial Intelligence Laboratory, 32 Vassar Street, Room 32-G515, Cambridge, MA 02139");
        w3c.setTelephone("(617) 253-2613");
        w3c.setFax("(617) 258-5999");
        w3c.setWeb("http://www.w3.org/");
        Agent raggett = new Agent("Dave Raggett", AgentType.OTHER);
        Agent leHors = new Agent("Arnaud Le Hors", AgentType.OTHER);
        Agent jacobs = new Agent("Ian Jacobs", AgentType.OTHER);

        // HTML 3.2
        Document spec = new Document("HTML 3.2 Reference Specification", DocumentType.REPORT);
        spec.setPublisher(w3c);
        spec.setAuthor(raggett);
        spec.setDate("1997-01-14");
        spec.setIdentifier(new Identifier("http://www.w3c.org/TR/REC-html32-19970114", IdentifierType.URL));
        this._specification.add(spec);

        // HTML 4.0
        spec = new Document("HTML 4.0 Specification", DocumentType.REPORT);
        spec.setPublisher(w3c);
        spec.setAuthor(raggett);
        spec.setAuthor(leHors);
        spec.setAuthor(jacobs);
        spec.setDate("1998-04-24");
        spec.setIdentifier(new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/", IdentifierType.URL));
        this._specification.add(spec);

        // HTML 4.01
        spec = new Document("HTML 4.01 Specification", DocumentType.REPORT);
        spec.setPublisher(w3c);
        spec.setAuthor(raggett);
        spec.setAuthor(leHors);
        spec.setAuthor(jacobs);
        spec.setDate("1999-12-24");
        spec.setIdentifier(new Identifier("http://www.w3.org/TR/1999/REC-html401-19991224/", IdentifierType.URL));
        this._specification.add(spec);

        // XHTML 1.0 (dates use the same YYYY-MM-DD form as the entries above)
        spec = new Document("XHTML(TM) 1.0 The Extensible HyperText Markup Language (Second Edition)", DocumentType.REPORT);
        spec.setPublisher(w3c);
        spec.setDate("2002-08-01");
        spec.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/", IdentifierType.URL));
        this._specification.add(spec);

        // XHTML 1.1
        spec = new Document("XHTML(TM) 1.1 - Module-based XHTML", DocumentType.REPORT);
        spec.setPublisher(w3c);
        spec.setDate("2001-05-31");
        spec.setIdentifier(new Identifier("http://www.w3.org/TR/2001/REC-xhtml11-20010531/", IdentifierType.URL));
        this._specification.add(spec);

        // External (file extension) signatures
        this._signature.add(new ExternalSignature(".html", SignatureType.EXTENSION, SignatureUseType.OPTIONAL));
        this._signature.add(new ExternalSignature(".htm", SignatureType.EXTENSION, SignatureUseType.OPTIONAL));
    }

    /**
     * Parses the stream as HTML and records the results in {@code repInfo}.
     *
     * <p>With {@code n == 0} the stream is tokenized as HTML, the DOCTYPE is
     * classified, the matching document description validates the element
     * list, and version, profile, properties and (optionally) checksums are
     * stored on {@code repInfo}. With a non-zero {@code n} the work is handed
     * to {@link XmlModule}, passing on the DOCTYPE remembered from the earlier
     * HTML pass.
     *
     * @param inputStream the document content
     * @param repInfo     receives messages, well-formedness/validity flags and properties
     * @param n           parse index; 0 for the HTML pass, non-zero to delegate
     *                    to the XML module (100 is converted to 0 before delegating)
     * @return 0 when this module is finished with the document, or 100 when the
     *         document is XHTML and should be parsed again with that index
     *         (presumably the framework re-invokes this method; confirm against the caller)
     * @throws IOException if reading the stream fails
     */
    @Override
    public int parse(InputStream inputStream, RepInfo repInfo, int n) throws IOException {
        if (n != 0) {
            if (HtmlModule.isXmlAvailable()) {
                XmlModule xmlModule = new XmlModule();
                int xmlIndex = (n == 100) ? 0 : n;
                xmlModule.setApp(this._app);
                xmlModule.setBase(this._je);
                xmlModule.setXhtmlDoctype(this._doctype);
                return xmlModule.parse(inputStream, repInfo, xmlIndex);
            }
            repInfo.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents"));
            repInfo.setWellFormed(false);
            return 0;
        }

        this._doctype = null;
        if (this._defaultParams != null) {
            for (Object param : this._defaultParams) {
                // equalsIgnoreCase is locale-independent and tolerates a null entry.
                if ("withtextmd=true".equalsIgnoreCase((String) param)) {
                    // NOTE(review): the flag is never cleared, so it stays set for later parses on this instance.
                    this._withTextMD = true;
                }
            }
        }
        this.initParse();
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);
        // n is always 0 on this path, so every HTML pass starts with fresh text metadata.
        this._textMD = new TextMDMetadata();

        // Wrap the input in a checksumming stream only when checksums are wanted and not yet present.
        int bufferSize = this._je != null ? this._je.getBufferSize() : 0;
        Checksummer checksummer = null;
        if (this._je != null && this._je.getChecksumFlag() && repInfo.getChecksum().size() == 0) {
            checksummer = new Checksummer();
            this._cstream = new ChecksumInputStream(inputStream, checksummer);
            this._dstream = HtmlModule.getBufferedDataStream(this._cstream, bufferSize);
        } else {
            this._dstream = HtmlModule.getBufferedDataStream(inputStream, bufferSize);
        }

        HtmlCharStream charStream;
        ParseHtml parser;
        try {
            charStream = new HtmlCharStream(this._dstream, "ISO-8859-1");
            parser = new ParseHtml(charStream);
        }
        catch (UnsupportedEncodingException unsupportedEncodingException) {
            repInfo.setMessage(new ErrorMessage("Internal error: " + unsupportedEncodingException.getMessage()));
            repInfo.setWellFormed(false);
            return 0;
        }

        int type = 0;
        HtmlMetadata metadata = null;
        try {
            // Raw List on purpose: it is passed to checkDoctype(List) and HtmlDocDesc.validate unchanged.
            List elements = parser.HtmlDoc();
            if (elements.isEmpty()) {
                repInfo.setWellFormed(false);
                repInfo.setMessage(new ErrorMessage("Document is empty"));
                return 0;
            }
            type = this.checkDoctype(elements);
            if (type < 0) {
                repInfo.setWellFormed(false);
                repInfo.setMessage(new ErrorMessage("DOCTYPE is not HTML"));
                return 0;
            }

            // Only the FIRST open tag is inspected: it must be html, head, body or title.
            boolean startsWithHtmlTag = false;
            for (Object element : elements) {
                if (!(element instanceof JHOpenTag)) {
                    continue;
                }
                String tagName = ((JHOpenTag) element).getName();
                startsWithHtmlTag = "html".equals(tagName) || "head".equals(tagName)
                        || "body".equals(tagName) || "title".equals(tagName);
                break;
            }
            if (!startsWithHtmlTag) {
                repInfo.setMessage(new ErrorMessage("Document contains no html, head, body or title tags"));
                repInfo.setWellFormed(false);
                return 0;
            }

            // Line-break codes stored in the text metadata: -1 unknown, 0 CR, 1 LF, 2 CRLF.
            String lineEnd = charStream.getKindOfLineEnd();
            if (lineEnd == null) {
                repInfo.setMessage(new InfoMessage("Not able to determine type of end of line"));
                this._textMD.setLinebreak(-1);
            } else if (lineEnd.equalsIgnoreCase("CR")) {
                this._textMD.setLinebreak(0);
            } else if (lineEnd.equalsIgnoreCase("LF")) {
                this._textMD.setLinebreak(1);
            } else if (lineEnd.equalsIgnoreCase("CRLF")) {
                this._textMD.setLinebreak(2);
            }

            if (type == 0) {
                // No recognized DOCTYPE: decide between plain XML, XHTML and fallback HTML 3.2.
                switch (this.seemsToBeXHTML(elements)) {
                    case 1: {
                        repInfo.setMessage(new ErrorMessage("Document has XML declaration but no DOCTYPE; probably XML rather than HTML"));
                        repInfo.setWellFormed(false);
                        return 0;
                    }
                    case 2: {
                        return 100;
                    }
                    default: {
                        break;
                    }
                }
                repInfo.setMessage(new ErrorMessage("Unrecognized or missing DOCTYPE declaration; validation continuing as HTML 3.2"));
                repInfo.setValid(false);
            }

            // Pick the document description for the detected version; HTML 3.2 is the fallback.
            HtmlDocDesc docDesc;
            String basisVersion;
            switch (type) {
                case HTML_4_0_STRICT: {
                    docDesc = new Html4_0StrictDocDesc();
                    basisVersion = "4.0";
                    break;
                }
                case HTML_4_0_FRAMESET: {
                    docDesc = new Html4_0FrameDocDesc();
                    basisVersion = "4.0";
                    break;
                }
                case HTML_4_0_TRANSITIONAL: {
                    docDesc = new Html4_0TransDocDesc();
                    basisVersion = "4.0";
                    break;
                }
                case HTML_4_01_STRICT: {
                    docDesc = new Html4_01StrictDocDesc();
                    basisVersion = "4.01";
                    break;
                }
                case HTML_4_01_FRAMESET: {
                    docDesc = new Html4_01FrameDocDesc();
                    basisVersion = "4.01";
                    break;
                }
                case HTML_4_01_TRANSITIONAL: {
                    docDesc = new Html4_01TransDocDesc();
                    basisVersion = "4.01";
                    break;
                }
                case XHTML_1_0_STRICT:
                case XHTML_1_0_TRANSITIONAL:
                case XHTML_1_0_FRAMESET:
                case XHTML_1_1: {
                    return 100;
                }
                default: {
                    docDesc = new Html3_2DocDesc();
                    basisVersion = "3.2";
                    break;
                }
            }
            this._textMD.setMarkup_basis("HTML");
            this._textMD.setMarkup_basis_version(basisVersion);
            this._textMD.setMarkup_language(this._doctype);

            docDesc.validate(elements, repInfo);
            metadata = docDesc.getMetadata();
            if (metadata.getCharset() != null) {
                this._textMD.setCharset(metadata.getCharset());
            } else {
                this._textMD.setCharset("ISO-8859-1");
            }
            // Read the charset back from the text metadata, as the value of record.
            String charset = this._textMD.getCharset();
            this._textMD.setByte_order(this._bigEndian ? 0 : 1);
            this._textMD.setByte_size("8");
            this._textMD.setCharacter_size(charset.indexOf("UTF") != -1 ? "variable" : "1");
        }
        catch (ParseException parseException) {
            Token token = parseException.currentToken;
            if (token != null) {
                repInfo.setMessage(new ErrorMessage("Parse error", "Line = " + token.beginLine + ", column = " + token.beginColumn));
            } else {
                // No token position available; still report the failure instead of throwing an NPE.
                repInfo.setMessage(new ErrorMessage("Parse error"));
            }
            repInfo.setWellFormed(false);
        }
        catch (TokenMgrError tokenMgrError) {
            repInfo.setMessage(new ErrorMessage("TokenMgrError: " + tokenMgrError.getLocalizedMessage()));
            repInfo.setWellFormed(false);
        }

        if (repInfo.getWellFormed() == 0) {
            return 0;
        }
        if (type != 0) {
            if (profileNames[type] != null) {
                repInfo.setProfile(profileNames[type]);
            }
            repInfo.setVersion(versionNames[type]);
        }
        if (metadata != null) {
            Property property = metadata.toProperty(this._withTextMD ? this._textMD : null);
            if (property != null) {
                repInfo.setProperty(property);
            }
        }
        if (checksummer != null) {
            repInfo.setSize(this._cstream.getNBytes());
            repInfo.setChecksum(new Checksum(checksummer.getCRC32(), ChecksumType.CRC32));
            String digest = checksummer.getMD5();
            if (digest != null) {
                repInfo.setChecksum(new Checksum(digest, ChecksumType.MD5));
            }
            digest = checksummer.getSHA1();
            if (digest != null) {
                repInfo.setChecksum(new Checksum(digest, ChecksumType.SHA1));
            }
        }
        return 0;
    }

    /**
     * Looks for an HTML signature near the start of the stream.
     *
     * <p>Up to {@code getBase().getSigBytes()} bytes are read, upper-cased,
     * and — ignoring all whitespace — scanned for {@code <!DOCTYPE HTML},
     * {@code <HTML} or {@code <TITLE}. On the first hit the module name is
     * recorded via {@code repInfo.setSigMatch}; if nothing matches before the
     * byte limit or end of stream, {@code repInfo} is marked not well-formed.
     *
     * @param file        the file being checked (not used here)
     * @param inputStream the document content
     * @param repInfo     receives format, MIME type, module and the match result
     * @throws IOException if reading the stream fails for a reason other than end of file
     */
    @Override
    public void checkSignatures(File file, InputStream inputStream, RepInfo repInfo) throws IOException {
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);

        // Whitespace is dropped from the input before matching, so the signatures
        // are written without it ("<!DOCTYPE HTML" could otherwise never match).
        char[][] signatures = new char[][]{"<!DOCTYPEHTML".toCharArray(), "<HTML".toCharArray(), "<TITLE".toCharArray()};
        int[] matched = new int[signatures.length];
        int limit = this.getBase().getSigBytes();
        DataInputStream dataInputStream = new DataInputStream(inputStream);
        try {
            for (int bytesRead = 0; bytesRead < limit; ++bytesRead) {
                int value = HtmlModule.readUnsignedByte(dataInputStream, this);
                char c = Character.toUpperCase((char) value);
                if (Character.isWhitespace(c)) {
                    continue;
                }
                for (int i = 0; i < signatures.length; ++i) {
                    char[] signature = signatures[i];
                    if (c != signature[matched[i]]) {
                        // Restart, letting the current character begin a new match
                        // (e.g. the second '<' of "<<HTML"). This is sufficient because
                        // '<' occurs only at position 0 of every signature.
                        matched[i] = (c == signature[0]) ? 1 : 0;
                    } else if (++matched[i] == signature.length) {
                        repInfo.setSigMatch(this._name);
                        return;
                    }
                }
            }
        }
        catch (EOFException eOFException) {
            // Stream ended before any signature matched; fall through to the no-match result.
        }
        repInfo.setWellFormed(false);
    }

    /**
     * Classifies the document's DOCTYPE declaration.
     *
     * <p>The DOCTYPE is expected as the first element, or the second when the
     * first is an XML declaration. When the declaration is {@code HTML PUBLIC},
     * its upper-cased, unquoted public identifier is stored in
     * {@code _doctype} and compared with the known HTML 3.2 / 4.0 / 4.01
     * identifiers.
     *
     * @param list the parsed document elements
     * @return one of the {@code HTML_*} constants for a recognized DOCTYPE;
     *         0 when there is no DOCTYPE or it is not recognized; -1 when the
     *         DOCTYPE names a root element other than HTML
     */
    protected int checkDoctype(List list) {
        if (list.isEmpty()) {
            return 0;
        }
        JHElement candidate = (JHElement) list.get(0);
        if (candidate instanceof JHXmlDecl && list.size() >= 2) {
            candidate = (JHElement) list.get(1);
        }
        if (!(candidate instanceof JHDoctype)) {
            return 0;
        }
        List parts = ((JHDoctype) candidate).getDoctypeElements();
        if (parts.size() < 3) {
            return 0;
        }
        try {
            // Locale.ROOT keeps the comparison stable on JVMs whose default locale
            // has special casing rules (e.g. Turkish 'i' -> dotted capital I).
            String rootName = ((String) parts.get(0)).toUpperCase(Locale.ROOT);
            if (!"HTML".equals(rootName)) {
                return -1;
            }
            String keyword = ((String) parts.get(1)).toUpperCase(Locale.ROOT);
            if (!"PUBLIC".equals(keyword)) {
                return 0;
            }
            String publicId = this.stripQuotes(((String) parts.get(2)).toUpperCase(Locale.ROOT));
            this._doctype = publicId;
            if ("-//W3C//DTD HTML 3.2 FINAL//EN".equals(publicId) || "-//W3C//DTD HTML 3.2//EN".equals(publicId)) {
                return HTML_3_2;
            }
            if ("-//W3C//DTD HTML 4.0//EN".equals(publicId)) {
                return HTML_4_0_STRICT;
            }
            if ("-//W3C//DTD HTML 4.0 TRANSITIONAL//EN".equals(publicId)) {
                return HTML_4_0_TRANSITIONAL;
            }
            if ("-//W3C//DTD HTML 4.0 FRAMESET//EN".equals(publicId)) {
                return HTML_4_0_FRAMESET;
            }
            if ("-//W3C//DTD HTML 4.01//EN".equals(publicId)) {
                return HTML_4_01_STRICT;
            }
            if ("-//W3C//DTD HTML 4.01 TRANSITIONAL//EN".equals(publicId)) {
                return HTML_4_01_TRANSITIONAL;
            }
            if ("-//W3C//DTD HTML 4.01 FRAMESET//EN".equals(publicId)) {
                return HTML_4_01_FRAMESET;
            }
        }
        catch (Exception ignored) {
            // Best effort: a malformed DOCTYPE entry (wrong type, null, ...) is treated as unrecognized.
            return 0;
        }
        return 0;
    }

    /**
     * Guesses whether a document without a recognized DOCTYPE is XHTML.
     *
     * @param list the parsed document elements
     * @return 0 when the first element is not an XML declaration (or the list
     *         cannot be examined, e.g. it is empty); 2 when an XML declaration
     *         is present and the first open tag is {@code html}; 1 when an XML
     *         declaration is present but the first open tag is something else
     *         or there is no open tag at all
     */
    protected int seemsToBeXHTML(List list) {
        try {
            JHElement first = (JHElement) list.get(0);
            if (!(first instanceof JHXmlDecl)) {
                return 0;
            }
            for (Object item : list) {
                JHElement element = (JHElement) item;
                if (!(element instanceof JHOpenTag)) {
                    continue;
                }
                // The first open tag decides the outcome.
                JHOpenTag openTag = (JHOpenTag) element;
                return "html".equals(openTag.getName()) ? 2 : 1;
            }
        }
        catch (Exception ignored) {
            // Empty list or an unexpected element type: treat as "not XHTML".
            return 0;
        }
        return 1;
    }

    /**
     * Removes one pair of enclosing double quotes from a string.
     *
     * @param string the text to unquote; must not be null
     * @return the text between the quotes when {@code string} both starts and
     *         ends with a double quote (and is at least two characters long);
     *         otherwise {@code string} unchanged
     */
    protected String stripQuotes(String string) {
        int n = string.length();
        // The length check keeps "" and a lone '"' from indexing/substring-ing out of range.
        if (n >= 2 && string.charAt(0) == '\"' && string.charAt(n - 1) == '\"') {
            return string.substring(1, n - 1);
        }
        return string;
    }

    /**
     * Reports whether the XML module class can be loaded by name.
     *
     * @return true when {@code edu.harvard.hul.ois.jhove.module.XmlModule}
     *         loads without throwing an exception, false otherwise
     */
    protected static boolean isXmlAvailable() {
        boolean loadable;
        try {
            Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule");
            loadable = true;
        }
        catch (Exception loadFailure) {
            // Any exception while loading means the module cannot be used.
            loadable = false;
        }
        return loadable;
    }
}

