#!/usr/bin/env ruby2.1
# -*- coding: utf-8 -*-
require 'kramdown-rfc2629'
require 'kramdown-rfc/parameterset'
require 'kramdown-rfc/refxml'
require 'yaml'
require 'erb'
require 'date'

# Read external data (files, standard input) as UTF-8 by default instead of
# whatever encoding the process environment would otherwise select.
Encoding.default_external = "UTF-8" # wake up, smell the coffee

# Check that OpenSSL's default certificate file can be opened.
# If the probe fails with a StandardError, print a two-line warning and
# require 'certified' as a workaround.
def do_the_tls_dance
  require 'openssl'
  # Opening the file is only a readability probe; nothing is read from it.
  File.open(OpenSSL::X509::DEFAULT_CERT_FILE) { |_probe| }
rescue
  warn "** Configuration problem with OpenSSL certificate store at #{OpenSSL::X509::DEFAULT_CERT_FILE}."
  warn "**   Activating workaround.  Occasionally run `certified-update`."
  require 'certified'
end

# Matches one line terminator: LF, CR, or CR LF.
RE_NL = /(?:\n|\r|\r\n)/
# Matches one section of a sectionized input: a "---" divider line, optionally
# carrying a label (capture 1) and a "-" flag directly after the label
# (capture 2), followed by the section text (capture 3), which extends up to
# the next divider line or the end of the input.
RE_SECTION = /---(?:\s+(\w+)(-?))?\s*#{RE_NL}(.*?#{RE_NL})(?=---(?:\s+\w+-?)?\s*#{RE_NL}|\Z)/m

# Text placed [before, after] a section body so that the body is processed
# outside the surrounding nomarkdown region (close it first, reopen it after).
NMDTAGS = ["{:/nomarkdown}\n\n", "\n\n{::nomarkdown}\n"]

# Maps the "!" / "?" reference markers to the kind of reference they denote.
NORMINFORM = { "!" => :normative, "?" => :informative }

# Parse +input+ as YAML, preferring the restricted loader (+YAML.safe_load+).
#
# +args+ are the optional legacy positional arguments of +safe_load+, in
# order: permitted classes, permitted symbols, aliases flag, filename.
# Newer Psych releases accept these only as keywords, so when the installed
# +safe_load+ declares a +permitted_classes:+ keyword the positional values
# are translated; otherwise they are passed through unchanged.
#
# If the YAML library has no +safe_load+ at all, falls back to +YAML.load+
# and ignores +args+.
#
# Returns whatever the YAML loader returns for the document.
def yaml_load(input, *args)
  return YAML.load(input) unless YAML.respond_to?(:safe_load)

  param_names = YAML.method(:safe_load).parameters.map { |_kind, name| name }
  if param_names.include?(:permitted_classes)
    classes, symbols, aliases, filename = args
    opts = {
      permitted_classes: classes || [],
      permitted_symbols: symbols || [],
      aliases: aliases || false
    }
    opts[:filename] = filename if filename
    YAML.safe_load(input, **opts)
  else
    YAML.safe_load(input, *args)
  end
end

# Turn a sectionized input document (YAML front matter followed by
# "--- label" sections) into the text that is handed to kramdown.
#
# Steps, in order:
# 1. Unless KRAMDOWN_NO_SOURCE is set, store a gzip+Base64 copy of the raw
#    input in the global $source.
# 2. Split the input with RE_SECTION; the first section is parsed as YAML
#    front matter into a KramdownRFC::ParameterSet.
# 3. Collect the remaining sections by label into +sechash+.
# 4. Resolve {{-alias}}, {{!ref}}, {{?ref}} and {{ref=bibref}} shortcuts and
#    sort the references into normative/informative.
# 5. Append the XML (or entity/include reference) for every reference to the
#    "normative"/"informative" sections.
# 6. Render the ERB template with this method's binding.
#
# NOTE(review): because the template is evaluated with +binding+, it can see
# the local variables of this method; presumably it relies on names such as
# +ps+ and +sechash+, so do not rename locals without checking the template.
#
# Also sets the global $global_markdown_options while expanding explicit
# reference definitions.
#
# Returns [rendered_input, coding_override, link_defs, smart_quotes].
def xml_from_sections(input)

  unless ENV["KRAMDOWN_NO_SOURCE"]
    require 'kramdown-rfc/gzip-clone'
    require 'base64'
    compressed_input = Gzip.compress(input)
    $source = Base64.encode64(compressed_input)
  end

  sections = input.scan(RE_SECTION)
  # resulting in an array; each section is [section-label, nomarkdown-flag, section-text]

  # the first section is a YAML with front matter parameters (don't put a label here)
  # We put back the "---" plus gratuitous blank lines to hack the line number in errors
  yaml_in = input[/---\s*/] << sections.shift[2]
  ps = KramdownRFC::ParameterSet.new(yaml_load(yaml_in, [Date], [], true))
  coding_override = ps.has(:coding)
  smart_quotes = ps[:smart_quotes]

  # all the other sections are put in a Hash, possibly concatenated from parts there
  sechash = Hash.new{ |h,k| h[k] = ""}
  snames = []                   # a stack of section names
  sections.each do |sname, nmdflag, text|
    nmdin, nmdout = {
      "-" => ["", ""],          # stay in nomarkdown
      "" => NMDTAGS, # pop out temporarily
    }[nmdflag || ""]
    if sname
      snames << sname           # "--- label" -> push label (now current)
    else
      snames.pop                # just "---" -> pop label (previous now current)
    end
    sechash[snames.last] << "#{nmdin}#{text}#{nmdout}"
  end

  ref_replacements = { }        # alias (or bibref string) -> anchor
  anchor_to_bibref = { }        # anchor -> bibliography reference to fetch

  [:ref, :normative, :informative].each do |sn|
    if refs = ps.has(sn)
      warn "*** bad section #{sn}: #{refs.inspect}" unless refs.respond_to? :each
      refs.each do |k, v|
        if v.respond_to? :to_str
          if bibtagsys(v)       # enable "foo: RFC4711" as a custom anchor definition
            anchor_to_bibref[k] = v.to_str
          end
          ref_replacements[v.to_str] = k
        end
        if Hash === v
          if aliasname = v.delete("-")
            ref_replacements[aliasname] = k
          end
          if bibref = v.delete("=")
            anchor_to_bibref[k] = bibref
          end
        end
      end
    end
  end
  open_refs = ps[:ref] || { }       # consumed

  norm_ref = { }                # anchor -> true (normative) / false (informative)

  # convenience replacement of {{-coap}} with {{I-D.ietf-core-coap}}
  # collect normative/informative tagging {{!RFC2119}} {{?RFC4711}}
  sechash.each do |k, v|
    next if k == "fluff"
    v.gsub!(/{{(?:([?!])(-)?|(-))([\w._\-]+)(?:=([\w.\/_\-]+))?}}/) do |match|
      norminform = $1
      replacing = $2 || $3
      word = $4
      bibref = $5
      if replacing
        if new = ref_replacements[word]
          word = new
        else
          warn "*** no alias replacement for {{-#{word}}}"
          word = "-#{word}"
        end
      end       # now, word is the anchor
      if bibref
        if old = anchor_to_bibref[word]
          if bibref != old
            # report the anchor (word); no variable named "anchor" exists here
            warn "*** conflicting definitions for xref #{word}: #{old} != #{bibref}"
          end
        else
          anchor_to_bibref[word] = bibref
        end
      end

      # things can be normative in one place and informative in another -> normative
      # collect norm/inform above and assign it by priority here
      if norminform
        norm_ref[word] ||= norminform == '!' # one normative ref is enough
      end
      "{{#{word}}}"
    end
  end

  # make sure both reference sections exist before entries are added to them
  [:normative, :informative].each do |k|
    ps.rest[k.to_s] ||= { }
  end

  norm_ref.each do |k, v|
    # could check bibtagsys here: needed if open_refs is nil or string
    target = ps.has(v ? :normative : :informative)
    warn "*** overwriting #{k}" if target.has_key?(k)
    target[k] = open_refs[k] # add reference to normative/informative
  end
  # note that unused items from ref are considered OK, therefore no check for that here

  # also should allow norm/inform check of other references
  # {{?coap}} vs. {{!coap}} vs. {{-coap}} (undecided)
  # or {{?-coap}} vs. {{!-coap}} vs. {{-coap}} (undecided)
  # could require all references to be decided by a global flag
  overlap = [:normative, :informative].map { |s| (ps.has(s) || { }).keys }.reduce(:&)
  unless overlap.empty?
    warn "*** #{overlap.join(', ')}: both normative and informative"
  end

  stand_alone = ps[:stand_alone]
  link_defs = {}

  [:normative, :informative].each do |sn|
    if refs = ps[sn]
      refs.each do |k, v|
        href = k.gsub(/\A[0-9]/) { "_#{$&}" } # can't start an IDREF with a number
        link_defs[k] = ["##{href}", nil]            # allow [RFC2119] in addition to {{RFC2119}}

        bibref = anchor_to_bibref[k] || k
        bts, url = bibtagsys(bibref, k, stand_alone)
        # a known bibliography tag with no (or only a string) definition:
        # pull in the canned reference instead of building one
        if bts && (!v || v == {} || v.respond_to?(:to_str))
          if stand_alone
            a = %{{: anchor="#{k}"}}
            sechash[sn.to_s] << %{\n#{NMDTAGS[0]}\n![:include:](#{bts})#{a}\n#{NMDTAGS[1]}\n}
          else
            bts.gsub!('/', '_')
            (ps.rest["bibxml"] ||= []) << [bts, url]
            sechash[sn.to_s] << %{&#{bts};\n} # ???
          end
        else
          unless v && Hash === v
            warn "*** don't know how to expand ref #{k}"
            next
          end
          if bts && !v.delete("override")
            warn "*** warning: explicit settings completely override canned bibxml in reference #{k}"
          end
          options = {input: 'RFC2629Kramdown', entity_output: coding_override, link_defs: link_defs}
          $global_markdown_options = options # For recursive calls in bibref annotation processing.
          sechash[sn.to_s] << KramdownRFC::ref_to_xml(k, v)
        end
      end
    end
  end

  erbfilename = File.expand_path '../../data/kramdown-rfc2629.erb', __FILE__
  erbfile = File.read(erbfilename, coding: "UTF-8")
  erb = ERB.new(erbfile, nil, '-')
  # remove redundant nomarkdown pop outs/pop ins as they confuse kramdown
  input = erb.result(binding).gsub(%r"{::nomarkdown}\s*{:/nomarkdown}"m, "")
  ps.warn_if_leftovers
  sechash.delete("fluff")       # fluff is a "commented out" section
  if !sechash.empty?            # any sections unused by the ERb file?
    warn "*** sections left #{sechash.keys.inspect}!"
  end

  [input, coding_override, link_defs, smart_quotes]
end

# Local shorthand for the converter's constant; used below as the leading
# part of every bibliography reference URL.
XML_RESOURCE_ORG_PREFIX = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_PREFIX

# Map a bibliography tag to its canned reference.
#
# bib         - tag such as "RFC2119", or "<PREFIX>.<name>" where <PREFIX> is a
#               key of Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_MAP
# anchor      - anchor the document wants to use for this reference (optional)
# stand_alone - whether the document is built in stand_alone mode; only used
#               to decide whether to warn about an anchor that cannot be set
#
# Returns [XML entity name, URL], or nil when the tag is not recognized.
def bibtagsys(bib, anchor=nil, stand_alone=true)
  if bib =~ /\Arfc(\d+)/i
    padded_number = format("%04d", $1.to_i)
    return [bib.upcase,
            "#{XML_RESOURCE_ORG_PREFIX}/bibxml/reference.RFC.#{padded_number}.xml"]
  end

  return nil unless bib =~ /\A([-A-Z0-9]+)\./
  map_entry = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_MAP[$1]
  return nil unless map_entry

  directory, _ttl, can_rewrite_anchor = map_entry
  entity = bib.gsub(/\A[0-9]/) { |digit| "_#{digit}" } # can't start an ID with a number

  query = nil
  if anchor && entity != anchor
    if can_rewrite_anchor
      # ask for the reference with the custom anchor already applied
      query = %{?anchor=#{anchor}}
    elsif !stand_alone
      warn "*** selecting a custom anchor '#{anchor}' for '#{entity}' requires stand_alone mode"
      warn "    the output will need manual editing to correct this"
    end
  end

  [entity, "#{XML_RESOURCE_ORG_PREFIX}/#{directory}/reference.#{bib}.xml#{query}"]
end

# Load the character replacement table from data/encoding-fallbacks.txt.
#
# Each line holds a hexadecimal code point, optionally followed by whitespace
# and the replacement text; a line without replacement text maps to a single
# space.  Returns a Hash from the one-character UTF-8 string to its
# replacement.
def read_encodings
  table_path = File.expand_path '../../data/encoding-fallbacks.txt', __FILE__
  table = {}
  File.read(table_path, coding: "UTF-8").each_line do |line|
    code_hex, replacement = line.chomp.split(" ", 2)
    table[code_hex.hex.chr(Encoding::UTF_8)] = replacement || " "
  end
  table
end

# Replacement table loaded once at startup; passed as the `fallback:` option
# when the output is re-encoded into the target coding.
FALLBACK = read_encodings

# Replace each horizontal tab in +s+ with the number of spaces needed to reach
# the next multiple of +tab_stops+, measured in characters since the start of
# the line or the previous tab.  Returns a new string.
def expand_tabs(s, tab_stops = 8)
  s.gsub(/([^\t\n]*)\t/) do
    run = Regexp.last_match(1)
    padding = tab_stops - (run.size % tab_stops)
    run + (" " * padding)
  end
end

# ---- Main script: read the input, normalize it, convert it, print XML ----

# Entity output mode handed to kramdown.  This top-level value is always
# :as_char; the coding taken from the front matter is kept separately in
# target_coding and only drives the final re-encoding below.
coding_override = :as_char
# Read everything from the files named on the command line, or from stdin.
input = ARGF.read
# Drop a leading byte order mark, with a warning.
if input[0] == "\uFEFF"
   warn "*** There is a leading byte order mark. Ignored."
   input[0..0] = ''
end
# Make sure the input ends with a newline.
if input[-1] != "\n"
  # warn "*** added missing newline at end"
  input << "\n"                 # fix #26
end
# Replace "{::include FILE}" at the start of a line with the contents of
# FILE.  This reads whatever path the input names, so it is skipped when
# KRAMDOWN_SAFE is set.
input.gsub!(/^\{::include\s+(.*?)\}/) {
  File.read($1).chomp
} unless ENV["KRAMDOWN_SAFE"]
# Tabs are expanded to spaces (after a warning) before further processing.
if input =~ /[\t]/
   warn "*** Input contains HT (\"tab\") characters. Undefined behavior will ensue."
   input = expand_tabs(input)
end

link_defs = {}
# target_coding and smart_quotes are only assigned in the sectionized case;
# otherwise they remain nil.
if input =~ /\A---/        # this is a sectionized file
  do_the_tls_dance unless ENV["KRAMDOWN_DONT_VERIFY_HTTPS"]
  input, target_coding, link_defs, smart_quotes = xml_from_sections(input)
end
if input =~ /\A<\?xml/          # if this is a whole XML file, protect it
  input = "{::nomarkdown}\n#{input}\n{:/nomarkdown}\n"
end
options = {input: 'RFC2629Kramdown', entity_output: coding_override, link_defs: link_defs}
# An ASCII target coding turns smart quotes off unless the document said
# otherwise.
if target_coding && target_coding =~ /ascii/ && smart_quotes.nil?
  smart_quotes = false
end
# "Off" is expressed as an array of four plain ASCII quote code points.
if smart_quotes == false
  smart_quotes = ["'".ord, "'".ord, '"'.ord, '"'.ord]
end
case smart_quotes
when Array
  options[:smart_quotes] = smart_quotes
when nil, true
  # nothing to do: no :smart_quotes option is passed on
else
  warn "*** Can't deal with smart_quotes value #{smart_quotes.inspect}"
end

# Re-encode into the requested coding; FALLBACK is supplied as the
# replacement table for the conversion.
if target_coding
  input = input.encode(Encoding.find(target_coding), fallback: FALLBACK)
end

# warn "options: #{options.inspect}"
doc = Kramdown::Document.new(input, options)
# Converter warnings go to stderr as YAML; the resulting XML goes to stdout.
$stderr.puts doc.warnings.to_yaml unless doc.warnings.empty?
puts doc.to_rfc2629
