Module:User:Cscott/llpeg

return (function()
local builders = {}
local function register(name, f)
  builders[name] = f
end
register('advent.compat', function() return require [[Module:User:Cscott/compat]] end)

register('llpeg.types', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local CHARMAX = 0x7F -- maximum codepoint for charsets

-- metatable for pattern objects; will be filled in later
local metareg = {}

local enum = function(keys)
   local Enum = {}
   Enum.__index = Enum
   function Enum:__tostring() return self.name end
   function Enum:pairs() return keys end
   function Enum:type() return Enum end

    for name, value in pairs(keys) do
       Enum[name] = setmetatable({ name = name, value = value }, Enum)
    end
    return Enum
end

local CapKind = enum{
   close = "close",  -- not used in trees */
   position = "position",
   const = "constant",  -- ktable[key] is Lua constant
   backref = "backref",  -- ktable[key] is "name" of group to get capture
   arg = "argument",  -- 'key' is arg's number
   simple = "simple",  -- next node is pattern
   table = "table",  -- next node is pattern
   ["function"] = "function",  -- ktable[key] is function; next node is pattern
   acc = "acc", -- ktable[key] is function; next node is pattern
   query = "query",  -- ktable[key] is table; next node is pattern
   string = "string",  -- ktable[key] is string; next node is pattern
   num = "num",  -- numbered capture; 'key' is number of value to return
   subst = "substitution",  -- substitution capture; next node is pattern
   fold = "fold",  -- ktable[key] is function; next node is pattern
   runtime = "runtime",  -- not used in trees (is uses another type for tree)
   group = "group",  -- ktable[key] is group's "name"
}

local TTag = enum{
  Char = "char", -- 'n' has unicode codepoint
  Set = "set", -- 'set' has sparse array codepoint->true for codepoint <=CHARMAX
                -- 'rest' indicates whether all codepoints > CHARMAX should be
                -- part of the set (true) or not (false)
  Any = "any",
  True = "true",
  False = "false",
  UTFR = "utf8.range",  --[[ range of UTF-8 codepoints;
                 'from' has initial codepoint; 'to' has final codepoint ]]--
  Rep = "rep",  -- 'sib1' *
  Seq = "seq",  -- 'sib1' 'sib2'
  Choice = "choice",  -- 'sib1' / 'sib2'
  Not = "not",  -- !'sib1'
  And = "and",  -- &'sib1'
  Call = "call",  -- 'sib2' is rule being called; otherwise same as TOpenCall
  OpenCall = "opencall",  -- 'key' is rule name
  Rule = "rule",  --[[ 'key' is rule name (but key == nil for unused rules);
             'sib1' is rule's pattern pre-rule; 'sib2' is next rule;
             'n' is rule's sequential number, 'name' is rule name (even
             for unused rules) ]]--
  XInfo = "xinfo",  -- extra info (not used)
  Grammar = "grammar",  -- 'sib1' is initial (and first) rule, 'n' is # rules
  Behind = "behind",  -- 'sib1' is pattern, 'n' is how much to go back
  Capture = "capture",  --[[ captures: 'cap' is kind of capture (enum 'CapKind');
                'key' is Lua value associated with capture;
               'sib1' is capture body ]]--
  RunTime = "run-time",  --[[ run-time capture: 'key' is Lua function;
                 'sib1' is capture body ]]--
  Throw = "throw",    -- labeled failure: 'key' is label's name,
                       -- sib2 is associated recovery rule
}

local PE = enum{
   nullable = "nullable",
   nofail = "nofail",
}

-- virtual machine instructions
local Opcode = enum{
  Any = "any", -- if no char, fail
  Char = "char",  -- if char != aux, fail
  Set = "set",  -- if char not in buff, fail
  TestAny = "testany",  -- in no char, jump to 'offset'
  TestChar = "testchar",  -- if char != aux, jump to 'offset'
  TestSet = "testset",  -- if char not in buff, jump to 'offset'
  Span = "span",  -- read a span of chars in buff
  UTFR = "utf-range",  -- if codepoint not in range [offset, utf_to], fail
  Behind = "behind",  -- walk back 'aux' characters (fail if not possible)
  Ret = "ret",  -- return from a rule
  End = "end",  -- end of pattern
  Choice = "choice",  -- stack a choice; next fail will jump to 'offset'
  PredChoice = "pred_choice",  -- labeled failure: stack a choice; changes label env next fail will jump to 'offset'
  Jmp = "jmp",  -- jump to 'offset'
  Call = "call",  -- call rule at 'offset'
  OpenCall = "open_call",  -- call rule number 'key' (must be closed to a ICall)
  Commit = "commit",  -- pop choice and jump to 'offset'
  PartialCommit = "partial_commit",  -- update top choice to current position and jump
  BackCommit = "back_commit",  -- backtrack like "fail" but jump to its own 'offset'
  FailTwice = "failtwice",  -- pop one choice and then fail
  Fail = "fail",  -- go back to saved state on choice and jump to saved offset
  Giveup = "giveup",  -- internal use
  FullCapture = "fullcapture",  -- complete capture of last 'off' chars
  OpenCapture = "opencapture",  -- start a capture
  CloseCapture = "closecapture",
  CloseRunTime = "closeruntime",
  Throw = "throw",    -- fails with a given label --labeled failure
  ThrowRec = "throw_rec", -- fails with a given label and call rule at 'offset' --labeled failure
  Empty = "--",  -- to fill empty slots left by optimizations
}

-- helper for visitor pattern definitions
function define(dispatch, which, f)
   for _,v in pairs(which) do
      assert(v ~= nil) -- catch typos
      dispatch[v] = f
   end
end

local numsiblings = {}
define(numsiblings, {
          TTag.Char, TTag.Set, TTag.Any,
          TTag.True, TTag.False, TTag.UTFR,
          TTag.Call, TTag.OpenCall,
          TTag.Throw,
}, 0)
define(numsiblings, {
          TTag.Rep, TTag.Not, TTag.And, TTag.Grammar,
          TTag.Behind, TTag.Capture, TTag.RunTime,
}, 1)
define(numsiblings, {
          TTag.Seq, TTag.Choice, TTag.Rule,
}, 2)

-- more help for visitor functions

local function_name_registry = {}
function register_fname(name, f)
   assert(type(name) == "string")
   assert(type(f) == "function")
   function_name_registry[f] = name
end

function report_ferror(f, msg)
   local fname = function_name_registry[f]
   if fname ~= nil then
      msg = fname .. ": " .. msg
   end
   error(msg)
end

function define_type_visitor(tbl)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(val, ...)
      local a = dispatch["assert"]
      if a ~= nil then a(val, ...) end -- assert preconditions
      local ty = type(val)
      if ty == 'table' and getmetatable(val) == metareg then
         ty = 'pattern'
      end
      local f = dispatch[ty]
      if f ~= nil then return f(val, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. ty)
      end
      return f(val, ...)
   end
   return visit
end

function define_tree_visitor(tbl, opt_name)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" or getmetatable(keys) == TTag then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(tree, ...)
      if tree == nil then report_ferror(visit, "nil tree") end
      local a = dispatch["assert"]
      if a ~= nil then a(tree, ...) end -- assert preconditions
      local f = dispatch[tree.tag]
      if f ~= nil then return f(tree, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. tree.tag)
      end
      return f(tree, ...)
   end
   return visit
end

function define_opcode_visitor(tbl)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" or getmetatable(keys) == Opcode then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(op, ...)
      if op == nil then report_ferror(visit, "nil op") end
      local a = dispatch["assert"]
      if a ~= nil then a(op, ...) end -- assert preconditions
      local f = dispatch[op.code]
      if f ~= nil then return f(op, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. op.code)
      end
      return f(op, ...)
   end
   return visit
end

-- helper for module imports
function from(mod, list)
   local result = {}
   for _,v in ipairs(list) do
      table.insert(result, mod[v])
   end
   return compat.unpack(result)
end

function newcharset()
   return setmetatable({
         tag = TTag.Set,
         code = nil,
         rest = false,
         set = {}
   }, metareg)
end

local fullset = newcharset()
for i = 0,CHARMAX do
   fullset.set[i] = true
end
fullset.rest = true -- make sure non-ascii unicode chars are included!
assert(fullset.tag == TTag.Set)

return {
   CHARMAX = CHARMAX,
   CapKind = CapKind,
   Opcode = Opcode,
   PE = PE,
   TTag = TTag,
   define = define,
   define_tree_visitor = define_tree_visitor,
   enum = enum,
   from = from,
   fullset = fullset,
   metareg = metareg,
   newcharset = newcharset,
   numsiblings = numsiblings,
   register_fname = register_fname,
}

end)

register('llpeg.print', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   TTag,
   define,
   define_tree_visitor,
   numsiblings,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'TTag',
               'define',
               'define_tree_visitor',
               'numsiblings',
   })

function printcharset(tree)
   local result = "["
   local i = 0
   while i <= CHARMAX do
      local first = i
      while tree.set[i] and i <= CHARMAX do
         i = i + 1
      end
      if first == (i - 1) then -- unary range
         result = result .. string.format("(%02x)", first)
      elseif first < (i-1) then -- non-empty range
         result = result .. string.format("(%02x-%02x)", first, i - 1)
      end
      i = i + 1
   end
   if tree.rest then
      result = result .. "(80-FFFF)"
   end
   return result .. "]"
end

function printjmp(op, pc)
   return "-> " .. op.target
end

local printinst_helper = define_opcode_visitor{
   [Opcode.Char] = function(op, pc)
      return string.format("'%c' (%02x)", op.aux, op.aux)
   end,
   [Opcode.TestChar] = function(op, pc)
      return string.format("'%c' (%02x)", op.aux, op.aux) .. printjmp(op, pc)
   end,
   [Opcode.UTFR] = function(op, pc)
      return string.format("%d - %d", op.from, op.to)
   end,
   [Opcode.FullCapture] = function(op, pc)
      return string.format("%s (size = %s)  (idx = %s)",
                           op.cap.value, op.aux, op.key)
   end,
   [Opcode.OpenCapture] = function(op, pc)
      return string.format("%s (idx = %s)",
                           op.cap.value, op.key)
   end,
   [Opcode.Set] = function(op, pc)
      return printcharset(op)
   end,
   [Opcode.TestSet] = function(op, pc)
      return printcharset(op) .. printjmp(op, pc)
   end,
   [Opcode.Span] = function(op, pc)
      return printcharset(op)
   end,
   [Opcode.OpenCall] = function(op, pc)
      return string.format("-> %d", op.target) -- rule number
   end,
   [Opcode.Behind] = function(op, pc)
      return string.format("%d", op.aux)
   end,
   [{Opcode.Jmp, Opcode.Call, Opcode.Commit, Opcode.Choice,
     Opcode.PartialCommit, Opcode.BackCommit, Opcode.TestAny,
     Opcode.PredChoice}] = function(op, pc)
      return printjmp(op, pc)
   end,
   [Opcode.Throw] = function(op, pc) -- labeled failure
      return string.format("(idx = %s)", op.key)
   end,
   [Opcode.ThrowRec] = function(op, pc)
      return printjmp(op, pc) .. string.format("(idx = %s)", op.key)
   end,
   default = function() return '' end,
}
function printinst(pc, op, accum)
   table.insert(accum, string.format("%02d: %s ", pc, op.code.value))
   table.insert(accum, printinst_helper(op, pc))
   table.insert(accum, "\n")
   return accum
end

function printpatt(code, accum)
   for pc,op in ipairs(code) do
      printinst(pc, op, accum)
   end
   return accum
end

function printcap(cap, indent)
   print(string.format("%s%s", string.rep(' ', indent), cap))
end

function printcap2close(captures, ncaptures, i, indent)
   local head = captures[i]
   i = i + 1
   printcap(head, indent) -- print head capture
   while i <= ncaptures and head:inside(captures[i]) do
      i = printcap2close(captures, ncaptures, i, indent + 2) -- print nested captures
   end
   if i <= ncaptures and head:isopencap() then
      assert(captures[i]:isclosecap())
      printcap(captures[i], indent) -- print and skip close capture
      i = i + 1
   end
   return i
end

function printcaplist(captures, ncaptures)
   -- for debugging, first print a raw list of captures
   if ncaptures == nil then ncaptures = #captures end
   for i=1,ncaptures do
      printcap(captures[i], 0)
   end
  print(">======");
  local i=1
  while i <= ncaptures and not captures[i]:isclosecap() do
     i = printcap2close(captures, ncaptures, i, 0)
  end
  if i > ncaptures then
     print("<unmatched>")
  end
  print("=======");
end

local printtree_helper = define_tree_visitor{
   [TTag.Char] = function(tree)
      local c = compat.utf8char(tree.n)
      if c:find("%C") ~= nil then -- printable?
         return " '" .. c .. "'"
      else
         return string.format(" (%02X)", tree.n)
      end
   end,
   [TTag.Set] = function(tree)
      return printcharset(tree)
   end,
   [TTag.UTFR] = function(tree)
      return " " .. tree.from .. " - " .. tree.to
   end,
   [{TTag.OpenCall, TTag.Call}] = function(tree)
      local ret = string.format(" key: %s", tree.key)
      local rule = tree.sib2
      if rule ~= nil then
         ret = ret .. " (rule: " .. rule.n .. ")"
      end
      return ret
   end,
   [TTag.Behind] = function(tree)
      return " " .. tree.n
   end,
   [TTag.Capture] = function(tree)
      return string.format(" kind: '%s'  key: %s", tree.cap.value, tree.key)
   end,
   [TTag.Rule] = function(tree)
      return string.format(" key: %s", tree.key)
   end,
   [TTag.XInfo] = function(tree)
      return " n: " .. tree.n
   end,
   [TTag.Grammar] = function(tree)
      return " " .. tree.n -- number of rules
   end,
   [TTag.Throw] = function(tree)
      return string.format(" key: %s", tree.key)
   end,
   default = function(tree) return '' end
}
function printtree(tree, indent, accum)
   local sibs = numsiblings[tree.tag]

   table.insert(accum, string.rep(' ', indent))
   table.insert(accum, tree.tag.value)

   table.insert(accum, printtree_helper(tree))
   table.insert(accum, "\n")

   if tree.tag == TTag.Rule then
      sibs = 1 -- don't print sib2
   elseif tree.tag == TTag.Grammar then
      local rule = tree.sib1
      for i=1,tree.n do
         printtree(rule, indent + 2, accum)
         rule = rule.sib2
      end
      sibs = 0 -- siblings already handled
   end
   if sibs >= 1 then
      printtree(tree.sib1, indent + 2, accum)
      if sibs >= 2 then
         printtree(tree.sib2, indent + 2, accum)
      end
   end
   return accum
end

local PREFIX = "" -- could also be "l." or "lpeg." etc
local printrepl_helper
printrepl_helper = define_tree_visitor{
   [TTag.True] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P""')
   end,
   [TTag.Any] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P(1)')
   end,
   [TTag.Char] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P"')
      local c = compat.utf8char(tree.n)
      if c:find("%C") ~= nil then -- printable?
         table.insert(buf, c)
      else
         table.insert(buf, string.format('\\%02X', tree.n))
      end
      table.insert(buf, '"')
   end,
   [TTag.Set] = function(tree, buf)
      local nbuf = {}
      local insertchar = function(cp)
         local c = compat.utf8char(cp)
         if string.find(c, "^[^%w%p ]") ~= nil then
            table.insert(nbuf, string.format('\\x%02X', cp))
         else
            table.insert(nbuf, c)
         end
      end
      local nargs = 0
      local inserttwo = function(cp1, cp2)
         if nargs > 0 then table.insert(nbuf, ',') end
         nargs = nargs + 1
         table.insert(nbuf, '"')
         insertchar(cp1)
         insertchar(cp2)
         table.insert(nbuf, '"')
      end

      local i = 0
      while i <= CHARMAX do
         local first = i
         while tree.set[i] and i <= CHARMAX do
            i = i + 1
         end
         if first == (i - 1) then -- unary range
            inserttwo(first, first)
         elseif first < (i-1) then -- non-empty range
            inserttwo(first, i-1)
         end
         i = i + 1
      end

      local r = table.concat(nbuf)
      if nargs == 1 then
         r = PREFIX .. 'S' .. r
      else
         r = PREFIX .. 'S(' .. r .. ')'
      end

      if tree.rest then
         table.insert(buf, '(')
         table.insert(buf, r)
         table.insert(buf, ' + ')
         table.insert(buf, PREFIX)
         table.insert(buf, 'utfR(0x80, 0x10FFFF))')
      else
         table.insert(buf, r)
      end
   end,
   [TTag.UTFR] = function(tree, buf)
      table.insert(buf, string.format("%sutfR(0x%04X, 0x%04X)", PREFIX, tree.from, tree.to))
   end,
   [{TTag.OpenCall, TTag.Call}] = function(tree, buf)
      table.insert(buf, string.format('%sV"%s"', PREFIX, tree.key))
   end,
   [TTag.Not] = function(tree, buf)
      table.insert(buf, '-(')
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, ')')
   end,
   [TTag.Seq] = function(tree, buf)
      table.insert(buf, "(")
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, " * ")
      printrepl_helper(tree.sib2, buf)
      table.insert(buf, ")")
   end,
   [TTag.Choice] = function(tree, buf)
      table.insert(buf, "(")
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, " + ")
      printrepl_helper(tree.sib2, buf)
      table.insert(buf, ")")
   end,
   [TTag.Rep] = function(tree, buf)
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, "^0")
   end,
   --[[
   [TTag.Behind] = function(tree)
      return " " .. tree.n
      end,
   ]]--
   [TTag.Capture] = function(tree, buf)
      local repl = define_type_visitor{
         string = function(v)
            return '"' .. v .. '"' -- xxx should handle escapes
         end,
         default = tostring,
      }
      local name = nil
      local show_patt = false
      local show_key = false
      if tree.cap == CapKind.simple then
         name = 'C'
         show_patt = true
      elseif tree.cap == CapKind.subst then
         name = 'Cs'
         show_patt = true
      elseif tree.cap == CapKind.table then
         name = 'Ct'
         show_patt = true
      elseif tree.cap == CapKind.pos then
         name = 'Cp'
      elseif tree.cap == CapKind.arg then
         name = 'Carg'
         show_key = true
      elseif tree.cap == CapKind.backref then
         name = 'Cb'
         show_key = true
      elseif tree.cap == CapKind.group then
         name = 'Cg'
         show_patt = true
         show_key = (tree.key ~= nil)
      end
      if name ~= nil then
         table.insert(buf, PREFIX)
         table.insert(buf, name)
         table.insert(buf, '(')
         if show_patt then
            printrepl_helper(tree.sib1, buf)
            if show_key then
               table.insert(buf, ', ')
            end
         end
         if show_key then
            table.insert(buf, repl(tree.key))
         end
         table.insert(buf, ')')
         return
      end
      if tree.cap == CapKind.string or
         tree.cap == CapKind.num or
         tree.cap == CapKind.query or
         tree.cap == CapKind['function'] then
         printrepl_helper(tree.sib1, buf)
         table.insert(buf, ' / ')
         table.insert(buf, repl(tree.key))
         return
      end
      -- fallback
      table.insert(buf, string.format("<pattern %s>", tostring(tree.tag)))
   end,
   [TTag.Rule] = function(tree, buf)
      local key = tree.name
      if type(key) == 'number' then key = string.format("[%d]", key) end
      table.insert(buf, key)
      table.insert(buf, " = ")
      printrepl_helper(tree.sib1, buf)
   end,
   [TTag.Grammar] = function(tree, buf)
      table.insert(buf, PREFIX .. "P{")
      local rule = tree.sib1

      local r = {}
      local first_rule_name = rule.name
      r[first_rule_name] = rule
      rule = rule.sib2

      local names = {}
      for i=2,tree.n do
         table.insert(names, rule.name)
         r[rule.name] = rule
         rule = rule.sib2
      end

      -- sort rule names
      table.sort(names)
      table.insert(names, 1, first_rule_name)

      -- now print in order
      for _,name in ipairs(names) do
         printrepl_helper(r[name], buf)
         table.insert(buf, ", ")
      end
      table.insert(buf, "}")
   end,
   --[[
   [TTag.Throw] = function(tree)
      return " key: " .. tree.key .. "  (rule: " .. tree.sib2.cap .. ")"
   end,
   ]]--
   default = function(tree, buf)
      table.insert(buf, string.format("<pattern %s>", tostring(tree.tag)))
   end,
}
function printrepl(tree)
   local buf = {}
   printrepl_helper(tree, buf)
   return table.concat(buf)
end

return {
   printcaplist = printcaplist,
   printcharset = printcharset,
   printinst = printinst,
   printpatt = printpatt,
   printrepl = printrepl,
   printtree = printtree,
}

end)

register('llpeg.code', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   PE,
   TTag,
   define,
   define_tree_visitor,
   fullset,
   newcharset,
   numsiblings,
   register_fname,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'PE',
               'TTag',
               'define',
               'define_tree_visitor',
               'fullset',
               'newcharset',
               'numsiblings',
               'register_fname',
})
local printinst = myrequire('llpeg.print').printinst

local TRACE_INSTRUCTIONS = false

-- signals a "no-instruction"
local NOINST = nil

-- don't optimize captures longer than this
local MAXOFF = 15

-- forward declarations
local codegen

local CompileState = {}
CompileState.__index = CompileState

--[[
** {======================================================
** Analysis and some optimizations
** =======================================================
]]--

--[[
** Check whether a charset is empty (returns IFail), singleton (IChar),
** full (IAny), or none of those (ISet). When singleton, '*c' returns
** which character it is. (When generic set, the set was the input,
** so there is no need to return it.)
]]--
function charsettype(cs)
   local count = 0
   local candidate
   for i,_ in pairs(cs.set) do
      candidate = i
      count = count + 1
   end
   if cs.rest then
      if count == (CHARMAX + 1) then
         return Opcode.Any -- full set
      end
   elseif count == 0 then
      return Opcode.Fail -- empty set
   elseif count == 1 then
      return Opcode.Char, candidate -- single char
   end
   return Opcode.Set -- neither full nor empty nor singleton
end

-- A few basic operations on charsets; returns new object

function cs_clone(cs)
   local result = newcharset()
   for i,_ in pairs(cs.set) do
      result.set[i] = true
   end
   result.rest = cs.rest
   return result
end

function cs_complement(cs)
   local result = newcharset()
   for i=0,CHARMAX do
      if not cs.set[i] then
         result.set[i] = true
      end
   end
   result.rest = not cs.rest
   return result
end

function cs_intersection(a, b)
   local result = newcharset()
   for i,_ in pairs(a.set) do
      if a.set[i] and b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest and b.rest
   return result
end

function cs_union(a, b)
   local result = newcharset()
   for i=0,CHARMAX do
      if a.set[i] or b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest or b.rest
   return result
end

function cs_diff(a, b)
   local result = newcharset()
   for i=0,CHARMAX do
      if a.set[i] and not b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest and not b.rest
   return result
end

function cs_disjoint(a, b)
   if a.rest == b.rest then return false end
   for i,_ in pairs(a.set) do
      if b.set[i] then return false end
   end
   for i,_ in pairs(b.set) do
      if a.set[i] then return false end
   end
   return true
end

function cs_equal(a, b)
   if a.rest ~= b.rest then return false end
   for i,_ in pairs(a.set) do
      if not b.set[i] then return false end
   end
   for i,_ in pairs(b.set) do
      if not a.set[i] then return false end
   end
   return true
end

--[[
** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
** charset and return it; else return nil.
]]--
local tocharset = define_tree_visitor{
   [TTag.Set] = function(v)
      return v -- copy set
   end,
   [TTag.Char] = function(v)
      -- only one char
      if v.n <= CHARMAX then
         local t = newcharset()
         t.set[v.n] = true
         return t
      else
         return nil
      end
   end,
   [TTag.Any] = function(v)
      return fullset
   end,
   [TTag.False] = function(v)
      return newcharset()
   end,
   default = function(v)
      return nil
   end,
}
register_fname("tocharset", tocharset)

--[[
** Visit a TCall node taking care to stop recursion. If node not yet
** visited, return 'f(rule for call)', otherwise return 'def' (default
** value)
]]--
function CompileState:callrecursive(tree, f, default_value, ...)
   if tree.tag ~= TTag.Call then
      error("unexpected tree tag")
   end
   local rule = self.grammar.ruletab[tree.key]
   if rule.tag ~= TTag.Rule then
      error("unexpected tree sibling")
   end
   if tree.seen == true then
      return default_value -- node already visited
   else
      -- first visit
      local oldseen = tree.seen
      tree.seen = true
      local result = f(rule, ...)
      tree.seen = oldseen -- restore tree
      return result
   end
end

--[[
** Check whether a pattern tree has captures
]]--
local hascaptures
hascaptures = define_tree_visitor{
   [{TTag.Capture, TTag.RunTime}] = function(tree, cs)
         return true
   end,
   [TTag.Call] = function(tree, cs)
      assert(cs ~= nil)
      return cs:callrecursive(tree, hascaptures, false, cs)
   end,
   [TTag.Rule] = function(tree, cs)
      -- do not follow siblings
      return hascaptures(tree.sib1, cs)
   end,
   [TTag.OpenCall] = function(tree, cs)
      error("should not happen")
   end,
   [TTag.Grammar] = function(tree, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, hascaptures, tree.sib1, cs)
   end,
   default = function(tree, cs)
      local n = numsiblings[tree.tag]
      if n == 1 then
         return hascaptures(tree.sib1, cs) -- tail call
      elseif n == 2 then
         if hascaptures(tree.sib1, cs) then return true end
         return hascaptures(tree.sib2, cs) -- tail call
      elseif n == 0 then
         return false
      else
         error("how many siblings does this have?")
      end
   end,
}
function CompileState:hascaptures(t) return hascaptures(t, self) end
register_fname("hascaptures", hascaptures)

--[[
** Checks how a pattern behaves regarding the empty string,
** in one of two different ways:
** A pattern is *nullable* if it can match without consuming any character;
** A pattern is *nofail* if it never fails for any string
** (including the empty string).
** The difference is only for predicates and run-time captures;
** for other patterns, the two properties are equivalent.
** (With predicates, &'a' is nullable but not nofail. Of course,
** nofail => nullable.)
** These functions are all convervative in the following way:
**    p is nullable => nullable(p)
**    nofail(p) => p cannot fail
** The function assumes that TOpenCall is not nullable;
** this will be checked again when the grammar is fixed.
** Run-time captures can do whatever they want, so the result
** is conservative.
]]--
local checkaux
checkaux = define_tree_visitor{
   [{
         TTag.Char, TTag.Set, TTag.Any, TTag.UTFR, TTag.False,
         TTag.OpenCall, TTag.Throw,
   }] = function(tree, pred, cs)
      return false -- not nullable
   end,
   [{TTag.Rep,TTag.True}] = function(tree, pred, cs)
      return true -- no fail
   end,
   [{TTag.Not,TTag.Behind}] = function(tree, pred, cs)
      -- can match empty, but can fail
      if pred == PE.nofail then
         return false
      else
         return true
      end
   end,
   [TTag.And] = function(tree, pred, cs)
      -- can match empty; fail iff body does
      if pred == PE.nullable then
         return true
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [TTag.RunTime] = function(tree, pred, cs)
      -- can fail; match empty iff body does
      if pred == PE.nofail then
         return false
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [TTag.Seq] = function(tree, pred, cs)
      if not checkaux(tree.sib1, pred, cs) then
         return false
      end
      return checkaux(tree.sib2, pred, cs) -- tail call
   end,
   [TTag.Choice] = function(tree, pred, cs)
      if checkaux(tree.sib2, pred, cs) then
         return true
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [{ TTag.Capture, TTag.Rule, TTag.XInfo, }] = function(tree, pred, cs)
      return checkaux(tree.sib1, pred, cs)
   end,
   [TTag.Grammar] = function(tree, pred, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, checkaux, tree.sib1, pred, cs)
   end,
   [TTag.Call] = function(tree, pred, cs)
      -- open calls are assumed not nullable; checked again after grammar
      -- is fixed
      if cs == nil then return false end
      return checkaux(cs.grammar.ruletab[tree.key], pred, cs)
   end,
}
register_fname("checkaux", checkaux)

function nofail(t, cs) return checkaux(t, PE.nofail, cs) end

function CompileState:nofail(t) return nofail(t, self) end

function nullable(t, cs) return checkaux(t, PE.nullable, cs) end

function CompileState:nullable(t) return nullable(t, self) end

function nullable_with_grammar(t, grm)
   local cs = CompileState:new(nil)
   return cs:withGrammar(grm, nullable, t, cs)
end

-- Note that we are counting characters, not bytes
local fixedlen, fixedlen_helper
fixedlen_helper = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR}] = function(tree, len)
      return len + 1
   end,
   [{TTag.False, TTag.True, TTag.Not, TTag.And, TTag.Behind}] = function(tree, len)
      return len
   end,
   [{TTag.Rep, TTag.RunTime, TTag.OpenCall, TTag.Throw,}] = function(tree, len)
      return -1 -- variable
   end,
   [{TTag.Capture, TTag.Rule, TTag.XInfo,}] = function(tree, len, cs)
      return fixedlen_helper(tree.sib1, len, cs)
   end,
   [TTag.Grammar] = function(tree, len, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, fixedlen_helper, tree.sib1, len, cs)
   end,
   [TTag.Call] = function(tree, len, cs)
      -- If evaluating outside the context of a grammar, conservatively
      -- return "variable"
      if cs == nil then return -1 end
      -- otherwise, carefully recurse
      local n1 = cs:callrecursive(tree, fixedlen, -1, cs)
      if n1 < 0 then return -1 end -- variable
      return len + n1
   end,
   [TTag.Seq] = function(tree, len, cs)
      local n1 = fixedlen_helper(tree.sib1, len, cs)
      if n1 < 0 then return -1 end -- variable
      return fixedlen_helper(tree.sib2, n1, cs)
   end,
   [TTag.Choice] = function(tree, len, cs)
      local n1 = fixedlen_helper(tree.sib1, len, cs)
      local n2 = fixedlen_helper(tree.sib2, len, cs)
      if n1 ~= n2 or n1 < 0 then
         return -1
      else
         return n1
      end
   end,
}
function fixedlen(tree, cs)
   return fixedlen_helper(tree, 0, cs) -- supply default 0 for 'len'
end
function CompileState:fixedlen(t) return fixedlen(t, self) end
register_fname("fixedlen_helper", fixedlen_helper)

--[[
** Computes the 'first set' of a pattern.
** The result is a conservative aproximation:
**   match p ax -> x (for some x) ==> a belongs to first(p)
** or
**   a not in first(p) ==> match p ax -> fail (for all x)
**
** The set 'follow' is the first set of what follows the
** pattern (full set if nothing follows it).
**
** The function returns 0 when this resulting set can be used for
** test instructions that avoid the pattern altogether.
** A non-zero return can happen for two reasons:
** 1) match p '' -> ''            ==> return has bit 1 set
** (tests cannot be used because they would always fail for an empty input);
** 2) there is a match-time capture ==> return has bit 2 set
** (optimizations should not bypass match-time captures).
]]--
local getfirst
getfirst = define_tree_visitor{
   [TTag.Char] = function(t, follow, cs)
      if t.n <= CHARMAX then return 0, tocharset(t) end
      -- conservative approximation!
      local s = newcharset()
      s.rest = true
      return 0, s
   end,
   [{ TTag.Set, TTag.Any, TTag.False }] = function(t, follow, cs)
      return 0, tocharset(t)
   end,
   [TTag.UTFR] = function(t, follow, cs)
      -- conservative approximation!
      local firstset = newcharset()
      if t.from <= CHARMAX then
         for i=t.from, math.min(CHARMAX, t.to) do
            firstset.set[i] = true
         end
      end
      if t.to > CHARMAX then
         -- conservative approximation of non-ascii unicode range
         firstset.rest = true
      end
      return 0, firstset
   end,
   [TTag.True] = function(t, follow, cs)
      return 1, follow -- 1 because this accepts the empty string
   end,
   [TTag.Throw] = function(t, follow, cs)
      -- labeled failure: must always throw the label
      return 1, fullset
   end,
   [TTag.Choice] = function(t, follow, cs)
      local firstset = newcharset()
      local e1,e1set = getfirst(t.sib1, follow, cs)
      local e2,e2set = getfirst(t.sib2, follow, cs)
      local firstset = cs_union(e1set, e2set)
      local ret = 0 -- awkward lua5.1 way to say "e1 | e2"
      if (e1 % 2) == 1 or (e2 % 2) == 1 then
         ret = ret + 1
      end
      e1,e2 = compat.rshift(e1, 1), compat.rshift(e2, 1)
      if (e1 % 2) == 1 or (e2 % 2) == 1 then
         ret = ret + 2
      end
      return ret, firstset
   end,
   [TTag.Seq] = function(t, follow, cs)
      if not nullable(t.sib1, cs) then
         -- when p1 is not nullable, p2 has nothing to contribute
         return getfirst(t.sib1, fullset, cs) -- tail call
      else -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))
         local e2,csaux = getfirst(t.sib2, follow, cs)
         local e1,firstset = getfirst(t.sib1, csaux, cs)
         if e1 == 0 then
            return 0, firstset -- 'e1' ensures that first can be used
         elseif compat.rshift(e1, 1) % 2 == 1 or compat.rshift(e2, 1) % 2 == 1 then
            -- one of the children has a matchtime?
            return 2, firstset -- pattern has a matchtime capture
         else
            return e2, firstset -- else depends on e2
         end
      end
   end,
   [TTag.Rep] = function(t, follow, cs)
      local _,firstset = getfirst(t.sib1, follow, cs)
      return 1, cs_union(firstset, follow, cs) -- accepts the empty string
   end,
   [{ TTag.Capture,TTag.Rule,TTag.XInfo }] = function(t, follow, cs)
      return getfirst(t.sib1, follow, cs) -- tail call
   end,
   [TTag.Grammar] = function(t, follow, cs)
      return cs:withGrammar(t, getfirst, t.sib1, follow, cs)
   end,
   [TTag.RunTime] = function(t, follow, cs)
      -- function invalidates any follow info
      local e,firstset = getfirst(t.sib1, fullset, cs)
      if e ~= 0 then
         -- function is not "protected"?
         return 2,firstset
      else
         -- pattern inside capture ensures first can be used
         return 0,firstset
      end
   end,
   [TTag.Call] = function(t, follow, cs)
      local rule = cs.grammar.ruletab[t.key]
      return getfirst(rule, follow, cs) -- tail call
   end,
   [TTag.And] = function(t, follow, cs)
      local e,firstset = getfirst(t.sib1, follow, cs)
      return e, cs_intersection(firstset, follow, cs)
   end,
   [{ TTag.Not, TTag.Behind }] = function(t, follow, cs)
      if t.tag == TTag.Not then
         local firstset = tocharset(t.sib1)
         if firstset ~= nil then
            return 1,cs_complement(firstset) -- could match empty input
         end
      end
      -- the TNot or TBehind gives no new information
      -- call getfirst only to check for math-time captures
      local e,_ = getfirst(t.sib1, follow, cs)
      if e%2 == 0 then e = e + 1 end -- set the lsb; could match empty input
      return e, follow -- uses follow
   end,
}
function CompileState:getfirst(t, follow) return getfirst(t, follow, self) end
register_fname("getfirst", getfirst)

--[[
** If 'headfail(tree)' true, then 'tree' can fail only depending on the
** next character of the subject.
   -- ie, a single character of lookahead is enough to evaluate the pattern
   -- rooted at this node
]]--
local headfail
headfail = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any,
     TTag.False}] = function(t, cs)
      return true
     end,
   [{TTag.True, TTag.Rep, TTag.RunTime, TTag.Not,
     -- even though we are codepoint-based, we don't have a TestUTFR instruction
     -- so we can't use a Test instruction on the first character, which is
     -- implicitly what headfail is checking for.
     TTag.UTFR,
     TTag.Behind, TTag.Throw}] = function(t, cs)
      return false
     end,
   [{TTag.Capture, TTag.Rule,
     TTag.XInfo, TTag.And}] = function(t, cs)
      return headfail(t.sib1, cs) -- tail call
     end,
   [TTag.Grammar] = function(t, cs)
      return cs:withGrammar(t, headfail, t.sib1, cs)
   end,
   [TTag.Call] = function(t, cs)
      local rule = cs.grammar.ruletab[t.key]
      return headfail(rule, cs) -- tail call
   end,
   [TTag.Seq] = function(t, cs)
      if not nofail(t.sib2, cs) then
         -- if the second child could possibly fail, then we can't
         -- evaluate the entire seq based just on the first child
         return false
      end
      return headfail(t.sib1, cs) -- tail call
   end,
   [TTag.Choice] = function(t, cs)
      -- both children need to be headfail for this to be headfail
      if not headfail(t.sib1, cs) then
         return false
      end
      return headfail(t.sib2, cs) -- tail call
   end,
}
function CompileState:headfail(t) return headfail(t, self) end
register_fname("headfail", headfail)

--[[
** Check whether the code generation for the given tree can benefit
** from a follow set (to avoid computing the follow set when it is
** not needed)
]]--
local needfollow
needfollow = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR,
    TTag.False, TTag.True, TTag.And, TTag.Not,
    TTag.RunTime, TTag.Grammar, TTag.Call, TTag.Behind,
    TTag.Throw, }] = function(tree) return false end,
   [{TTag.Choice, TTag.Rep}] = function(tree) return true end,
   [TTag.Capture] = function(tree) return needfollow(tree.sib1) end,
   [TTag.Seq] = function(tree) return needfollow(tree.sib2) end,
}
register_fname("needfollow", needfollow)

--[[
** ======================================================
** Code generation
** ======================================================
]]--

local Instruction = {}
Instruction.__index = Instruction

function Instruction:new(arg)
   local opcode = arg[1]
   if opcode == nil then error("no opcode") end
   -- target is rule # for open calls before correction, and absolute pc after
   local instr = {
      code = opcode,
      exec = opcode.exec, -- copy the exec function from the opcode!
      aux = arg.aux, -- used for the "primary argument"
      key = arg.key, -- used for string-valued arguments
      target = arg.target, -- used for jmp-like instructions
      from = arg.from, -- inclusive start, for ranges
      to = arg.to, -- inclusive end, for ranges
      set = arg.set, -- charset <= CHARMAX
      rest = arg.rest, -- include characters above CHARMAX?
      cap = arg.cap, -- used for "capture kind"
   }
   setmetatable(instr, self)
   instr:setCode(opcode) -- opportunity to add tracing logic!
   return instr
end

function Instruction:setCode(opcode)
   self.code = opcode
   local exec = opcode.exec
   if TRACE_INSTRUCTIONS then
      local str
      self.exec = function(self, state, ...)
         if str == nil then
            str = table.concat(printinst(0, self, { "Executing " })):gsub("\n","")
         end
         print(state.bytePos, state.codepoint, str)
         return exec(self, state, ...)
      end
   else
      self.exec = exec
   end
end

-- state for the compiler

function CompileState:new(p)
   local cs = {
      p = p,
   }
   setmetatable(cs, self)
   return cs
end

function CompileState:withGrammar(g, f, ...)
   local lastGrammar = self.grammar
   self.grammar = g
   local result = compat.pack(f(...))
   self.grammar = lastGrammar
   return compat.unpack(result)
end

function CompileState:codegen(tree, opt, tt, fl)
   assert(fl.tag == TTag.Set)
   -- just a little helper
   return codegen(tree, self, opt, tt, fl)
end

function CompileState:getinstr(i)
   return self.p.code[i]
end

function CompileState:addinstruction(arg)
   local code = self.p.code
   table.insert(code, Instruction:new(arg))
   return #code
end

function CompileState:gethere()
   local code = self.p.code
   return 1 + #code
end

function CompileState:jumptothere(pc, where)
   if pc ~= NOINST then
      local code = self.p.code
      code[pc].target = where
   end
end

function CompileState:jumptohere(pc)
   self:jumptothere(pc, self:gethere())
end

function codethrow(cs, throw)
   local rule = nil
   if cs.grammar ~= nil then
      -- we only lookup/match *string* rule names, not numeric indices
      rule = cs.grammar.ruletab[tostring(throw.key)]
   end
   if rule ~= nil then
      return cs:addinstruction{
         Opcode.ThrowRec,
         key=throw.key, -- rule name / error label
         target=rule.n -- recovery rule number
      }
   else
      return cs:addinstruction{
         Opcode.Throw,
         key=throw.key, -- rule name / error label
         -- no recovery rule
      }
   end
end

function codeutfr(cs, tree)
   return cs:addinstruction{
      Opcode.UTFR,
      from = tree.from,
      to = tree.to,
   }
end

--[[
** Code an IChar instruction, or IAny if there is an equivalent
** test dominating it
]]--
function codechar(cs, codepoint, tt)
   if tt ~= NOINST and
      cs:getinstr(tt).code == Opcode.TestChar and
      cs:getinstr(tt).aux == codepoint then
      cs:addinstruction{Opcode.Any}
   else
      cs:addinstruction{Opcode.Char, aux=codepoint,}
   end
end

--[[
** Add a charset posfix to an instruction
]]--
function addcharset(cs, codepoint)
   --[[
static void addcharset (CompileState *compst, const byte *cs) {
  int p = gethere(compst);
  int i;
  for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
    nextinstruction(compst);  /* space for buffer */
  /* fill buffer with charset */
      loopset(j, getinstr(compst, p).buff[j] = cs[j]);
   ]]--
end

--[[
** code a char set, optimizing unit sets for IChar, "complete"
** sets for IAny, and empty sets for IFail; also use an IAny
** when instruction is dominated by an equivalent test.
]]--
function codecharset(cs, tree, tt)
   local op,codepoint = charsettype(tree)
   if op == Opcode.Char then
      return codechar(cs, codepoint, tt)
   elseif op == Opcode.Set then
      -- non-trivial set?
      if tt ~= NOINST and
         cs:getinstr(tt).code == Opcode.TestSet and
         cs_equal(tree, cs:getinstr(tt)) then
         return cs:addinstruction{Opcode.Any}
      else
         return cs:addinstruction{
            Opcode.Set,
            set = tree.set, -- XXX ensure immutable
            rest = tree.rest,
         }
      end
   else
      return cs:addinstruction{op} -- Any or Fail
   end
end

--[[
** code a test set, optimizing unit sets for ITestChar, "complete"
** sets for ITestAny, and empty sets for IJmp (always fails).
** 'e' is nonzero iff test should accept the empty string. (Test
** instructions in the current VM never accept the empty string.)
]]--
function codetestset(cs, tree, e)
   if e ~= 0 then return NOINST end
   local op,codepoint = charsettype(tree)
   if op == Opcode.Fail then
      return cs:addinstruction{Opcode.Jmp, target = NOINST} -- always jump
   elseif op == Opcode.Any then
      return cs:addinstruction{Opcode.TestAny, target = NOINST}
   elseif op == Opcode.Char then
      return cs:addinstruction{
         Opcode.TestChar,
         target = NOINST,
         aux = codepoint,
      }
   elseif op == Opcode.Set then
      return cs:addinstruction{
         Opcode.TestSet,
         target = NOINST,
         set = tree.set, -- XXX ensure immutable
         rest = tree.rest,
      }
   else
      error("unreachable")
   end
end

--[[
** <behind(p)> == behind n; <p>   (where n = fixedlen(p))
]]--
function codebehind(cs, tree)
   if tree.n > 0 then
      cs:addinstruction{ Opcode.Behind, aux = tree.n }
   end
   return cs:codegen(tree.sib1, false, NOINST, fullset)
end

--[[
** Choice; optimizations:
** - when p1 is headfail or when first(p1) and first(p2) are disjoint,
** than a character not in first(p1) cannot go to p1 and a character
** in first(p1) cannot go to p2, either because p1 will accept
** (headfail) or because it is not in first(p2) (disjoint).
** (The second case is not valid if p1 accepts the empty string,
** as then there is no character at all...)
** - when p2 is empty and opt is true; a IPartialCommit can reuse
** the Choice already active in the stack.
]]--
function codechoice(cs, p1, p2, opt, fl)
   local emptyp2 = (p2.tag == TTag.True)
   local e1, cs1 = cs:getfirst(p1, fullset)
   local headfailp1 = cs:headfail(p1)
   local e2, cs2
   if not headfailp1 and e1 == 0 then
      e2, cs2 = cs:getfirst(p2, fl) -- avoid computing unless necessary
   end
   if headfailp1 or (e1 == 0 and cs_disjoint(cs1, cs2)) then
      -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:
      local test = codetestset(cs, cs1, 0)
      local jmp = NOINST
      cs:codegen(p1, false, test, fl)
      if not emptyp2 then
         jmp = cs:addinstruction{Opcode.Jmp, target = NOINST }
      end
      cs:jumptohere(test)
      cs:codegen(p2, opt, NOINST, fl)
      cs:jumptohere(jmp)
   elseif opt and emptyp2 then
      -- p1? == IPartialCommit; p1
      cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})
      cs:codegen(p1, true, NOINST, fullset)
   else
      -- <p1 / p2> ==
      --  test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2:
      local test = codetestset(cs, cs1, e1)
      local pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}
      cs:codegen(p1, emptyp2, test, fullset)
      local pcommit = cs:addinstruction{Opcode.Commit, target = NOINST}
      cs:jumptohere(pchoice)
      cs:jumptohere(test)
      cs:codegen(p2, opt, NOINST, fl)
      cs:jumptohere(pcommit)
   end
end

--[[
** And predicate
** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
** (valid only when 'p' has no captures)
]]--
function codeand(cs, tree, tt)
  --[[ labeled failure: optimization disabled because in case of a failure it
     does not report the expected error position (the current subject position
     when begin the matching of <&p>) ]]--
   local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST}
   cs:codegen(tree, false, tt, fullset)
   local pcommit = cs:addinstruction{Opcode.BackCommit, target = NOINST}
   cs:jumptohere(pchoice)
   cs:addinstruction{Opcode.Fail}
   cs:jumptohere(pcommit)
end

--[[
** Captures: if pattern has fixed (and not too big) length, and it
** has no nested captures, use a single IFullCapture instruction
** after the match; otherwise, enclose the pattern with OpenCapture -
** CloseCapture.
]]--
function codecapture(cs, tree, tt, fl)
   local len = cs:fixedlen(tree.sib1)
   if len >= 0 and len <= MAXOFF and not cs:hascaptures(tree.sib1) then
      cs:codegen(tree.sib1, false, tt, fl)
      cs:addinstruction{
         Opcode.FullCapture,
         cap = tree.cap,
         key = tree.key, -- capture name
         aux = len,
      }
   else
      assert(tree.cap ~= nil)
      cs:addinstruction({
            Opcode.OpenCapture,
            cap = tree.cap,
            key = tree.key, -- capture name
      })
      cs:codegen(tree.sib1, false, tt, fl)
      cs:addinstruction({
            Opcode.CloseCapture,
            cap = CapKind.close,
      })
   end
end

function coderuntime(cs, tree, tt)
   cs:addinstruction({
         Opcode.OpenCapture,
         cap = CapKind.group,
         key = tree.key, -- capture *function*
   })
   cs:codegen(tree.sib1, false, tt, fullset)
   cs:addinstruction({
         Opcode.CloseRunTime,
         cap = CapKind.close,
   })
end

--[[
** Repetition; optimizations:
** When pattern is a charset, can use special instruction ISpan.
** When pattern is head fail, or if it starts with characters that
** are disjoint from what follows the repetions, a simple test
** is enough (a fail inside the repetition would backtrack to fail
** again in the following pattern, so there is no need for a choice).
** When 'opt' is true, the repetion can reuse the Choice already
** active in the stack.
]]--
function coderep(cs, tree, opt, fl)
   local st = tocharset(tree)
   if st ~= nil then
      return cs:addinstruction{
         Opcode.Span,
         set = st.set,
         rest = st.rest,
      }
   end
   local e1,st = cs:getfirst(tree, fullset)
   if cs:headfail(tree) or (e1 == 0 and cs_disjoint(st, fl)) then
      -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2:
      local test = codetestset(cs, st, 0)
      cs:codegen(tree, false, test, fullset)
      local jmp = cs:addinstruction{Opcode.Jmp, target = NOINST}
      cs:jumptohere(test)
      cs:jumptothere(jmp, test)
   else
      -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2:
      -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1;
      local test = codetestset(cs, st, e1)
      local pchoice = NOINST
      if opt then
         cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})
      else
         pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}
      end
      local l2 = cs:gethere()
      cs:codegen(tree, false, NOINST, fullset)
      local commit = cs:addinstruction{Opcode.PartialCommit, target = NOINST}
      cs:jumptothere(commit, l2)
      cs:jumptohere(pchoice)
      cs:jumptohere(test)
   end
end

--[[
** Not predicate; optimizations:
** In any case, if first test fails, 'not' succeeds, so it can jump to
** the end. If pattern is headfail, that is all (it cannot fail
** in other parts); this case includes 'not' of simple sets. Otherwise,
** use the default code (a choice plus a failtwice).
]]--
function codenot(cs, tree)
   local e,st = cs:getfirst(tree, fullset)
   local test = codetestset(cs, st, e)
   if cs:headfail(tree) then
      -- test (fail(p1)) -> L1; fail; L1:
      cs:addinstruction{Opcode.Fail}
   else
      -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1:
      local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST }
      cs:codegen(tree, false, NOINST, fullset)
      cs:addinstruction{Opcode.FailTwice}
      cs:jumptohere(pchoice)
   end
   cs:jumptohere(test)
end

-- find the final destination of a sequence of jumps
function finaltarget(code, pc)
   while code[pc].code == Opcode.Jmp do
      pc = code[pc].target
   end
   return pc
end

-- final label (after traversing any jumps)
function finallabel(code, pc)
   return finaltarget(code, code[pc].target)
end

--[[
** change open calls to calls, using list 'positions' to find
** correct offsets; also optimize tail calls
]]--
function correctcalls(cs, positions, from, to)
   local code = cs.p.code
   for i=from,(to-1) do
      local op = code[i]
      if op.code == Opcode.OpenCall or op.code == Opcode.ThrowRec then
         local n = op.target -- rule number
         local rule = positions[n] -- rule position
         if rule == from or code[rule - 1].code == Opcode.Ret then
            -- sanity check! ok!
         else
            error("bad rule position")
         end
         if op.code == Opcode.OpenCall then
            if code[finaltarget(code, i+1)].code == Opcode.Ret then
               -- call; ret => tail call
               op:setCode(Opcode.Jmp)
            else
               op:setCode(Opcode.Call) -- open call no more
            end
         end
         op.target = rule
      end
   end
end

--[[
** Code for a grammar:
** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
]]--
function codegrammar(cs, tree)
   local firstcall = cs:addinstruction{Opcode.Call, target = NOINST} -- call initial rule
   local jumptoend = cs:addinstruction{Opcode.Jmp, target = NOINST} -- jump to the end
   local start = cs:gethere() -- here starts the initial rule
   cs:jumptohere(firstcall)

   local positions = {}
   local rule = tree.sib1
   for i=1,tree.n do
      local pattern = rule.sib1
      positions[i] = cs:gethere() -- save rule position
      cs:codegen(rule.sib1, false, NOINST, fullset) -- code rule
      cs:addinstruction{Opcode.Ret}
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then error("impossible") end
   cs:jumptohere(jumptoend)
   correctcalls(cs, positions, start, cs:gethere())
end

function codecall(cs, tree)
   local rule = cs.grammar.ruletab[tree.key]
   assert(rule ~= nil)
   assert(rule.n ~= nil)
   return cs:addinstruction{
      Opcode.OpenCall, -- to be corrected later
      target = rule.n -- rule number
   }
end

--[[
** Code first child of a sequence
** (second child is called in-place to allow tail call)
** Return 'tt' for second child
]]--
function codeseq1(cs, p1, p2, tt, fl)
   assert(fl.tag == TTag.Set)
   if needfollow(p1) then
      local _, fl1 = cs:getfirst(p2, fl) -- p1 follow is p2 first
      cs:codegen(p1, false, tt, fl1)
   else
      -- use fullset as follow
      cs:codegen(p1, false, tt, fullset)
   end
   if cs:fixedlen(p1) ~= 0 then -- can 'p1' consume anything?
      return NOINST -- invalidate test
   else
      return tt -- else 'tt' still protects sib2
   end
end

--[[
** Main code-generation function: dispatch to auxiliar functions
** according to kind of tree. ('needfollow' should return true
** only for consructions that use 'fl'.)
]]--

--[[
** code generation is recursive; 'opt' indicates that the code is being
** generated as the last thing inside an optional pattern (so, if that
** code is optional too, it can reuse the 'IChoice' already in place for
** the outer pattern). 'tt' points to a previous test protecting this
** code (or NOINST). 'fl' is the follow set of the pattern.
]]--
codegen = define_tree_visitor{
   [TTag.Char] = function(tree, cs, opt, tt, fl)
      return codechar(cs, tree.n, tt)
   end,
   [TTag.Any] = function(tree, cs, opt, tt, fl)
      return cs:addinstruction{Opcode.Any}
   end,
   [TTag.Set] = function(tree, cs, opt, tt, fl)
      return codecharset(cs, tree, tt)
   end,
   [TTag.True] = function(tree, cs, opt, tt, fl)
      return -- do nothing
   end,
   [TTag.False] = function(tree, cs, opt, tt, fl)
      return cs:addinstruction{Opcode.Fail}
   end,
   [TTag.UTFR] = function(tree, cs, opt, tt, fl)
      return codeutfr(cs, tree)
   end,
   [TTag.Choice] = function(tree, cs, opt, tt, fl)
      return codechoice(cs, tree.sib1, tree.sib2, opt, fl)
   end,
   [TTag.Rep] = function(tree, cs, opt, tt, fl)
      return coderep(cs, tree.sib1, opt, fl)
   end,
   [TTag.Behind] = function(tree, cs, opt, tt, fl)
      return codebehind(cs, tree)
   end,
   [TTag.Not] = function(tree, cs, opt, tt, fl)
      return codenot(cs, tree.sib1)
   end,
   [TTag.And] = function(tree, cs, opt, tt, fl)
      return codeand(cs, tree.sib1, tt)
   end,
   [TTag.Capture] = function(tree, cs, opt, tt, fl)
      return codecapture(cs, tree, tt, fl)
   end,
   [TTag.RunTime] = function(tree, cs, opt, tt, fl)
      return coderuntime(cs, tree, tt)
   end,
   [TTag.Grammar] = function(tree, cs, opt, tt, fl)
      return cs:withGrammar(tree, codegrammar, cs, tree)
   end,
   [TTag.Call] = function(tree, cs, opt, tt, fl)
      return codecall(cs, tree)
   end,
   [TTag.Seq] = function(tree, cs, opt, tt, fl)
      tt = codeseq1(cs, tree.sib1, tree.sib2, tt, fl) -- code 'p1'
      return cs:codegen(tree.sib2, opt, tt, fl) -- tail call
   end,
   [TTag.Throw] = function(tree, cs, opt, tt, fl)
      return codethrow(cs, tree)
   end,
   ["assert"] = function(tree, cs, opt, tt, fl)
      assert(fl.tag == TTag.Set)
      assert(opt ~= 0)
   end,
}
register_fname("codegen", codegen)

--[[
** Optimize jumps and other jump-like instructions.
** * Update labels of instructions with labels to their final
** destinations (e.g., choice L1; ... L1: jmp L2: becomes
** choice L2)
** * Jumps to other instructions that do jumps become those
** instructions (e.g., jump to return becomes a return; jump
** to commit becomes a commit)
]]--
function peephole(cs)
   local code = cs.p.code
   local jmpswitch
   local switch = define_opcode_visitor{
      -- instructions with labels
      [{Opcode.Choice, Opcode.Call, Opcode.Commit, Opcode.PartialCommit,
        Opcode.BackCommit, Opcode.TestChar, Opcode.TestSet,
        Opcode.TestAny}] = function(op, i)
         cs:jumptothere(i, finallabel(code, i))
        end,
      [Opcode.Jmp] = function(op, i)
         local ft = finaltarget(code, i)
         jmpswitch(code[ft], i, ft) -- jumping to what?
      end,
      default = function() end,
   }
   jmpswitch = define_opcode_visitor{
      -- instructions with unconditional implicit jumps
      [{Opcode.Ret,Opcode.Fail,Opcode.FailTwice,Opcode.End}] = function(op, i, ft)
         code[i]:setCode(code[ft].code) -- jump becomes that instruction
      end,
      -- instructions with unconditional explicit jumps
      [{Opcode.Commit, Opcode.PartialCommit, Opcode.BackCommit}] = function(op, i, ft)
         local fft = finallabel(code, ft)
         code[i]:setCode(code[ft].code) -- jump becomes that instruction
         cs:jumptothere(i, fft) -- with an optimized target
         switch(code[i], i) -- reoptimize the label
      end,
      default = function(op, i, ft)
         cs:jumptothere(i, ft) -- optimize label
      end,
   }
   for i=1,#code do
      switch(code[i], i)
   end
end

-- thread the instructions to speed up dispatch during execution
function thread(cs)
   local code = cs.p.code
   for i=1,#code-1 do
      code[i].next = code[i+1]
      if code[i].target ~= nil then
         code[i].branch = code[code[i].target]
      end
   end
end

function compile(p)
   local compst = CompileState:new(p)
   p.code = {}
   assert(fullset.tag == TTag.Set)
   compst:codegen(p, false, NOINST, fullset)
   compst:addinstruction{Opcode.End}
   peephole(compst)
   thread(compst)
   return p.code
end

return {
   Instruction = Instruction,
   compile = compile,
   cs_clone = cs_clone,
   cs_complement = cs_complement,
   cs_diff = cs_diff,
   cs_intersection = cs_intersection,
   cs_union = cs_union,
   fixedlen = fixedlen,
   hascaptures = hascaptures,
   nofail = nofail,
   nullable = nullable,
   nullable_with_grammar = nullable_with_grammar,
   tocharset = tocharset,
}

end)

register('llpeg.utf8util', function(myrequire)
myrequire('strict')

local utf8util = {}

function utf8util.codepointAt(s, pos)
   local c1 = string.byte(s, pos)
   if c1 <= 0x7F then
      return c1, 1
   end
   local c2 = string.byte(s, pos + 1)
   if c1 <= 0xDF then
      return ((c1 % 0x20 ) * 0x40) + (c2 % 0x40), 2
   end
   local c3 = string.byte(s, pos + 2)
   if c1 <= 0xEF then
      return (((c1 % 0x10) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40), 3
   end
   local c4 = string.byte(s, pos + 3)
   if c1 <= 0xF7 then
      return ((((c1 % 0x08) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40)) * 0x40 + (c4 % 0x40), 4
   end
   error( "bad utf8" )
end

-- same as utf8.offset in Lua 5.3 standard library
function utf8util.offset(s, n, i)
   if n > 0 then error("unimplemented") end
   while n < 0 do
      i = i - 1
      if i < 1 then return nil end
      local c = string.byte(s, i)
      if c < 0x80 or c > 0xBF then
         n = n + 1
      end
   end
   return i
end

return utf8util

end)

register('llpeg.list', function(myrequire)
local List = {}
List.__index = List

function List:new()
   return setmetatable({ n = 0 }, self)
end

function List:__len()
   return self.n
end

function List:push(val)
   local n = self.n + 1
   self[n] = val
   self.n = n
end

function List:pop()
   local n = self.n
   assert(n > 0)
   local old = self[n]
   self[n] = nil
   self.n = n - 1
   return old
end

function List:insert(pos, val)
   for i=self.n,pos,-1 do
      self[i+1] = self[i]
   end
   self[pos] = val
   self.n = self.n + 1
end

return List

end)

register('llpeg.cap', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CapKind,
   _ = from(myrequire('llpeg.types'), {
               'CapKind',
   })
local
   printcaplist,
   _ = from(myrequire('llpeg.print'), {
               'printcaplist',
   })
local List = myrequire('llpeg.list')

local MAXRECLEVEL = 200

local Capture = {}
Capture.__index = Capture

-- kind is CapKind of the capture
-- bytePos is the subject position (in bytes)
-- byteLen is the length of the capture (in bytes)
-- extra is extra info (group name, arg index, etc)
function Capture:new(kind, bytePos, byteLen, extra)
   assert(getmetatable(kind) == CapKind)
   return setmetatable({
         kind = kind, bytePos = bytePos, byteLen = byteLen, extra = extra,
   }, self)
end

function Capture:__tostring()
   return string.format("Capture{kind=%s, pos=%d, len=%s, extra=%s}",
                        self.kind, self.bytePos, self.byteLen, self.extra)
end

function Capture:isopencap()
   return self.byteLen == nil
end

-- true if c2 is (any number of levels) inside self
function Capture:inside(c2)
   if self:isopencap() then
      return not c2:isclosecap()
   else
      return c2.bytePos < (self.bytePos + self.byteLen)
   end
end

function Capture:isclosecap()
   return self.kind == CapKind.close
end

--[[
** Return the size of capture 'cap'. If it is an open capture, 'close'
** must be its corresponding close.
]]--
function Capture:size(close)
   if self:isopencap() then
      assert(close:isclosecap())
      return close.bytePos - self.bytePos
   else
      return self.byteLen
   end
end

function CapKind:newCapture(bytePos, byteLen, extra)
   return Capture:new(self, bytePos, byteLen, extra)
end

local CapState = {}
CapState.__index = CapState

-- Capture cap: current capture
-- Capture ocap: (original) capture list
-- int ptop: index of last argument to 'match'
-- string s: original string
-- int valuecached: value stored in cache slot
-- int reclevel: recursion level
function CapState:new(captures, source, extraArgs)
   return setmetatable({
         captures = captures,
         index = 1,
         source = source,
         valuecached = {},
         reclevel = 0,
         extraArgs = extraArgs,
    }, self)
end

function CapState:cap() -- helper
   return self.captures[self.index]
end

function CapState:advance() -- helper
   local i = self.index
   local cap = self.captures[i]
   self.index = i + 1
   return cap, i
end

function CapState:substr(start, len) -- helper
   return string.sub(self.source, start, start + len - 1)
end

function CapState:skipclose(head)
   if head:isopencap() then
      assert(self.captures[self.index]:isclosecap())
      self.index = self.index + 1
   end
end

function CapState:closesize(head)
   return head:size(self:cap())
end

--[[
** Go to the next capture at the same level
]]--
function CapState:nextcap()
   local cap = self:cap()
   if cap:isopencap() then -- must look for a close
      local n = 0 -- number of opens waiting a close
      while true do -- look for corresponding close
         self.index = self.index + 1
         cap = self:cap()
         if cap:isopencap() then
            n = n + 1
         elseif cap:isclosecap() then
            if n == 0 then break end
            n = n - 1
         end
      end
      self.index = self.index + 1 -- skip last close (or entire single capture)
   else
      self.index = self.index + 1
      while cap:inside(self:cap()) do
         self.index = self.index + 1 -- skip captures inside the current one
      end
   end
end

--[[
** Goes back in a list of captures looking for an open capture
** corresponding to a close
]]--
function CapState:findopen(i) -- captures[i] is the close that we want to match
   assert(self.captures[i]:isclosecap())
   local n = 0 -- number of closes waiting an open
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() then
         n = n + 1 -- one more open to skip
      elseif cap:isopencap() then
         if n == 0 then return cap,i end
         n = n - 1
      end
   end
   error("couldn't find open")
end

--[[
** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
** not nested inside a full capture that does not contain 'ref'.  (We
** only need to care for full captures because the search at 'findback'
** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
** 'ref' is also inside it.)  To check this, we search backward for the
** inner full capture enclosing 'grp'.  A full capture cannot contain
** non-full captures, so a close capture means we cannot be inside a
** full capture anymore.
]]--
function CapState:capvisible(igrp, ref)
   local i = igrp
   local grp = self.captures[igrp]
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() then
         return true -- can stop the search
      elseif cap:inside(grp) then -- is 'grp' inside cap?
         return cap:inside(ref) -- ok iff cap also contains ref
      end
   end
   return true -- 'grp' is not inside any capture
end

--[[
** Try to find a named group capture with the name given;
** goes backward from 'i'.
]]--
function CapState:findback(name, i)
   if i == nil then i = self.index end
   local ref = self.captures[i]
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() or not cap:inside(ref) then
         if cap:isclosecap() then
            cap,i = self:findopen(i)
         end
         if cap.kind == CapKind.group and self:capvisible(i, ref) then
            if cap.extra == name then
               return cap,i
            end
         end
      end
   end
   error("back reference '"..name.."' not found")
end

function CapState:getcaptures()
   local result = List:new()
   while not self:cap():isclosecap() do
      self:pushcapture(result)
   end
   return result
end

function CapState:pushcapture(result)
   self.reclevel = self.reclevel + 1
   if self.reclevel > MAXRECLEVEL then
      error("subcapture nesting too deep")
   end
   local cap = self.captures[self.index]
   assert(cap.kind.push ~= nil)
   local res = cap.kind.push(self, cap, result)
   self.reclevel = self.reclevel - 1
   return res
end

-- helper functions for pushcapture

--[[
** Push on the Lua stack all values generated by nested captures inside
** the current capture. Returns number of values pushed. 'addextra'
** makes it push the entire match after all captured values. The
** entire match is pushed also if there are no other nested values,
** so the function never returns zero.
]]--
function CapState:pushnestedvalues(result, addextra)
   local head = self:advance()
   local n = 0 -- number of pushed subvalues
   -- repeat for all nested patterns
   while head:inside(self:cap()) do
      n = n + self:pushcapture(result)
   end
   if addextra or n == 0 then -- need extra?
      result:push(self:substr(head.bytePos, self:closesize(head)))
      n = n + 1
   end
   self:skipclose(head)
   return n
end

--[[
** Push only the first value generated by nested captures
]]--
function CapState:pushonenestedvalue(result)
   local n = self:pushnestedvalues(result, false)
   if n == 0 then
      result:push(nil) -- ensure there's exactly one value
      return 1
   end
   while n > 1 do
      result:pop() -- pop extra values
      n = n - 1
   end
   return n
end


-- visitor patterns for pushcapture
function CapKind.position.push(capstate, cap, result)
   result:push(cap.bytePos)
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.const.push(capstate, cap, result)
   result:push(cap.extra)
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.arg.push(capstate, cap, result)
   local n = cap.extra
   if n > capstate.extraArgs.n then
      error(string.format("reference to absent extra argument #%d", n))
   end
   result:push(capstate.extraArgs[n])
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.simple.push(capstate, cap, result)
   local k = capstate:pushnestedvalues(result, true)
   -- reorder so that the whole match is the first result, not the last
   local last = result:pop()
   result:insert(2 + #result - k, last)
   return k
end

-- missing a bunch

--[[
** Table capture: creates a new table and populates it with nested
** captures.
]]--
function CapKind.table.push(capstate, cap, result) -- aka tablecap
   local t = {}
   result:push(t)
   local head = capstate:advance()

   local n = 0
   while head:inside(capstate:cap()) do
      cap = capstate:cap()
      if cap.kind == CapKind.group and cap.extra ~= nil then -- named group?
         capstate:pushonenestedvalue(result)
         t[cap.extra] = result:pop() -- move it into table
      else -- not a named group
         local k = capstate:pushcapture(result)
         for i=k,1,-1 do
            t[n + i] = result:pop() -- move it into table (indexed)
         end
         n = n + k
      end
   end
   capstate:skipclose(head)
   return 1 -- number of values pushed (only the table)
end

--[[
** Table-query capture
]]--
function CapKind.query.push(capstate, cap, result) -- aka querycap
   capstate:pushonenestedvalue(result)
   local key = result:pop()
   local tbl = cap.extra
   assert(type(tbl) == "table")
   local val = tbl[key]
   if val ~= nil then
      result:push(val)
      return 1
   else
      return 0
   end
end

--[[
** Fold capture
]]--
function CapKind.fold.push(capstate, cap, result) -- aka foldcap
   local f = cap.extra
   assert(type(f) == "function")
   local head = capstate:advance()
   if capstate:cap():isclosecap() then
      -- no nested captures? (large subject)
      error("no initial value for fold capture")
   end
   local args = List:new()
   local n = capstate:pushcapture(args)
   if n == 0 then
      -- nested captures with no values?
      error("no initial value for fold capture")
   end
   local accum = args[1] -- leave only one result for accumulator
   while head:inside(capstate:cap()) do
      args = List:new()
      args:push( accum ) -- put accumulator first
      n = capstate:pushcapture(args) -- get next capture's values
      accum = f(compat.unpack(args))
   end
   capstate:skipclose(head)
   -- only accumulator left in result
   result:push(accum)
   return 1
end

--[[
** Function capture
]]--
CapKind["function"].push = function(capstate, cap, result)
   local f = cap.extra
   assert(type(f) == "function")
   local args = List:new()
   local n = capstate:pushnestedvalues(args, false)
   local r = compat.pack(f(compat.unpack(args)))
   for i=1,r.n do
      result:push(r[i])
   end
   return r.n
end

--[[
** Accumulator capture
]]--
function CapKind.acc.push(capstate, cap, result) -- aka accumulatorcap
   if #result == 0 then
      error("no previous value for accumulator capture")
   end
   local f = cap.extra
   assert(type(f) == "function")
   local prev = #result
   local args = List:new()
   args:push(result[prev])
   local n = capstate:pushnestedvalues(args, false)
   result[prev] = f(compat.unpack(args))
   return 0 -- did not add any extra value
end

--[[
** Select capture
]]--
function CapKind.num.push(capstate, cap, result) -- aka numcap
   local idx = cap.extra -- value to select
   if idx == 0 then -- no values?
      capstate:nextcap() -- skip entire capture
      return 0 -- no value produced
   else
      local vals = List:new()
      local n = capstate:pushnestedvalues(vals, false)
      if n < idx then -- invalid index?
         error("no capture '"..idx.."'")
      else
         result:push(vals[idx])
         return 1
      end
   end
end

function CapState:runtimecap(closePos)
   local close = self.captures[closePos]
   local open,openPos = self:findopen(closePos) -- get open group capture
   assert(open.kind == CapKind.group)
   self.index = openPos -- set state to the open capture
   local args = List:new()
   args:push( self.source) -- original subject
   args:push( close.bytePos ) -- current position
   local n = self:pushnestedvalues(args, false) -- push nested captures
   local func = open.extra
   local funcRet = compat.pack(func(compat.unpack(args)))
   local res = closePos - openPos -- number of captures to be removed
   return res, funcRet
end

function CapKind.runtime.push(capstate, cap, result) -- aka runtimecap
   result:push(cap.extra)
   capstate.index = capstate.index + 1
   return 1
end

local MAXSTRCAPS = 10

--[[
** Collect values from current capture into array 'cps'. Current
** capture must be Cstring (first call) or Csimple (recursive calls).
** (In first call, fills %0 with whole match for Cstring.)
** Returns number of elements in the array that were filled.
]]--
function CapState:getstrcaps(cps, n)
   local k = n
   n = n + 1
   cps[k] = {
      isstring = true, -- get string value
      bytePos = self:cap().bytePos, -- starts here
   }
   local head = self:advance()
   while head:inside(self:cap()) do
      if n > MAXSTRCAPS then -- too many captures?
         self:nextcap() -- skip extra captures (will not need them)
      elseif self:cap().kind == CapKind.Simple then -- string?
         n = self:getstrcaps(cps, n) -- put info into array
      else
         cps[n] = {
            isstring = false, -- not a string
            cap = self.index, -- keep original capture
         }
         self:nextcap()
         n = n + 1
      end
   end

   cps[k].endPos = head.bytePos + self:closesize(head)
   self:skipclose(head)

   return n
end

function CapState:addonestring(buffer, what)
   local cap = self:cap()
   if cap.kind == CapKind.string then
      -- add capture directly to buffer
      return stringcap(self, buffer)
   elseif cap.kind == CapKind.subst then
      -- add capture directly to buffer
      return substcap(self, buffer)
   elseif cap.kind == CapKind.acc then
      error("invalid context for an accumulator capture")
   end
   local result = List:new()
   local n = self:pushcapture(result)
   if n == 0 then return 0 end -- no values to add
   local val = result[1] -- take only one result (the first)
   if type(val) == "number" then
      val = tostring(val)
   elseif type(val) ~= "string" then
      error("invalid "..what.." value (a "..type(val)..")")
   end
   table.insert(buffer, val)
   return 1
end

--[[
** String capture: add result to buffer 'b' (instead of pushing
** it into the stack)
]]--
function stringcap(capstate, buffer)
   local fmt = capstate:cap().extra
   local cps = {}
   local n = capstate:getstrcaps(cps, 1) - 1 -- collect nested captures
   local sawEscape = false
   for _,c in compat.utf8codes(fmt) do
      if sawEscape then
         sawEscape = false
         if c < 48 or c > 57 then -- not followed by a digit
            table.insert(buffer, compat.utf8char(c))
         else
            local l = 1 + c - 48 -- capture index (1-based)
            if l > n then
               error("invalid capture index ("..(l-1)..")")
            elseif cps[l].isstring then
               table.insert(buffer, capstate:substr(cps[l].bytePos, cps[l].endPos - cps[l].bytePos))
            else
               -- go back to evaluate that nested capture
               local curr = capstate.index
               capstate.index = cps[l].cap
               if capstate:addonestring(buffer, "capture") == 0 then
                  error("no values in capture index "..l)
               end
               capstate.index = curr
            end
         end
      elseif c ~= 37 then -- not a % escape?
         table.insert(buffer, compat.utf8char(c))
      else
         sawEscape = true
      end
   end
   return 1
end

--[[
** Substitution capture: add result to buffer 'b'
]]--
function substcap(capstate, buffer)
   local head = capstate:advance()

   local curr = head.bytePos
   while head:inside(capstate:cap()) do
      local cap = capstate:cap()
      local nextPos = cap.bytePos
      local s = capstate:substr(curr, nextPos - curr)
      table.insert(buffer, s) -- add text up to capture
      if capstate:addonestring(buffer, "replacement") == 0 then
         -- no capture value, keep original text in final result
         curr = nextPos
      else
         -- continue after match
         local lastCap = capstate.captures[capstate.index - 1]
         curr = nextPos + cap:size(lastCap)
      end
   end
   -- add last piece of text
   local s = capstate:substr(curr, head.bytePos + capstate:closesize(head) - curr)
   table.insert(buffer, s)
   capstate:skipclose(head)
end

function CapKind.subst.push(capstate, cap, result) -- aka substcap
   local buffer = {}
   substcap(capstate, buffer)
   result:push(table.concat(buffer))
   return 1
end


function CapKind.string.push(capstate, cap, result) -- aka stringcap
   local buffer = {}
   stringcap(capstate, buffer)
   result:push(table.concat(buffer))
   return 1
end

function CapKind.group.push(capstate, cap, result)
   if cap.extra == nil then -- anonymous group?
      return capstate:pushnestedvalues(result, false) -- add all nested values
   else -- named group: add no values
      capstate:nextcap()
      return 0
   end
end

--[[
** Back-reference capture. Return number of values pushed.
]]--
function CapKind.backref.push(capstate, cap, result)
   local curr = capstate.index
   local _,i = capstate:findback(cap.extra)
   capstate.index = i
   local n = capstate:pushnestedvalues(result, false)
   capstate.index = curr + 1
   return n
end

return {
   CapState = CapState,
   Capture = Capture,
}

end)

register('llpeg.vm', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local utf8util = myrequire('llpeg.utf8util')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   enum,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'enum',
   })
local
   CapState,
   Capture,
   __ = from(myrequire('llpeg.cap'), {
                'CapState',
                'Capture',
   })
local
   Instruction,
   ___ = from(myrequire('llpeg.code'), {
                'Instruction',
   })
local
   printcaplist,
   ___ = from(myrequire('llpeg.print'), {
                'printcaplist',
   })

local LFAIL = {}
local InsidePred = enum{
   OUTPRED = 0,
   INPRED = 1,
}

local Stack = {}
Stack.__index = Stack
-- Stack prev: previous entry in the stack
-- int bytePos: saved position, or NULL for calls
-- Instruction pc: saved instruction
-- int caplevel
-- int labenv -- for labeled failure
-- bool predchoice -- for labeled failure
function Stack:new(prev, bytePos, pc, caplevel, labenv, predchoice)
   return setmetatable({
         prev = prev,
         bytePos = bytePos, pc = pc, caplevel = caplevel,
         labenv = labenv, predchoice = predchoice,
   }, self)
end

function Stack:__tostring()
   return string.format(
      "Stack{ bytePos=%d, caplevel=%d, labenv=%s, predchoice=%s }",
      self.bytePos, self.caplevel, self.labenv, self.predchoice
   )
end

function Stack:print()
   local s = self
      while s ~= nil do
         print("Stack", s)
         s = s.prev
      end
end

local MatchResult = {}
MatchResult.__index = MatchResult

function MatchResult:new()
   return setmetatable({
         labelf = nil, -- failure label
         sfail = -1, -- farthest failure
   }, self)
end

local State = {}
State.__index = State

function State:new(source, bytePos, ...)
   local giveup = Instruction:new{Opcode.Giveup}
   local insidepred = InsidePred.OUTPRED -- label environment is off inside predicates
   local stack = Stack:new(nil, bytePos, giveup, 0, insidepred, nil)
   local cp,cpLen
   if bytePos <= #source then
      cp, cpLen = utf8util.codepointAt(source, bytePos)
   else
      cp, cpLen = nil, nil
   end
   assert(bytePos ~= nil)
   return setmetatable({
         source = source, -- the source string
         bytePos = bytePos, -- current position in the string, in bytes
         codepoint = cp, -- the codepoint at 'bytePos' in 'source'
         codepointLen = cpLen, -- the length of the codepoint at 'bytePos'
         stack = stack, -- top of stack
         captures = {}, -- list of captures
         captop = 1, -- point to first empty slot in captures (1-based)
         extraArgs = compat.pack(...),
         -- labeled failures:
         insidepred = insidepred,
         labelf = nil, -- failure label
         sfail = -1, -- farthest failure
   }, self)
end

function State:advance()
   return self:resetPosTo(self.bytePos + self.codepointLen)
end

function State:resetPosTo(newPos)
   assert(newPos ~= nil)
   self.bytePos = newPos
   local source = self.source
   if newPos <= #source then
      local cp, cpLen = utf8util.codepointAt(source, newPos)
      self.codepoint = cp
      self.codepointLen = cpLen
      return cp
   else
      self.codepoint = nil
      self.codepointLen = nil
      return nil
   end
end

function State:backtrack(n)
   local off = utf8util.offset(self.source, -n, self.bytePos)
   if off == nil then return false end -- can't backtrack that far!
   self:resetPosTo(off)
   return true
end

function State:updatefarthest(label)
   self.labelf = label
   if self.bytePos > self.sfail then
      self.sfail = self.bytePos
   end
end

function State:pushcapture(cap)
   self.captures[self.captop] = cap
   self.captop = self.captop + 1
end

function State:fail()
   -- pattern failed, try to backtrack
   local lastStack
   repeat
      lastStack = self.stack
      self.stack = lastStack.prev
   until lastStack.bytePos ~= nil
   self:resetPosTo(lastStack.bytePos)
   self.captop = lastStack.caplevel
   self.insidepred = lastStack.labenv -- labeled failure
   return lastStack.pc:exec(self)
end

function State:giveup()
   local r = nil
   local lab = "fail"
   local errpos = self.sfail
   if self.labelf ~= nil and self.labelf ~= LFAIL then
      lab = self.labelf
   end
   return r, lab, errpos
end

function State:getcaptures()
   local results = {}
   if self.captures[1].kind == CapKind.close then -- are there any captures?
      return results -- no captures
   end
   return CapState:new(self.captures, self.source, self.extraArgs):getcaptures()
end

function Opcode.End:exec(state)
   state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))
   -- trim table to proper length
   while #state.captures > state.captop - 1 do
      table.remove(state.captures)
   end
   -- printcaplist(state.captures, #state.captures) -- for debugging
   local results = state:getcaptures()
   if #results == 0 then -- no captured values?
      return state.bytePos -- return only end position
   else
      return compat.unpack(results)
   end
end

function Opcode.Giveup:exec(state)
   return state:giveup()
end

function Opcode.Ret:exec(state)
   local lastStack = state.stack
   state.stack = lastStack.prev
   return lastStack.pc:exec(state)
end

function Opcode.Any:exec(state)
   if state.codepoint ~= nil then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestAny:exec(state)
   if state.codepoint ~= nil then
      return self.next:exec(state)
   else
      return self.branch:exec(state)
   end
end

function Opcode.UTFR:exec(state)
   local cp = state.codepoint
   if cp ~= nil and self.from <= cp and cp <= self.to then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Char:exec(state)
   if state.codepoint == self.aux then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestChar:exec(state)
   if state.codepoint == self.aux then
      return self.next:exec(state)
   else
      return self.branch:exec(state)
   end
end

function Opcode.Set:exec(state)
   local cp = state.codepoint
   if cp ~= nil then
      if cp <= CHARMAX then
         if self.set[cp] then
            state:advance()
            return self.next:exec(state)
         end
      else
         if self.rest then
            state:advance()
            return self.next:exec(state)
         end
      end
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestSet:exec(state)
   local cp = state.codepoint
   if cp ~= nil then
      if cp <= CHARMAX then
         if self.set[cp] then
            return self.next:exec(state)
         end
      elseif self.rest then
         return self.next:exec(state)
      end
   end
   return self.branch:exec(state)
end

function Opcode.Behind:exec(state)
   local n = self.aux
   -- XXX SLOW self.aux is in *characters* not *bytes*
   if state:backtrack(n) then
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Span:exec(state)
   local cp = state.codepoint
   while true do
      local match = false
      if cp ~= nil then
         if cp <= CHARMAX then
            if self.set[cp] then match = true end
         else
            if self.rest then match = true end
         end
      end
      if not match then break end
      cp = state:advance()
   end
   return self.next:exec(state)
end

function Opcode.Jmp:exec(state)
   return self.branch:exec(state)
end

function Opcode.Choice:exec(state)
   state.stack = Stack:new(
      state.stack, state.bytePos, self.branch, state.captop, state.insidepred
   )
   return self.next:exec(state)
end

function Opcode.PredChoice:exec(state)
   state.stack = Stack:new(
      state.stack, state.bytePos, self.branch, state.captop, state.insidepred,
      true -- predchoice
   )
   state.insidepred = InsidePred.INPRED
   return self.next:exec(state)
end

function Opcode.Call:exec(state)
   state.stack = Stack:new(
      state.stack, nil, self.next
   )
   return self.branch:exec(state)
end

function Opcode.Commit:exec(state)
   state.stack = state.stack.prev
   return self.branch:exec(state)
end

function Opcode.PartialCommit:exec(state)
   local stack = state.stack
   stack.bytePos = state.bytePos
   stack.caplevel = state.captop
   return self.branch:exec(state)
end

function Opcode.BackCommit:exec(state)
   local stack = state.stack
   state.stack = stack.prev -- pop the stack
   state:resetPosTo(stack.bytePos) -- but reset the position to that stored
   state.insidepred = stack.labenv -- labeled failure
   state.captop = stack.caplevel
   return self.branch:exec(state)
end

function Opcode.Throw:exec(state)
   if state.insidepred == InsidePred.OUTPRED then
      state.labelf = self.key
      -- pop entire stack
      while state.stack.prev ~= nil do
         state.stack = state.stack.prev
      end
   else
      state.labelf = LFAIL
      -- pop until you read a 'predchoice' state
      while not state.stack.predchoice do
         state.stack = state.stack.prev
      end
   end
   state.sfail = state.bytePos
   return state:fail()
end

function Opcode.ThrowRec:exec(state) -- with recovery
   state.sfail = state.bytePos
   if state.insidepred == InsidePred.OUTPRED then
      state.labelf = self.key
      state.stack = Stack:new(
         state.stack, nil, self.next, state.captop
      )
      return self.branch:exec(state)
   else
      state.labelf = LFAIL
      -- pop until you read a 'predchoice' state
      while not state.stack.predchoice do
         state.stack = state.stack.prev
      end
      return state:fail()
   end
end

function Opcode.FailTwice:exec(state)
   state.stack = state.stack.prev
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Fail:exec(state)
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.CloseRunTime:exec(state)
   -- close the group
   state:pushcapture(self.cap:newCapture(state.bytePos, 0))
   -- trim table to proper length
   while #state.captures > state.captop - 1 do
      table.remove(state.captures)
   end
   local cs = CapState:new(state.captures, state.source, state.extraArgs)
   local n, funcRet = cs:runtimecap(state.captop - 1)
   state.captop = state.captop - n -- remove nested captures
   -- resdyncaptures: resolve returned values in `funcRet`
   -- first argument false=fail, true=keep current pos, number=next position
   local firstArg = funcRet[1]
   if funcRet.n == 0 then
      firstArg = false -- returning void means we'll fail
   end
   if not firstArg then -- if it is falsey, discard rest of returned vals & fail
      state:updatefarthest(LFAIL)
      return state:fail() -- tail call
   elseif type(firstArg) == "boolean" then
      -- keep current position, nothing needs to be done
   else
      local npos = tonumber(firstArg)
      if npos < state.bytePos or npos > (1 + #(state.source)) then
         error("invalid position returned by match-time capture")
      end
      state:resetPosTo(npos)
   end
   -- push the rest of the funcRet values as new captures
   local n = funcRet.n - 1 -- number of new captures
   if n == 0 then -- no new captures?
      state.captop = state.captop - 1 -- remove open group
   else
      -- new captures, keep original open group
      -- add new captures + close group to 'capture' list
      -- adddyncaptures:
      assert(state.captures[state.captop - 1].kind == CapKind.group)
      assert(state.captures[state.captop - 1]:isopencap())
      -- make group capture an anonymous group (this used to hold match-time f)
      state.captures[state.captop - 1].extra = nil
      for i=2,funcRet.n do -- add runtime captures
         state:pushcapture(CapKind.runtime:newCapture(state.bytePos, 0, funcRet[i]))
      end
      -- close group
      state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))
   end
   return self.next:exec(state)
end

local MAXLOP = 20
function findopen(captures, i, currPos)
   i = i - 1 -- check last captop
   local cap = captures[i]
   if (not cap:isopencap()) and cap.bytePos == currPos then
      return nil -- current one cannot be a full capture
   end
   -- else, look for an 'open' capture
   for j=1,MAXLOP do
      if cap:isopencap() then -- open capture?
         return cap,i -- that's the one to be closed
      elseif cap.kind == CapKind.close then
         return nil -- a full capture should not nest a non-full one
      end
      i = i - 1
      if i<1 then break end
      cap = captures[i]
   end
   return nil -- not found within allowed search limit
end

function Opcode.CloseCapture:exec(state)
   -- if possible, turn capture into a full capture
   assert(state.captop > 1)
   local open,_ = findopen(state.captures, state.captop, state.bytePos)
   if open ~= nil then -- if possible, turn capture into a full capture
      open.byteLen = state.bytePos - open.bytePos
   else
      -- non-nil length to mark entry as closed
      state:pushcapture(self.cap:newCapture(state.bytePos, 0))
   end
   return self.next:exec(state)
end

function Opcode.OpenCapture:exec(state)
   state:pushcapture(self.cap:newCapture(
      -- byteLen = nil marks entry as open
      state.bytePos, nil, self.key
   ))
   return self.next:exec(state)
end

function Opcode.FullCapture:exec(state)
   -- XXX SLOW: self.aux is in *characters* not *bytes*
   local nPos = utf8util.offset(state.source, -self.aux, state.bytePos)
   state:pushcapture(self.cap:newCapture(
      nPos, state.bytePos - nPos, self.key
   ))
   return self.next:exec(state)
end

function match(s, init, code, ...)
   local state = State:new(s, init, ...)
   return code[1]:exec(state)
end

return {
   match = match,
}

end)

register('llpeg', function(myrequire)
local VERSION = '0.0.1'
local MAXSTACK = 400 -- maximum backtracking
local MAXBEHIND = 255 -- maximum look-behind
local MAXRULES = 1000 -- maximum number of rules
local LPEG_COMPAT = true

myrequire('strict')
local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   TTag,
   define,
   define_tree_visitor,
   metareg,
   newcharset,
   numsiblings,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'TTag',
               'define',
               'define_tree_visitor',
               'metareg',
               'newcharset',
               'numsiblings',
   })
local
   compile,
   cs_diff,
   cs_union,
   fixedlen,
   hascaptures,
   nofail,
   nullable,
   nullable_with_grammar,
   tocharset,
   __ = from(myrequire('llpeg.code'), {
                'compile',
                'cs_diff',
                'cs_union',
                'fixedlen',
                'hascaptures',
                'nofail',
                'nullable',
                'nullable_with_grammar',
                'tocharset',
   })
local
   match,
   ___ = from(myrequire('llpeg.vm'), {
                'match',
   })
local
   printpatt,
   printrepl,
   printtree,
   ____ = from(myrequire('llpeg.print'), {
                 'printpatt',
                 'printrepl',
                 'printtree',
   })

function checkint(v)
   if type(v) == 'string' then
      v = tonumber(v)
   end
   if type(v) ~= "number" then
      error("not a number")
   end
   return math.floor(v)
end

local is_pattern = define_type_visitor{
   pattern = function() return true end,
   default = function() return false end,
}

local ptype = define_type_visitor{
   pattern = function() return "pattern" end,
   default = function(v) return type(v) end,
}

function val2str(v)
   if type(v) == 'number' then return tostring(v) end
   if type(v) == 'string' then return v end
   return string.format("(a %s)", ptype(v))
end

--[[ lpltree.c ]]--

function newtree(tag)
   local t = {
      tag = tag,
      code = nil,
   }
   setmetatable(t, metareg)
   return t
end

function newleaf(tag, n)
   return setmetatable({
         tag = tag,
         code = nil,
         n = n,
   }, metareg)
end

function newroot1sib(tag, sib1)
   return setmetatable({
         tag = tag,
         code = nil,
         sib1 = sib1,
   }, metareg)
end

function newroot2sib(tag, sib1, sib2)
   return setmetatable({
         tag = tag,
         code = nil,
         sib1 = sib1,
         sib2 = sib2,
   }, metareg)
end

--[[ Build a sequence of #s nodes from the array 's' with the tag 'tag' ]]--
function fillseq(tag, s)
   if type(s) == 'number' then
      local len = checkint(s)
      s = setmetatable({}, {__len = function() return len end})
   end
   if #s == 0 then
      return newleaf(tag, 0)
   end
   local i = #s
   local result = newleaf(tag, s[i])
   while i > 1 do
      i = i - 1
      result = newroot2sib(
         TTag.Seq,
         newleaf(tag, s[i]),
         result
      )
   end
   return result
end

--[[ Numbers as patterns:
 0 == true (always match); n == TAny repeated 'n' times;
 -n == not (TAny repeated 'n' times)
]]--
function numtree(n)
   n = checkint(n)
   if n == 0 then
      return newleaf(TTag.True)
   elseif n > 0 then
      return fillseq(TTag.Any, n) -- sequence of 'n' anys
   else
      return newroot1sib(TTag.Not, fillseq(TTag.Any, -n))
   end
end

-- Convert value v to a pattern
local getpatt = define_type_visitor{
   ["string"] = function(s)
      if #s == 0 then
         return newleaf(TTag.True) -- always match if string is empty
      end
      local cp = {}
      for _,c in compat.utf8codes(s) do
         table.insert(cp, c)
      end
      return fillseq(TTag.Char, cp)
   end,
   ["number"] = function(n)
      return numtree(n)
   end,
   ["boolean"] = function(b)
      if b then
         return newleaf(TTag.True)
      else
         return newleaf(TTag.False)
      end
   end,
   ["function"] = function(f)
      return setmetatable({
            tag = TTag.RunTime,
            code = nil,
            key = f,
            sib1 = newleaf(TTag.True),
      }, metareg)
   end,
   ["pattern"] = function(v)
      return v
   end,
   ["table"] = function(v)
      return newgrammar(v)
   end,
   default = function(v)
      error("Not a pattern")
   end,
}

-- labeled failure begin
function newthrowleaf(label)
   return setmetatable({
         tag = TTag.Throw,
         code = nil,
         sib2 = nil, -- no recovery rule associated (yet)
         key = label,
   }, metareg)
end
-- labeled failure end

function lp_P(v)
   return getpatt(v)
end

--[[
** sequence operator; optimizations:
** false x => false, x true => x, true x => x
** (cannot do x . false => false because x may have runtime captures)
]]--

function lp_seq(tree1, tree2)
   tree1 = getpatt(tree1)
   tree2 = getpatt(tree2)
   if tree1.tag == TTag.False or tree2.tag == TTag.True then
      -- false . x = false, x . true = x
      return tree1
   elseif tree1.tag == TTag.True then
      -- true . x = x
      return tree2
   else
      return newroot2sib(TTag.Seq, tree1, tree2)
   end
end


--[[
** choice operator; optimizations:
** charset / charset => charset
** true / x => true, x / false => x, false / x => x
** (x / true is not equivalent to true)
]]--
function lp_choice(t1, t2)
   t1 = getpatt(t1)
   t2 = getpatt(t2)
   local t1c = tocharset(t1)
   local t2c = tocharset(t2)
   if t1c ~= nil and t2c ~= nil then
      local t = cs_union(t1c, t2c)
      return t
   elseif nofail(t1) or t2.tag == TTag.False then
      -- true / x => true, x / false => x
      return t1
   elseif t1.tag == TTag.False then
      -- false / x => x
      return t2
   else
      return newroot2sib(TTag.Choice, t1, t2)
   end
end

--[[
   p^n
]]--
function lp_star(p, n)
   local tree1 = getpatt(p)
   n = checkint(n)
   if n >= 0 then -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
      if nullable(tree1) then
         error("loop body may accept empty string")
      end
      local tree = newroot1sib(TTag.Rep, tree1)
      while n > 0 do
         tree = newroot2sib(TTag.Seq, tree1, tree)
         n = n - 1
      end
      return tree
   else -- choice (seq tree1 ... choice tree1 true ...) true
      n = -n
      local tree = newroot2sib( -- at most 1
            TTag.Choice,
            tree1,
            newleaf(TTag.True)
      )
      while n > 1 do
         tree = newroot2sib( -- at most (n-1)
            TTag.Seq,
            tree1,
            tree
         )
         tree = newroot2sib(TTag.Choice, tree, newleaf(TTag.True))
         n = n - 1
      end
      return tree
   end
end

--[[
** #p == &p
]]--
function lp_and(v)
   return newroot1sib(TTag.And, getpatt(v))
end

--[[
** -p == !p
]]--
function lp_not(v)
   return newroot1sib(TTag.Not, getpatt(v))
end

--[[
** [t1 - t2] == Seq (Not t2) t1
** If t1 and t2 are charsets, make their difference.
]]--
function lp_sub(t1, t2)
   t1 = getpatt(t1)
   t2 = getpatt(t2)
   local t1c = tocharset(t1)
   local t2c = tocharset(t2)
   if t1c ~= nil and t2c ~= nil then
      return cs_diff(t1c, t2c)
   else
      return newroot2sib(
         TTag.Seq,
         newroot1sib(TTag.Not, t2),
         t1
      )
   end
end

--[[
   A set with the given characters
]]--
function lp_set(s)
   local t = newcharset()
   local extra = nil
   for _,c in compat.utf8codes(s) do
      if c > CHARMAX then
         -- non ascii, we can't use charset for these
         local one = newleaf(TTag.Char, c)
         if extra == nil then
            extra = one
         else
            extra = newroot2sib(TTag.Choice, extra, one)
         end
      else
         t.set[c] = true
      end
   end
   if extra == nil then
      return t
   else
      return newroot2sib(TTag.Choice, t, extra)
   end
end

function lp_range(...)
   local t = newcharset()
   local extra = nil
   for _,v in ipairs{...} do
      if type(v) ~= "string" then
         error("argument must be string")
      else
         local first, second
         for _,c in compat.utf8codes(v) do
            if first == nil then
               first = c
            elseif second == nil then
               second = c
            else
               error("range must have two characters")
            end
         end
         if first == nil or second == nil then
            error("range must have two characters")
         end
         if first > second then
            if LPEG_COMPAT then
               -- ignore, just silently create an empty range
            else
               error("empty range")
            end
         elseif second <= CHARMAX then -- ascii range
            for i = first, second do
               t.set[i] = true
            end
         else
            local r = lp_utfr(first, second)
            if extra == nil then
               extra = r
            else
               extra = newroot2sib(TTag.Choice, extra, one)
            end
         end
      end
   end
   if extra == nil then
      return t
   else
      return newroot2sib(TTag.Choice, t, extra)
   end
end

function lp_utfr(from, to)
   from = checkint(from)
   to = checkint(to)
   if from > to then
      error("empty range")
   end
   if to > 0x10FFFF then
      error("invalid code point")
   end
   if to <= CHARMAX then -- ascii range?
      local t = newcharset() -- code it as a regular charset
      for i = from, to do
         t.set[i] = true
      end
      return t
   end
   -- multibyte utf-8 range
   return setmetatable({
         tag = TTag.UTFR,
         code = nil,
         from = from,
         to = to,
   }, metareg)
end

--[[
   Look-behind predicate
]]--
function lp_behind(v)
   local tree1 = getpatt(v)
   local n = fixedlen(tree1)
   if n < 0 then
      error("pattern may not have fixed length")
   end
   if hascaptures(tree1) then
      error("pattern has captures")
   end
   if n > MAXBEHIND then
      error("pattern too long to look behind")
   end
   return setmetatable({
         tag = TTag.Behind,
         code = nil,
         sib1 = tree1,
         n = n,
   }, metareg)
end

--[[ labeled failure begin ]]--
--[[
** Throws a label
]]--
local lp_throw = define_type_visitor{
   [{"string","number"}] = newthrowleaf,
   default = function() error("not a string") end,
}
--[[ labeled failure end ]]--


--[[
** Create a non-terminal
]]--
function lp_V(v)
   if v == nil then
      error("non-nil value expected")
   end
   return setmetatable({
         tag = TTag.Call,
         code = nil,
         key = v,
   }, metareg)
end

--[[
** Create a tree for a non-empty capture, with a body and
** optionally with an associated Lua value (at index 'labelidx' in the
** stack)
]]--
function capture_aux(capkind, patt, val)
   local t = newroot1sib(TTag.Capture, getpatt(patt))
   t.cap = capkind
   t.key = val
   return t
end

function newemptycap(capkind, val)
   return capture_aux(capkind, newleaf(TTag.True), val)
end

--[[
** Captures with syntax p / v
** (function capture, query capture, string capture, or number capture)
]]--
local divcapture_helper = define_type_visitor{
   ["function"] = function(v, p)
      return capture_aux(CapKind["function"], p, v)
   end,
   ["table"] = function(v, p)
      return capture_aux(CapKind.query, p, v)
   end,
   ["string"] = function(v, p)
      return capture_aux(CapKind.string, p, v)
   end,
   ["number"] = function(v, p)
      v = checkint(v)
      if v < 0 or v > 65536 then
         error("invalid number")
      end
      return capture_aux(CapKind.num, p, v)
   end,
   default = function(v)
      error("unexpected "..ptype(v).." as 2nd operand to LPeg '/'") end,
}
function lp_divcapture(p, v)
   return divcapture_helper(v, p) -- dispatch on v
end

function lp_acccapture(p, v)
   return capture_aux(CapKind.acc, p, v)
end

-- the match for patt with the values from nested captures replacing their
-- matches
function lp_substcapture(patt)
   return capture_aux(CapKind.subst, patt)
end

-- a table with all captures from patt
function lp_tablecapture(patt)
   return capture_aux(CapKind.table, patt)
end

-- the values produced by patt, optionally tagged with key
function lp_groupcapture(patt, key)
   -- key can be nil
   return capture_aux(CapKind.group, patt, key)
end

-- folding capture (deprecated)
function lp_foldcapture(patt, func)
   if type(func) ~= "function" then
      error("Bad function argument")
   end
   return capture_aux(CapKind.fold, patt, func)
end

-- the match for patt plus all captures made by patt
function lp_simplecapture(patt)
   return capture_aux(CapKind.simple, patt)
end

-- the current position (matches the empty string)
function lp_poscapture()
   return newemptycap(CapKind.position)
end

-- the value of the nth extra argument to lpeg.match (matches the empty string)
function lp_argcapture(n)
   n = checkint(n)
   if n <= 0 or n > 65536 then error("invalid argument index") end
   return newemptycap(CapKind.arg, n)
end

-- the value produced by the previous group capture named `key`
-- (matches the empty string)
function lp_backref(key)
   return newemptycap(CapKind.backref, key)
end

-- Constant capture (matches the empty string)
function lp_constcapture(...)
   local arg = compat.pack(...)
   if arg.n == 0 then -- no values?
      return newleaf(TTag.True) -- no capture
   else
      local i = arg.n
      local tree = newemptycap(CapKind.const, arg[i])
      while i > 1 do
         i = i - 1
         tree = newroot2sib(
            TTag.Seq,
            newemptycap(CapKind.const, arg[i]),
            tree
         )
      end
      if arg.n == 1 then
         -- single constant capture
         return tree
      else
         -- create a group capture with all values
         return lp_groupcapture( tree )
      end
   end
end

-- the returns of function applied to the captures of patt; the application
-- is done at match time
function lp_matchtime(patt, func)
   if type(func) ~= 'function' then
      error('not a function')
   end
   return setmetatable({
         tag = TTag.RunTime,
         code = nil,
         key = func,
         sib1 = getpatt(patt),
   }, metareg)
end

--[[======================================================]]--


--[[
** ======================================================
** Grammar - Tree generation
** ======================================================
]]--

--[[
** push on the stack the index and the pattern for the
** initial rule of grammar at index 'arg' in the stack;
** also add that index into position table.
]]--
function getfirstrule(tbl)
   local first_name, first_rule
   first_name = tbl[1]
   -- is this the name of an initial rule?
   if type(first_name) == 'number' or type(first_name) == 'string' then
      first_rule = tbl[first_name] -- get associated rule
   else
      first_name,first_rule = 1,first_name
   end
   if not is_pattern(first_rule) then
      if first_rule == nil then
         error("grammar has no initial rule")
      else
         error(string.format("initial rule '%s' is not a pattern", first_name))
      end
   end
   -- rule position (after TGrammar)
   -- return map from name to position, and from position to name
   return { [first_name] = 1 }, { first_name }
end

--[[
** traverse grammar at index 'arg', pushing all its keys and patterns
** into the stack. Create a new table (before all pairs key-pattern) to
** collect all keys and their associated positions in the final tree
** (the "position table").
** Return the number of rules and (in 'totalsize') the total size
** for the new tree.
]]--
function collectrules(tbl)
    -- find the first rule and put in position table
   local postab, rpostab = getfirstrule(tbl)

   -- collect and sort rule names (for repeatability)
   local names = {}
   for k,v in pairs(tbl) do
      if k == 1 or postab[k] == 1 then -- initial rule?
         -- skip the initial rules, it's already in the position table
      else
         table.insert(names, k)
      end
   end
   table.sort(names, function(a,b)
                 return tostring(a) < tostring(b)
   end)

   -- fill out rule, name, and position maps
   for _,k in ipairs(names) do
      local v = tbl[k]
      if not is_pattern(v) then
         error("rule '" .. val2str(k) .. "' is not a pattern")
      end
      table.insert(rpostab, k)
      postab[k] = #rpostab
   end
   return postab, rpostab
end

function buildgrammar(g, tbl, postab, rpostab)
   local trees = {}
   for i,name in ipairs(rpostab) do
      local rule = setmetatable({
            tag = TTag.Rule,
            code = nil,
            key = nil, -- will be fixed when rule is used
            n = i, -- rule number
            name = name,
            sib1 = tbl[name], -- pattern
            sib2 = nil,
      }, metareg)
      table.insert(trees, rule)
      g.ruletab[name] = rule
   end
   -- link up siblings
   for i = 1, #trees-1 do
      trees[i].sib2 = trees[i+1]
   end
   trees[#trees].sib2 = newleaf(TTag.True) -- finish list of rules
   g.sib1 = trees[1]
end

--[[
** Check whether a tree has potential infinite loops
]]--
function checkloops(grammar, tree)
   local n = numsiblings[tree.tag]
   if tree.tag == TTag.Rep and nullable_with_grammar(tree.sib1, grammar) then
      return true
   elseif tree.tag == TTag.Grammar then
      return false -- sub-grammars already checked
   elseif n == 1 then
      return checkloops(grammar, tree.sib1) -- tail call
   elseif n == 2 then
      if checkloops(grammar, tree.sib1) then
         return true
      else
         return checkloops(grammar, tree.sib2) -- tail call
      end
   elseif n == 0 then
      return false
   else
      error("surprising number of siblings")
   end
end

--[[
** Give appropriate error message for 'verifyrule'. If a rule appears
** twice in 'passed', there is path from it back to itself without
** advancing the subject.
]]--
function verifyerror(grammar, passed, npassed)
   local i, j
   for i = npassed,1,-1 do -- search for a repetition
      for j = i-1,1,-1 do
         if passed[i] == passed[j] then
            error(string.format("rule '%s' may be left recursive", val2str(passed[i])))
         end
      end
   end
   error("too many left calls in grammar")
end

--[[
** Check whether a rule can be left recursive; raise an error in that
** case; otherwise return 1 iff pattern is nullable.
** The return value is used to check sequences, where the second pattern
** is only relevant if the first is nullable.
** Parameter 'nb' works as an accumulator, to allow tail calls in
** choices. ('nb' true makes function returns true.)
** Parameter 'passed' is a list of already visited rules, 'npassed'
** counts the elements in 'passed'.
** Assume ktable at the top of the stack.
]]--
local verifyrule
verifyrule = define_tree_visitor{
   [{
         TTag.Char, TTag.Set, TTag.Any, TTag.False, TTag.UTFR,
         TTag.Throw, -- labeled failure
   }] = function(tree, g, passed, n, nb)
      return nb -- cannot pass from here
   end,
   [{
         TTag.True, TTag.Behind, -- look-behind cannot have calls
   }] = function(tree, g, passed, n, nb)
      return true
   end,
   [{ TTag.Not, TTag.And, TTag.Rep, }] = function(tree, g, passed, n, nb)
      return verifyrule(tree.sib1, g, passed, n, true) -- tail call
   end,
   [{ TTag.Capture, TTag.RunTime, TTag.XInfo, }] = function(tree, g, passed, n, nb)
      return verifyrule(tree.sib1, g, passed, n, nb) -- tail call
   end,
   [ TTag.Call ] = function(tree, g, passed, n, nb)
      local rule = g.ruletab[tree.key] -- look up rule
      return verifyrule(rule, g, passed, n, nb) -- tail call
   end,
   [ TTag.Seq ] = function(tree, g, passed, n, nb)
      -- only check 2nd child if first is nb
      if not verifyrule(tree.sib1, g, passed, n, false) then
         return nb
      else
         -- note that we don't propagate new npassed from 1st child
         return verifyrule(tree.sib2, g, passed, n, nb) -- tail call
      end
   end,
   [ TTag.Choice ] = function(tree, g, passed, n, nb)
      -- must check both children
      nb = verifyrule(tree.sib1, g, passed, n, nb)
      -- note that we don't propagate new npassed from 1st child
      return verifyrule(tree.sib2, g, passed, n, nb) -- tail call
   end,
   [ TTag.Rule ] = function(tree, g, passed, n, nb)
      if n >= MAXRULES then -- too many steps?
         return verifyerror(g, passed, n) -- error
      else
         passed[n+1] = tree.key -- add rule to path
         return verifyrule(tree.sib1, g, passed, n + 1, nb) -- tail call
      end
   end,
   [ TTag.Grammar ] = function(tree, g, passed, n, nb)
      return nullable(tree) -- sub-grammar cannot be left recursive
   end,
}

function verifygrammar(grammar)
   local passed = {}
   -- check left-recursive rules
   local rule = grammar.sib1
   while rule.tag == TTag.Rule do
      if rule.key ~= nil then -- skip unused rules
         verifyrule(rule.sib1, grammar, passed, 0, false)
      end
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then
      error("assertion failure")
   end
   -- check infinite loops inside rules
   rule = grammar.sib1
   while rule.tag == TTag.Rule do
      if rule.key ~= nil then -- skip unused rules
         if checkloops(grammar, rule.sib1) then
            error("empty loop in rule '" .. val2str(rule.name) .. "'")
         end
      end
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then
      error("assertion failure")
   end
end

--[[
** Fix a TOpenCall into a TCall node, using table 'postable' to
** translate a key to its rule address in the tree. Raises an
** error if key does not exist.
]]--
function fixonecall(g, t, postab)
   local name = t.key
   local rule = g.ruletab[name]
   if t.tag == TTag.Call then
      if rule == nil then
         error(string.format("rule '%s' undefined in given grammar", val2str(name)))
      end
      -- unlike our upstream, we don't clone patterns when we build a grammar
      -- so we can't mutate this tree w/o possibly breaking any other grammars
      -- which might hold an alias of this call.  So we don't distinguish
      -- Call and OpenCall and we don't mutate the tag here and
      -- don't link it up.  However, we can mutate the Rule
      -- as those are not shared
      rule.key = name -- mark this as used
   elseif rule ~= nil then -- TTag.Throw
      -- As before, we can't mutate the tree
      rule.key = name -- mark this as used
   end
end

--[[
** Transform left associative constructions into right
** associative ones, for sequence and choice; that is:
** (t11 + t12) + t2  =>  t11 + (t12 + t2)
** (t11 * t12) * t2  =>  t11 * (t12 * t2)
** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
]]--
function  correctassociativity (tree)
   local tag = tree.tag
   if tag ~= TTag.Choice and tag ~= TTag.Seq then
      error("impossible")
   end
   local t1 = tree.sib1
   while t1.tag == tree.tag do
      local t11, t12 = t1.sib1, t1.sib2
      local t2 = tree.sib2
      -- don't mutate t1 in place as others may be keeping copies of it;
      -- mutating 'tree' in place is okay as we're not changing its semantics
      tree.sib1 = t11
      tree.sib2 = newroot2sib(tag, t12, t2)
      t1 = tree.sib1
   end
   return tree
end

--[[
** Make final adjustments in a tree. Fix open calls in tree 't',
** making them refer to their respective rules or raising appropriate
** errors (if not inside a grammar). Correct associativity of associative
** constructions (making them right associative). Assume that tree's
** ktable is at the top of the stack (for error messages).
]]--
local finalfix_helper = define_tree_visitor{
   [TTag.Grammar] = function(t)
      return t -- subgrammars were already fixed
   end,
   [TTag.Call] = function(t, g, postab)
      if g == nil then
         error("rule '" .. val2str(t.key) .. "' used outside a grammar")
      else
         return fixonecall(g, t, postab)
      end
   end,
   [TTag.Throw] = function(t, g, postab)
      if g ~= nil then
         return fixonecall(g, t, postab)
      end
   end,
   [{TTag.Seq, TTag.Choice}] = function(t, g, postab)
      return correctassociativity(t)
   end,
   default = function(t) return t end,
}
function finalfix(g, t, postab)
   finalfix_helper(t, g, postab)
   if t.tag == TTag.Grammar then return end
   local n = numsiblings[t.tag]
   if n == 1 then
      return finalfix(g, t.sib1, postab) -- tail call
   elseif n == 2 then
      finalfix(g, t.sib1, postab)
      return finalfix(g, t.sib2, postab) -- tail call
   elseif n == 0 then
      return
   else
      error("strange number of siblings")
   end
end

--[[
** Give a name for the initial rule if it is not referenced
]]--
function initialrulename(grammar)
   if grammar.sib1.key == nil then -- initial rule is not referenced?
      grammar.sib1.key = grammar.sib1.name
   end
end

function newgrammar(tbl)
   local postab, rpostab = collectrules(tbl)
   local g = setmetatable({
         tag = TTag.Grammar,
         code = nil,
         sib1 = nil, -- will fill this in
         n = #rpostab, -- number of rules
         ruletab = {}, -- map rule names to rules
   }, metareg)
   buildgrammar(g, tbl, postab, rpostab)
   finalfix(g, g.sib1, postab)
   initialrulename(g)
   verifygrammar(g)
   return g
end

--[[ ====================================================== ]]--

function prepcompile(p)
   finalfix(nil, p, {}) -- correct associativity
   return compile(p)
end

function lp_printtree(patt, c)
   local tree = getpatt(patt)
   if c then
      finalfix(nil, tree, {}) -- correct associativity
   end
   print("[]") -- for compatibility, this is a fake 'ktable'
   io.write(table.concat(printtree(tree, 0, {})))
end

function lp_printcode(patt)
   local p = getpatt(patt)
   if p.code == nil then
      prepcompile(p)
   end
   print("[]") -- for compatibility, this is a fake 'ktable'
   io.write(table.concat(printpatt(p.code, {})))
end

--[[
** Get the initial position for the match, interpreting negative
** values from the end of the subject.  Result is 1-based.
]]--
function initposition(ii, len)
   if ii > 0 then -- positive index?
      if ii <= len then -- inside the string?
         return ii -- return it (no correction to 0-base)
      else
         return len + 1 -- crop at the end
      end
   else -- negative index
      if (-ii) <= len then -- inside the string?
         return len + 1 - (-ii) -- return position from the end
      else
         return 1
      end
   end
end

-- Main match function
function lp_match(pattern, subject, init, ...)
   local p = getpatt(pattern)
   if p.code == nil then prepcompile(p) end
   local code = p.code
   if type(subject) ~= 'string' then error("subject is not a string") end
   local i
   if init == nil then
      i = 1
   else
      i = initposition(checkint(init), #subject)
   end
   return match(subject, i, code, ...)
end


--[[
** ======================================================
** Library creation and functions not related to matching
** ======================================================
]]--

function lp_setmax(lim)
   lim = 0 + lim -- convert to integer
   if lim <= 0 then
      error("out of range")
   end
   MAXSTACK = lim
end

local lp_type = define_type_visitor{
   pattern = function() return "pattern" end,
   default = function() return nil end,
}

function lp_gc(p)
   p._code = nil
end

function createcat(charspec)
   local t = newcharset()
   for i=0,CHARMAX do -- XXX not unicode safe
      local s = compat.utf8char(i)
      if s:find(charspec) ~= nil then
         t.set[i] = true
      end
   end
   return t
end

function lp_locale(tbl)
   if tbl == nil then
      tbl = {}
   end
   tbl.alnum = createcat("%w")
   tbl.alpha = createcat("%a")
   tbl.cntrl = createcat("%c")
   tbl.digit = createcat("%d")
   tbl.graph = createcat("[%p%w]") -- printable except space
   tbl.lower = createcat("%l")
   tbl.print = createcat("%C") -- printable = "not a control character"?
   tbl.punct = createcat("%p") -- "printable but not space or alnum
   tbl.space = createcat("%s")
   tbl.upper = createcat("%u")
   tbl.xdigit = createcat("%x")
   return tbl
end

--[[ lpltree.c ]]--

metareg.__mul = lp_seq
metareg.__add = lp_choice
metareg.__pow = lp_star
metareg.__gc = lp_gc
metareg.__len = lp_and
metareg.__div = lp_divcapture
metareg.__mod = lp_acccapture
metareg.__unm = lp_not
metareg.__sub = lp_sub
metareg.__tostring = printrepl

local pattreg = {
   ptree = lp_printtree,
   pcode = lp_printcode,
   match = lp_match,
   B = lp_behind,
   V = lp_V,
   C = lp_simplecapture,
   Cc = lp_constcapture,
   Cmt = lp_matchtime,
   Cb = lp_backref,
   Carg = lp_argcapture,
   Cp = lp_poscapture,
   Cs = lp_substcapture,
   Ct = lp_tablecapture,
   Cf = lp_foldcapture,
   Cg = lp_groupcapture,
   P = lp_P,
   S = lp_set,
   R = lp_range,
   utfR = lp_utfr,
   locale = lp_locale,
   version = "LLPegLabel " .. VERSION,
   setmaxstack = lp_setmax,
   type = lp_type,
   T = lp_throw, -- labeled failure throw
}
metareg.__index = pattreg

return pattreg

end)

local modules = {}
modules['bit32'] = require('bit32')
modules['string'] = require('string')
modules['strict'] = {}
modules['table'] = require('table')
local function myrequire(name)
  if modules[name] == nil then
    modules[name] = true
    modules[name] = (builders[name])(myrequire)
  end
  return modules[name]
end
return myrequire('llpeg')
end)()

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.