Módulo:Convert/makeunits

Ourige: Biquipédia, la anciclopédia lhibre.
Saltar para a navegação Saltar para a pesquisa

La decumentaçon pa este módulo puode ser criada na páigina Módulo:Convert/makeunits/doc

-- This module generates the wikitext required at Module:Convert/data
-- by reading and processing the wikitext of the master list of units
-- (see conversion_data for the page title).
--
-- Script method:
-- * Read lines, ignoring everything before "== Conversions ==".
-- * Process the following lines:
--   * Find next level-3 heading like "=== Length ===".
--   * Parse each following line starting with "|"
--     (but ignore lines starting with "|-" or "|}").
--   * Split such lines into fields (delimiter "||") and trim
--     leading/trailing whitespace from each field.
--     Remove any "colspan" at front of second field (symbol).
--   * Remove thousand separators (commas) from the scale field.
--     If the scale is a number, do not change it.
--     Otherwise, it should be an expression like "5/9", in
--     which case it is replaced by the value of the expression.
--   * Remove wiki formatting '[[...]]' from the link field.
--   * Remove redundant fields from the unit to reduce size of data table.
--   * Create alternative forms of a unit such as an alias or a combination.
-- * Stop processing on reaching the end of the text or a line starting
--   with a level-2 heading ("==" but not "===").
-- * Repeat above for each heading listed at prepare_data().
-- * Output Lua source for the units table.
--
-- Output has the following form.
-- local all_units = {
--     ["unitcode"] = {                        -- standard format
--         name1    = "singular name",         -- omitted if redundant
--         name1_us = "singular name sp=us",   -- omitted if redundant
--         name2    = "plural name",           -- omitted if redundant
--         name2_us = "plural name sp=us",     -- omitted if redundant
--         symbol   = "symbol",
--         sym_us   = "symbol sp=us",          -- omitted if redundant
--         usename  = 1,                       -- omitted if empty
--         utype    = "unit type",             -- from level-3 heading
--         scale    = 1,                       -- a value, if necessary from evaluating an expression
--         subdivs  = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } },  -- composite input; omitted if empty
--         link     = "title of article for wikilink",  -- omitted if empty or redundant
--         ...                                 -- other values
--     },
--     ["unitcode"] = {        -- alternative format to generate an alias
--         target   = "unit code",
--         ...                                 -- optional values to override those of target
--     },
--     ["unitcode"] = {        -- alternative format to generate a "per" unit like $/acre or BTU/h
--         per      = {u1, u2},                -- numbered table of unitcodes (u1 may be a currency symbol)
--         ...                                 -- optional values
--     },
--     ["unitcode"] = {        -- alternative format to generate an error message
--         shouldbe = "message that some other unit code should be used",
--     },
--     ["unitcode"] = {        -- alternative format for combination outputs (like 'm ft')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         utype    = "unit type",             -- as for standard format
--     },
--     ["unitcode"] = {        -- alternative format for output multiples (like 'ftin')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         multiple = {f1, f2, ...},           -- numbered table of integer factors
--         utype    = "unit type",             -- as for standard format
--     },
--     ...
-- }

-- Local shorthand for two functions of the Scribunto mw.ustring library.
local ulower = mw.ustring.lower
local usub = mw.ustring.sub
-- Declared here without a value. lookup() reads text_code.SIprefixes and
-- text_code.eng_scales, so this must be assigned before lookup() is called
-- (the assignment is not in this part of the file).
local text_code

local specials = {
    -- This table is used to add extra fields when defining some units which
    -- require exceptions to normal processing.
    -- Each key is in the local language, while each value is fixed text.
    -- However, this script should NOT be edited.
    -- Instead, the translation_table in Module:Convert/text can be edited,
    -- and this script will replace sections of the following with localized
    -- definitions from Module:Convert/text, if given.
    -- Ask for assistance at [[:en:Module talk:Convert]].
    -- LATER: It would be better if this was defined in the conversion data.
    utype = {
        -- Maps a unit type heading to the fixed name this script uses for it.
        -- ["unit type in local language"] = "name_used_in_this_script"
        ["Eficiência combustível"] = "type_fuel_efficiency",
        ["comprimento"] = "type_length",
        ["temperatura"] = "type_temperature",
        ["volume"] = "type_volume",
    },
    ucode = {
        -- insert_unique_unit() loops over every sub-table below and sets
        -- unit[<sub-table name>] = <sub-table>[unit code], so each sub-table
        -- name becomes a field of the unit (nil when the code is not listed).
        exception = {
            -- ["unit code in local language"] = "name_used_in_module_convert"
            ["ft"] = "integer_more_precision",
            ["in"] = "subunit_more_precision",
            ["lb"] = "integer_more_precision",
        },
        istemperature = {
            -- Common temperature scales (not keVT or MK).
            -- ["unit code in local language"] = true
            ["C"] = true,
            ["F"] = true,
            ["K"] = true,
            ["R"] = true,
        },
        usesymbol = {
            -- Use unit symbol not name if abbr not specified.
            -- ["unit code in local language"] = 1
            ["C"] = 1,
            ["F"] = 1,
            ["K"] = 1,
            ["R"] = 1,
            ["C-change"] = 1,
            ["F-change"] = 1,
            ["K-change"] = 1,
        },
        alttype = {
            -- Unit has an alternate type that is a valid conversion.
            -- ["unit code in local language"] = "alternate type in local language"
            ["Nm"] = "energy",
            ["ftlb"] = "torque",
            ["ftlb-f"] = "torque",
            ["ftlbf"] = "torque",
            ["inlb"] = "torque",
            ["inlb-f"] = "torque",
            ["inlbf"] = "torque",
            ["inoz-f"] = "torque",
            ["inozf"] = "torque",
        },
    },
}

-- Module text for the local language (localization).
-- A default table of text for enwiki is provided here.
-- If needed for another wiki, wanted sections from the table can be
-- copied into translation_table in Module:Convert/text.
-- For example, copying and modifying only the titles section may give:
--
--   local translation_table = {
--       ...             -- other items
--       mtext = {
--           titles = {
--               -- name_used_in_this_script = 'Title of page'
--               conversion_data = 'Modul:Convert/documentation/conversion data/dok',
--           },
--       },
--   }
-- Localizable text used by this script: the section headings it searches for,
-- the title of the page holding the unit definitions, and the text of every
-- error/warning message. message() looks keys up in mtext.messages.
-- Some entries below are in Portuguese and others are still in English.
local mtext = {
    section_names = {
        -- name_used_in_this_script = 'Section title used in conversion data'
        overrides    = 'Overrides',
        conversions  = 'Conversões',
        outmultiples = 'Output multiples',
        combinations = 'Combinations',
        inmultiples  = 'Input multiples',
        defaults     = 'Modelos',
        links        = 'Links',
        perunits     = 'Automático por unidades',
        varnames     = 'Variable names',
    },
    titles = {
        -- name_used_in_this_script = 'Title of page'
        conversion_data = 'Módulo:Convert/documentação/conversion data/doc',
    },
    messages = {
        -- name_used_in_this_script = 'Error message ($1 = first parameter, $2 = second)'
        m_als_bad   = 'Alias com texto inválido no campo "$1".',
        m_als_dup   = 'Alias "$1" já está definido.',
        m_als_link  = 'Alias "$1" deve incluir um wikilink ("[[...]]") no texto symlink.',
        m_als_mul   = 'Alias "$1" tem multiplicador "$2" que não é um número.',
        m_als_same  = 'Deve omitir "$1" para alias "$2" pois é o mesmo que o objecto.',
        m_als_type  = 'Objecto do alias "$1" tem tipo errado.',
        m_als_undef = 'As unidades primárias devem ser definids antes de "=$1"',
        m_cmb_miss  = 'Falto código de unidade para combinação.',
        m_cmb_none  = 'Não foi definido unidades para a combinação "$1"',
        m_cmb_one   = 'Só uma unidade foi definido para a combinação "$1"',
        m_cmb_type  = 'Unidade "$1" na combinação "$2" com tipo errado.',
        m_cmb_undef = 'Unidade "$1" na combinação "$2" não definida.',
        m_cmp_def   = 'Composto "$1" deve definir código unidade padrão.',
        m_cmp_int   = 'Composite "$1" has components where scale ratios are not integers.',
        m_cmp_inval = 'Composite "$1" has a component with an invalid scale, "$2".',
        m_cmp_many  = 'Composite "$1" has too many fields.',
        m_cmp_miss  = 'Missing unit code for a composite.',
        m_cmp_order = 'Composite "$1" has components in wrong order or with invalid scales.',
        m_cmp_scale = 'Alternate unit "$1" in composite "$2" has wrong scale.',
        m_cmp_two   = 'Composite "$1" must specify exactly two unit codes.',
        m_cmp_type  = 'Unit "$1" in composite "$2" has wrong type.',
        m_cmp_undef = 'Unit "$1" in composite "$2" not defined.',
        m_def_cond  = 'Invalid condition in default "$1" for unit "$2".',
        m_def_fmt   = 'Default output "$1" for unit "$2" should have 2 or 3 "!".',
        m_def_rpt   = 'Default output "$1" for unit "$2" is repeated.',
        m_def_same  = 'Default output for unit "$1" is the same unit.',
        m_def_type  = 'Default output "$1" for unit "$2" has wrong type.',
        m_def_undef = 'Default output "$1" for unit "$2" is not defined.',
        m_dfs_code  = 'Defaults section: no unit code specified.',
        m_dfs_dup   = 'Defaults section: unit "$1" has already been specified.',
        m_dfs_none  = 'Defaults section: unit "$1" has no default specified.',
        m_dfs_sym   = 'Defaults section: unit "$1" must have a symbol.',
        m_dfs_two   = 'Defaults section: unit "$1" should have two fields only.',
        m_dfs_undef = 'Defaults section: unit "$1" is not defined.',
        m_dup_code  = 'Unit code "$1" has already been defined.',
        m_error     = 'Error:',
        m_ftl_read  = 'Could not read wikitext from "[[$1]]".',
        m_ftl_table = '[[$1]] should export table "$2".',
        m_ftl_type  = 'Fatal error: unknown data type for "$1"',
        m_hdg_lev2  = 'Level 2 heading "$1" not found.',
        m_hdg_lev3  = 'No level 3 heading before: $1',
        m_line_num  = ' (line $1).',
        m_lnk_brack = 'Link "$1" has wrong number of brackets.',
        m_lnk_dup   = 'Link exception "$1" is already defined.',
        m_lnk_miss  = 'Missing unit code for a link.',
        m_lnk_none  = 'No link defined for unit "$1".',
        m_lnk_sym   = 'Unit code "$1" for a link must have a symbol.',
        m_lnk_two   = 'Row for unit "$1" link should have two fields only.',
        m_lnk_type  = 'Link exception "$1" has wrong type.',
        m_lnk_undef = 'Unit code "$1" for a link is not defined.',
        m_miss_code = 'Missing unit code.',
        m_miss_sym  = 'Missing symbol.',
        m_miss_type = 'Missing unit type.',
        m_mul_int   = 'Multiple "$1" has components where scale ratios are not integers.',
        m_mul_miss  = 'Missing unit code for a multiple.',
        m_mul_none  = 'No units specified for multiple "$1"',
        m_mul_one   = 'Only one unit specified for multiple "$1"',
        m_mul_order = 'Multiple "$1" has components in wrong order or with invalid scales.',
        m_mul_scale = 'Multiple "$1" has a component with an invalid scale, "$2".',
        m_mul_std   = 'Unit "$1" in multiple "$2" must be a standard unit.',
        m_mul_type  = 'Unit "$1" in multiple "$2" has wrong type.',
        m_mul_undef = 'Unit "$1" in multiple "$2" not defined.',
        m_no_title  = 'Need title of page with unit definitions.',
        m_ovr_dup   = 'Override "$1" is already defined.',
        m_ovr_miss  = 'Missing unit code for an override.',
        m_per_dup   = 'Per unit "$1" already defined.',
        m_per_empty = 'Unit "$1" has an empty field in the "per".',
        m_per_fuel  = 'Unit "$1" has invalid unit types for fuel efficiency.',
        m_per_inv   = 'Invalid field for a "per".',
        m_per_two   = 'Unit "$1" does not have exactly 2 fields in the "per".',
        m_per_undef = 'Unit "$1" has undefined unit code "$2" in the "per".',
        m_percent_s = 'Field "$1" must not contain "%s".',
        m_pfx_bad   = 'Unknown prefix: "$1".',
        m_pfx_name  = 'Unit with Prefix set must include Name.',
        m_scl_bad   = 'Scale expression is invalid: "$1".',
        m_scl_miss  = 'Falta escala.',
        m_scl_oflow = 'Scale expression gives an invalid value: "$1".',
        m_var_cnt   = 'Variable names section: each row must have five fields.',
        m_var_dup   = 'Unit "$1" already has a variable name.',
        m_var_miss  = 'Missing field for a variable name.',
        m_var_undef = 'Unit "$1" in variable names is not defined.',
        m_warning   = 'Aviso:',
        m_wrn_more  = '  (e mais não mostrados)',
        m_wrn_nbsp  = 'A linha $1 tem espaço nonbreaking.',
        m_wrn_nodef = 'Units with the following unit codes have no default output.',
        m_wrn_ucode = '  $1',
    },
}

local function message(key, ...)
    -- Build the text for a message key, using the localizable message table.
    -- Placeholders '$1', '$2', ... in the template are replaced with the
    -- first, second, ... extra arguments (each a string or a number).
    -- A key with no entry in the table is used as the template itself, and
    -- a missing key is reported as '???'.
    -- When the global is_test_run is set (by a testing program), the key is
    -- prepended to every message except 'm_line_num'.
    local substitutions = {}
    for index, param in ipairs({...}) do
        substitutions['$' .. index] = param
    end
    key = key or '???'
    local prefix = ''
    if is_test_run and key ~= 'm_line_num' then
        prefix = key .. ': '
    end
    local template = mtext.messages[key] or key
    -- Placeholders without a matching argument are left unchanged by gsub.
    local text = string.gsub(template, '$%d+', substitutions)
    return prefix .. text
end

local function quit(key, ...)
    -- Abort by raising the formatted message for key as an error.
    -- Level 0 stops error() adding position information, so the surrounding
    -- pcall() receives exactly the message text.
    local text = message(key, ...)
    error(text, 0)
end

local function quit_no_message()
    -- Raise an error whose text is a fixed placeholder.
    -- Callers pass a descriptive string, but it is ignored here: these
    -- errors are always caught by pcall in a caller that discards the
    -- message, so the text is never shown and needs no translation.
    -- Routing such errors through this function documents that fact.
    local placeholder = 'this message is not displayed'
    error(placeholder, 0)
end

local function collection()
    -- Return a table to hold items in slots 1..n, with helper methods.
    --   n            number of items currently held
    --   add(item)    append item
    --   pop()        remove and return the last item (nil if empty)
    --   join(sep)    concatenate the held items (default separator: newline)
    -- Invariant: slots above n are always empty, so ipairs, '#' and join
    -- only ever see items that are still held.
    return {
        n = 0,
        add = function (self, item)
            self.n = self.n + 1
            self[self.n] = item
        end,
        pop = function (self)
            if self.n > 0 then
                local top = self[self.n]
                -- Clear the slot; otherwise the popped value would linger in
                -- the array part and reappear in a later join or ipairs.
                self[self.n] = nil
                self.n = self.n - 1
                return top
            end
        end,
        join = function (self, sep)
            -- Bound the range by n rather than relying on '#self'.
            return table.concat(self, sep or '\n', 1, self.n)
        end,
    }
end

local warnings = collection()  -- formatted warning texts, in the order added
local function add_warning(key, ...)
    -- Format the message for key and queue it as a warning; the queued
    -- warnings are inserted before the final result.
    local text = message(key, ...)
    warnings:add(text)
end

---Begin code to evaluate expressions-----------------------------------
-- This is needed because Lua's loadstring() is not available in Scribunto,
-- and each scale value can be specified as an expression such as "5/9".
-- More complex expressions are supported, including use of parentheses
-- and the binary operators: + - * / ^

local operators
do
    -- Each binary operator is a table giving its precedence (higher binds
    -- tighter), its associativity (1 = left-to-right, 2 = right-to-left)
    -- and a function applying it. Parentheses map to plain strings.
    local LEFT, RIGHT = 1, 2
    local function binary(precedence, associativity, func)
        return { precedence = precedence, associativity = associativity, func = func }
    end
    operators = {
        ['+'] = binary(1, LEFT, function (lhs, rhs) return lhs + rhs end),
        ['-'] = binary(1, LEFT, function (lhs, rhs) return lhs - rhs end),
        ['*'] = binary(2, LEFT, function (lhs, rhs) return lhs * rhs end),
        ['/'] = binary(2, LEFT, function (lhs, rhs) return lhs / rhs end),
        ['^'] = binary(3, RIGHT, function (lhs, rhs) return lhs ^ rhs end),
        ['('] = '(',
        [')'] = ')',
    }
end

local function tokenizer(text)
    -- Return a scanner over text. Each call of scanner:next() consumes and
    -- returns the next token, which is one of:
    --     number              (unsigned; unary operators are not supported)
    --     table               (an entry of the operators table)
    --     string              ('(' or ')')
    --     nil                 (end of text)
    -- An error is thrown if the text at the current position is not a token.
    local scanner = {
        pos = 1,
        maxpos = #text,
        text = text,
    }
    function scanner:next()
        if self.pos > self.maxpos then
            return
        end
        local source, start = self.text, self.pos
        -- An operator or parenthesis, after optional whitespace.
        local _, stop, found = source:find('^%s*([+%-*/^()])', start)
        if found then
            self.pos = stop + 1
            return operators[found]
        end
        -- A number: try the form with an exponent first, then without.
        _, stop, found = source:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', start)
        if not found then
            _, stop, found = source:find('^%s*(%d*%.?%d*)', start)
        end
        local number = tonumber(found)
        if number then
            self.pos = stop + 1
            return number
        end
        quit_no_message('invalid number "' .. source:sub(start) .. '"')
    end
    return scanner
end

local function evaluate_tokens(tokens, inparens)
    -- Return the value from evaluating tokenized expression, or throw an error.
    -- tokens   : object whose next() method returns the next token
    --            (number, operator table, '(' or ')', or nil at end of text).
    -- inparens : true when called recursively for a parenthesized
    --            sub-expression, in which case a ')' ends the evaluation.
    -- Numbers and pending operators are kept on two separate stacks.
    local numstack, opstack = collection(), collection()
    local function perform_ops(precedence, associativity)
        -- Apply stacked operators while the one on top binds tighter than
        -- the incoming operator, or equally tightly when the incoming
        -- operator has associativity 1.
        while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or
            (opstack[opstack.n].precedence == precedence and associativity == 1)) do
            -- The right operand was pushed last, so it is popped first.
            local rhs = numstack:pop()
            local lhs = numstack:pop()
            if not (rhs and lhs) then quit_no_message('missing number') end
            local op = opstack:pop()
            numstack:add(op.func(lhs, rhs))
        end
    end
    local token_last
    local function set_state(token_type)
        -- Throw an error if two tokens of the same kind are adjacent
        -- (two numbers with no operator, or two operators with no number).
        if token_last == token_type then
            local missing = (token_type == 'number') and 'operator' or 'number'
            quit_no_message('missing ' .. missing)
        end
        token_last = token_type
    end
    while true do
        local token = tokens:next()
        if type(token) == 'number' then
            set_state('number')
            numstack:add(token)
        elseif type(token) == 'table' then
            set_state('operator')
            perform_ops(token.precedence, token.associativity)
            opstack:add(token)
        elseif token == '(' then
            -- A parenthesized sub-expression counts as a number; it is
            -- evaluated recursively from the same token stream.
            set_state('number')
            numstack:add(evaluate_tokens(tokens, true))
        elseif token == ')' then
            if inparens then
                break
            end
            quit_no_message('unbalanced parentheses')
        else
            -- End of text (nil token). This branch is also taken when
            -- inparens is true, so an unclosed '(' is not reported.
            break
        end
    end
    -- Precedence 0 is lower than that of every operator, so this applies
    -- everything still on the operator stack.
    perform_ops(0)
    if numstack.n > 1 then quit_no_message('missing operator') end
    if numstack.n < 1 then quit_no_message('missing number') end
    return numstack:pop()
end

local function evaluate(expression)
    -- Evaluate an arithmetic expression given as a string and return the
    -- resulting number; an error is thrown if the expression is invalid.
    -- Not bullet proof, but sufficient for the expressions actually used.
    local tokens = tokenizer(expression)
    return evaluate_tokens(tokens)
end
---End code to evaluate expressions-------------------------------------
---Begin code adapted from Module:Convert-------------------------------

-- Suffix appended to a singular unit name to make a default plural name
-- (used by the unit metatables when no plural name is defined).
local plural_suffix = 's'  -- may be changed from translation.plural_suffix below

local function shallow_copy(t)
    -- Return a new table holding the same key/value pairs as t.
    -- Only the top level is copied (nested tables are shared) and the
    -- metatable is not copied; this avoids the features and overhead of
    -- mw.clone() provided by Scribunto, which are not needed here.
    local duplicate = {}
    for key, value in pairs(t) do
        duplicate[key] = value
    end
    return duplicate
end

local function split(text, delimiter)
    -- Split text at each delimiter and return the fields as a numbered table.
    -- Leading/trailing whitespace is removed from every field.
    -- The delimiter is inserted into a Lua pattern without escaping, so a
    -- delimiter containing magic characters (for example, '.') would fail.
    local fields = {}
    local pattern = '%s*(.-)%s*' .. delimiter
    -- A delimiter is appended so that the last field is also matched.
    for field in string.gmatch(text .. delimiter, pattern) do
        fields[#fields + 1] = field
    end
    return fields
end

local unit_mt = {
    -- Metatable to get missing values for a unit that does not accept SI prefixes.
    -- Warning: The boolean value 'false' is returned for any missing field
    -- so __index is not called twice for the same field in a given unit.
    -- Every computed value is stored in the unit with rawset, so the
    -- computation for a given field runs at most once per unit.
    __index = function (self, key)
        local value
        if key == 'name1' or key == 'sym_us' then
            -- The singular name and the sp=us symbol default to the symbol.
            value = self.symbol
        elseif key == 'name2' then
            -- Default plural: singular name plus the plural suffix.
            value = self.name1 .. plural_suffix
        elseif key == 'name1_us' then
            value = self.name1
            if not rawget(self, 'name2_us') then
                -- If name1_us is 'foot', do not make name2_us by appending plural_suffix.
                self.name2_us = self.name2
            end
        elseif key == 'name2_us' then
            -- Use an explicitly defined name1_us (if any) as the base for
            -- the plural; otherwise fall back to the ordinary plural.
            local raw1_us = rawget(self, 'name1_us')
            if raw1_us then
                value = raw1_us .. plural_suffix
            else
                value = self.name2
            end
        elseif key == 'link' then
            -- The link target defaults to the singular name.
            value = self.name1
        else
            value = false
        end
        rawset(self, key, value)
        return value
    end
}

local function prefixed_name(unit, name, index)
    -- Insert the unit's SI prefix name (unit.si_name) into name and return
    -- the result.
    -- index selects which name is being built:
    --     1 (name1), 2 (name2), 3 (name1_us), 4 (name2_us).
    -- unit.prefix_position, if set, gives the insertion position: either a
    -- number, or a comma-separated string with one position per index.
    -- Positions are byte (not character) indexes, hence Lua's own sub().
    -- With no position, the prefix name is simply placed in front.
    local position = rawget(unit, 'prefix_position')
    if type(position) == 'string' then
        position = tonumber(split(position, ',')[index])
    end
    if not position then
        return unit.si_name .. name
    end
    local head = name:sub(1, position - 1)
    local tail = name:sub(position)
    return head .. unit.si_name .. tail
end

local unit_prefixed_mt = {
    -- Metatable to get missing values for a unit that accepts SI prefixes.
    -- Before use, fields si_name, si_prefix must be defined.
    -- The unit must define _symbol, _name1 and
    -- may define _sym_us, _name1_us, _name2_us
    -- (_sym_us, _name2_us may be defined for a language using sp=us
    -- to refer to a variant unrelated to U.S. units).
    -- Fields with a leading underscore hold the unprefixed text; the
    -- corresponding field without the underscore is computed here by
    -- adding the prefix, then stored in the unit with rawset.
    -- The boolean value 'false' is stored and returned for any other
    -- missing field.
    __index = function (self, key)
        local value
        if key == 'symbol' then
            value = self.si_prefix .. self._symbol
        elseif key == 'sym_us' then
            -- Prefix the sp=us symbol if one is defined, else use symbol.
            value = rawget(self, '_sym_us')
            if value then
                value = self.si_prefix .. value
            else
                value = self.symbol
            end
        elseif key == 'name1' then
            value = prefixed_name(self, self._name1, 1)
        elseif key == 'name2' then
            -- Prefix an explicit plural if defined; otherwise append the
            -- plural suffix to the (already prefixed) singular name.
            value = rawget(self, '_name2')
            if value then
                value = prefixed_name(self, value, 2)
            else
                value = self.name1 .. plural_suffix
            end
        elseif key == 'name1_us' then
            value = rawget(self, '_name1_us')
            if value then
                value = prefixed_name(self, value, 3)
            else
                value = self.name1
            end
        elseif key == 'name2_us' then
            -- Order of preference: explicit _name2_us, then a plural made
            -- from an explicit _name1_us, then the ordinary plural.
            value = rawget(self, '_name2_us')
            if value then
                value = prefixed_name(self, value, 4)
            elseif rawget(self, '_name1_us') then
                value = self.name1_us .. plural_suffix
            else
                value = self.name2
            end
        elseif key == 'link' then
            -- The link target defaults to the singular name.
            value = self.name1
        else
            value = false
        end
        rawset(self, key, value)
        return value
    end
}

local function lookup(units, unitcode, sp, what)
    -- Return a copy of the unit if found, or return nil.
    -- In this cut-down code, sp is always nil, and what is ignored.
    -- units    : table of unit definitions keyed by unit code.
    -- unitcode : the code to find. It is tried in this order:
    --            1. an exact key of units;
    --            2. an SI prefix followed by the code of a unit that
    --               accepts prefixes;
    --            3. 'e' + digits + a unit code (engineering notation).
    -- The returned copy has a metatable which supplies missing fields.
    local t = units[unitcode]
    if t then
        if t.shouldbe then
            -- An entry that only holds a "should be" error message is
            -- treated as not found.
            return nil
        end
        local result = shallow_copy(t)
        if result.prefixes then
            -- Unit accepts SI prefixes but is used here without one.
            result.si_name = ''
            result.si_prefix = ''
            return setmetatable(result, unit_prefixed_mt)
        end
        return setmetatable(result, unit_mt)
    end
    local SIprefixes = text_code.SIprefixes
    for plen = SIprefixes[1] or 2, 1, -1 do
        -- Look for an SI prefix; should never occur with an alias.
        -- Check for longer prefix first ('dam' is decametre).
        -- SIprefixes[1] = prefix maximum #characters (as seen by mw.ustring.sub).
        local prefix = usub(unitcode, 1, plen)
        local si = SIprefixes[prefix]
        if si then
            -- This t is a new local (the remainder after the prefix); it
            -- shadows the outer t within this block.
            local t = units[usub(unitcode, plen+1)]
            if t and t.prefixes then
                local result = shallow_copy(t)
                if (sp == 'us' or t.sp_us) and si.name_us then
                    result.si_name = si.name_us
                else
                    result.si_name = si.name
                end
                result.si_prefix = si.prefix or prefix
                -- In this script, each scale is a string.
                -- The base scale is multiplied by 10 to the power of
                -- (prefix exponent * the unit's prefixes value).
                result.scale = tostring(tonumber(t.scale) * 10 ^ (si.exponent * t.prefixes))
                result.prefixes = nil  -- a prefixed unit does not take more prefixes (in this script, the returned unit may be added to the list of units)
                return setmetatable(result, unit_prefixed_mt)
            end
        end
    end
    -- Engineering notation: the digits after 'e' (kept as a string) are
    -- the key into text_code.eng_scales.
    local exponent, baseunit = unitcode:match('^e(%d+)(.*)')
    if exponent then
        local engscale = text_code.eng_scales[exponent]
        if engscale then
            local result = lookup(units, baseunit, sp, 'no_combination')
            if not result then return nil end
            -- Only accepted when the base unit has none of the fields
            -- offset, builtin or engscale.
            if not (result.offset or result.builtin or result.engscale) then
                result.defkey = unitcode  -- key to lookup default exception
                result.engscale = engscale
                -- Do not set result.scale as this code is called for units where that is not set.
                return result
            end
        end
    end
    return nil
end

local function evaluate_condition(value, condition)
    -- Apply a conditional expression to value and return true or false,
    -- or throw an error if the expression is invalid.
    -- Only a very limited set of expressions is supported:
    --    v < 9
    --    v * 9 < 9
    -- where
    --    'v' stands for value
    --    9 is any number (as defined by Lua tonumber)
    --    '<' can also be '<=' or '>' or '>='
    -- Two such expressions may also be joined:
    --    LHS and RHS
    -- where LHS, RHS = any of the above expressions.
    local comparators = {
        ['<']  = function (a, b) return a < b end,
        ['<='] = function (a, b) return a <= b end,
        ['>']  = function (a, b) return a > b end,
        ['>='] = function (a, b) return a >= b end,
    }
    local function compare(value, text)
        -- Evaluate one simple comparison.
        local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$')
        if arithop == nil then
            quit_no_message('Invalid default expression.')
        end
        if arithop == '*' then
            factor = tonumber(factor)
            if factor == nil then
                quit_no_message('Invalid default expression.')
            end
            value = value * factor
        end
        limit = tonumber(limit)
        if limit == nil then
            quit_no_message('Invalid default expression.')
        end
        local test = comparators[compop]
        if test then
            return test(value, limit)
        end
        quit_no_message('Invalid default expression.')  -- should not occur
    end
    local lhs, rhs = condition:match('^(.-%W)and(%W.*)')
    if lhs ~= nil then
        return compare(value, lhs) and compare(value, rhs)
    end
    return compare(value, condition)
end

---End adapted code-----------------------------------------------------

local function strip(text)
    -- Remove whitespace from both ends of text and return the result.
    local trimmed = text:match("^%s*(.-)%s*$")
    return trimmed
end

local function empty(text)
    -- Report whether text is missing: true for nil or the empty string,
    -- false otherwise (text is assumed to be a string when not nil).
    if text == nil then
        return true
    end
    return text == ''
end

-- Tables of units: k = unit code, v = unit table.
-- get_unit() looks unit codes up in units_index.
local units_index = {}  -- all units: normal, alias, per, combination, or multiple
local alias_index = {}  -- all aliases (to detect attempts to define more than once)
local per_index = {}    -- all "per" units (to detect attempts to define more than once)

local function get_unit(ucode, utype)
    -- Find a unit code among the units defined so far.
    -- With utype == nil, only an already defined unit can be returned.
    -- With utype given, ucode may instead be an automatically generated
    -- combination (codes separated by '+' or by whitespace); each component
    -- should have the given utype, and a dummy unit is returned for it.
    -- Returns nil if nothing suitable is found.
    if empty(ucode) then
        return nil
    end
    local known = lookup(units_index, ucode)
    if known or not utype then
        return known
    end
    -- Break ucode into its component codes, if it has more than one.
    local parts = collection()
    if ucode:find('+', 1, true) then
        for code in (ucode .. '+'):gmatch('%s*(.-)%s*%+') do
            if code ~= '' then
                parts:add(code)
            end
        end
    elseif ucode:find('%s') then
        for code in ucode:gmatch('%S+') do
            parts:add(code)
        end
    end
    if parts.n <= 1 then
        return
    end
    -- The dummy unit only has a utype; reading any other field is a bug.
    local dummy = setmetatable({ utype = utype }, {
        __index = function (self, key)
            error('Bug: invalid use of automatically generated unit')
        end })
    for position = 1, parts.n do
        local component = lookup(units_index, parts[position])
        if not component or component.shouldbe or component.combination then
            return nil
        end
        if component.utype ~= utype then
            dummy.utype = component.utype  -- set wrong type which caller will detect
            break
        end
    end
    return dummy
end

local overrides = {}  -- read from input for unit codes that should not be checked for a duplicate

local function insert_unique_unit(data, unit, index)
    -- Add unit to the end of the data table and, when index is not nil,
    -- record it in index under its unit code.
    -- An error is raised if the unit code is already defined, unless the
    -- code is listed in overrides.
    -- Before insertion, the built-in data for the code is copied into the
    -- unit: one field per sub-table of specials.ucode.
    local code = unit.unitcode
    if get_unit(code) and not overrides[code] then
        quit('m_dup_code', code)
    end
    for field, by_code in pairs(specials.ucode) do
        unit[field] = by_code[code]
    end
    if index then
        index[code] = unit
    end
    table.insert(data, unit)
end

local function check_condition(condition)
    -- Report whether condition looks valid: it is evaluated against a
    -- range of sample values, and false is returned as soon as any
    -- evaluation throws an error; otherwise true is returned.
    local samples = { 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }
    for i = 1, #samples do
        if not pcall(evaluate_condition, samples[i], condition) then
            return false
        end
    end
    return true
end

local function check_default_expression(default, ucode)
    -- Return a numbered table of the unit names present in param default:
    -- one name (default itself) if it is a plain name, or two names if it
    -- is an expression.
    -- An expression uses '!'-delimited fields, with 'v' representing the
    -- input value for the conversion:
    --     condition ! name if true ! name if false ! optional suffix
    -- Example: 'v < 120 ! small ! big ! suffix'
    -- returns { 'smallsuffix', 'bigsuffix' }.
    -- An error is thrown if the number of fields or the condition is invalid.
    if not default:find('!', 1, true) then
        return { default }
    end
    -- Split on '!', removing leading/trailing whitespace from each field.
    local fields = {}
    for field in (default .. '!'):gmatch('%s*(.-)%s*!') do
        fields[#fields + 1] = field
    end
    local count = #fields
    if count ~= 3 and count ~= 4 then
        quit('m_def_fmt', default, ucode)
    end
    local suffix = fields[4] or ''
    local first = fields[2] .. suffix
    local second = fields[3] .. suffix
    if not check_condition(fields[1]) then
        quit('m_def_cond', default, ucode)
    end
    return { first, second }
end

local function check_default(default, ucode, utype, unit_table)
    -- Validate the name (or expression) given as a default output for
    -- unit ucode.
    -- A unit normally must not name itself as its default. The exception
    -- is a unit defined merely for use in per units, where ucode, utype
    -- and default are all the same.
    -- Example: unit cent cannot be converted to anything other than a cent,
    -- but it can work, for example, in cent/km and cent/mi.
    -- Each name must be a defined unit of type utype, unit_table must have
    -- that type too, and a name must not be repeated.
    -- Throw an error if a problem occurs.
    local seen = {}
    local names = check_default_expression(default, ucode)
    for _, name in ipairs(names) do
        if seen[name] then
            quit('m_def_rpt', name, ucode)
        end
        if name == ucode and ucode ~= utype then
            quit('m_def_same', ucode)
        end
        local target = get_unit(name, utype)
        if not target then
            quit('m_def_undef', name, ucode)
        end
        if utype ~= unit_table.utype or utype ~= target.utype then
            quit('m_def_type', name, ucode)
        end
        seen[name] = true
    end
end

local function check_all_defaults(units, maxerrors)
    -- Validate the default of every unit, once all input data has been
    -- processed.
    -- Units with a shouldbe or combination field are skipped. Of the rest,
    -- a unit with a default has it checked, and a unit with no default is
    -- noted as missing unless it has a target (for an alias a default
    -- is optional).
    -- Checking stops once maxerrors invalid defaults have been collected.
    -- Throw an error (the collected messages) if any default is invalid;
    -- otherwise add warnings listing at most maxerrors unit codes that
    -- have no default.
    local problems = collection()
    local no_default = collection()  -- unitcodes with missing defaults
    for _, unit in ipairs(units) do
        if not (unit.shouldbe or unit.combination) then
            local code, wanted = unit.unitcode, unit.default
            if not empty(wanted) then
                local ok, msg = pcall(check_default, wanted, code, unit.utype, unit)
                if not ok then
                    problems:add(msg)
                    if problems.n >= maxerrors then
                        break
                    end
                end
            elseif not unit.target then
                no_default:add(code)  -- unit should have a default
            end
        end
    end
    if problems.n > 0 then
        error(problems:join(), 0)
    end
    if no_default.n > 0 then
        add_warning('m_wrn_nodef')
        for k, code in ipairs(no_default) do
            if maxerrors - k < 0 then
                -- Already listed maxerrors codes: say there are more and stop.
                add_warning('m_wrn_more')
                break
            end
            add_warning('m_wrn_ucode', code)
        end
    end
end

local function check_all_pers(units, maxerrors)
    -- Check each component of each "per" unit and collect a message for
    -- each problem found.
    -- In addition, set the extra fields (iscomplex, invert) required by a
    -- "per" unit whose type is a fuel efficiency.
    -- This is done after all input data has been processed.
    -- Checking stops once at least maxerrors messages have been collected.
    -- Throw an error (the collected messages) if a problem occurs.
    local errors = collection()
    local function errmsg(key, ...)
        errors:add(message(key, ...))
    end
    for _, unit in ipairs(units) do
        local per = unit.per
        if per then
            local ucode = unit.unitcode
            if #per ~= 2 then
                errmsg('m_per_two', ucode)
            else
                local types = {}  -- types[i] = utype of component i when it is a defined unit
                for i, v in ipairs(per) do
                    if empty(v) then
                        -- Do not look up an empty component as well: that
                        -- would add a second (undefined unit) message for
                        -- the problem that has just been reported.
                        errmsg('m_per_empty', ucode)
                    elseif not text_code.currency[v] then
                        -- Not a currency symbol, so it must be a unit code.
                        local t = get_unit(v)
                        if t then
                            types[i] = t.utype
                        else
                            errmsg('m_per_undef', ucode, v)
                        end
                    end
                end
                if specials.utype[unit.utype] == 'type_fuel_efficiency' then
                    -- One component must be a volume and the other a length;
                    -- invert records which of them is on top.
                    local expected = { type_volume = 1, type_length = 2 }
                    local top_type = expected[specials.utype[types[1]]]
                    local bot_type = expected[specials.utype[types[2]]]
                    if top_type and bot_type and top_type ~= bot_type then
                        unit.iscomplex = true
                        if top_type == 1 then
                            unit.invert = 1   -- volume/length
                        else
                            unit.invert = -1  -- length/volume
                        end
                    else
                        errmsg('m_per_fuel', ucode)
                    end
                end
            end
        end
        if errors.n >= maxerrors then
            break
        end
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
end

local function update_units(units, composites, varnames)
    -- Add extra data, defined in other sections of the input, to units.
    -- This is done after all input data has been processed.
    -- composites and varnames are both keyed by unit code:
    --   * a composites entry sets unit.subdivs to the text of a Lua table
    --     constructor listing the entry's subdivs;
    --   * a varnames entry sets unit.varname.
    for _, unit in ipairs(units) do
        local code = unit.unitcode
        local composite = composites[code]
        if composite then
            local joined = table.concat(composite.subdivs, ', ')
            unit.subdivs = '{ ' .. joined .. ' }'
        end
        local varname = varnames[code]
        if varname then
            unit.varname = varname
        end
    end
end

local function make_override(data)
    -- Return a function which, when called, records in data (a table keyed
    -- by unit code) a unit code that is not to be checked for a duplicate.
    return function (utype, fields)
        -- The unit code is the first field; utype is not used.
        -- Throw an error if the code is missing or was already recorded.
        local code = fields[1]
        if empty(code) then
            quit('m_ovr_miss')
        elseif data[code] then
            quit('m_ovr_dup', code)
        end
        data[code] = true
    end
end

local function make_default(data)
    -- Return a function which, when called, appends to data (a table) an
    -- entry that defines a default output unit.
    local seen = {}  -- default already stored for each unit code, to detect a repeat
    return function (utype, fields)
        -- Store an entry { symbol = ..., default = ... } for a unit such as
        -- 'kg' that has a default output unit different from what is
        -- defined for the base unit ('g').
        -- There must be exactly two fields: the unit code and its default.
        -- Throw an error if a problem occurs.
        local code, wanted = fields[1], fields[2]
        if empty(code) then
            quit('m_dfs_code')
        end
        if empty(wanted) then
            quit('m_dfs_none', code)
        end
        if #fields ~= 2 then
            quit('m_dfs_two', code)
        end
        local unit_table = get_unit(code)
        if not unit_table then
            quit('m_dfs_undef', code)
        end
        -- The entry is keyed by the unit's defkey if it has one, else by its symbol.
        local key = unit_table.defkey or unit_table.symbol
        if empty(key) then
            quit('m_dfs_sym', code)
        end
        check_default(wanted, code, utype, unit_table)
        if seen[code] then
            quit('m_dfs_dup', code)
        end
        seen[code] = wanted
        table.insert(data, { symbol = key, default = wanted })
    end
end

local function clean_link(link, name)
    -- Return link, customary where:
    --   link = the given link with any leading '+' or '*' or '@' removed,
    --          any '[[...]]' wiki formatting removed, and leading/trailing
    --          whitespace removed;
    --   customary = 1 if there was a leading '+', 2 if '*', 3 if '@',
    --          or nil otherwise (for an extra "US" or "U.S." or "Imperial"
    --          customary units link).
    -- The returned link is nil if the result is empty, or if a name is
    -- given and the link matches it (ignoring case of the first letter).
    -- When the link is nil, no link field is stored, and if a link is
    -- required, it will be set from the unit's name.
    -- Exception: if the given link is empty and the name starts with '[[',
    -- the single result is '' (for a unit name which is always linked).
    -- Throw an error if a stray '[' or ']' remains at either end.
    local given = link
    if empty(link) then
        if name and name:sub(1, 2) == '[[' then
            return ''
        end
        return nil
    end
    local customary
    local lead = link:sub(1, 1)
    if lead == '+' then
        customary = 1
    elseif lead == '*' then
        customary = 2
    elseif lead == '@' then
        customary = 3
    end
    if customary then
        link = strip(link:sub(2))
    end
    if link:sub(1, 2) == '[[' then
        link = link:sub(3)
    end
    if link:sub(-2) == ']]' then
        link = link:sub(1, -3)
    end
    link = strip(link)
    if link:sub(1, 1) == '[' or link:sub(-1) == ']' then
        quit('m_lnk_brack', given)
    end
    if link == '' then
        return nil, customary
    end
    if name then
        local function lower_first(text)
            return ulower(usub(text, 1, 1)) .. usub(text, 2)
        end
        if lower_first(link) == lower_first(name) then
            return nil, customary  -- link == name, ignoring case of first letter
        end
    end
    return link, customary
end

local function make_link(data)
    -- Return a function which, when called, appends to data (a table) an
    -- entry that defines a link exception.
    local seen = {}  -- link already stored for each unit code, to detect a repeat
    return function (utype, fields)
        -- Store an entry { symbol = ..., link = ... } for a unit such as
        -- 'kg' that has a linked article different from what is defined
        -- for the base unit ('g').
        -- There must be exactly two fields: the unit code and its link.
        -- The unit must already be defined, with type utype.
        -- Throw an error if a problem occurs.
        local code = fields[1]
        local link = (clean_link(fields[2]))  -- only the cleaned link is wanted
        if empty(code) then
            quit('m_lnk_miss')
        end
        if empty(link) then
            quit('m_lnk_none', code)
        end
        if #fields ~= 2 then
            quit('m_lnk_two', code)
        end
        local unit_table = get_unit(code)
        if not unit_table then
            quit('m_lnk_undef', code)
        end
        if unit_table.utype ~= utype then
            quit('m_lnk_type', code)
        end
        local key = unit_table.symbol
        if empty(key) then
            quit('m_lnk_sym', code)
        end
        if seen[code] then
            quit('m_lnk_dup', code)
        end
        seen[code] = link
        table.insert(data, { symbol = key, link = link })
    end
end

local function clean_scale(scale)
    -- Return the scale as a string, with comma separators removed and any
    -- expression replaced by its value.
    -- A scale which is already a number is returned as given (apart from
    -- the commas). Anything else is evaluated as an expression like "5/9"
    -- and the value is formatted with 17 significant digits.
    -- It would be better to retain scale expressions so they are evaluated
    -- on the server with its full resolution. However, there are many such
    -- expressions in the table of all units, and it seems pointless to
    -- require the server to evaluate all of them just to do one convert.
    -- Throw an error if the scale is missing, cannot be evaluated to a
    -- number, or gives a result which is not finite.
    if empty(scale) then
        quit('m_scl_miss')
    end
    assert(type(scale) == 'string', 'Bug: scale has an unexpected type')
    scale = (scale:gsub(',', ''))  -- remove comma separators
    if tonumber(scale) then
        return scale  -- not an expression
    end
    local ok, value = pcall(evaluate, scale)
    if not ok or type(value) ~= 'number' then
        quit('m_scl_bad', scale)
    end
    local text = ('%.17g'):format(value)
    if text:find('[#n]') then
        -- Lua can give results like "#INF" while Scribunto gives "inf". Either is an error.
        quit('m_scl_oflow', scale)
    end
    -- Omit redundant zeros from results like '1.2e-005'.
    -- Do not bother looking for results like '1.2e+005' as none occur in practice.
    local mantissa, zeros, digits = text:match('^(.-e%-)(0+)(.*)')
    if zeros then
        return mantissa .. digits
    end
    return text
end

local function add_alias_optional_fields(unit, start, fields, target)
    -- Inspect fields[i] for i = start, start+1 ..., each of which should be
    -- empty or of form "key = value", and add to unit the definitions
    -- appropriate for an alias or "per".
    -- For an alias, target is a valid unit; for a "per", target is nil.
    -- Accepted keys: sp (value must be "us"), default, link, multiplier,
    -- symbol, symlink.
    -- Throw an error if an invalid entry is encountered.
    for i = start, #fields do
        local field = fields[i]
        if not empty(field) then
            local key, value = field:match('^%s*(.-)%s*=%s*(.-)%s*$')
            local accepted = false
            if not empty(value) then
                if key == 'sp' then
                    if value == 'us' then
                        unit.sp_us = true
                        accepted = true
                    end
                elseif key == 'link' then
                    -- A link which matches the target's link is redundant:
                    -- clean_link then returns nil and no link is stored.
                    local link, customary = clean_link(value, target and target.link)
                    if link then
                        unit.link = link
                    end
                    if customary then
                        unit.customary = customary
                    end
                    accepted = true
                elseif key == 'symlink' then
                    -- Must contain '[[' somewhere before ']]'.
                    local open = value:find('[[', 1, true)
                    local close = value:find(']]', 1, true)
                    if not (open and close and open < close) then
                        quit('m_als_link', unit.unitcode)
                    end
                    unit.symlink = value
                    accepted = true
                elseif key == 'multiplier' then
                    if not tonumber(value) then
                        quit('m_als_mul', unit.unitcode, value)
                    end
                    unit.multiplier = value
                    accepted = true
                elseif key == 'default' or key == 'symbol' then
                    -- Pointless to repeat what the target already defines.
                    if target and value == target[key] then
                        quit('m_als_same', key, unit.unitcode)
                    end
                    unit[key] = value
                    accepted = true
                end
            end
            if not accepted then
                quit('m_als_bad', field)
            end
        end
    end
end

local function make_alias(fields, ucode, utype, symbol)
    -- Return a new alias unit for ucode, where symbol is the unit code of
    -- the target unit; return nil if symbol is not a defined unit.
    -- Optional definitions are read from fields[3] onwards.
    -- Throw an error if the alias is invalid, is already defined, or has
    -- a type different from its target.
    local target = get_unit(symbol)
    if not target then
        return nil
    end
    local alias = { target = symbol, unitcode = ucode, utype = utype }
    add_alias_optional_fields(alias, 3, fields, target)
    if alias_index[ucode] then
        quit('m_als_dup', ucode)
    end
    alias_index[ucode] = alias
    if utype ~= target.utype then
        quit('m_als_type', ucode)
    end
    return alias
end

local function make_per(fields, ucode, utype, symbol)
    -- Return a new "per" unit for ucode, where symbol is of form "x/y"
    -- (split at the first '/'); return nil if symbol has no '/'.
    -- Optional definitions are read from fields[3] onwards.
    -- Throw an error if the unit is invalid or is already defined.
    -- The top, bottom unit codes are checked later, after all units are defined.
    local numerator, denominator = symbol:match('^(.-)/(.*)$')
    if numerator == nil then
        return nil
    end
    local unit = {
        per = { strip(numerator), strip(denominator) },
        unitcode = ucode,
        utype = utype,
    }
    add_alias_optional_fields(unit, 3, fields)
    if per_index[ucode] then
        quit('m_per_dup', ucode)
    end
    per_index[ucode] = unit
    return unit
end

local function make_unit(data)
    -- Return a function which, when called, stores a table that defines a
    -- single unit. The table is stored in data (also a table).
    -- The symbol (second field) may start with a prefix that selects what
    -- kind of entry is made:
    --   '='  : an alias, where the symbol is the unit code of the actual unit;
    --   '==' : a "per" unit, where the symbol is of form "x/y";
    --   '!'  : a wrong unit code, where the symbol is a "should be" message;
    --   '~'  : a normal unit with usename set;
    --   '*'  : a normal unit with defkey and linkey set to its unit code.
    local fieldnames = {
        -- Fields in the Conversions section are assumed to be in the following order.
        'unitcode',
        'symbol',
        'sym_us',
        'scale',
        'extra',
        'name1',
        'name2',
        'name1_us',
        'name2_us',
        'prefixes',
        'default',
        'link',
    }
    return function (utype, fields)
        -- Store a table defining a unit.
        -- utype is the unit type; fields is the numbered table of text
        -- fields from one line of input (see fieldnames for the order).
        -- Throw an error if a problem occurs.
        local ucode, symbol = fields[1], fields[2]
        if empty(utype) then
            quit('m_miss_type')
        end
        if empty(ucode) then
            quit('m_miss_code')
        end
        if empty(symbol) then
            quit('m_miss_sym')
        end
        local prefix = symbol:sub(1, 1)
        if prefix == '~' or prefix == '=' or prefix == '!' or prefix == '*' then
            if symbol:sub(1, 2) == '==' then
                prefix = symbol:sub(1, 2)
            end
            symbol = strip(symbol:sub(#prefix + 1))  -- omit prefix and any following whitespace
            fields[2] = symbol  -- so the loop over fieldnames below stores the cleaned symbol
        else
            prefix = nil  -- not a valid prefix
        end
        if prefix == '=' or prefix == '==' then
            -- ucode is an alias (a fake unit code used in a convert template), or
            -- defines a "per" unit like "$/acre" or "BTU/h".
            -- For an alias, symbol is the unit code of the actual unit.
            -- For a "per", symbol is of form "x/y" where x and y are unit codes,
            -- or x is a recognized currency symbol and y is a unit code.
            -- Checking that x and y are valid is deferred until all units have
            -- been defined so, for example, "BTU/h" can be defined before "h".
            local unit
            if prefix == '=' then
                unit = make_alias(fields, ucode, utype, symbol)
            else
                unit = make_per(fields, ucode, utype, symbol)
            end
            if not unit then
                -- Do not define an alias in terms of another alias.
                quit('m_als_undef', symbol)
            end
            insert_unique_unit(data, unit, units_index)
            return
        elseif prefix == '!' then
            -- ucode may be incorrectly entered as a unit code.
            -- symbol is a message saying what unit code should be used.
            -- The unit is stored in data but is not added to units_index.
            local unit = { unitcode = ucode, shouldbe = symbol }
            insert_unique_unit(data, unit, nil)
            return
        end
        -- Make the unit.
        local unit = { utype = utype }
        for i, name in ipairs(fieldnames) do
            if not empty(fields[i]) then
                unit[name] = fields[i]
            end
        end
        -- Remove redundancy from unit.
        -- In the following, the locals name1 and name2 hold the effective
        -- names (after applying defaults), while unit.name1 and unit.name2
        -- are cleared when they only repeat the default.
        if unit.sym_us == symbol then
            unit.sym_us = nil
        end
        local prefixes = unit.prefixes
        local name1, name2 = unit.name1, unit.name2
        if name1 then
            if name1 == symbol and not prefixes then
                -- A unit which takes an SI prefix must not have a nil name because,
                -- for example, the name for "kW" = "kilo" .. "watt" (name for "W").
                -- The "not prefixes" test is needed for bnwiki where the
                -- watt unit has the same name and symbol.
                unit.name1 = nil
            end
        else
            name1 = symbol
        end
        if name2 then
            if name2 == name1 .. plural_suffix then
                unit.name2 = nil
            end
        else
            name2 = name1 .. plural_suffix
        end
        local name1_us, name2_us = unit.name1_us, unit.name2_us
        if name1_us then
            if name1_us == name1 then
                unit.name1_us = nil
            end
        end
        if name2_us then
            if unit.name1_us then
                -- A distinct sp=us singular is kept: compare with its plural.
                if name2_us == unit.name1_us .. plural_suffix then
                    unit.name2_us = nil
                end
            elseif name2_us == name2 then
                unit.name2_us = nil
            end
        end
        -- Other changes to unit.
        unit.scale = clean_scale(unit.scale)
        local extra = unit.extra
        if not empty(extra) then
            -- Set appropriate fields for a unit that needs more than a simple
            -- multiplication by a ratio of unit scales to convert values.
            unit.iscomplex = true
            if extra == 'volume/length' then
                unit.invert = 1
            elseif extra == 'length/volume' then
                unit.invert = -1
            elseif specials.utype[utype] == 'type_temperature' then
                unit.offset = extra
            elseif extra == 'invert' then
                unit.invert = -1
            else
                unit.builtin = extra
            end
        end
        if prefix == '~' then
            -- Magic code for units like "acre" where the symbol is not really a
            -- symbol, and output should use the singular or plural name instead.
            unit.usename = 1
        elseif prefix == '*' then
            -- Magic code for units like "pitch" which have a symbol that is the same as
            -- another unit with entries defined in the default or link exceptions tables.
            unit.defkey = ucode  -- key for default exceptions
            unit.linkey = ucode  -- key for link exceptions
        end
        local name_for_link
        if prefixes then
            if prefixes == 'SI' then
                unit.prefixes = 1
            elseif prefixes == 'SI2' then
                unit.prefixes = 2
            elseif prefixes == 'SI3' then
                unit.prefixes = 3
            else
                quit('m_pfx_bad', prefixes)
            end
        else
            -- Only units which do not accept SI prefixes have name_for_link set.
            -- That is because, for example, if set name_for_link = name1 for unit g,
            -- then the link is "kilogram" for kg, and "yottagram" for Yg, and so on
            -- for all prefixes. That might be desirable for some units, but not all.
            name_for_link = name1
        end
        unit.link, unit.customary = clean_link(unit.link, name_for_link)
        if prefixes then
            -- The SI prefix is always at the start (position = 1) for symbol and sym_us.
            -- However, each name (name1, name2, name1_us, name2_us) can have the SI prefix
            -- at any position, and that position can be different for each name.
            -- For enwiki, the only units with names where the prefix is not at the start
            -- are "square metre" and "cubic metre" ("square meter" and "cubic meter" for sp=us).
            -- Some other wikis want the flexibility that the prefix position can be different
            -- so the position is stored as nil (if always 1), or N (an integer, if always N),
            -- or a string of four comma-separated numbers such as "5,7,9,11" which means the
            -- prefix position for (name1, name2, name1_us, name2_us) is (5, 7, 9, 11)
            -- respectively.
            local name1, name1_us = unit.name1, unit.name1_us  -- after redundancy removed
            if not name1 then
                quit('m_pfx_name')
            end
            local positions = collection()
            for i, k in ipairs({ 'name1', 'name2', 'name1_us', 'name2_us' }) do
                local name = unit[k]
                local pos
                if name then
                    -- Plain search for the literal two-character marker "%s"
                    -- which shows where the prefix goes; the marker is removed.
                    pos = name:find('%s', 1, true)
                    if pos then
                        unit[k] = name:sub(1, pos - 1) .. name:sub(pos + 2)
                    end
                elseif i == 2 or i == 3 then
                    -- A missing name2 or name1_us uses the position found for name1.
                    pos = positions[1]
                elseif i == 4 then
                    -- A missing name2_us uses the position for name1_us if that
                    -- name is defined, or the position for name2 otherwise.
                    pos = positions[unit.name1_us and 3 or 2]
                end
                positions:add(pos or 1)
            end
            local pos = positions[1]
            for i = 2, positions.n do
                if pos ~= positions[i] then
                    -- Positions differ: store all of them as quoted text.
                    pos = '"' .. positions:join(',') .. '"'
                    break
                end
            end
            if pos ~= 1 then
                unit.prefix_position = pos
            end
            for _, name in ipairs({ 'symbol', 'sym_us', 'name1', 'name1_us', 'name2', 'name2_us' }) do
                unit['_' .. name] = unit[name]
                unit[name] = nil  -- force call to __index metamethod so any SI prefix can be handled
            end
        end
        for name, v in pairs(unit) do
            -- Reject if a string field includes "%s" (should not occur after above).
            if type(v) == 'string' and v:find('%s', 1, true) then
                quit('m_percent_s', name)
            end
        end
        insert_unique_unit(data, unit, units_index)
    end
end

local function make_combination(data)
    -- Return a function which, when called, stores in data (a table) a
    -- table that defines a single combination unit.
    return function (utype, fields)
        -- Store a table defining a combination unit, that is, a unit that
        -- specifies more than one output.
        -- The first field is the unit code; each following non-empty field
        -- is the code of a target unit, which must already be defined and
        -- must have type utype. At least two targets are required.
        -- Throw an error if a problem occurs.
        local targets = {}
        local unit = { utype = utype, combination = targets }
        for i, v in ipairs(fields) do
            if i == 1 then  -- unitcode
                if v == '' then
                    quit('m_cmb_miss')
                end
                unit.unitcode = v
            elseif v ~= '' then  -- ignore empty fields
                local target = get_unit(v)
                if not target then
                    quit('m_cmb_undef', v, unit.unitcode)
                end
                if target.utype ~= utype then
                    quit('m_cmb_type', v, unit.unitcode)
                end
                targets[#targets + 1] = v
            end
        end
        local count = #targets
        if count < 2 then
            quit(count == 0 and 'm_cmb_none' or 'm_cmb_one', unit.unitcode)
        end
        insert_unique_unit(data, unit, units_index)
    end
end

local function make_perunit(data)
    -- Return a function which, when called, stores a table that defines a
    -- fixup for an automatic per unit. The table is stored in data (also a table).
    local pertype_index = {}  -- to detect attempts to define a fixup twice
    local function quoted(text)
        -- Return text as a Lua string literal. Using %q escapes any quote,
        -- backslash or newline in the input so the generated source is
        -- always valid; for ordinary text the result is just "text".
        return string.format('%q', text)
    end
    return function (utype, fields)
        -- Store a table { lhs = ..., rhs = ... } to define a fixup, where:
        --   lhs = field 1 (like "length/time");
        --   rhs = Lua source text which is either a quoted string holding
        --         field 2 (like "speed"), or, if a link (field 3) or a
        --         multiplier (field 4) is given, a table constructor
        --         holding utype, link, multiplier (those that are defined).
        -- Typos or other errors in the input are not detected!
        -- Parameter utype is ignored (it is nil).
        -- Throw an error if a problem occurs.
        local lhs, rhs, link, multiplier
        for i, v in ipairs(fields) do
            if v == '' then
                -- Ignore empty fields.
            elseif i == 1 then
                lhs = v  -- like "length/time"
            elseif i == 2 then
                rhs = v  -- like "speed"
            elseif i == 3 then
                link = v
            elseif i == 4 then
                if not tonumber(v) then
                    quit('m_per_inv')
                end
                multiplier = v
            else
                quit('m_per_inv')
            end
        end
        if lhs and (rhs or link or multiplier) then
            if link or multiplier then
                local parts = collection()
                if rhs then
                    parts:add('utype = ' .. quoted(rhs))
                end
                if link then
                    parts:add('link = ' .. quoted(link))
                end
                if multiplier then
                    -- Known to be a number, so it is written without quotes.
                    parts:add('multiplier = ' .. multiplier)
                end
                rhs = '{ ' .. parts:join(', ') .. ' }'
            else
                rhs = quoted(rhs)
            end
            if pertype_index[lhs] then
                quit('m_per_dup', lhs)
            end
            pertype_index[lhs] = rhs
            table.insert(data, { lhs = lhs, rhs = rhs })
        else
            quit('m_per_inv')
        end
    end
end

local function make_varname(data)
    -- Return a function which, when called, records in data (a table keyed
    -- by unit code) the variable name defined for a unit.
    return function (utype, fields)
        -- Record that a unit has a variable name.
        -- This is for slwiki where a unit name depends on the value.
        -- There must be exactly five non-empty fields: the unit code
        -- (which must already be defined) followed by four names. The
        -- names are stored as a single string, delimited by '!'.
        -- Parameter utype is ignored (it is nil).
        -- Throw an error if a problem occurs.
        if #fields ~= 5 then
            quit('m_var_cnt')
        end
        local names = {}
        for i = 1, 5 do
            local v = fields[i]
            if empty(v) then
                quit('m_var_miss')
            end
            if i > 1 then
                names[#names + 1] = v
            elseif not get_unit(v) then  -- i == 1: unitcode
                quit('m_var_undef', v)
            end
        end
        local ucode = fields[1]
        if data[ucode] then
            quit('m_var_dup', ucode)
        end
        data[ucode] = table.concat(names, '!')
    end
end

local function reversed(t)
    -- Return a new numbered table holding the items of t in reverse order.
    -- Table t is not changed.
    local result = {}
    local last = #t
    for i = last, 1, -1 do
        result[last + 1 - i] = t[i]
    end
    return result
end

local function make_inputmultiple(data)
    -- Return a function which, when called, stores a table that defines a
    -- single composite (multiple input) unit. The table is stored in data (also a table).
    -- Entries in data are keyed by the unit code of the main (first) unit
    -- and have form { subdivs = { text1, text2, ... } } where each text is
    -- Lua source for one permitted subdivision of that unit.
    return function (utype, fields)
        -- Set or update an entry in the data table to record that a unit
        -- accepts subdivisions to make a composite input unit like '|2|ft|6|in'.
        -- The target units must be defined first.
        -- Throw an error if a problem occurs.
        local unitcode  -- dummy code required for simplicity, but which is not used in output
        local alternate_code  -- an alternative unit code can be specified to replace convert input
        local fixed_name  -- a fixed name can be specified to replace the unit's normal symbol/name
        local default_code
        local ucodes, scales = {}, {}
        for i, v in ipairs(fields) do
            -- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name
            if i == 1 then
                if v == '' then
                    quit('m_cmp_miss')
                end
                unitcode = v
            elseif 2 <= i and i <= 5 then
                -- Fields 2 to 4 are required; field 5 (alternate) may be empty.
                if not (i == 5 and v == '') then
                    local target = get_unit(v, (i == 4) and utype or nil)  -- the default may be an auto combination
                    if not target then
                        quit('m_cmp_undef', v, unitcode)
                    end
                    if target.utype ~= utype then
                        quit('m_cmp_type', v, unitcode)
                    end
                    if i < 4 then
                        -- A component unit must have a scale.
                        if not target.scale then
                            quit('m_mul_std', v, unitcode)
                        end
                        table.insert(ucodes, v)
                        table.insert(scales, target.scale)
                    elseif i == 4 then
                        default_code = v
                    else
                        -- The alternate must have the same scale as the
                        -- most recently added component unit.
                        if scales[#scales] ~= target.scale then
                            quit('m_cmp_scale', v, unitcode)
                        end
                        alternate_code = v
                    end
                end
            elseif i == 6 then
                if v ~= '' then
                    fixed_name = v
                end
            else
                quit('m_cmp_many', unitcode)
            end
        end
        if #ucodes ~= 2 then
            quit('m_cmp_two', unitcode)
        end
        if not default_code then
            quit('m_cmp_def', unitcode)
        end
        -- Component units must be specified from most-significant to least-significant,
        -- and each ratio of a pair of scales must be very close to an integer.
        -- Currently, there will be exactly two scales and one ratio.
        local ratios, count = {}, #scales
        for i = 1, count do
            -- Replace each scale (text) with its value, which must be positive.
            local scale = tonumber(scales[i])
            if scale == nil or scale <= 0 then
                quit('m_cmp_inval', unitcode, scales[i])
            end
            scales[i] = scale
        end
        for i = 1, count - 1 do
            local ratio = scales[i] / scales[i + 1]
            local rounded = math.floor(ratio + 0.5)  -- nearest integer
            if rounded < 2 then
                quit('m_cmp_order', unitcode)
            end
            -- Accept a relative difference from the integer of at most 1e-6.
            if math.abs(ratio - rounded)/ratio > 1e-6 then
                quit('m_cmp_int', unitcode)
            end
            ratios[i] = rounded
        end
        -- Build the items of a Lua table constructor: the ratio, followed
        -- by "key = value" for each of default, unit, name that is defined.
        local text = { tostring(ratios[1]) }
        local function add_text(key, value)
            table.insert(text, string.format('%s = %q', key, value))
        end
        if default_code then
            add_text('default', default_code)
        end
        if alternate_code then
            add_text('unit', alternate_code)
        end
        if fixed_name then
            add_text('name', fixed_name)
        end
        local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', '))
        local main_code = ucodes[1]
        local item = data[main_code]
        if item then
            -- The main unit already has a subdivision: add another.
            table.insert(item.subdivs, subdiv)
        else
            data[main_code] = { subdivs = { subdiv } }
        end
    end
end

local function make_outputmultiple(data)
    -- Build and return the row handler for multiple output units.
    -- Each call of the handler defines one multiple output unit and stores
    -- the resulting table in data (a table supplied by the caller).
    return function (utype, fields)
        -- Define a multiple unit such as 'ydftin' (result in yards, feet, inches).
        -- fields[1] is the unit code of the multiple; every other non-empty
        -- field is the code of a component unit, which must already be defined.
        -- An error is thrown if a problem occurs.
        local unit = { utype = utype }
        local codes, scales = {}, {}
        for position, field in ipairs(fields) do
            if position == 1 then
                if field == '' then
                    quit('m_mul_miss')
                end
                unit.unitcode = field
            elseif field ~= '' then  -- empty fields are ignored
                local component = get_unit(field)
                if not component then
                    quit('m_mul_undef', field, unit.unitcode)
                end
                if component.utype ~= utype then
                    quit('m_mul_type', field, unit.unitcode)
                end
                local scale = component.scale
                if not scale then
                    quit('m_mul_std', field, unit.unitcode)
                end
                codes[#codes + 1] = field
                scales[#scales + 1] = scale
            end
        end
        local count = #codes
        if count < 2 then
            local id = (count == 0) and 'm_mul_none' or 'm_mul_one'
            quit(id, unit.unitcode)
        end
        -- Components are listed from most-significant to least-significant, so
        -- the scale values must be in descending order, and the ratio of each
        -- adjacent pair of scales must be very close to an integer.
        -- A unit scale is stored by this script as a string (it might be an
        -- expression like "5/9"), but the scales in a multiple are handled as
        -- numbers (they should never be expressions).
        -- All scales are validated before any ratio is examined.
        for i, raw in ipairs(scales) do
            local number = tonumber(raw)
            if not number or number <= 0 then
                quit('m_mul_scale', unit.unitcode, raw)
            end
            scales[i] = number
        end
        local ratios = {}
        for i = 2, #scales do
            local ratio = scales[i - 1] / scales[i]
            local nearest = math.floor(ratio + 0.5)
            if nearest < 2 then
                quit('m_mul_order', unit.unitcode)
            end
            if math.abs(ratio - nearest) / ratio > 1e-6 then
                quit('m_mul_int', unit.unitcode)
            end
            ratios[i - 1] = nearest
        end
        -- The components and ratios are stored least-significant first.
        unit.combination = reversed(codes)
        unit.multiple = reversed(ratios)
        insert_unique_unit(data, unit, units_index)
    end
end

-- To make updating the data module easier, this script inserts a preamble
-- and a postamble so the result can be used to replace the whole page.
-- data_preamble is the header comment that _makeunits writes first; it is
-- skipped when _makeunits has set it to nil (done for a test run that reads
-- a non-default data page).
-- The text between the long brackets is emitted verbatim, so it must not be
-- reformatted.
local data_preamble = [=[
-- Conversion data used by [[Module:Convert]] which uses mw.loadData() for
-- read-only access to this module so that it is loaded only once per page.
-- See [[:en:Template:Convert/Transwiki guide]] if copying to another wiki.
--
-- These data tables follow:
--   all_units           all properties for a unit, including default output
--   default_exceptions  exceptions for default output ('kg' and 'g' have different defaults)
--   link_exceptions     exceptions for links ('kg' and 'g' have different links)
--
-- These tables are generated by a script which reads the wikitext of a page that
-- documents the required properties of each unit; see [[:en:Module:Convert/doc]].
]=]

-- Final text written by _makeunits (unless it has been set to nil): the
-- return statement of the generated data module, exporting the four tables
-- whose definitions are emitted before it.
local data_postamble = [=[
return {
    all_units = all_units,
    default_exceptions = default_exceptions,
    link_exceptions = link_exceptions,
    per_unit_fixups = per_unit_fixups,
}]=]

-- Text written immediately before the generated unit definitions: a warning
-- banner followed by the opening of the all_units table.
-- _makeunits replaces this with a one-line form for a test run.
local out_unit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local all_units = {]]

-- Text written after the last unit definition to close the all_units table.
local out_unit_suffix = [[
}
]]

-- Text written before the generated default_exceptions entries.
local out_default_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local default_exceptions = {
    -- Prefixed units with a default different from that of the base unit.
    -- Each key item is a prefixed symbol (unitcode for engineering notation).]]

-- Text written after the last default_exceptions entry to close the table.
local out_default_suffix = [[
}
]]

-- Template for one default_exceptions entry. _makeunits replaces each
-- "{name}" placeholder with the field of that name from the entry's table.
local out_default_item = [[
    ["{symbol}"] = "{default}",]]

-- Text written before the generated link_exceptions entries.
local out_link_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local link_exceptions = {
    -- Prefixed units with a linked article different from that of the base unit.
    -- Each key item is a prefixed symbol (not unitcode).]]

-- Text written after the last link_exceptions entry to close the table.
local out_link_suffix = [[
}
]]

-- Template for one link_exceptions entry. _makeunits replaces each
-- "{name}" placeholder with the field of that name from the entry's table.
local out_link_item = [[
    ["{symbol}"] = "{link}",]]

-- Text written before the generated per_unit_fixups entries.
local out_perunit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local per_unit_fixups = {
    -- Automatically created per units of form "x/y" may have their unit type
    -- changed, for example, "length/time" is changed to "speed".
    -- Other adjustments can also be specified.]]

-- Text written after the last per_unit_fixups entry to close the table.
local out_perunit_suffix = [[
}
]]

-- Template for one per_unit_fixups entry. _makeunits replaces each
-- "{name}" placeholder with the field of that name from the entry's table;
-- note that the right-hand side is inserted without quotes.
local out_perunit_item = [[
    ["{lhs}"] = {rhs},]]

-- Keys written, in this order, for a unit that has a 'combination' field:
-- a pure combination like 'm ft', or a multiple like 'ftin'.
local combination_specification = { 'combination', 'multiple', 'utype' }

-- Keys written, in this order, for a unit that has a 'target' field.
local alias_specification = {
    'target', 'symbol', 'sp_us',
    'default', 'link', 'symlink',
    'customary', 'multiplier',
}

-- Keys written, in this order, for a unit that has a 'per' field.
local per_specification = {
    'per', 'symbol', 'sp_us', 'utype',
    'invert', 'iscomplex',
    'default', 'link', 'symlink',
    'customary', 'multiplier',
}

-- Keys written for a unit that has a 'shouldbe' field (only that field).
local shouldbe_specification = { 'shouldbe' }

-- Keys written, in this order, for any unit that is not an alias, a per
-- unit, a 'shouldbe' entry, or a combination. A key is written only if the
-- unit has a value for it.
local unit_specification = {
    '_name1', '_name1_us', '_name2', '_name2_us', '_symbol', '_sym_us',
    'prefix_position',
    'name1', 'name1_us', 'name2', 'name2_us', 'varname',
    'symbol', 'sym_us', 'usename', 'usesymbol',
    'utype', 'alttype', 'builtin',
    'scale', 'offset', 'invert', 'iscomplex', 'istemperature',
    'exception', 'prefixes', 'default', 'subdivs', 'defkey', 'linkey',
    'link', 'customary', 'sp_us',
}

-- Set of keys whose string values are written to the output as they are,
-- without surrounding quotes. For example, 'per', 'combination' and
-- 'multiple' hold Lua table source produced by numbered_table_as_string.
local no_quotes = {}
for _, key in ipairs({
    'combination', 'customary', 'multiple', 'multiplier', 'offset',
    'per', 'prefix_position', 'scale', 'subdivs',
}) do
    no_quotes[key] = true
end

local function add_unit_lines(results, unit, spec)
    -- Append to results the lines of Lua source that define one unit.
    -- spec is a numbered table of keys; each key for which unit has a value
    -- produces one 'key = value,' line, in the order given by spec.
    local indent = string.rep(' ', 8)
    local function emit(text)
        -- A line starting with 8 spaces has them replaced with a single tab.
        -- The generated module is usually viewed (with 8-column tabs) rather
        -- than edited in the CodeEditor (which assumes 4-column tabs), so
        -- this keeps indents correct for viewing while reducing the size
        -- of the module.
        if text:sub(1, 8) == indent then
            text = '\t' .. text:sub(9)
        end
        results:add(text)
    end
    emit('    ["' .. unit.unitcode .. '"] = {')
    for _, key in ipairs(spec) do
        local value = unit[key]
        if value then
            local kind = type(value)
            -- Only strings are quoted, and only for keys not in no_quotes.
            local quoted = (kind == 'string') and not no_quotes[key]
            if kind == 'boolean' then
                value = tostring(value)
            elseif kind == 'number' or key == 'scale' then
                -- Replace results like '1e-006' with '1e-6'.
                value = tostring(value):gsub('(e[+-])0+([1-9].*)', '%1%2', 1)
            elseif kind ~= 'string' then
                quit('m_ftl_type', unit.unitcode)
            end
            local template = quoted and '%8s%-9s= %q,' or '%8s%-9s= %s,'
            emit(template:format('', key, value))
        end
    end
    emit('    },')
end

local function numbered_table_as_string(data, unit)
    -- Return a string of Lua source for a table constructor holding the
    -- items of the numbered table data, for example '{ "ft", "in" }'.
    -- Each item must be a string or a number; otherwise an error is raised
    -- via quit, naming unit.unitcode.
    -- Strings are written with %q so that a quote or backslash in an item
    -- is escaped and the generated source stays valid Lua (the result is
    -- unchanged for items without such characters).
    local parts = {}
    for _, v in ipairs(data) do
        local kind = type(v)
        if kind == 'string' then
            parts[#parts + 1] = string.format('%q', v)
        elseif kind == 'number' then
            parts[#parts + 1] = tostring(v)
        else
            quit('m_ftl_type', unit.unitcode)
        end
    end
    return '{ ' .. table.concat(parts, ', ') .. ' }'
end

local function extract_heading(line)
    -- If line is a wikitext heading with non-empty text, return n, s where
    -- n = heading level (number of '=' at the start of the line), and
    -- s = heading text with leading/trailing whitespace removed.
    -- Otherwise, return nothing.
    -- The closing '=' run is not checked to see if it matches the opening run.
    local opening, title = line:match('^(==+)%s*(.-)%s*(==+)%s*$')
    if not title or title == '' then
        return
    end
    return #opening, title
end

local function fields(line)
    -- Split line at each "||" delimiter and return a numbered table of the
    -- resulting fields.
    -- Whitespace around each field is removed, and each encoded pipe
    -- ('&#124;') in a field is decoded to '|'.
    -- When units are processed the second field is the symbol; any leading
    -- "colspan" attribute is removed from it, so that a line like
    -- "| unitcode || colspan="11" | !Text to display for an error message"
    -- gives "!Text to display for an error message" as its second field.
    local items = {}
    -- A delimiter is appended so the last field is also matched.
    for raw in (line .. '||'):gmatch('%s*(.-)%s*||') do
        local decoded = raw:gsub('&#124;', '|')
        items[#items + 1] = decoded
    end
    local second = items[2]
    if second then
        items[2] = second:match('^%s*colspan%s*=.-|%s*(.*)$') or second
    end
    return items
end

local function prepare_section(maker, lines, section, maxerrors, need_section, need_utype)
    -- Process the first level-two section whose heading text equals section
    -- in the given table of lines of wikitext.
    -- Each table row in the section (a line starting with "|" but not with
    -- "|-" or "|}") is split into fields and passed to maker along with the
    -- current unit type (the lowercased text of the latest level-three heading).
    -- Processing stops at the next level-two heading, or when maxerrors
    -- rows have failed.
    -- An error is thrown if any row failed, or if need_section is set and
    -- the section was not found, or if need_utype is set and a row occurs
    -- before any level-three heading.
    local nbsp = '\194\160'  -- nonbreaking space is utf-8 encoded as hex c2 a0
    local problems = collection()
    local in_section = false
    local utype
    local function is_row(text)
        if text:sub(1, 1) ~= '|' then
            return false
        end
        local second = text:sub(2, 2)
        return second ~= '-' and second ~= '}'
    end
    for linenumber, line in ipairs(lines) do
        if not in_section then
            -- Still looking for the starting heading (which is itself skipped).
            local level, heading = extract_heading(line)
            if level == 2 and heading == section then
                in_section = true
            end
        elseif is_row(line) then
            -- Accumulate a definition.
            if need_utype and empty(utype) then
                quit('m_hdg_lev3', line)
            end
            if line:find(nbsp, 1, true) then
                -- For example, "acre ft" does not work if it contains nbsp.
                add_warning('m_wrn_nbsp', linenumber)
            end
            local ok, msg = pcall(maker, utype, fields(line:sub(2)))
            if not ok then
                -- Drop a final period so the line number can follow the message.
                msg = msg:gsub('%.$', '')
                problems:add(msg .. message('m_line_num', linenumber))
                if problems.n >= maxerrors then
                    break
                end
            end
        else
            local level, heading = extract_heading(line)
            if level == 2 then
                break
            elseif level == 3 then
                utype = ulower(heading)
            end
        end
    end
    if need_section and not in_section then
        quit('m_hdg_lev2', section)
    end
    if problems.n > 0 then
        error(problems:join(), 0)
    end
end

local function get_page_lines(page_title)
    -- Return a collection of the lines of wikitext of the page with the
    -- given title, with leading spaces/tabs and trailing spaces/tabs/carriage
    -- returns removed from each line.
    -- An error is thrown if the title is empty or the page cannot be read.
    if empty(page_title) then
        quit('m_no_title')
    end
    local title = mw.title.new(page_title)
    local content = title and title:getContent()
    if content then
        local text = content
        if text:sub(-1) ~= '\n' then
            -- Ensure the last line is terminated so the pattern matches it.
            text = text .. '\n'
        end
        local lines = collection()
        for line in text:gmatch('[\t ]*(.-)[\t\r ]*\n') do
            lines:add(line)
        end
        return lines
    end
    quit('m_ftl_read', page_title)
end

local function prepare_data(conversion_data_title, maxerrors, is_sandbox)
    -- Read the page of conversion data, and process the wikitext in each
    -- section with a wanted level-two heading, in the order listed below.
    -- Return units, defaults, links, perunits (four tables).
    -- Throw an error if a problem occurs.
    local composites, defaults, links, units, perunits, varnames = {}, {}, {}, {}, {}, {}
    -- Each entry: section key, maker factory, table given to the factory, code.
    -- Code 0: rows need a unit type from a level-three heading, and the
    -- section must be present unless is_sandbox is set.
    -- Code 1: the section is optional and rows do not need a unit type.
    local NEEDED, OPTIONAL = 0, 1
    local sections = {
        { 'overrides'   , make_override      , overrides , NEEDED   },
        { 'conversions' , make_unit          , units     , NEEDED   },
        { 'outmultiples', make_outputmultiple, units     , NEEDED   },
        { 'combinations', make_combination   , units     , NEEDED   },
        -- Input multiples come after all units are defined so that each
        -- default will be defined.
        { 'inmultiples' , make_inputmultiple , composites, NEEDED   },
        { 'defaults'    , make_default       , defaults  , NEEDED   },
        { 'links'       , make_link          , links     , NEEDED   },
        { 'perunits'    , make_perunit       , perunits  , OPTIONAL },
        { 'varnames'    , make_varname       , varnames  , OPTIONAL },
    }
    local lines = get_page_lines(conversion_data_title)
    for _, entry in ipairs(sections) do
        local key, factory, store, code = entry[1], entry[2], entry[3], entry[4]
        local heading = mtext.section_names[key]
        local maker = factory(store)
        local needed = (code == NEEDED)
        local need_section = (needed and not is_sandbox) or nil
        local need_utype = needed or nil
        prepare_section(maker, lines, heading, maxerrors, need_section, need_utype)
    end
    check_all_defaults(units, maxerrors)
    check_all_pers(units, maxerrors)
    update_units(units, composites, varnames)
    return units, defaults, links, perunits
end

local function _makeunits(results, data_title, text_title)
    -- Read the wikitext for the conversion data.
    -- Append output to given results collection, or throw error if a problem.
    -- Parameters:
    --   results    : collection that receives the generated Lua source.
    --   data_title : optional title of the page of conversion data; if it
    --                differs from the configured title, it is used instead.
    --   text_title : title of the module passed to require() to obtain the
    --                text tables and optional translations.
    -- NOTE(review): text_code, plural_suffix, specials, mtext and is_test_run
    -- are not declared in this function; presumably they are file-level
    -- variables defined earlier in the module -- confirm.
    text_code = require(text_title)
    -- The text module must provide each of these tables.
    for _, name in ipairs({ 'SIprefixes', 'eng_scales', 'currency' }) do
        if type(text_code[name]) ~= 'table' then
            quit('m_ftl_table', text_title, name)
        end
    end
    -- An optional translation table can replace the plural suffix, the
    -- special unit types/codes, and the section names, titles and messages.
    local translation = text_code.translation_table
    if translation then
        if translation.plural_suffix then
            plural_suffix = translation.plural_suffix
        end
        local ts = translation.specials
        if ts then
            if ts.utype then
                specials.utype = ts.utype
            end
            if ts.ucode then
                specials.ucode = ts.ucode
            end
        end
        local tm = translation.mtext
        if tm then
            if tm.section_names then
                mtext.section_names = tm.section_names
            end
            if tm.titles then
                mtext.titles = tm.titles
            end
            if tm.messages then
                mtext.messages = tm.messages
            end
        end
    end
    local is_sandbox
    local conversion_data_title = mtext.titles.conversion_data
    if data_title and conversion_data_title ~= data_title then
        -- A different data page was requested.
        conversion_data_title = data_title
        if is_test_run then
            -- Sandbox output: no preamble/postamble, and compact one-line
            -- prefixes/suffixes/items in place of the standard templates.
            -- These assignments change the module-level template variables.
            is_sandbox = true
            data_preamble = nil
            data_postamble = nil
            out_unit_prefix = 'local all_units = {'
            out_unit_suffix = '}'
            out_default_prefix = '\nlocal default_exceptions = {'
            out_default_suffix = '}'
            out_default_item = '\t["{symbol}"] = "{default}",'
            out_link_prefix = '\nlocal link_exceptions = {'
            out_link_suffix = '}'
            out_link_item = '\t["{symbol}"] = "{link}",'
            out_perunit_prefix = '\nlocal per_unit_fixups = {'
            out_perunit_suffix = '}'
            out_perunit_item = '\t["{lhs}"] = {rhs},'
        end
    end
    -- Processing of a section stops after 20 rows have failed.
    local units, defaults, links, perunits = prepare_data(conversion_data_title, 20, is_sandbox)
    if data_preamble then
        results:add(data_preamble)
    end
    results:add(out_unit_prefix)
    for _, unit in ipairs(units) do
        -- Choose the list of keys to output from the kind of unit.
        -- Table-valued fields are first replaced with strings of Lua source
        -- (this modifies the unit).
        local spec
        if unit.target then
            spec = alias_specification
        elseif unit.per then
            spec = per_specification
            unit.per = numbered_table_as_string(unit.per, unit)
        elseif unit.shouldbe then
            spec = shouldbe_specification
        elseif unit.combination then
            spec = combination_specification
            unit.combination = numbered_table_as_string(unit.combination, unit)
            if unit.multiple then
                unit.multiple = numbered_table_as_string(unit.multiple, unit)
            end
        else
            spec = unit_specification
        end
        add_unit_lines(results, unit, spec)
    end
    results:add(out_unit_suffix)
    -- Output the remaining tables, each as prefix, one line per entry, suffix.
    for _, t in ipairs({
        { defaults, out_default_prefix, out_default_item, out_default_suffix },
        { links   , out_link_prefix   , out_link_item   , out_link_suffix    },
        { perunits, out_perunit_prefix, out_perunit_item, out_perunit_suffix } }) do
        local data, prefix, item, suffix = t[1], t[2], t[3], t[4]
        -- In sandbox mode a table with no entries is omitted.
        if #data > 0 or not is_sandbox then
            results:add(prefix)
            for _, unit in ipairs(data) do
                -- Each "{name}" in the item template is replaced with unit[name];
                -- the parentheses discard the count returned by gsub.
                results:add((item:gsub('{([%w_]+)}', unit)))
            end
            results:add(suffix)
        end
    end
    if data_postamble then
        results:add(data_postamble)
    end
end

local function makeunits(frame)
    -- Entry point: generate the source of the data module and return it as
    -- text inside html pre tags.
    -- frame.args[1] is the optional title of the page of conversion data, and
    -- frame.args[2] is the optional title of the text module.
    -- If generation fails, the error message is appended to whatever output
    -- was produced; any warnings are placed before the output.
    local args = frame.args
    local results = collection()
    local data_title = args[1]
    local text_title = args[2] or 'Módulo:Convert/text'
    local ok, msg = pcall(_makeunits, results, data_title, text_title)
    if not ok then
        results:add(message('m_error'))
        results:add('')
        results:add(msg)
    end
    local warn
    if warnings.n > 0 then
        warn = message('m_warning') .. '\n\n' .. warnings:join() .. '\n\n'
    else
        warn = ''
    end
    -- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>.
    -- Applying nowiki renders the text as is, and preserves tab characters.
    local escaped = mw.text.nowiki(warn .. results:join())
    return '<pre>\n' .. escaped .. '\n</pre>\n'
end

-- Module export table: makeunits is the only function made available to
-- users of this module.
return { makeunits = makeunits }