Module:SST/hosts

require('strict')
local p = {}

--[[--------------------------< S H A R E D   H E L P E R S >--------------------------------]]

--- Resolve a host identifier that may be a single value or a pool of candidates.
-- A table is treated as a sequence and one of its entries is picked at random;
-- a table with no sequence entries yields nil. Any non-table value is returned
-- unchanged.
-- @param val  identifier, or a sequence table of identifiers
-- @return the chosen identifier, or nil for an empty pool
local function resolve_id_pool(val)
    if type(val) ~= "table" then
        return val
    end

    -- The pool is measured by walking it with ipairs instead of using `#`.
    -- NOTE(review): presumably because pools can come from tables on which
    -- `#` is not reliable -- confirm against the data loader.
    local size = 0
    for position in ipairs(val) do
        size = position
    end

    if size == 0 then
        return nil
    end
    return val[math.random(1, size)]
end

--- Reduce an Internet Archive identifier (or a URL containing one) to the bare ID.
-- The input is first passed through resolve_id_pool, so it may be a pool table.
-- Values starting with "http" are treated as URLs: the path element after
-- "/details/" is preferred, otherwise the text after the last slash is used.
-- @param raw_id  identifier, URL, or pool of either
-- @return the bare identifier, or nil when nothing usable was supplied
local function get_safe_ia_id(raw_id)
    local candidate = resolve_id_pool(raw_id)
    if not candidate or candidate == "" then
        return nil
    end

    -- Anything that does not look like a URL is already a bare identifier.
    if not mw.ustring.match(candidate, "^http") then
        return candidate
    end

    local from_details = mw.ustring.match(candidate, "/details/([^/?#]+)")
    if from_details then
        return from_details
    end
    return mw.ustring.match(candidate, "([^/]+)$")
end

--- Return the leading token of a page locator (e.g. the start of a range).
-- The token is the longest run of word characters / underscores at the start
-- of the string; when the string does not start with such a run, the whole
-- trimmed string is returned instead.
-- @param val  locator text, or nil
-- @return leading token, trimmed text, or nil when val is nil/false
local function get_range_start(val)
    if val then
        local leading = mw.ustring.match(val, "^([%w_]+)")
        if leading then
            return leading
        end
        return (mw.text.trim(val))
    end
    return nil
end

-- Roman numeral -> integer conversion (used for Google Books "PR" front-matter pages).
-- The input is upper-cased, then scanned right to left: a symbol worth less than
-- the symbol to its right is subtracted, otherwise it is added.
-- Returns nil as soon as a character outside I, V, X, L, C, D, M is met.
local function roman_to_int(roman)
    local values = {I=1, V=5, X=10, L=50, C=100, D=500, M=1000}
    local numeral = string.upper(roman)
    local sum = 0
    local right_neighbour = 0

    local position = #numeral
    while position >= 1 do
        local worth = values[numeral:sub(position, position)]
        if not worth then
            return nil -- not a Roman numeral character
        end

        if worth < right_neighbour then
            sum = sum - worth
        else
            sum = sum + worth
        end

        right_neighbour = worth
        position = position - 1
    end

    return sum
end

--- Decide which search term (if any) accompanies an Internet Archive page link.
-- Precedence: a non-empty explicit term wins outright; otherwise the user
-- directive is consulted, falling back to the configured directive when the
-- user one is missing or empty. The directive 'none' disables the search,
-- 'pagenum' (or no directive at all) selects a term derived from the display
-- text, and any other directive is used verbatim as the term.
-- @param explicit_search  term given inline with the page segment
-- @param display_text     visible page text; source of the derived term
-- @param user_search      directive from the user's arguments
-- @param config_search    directive from the host configuration
-- @return search term, or nil when searching is disabled
local function resolve_ia_search(explicit_search, display_text, user_search, config_search)
    local has_explicit = explicit_search and explicit_search ~= ""
    if has_explicit then
        return explicit_search
    end

    -- Derived term: first run of word characters/dots, minus trailing
    -- "f", "+", dash and dot characters (e.g. "12ff." -> "12").
    local page_term = mw.ustring.match(display_text, "([%w%.]+)") or display_text
    page_term = mw.ustring.gsub(page_term, "[f%+–—%-%.]+$", "")

    local directive = config_search
    if user_search and user_search ~= "" then
        directive = user_search
    end

    if not directive or directive == 'pagenum' then
        return page_term
    end
    if directive == 'none' then
        return nil
    end
    return directive
end

-- Splits wikilinked text into (target of the first [[link]], plain display text).
-- In the plain text every [[target|label]] is replaced by its label, or by the
-- target when the label is missing or blank. Input that is not a string, or
-- that contains no "[[", is returned untouched as (nil, text).
local function unpack_wikilink(text)
    local is_linked = type(text) == 'string' and mw.ustring.match(text, "%[%[") ~= nil
    if not is_linked then
        return nil, text
    end

    -- Replacement callback: prefer a non-blank label over the link target.
    local function visible_part(link_target, label)
        if label and mw.text.trim(label) ~= "" then
            return label
        end
        return link_target
    end

    local first_target = mw.ustring.match(text, "%[%[([^|%]]+)%|?[^%]]*%]%]")
    local plain = mw.ustring.gsub(text, "%[%[([^|%]]+)%|?([^%]]*)%]%]", visible_part)

    return first_target, plain
end

--[[--------------------------< P A R S E R >------------------------------------------------]]

--- Break one page segment into its display text, machine ID and search term.
-- Recognised syntax inside a segment:
--   * "[http... label]"  - an external link is first reduced to its label;
--   * "[term]"           - a bracketed search term (removed from the display);
--   * "@id" / "@\"id\""  - a machine ID, optionally quoted so it may contain
--                          spaces or hyphens (quotes are kept in the result).
-- @param segment  one trimmed, comma-separated piece of a page argument
-- @return display text, machine ID (or nil), search term (or nil)
local function parse_segment(segment)
    if mw.ustring.match(segment, "^%[http") then
        local label = mw.ustring.match(segment, "%s+([^%]]+)%]%s*$")
        if label then
            segment = label
        end
    end

    local search = string.match(segment, "%[(.-)%]")
    local stripped = string.gsub(segment, "%s*%[.-%]%s*", "")

    -- Quoted machine IDs are tried first; the unquoted form accepts only
    -- alphanumerics and dots, so it stops at the first hyphen.
    local display, machine_id, rest = string.match(stripped, "^(.-)@(\"[^\"]+\")(.*)$")
    if not display then
        display, machine_id, rest = string.match(stripped, "^(.-)@([%w%.]+)(.*)$")
    end

    if not display then
        return mw.text.trim(stripped), nil, search
    end

    local visible = mw.text.trim(display .. rest)
    return visible, mw.text.trim(machine_id), search
end

--- Turn a page argument ("12, 14@n30; 20") into linked and plain renderings.
-- Semicolons are treated as commas; every non-blank segment is parsed with
-- parse_segment and, when the engine exposes build_link, offered to it.
-- @param page_str    raw page argument
-- @param engine      host engine table (build_link is optional)
-- @param host_data   host configuration forwarded to the engine
-- @param frame_args  user arguments forwarded to the engine
-- @param is_chapter  when truthy only the first bare URL is of interest
-- @return is_chapter: first bare URL (or nil), made_link flag
-- @return otherwise:  linked text, made_link flag, first bare URL, plain text
local function process_page_string(page_str, engine, host_data, frame_args, is_chapter)
    local normalised = mw.ustring.gsub(page_str, ";", ",")

    local linked_parts = {}
    local plain_parts = {}
    local first_bare_url = nil
    local made_link = false

    for _, raw in ipairs(mw.text.split(normalised, ",")) do
        local piece = mw.text.trim(raw)
        if piece ~= "" then
            local display, machine, search = parse_segment(piece)
            plain_parts[#plain_parts + 1] = display

            -- Without a link builder the segment is shown as plain text.
            local rendered = display
            if type(engine.build_link) == "function" then
                local bare_url
                rendered, bare_url = engine.build_link(host_data, display, machine, search, frame_args)
                if bare_url then
                    made_link = true
                    first_bare_url = first_bare_url or bare_url
                end
            end
            linked_parts[#linked_parts + 1] = rendered
        end
    end

    if is_chapter then
        return first_bare_url, made_link
    end

    local linked_text = table.concat(linked_parts, ", ")
    local plain_text = table.concat(plain_parts, ", ")
    return linked_text, made_link, first_bare_url, plain_text
end

--[[--------------------------< T H E   E N G I N E S >--------------------------------------]]
-- Placed before the Orchestrator to ensure tripwires can safely assign p.Physical 

-- 1. INTERNET ARCHIVE (IA)
-- Engine for archive.org items. The item is identified by host_data.id (or
-- host_data.ia_id); host_data.iasearch / host_data.iadisplay supply defaults
-- that the user can override with the 'ia-search' / 'ia-display' arguments.
p.IA = {
    --- Build a deep link to a single page of an Internet Archive item.
    -- @param host_data   host configuration (id / ia_id, iasearch, iadisplay)
    -- @param display     visible page text
    -- @param machine     explicit @machine leaf, or nil
    -- @param search      inline search term from the page segment, or nil
    -- @param frame_args  user arguments ('ia-search', 'ia-display' are read)
    -- @return wikitext external link and bare URL, or (display, nil) when no
    --         link can be built
    build_link = function(host_data, display, machine, search, frame_args)
        local active_id = get_safe_ia_id(host_data.id or host_data.ia_id)
        if not active_id then return display, nil end

        -- An explicit machine leaf wins; otherwise use the start of the display text.
        local final_target = machine or get_range_start(display)
        if not final_target then return display, nil end

        local u_search = frame_args['ia-search']
        local c_search = host_data.iasearch
        local search_term = resolve_ia_search(search, display, u_search, c_search)

        -- "full screen" selects the /stream/ endpoint; anything else uses /details/.
        local display_mode = frame_args['ia-display'] or host_data.iadisplay
        local endpoint = (display_mode == "full screen") and "stream" or "details"

        local url = string.format("https://archive.org/%s/%s/page/%s/mode/2up", endpoint, active_id, final_target)
        if search_term and search_term ~= "" then
            url = url .. "?q=" .. mw.uri.encode(search_term, "QUERY")
        end

        return string.format("[%s %s]", url, display), url
    end,

    --- Build the item-level URL used when no page-level link was produced.
    -- Falls back to an archive.org search URL when only host_data.query is set.
    -- @param host_data   host configuration (id / ia_id, query, iasearch, iadisplay)
    -- @param frame_args  user arguments ('ia-search', 'ia-display' are read)
    -- @return URL (or nil) and nil (this engine reports no access level)
    build_base_url = function(host_data, frame_args)
        local active_id = get_safe_ia_id(host_data.id or host_data.ia_id)
        local active_query = (host_data.query and host_data.query ~= "") and host_data.query or nil

        if active_id then
            local display_mode = frame_args['ia-display'] or host_data.iadisplay
            local endpoint = (display_mode == "full screen") and "stream" or "details"
            local url = "https://archive.org/" .. endpoint .. "/" .. active_id .. "/"

            -- A blank user 'ia-search' counts as "not supplied", exactly as in
            -- resolve_ia_search, so the host default still applies.
            local fallback_search = frame_args['ia-search']
            if not fallback_search or fallback_search == "" then
                fallback_search = host_data.iasearch
            end

            -- The 'none' / 'pagenum' directives carry no literal term to search for.
            if fallback_search and fallback_search ~= "none" and fallback_search ~= "pagenum" and fallback_search ~= "" then
                url = url .. "?q=" .. mw.uri.encode(fallback_search, "QUERY")
            end
            return url, nil

        elseif active_query then
            return "https://archive.org/search?query=" .. mw.uri.encode(active_query, "QUERY"), nil
        end
        return nil, nil
    end
}

-- 2. PROJECT GUTENBERG
-- Engine for Project Gutenberg e-books, identified by host_data.id.
p.Gutenberg = {}

--- Link a page locator to an anchor inside the book's HTML edition.
-- The anchor comes from the machine leaf when given, else from the display
-- text. A leading "u" before digits is dropped and an all-digit anchor is
-- prefixed with "page-".
-- @return wikitext external link and bare URL, or (display, nil)
function p.Gutenberg.build_link(host_data, display, machine, search, frame_args)
    local book_id = resolve_id_pool(host_data.id)
    if not book_id or book_id == "" then
        return display, nil
    end

    local anchor = get_range_start(machine or display)
    if not anchor then
        return display, nil
    end

    anchor = mw.ustring.gsub(anchor, "^u(%d+)", "%1")
    local is_numeric = mw.ustring.match(anchor, "^%d+$")
    if is_numeric then
        anchor = "page-" .. anchor
    end

    local url = string.format("https://gutenberg.org/cache/epub/%s/pg%s-images.html#%s", book_id, book_id, anchor)
    local link = string.format("[%s %s]", url, display)
    return link, url
end

--- Landing page of the e-book; no access level is reported.
-- @return URL (or nil) and nil
function p.Gutenberg.build_base_url(host_data, frame_args)
    local book_id = resolve_id_pool(host_data.id)
    if book_id and book_id ~= "" then
        return "https://www.gutenberg.org/ebooks/" .. book_id, nil
    end
    return nil, nil
end

-- 3. HATHITRUST
-- Engine for HathiTrust volumes, identified by host_data.id.
p.Hathi = {}

--- Link a page locator to a HathiTrust page sequence.
-- A link is only produced when an explicit machine leaf is present; its
-- leading token is used as the `seq` value after dropping an "s" prefix
-- that precedes digits. An inline search term is appended as `q1`.
-- @return wikitext external link and bare URL, or (display, nil)
function p.Hathi.build_link(host_data, display, machine, search, frame_args)
    local volume_id = resolve_id_pool(host_data.id)
    if not volume_id or volume_id == "" then
        return display, nil
    end

    -- STRICT SSTS ENFORCEMENT: without a machine sequence there is nothing to link to.
    if not machine then
        return display, nil
    end

    local seq = get_range_start(machine)
    seq = mw.ustring.gsub(seq, "^s(%d+)", "%1")

    local url = string.format("https://babel.hathitrust.org/cgi/pt?id=%s&view=1up&seq=%s", volume_id, seq)
    local wants_search = search and search ~= ""
    if wants_search then
        url = url .. "&q1=" .. mw.uri.encode(search, "QUERY")
    end

    local link = string.format("[%s %s]", url, display)
    return link, url
end

--- Handle URL of the volume; no access level is reported.
-- @return URL (or nil) and nil
function p.Hathi.build_base_url(host_data, frame_args)
    local volume_id = resolve_id_pool(host_data.id)
    if volume_id and volume_id ~= "" then
        return "https://hdl.handle.net/2027/" .. volume_id, nil
    end
    return nil, nil
end

-- 4. WIKISOURCE
-- Engine for English Wikisource scans. host_data.id names the scan file; a
-- leading "Page:" namespace prefix on the id is tolerated by both functions.
p.Wikisource = {
    --- Link a page locator to a page of the scan in the Page: namespace.
    -- @param host_data   host configuration (id is read)
    -- @param display     visible page text
    -- @param machine     explicit @machine leaf; required for a link
    -- @param search      unused by this engine
    -- @param frame_args  unused by this engine
    -- @return interwiki wikilink and bare URL, or (display, nil) when no link
    --         can be built
    build_link = function(host_data, display, machine, search, frame_args)
        local id = resolve_id_pool(host_data.id)
        if not id or id == "" then return display, nil end

        -- STRICT SSTS ENFORCEMENT: Only link if an explicit @machine leaf is provided
        if not machine then return display, nil end

        local target = get_range_start(machine)
        -- Quoted leaves (@"Chapter 1") keep their quotes up to here; drop them.
        target = mw.ustring.gsub(target, '^"(.-)"$', "%1")
        -- Drop the "w" routing prefix.
        -- NOTE(review): this also eats the first letter of a quoted name that
        -- happens to start with a lowercase "w" -- confirm whether that is intended.
        target = mw.ustring.gsub(target, "^w(.+)", "%1")
        target = mw.ustring.gsub(target, " ", "_")

        local file_id = mw.ustring.gsub(id, " ", "_")
        -- The namespace is added below; strip one that is already on the id
        -- (build_base_url accepts such ids too) so it is not doubled.
        file_id = mw.ustring.gsub(file_id, "^Page:", "")

        local bare_url = string.format("https://en.wikisource.org/wiki/Page:%s/%s", file_id, target)
        local formatted_link = string.format("[[s:Page:%s/%s|%s]]", file_id, target, display)

        return formatted_link, bare_url
    end,

    --- URL of the work's main page, derived from the scan file name.
    -- Spaces become underscores, a leading "Page:" is removed and a trailing
    -- lowercase 3-4 letter extension (e.g. ".djvu", ".pdf") is dropped.
    -- @return URL (or nil) and nil (this engine reports no access level)
    build_base_url = function(host_data, frame_args)
        local id = resolve_id_pool(host_data.id)
        if not id or id == "" then return nil, nil end

        local clean_id = mw.ustring.gsub(id, " ", "_")
        clean_id = mw.ustring.gsub(clean_id, "^Page:", "")
        clean_id = mw.ustring.gsub(clean_id, "%.%l%l%l%l?$", "")

        return "https://en.wikisource.org/wiki/" .. clean_id, nil
    end
}

-- 5. GOOGLE BOOKS
-- Engine for Google Books volumes, identified by host_data.id.
p.GBook = {}

--- Link a page locator to a Google Books page via the `pg` parameter.
-- With a machine leaf, its leading token is used after dropping a "g" prefix.
-- Without one, the start of the display text is mapped as follows:
--   all digits          -> "PA" .. digits
--   all Roman letters   -> "PR" .. integer value (falls back to "PA" .. text)
--   starts with digits  -> "PA" .. those digits
--   anything else       -> passed through unchanged
-- An inline search term is appended as `dq`.
-- @return wikitext external link and bare URL, or (display, nil)
function p.GBook.build_link(host_data, display, machine, search, frame_args)
    local volume_id = resolve_id_pool(host_data.id)
    if not volume_id or volume_id == "" then
        return display, nil
    end

    local token = get_range_start(machine or display)
    if not token then
        return display, nil
    end

    local page_param
    if machine then
        page_param = mw.ustring.gsub(token, "^g([%w_]+)", "%1")
    elseif mw.ustring.match(token, "^%d+$") then
        -- Pure Arabic numeral (e.g. "42")
        page_param = "PA" .. token
    elseif mw.ustring.match(string.lower(token), "^[ivxlcdm]+$") then
        -- Pure Roman numeral (e.g. "xiii")
        local as_int = roman_to_int(token)
        if as_int then
            page_param = "PR" .. tostring(as_int)
        else
            page_param = "PA" .. token -- Failsafe
        end
    else
        -- Mixed text (e.g. "42a" or "cover"): use the leading digits when present.
        local leading_digits = mw.ustring.match(token, "^%d+")
        if leading_digits then
            page_param = "PA" .. leading_digits
        else
            page_param = token
        end
    end

    local url = string.format("https://books.google.com/books?id=%s&pg=%s", volume_id, page_param)
    local wants_search = search and search ~= ""
    if wants_search then
        url = url .. "&dq=" .. mw.uri.encode(search, "QUERY")
    end

    local link = string.format("[%s %s]", url, display)
    return link, url
end

--- Volume-level URL; no access level is reported.
-- @return URL (or nil) and nil
function p.GBook.build_base_url(host_data, frame_args)
    local volume_id = resolve_id_pool(host_data.id)
    if volume_id and volume_id ~= "" then
        return "https://books.google.com/books?id=" .. volume_id, nil
    end
    return nil, nil
end

-- 6. STATIC WEB LINK
-- Engine for a fixed URL: page locators are never linked individually.
p.Web = {}

--- Page locators are shown as plain text.
-- @return display, nil
function p.Web.build_link(host_data, display, machine, search, frame_args)
    return display, nil
end

--- Expose the configured URL together with its access level.
-- @return host_data.url and host_data.access, or (nil, nil) when no URL is set
function p.Web.build_base_url(host_data, frame_args)
    local has_url = host_data.url and host_data.url ~= ""
    if has_url then
        return host_data.url, host_data.access
    end
    return nil, nil
end

-- 7. PHYSICAL BOOK (NO DIGITAL HOST)
-- Null engine: it never produces a link or a URL. p.process switches to it
-- when a year/ISBN mismatch is detected.
p.Physical = {}

--- Page locators are shown as plain text.
-- @return display, nil
function p.Physical.build_link(host_data, display, machine, search, frame_args)
    return display, nil
end

--- There is no base URL for a physical book.
-- @return nil, nil
function p.Physical.build_base_url(host_data, frame_args)
    return nil, nil
end

--[[--------------------------< O R C H E S T R A T O R   H E L P E R S >--------------------]]

--- Extract the 9-digit core of an ISBN so that the ISBN-10 and ISBN-13 forms
-- of the same book compare equal (prefix and check digit are ignored).
-- Everything except digits and the check character X/x is discarded first,
-- so hyphens, spaces and a leading "ISBN" label are tolerated.
-- @param isbn_str  ISBN text, or nil
-- @return the 9 core digits as a string, or nil when fewer are available
local function extract_core_isbn(isbn_str)
    if not isbn_str then return nil end

    -- Keep only digits and X/x. (The class must name a literal "x": "%x" is
    -- the hexadecimal-digit class and would let the letters A-F through.)
    -- ISBN characters are ASCII, so the byte-oriented string library suffices.
    local clean = string.upper((string.gsub(isbn_str, "[^%dXx]", "")))

    -- If it's a 13-digit ISBN starting with 978, the core is the next 9 digits
    local core13 = string.match(clean, "^978(%d%d%d%d%d%d%d%d%d)")
    if core13 then return core13 end

    -- Otherwise grab the first 9 digits
    return (string.match(clean, "^(%d%d%d%d%d%d%d%d%d)"))
end

--- Detect whether the user is citing a different edition than the shard describes.
-- A mismatch is reported when the 4-digit years differ, when the ISBN cores
-- differ, or when the user gives an ISBN and the shard has none.
-- Blank user arguments (e.g. `|isbn=`) are treated as not supplied.
-- @param citeArgs    citation arguments holding the shard's values ('year', 'isbn')
-- @param frame_args  user arguments ('year', 'date', 'isbn' are read)
-- @return mismatch_detected (boolean), shard_year (the shard's original year value)
local function evaluate_mismatches(citeArgs, frame_args)
    -- Read a user argument, mapping blank strings to nil so that an empty
    -- template parameter neither triggers a mismatch nor shadows a fallback key.
    local function supplied(key)
        local value = frame_args[key]
        if type(value) == 'string' and mw.text.trim(value) == "" then
            return nil
        end
        return value
    end

    local mismatch_detected = false
    -- citeArgs was loaded with the shard's cite_params in the router
    local shard_year = citeArgs['year']
    local user_year = supplied('year') or supplied('date')

    -- 1. Check for Year conflicts (compared on the first 4-digit run of each value)
    if shard_year and user_year then
        local s_yr = mw.ustring.match(shard_year, "%d%d%d%d")
        local u_yr = mw.ustring.match(user_year, "%d%d%d%d")
        if s_yr and u_yr and s_yr ~= u_yr then
            mismatch_detected = true
        end
    end

    -- 2. Check for ISBN conflicts
    local shard_isbn = citeArgs['isbn']
    local user_isbn = supplied('isbn')
    if user_isbn then
        if shard_isbn then
            local s_core = extract_core_isbn(shard_isbn)
            local u_core = extract_core_isbn(user_isbn)
            if s_core and u_core and s_core ~= u_core then
                mismatch_detected = true
            end
        else
            -- User provided an ISBN, but shard has none. This is a later reprint.
            mismatch_detected = true
        end
    end

    return mismatch_detected, shard_year
end

--- After a detected mismatch, preserve the shard's year as 'orig-year'.
-- Nothing happens unless mismatch_detected is truthy. The shard year is only
-- copied when no 'orig-year' / 'orig-date' exists yet and the citation's
-- current 'year' / 'date' contains a 4-digit year different from the shard's.
-- @param citeArgs           citation arguments, mutated in place
-- @param mismatch_detected  result of the mismatch check
-- @param shard_year         the shard's original year value, or nil
local function apply_mismatch_rescues(citeArgs, mismatch_detected, shard_year)
    if mismatch_detected then
        local current_year = citeArgs['year'] or citeArgs['date']
        local already_has_orig = citeArgs['orig-year'] or citeArgs['orig-date']

        -- 1. Push shard year to orig-year so historical info isn't lost
        if shard_year and not already_has_orig then
            local shard_digits = mw.ustring.match(shard_year, "%d%d%d%d")
            local current_digits = current_year and mw.ustring.match(current_year, "%d%d%d%d")

            -- Only when the citation now carries an explicit, DIFFERENT year
            if current_digits and shard_digits ~= current_digits then
                citeArgs['orig-year'] = shard_year
            end
        end

        -- We intentionally DO NOT delete the year to appease pre-1970 ISBN CS1 errors.
        -- Deleting the year also destroys the CITEREF Harvard anchors used by {{sfn}}.
    end
end

--- Merge the user's arguments into citeArgs and turn page locators into host links.
-- Blank user values are skipped. Page-type arguments are rendered through
-- process_page_string; 'chapter-page' / 'title-page' only contribute URLs;
-- every other non-internal argument is copied into citeArgs.
-- @param engine      host engine table, used via process_page_string
-- @param host_data   host configuration (ws_base is read here)
-- @param citeArgs    citation arguments, mutated in place
-- @param frame_args  user arguments, iterated with pairs
-- @return has_deep_link  true when a page/pages/p/pp argument produced a link
local function process_page_arguments(engine, host_data, citeArgs, frame_args)
    -- Arguments consumed by this module that are never forwarded to the citation.
    local internal_keys = {
        ['ia-search'] = true,
        ['ia-display'] = true,
        ['chapter-page'] = true,
        ['title-page'] = true  
    }
    local has_deep_link = false

    -- CS1 Collision Shield: If the user provides a specific in-source locator, 
    -- instantly delete the shard's default locators to prevent red errors.
    if frame_args['page'] or frame_args['pages'] or frame_args['p'] or frame_args['pp'] or frame_args['at'] or frame_args['sheet'] or frame_args['sheets'] then
        citeArgs['page'] = nil
        citeArgs['pages'] = nil
        citeArgs['at'] = nil
        citeArgs['sheet'] = nil
        citeArgs['sheets'] = nil
    end

    for k, v in pairs(frame_args) do
        local val = mw.text.trim(v)
        if val ~= "" then
            local k_lower = string.lower(k)
            
            if k_lower == 'page' or k_lower == 'pages' or k_lower == 'p' or k_lower == 'pp' then
                -- For encyclopedia/journal citations without a Wikisource base, the
                -- first page URL becomes the citation's 'url'.
                local apply_article_url = (citeArgs['_template'] == 'cite encyclopedia' or citeArgs['_template'] == 'cite journal') and not host_data.ws_base
                local formatted_str, link_generated, bare_url, clean_str = process_page_string(val, engine, host_data, frame_args, false)
                -- Aliases collapse onto the canonical keys: page/p -> 'page', pages/pp -> 'pages'.
                local final_key = (k_lower == 'page' or k_lower == 'p') and 'page' or 'pages'
                
                if apply_article_url then
                    if bare_url then
                        citeArgs['url'] = bare_url
                        has_deep_link = true
                    end
                    
                    -- pairs order is unspecified, so the canonical key always wins
                    -- over its alias regardless of which one is visited first.
                    if not citeArgs[final_key] or k_lower == final_key then
                        citeArgs[final_key] = formatted_str
                    end
                else
                    if not citeArgs[final_key] or k_lower == final_key then
                        citeArgs[final_key] = formatted_str
                    end
                    if link_generated then has_deep_link = true end
                end
                
            elseif k_lower == 'chapter-page' then
                -- Only the first bare URL is used; it becomes 'chapter-url'.
                local bare_url, _ = process_page_string(val, engine, host_data, frame_args, true)
                if bare_url then citeArgs['chapter-url'] = bare_url end
                
            elseif k_lower == 'title-page' then
                -- Only the first bare URL is used; it becomes 'url'.
                local bare_url, _ = process_page_string(val, engine, host_data, frame_args, true)
                if bare_url then citeArgs['url'] = bare_url end
                
            elseif not internal_keys[k_lower] then
                -- Protect shard data: prevent routing integers from clobbering defined volumes/editions
                if (k_lower == 'volume' or k_lower == 'edition') and (citeArgs[k] or citeArgs[k_lower]) then
                    -- Preserve the shard's text, ignore the user's routing input
                else
                    citeArgs[k] = val 
                    -- CS1 Collision Prevention: Clean up legacy shard dates
                    if k_lower == 'date' then citeArgs['year'] = nil end
                    if k_lower == 'year' then citeArgs['date'] = nil end
                end
            end
        end
    end

    -- Auto-infer chapter-url: when a chapter/article/entry is cited without its
    -- own URL, link it to the first page token of the user's page argument.
    local user_chap = citeArgs['chapter'] or citeArgs['article'] or citeArgs['entry']
    if user_chap and not citeArgs['chapter-url'] and not citeArgs['article-url'] and not citeArgs['entry-url'] then
        local pgs = frame_args['page'] or frame_args['pages'] or frame_args['p'] or frame_args['pp']
        if pgs then
            -- Leading run of word characters, dots and underscores (after optional whitespace).
            local auto_chapter_page = mw.ustring.match(pgs, "^%s*([%w%._]+)")
            if auto_chapter_page then
                local bare_url, _ = process_page_string(auto_chapter_page, engine, host_data, frame_args, true)
                if bare_url then citeArgs['chapter-url'] = bare_url end
            end
        end
    end

    -- CS1 Encyclopedia URL Shift: with an entry but no title, move 'url' to
    -- 'chapter-url' (unless one is already set) and clear 'url'.
    if citeArgs['_template'] == 'cite encyclopedia' and citeArgs['url'] and user_chap and not citeArgs['title'] then
        if not citeArgs['chapter-url'] then
            citeArgs['chapter-url'] = citeArgs['url']
        end
        citeArgs['url'] = nil
    end

    return has_deep_link
end

--- Point the citation at Wikisource when the host defines a ws_base page.
-- With a title: the title becomes a link to "<ws_base>/<title>", unlinked
-- 'encyclopedia' / 'journal' values are linked to ws_base itself, and
-- 'title-link' and 'url' are cleared.
-- Without a title: 'title-link' is set to the ws_base page and 'url' is cleared.
-- @param host_data  host configuration (ws_base is read)
-- @param citeArgs   citation arguments, mutated in place
local function apply_wikisource_formatting(host_data, citeArgs)
    if not host_data.ws_base then
        return
    end

    local title = citeArgs['title']
    if not title then
        citeArgs['title-link'] = "s:" .. host_data.ws_base
        citeArgs['url'] = nil -- a url next to title-link would conflict
        return
    end

    citeArgs['title'] = "[[s:" .. host_data.ws_base .. "/" .. title .. "|" .. title .. "]]"

    -- Link the containing work too, unless it already carries a wikilink.
    for _, field in ipairs({ 'encyclopedia', 'journal' }) do
        local current = citeArgs[field]
        if current and not mw.ustring.match(current, "%[%[") then
            citeArgs[field] = "[[s:" .. host_data.ws_base .. "|" .. current .. "]]"
        end
    end

    citeArgs['title-link'] = nil
    citeArgs['url'] = nil -- Prevent CS1 from throwing "extraneous wikilink" error
end

--[[--------------------------< M A I N   O R C H E S T R A T O R >--------------------------]]

--- Main entry point: combine shard data, user arguments and a host engine
-- into the final citation argument table.
-- The numbered steps below run in a fixed order; each one reads what the
-- previous ones wrote into citeArgs.
-- @param engine      host engine table (one of the p.* engines)
-- @param host_data   host configuration for that engine
-- @param citeArgs    citation arguments pre-loaded with the shard's values; mutated in place
-- @param frame_args  user-supplied arguments
-- @return citeArgs (the same table that was passed in)
function p.process(engine, host_data, citeArgs, frame_args)
    -- ==========================================================
    -- 1. SAFETY TRIPWIRES (Detect Mismatches)
    -- ==========================================================
    -- On a year/ISBN mismatch the citation is treated as a physical book:
    -- host data is replaced by an empty table and the null engine is used.
    local mismatch_detected, shard_year = evaluate_mismatches(citeArgs, frame_args)
    if mismatch_detected then
        host_data = {} -- Safe empty table to sever links instead of mutating read-only table
        engine = p.Physical
    end
    
    -- ==========================================================
    -- 1.5. ENCYCLOPEDIA ROUTING & DOWNGRADE
    -- ==========================================================
    if citeArgs['_template'] == 'cite encyclopedia' then
        local user_article = frame_args['article'] or frame_args['entry'] or frame_args['title'] or frame_args['chapter']
        local has_article = user_article and mw.text.trim(user_article) ~= ""

        if has_article then
            -- The user cites an entry: the shard's title moves to 'encyclopedia'
            -- (keeping its title-link as a wikilink) to free 'title' for the entry.
            if citeArgs['title'] and not citeArgs['encyclopedia'] then
                if citeArgs['title-link'] then
                    citeArgs['encyclopedia'] = "[[" .. citeArgs['title-link'] .. "|" .. citeArgs['title'] .. "]]"
                    citeArgs['title-link'] = nil
                else
                    citeArgs['encyclopedia'] = citeArgs['title']
                end
                citeArgs['title'] = nil
            end
        else
            -- No entry given: downgrade to 'cite book' and move an 'encyclopedia'
            -- value back into 'title' / 'title-link'.
            citeArgs['_template'] = 'cite book'
            if citeArgs['encyclopedia'] and not citeArgs['title'] then
                local encyc_link, clean_encyc = unpack_wikilink(citeArgs['encyclopedia'])
                citeArgs['title'] = clean_encyc
                if encyc_link and not citeArgs['title-link'] then
                    citeArgs['title-link'] = encyc_link
                end
                citeArgs['encyclopedia'] = nil
            end
        end
    end
    -- ==========================================================
    -- 2. UNPACK WIKILINKED TITLES
    -- ==========================================================
    -- A wikilinked title is split into plain 'title' plus 'title-link'
    -- (an existing 'title-link' is kept).
    local extracted_link, clean_title = unpack_wikilink(citeArgs['title'])
    if extracted_link then
        if not citeArgs['title-link'] then citeArgs['title-link'] = extracted_link end
        citeArgs['title'] = clean_title
    end

    -- ==========================================================
    -- 3. ROUTE ARGUMENTS & DEEP LINKS
    -- ==========================================================
    local has_deep_link = process_page_arguments(engine, host_data, citeArgs, frame_args)

    -- ==========================================================
    -- 4. BASE URL FALLBACK
    -- ==========================================================
    -- Only when nothing else links the citation: no url, chapter-url or
    -- title-link, and no page-level deep link was generated.
    if not citeArgs['url'] and not citeArgs['chapter-url'] and not citeArgs['title-link'] then
        if not has_deep_link and type(engine.build_base_url) == "function" then
            local url, access = engine.build_base_url(host_data, frame_args)
            if url then citeArgs['url'] = url end
            if access then citeArgs['url-access'] = access end
        end
    end

    -- ==========================================================
    -- 5. WIKISOURCE FORMATTING
    -- ==========================================================
    apply_wikisource_formatting(host_data, citeArgs)
    
    -- ==========================================================
    -- 6. SAFETY TRIPWIRE RESCUES
    -- ==========================================================
    apply_mismatch_rescues(citeArgs, mismatch_detected, shard_year)
    
    -- ==========================================================
    -- 7. CS1 CONFLICT RESOLUTION
    -- ==========================================================
    -- When both are present the url wins and the title-link is dropped.
    if citeArgs['url'] and citeArgs['title-link'] then
        citeArgs['title-link'] = nil
    end

    return citeArgs
end

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for re-presentation here. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.