DOKUMEN123.COM

-- Export table: the public functions below are attached to it and it is
-- returned at the end of the file.
local p = {}

-- Split `str` at the first literal (non-pattern) occurrence of `delimiter`,
-- in the manner of PHP's explode(delimiter, string, 2).
-- Returns the text before the delimiter and the text after it. When the
-- delimiter does not occur, returns `str` unchanged and nil.
local function split_once(str, delimiter)
    local start_pos = mw.ustring.find(str, delimiter, 1, true)
    if not start_pos then
        return str, nil
    end

    local head = mw.ustring.sub(str, 1, start_pos - 1)
    local tail = mw.ustring.sub(str, start_pos + mw.ustring.len(delimiter))
    return head, tail
end

-- Build one cell record from the raw text found between cell separators.
-- `cell_type` is "th" or "td". Returns { type, attributes, content }.
local function parse_cell(cell_str, cell_type)
    local cell = { type = cell_type, attributes = "", content = "" }
    local attr_str, content_str = split_once(cell_str, "|")

    -- A single pipe separates attributes from content only when the text in
    -- front of it contains neither "[[" nor "-{"; otherwise the pipe belongs
    -- to a link or a language-converter tag and the whole text is content.
    if content_str
        and not mw.ustring.find(attr_str, "[[", 1, true)
        and not mw.ustring.find(attr_str, "-{", 1, true) then
        cell.attributes = mw.text.unstrip(attr_str)
        cell.content = mw.text.trim(content_str)
    else
        cell.content = mw.text.trim(cell_str)
    end

    return cell
end

-- Parse wikitext table markup into plain Lua tables.
--
-- @param text  wikitext to scan; a non-string value yields an empty result.
-- @return      a list with one entry per "{|" found, in order of appearance:
--                { attributes = <string>,
--                  caption    = <string or nil>,
--                  rows       = { { attributes = <string>,
--                                   cells = { { type = "th"|"td",
--                                               attributes = <string>,
--                                               content = <string> }, ... } },
--                                 ... } }
--
-- Only lines that (after trimming) start with "{|", "|}", "|-", "|+", "|" or
-- "!" are interpreted; any other line is ignored, also inside a table.
-- A table nested inside another one becomes its own entry in the returned
-- list; once its "|}" is reached, parsing continues in the enclosing table.
function p.parseTablesToData(text)
    local parsed_data = {}

    -- Nothing to scan: behave like input without tables instead of failing
    -- inside mw.text.split.
    if type(text) ~= "string" then
        return parsed_data
    end

    -- Stack of { tbl = ..., row = ... } for tables that were still open when
    -- a nested "{|" started. Without it, the "|}" of a nested table would
    -- also end the enclosing table and drop the rest of its rows.
    local enclosing = {}
    local current_table = nil
    local current_row = nil

    for _, raw_line in ipairs(mw.text.split(text, "\n")) do
        local line = mw.text.trim(raw_line)

        if line ~= '' then
            local first_char = mw.ustring.sub(line, 1, 1)
            local first_two = mw.ustring.sub(line, 1, 2)

            -- Optional ":" indentation, then "{|"; the capture is the rest of
            -- the line (table attributes), "" when there are none.
            local table_attributes = mw.ustring.match(line, "^:*%s*%{%|(.*)$")

            if table_attributes then
                -- Start a new table, remembering the one we are inside of.
                if current_table then
                    enclosing[#enclosing + 1] = { tbl = current_table, row = current_row }
                end
                current_table = {
                    attributes = mw.text.unstrip(table_attributes),
                    rows = {}
                }
                table.insert(parsed_data, current_table)
                current_row = nil

            elseif not current_table then
                -- Not inside a table: ignore the line.

            elseif first_two == '|}' then
                -- End the current table and resume the enclosing one, if any.
                local outer = table.remove(enclosing)
                if outer then
                    current_table, current_row = outer.tbl, outer.row
                else
                    current_table, current_row = nil, nil
                end

            elseif first_two == '|-' then
                -- Start a new row; whatever follows the dashes is its attributes.
                local row_attributes = mw.ustring.gsub(line, "^|%-+", "")
                current_row = {
                    attributes = mw.text.unstrip(row_attributes),
                    cells = {}
                }
                table.insert(current_table.rows, current_row)

            elseif first_two == '|+' then
                -- Caption: stored on the table, never creates a row.
                current_table.caption = mw.text.trim(mw.text.unstrip(mw.ustring.sub(line, 3)))

            elseif first_char == '|' or first_char == '!' then
                -- Cell data before any "|-" opens an implicit row.
                if not current_row then
                    current_row = { attributes = "", cells = {} }
                    table.insert(current_table.rows, current_row)
                end

                local cell_type = (first_char == '!') and "th" or "td"
                local body = mw.ustring.sub(line, 2)

                -- Header lines may separate cells with "!!" as well as "||".
                if first_char == '!' then
                    body = mw.ustring.gsub(body, "!!", "||")
                end

                for _, cell_str in ipairs(mw.text.split(body, "||", true)) do
                    table.insert(current_row.cells, parse_cell(cell_str, cell_type))
                end
            end
        end
    end

    return parsed_data
end

-- Debug entry point: load a fixed page, parse its tables and return a dump
-- of the sixth parsed table wrapped in <pre>.
--
-- @param frame  invocation frame; not used.
-- @return       the dump as a string, or an "Error: ..." message when the
--               title is invalid, the page has no content, or fewer than six
--               tables were found.
function p.testLoadPage(frame)
    -- 1. Page to load.
    -- Note: running this against the full US Reps page yields a very large
    -- dump; a subpage with a small test table is easier to read.
    local page_title = "List of current United States representatives#List of representatives"

    local titleObj = mw.title.new(page_title)
    if not titleObj then return "Error: Invalid title." end

    -- 2. Fetch the raw wikitext
    local content = titleObj:getContent()
    if not content then return "Error: Page not found or empty." end

    -- 3. Run the parser and pick the sixth table found on the page
    local parsed_data = p.parseTablesToData(content)[6]
    if not parsed_data then return "Error: Table not found." end

    -- 4. Dump the structure. The dump contains raw wikitext (links, markup,
    -- HTML tags), which would be rendered rather than displayed when placed
    -- inside <pre> as-is, so escape it first.
    return "<pre>\n" .. mw.text.nowiki(mw.dumpObject(parsed_data)) .. "\n</pre>"
end

-- Count the entries of the "Party" column in the sixth table of a fixed page
-- and return the tallies as a wikitext bullet list.
--
-- The header is the first row that contains cells; every later row is data.
-- A data row with fewer cells than the Party column index is treated as a
-- vacant seat and must consist of exactly two cells, the second one being
-- ''Vacant''; otherwise an assertion error is raised.
--
-- @param frame  invocation frame; not used.
-- @return       the formatted tallies, or an "Error: ..." message when the
--               page, the table or the Party column cannot be found.
function p.tallyParties(frame)
    -- 1. Load the page and parse the text
    local page_title = "List of current United States representatives#List of representatives"
    local titleObj = mw.title.new(page_title)
    if not titleObj then return "Error: Invalid title." end

    local content = titleObj:getContent()
    if not content then return "Error: Page not found or empty." end

    local parsed_data = p.parseTablesToData(content)[6]
    if not parsed_data then return "Error: Table not found." end

    -- 2. Locate the header row: the first row that actually has cells.
    -- (A table without rows, or starting with an empty row, must not crash.)
    local rows = parsed_data.rows
    local header_index = nil
    for i, row in ipairs(rows) do
        if #row.cells > 0 then
            header_index = i
            break
        end
    end
    if not header_index then return "Error: Could not find 'Party' column." end

    -- 3. Build a column map from the header row
    local column_map = {}
    local current_col = 1

    for _, cell in ipairs(rows[header_index].cells) do
        -- Strip markup tags such as <ref ...> or <span ...> (the text between
        -- an opening and a closing tag is kept). The extra parentheses drop
        -- gsub's second return value so it is not passed on to trim.
        local header_text = mw.text.trim((mw.ustring.gsub(cell.content, "<[^>]+>", "")))

        -- A header may span several columns (e.g. colspan=2)
        local colspan = tonumber(mw.ustring.match(cell.attributes, "colspan%s*=%s*[\"']?(%d+)[\"']?")) or 1

        column_map[header_text] = {
            start_index = current_col,
            span = colspan,
            -- For Party, the text name is in the right-most cell of the span
            end_index = current_col + colspan - 1
        }

        current_col = current_col + colspan
    end

    -- Abort if we can't find the Party column
    if not column_map["Party"] then return "Error: Could not find 'Party' column." end

    -- Index of the cell that holds the party name in a data row
    local party_col_index = column_map["Party"].end_index

    -- 4. Tally counters; the four fixed entries are always reported, in this order
    local fixed_order = { "Democratic", "Republican", "Independent", "Vacant" }
    local tallies = {
        Democratic = 0,
        Republican = 0,
        Independent = 0,
        Vacant = 0
    }

    -- 5. Iterate through the data rows
    for i = header_index + 1, #rows do
        local cells = rows[i].cells

        if #cells > 0 then
            -- Vacant rows use a large colspan and therefore yield fewer cells
            if #cells < party_col_index then
                assert(#cells == 2, "Row had " .. #cells .. " cells. Expected 2.")
                assert(cells[2].content == "''Vacant''", "Expected ''Vacant'', got: " .. tostring(cells[2].content))
                tallies.Vacant = tallies.Vacant + 1
            else
                local party = mw.text.trim((mw.ustring.gsub(cells[party_col_index].content, "<[^>]+>", "")))
                tallies[party] = (tallies[party] or 0) + 1
            end
        end
    end

    -- 6. Format the output: the fixed entries first, then any other label
    -- that was encountered, sorted, so that no counted row goes unreported.
    local out = { "'''Current House Tallies:'''" }
    local is_fixed = {}
    for _, name in ipairs(fixed_order) do
        is_fixed[name] = true
        out[#out + 1] = "* " .. name .. ": " .. tallies[name]
    end

    local others = {}
    for name in pairs(tallies) do
        if not is_fixed[name] then
            others[#others + 1] = name
        end
    end
    table.sort(others)
    for _, name in ipairs(others) do
        out[#out + 1] = "* " .. name .. ": " .. tallies[name]
    end

    return table.concat(out, "\n") .. "\n"
end

-- Return the export table so the functions attached to it are available to
-- whoever loads this module.
return p
DOKUMEN123.COM

Module:Sandbox/Blippy1998

Content Disclaimer