Module:Diff/sandbox

-----------------------------------------------------------------------------
-- Provides functions for getting the difference between two pieces of text.
--
-- (c) 2007, 2008  Yuri Takhteyev ([email protected])
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki Lua originally by User:Ebrahim
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------

--- Localization of globals
local lstring = mw.ustring
local ltext = mw.text
local table_insert = table.insert
local table_remove = table.remove
local html_create = mw.html.create
local p = {}

local SKIP_SEPARATOR = true -- a constant
local STYLES = 'src = "Module:Diff/styles.css"'

-- Token statuses
local IN = "in"
local OUT = "out"
local SAME = "same"

-----------------------------------------------------------------------------
--- Split a string into tokens.
-- (Adapted from Gavin Kistner's split on http://lua-users.org/wiki/SplitJoin.)
--
--- @param text string Input string to be split.
--- @param separator string|'%s+'? the separator pattern (default: whitespace)
--- @param skip_separator boolean? don't include the separator in the results.
--- @return table token_list list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
	skip_separator = skip_separator or SKIP_SEPARATOR
	separator = separator or "%s+"
	local parts = {}
	local start = 1
	local split_start, split_end = lstring.find(text, separator, start)
	while split_start do
		table_insert(parts, lstring.sub(text, start, split_start - 1))
		if not skip_separator then
			table_insert(parts, lstring.sub(text, split_start, split_end))
		end
		start = split_end + 1
		split_start, split_end = lstring.find(text, separator, start)
	end
	if lstring.sub(text, start) ~= "" then
		table_insert(parts, lstring.sub(text, start))
	end
	return parts
end

-----------------------------------------------------------------------------
--- Derives the longest common subsequence of two strings.  This is a faster
--- implementation than one provided by stdlib.  Submitted by Hisham Muhammad.
-- (The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence)
--
--- NOTE: High-level design choice.
--- This implementation makes use of nested metatables to create a sparse,
--- auto-initializing 2D array. This is clever but complex and slightly
--- unconventional in order to create the LCS matrix only as needed.
--- It's a neat trick, but it may be slower than a more traditional approach,
--- especially for large strings.  It also uses more memory, since the matrix
--- is not a sparse array and the matrix is of size (m+1)×(n+1), where m and n
--- are the lengths of the two strings t1 and t2.
--- While the metatable approach is compact, it increases mental overhead too.
--- Sparse array initialization and explicitly managing state with tables would
--- just as helpful and could improve performance.
---
--- @param t1 any first string.
--- @param t2 any second string.
--- @return table lcs_matrix least common subsequence as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
	local m = #t1
	local n = #t2

	-- Build the dynamic programming matrix C on demand using nested metatables.
	-- This creates a "sparse array" structure where C[i][j] automatically
	-- initializes to 0 when first accessed, avoiding the need for explicit
	-- initialization of a potentially large (m+1) x (n+1) table.
	local C = {}
	local setmetatable = setmetatable
	local mt_tbl = {
		__index = function(t, k)
			t[k] = 0
			return 0
		end,
	}
	local mt_C = {
		__index = function(t, k)
			local tbl = {}
			setmetatable(tbl, mt_tbl)
			t[k] = tbl
			return tbl
		end,
	}
	setmetatable(C, mt_C)
	local max = math.max

	-- Loops run from 1 to m and 1 to n.
	for i = 1, m do
		local ci1 = C[i + 1]
		local ci = C[i]
		for j = 1, n do
			-- Compare the tokens directly using 1-based indices (t1[i] and t2[j]).
			if t1[i] == t2[j] then
				ci1[j + 1] = ci[j] + 1
			else
				ci1[j + 1] = max(ci1[j], ci[j + 1])
			end
		end
	end
	return C
end

-----------------------------------------------------------------------------
--- Formats an inline diff as HTML, with <ins> and <del> tags.
--
--- @param tokens table table of {token, status} key-value pairs.
--- @return string diff_buffer HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
	local diff_buffer = ""
	local token, status
	for _, token_record in ipairs(tokens) do
		token = ltext.nowiki(token_record[1])
		status = token_record[2]
		if status == "in" then
			diff_buffer = diff_buffer .. "<ins>" .. token .. "</ins>"
		elseif status == "out" then
			diff_buffer = diff_buffer .. "<del>" .. token .. "</del>"
		else
			diff_buffer = diff_buffer .. token
		end
	end
	return diff_buffer
end

-----------------------------------------------------------------------------
-- Returns a diff of two strings in tuples/two-element array, where the key
-- is a token and the value the token's status ("same", "in", "out").
--
--- @param old string "old" text string
--- @param new string "new" text string
--- @param separator string|'%s+'? separator pattern (default: whitespace).
--- @return table<string, 'same'|'in'|'out'> token_list Two-element array/tuples of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
	assert(old)
	assert(new)
	local tbl_new = split(new, separator)
	local tbl_old = split(old, separator)

	-- First, compare the beginnings and ends of strings to remove the common
	-- prefix and suffix.
	local prefix = "" -- common text in the beginning
	local suffix = "" -- common text in the end

	while tbl_old[1] and tbl_old[1] == tbl_new[1] do
		local token = table_remove(tbl_old, 1)
		table_remove(tbl_new, 1)
		prefix = prefix .. token
	end
	while tbl_old[#tbl_old] and tbl_old[#tbl_old] == tbl_new[#tbl_new] do
		local token = table_remove(tbl_old)
		table_remove(tbl_new)
		suffix = token .. suffix
	end

	-- Setup the table that will store the diff in reverse order.
	-- ... (rest of the function is now fine as it uses tbl_old/tbl_new)

	local rev_diff = {}
	-- Define local helper functions to manage token insertion into rev_diff.
	local function put(token, type)
		table_insert(rev_diff, { token, type })
	end
	local function ins(token)
		put(token, IN)
	end
	local function del(token)
		put(token, OUT)
	end
	local function same(token)
		if token then
			put(token, SAME)
		end
	end

	-- Put the suffix as the first token (we are storing the diff in the
	-- reverse order)
	same(suffix)
	-- Define a function that will scan the LCS matrix backwards and build the
	-- diff output recursively.
	local function get_diff(C, tbl_old, tbl_new, i, j)
		local old_i = tbl_old[i]
		local new_j = tbl_new[j]
		if i >= 1 and j >= 1 and old_i == new_j then
			same(old_i)
			return get_diff(C, tbl_old, tbl_new, i - 1, j - 1)
		else
			local Cij1 = C[i][j - 1]
			local Ci1j = C[i - 1][j]
			if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
				ins(new_j)
				return get_diff(C, tbl_old, tbl_new, i, j - 1)
			elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
				del(old_i)
				return get_diff(C, tbl_old, tbl_new, i - 1, j)
			end
		end
	end

	-- Then call it.
	get_diff(quick_LCS(tbl_old, tbl_new), tbl_old, tbl_new, #tbl_old + 1, #tbl_new + 1)
	-- Put the prefix in at the end
	same(prefix)

	-- Reverse the diff.
	local tbl_diff = {}
	for i = #rev_diff, 1, -1 do
		table_insert(tbl_diff, rev_diff[i])
	end
	tbl_diff.to_html = format_as_html
	return tbl_diff
end

local function attach_styles(base)
	local templatestyles_tag
	if not templatestyles_tag then
		templatestyles_tag = lstring.format(
			"%s %s",
			mw.getCurrentFrame():extensionTag({
				name = "templatestyles",
				content = "",
				args = { STYLES },
			}),
			base
		)
	else
		return lstring.format("%s%s", templatestyles_tag, base)
	end
end

-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
	local tokens = diff(old, new, separator)
	local root = html_create("")

	local token, status
	local invalidStyles = "-webkit-border-end-width: 1px;"
		.. "-webkit-border-start-width: 4px;"
		.. "-moz-border-end-width: 1px;"
		.. "-moz-border-start-width: 4px;"

	local tr = root
		:tag("table")
		:addClass("mdl-diff")
		:attr("role", "none")
		-- :attr("aria-label", "text difference, table, two columns," ..
		-- " first column content removed, second column content added")
		:tag(
			"tr"
		)

	tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("−")

	local deleted = tr
		:tag("td")
		:attr("aria-label", "Removed")
		:addClass("mdl-diff-deletedline")
		:addClass("mdl-diff-changedline")
		:css(invalidStyles) -- Workaround for a bug in Firefox where border styles are ignored on table cells
		:addClass("monospace")
		:addClass("monospaced")
		:tag("div")
		:addClass("mdl-diff-div")

	for _, token_record in ipairs(tokens) do
		token = ltext.nowiki(token_record[1]):gsub("\n", "&#10;") -- Force all newlines to encode to avoid linter issues
		status = token_record[2]
		if status == OUT then
			deleted:tag("del"):addClass("mdl-diff-deletedline"):addClass("mdl-diff-change-inline"):wikitext(token)
		elseif status == SAME then
			deleted:wikitext(token)
		end
	end

	tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("+")

	local inserted = tr:tag("td")
		:attr("aria-label", "Added")
		:addClass("mdl-diff-addedline")
		:addClass("mdl-diff-changedline")
		:css(invalidStyles)
		:addClass("monospace")
		:addClass("monospaced")
		:tag("div")
		:addClass("mdl-diff-div")

	for _, token_record in ipairs(tokens) do
		token = ltext.nowiki(token_record[1]):gsub("\n", "&#10;") -- Force all newlines to encode to avoid linter issues
		status = token_record[2]
		if status == IN then
			inserted:tag("ins"):addClass("mdl-diff-addedline"):addClass("mdl-diff-change-inline"):wikitext(token)
		elseif status == SAME then
			inserted:wikitext(token)
		end
	end

	return attach_styles(tostring(root))
end

local function main(frame)
	return wikiDiff(
		ltext.decode(ltext.unstrip(frame.args[1])),
		ltext.decode(ltext.unstrip(frame.args[2])),
		frame.args[3] or "[%s%.:-]+"
	)
end

p = {
	diff = diff,
	wikiDiff = wikiDiff,
	main = main,
}

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.