--[[ ----------------------------------

Lua module implementing the {{webarchive}} template. 

A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}
	
]]


--[[--------------------------< D E P E N D E N C I E S >------------------------------------------------------
]]

require('Module:No globals');
local getArgs = require ('Module:Arguments').getArgs;


--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]

local categories = {};															-- category names
local config = {};																-- global configuration settings
local digits = {};																-- for i18n; table that translates local-wiki digits to western digits
local err_warn_msgs = {};														-- error and warning messages
local excepted_pages = {};														-- full page names that are categorized even when their namespace or subpage type is normally excluded
local month_num = {};															-- for i18n; table that translates local-wiki month names to western digits
local prefixes = {};															-- service provider tail string prefixes
local services = {};															-- archive service provider data from the data module
local s_text = {};																-- table of static text strings used to build final rendering
local uncategorized_namespaces = {};											-- list of namespaces that we should not categorize
local uncategorized_subpages = {};												-- list of subpages that should not be categorized


--[[--------------------------< P A G E   S C O P E   I D E N T I F I E R S >----------------------------------
]]

local non_western_digits;														-- boolean flag set true when data.digits.enable is true
local this_page = mw.title.getCurrentTitle();									-- title object of the current page; consulted when deciding whether to categorize

local track = {};																-- Associative array to hold tracking categories; keys are category names, values are 1
local ulx = {};																	-- Associative array to hold template data; one sub-table per url, keyed 'url1', 'url2', ...


--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Fills the numbered arguments of message string <msg> from the table <args> by way of
mw.message.newRawMessage().  When <args> is nil or false there is nothing to fill in and <msg> is returned as is.

]]

local function substitute (msg, args)
	if not args then
		return msg;																-- no argument table; message is used verbatim
	end

	return mw.message.newRawMessage (msg, args):plain() or msg;				-- fall back to the raw message should plain() yield nothing
end


--[[--------------------------< tableLength >-----------------------

Counts the entries of table T (array part and hash part alike) by walking it with pairs().

Returns the number of key/value pairs; 0 for an empty table.

]]

local function tableLength(T)
	local entries = 0

	for _key in pairs(T) do														-- the keys themselves are of no interest, only how many there are
		entries = entries + 1
	end

	return entries
end


--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Builds wikilink markup, or plain text when linking is suppressed.

When no_link is nil:
	link and display both set	- returns [[L|D]]
	only link set				- returns [[L]]
	link missing or empty		- returns display as given, or an empty string when display is nil

When no_link is anything other than nil (including false) no markup is produced:
	display set					- returns display
	otherwise					- returns link as given, or an empty string when link is nil

]=]

local function make_wikilink (link, display, no_link)
	local have_link = link and ('' ~= link);
	local have_display = display and ('' ~= display);

	if nil ~= no_link then														-- linking suppressed; plain text only
		if have_display then
			return display;
		end
		return link or '';
	end

	if not have_link then
		return display or '';													-- nothing to link to; fall back to the display text
	end

	if have_display then
		return '[[' .. link .. '|' .. display .. ']]';
	end

	return '[[' .. link .. ']]';
end


--[[--------------------------< createTracking >-----------------------

Renders the tracking categories collected in track[] as a string of wikilinks.

Returns an empty string when the current page is not to be categorized, that is when it is not listed in
excepted_pages and either its namespace is in uncategorized_namespaces or its title matches one of the
patterns in uncategorized_subpages.  Also returns an empty string when track[] holds nothing.

]]

local function createTracking()
	local always_categorize = excepted_pages[this_page.fullText];				-- typically this module's / template's testcases page(s)

	if not always_categorize then
		if uncategorized_namespaces[this_page.nsText] then
			return '';															-- whole namespace is excluded
		end

		for _, pattern in ipairs (uncategorized_subpages) do
			if this_page.text:match (pattern) then
				return '';														-- this kind of subpage is excluded
			end
		end
	end

	local links = {};
	for category in pairs (track) do											-- keys of track[] are the category names
		links[#links + 1] = make_wikilink (category);
	end

	return table.concat (links);												-- empty string when no categories were recorded
end


--[[--------------------------< inlineError >-----------------------

Critical error: the whole output is a single red error message.

msg is the message text and args an optional table of values for substitute() to fill into it.  The error
category is recorded in track[] and the categories rendered by createTracking() are appended to the message.

Called as the last thing before abandoning this module.

]]

local function inlineError (msg, args)
	track[categories.error] = 1													-- record first so that createTracking() below picks it up

	local message = substitute (msg, args);									-- the formatted error message

	return '<span style="font-size:100%" class="error citation-comment">Error in '
		.. config.tname															-- the local language template name
		.. ' template: '
		.. message
		.. '.</span>'
		.. createTracking();													-- the category links
end


--[[--------------------------< inlineRed >-----------------------

Wraps text fragment msg in an error-styled span so that it can be embedded in the final output.

trackmsg selects the tracking category to record in track[]: "warning" or "error".  Any other value
(including nil) records nothing.

]]

local function inlineRed(msg, trackmsg)
	if "warning" == trackmsg or "error" == trackmsg then
		track[categories[trackmsg]] = 1;										-- categories.warning or categories.error
	end

	local open_span = '<span style="font-size:100%" class="error citation-comment">';
	return open_span .. msg .. '</span>'
end

--[[--------------------------< serviceName >-----------------------

Given a host name extracted by mw.uri.new() (eg. web.archive.org), identify the archive service and record
the results in the page-scope tables:
	ulx.url1.service	- service ID taken from services[index][4], or 'other'
	ulx.url1.tail		- tail string: leading space, prefix, service link (or bare host name), optional postfix
	track[<category>]	- services[index][5] or categories.other for a known service; categories.unknown otherwise

host is lowercased and stripped of a leading 'web.' and then of a leading 'www.' before the lookup.  An exact
key match in services is preferred; failing that, the first services key found inside host at a letter
boundary is used.

no_link is handed on to make_wikilink(); when it is not nil the service name is rendered without a link.

A host that matches no service gets err_warn_msgs.unknown_url rendered in red in its tail and is tracked in
both the unknown and the error categories.

]]

local function serviceName(host, no_link)
	local tracking;
	local index;
	
	host = host:lower():gsub ('^web%.(.+)', '%1'):gsub ('^www%.(.+)', '%1');	-- lowercase, remove web. and www. subdomains

	if services[host] then
		index = host;
	else
		for k, _ in pairs (services) do
			local escaped = k:gsub ('([%.%-])', '%%%1');						-- escape '.' and '-' so the key is matched literally
			if host:find ('%f[%a]' .. escaped) then								-- key must begin where a letter follows a non-letter (or at start of host)
				index = k;
				break;
			end
		end
	end
	
	if index then
		local service = services[index];
		local out = {''};														-- empty string in [1] so that concatenated result has leading single space
		ulx.url1.service = service[4] or 'other';
		tracking = service[5] or categories.other;
																				-- build tail string
		if false == service[1] then												-- select prefix
			table.insert (out, prefixes.at);
		elseif true == service[1] then
			table.insert (out, prefixes.atthe);
		else
			table.insert (out, service[1]);										-- neither true nor false: the value itself is the prefix
		end
		
		table.insert (out, make_wikilink (service[2], service[3], no_link));	-- add article wikilink
		if service[6] then														-- add tail postfix if it exists
			table.insert (out, service[6]);
		end
		
		ulx.url1.tail = table.concat (out, ' ');								-- put it all together; result has leading space character

	else																		-- here when unknown archive
		ulx.url1.service = 'other';
		tracking = categories.unknown;
		-- the second argument must be the string 'error'; the bare identifier error is Lua's builtin function,
		-- which inlineRed() does not recognize and so no error category would be recorded
		ulx.url1.tail = table.concat ({'', prefixes.at, host, inlineRed (err_warn_msgs.unknown_url, 'error')}, ' ');
	end
	
	track[tracking] = 1
end


--[[--------------------------< parseExtraArgs >-----------------------

Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
	For example: {{webarchive |url=.. |url4=.. |url7=..}}
		Three url arguments not in numeric sequence (1..4..7). 
			Function only processes arguments numbered 2 or greater (in this case 4 and 7)
				It creates numeric sequenced table entries like:
				ulx.url2.url = <argument value for url4>
				ulx.url3.url = <argument value for url7>
			The date and title stored with each url are those carrying the same enumerator as the url
			(date4 and title4 for url4).  A missing date is replaced by a red 'date missing' warning; a
			missing title is left nil.
			Returns the number of URL arguments found numbered 2 or greater (in this case returns "2")

The highest enumerator examined is config.maxurls.

 ]]

local function parseExtraArgs(args)
	local j = 2																	-- next free slot in ulx (url2, url3, ...)

	for i = 2, config.maxurls do												-- i is the enumerator as written in the template call
		local url = args["url" .. i]
		if url then
			ulx["url" .. j] = {
				url = url,
				-- date and title are looked up with i, the url's own enumerator; using the slot number j
				-- here would pair |url4= with |date2= whenever the enumerators are not contiguous
				date = args["date" .. i] or inlineRed (err_warn_msgs.date_miss, 'warning'),
				title = args["title" .. i],
			}
			j = j + 1
		end
	end

	return j - 2																-- number of extra urls stored; 0 when none
end



--[[--------------------------< createRendering >-----------------------

Return a rendering of the data in ulx[][]

Reads ulx.url1 (url, title, date, tail, format, extraurls) and ulx.url2 .. ulx.url<extraurls + 1>.

ulx.url1.format selects the layout:
	'none'			- single archive link followed by the tail text; any extra urls are listed after it
	'addlarchives'	- list of links headed by s_text.addlarchives; label is title, else date
	anything else	- treated as 'addlpages': list of links headed by s_text.addlpages and the date; label is
					  title, else s_text.Page and the position number

ulx.url1.date may be nil.  When it holds 'index' followed by a message, the message is split off and
ulx.url1.date is rewritten to the bare word 'index' (this function modifies ulx.url1.date).

]]

local function createRendering()

	local displayfield
	local out = {};
	local first = ulx.url1;
	local index_date, msg;

	if first.date then															-- date is optional; string methods must not be called on nil
		index_date, msg = first.date:match ('(index)(.*)');					-- extract 'index' text and message text (if there is a message)
		first.date = first.date:gsub ('index.*', 'index');						-- remove message; only the first gsub() result is kept
	end

	if 'none' == first.format then												-- For {{wayback}}, {{webcite}}
		table.insert (out, '[');												-- open extlink markup
		table.insert (out, first.url);											-- add url

		if first.title then
			table.insert (out, ' ')												-- the required space
			table.insert (out, first.title)										-- the title
			table.insert (out, ']');											-- close extlink markup
			table.insert (out, first.tail);										-- tail text
			if first.date then
				table.insert (out, '&#32;(');									-- open date text; TODO: why the html entity? replace with regular space?
				table.insert (out, 'index' == first.date and s_text.archive or s_text.archived);	-- add text
				table.insert (out, ' ');										-- insert a space
				table.insert (out, first.date);									-- add date
				table.insert (out, ')');										-- close date text
			end
		else																	-- no title
			if index_date then													-- when url date is 'index' 
				table.insert (out, table.concat ({' ', s_text.Archive_index, ']'}));	-- add the index link label
				table.insert (out, msg or '');									-- add date mismatch message when url date is /*/ and |date= has valid date
			else
				table.insert (out, table.concat ({' ', s_text.Archived, '] '}));	-- add link label for url has timestamp date (will include mismatch message if there is one)
			end
			if first.date then
				if 'index' ~= first.date then
					table.insert (out, first.date);								-- add date when date is not 'index'
				end
				-- add ',' if date format is mdy (eg. 'July 4, 2010') so that the tail reads '..., at the ...';
				-- the date itself has already been added above and must not be added a second time
				table.insert (out, first.date:match ('%a+ +%d%d?, +%d%d%d%d') and ',' or '');
			end
			table.insert (out, first.tail);										-- add tail text (with or without a date)
		end

		if 0 < first.extraurls then												-- For multiple archive URLs
			local tot = first.extraurls + 1
			table.insert (out, '.')												-- terminate first url
			table.insert (out, table.concat ({' ', s_text.addlarchives, ': '}));	-- add header text

			for i=2, tot do														-- loop through the additionals
				local index = table.concat ({'url', i});						-- make an index
				displayfield = ulx[index]['title'] and 'title' or 'date';		-- choose display text
				table.insert (out, '[');										-- open extlink markup
				table.insert (out, ulx[index]['url']);							-- add the url
				table.insert (out, ' ');										-- the required space
				table.insert (out, ulx[index][displayfield]);					-- add the label
				table.insert (out, ']');										-- close extlink markup
				table.insert (out, i==tot and '.' or ', ');						-- add terminator
			end
		end
		return table.concat (out);												-- make a big string and done

	else																		-- For {{cite archives}}
		if 'addlarchives' == first.format then									-- Multiple archive services 
			table.insert (out, table.concat ({s_text.addlarchives, ': '}));		-- add header text
		else																	-- Multiple pages from the same archive 
			table.insert (out, table.concat ({s_text.addlpages, ' '}));			-- add header text
			table.insert (out, first.date);										-- add date to header text
			table.insert (out, ': ');											-- close header text
		end

		local tot = first.extraurls + 1;
		for i=1, tot do															-- loop through all urls, the first included
			local index = table.concat ({'url', i});							-- make an index
			table.insert (out, '[');											-- open extlink markup
			table.insert (out, ulx[index]['url']);								-- add url
			table.insert (out, ' ');											-- add required space

			displayfield = ulx[index]['title'];
			if 'addlarchives' == first.format then
				if not displayfield then 
					displayfield = ulx[index]['date']
				end
			else																-- must be addlpages
				if not displayfield then 
					displayfield = table.concat ({s_text.Page, ' ', i});
				end
			end
			table.insert (out, displayfield);									-- add title, date, page label text
			table.insert (out, ']');											-- close extlink markup
			table.insert (out, (i==tot and '.' or ', '));						-- add terminator
		end
		return table.concat (out);												-- make a big string and done
	end
end


--[[--------------------------< P A R A M E T E R _ N A M E _ X L A T E >--------------------------------------

for internationalization, translate local-language parameter names to their English equivalents

args is the table of template arguments.  params and enum_params each map a canonical parameter name to a
sequence of accepted aliases; in enum_params the canonical names and the aliases carry a trailing '#' in place
of the enumerator (eg. 'url#').

TODO: return error message if multiple aliases of the same canonical parameter name are found?

returns three values:
	new_args - table that holds canonical form parameters and their values either from translation or because the parameter was already in canonical form
	origin - table that maps canonical-form parameter names to their untranslated (local language) form for error messaging in the local language
	unnamed_params - true when args holds at least one non-string (positional) key; nil otherwise

unrecognized parameters are ignored

]]

-- Returns the canonical name in alias_map whose alias list contains name; nil when no alias matches.
local function canonical_name (name, alias_map)
	for pname, aliases in pairs (alias_map) do									-- loop through each parameter in the map
		for _, alias in ipairs (aliases) do										-- loop through each alias in the parameter's aliases table
			if name == alias then
				return pname;
			end
		end
	end
end

local function parameter_name_xlate (args, params, enum_params)
	local new_args = {};														-- a table that holds canonical and translated parameter k/v pairs
	local origin = {};															-- a table that maps canonical parameter names to their original (local language) names
	local unnamed_params;														-- set true when unsupported positional parameters are detected
	
	for k, v in pairs (args) do													-- loop through all of the arguments in the args table
		if 'string' == type (k) then
			local name = k;														-- modifiable copy of the original parameter name
			if non_western_digits then											-- true when non-western digits supported at this wiki
				name = mw.ustring.gsub (name, '%d', digits);					-- convert this wiki's non-western digits to western digits
			end

			local enum = name:match ('%d+$');									-- get parameter enumerator if it exists; nil else
			local pname;														-- canonical parameter name; nil when name is not recognized

			if enum then														-- enumerated parameters
				-- replace the WHOLE enumerator with the place holder; replacing only the last digit would
				-- turn 'url10' into 'url1#' which matches no alias
				local placeholder_name = name:gsub ('%d+$', '#');
				pname = canonical_name (placeholder_name, enum_params);
				if pname then
					pname = pname:gsub ('#$', enum);							-- replace the '#' place holder with the actual enumerator
				end
			else																-- non-enumerated parameters
				pname = canonical_name (name, params);
			end

			if pname then
				new_args[pname] = v;											-- create a new entry in the new_args table
				origin[pname] = k;												-- map canonical parameter name to original local language parameter name
			end
		else
			unnamed_params = true;												-- flag for unsupported positional parameters
		end
	end
	return new_args, origin, unnamed_params;
end


--[[--------------------------< W E B A R C H I V E >----------------------------------------------------------

template entry point

frame is the frame object handed to the module; its arguments are fetched with getArgs() and its title
decides whether the live or the /sandbox data module is loaded.

Fills the forward-declared configuration tables from the data module, translates parameter names to their
canonical form, validates |url=, |date=, |format= and |title=, stores the results in ulx.url1 (and, through
parseExtraArgs(), ulx.url2 ...) and returns the rendered wikitext followed by any tracking categories.

Returns a red error message instead when |url= is missing or malformed, or when both a parameter and its
enumerated form (eg. |url= and |url1=) are given.

]]

local function webarchive(frame)
	local args = getArgs (frame);

	local data = mw.loadData (table.concat ({									-- make a data module name; sandbox or live
		'Module:Webarchive/data',
		frame:getTitle():find('sandbox', 1, true) and '/sandbox' or ''			-- this instance is ./sandbox then append /sandbox
		}));
	categories = data.categories;												-- fill in the forward declarations
	config = data.config;
	if data.digits.enable then
		digits = data.digits;													-- for i18n; table of digits in the local wiki's language
		non_western_digits = true;												-- use_non_western_digits
	end
	err_warn_msgs = data.err_warn_msgs;
	excepted_pages = data.excepted_pages;
	month_num = data.month_num;													-- for i18n; table of month names in the local wiki's language
	prefixes = data.prefixes;
	services = data.services;
	s_text = data.s_text;
	uncategorized_namespaces = data.uncategorized_namespaces;
	uncategorized_subpages = data.uncategorized_subpages;

	local origin;																-- map of English to local language parameter names used in the current template; for error messages
	local unnamed_params;														-- boolean set to true when template call has unnamed parameters
	args, origin, unnamed_params = parameter_name_xlate (args, data.params, data.enum_params);	-- translate parameter names in args to English

	local date, format, uri, url;
	
	if args.url and args.url1 then												-- URL argument (first)
		return inlineError (data.crit_err_msgs.conflicting, {origin.url, origin.url1});
	end
	
	url = args.url or args.url1;
	
	if not url then
		return inlineError (data.crit_err_msgs.empty);
	end
		-- legacy note kept from earlier revisions: IABot-related work-arounds, purportedly no longer needed, used to live here

	if not (url:lower():find ('^http') or url:find ('^//')) then				-- accept only http..., or protocol-relative urls
		return inlineError (data.crit_err_msgs.invalid_url );
	end

	ulx.url1 = {}
	ulx.url1.url = url

	ulx.url1.extraurls = parseExtraArgs(args)

	local good = false;
	good, uri = pcall (mw.uri.new, ulx.url1.url);								-- get a table of uri parts from this url; protected mode to prevent lua error when ulx.url1.url is malformed
	
	if not good or nil == uri.host then											-- abandon when ulx.url1.url is malformed
		return inlineError (data.crit_err_msgs.invalid_url);
	end
	
	serviceName(uri.host, args.nolink)

	if args.date and args.date1 then											-- Date argument
		return inlineError (data.crit_err_msgs.conflicting, {origin.date, origin.date1});
	end
	
	date = args.date or args.date1;
	date = date and date:gsub (' +', ' ');										-- replace multiple spaces with a single space

	-- store the date as given (nil when the template has no date); this used to assign a message variable
	-- that was never set, which discarded every |date= value other than 'index'
	ulx.url1.date = date;
		
	format = args.format;														-- Format argument 

	if not format then
		format = "none"
	else
		for k, v in pairs (data.format_vals) do									-- |format= accepts two specific values loop through a table of those values
			local found;														-- declare a nil flag
			for _, p in ipairs (v) do											-- loop through local language variants
				if format == p then												-- when |format= value matches 
					format = k;													-- use name from table key
					found = true;												-- declare found so that we can break out of outer for loop
					break;														-- break out of inner for loop
				end
			end
			
			if found then
				break;
			end
		end

		if format == "addlpages" then
			if not ulx.url1.date then											-- addlpages header needs a date; without one fall back to the default rendering
				format = "none"
			end
		elseif format ~= "addlarchives" then									-- anything that is not one of the two supported values
			format = "none"
		end
	end
	ulx.url1.format = format

	if args.title and args.title1 then											-- Title argument
		return inlineError (data.crit_err_msgs.conflicting, {origin.title, origin.title1});
	end

	ulx.url1.title = args.title or args.title1;

	local rend = createRendering()
	if not rend then
		return inlineError (data.crit_err_msgs.unknown);
	end

	return rend .. ((unnamed_params and inlineRed (err_warn_msgs.unnamed_params, 'warning')) or '') .. createTracking();

end


--[[--------------------------< E X P O R T E D 	 F U N C T I O N S >------------------------------------------
]]

return {webarchive = webarchive};												-- the module's only export: the template entry point