Line 1:
Line 1:
+
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
]]
]]
−
local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities
+
local is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
Line 17:
Line 18:
local function make_coins_title (title, script)
local function make_coins_title (title, script)
−
title = has_accept_as_written (title);
if is_set (title) then
if is_set (title) then
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
else
else
−
title = ''; -- if not set, make sure title is an empty string
+
title=''; -- if not set, make sure title is an empty string
end
end
if is_set (script) then
if is_set (script) then
Line 27:
Line 27:
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
else
else
−
script = ''; -- if not set, make sure script is an empty string
+
script=''; -- if not set, make sure script is an empty string
end
end
if is_set (title) and is_set (script) then
if is_set (title) and is_set (script) then
Line 38:
Line 38:
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
−
Returns a string where all of Lua's magic characters have been escaped. This is important because functions like
+
Returns a string where all of lua's magic characters have been escaped. This is important because functions like
string.gsub() treat their pattern and replace strings as patterns, not literal strings.
string.gsub() treat their pattern and replace strings as patterns, not literal strings.
]]
]]
Line 44:
Line 44:
local function escape_lua_magic_chars (argument)
local function escape_lua_magic_chars (argument)
argument = argument:gsub("%%", "%%%%"); -- replace % with %%
argument = argument:gsub("%%", "%%%%"); -- replace % with %%
−
argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other Lua magic pattern characters
+
argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other lua magic pattern characters
return argument;
return argument;
end
end
Line 60:
Line 60:
while true do
while true do
−
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url "
+
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
−
if nil == pattern then break; end -- no more URLs
+
if nil == pattern then break; end -- no more urls
−
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters
+
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
end
end
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
−
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
+
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
−
pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like?
+
pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like?
return pages;
return pages;
end
end
Line 80:
Line 80:
MathML with SVG or PNG fallback
MathML with SVG or PNG fallback
−
All three are heavy with HTML and CSS which doesn't belong in the metadata.
+
All three are heavy with html and css which doesn't belong in the metadata.
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
Line 87:
Line 87:
This function gets the rendered form of an equation according to the editor's preference before the page is saved. It
This function gets the rendered form of an equation according to the editor's preference before the page is saved. It
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
−
that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.
+
that the page is saved without extraneous html/css markup and with a reasonably readable text form of the equation.
−
When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
+
When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
−
value. To replace multiple equations it is necessary to call this function from within a loop.
+
value. To replace multiple equations it is necessary to call this function from within a loop.
]=]
]=]
Line 120:
Line 120:
--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
−
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.
+
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities.
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content
Line 137:
Line 137:
end
end
−
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
+
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
Line 143:
Line 143:
value = value:gsub (' ', ' '); -- replace entity with plain space
value = value:gsub (' ', ' '); -- replace entity with plain space
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
−
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script
+
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script
−
value = value:gsub ('‍', ''); -- remove ‍ entities
+
value = value:gsub ('‍', ''); -- remove ‍ entities
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
end
end
−
value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space
+
value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space
return value;
return value;
end
end
Line 180:
Line 180:
});
});
−
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or
+
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
−
(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
−
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
+
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
−
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn
+
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
elseif 'conference' == class then
elseif 'conference' == class then
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
Line 198:
Line 197:
-- these used only for periodicals
-- these used only for periodicals
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
−
OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
OCinSoutput["rft.issue"] = data.Issue;
OCinSoutput["rft.issue"] = data.Issue;
−
OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata
Line 223:
Line 220:
end
end
end
end
−
else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
+
else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
OCinSoutput["rft.genre"] = "unknown";
OCinSoutput["rft.genre"] = "unknown";
end
end
Line 239:
Line 236:
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
end
end
−
-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
-- and now common parameters (as much as possible)
-- and now common parameters (as much as possible)
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
−
+
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
Line 248:
Line 244:
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
−
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords
+
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords
OCinSoutput[ id ] = v;
OCinSoutput[ id ] = v;
−
elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=
+
elseif id then -- when cfg.id_handlers[k].COinS is not nil
−
OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});
+
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url
−
elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here
−
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)
end
end
end
end
Line 259:
Line 253:
local last, first;
local last, first;
for k, v in ipairs( data.Authors ) do
for k, v in ipairs( data.Authors ) do
−
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters
+
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characers
if k == 1 then -- for the first author name only
if k == 1 then -- for the first author name only
−
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
+
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
Line 273:
Line 267:
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
end
end
−
-- TODO: At present we do not report "et al.". Add anything special if this condition applies?
end
end
end
end
Line 279:
Line 272:
OCinSoutput.rft_id = data.URL;
OCinSoutput.rft_id = data.URL;
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
−
+
OCinSoutput = setmetatable( OCinSoutput, nil );
−
-- TODO: Add optional extra info:
−
-- rfr_dat=#REVISION<version> (referrer private data)
−
-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
−
-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
−
-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
−
OCinSoutput = setmetatable( OCinSoutput, nil );
−
-- sort with version string always first, and combine.
-- sort with version string always first, and combine.
−
-- table.sort( OCinSoutput );
+
--table.sort( OCinSoutput );
−
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
+
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
return table.concat(OCinSoutput, "&");
return table.concat(OCinSoutput, "&");
end
end
Line 304:
Line 290:
cfg = cfg_table_ptr;
cfg = cfg_table_ptr;
−
has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module
+
is_set = utilities_page_ptr.is_set; -- import functions from selected Module:Citation/CS1/Utilities module
−
is_set = utilities_page_ptr.is_set;
in_array = utilities_page_ptr.in_array;
in_array = utilities_page_ptr.in_array;
remove_wiki_link = utilities_page_ptr.remove_wiki_link;
remove_wiki_link = utilities_page_ptr.remove_wiki_link;