Difference between pages "Module:Citation/CS1/COinS" and "Module:Citation/CS1/COinS/sandbox"
(Difference between pages)
Josh Renaud (talk | contribs) m (1 revision imported: Updating the citation templates) |
Josh Renaud (talk | contribs) m (1 revision imported) |
||
Line 1: | Line 1: | ||
+ | --[[ | ||
+ | History of changes since last sync: 2016-04-16 | ||
+ | |||
+ | 2017-03-26: support for cite biorxiv and cite citeseerx; see Help_talk:Citation_Style_1#.7B.7Bcite_bioRxiv.7D.7D | ||
+ | |||
+ | ]] | ||
+ | |||
+ | local coins = {}; | ||
+ | |||
+ | |||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
− | local | + | local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities |
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
+ | |||
+ | |||
+ | --[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >-------------------------------- | ||
+ | |||
+ | Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata. | ||
+ | This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to | ||
+ | markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind. | ||
+ | |||
+ | ]] | ||
+ | |||
+ | local function strip_apostrophe_markup (argument) | ||
+ | if not is_set (argument) then return argument; end | ||
+ | |||
+ | if argument:find ( "''", 1, true ) == nil then -- Is there at least one double apostrophe? If not, exit. | ||
+ | return argument; | ||
+ | end | ||
+ | |||
+ | while true do | ||
+ | if argument:find ( "'''''", 1, true ) then -- bold italic (5) | ||
+ | argument=argument:gsub("%'%'%'%'%'", ""); -- remove all instances of it | ||
+ | elseif argument:find ( "''''", 1, true ) then -- italic start and end without content (4) | ||
+ | argument=argument:gsub("%'%'%'%'", ""); | ||
+ | elseif argument:find ( "'''", 1, true ) then -- bold (3) | ||
+ | argument=argument:gsub("%'%'%'", ""); | ||
+ | elseif argument:find ( "''", 1, true ) then -- italic (2) | ||
+ | argument=argument:gsub("%'%'", ""); | ||
+ | else | ||
+ | break; | ||
+ | end | ||
+ | end | ||
+ | return argument; -- done | ||
+ | end | ||
Line 17: | Line 59: | ||
local function make_coins_title (title, script) | local function make_coins_title (title, script) | ||
− | |||
if is_set (title) then | if is_set (title) then | ||
title = strip_apostrophe_markup (title); -- strip any apostrophe markup | title = strip_apostrophe_markup (title); -- strip any apostrophe markup | ||
else | else | ||
− | title = ''; -- if not set, make sure title is an empty string | + | title=''; -- if not set, make sure title is an empty string |
end | end | ||
if is_set (script) then | if is_set (script) then | ||
Line 27: | Line 68: | ||
script = strip_apostrophe_markup (script); -- strip any apostrophe markup | script = strip_apostrophe_markup (script); -- strip any apostrophe markup | ||
else | else | ||
− | script = ''; | + | script=''; -- if not set, make sure script is an empty string |
end | end | ||
if is_set (title) and is_set (script) then | if is_set (title) and is_set (script) then | ||
Line 38: | Line 79: | ||
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | ||
− | Returns a string where all of | + | Returns a string where all of lua's magic characters have been escaped. This is important because functions like |
string.gsub() treat their pattern and replace strings as patterns, not literal strings. | string.gsub() treat their pattern and replace strings as patterns, not literal strings. | ||
]] | ]] | ||
Line 44: | Line 85: | ||
local function escape_lua_magic_chars (argument) | local function escape_lua_magic_chars (argument) | ||
argument = argument:gsub("%%", "%%%%"); -- replace % with %% | argument = argument:gsub("%%", "%%%%"); -- replace % with %% | ||
− | argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other | + | argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other lua magic pattern characters |
return argument; | return argument; | ||
end | end | ||
Line 60: | Line 101: | ||
while true do | while true do | ||
− | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the | + | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " |
− | if nil == pattern then break; end -- no more | + | if nil == pattern then break; end -- no more urls |
− | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape | + | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters |
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | ||
end | end | ||
− | |||
pages = pages:gsub("[%[%]]", ""); -- remove the brackets | pages = pages:gsub("[%[%]]", ""); -- remove the brackets | ||
− | pages = pages:gsub("–", "-" ); | + | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens |
− | pages = pages:gsub("&%w+;", "-" ); | + | pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like? |
− | |||
return pages; | return pages; | ||
end | end | ||
Line 82: | Line 121: | ||
MathML with SVG or PNG fallback | MathML with SVG or PNG fallback | ||
− | All three are heavy with | + | All three are heavy with html and css which doesn't belong in the metadata. |
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | ||
Line 89: | Line 128: | ||
This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | ||
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | ||
− | that the page is saved without extraneous | + | that the page is saved without extraneous html/css markup and with a reasonably readable text form of the equation. |
− | When a replacement is made, this function returns true and the value with replacement; otherwise false and the | + | When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial |
− | value. To replace multiple equations it is | + | value. To replace multiple equations it is necessary to call this function from within a loop. |
]=] | ]=] | ||
Line 122: | Line 161: | ||
--[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | ||
− | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain | + | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities. |
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | ||
Line 139: | Line 178: | ||
end | end | ||
− | value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); | + | value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message |
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | ||
Line 145: | Line 184: | ||
value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space | value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space | ||
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | ||
− | + | value = value:gsub ('&zwj;', ''); -- remove &zwj; entities |
− | + | value = value:gsub ('[\226\128\141\226\128\139\194\173]', '') -- remove zero-width joiner, zero-width space, soft hyphen | |
− | + | value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space | |
− | |||
− | value = value:gsub ('[\009\010\013 ] | ||
return value; | return value; | ||
end | end | ||
Line 182: | Line 219: | ||
}); | }); | ||
− | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx | + | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'journal', 'news'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or |
− | |||
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | ||
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier | OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier | ||
− | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx | + | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx'}) then -- set genre according to the type of citation template we are rendering |
− | OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx | + | OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx |
elseif 'conference' == class then | elseif 'conference' == class then | ||
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) | OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) | ||
Line 200: | Line 236: | ||
-- these used only for periodicals | -- these used only for periodicals | ||
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | ||
− | |||
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | ||
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | ||
OCinSoutput["rft.issue"] = data.Issue; | OCinSoutput["rft.issue"] = data.Issue; | ||
− | |||
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata | OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata | ||
Line 225: | Line 259: | ||
end | end | ||
end | end | ||
− | else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | + | else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} |
OCinSoutput["rft.genre"] = "unknown"; | OCinSoutput["rft.genre"] = "unknown"; | ||
end | end | ||
Line 241: | Line 275: | ||
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | ||
end | end | ||
− | |||
-- and now common parameters (as much as possible) | -- and now common parameters (as much as possible) | ||
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | ||
− | + | ||
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | ||
+ | -- if k == 'ISBN' then v = clean_isbn( v ) end | ||
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | ||
local id = cfg.id_handlers[k].COinS; | local id = cfg.id_handlers[k].COinS; | ||
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | ||
OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | ||
− | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc | + | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords |
OCinSoutput[ id ] = v; | OCinSoutput[ id ] = v; | ||
− | + | elseif id then -- when cfg.id_handlers[k].COinS is not nil | |
− | + | OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url | |
− | elseif id then -- when cfg.id_handlers[k].COinS is not nil | ||
− | OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v | ||
end | end | ||
end | end | ||
+ | --[[ | ||
+ | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | ||
+ | local id, value = cfg.id_handlers[k].COinS; | ||
+ | if k == 'ISBN' then value = clean_isbn( v ); else value = v; end | ||
+ | if string.sub( id or "", 1, 4 ) == 'info' then | ||
+ | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | ||
+ | else | ||
+ | OCinSoutput[ id ] = value; | ||
+ | end | ||
+ | end | ||
+ | ]] | ||
local last, first; | local last, first; | ||
for k, v in ipairs( data.Authors ) do | for k, v in ipairs( data.Authors ) do | ||
− | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki | + | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characters |
if k == 1 then -- for the first author name only | if k == 1 then -- for the first author name only | ||
− | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | + | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name |
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | ||
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | ||
Line 275: | Line 318: | ||
OCinSoutput["rft.au"] = last; -- book, journal, dissertation | OCinSoutput["rft.au"] = last; -- book, journal, dissertation | ||
end | end | ||
− | |||
end | end | ||
end | end | ||
Line 281: | Line 323: | ||
OCinSoutput.rft_id = data.URL; | OCinSoutput.rft_id = data.URL; | ||
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | ||
− | + | OCinSoutput = setmetatable( OCinSoutput, nil ); | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
-- sort with version string always first, and combine. | -- sort with version string always first, and combine. | ||
− | + | table.sort( OCinSoutput ); | |
− | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | + | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" |
return table.concat(OCinSoutput, "&"); | return table.concat(OCinSoutput, "&"); | ||
end | end | ||
Line 306: | Line 341: | ||
cfg = cfg_table_ptr; | cfg = cfg_table_ptr; | ||
− | + | is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module | |
− | |||
in_array = utilities_page_ptr.in_array; | in_array = utilities_page_ptr.in_array; | ||
remove_wiki_link = utilities_page_ptr.remove_wiki_link; | remove_wiki_link = utilities_page_ptr.remove_wiki_link; | ||
− | |||
end | end | ||
− | |||
− | |||
return { | return { |