无编辑摘要
imported>=海豚= 无编辑摘要 |
imported>=海豚= 无编辑摘要 |
||
第1行: | 第1行: | ||
local z = { | local z = { | ||
error_categories = {}; -- | error_categories = {}; -- for categorizing citations that contain errors | ||
error_ids = {}; | error_ids = {}; | ||
message_tail = {}; | message_tail = {}; | ||
maintenance_cats = {}; -- | maintenance_cats = {}; -- for categorizing citations that aren't erroneous per se, but could use a little work | ||
properties_cats = {}; -- | properties_cats = {}; -- for categorizing citations based on certain properties, language of source for instance | ||
} | } | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
local dates, year_date_check -- | local dates, year_date_check -- functions in Module:Citation/CS1/Date_validation | ||
local cfg = {}; -- | local cfg = {}; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
local whitelist = {}; -- | local whitelist = {}; -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist | ||
--[[--------------------------< I S _ S E T >------------------------------------------------------------------ | --[[--------------------------< I S _ S E T >------------------------------------------------------------------ | ||
Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) or when it is not an empty string. | Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) or when it is not an empty string. | ||
This function is global because it is called from both this module and from Date validation | This function is global because it is called from both this module and from Date validation | ||
第29行: | 第26行: | ||
--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------ | --[[--------------------------< F I R S T _ S E T >------------------------------------------------------------ | ||
Locates and returns the first set value in a table of values where the order established in the table, | Locates and returns the first set value in a table of values where the order established in the table, | ||
left-to-right (or top-to-bottom), is the order in which the values are evaluated. Returns nil if none are set. | left-to-right (or top-to-bottom), is the order in which the values are evaluated. Returns nil if none are set. | ||
第241行: | 第233行: | ||
return true; | return true; | ||
elseif domain:match ('%f[%a][iq]%.net$') then -- assigned one character .net hostname (q.net registered but not active 2015-12-10) | elseif domain:match ('%f[%a][iq]%.net$') then -- assigned one character .net hostname (q.net registered but not active 2015-12-10) | ||
return true; | |||
elseif domain:match ('%f[%a%d][%a%d][%a%d%-]+[%a%d]%.xn%-%-[%a%d]+$') then -- internationalized domain name with ACE prefix | |||
return true; | |||
elseif domain:match ('%f[%a%d][%a%d]%.cash$') then -- one character/digit .cash hostname | |||
return true; | return true; | ||
elseif domain:match ('%f[%a%d][%a%d]%.%a%a$') then -- one character hostname and cctld (2 chars) | elseif domain:match ('%f[%a%d][%a%d]%.%a%a$') then -- one character hostname and cctld (2 chars) | ||
第598行: | 第594行: | ||
end | end | ||
if is_set(script_value) then | if is_set(script_value) then | ||
script_value = '-{R|' .. script_value .. ' | script_value = '-{R|' .. script_value .. ''; | ||
end | end | ||
script_value = substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is rtl | script_value = substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is rtl | ||
第814行: | 第810行: | ||
if position then | if position then | ||
if 'nowiki' == capture or 'math' == capture or -- nowiki and math stripmarkers (not an error condition) | |||
('templatestyles' == capture) then -- templatestyles stripmarker allowed | |||
stripmarker = true; -- set a flag | |||
elseif true == stripmarker and 'delete' == char then -- because stripmakers begin and end with the delete char, assume that we've found one end of a stripmarker | elseif true == stripmarker and 'delete' == char then -- because stripmakers begin and end with the delete char, assume that we've found one end of a stripmarker | ||
position = nil; -- unset | position = nil; -- unset | ||
第932行: | 第928行: | ||
end | end | ||
--[[--------------------------< T I D Y _ D A T E >------------------------------------------------------------

Normalizes an all-numeric year-month-day date so that the month and day are always two digits,
e.g. '2015-1-2' becomes '2015-01-02'.

date: the value to normalize.

Returns the zero-padded date when date is a string of the form YYYY-M-D (one or two digits for month and for
day); any other value, including nil, the empty string, and non-string values, is returned unchanged so that
an unset or unexpected argument never raises an error here.

]]

local function tidy_date(date)
	if 'string' ~= type(date) then								-- nil or other non-string: nothing to tidy, and date:match() would raise
		return date;
	end

	local year, month, day = date:match("^(%d%d%d%d)%-(%d%d?)%-(%d%d?)$");	-- anchored so that only a whole-string ymd date qualifies
	if not year then
		return date;											-- not a numeric ymd date; leave as written
	end

	return string.format('%s-%02d-%02d', year, tonumber(month), tonumber(day));	-- explicit tonumber() rather than relying on string coercion
end
--[[--------------------------< N O W R A P _ D A T E >-------------------------------------------------------- | --[[--------------------------< N O W R A P _ D A T E >-------------------------------------------------------- | ||
第1,303行: | 第1,306行: | ||
end | end | ||
--[[ | --[[--------------------------< P M I D >---------------------------------------------------------------------- | ||
Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This code checks the PMID to see that it | |||
contains only digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued. | Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This | ||
code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable | |||
test_limit will need to be updated periodically as more PMIDs are issued. | |||
]] | ]] | ||
local function pmid(id) | local function pmid(id) | ||
local test_limit = | local test_limit = 33000000; -- update this value as PMIDs approach | ||
local handler = cfg.id_handlers['PMID']; | local handler = cfg.id_handlers['PMID']; | ||
local err_cat = | local err_cat = ''; -- presume that PMID is valid | ||
if id:match("[^%d]") then | if id:match("[^%d]") then -- if PMID has anything but digits | ||
err_cat = ' ' .. set_error( 'bad_pmid' ); | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message | ||
else | else -- PMID is only digits | ||
local id_num = tonumber(id); | local id_num = tonumber(id); -- convert id to a number for range testing | ||
if 1 > id_num or test_limit < id_num then | if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries | ||
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message | ||
end | end | ||
end | end | ||
第1,371行: | 第1,377行: | ||
local function pmc(id, embargo) | local function pmc(id, embargo) | ||
local test_limit = | local test_limit = 7000000; -- update this value as PMCs approach | ||
local handler = cfg.id_handlers['PMC']; | local handler = cfg.id_handlers['PMC']; | ||
local err_cat = | local err_cat = ''; -- presume that PMC is valid | ||
local id_num; | |||
local text; | |||
id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix | |||
if is_set (id_num) then | |||
add_maint_cat ('pmc_format'); | |||
else -- plain number without pmc prefix | |||
id_num = id:match ('^%d+$'); -- if here id is all digits | |||
end | |||
if | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | |||
if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries | |||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | |||
if 1 > id_num or test_limit < id_num then | else | ||
err_cat = ' ' .. set_error( 'bad_pmc' ); | id = tostring (id_num); -- make sure id is a string | ||
end | end | ||
else -- when id format incorrect | |||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | |||
end | end | ||
if is_set (embargo) then -- is PMC is still embargoed? | if is_set (embargo) then -- is PMC is still embargoed? | ||
text= | text = table.concat ( -- still embargoed so no external link | ||
{ | |||
make_wikilink (handler.link, handler.label), | |||
handler.separator, | |||
id, | |||
err_cat | |||
}); | |||
else | else | ||
text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article | ||
第1,395行: | 第1,417行: | ||
end | end | ||
-- Formats a DOI and checks for DOI errors. | --[[--------------------------< D O I >------------------------------------------------------------------------ | ||
Formats a DOI and checks for DOI errors. | |||
DOI names contain two parts: prefix and suffix separated by a forward slash. | |||
Prefix: directory indicator '10.' followed by a registrant code | |||
Suffix: character string of any length chosen by the registrant | |||
This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes, or, if it ends | |||
with a period or a comma, this function will emit a bad_doi error message. | |||
- | DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash, | ||
and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | |||
if ever used in doi names. | |||
]] | |||
local function doi(id, inactive) | local function doi(id, inactive) | ||
第1,413行: | 第1,440行: | ||
local text; | local text; | ||
if is_set(inactive) then | if is_set(inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d") or ''; | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | ||
if is_set(inactive_year) then | if is_set(inactive_year) then | ||
table.insert( z.error_categories, " | table.insert( z.error_categories, " 自" .. inactive_year .. "年含有不活躍DOI的頁面" ); | ||
else | else | ||
table.insert( z.error_categories, " | table.insert( z.error_categories, " 含有不活躍DOI的頁面" ); -- when inactive doesn't contain a recognizable year | ||
end | end | ||
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" | inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, | |||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. (inactive or '') | |||
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | ||
cat = ' ' .. set_error( 'bad_doi' ); | cat = ' ' .. set_error( 'bad_doi' ); | ||
end | end | ||
return text | |||
return text .. cat | |||
end | end | ||
--[[--------------------------< H D L >------------------------------------------------------------------------ | |||
Formats an HDL with minor error checking. | |||
HDL names contain two parts: prefix and suffix separated by a forward slash. | |||
Prefix: character string using any character in the UCS-2 character set except '/' | |||
Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant | |||
This function checks an HDL name for the prefix/suffix form. If the HDL name contains spaces or endashes, or if it ends | |||
with a period or a comma, this function will emit a bad_hdl error message. | |||
HDL names are case-insensitive and can incorporate any printable Unicode characters, so the test for endashes and | |||
terminal punctuation may not be technically correct, but it appears that in practice these characters are rarely, | |||
if ever, used in HDLs. | |||
]] | |||
local function hdl(id)
	local handler = cfg.id_handlers['HDL'];
	local valid_form = "^[^%s–]-/[^%s–]-[^%.,]$";				-- must contain a fwd slash, no spaces or endashes, must not end with period or comma

	local rendered = external_link_id({							-- build the link first; it is returned whether or not the id is well formed
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
	});

	if id:match(valid_form) then
		return rendered;										-- well-formed hdl: link only
	end

	return rendered .. ' ' .. set_error( 'bad_hdl' );			-- malformed hdl: append the error message to the link
end
--[[--------------------------< O P E N L I B R A R Y >-------------------------------------------------------- | --[[--------------------------< O P E N L I B R A R Y >-------------------------------------------------------- | ||
第1,984行: | 第2,038行: | ||
elseif k == 'DOI' then | elseif k == 'DOI' then | ||
table.insert( new_list, {handler.label, doi( v, options.DoiBroken ) } ); | table.insert( new_list, {handler.label, doi( v, options.DoiBroken ) } ); | ||
elseif k == 'HDL' then | |||
table.insert( new_list, {handler.label, hdl( v ) } ); | |||
elseif k == 'ARXIV' then | elseif k == 'ARXIV' then | ||
table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); | table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); | ||
第2,278行: | 第2,334行: | ||
if is_set (code) then | if is_set (code) then | ||
if 'no' == code then name = ' | if 'no' == code then name = ' 挪威语' end; -- override wikimedia when code is 'no' | ||
if 'zh' ~= code and not code:match ('^zh-') then -- English not the language | if 'zh' ~= code and not code:match ('^zh-') then -- English not the language | ||
add_prop_cat ('foreign_lang_source', {name, code}) | add_prop_cat ('foreign_lang_source', {name, code}) | ||
第2,292行: | 第2,348行: | ||
code = #language_list -- reuse code as number of languages in the list | code = #language_list -- reuse code as number of languages in the list | ||
if 2 >= code then | if 2 >= code then | ||
name = table.concat (language_list, ' | name = table.concat (language_list, ' 及') -- insert ' 及' between two language names | ||
elseif 2 < code then | elseif 2 < code then | ||
language_list[code] = ' | language_list[code] = ' 及' .. language_list[code]; -- prepend last name with ' 及' | ||
name = table.concat (language_list, ' | name = table.concat (language_list, ' 、'); -- and concatenate with '<comma><space>' separators | ||
name = name:gsub ('、及', '及', 1); | |||
end | end | ||
return (" " .. wrap_msg ('language', name)); -- otherwise wrap with '(in ...)' | return (" " .. wrap_msg ('language', name)); -- otherwise wrap with '(in ...)' | ||
第2,785行: | 第2,842行: | ||
local PublicationDate = A['PublicationDate']; | local PublicationDate = A['PublicationDate']; | ||
local OrigYear = A['OrigYear']; | local OrigYear = A['OrigYear']; | ||
local Date = A['Date']; | local Date = tidy_date( A['Date'] ); | ||
local LayDate = A['LayDate']; | local LayDate = tidy_date( A['LayDate'] ); | ||
------------------------------------------------- Get title data | ------------------------------------------------- Get title data | ||
local Title = A['Title']; | local Title = A['Title']; | ||
第2,854行: | 第2,911行: | ||
local Via = A['Via']; | local Via = A['Via']; | ||
local AccessDate = A['AccessDate']; | local AccessDate = tidy_date( A['AccessDate'] ); | ||
local ArchiveDate = A['ArchiveDate']; | local ArchiveDate = tidy_date( A['ArchiveDate'] ); | ||
local Agency = A['Agency']; | local Agency = A['Agency']; | ||
local DeadURL = A['DeadURL'] | local DeadURL = A['DeadURL'] | ||
第3,190行: | 第3,247行: | ||
TitleType = set_titletype (config.CitationClass, TitleType); | TitleType = set_titletype (config.CitationClass, TitleType); | ||
if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis | if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis | ||
TitleType = Degree .. " | TitleType = Degree .. " 论文"; | ||
end | end | ||
end | end | ||
第3,998行: | 第4,055行: | ||
text = text .. '[[Category:' .. v ..']]'; | text = text .. '[[Category:' .. v ..']]'; | ||
end | end | ||
end | end | ||