模块:Citation/CS1/Date validation:修订间差异
imported>Liangent 无编辑摘要 |
imported>Liangent |
||
| 第2行: | 第2行: | ||
local p = {} | local p = {} | ||
-- | -- returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct. If not a valid month, returns 0 | ||
function get_month_number (month) | |||
returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct. If not a valid month, returns 0 | |||
local long_months = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12}; | local long_months = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12}; | ||
local short_months = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12}; | local short_months = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12}; | ||
local zh_months = {['1月']=1, ['2月']=2, ['3月']=3, ['4月']=4, ['5月']=5, ['6月']=6, ['7月']=7, ['8月']=8, ['9月']=9, ['10月']=10, ['11月']=11, ['12月']=12}; | local zh_months = {['1月']=1, ['2月']=2, ['3月']=3, ['4月']=4, ['5月']=5, ['6月']=6, ['7月']=7, ['8月']=8, ['9月']=9, ['10月']=10, ['11月']=11, ['12月']=12}; | ||
local temp; | local temp; | ||
temp=long_months[month]; | temp=long_months[month]; | ||
| 第53行: | 第12行: | ||
temp=short_months[month]; | temp=short_months[month]; | ||
if temp then return temp; end -- if month is the short-form name | if temp then return temp; end -- if month is the short-form name | ||
temp=zh_months[month]; | temp=zh_months[month]; | ||
if temp then return temp; end -- if month is in Chinese | if temp then return temp; end -- if month is in Chinese | ||
return 0; -- misspelled, improper case, or not a month name | return 0; -- misspelled, improper case, or not a month name | ||
end | end | ||
p.get_month_number = get_month_number -- LOCAL | |||
-- | -- returns a number according to the sequence of seasons in a year: 1 for Winter, etc. Capitalization and spelling must be correct. If not a valid season, returns 0 | ||
function get_season_number (season) | |||
returns a number according to the sequence of seasons in a year: 1 for Winter, etc. Capitalization and spelling must be correct. If not a valid season, returns 0 | local season_list = {['Winter']=1, ['Spring']=2, ['Summer']=3, ['Fall']=4, ['Autumn']=4} | ||
local season_list = {['Winter']= | |||
local temp; | local temp; | ||
temp=season_list[season]; | temp=season_list[season]; | ||
if temp then return temp; end | if temp then return temp; end -- if season is a valid name return its number | ||
return 0; | return 0; -- misspelled, improper case, or not a season name | ||
end | end | ||
--returns true if month or season is valid (properly spelled, capitalized, abbreviated) | --returns true if month or season is valid (properly spelled, capitalized, abbreviated) | ||
function is_valid_month_or_season (month_season) | |||
if 0 == get_month_number (month_season) then -- if month text isn't one of the twelve months, might be a season | if 0 == get_month_number (month_season) then -- if month text isn't one of the twelve months, might be a season | ||
if 0 == get_season_number (month_season) then -- not a month, is it a season? | if 0 == get_season_number (month_season) then -- not a month, is it a season? | ||
| 第102行: | 第38行: | ||
-- | -- Function gets current year from the server and compares it to year from a citation parameter. Years more than one year in the future are not acceptable. | ||
function is_valid_year(year) | |||
Function gets current year from the server and compares it to year from a citation parameter. Years more than one year in the future are not acceptable. | |||
if not is_set(year_limit) then | if not is_set(year_limit) then | ||
year_limit = tonumber(os.date("%Y"))+1; | year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once (os.date("Y") no longer works?) | ||
end | end | ||
return tonumber(year) <= year_limit; | return tonumber(year) <= year_limit; -- false if year is in the future more than one year | ||
end | end | ||
| 第121行: | 第52行: | ||
Where the two calendars overlap (1582 to approximately 1923) dates are assumed to be Gregorian. | Where the two calendars overlap (1582 to approximately 1923) dates are assumed to be Gregorian. | ||
]] | ]] | ||
function is_valid_date (year, month, day) | |||
local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | ||
local month_length; | local month_length; | ||
| 第140行: | 第71行: | ||
end | end | ||
else | else | ||
month_length=days_in_month[ | month_length=days_in_month[month]; | ||
end | end | ||
| 第149行: | 第80行: | ||
end | end | ||
--[[ | --[[ | ||
Check a pair of months or seasons to see if both are valid members of a month or season pair. | Check a pair of months or seasons to see if both are valid members of a month or season pair. | ||
Month pairs are expected to be left to right, earliest to latest in time. | Month pairs are expected to be left to right, earliest to latest in time. Similarly, seasons are also left to right, earliest to latest in time. There is | ||
an oddity with seasons. Winter is assigned a value of 1, spring 2, ..., fall and autumn 4. Because winter can follow fall/autumn at the end of a calendar year, a special test | |||
Similarly, seasons are also left to right, earliest to latest in time. There is an oddity with seasons | is made to see if |date=Fall-Winter yyyy (4-1) is the date. | ||
fall and autumn 4. Because winter can follow fall/autumn at the end of a calendar year, a special test is made to see if |date=Fall-Winter yyyy (4-1) is the date. | |||
]] | ]] | ||
function is_valid_month_season_range(range_start, range_end) | |||
local range_start_number = get_month_number (range_start); | local range_start_number = get_month_number (range_start); | ||
if 0 == range_start_number then | if 0 == range_start_number then -- is this a month range? | ||
local range_start_number = get_season_number (range_start); | local range_start_number = get_season_number (range_start); -- not a month; is it a season? get start season number | ||
local range_end_number = get_season_number (range_end); | local range_end_number = get_season_number (range_end); -- get end season number | ||
if 0 ~= range_start_number then | if 0 ~= range_start_number then -- is start of range a season? | ||
if range_start_number < range_end_number then | if range_start_number < range_end_number then -- range_start is a season | ||
return true; | return true; -- return true when range_end is also a season and follows start season; else false | ||
end | end | ||
if | if 4 == range_start_number and 1 == range_end_number then -- special case when range is Fall-Winter or Autumn-Winter | ||
return true; | return true; | ||
end | end | ||
| 第202行: | 第106行: | ||
end | end | ||
local range_end_number = get_month_number (range_end); | local range_end_number = get_month_number (range_end); -- get end month number | ||
if range_start_number < range_end_number then | if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end? | ||
return true; -- if yes, return true | |||
end | end | ||
return false; | return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month | ||
end | end | ||
--[[ | |||
--[[ | |||
Check date format to see that it is one of the formats approved by WP:DATESNO or WP:DATERANGE. Exception: only allowed range separator is endash. | Check date format to see that it is one of the formats approved by WP:DATESNO or WP:DATERANGE. Exception: only allowed range separator is endash. | ||
Additionally, check the date to see that it is a real date: no 31 in 30-day months; no 29 February when not a leap year. Months, both long-form and three | Additionally, check the date to see that it is a real date: no 31 in 30-day months; no 29 February when not a leap year. Months, both long-form and three | ||
character abbreviations, and seasons must be spelled correctly. Future years beyond next year are not allowed. | character abbreviations, and seasons must be spelled correctly. Future years beyond next year are not allowed. | ||
If the date fails the | If the date fails the format tests, this function returns false and does not return values for anchor_year and COinS_date. When this happens, the date parameter is | ||
used in the COinS metadata and the CITEREF identifier gets its year from the year parameter if present otherwise CITEREF does not get a date value. | used in the COinS metadata and the CITEREF identifier gets its year from the year parameter if present otherwise CITEREF does not get a date value. | ||
Inputs: | Inputs: | ||
date_string - date string from date-holding parameters (date, year, accessdate, embargo, archivedate, etc | date_string - date string from date-holding parameters (date, year, accessdate, embargo, archivedate, etc) | ||
Returns: | Returns: | ||
| 第313行: | 第128行: | ||
true, anchor_year, COinS_date | true, anchor_year, COinS_date | ||
anchor_year can be used in CITEREF anchors | anchor_year can be used in CITEREF anchors | ||
COinS_date is date_string without anchor_year disambiguator if any | COinS_date is date_string without anchor_year disambiguator if any | ||
]] | ]] | ||
function check_date (date_string) | |||
local year; -- assume that year2, months, and days are not used; | local year; -- assume that year2, months, and days are not used; | ||
local year2=0; -- second year in a year range | local year2=0; -- second year in a year range | ||
| 第328行: | 第142行: | ||
if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- year-initial numerical year month day format | if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- year-initial numerical year month day format | ||
year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)"); | year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)"); | ||
if 12 < | month=tonumber(month); | ||
if 12 < month or 1 > month or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar | |||
anchor_year = year; | anchor_year = year; | ||
elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month-initial: month day, year | elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d?%a?$") then -- month-initial: month day, year | ||
month, day, anchor_year, year=string.match(date_string, "(%a+)%s*(%d%d?),%s*((%d%d%d%d)%a?)"); | month, day, anchor_year, year=string.match(date_string, "(%a+)%s*(%d%d?),%s*((%d%d%d%d)%a?)"); | ||
month = get_month_number (month); | month = get_month_number (month); | ||
| 第341行: | 第156行: | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-initial: day month year | elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-initial: day month year | ||
| 第354行: | 第167行: | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
elseif date_string:match("^[1-9]%d? +%a+ – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-range: day month - day month year; uses spaced endash | elseif date_string:match("^[1-9]%d? +%a+ – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-range: day month - day month year; uses spaced endash | ||
day, month, day2, month2, anchor_year, year=date_string:match("(%d%d?) +(%a+) – (%d%d?) +(%a+) +((%d%d%d%d)%a?)"); | day, month, day2, month2, anchor_year, year=date_string:match("(%d%d?) +(%a+) – (%d%d?) +(%a+) +((%d%d%d%d)%a?)"); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | ||
month = get_month_number (month); | month = get_month_number (month); | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
elseif date_string:match("^%a+ +[1-9]%d? – %a+ +[1-9]%d?, +[1-9]%d%d%d | elseif date_string:match("^%a+ +[1-9]%d? – %a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month initial month-day-range: month day – month day, year; uses spaced endash | ||
month, day, month2, day2, anchor_year, year=date_string:match("(%a+) +(%d%d?) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)"); | month, day, month2, day2, anchor_year, year=date_string:match("(%a+) +(%d%d?) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)"); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end | if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end | ||
month = get_month_number (month); | month = get_month_number (month); | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-year-range: day month year - day month year; uses spaced endash | elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-year-range: day month year - day month year; uses spaced endash | ||
day, month, year, day2, month2, anchor_year, year2=date_string:match("(%d%d?) +(%a+) +(%d%d%d%d?) – (%d%d?) +(%a+) +((%d%d%d%d?)%a?)"); | day, month, year, day2, month2, anchor_year, year2=date_string:match("(%d%d?) +(%a+) +(%d%d%d%d?) – (%d%d?) +(%a+) +((%d%d%d%d?)%a?)"); | ||
if tonumber(year2) <= tonumber(year) then return false; end | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2 | if not is_valid_year(year2) then return false; end -- year2 no more than one year in the future | ||
month = get_month_number (month); | month = get_month_number (month); | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d – %a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d – %a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | ||
month, day, year, month2, day2, anchor_year, year2=date_string:match("(%a+) +(%d%d?), +(%d%d%d%d) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)"); | month, day, year, month2, day2, anchor_year, year2=date_string:match("(%a+) +(%d%d?), +(%d%d%d%d) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)"); | ||
if tonumber(year2) <= tonumber(year) then return false; end | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2 | if not is_valid_year(year2) then return false; end -- year2 no more than one year in the future | ||
month = get_month_number (month); | month = get_month_number (month); | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
elseif date_string:match("^ | elseif date_string:match("^Winter +[1-9]%d%d%d–[1-9]%d%d%d%a?$") then -- special case Winter year-year; year separated with unspaced endash | ||
year, anchor_year, year2=date_string:match("Winter +(%d%d%d%d)–((%d%d%d%d)%a?)"); | |||
anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years | anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years | ||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif date_string:match("^%a+ +[1-9]%d%d%d% – %a+ +[1-9]%d%d%d%a?$") then -- month/season year - month/season year; separated by spaced endash | elseif date_string:match("^%a+ +[1-9]%d%d%d% – %a+ +[1-9]%d%d%d%a?$") then -- month/season year - month/season year; separated by spaced endash | ||
| 第408行: | 第205行: | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) | if not((0 ~= get_month_number(month) and 0 ~= get_month_number(month2)) or -- both must be month year or season year, not mixed | ||
(0 ~= get_season_number(month) and 0 ~= get_season_number(month2))) then return false; end | |||
elseif date_string:match ("^%a+–%a+ +[1-9]%d%d%d%a?$") then -- month/season range year; months separated by endash | elseif date_string:match ("^%a+–%a+ +[1-9]%d%d%d%a?$") then -- month/season range year; months separated by endash | ||
month, month2, anchor_year, year=date_string:match ("(%a+)–(%a+)%s*((%d%d%d%d)%a?)"); | month, month2, anchor_year, year=date_string:match ("(%a+)–(%a+)%s*((%d%d%d%d)%a?)"); | ||
if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then return false | if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then | ||
return false; | |||
end | end | ||
elseif date_string:match("^%a+ +%d%d%d%d%a?$") then -- month/season | elseif date_string:match("^%a+ +%d%d%d%d%a?$") then -- month/season year | ||
month, anchor_year, year=date_string:match("(%a+)%s*((%d%d%d%d)%a?)"); | month, anchor_year, year=date_string:match("(%a+)%s*((%d%d%d%d)%a?)"); | ||
if not is_valid_year(year) then return false; end | if not is_valid_year(year) then return false; end | ||
if not is_valid_month_or_season | if not is_valid_month_or_season (month) then return false; end | ||
elseif date_string:match("^[1-9]%d%d%d?–[1-9]%d%d%d?%a?$") then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | elseif date_string:match("^[1-9]%d%d%d?–[1-9]%d%d%d?%a?$") then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
| 第462行: | 第239行: | ||
return false; | return false; | ||
end | end | ||
anchor_year = year; | |||
-- LOCAL: do not use mw.ustring: it allows full-width characters for %d. | -- LOCAL: do not use mw.ustring: it allows full-width characters for %d. | ||
| 第483行: | 第261行: | ||
anchor_year = year; | anchor_year = year; | ||
elseif | elseif mw.ustring.match(date_string, "^%d%d%d%d%-%d%d$") then -- numerical year month format | ||
year, month= | year, month=mw.ustring.match(date_string, "(%d%d%d%d)%-(%d%d)"); | ||
month=tonumber(month); | month=tonumber(month); | ||
if 12 < month or 1 > month or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar | if 12 < month or 1 > month or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar | ||
| 第513行: | 第291行: | ||
if false == result then return false; end | if false == result then return false; end | ||
-- if here, then date_string is valid; get coins_date from date_string (leave CITEREF disambiguator) ... | -- if here, then date_string is valid; get coins_date from date_string (leave CITEREF disambiguator) ... | ||
coins_date=mw.ustring.match(date_string, "^(.+%d)%a?$"); -- last character of valid disambiguatable date is always a digit -- LOCAL | |||
-- coins_date= mw.ustring.gsub(coins_date, "–", "-" ); -- ... and replace any ndash with a hyphen | coins_date= mw.ustring.gsub(coins_date, "–", "-" ); -- ... and replace any ndash with a hyphen | ||
return true, anchor_year, coins_date; -- format is good and date string represents a real date | |||
end | end | ||
--[[ | --[[ | ||
Cycle the date-holding parameters in passed table date_parameters_list through check_date() to check compliance with MOS:DATE. For all valid dates, check_date() returns | Cycle the date-holding parameters in passed table date_parameters_list through check_date() to check compliance with MOS:DATE. For all valid dates, check_date() returns | ||
true. The |date= parameter test is unique, it is the only date holding parameter from which values for anchor_year (used in CITEREF identifiers) and COinS_date (used in | true. The |date= parameter test is unique, it is the only date holding parameter from which values for anchor_year (used in CITEREF identifiers) and COinS_date (used in | ||
| 第532行: | 第304行: | ||
Unlike most error messages created in this module, only one error message is created by this function. Because all of the date holding parameters are processed serially, | Unlike most error messages created in this module, only one error message is created by this function. Because all of the date holding parameters are processed serially, | ||
a single error message is created as the dates are tested. | a single error message is created as the dates are tested. | ||
]] | ]] | ||
function p.dates(date_parameters_list) | |||
local anchor_year; -- will return as nil if the date being tested is not |date= | local anchor_year; -- will return as nil if the date being tested is not |date= | ||
local COinS_date; -- will return as nil if the date being tested is not |date= | local COinS_date; -- will return as nil if the date being tested is not |date= | ||
local error_message = "" | local error_message =""; | ||
local good_date=false; | |||
local good_date = false; | |||
for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
| 第553行: | 第323行: | ||
end | end | ||
elseif 'date'==k then -- if the parameter is |date= | elseif 'date'==k then -- if the parameter is |date= | ||
if v:match(" | if v:match("n%.d%.%a?") then -- if |date=n.d. with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v:match("((n%.d%.)%a?)"); --"n.d."; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v:match("((n%.d%.)%a?)"); --"n.d."; no error when date parameter is set to no date | ||
elseif v:match(" | elseif v:match("nd%a?$") then -- if |date=nd with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | ||
else | else | ||
good_date, anchor_year, COinS_date = check_date (v | good_date, anchor_year, COinS_date = check_date (v); -- go test the date | ||
end | end | ||
else -- any other date-holding parameter | else -- any other date-holding parameter | ||
| 第576行: | 第341行: | ||
end | end | ||
end | end | ||
return anchor_year, COinS_date, error_message; -- and done | |||
end | end | ||
return | return p; | ||