-- Module:Serialize song

--

local export = {} local find = mw.ustring.find local match = mw.ustring.match local sub = mw.ustring.sub local replace = mw.ustring.gsub local split = mw.text.split local lower = mw.ustring.lower local len = mw.ustring.len local trim = mw.text.trim local resolve_redirect = require("module:resolve redirect").show

--- Read a page and return the inside of its "song box 2" template call.
-- The page text is passed through resolve_redirect before searching
-- (presumably so redirects yield the target's text - confirm against that
-- module).
-- @param page_name Title of the page to read.
-- @return The text after the "{{song box 2" opening line, cut just before the
--         line that holds the closing "}}" (the newline preceding the braces
--         is kept). If no closing line is found the rest of the page is
--         returned.
-- Raises an error when the page cannot be read or has no song box.
local function extract_sb(page_name)
	-- mw.title.new returns nil for an invalid title, and getContent returns
	-- nil for a page that does not exist.
	local page = mw.title.new(page_name)
	local sb = page and page:getContent()
	if not sb then
		error('could not read the content of "' .. tostring(page_name) .. '"')
	end
	sb = resolve_redirect(sb)

	-- Find where the song box starts and drop everything up to and including
	-- the identifying text.
	local sb_opening = "{{song box 2\n"
	local sb_start = find(sb, sb_opening, 1, true)
	if not sb_start then
		error('no "{{song box 2" template found in "' .. tostring(page_name) .. '"')
	end
	sb = sub(sb, sb_start + len(sb_opening))

	-- Find where the song box ends and cut before the closing brackets.
	local sb_end = find(sb, "\n}}\n", 1, true)
	if sb_end then
		sb = sub(sb, 1, sb_end)
	end

	return sb
end

--- Extract the value of one parameter from song box text.
-- @param sb         Song box text, one parameter per "\n |" separated chunk.
-- @param sb_param   Parameter name; it is inserted into a pattern as-is, so it
--                   must not contain pattern magic characters.
-- @param split_type "first line" keeps only the first line of the value;
--                   "all" (or anything else) keeps the whole value.
-- @return The parameter value with list bullets at the start of continuation
--         lines removed, or "" when the parameter is not present.
local function extract_sb_param(sb, sb_param, split_type)
	local param_pattern = "|" .. sb_param .. " += "

	-- Find where the parameter name starts. Without this guard a missing
	-- parameter would leave the start index nil and the code below would
	-- operate on unrelated text (or fail outright).
	local param_start = find(sb, param_pattern)
	if not param_start then
		return ""
	end

	-- Reduce the text to begin with the parameter name, then strip the name.
	local value = sub(sb, param_start)
	value = replace(value, "^" .. param_pattern, "")

	-- Keep only the part before the next parameter starts.
	value = split(value, "\n |")[1]

	if split_type == "first line" then
		-- A value whose first line is empty has no match; fall back to "".
		value = match(value, "^([^\n]+)") or ""
	end

	-- Remove bullets at the beginning of a line and any spaces following them.
	value = replace(value, "\n%* *", "\n")

	return value
end

--- Look up the VocaDB id recorded on a wiki page.
-- @param page_name Title of the page to inspect.
-- @return The id as a string of digits; nil when the title is invalid, the
--         page has no content, or a "{{links" template is present without a
--         " |vocadb = " entry; or a "◆◆Error? ...◆◆" marker string when the
--         page uses neither recognised template form.
local function vocadb_id(page_name)
	-- mw.title.new returns nil for titles MediaWiki rejects (for example text
	-- containing "|"), so the title must be checked before it is indexed.
	local page = mw.title.new(page_name)
	local content = page and page:getContent()
	if not content then
		return nil
	end
	content = resolve_redirect(content)

	local id
	if find(content, "{{links") then
		id = match(content, " |vocadb = ([0-9]+)")
	elseif find(content, "{{cat%-singer") and find(content, "[0-9]}}") then
		-- Takes a run of digits that directly precedes a "}}".
		id = match(content, "[^0-9]([0-9]+)}}")
	else
		id = '◆◆Error? (could not extract VocaDB ID from "' .. page_name .. '")◆◆'
	end

	return id
end

--- Build the table of song names from the song box "title" value.
-- @param text      The title parameter value, one name per line. The first
--                  line is the original title; later lines are expected to
--                  look like "Label: name".
-- @param page_name Page title; when it ends in ")" the text inside the final
--                  parentheses is recorded as the romanized name.
-- @return A table with the keys 'original', 'english', 'romanized' and
--         'other' (a list); keys with no data are absent.
local function title(text, page_name)
	local title_final = {}
	local other = {}

	-- If the page name ends in parentheses, get the romanized name from them.
	if find(page_name, "%)$") then
		page_name = replace(page_name, "_", " ")
		local romanized = replace(page_name, "^.+ %((.+)%)$", "%1")
		title_final['romanized'] = romanized
	end

	for i, name_line in ipairs(split(text, "\n")) do
		if i == 1 then
			name_line = replace(name_line, '^%"(.+)%"$', "%1") -- strip quotation marks
			name_line = replace(name_line, "^%'%'%'(.+)%'%'%'$", "%1") -- strip bolding
			title_final['original'] = name_line
		elseif find(name_line, "^English: ") then
			local english = replace(name_line, "^English: ", "")
			title_final['english'] = english
		elseif find(name_line, "^Romaji: ")
			or find(name_line, "^Pinyin: ")
			or find(name_line, "^Romanization: ") then
			-- Romanization lines are ignored here; the romanized name comes
			-- from the page name above.
		else
			-- Any other labelled line: drop the "Label: " prefix and keep it.
			name_line = replace(name_line, "^[^:]+: ", "")
			table.insert(other, name_line)
		end
	end

	-- Leave 'other' absent rather than storing an empty list.
	if other[1] then
		title_final['other'] = other
	end

	return title_final
end

--- Convert the song box "links" value into a list of media link tables.
-- Every "{{l|xx|id}}" or "{{l|xx|id|comment}}" call in the text yields one
-- entry, where xx is a two-character site abbreviation. Text outside those
-- calls is ignored.
-- @param text The links parameter value.
-- @return A list of tables. For abbreviation "00" the id is already a full URL
--         and the entry holds 'full_url' and 'comment'. Otherwise the entry
--         holds 'full_url', 'website', 'id' and 'comment'. 'comment' is absent
--         when the template call has none.
local function links(text)
	text = replace(text, "\n", " ")

	local frame = mw.getCurrentFrame()
	local links_final = {}

	-- Walk the template calls directly instead of flattening them into
	-- ";"-separated strings, so separators between calls and ";" characters
	-- inside an id or comment cannot misalign or corrupt the entries.
	for abbr, id, comment in mw.ustring.gmatch(text, "{{l|(..)|([^|{}]+)|?([^|{}]*)}}") do
		if comment == "" then
			comment = nil
		end

		local attribs = {}
		if abbr == "00" then
			attribs['full_url'] = id
			attribs['comment'] = comment
		else
			-- The "external" template output is matched as "[url label]" and
			-- only the url is kept; full_url ends up nil if the output has a
			-- different shape.
			local expanded = frame:expandTemplate{title = "external", args = {abbr .. "-url", id}}
			attribs['full_url'] = match(expanded, "^%[([^ ]+) .+%]$")
			attribs['website'] = abbr
			attribs['id'] = id
			attribs['comment'] = comment
		end

		table.insert(links_final, attribs)
	end

	return links_final
end

--- Parse the song box "singers" or "producers" value into a list of people.
-- Each line may hold several names separated by ", " or "and", optionally
-- followed by " (role, role)" at the end of the line.
-- @param text The parameter value, one group of names per line.
-- @return A list of tables with 'name', and where available 'note', 'vocadb',
--         'roles' (a list) and 'supporting'.
local function singers_and_producers(text)
	local names_final = {}
	local supporting, roles = {}, {}

	-- Examine each line.
	for i, line in ipairs(split(text, "\n")) do
		-- Demarcate names and roles with a separator unlikely to occur in text.
		line = replace(line, ", ", "㍿㍿")
		line = replace(line, "[㍿ ]and ", "㍿㍿")
		line = replace(line, "㍿㍿+", "㍿㍿")

		-- Record whether this line is marked as supporting.
		-- NOTE(review): both checks below test for a single space and the
		-- substitution after them uses an empty pattern. These literals look
		-- like markup (possibly an opening and closing tag) that was lost from
		-- this copy of the source; as written the second branch is
		-- unreachable. Confirm against the live module before relying on it.
		if find(line, " ") then
			supporting[i] = true
		elseif find(line, " ") then
			supporting[i] = false
		end
		line = replace(line, "", "")

		-- Put roles aside if present. The line is only rewritten when the
		-- "names (roles)" shape actually matches; otherwise it is kept whole
		-- instead of becoming nil.
		if find(line, "%)$") then
			local names_part, roles_part = match(line, "^(.+) %((.+)%)$")
			if names_part then
				line, roles[i] = names_part, roles_part
			end
		end

		-- Examine each name in the line.
		for _, name in ipairs(split(line, "㍿㍿")) do
			local attribs = {}

			if find(name, "^%[%[") then
				-- Wiki link: [[Name]]note
				attribs['name'], attribs['note'] = match(name, "^%[%[(.+)%]%](.*)$")
				if attribs['name'] then
					attribs['vocadb'] = vocadb_id(attribs['name'])
				end
			elseif find(name, "^{{vocaloid|") then
				-- Template call: {{vocaloid|Name}}note
				attribs['name'], attribs['note'] = match(name, "^{{vocaloid|(.+)}}(.*)$")
			elseif find(name, "^%[http") then
				-- External link: [url Name]; the url is stored as the note.
				attribs['note'], attribs['name'] = match(name, "^%[([^ ]+) (.+)%]$")
			else
				attribs['name'] = name
			end

			-- Malformed markup (e.g. an unclosed link): keep the raw text.
			if not attribs['name'] then
				attribs['name'] = name
			end

			if attribs['note'] == '' then
				attribs['note'] = nil
			end

			attribs['supporting'] = supporting[i]
			if roles[i] then
				attribs['roles'] = split(roles[i], "㍿㍿")
			end

			table.insert(names_final, attribs)
		end
	end

	return names_final
end

--- Merge runs of {{shared}} rows into ordinary one-cell-per-language rows.
-- @param text         Lyrics table text.
-- @param column_count Number of language columns; must be at least 1.
-- @return The rewritten text.
local function convert_shared_to_normal(text, column_count)
	-- Capture column_count successive {{shared}} rows together.
	-- One repetition is:  \n| *{{shared}} *| *([^\n]+)\n|-
	local target_text = mw.ustring.rep("\n| *{{shared}} *| *([^\n]+)\n|%-", column_count)

	-- Emit the captures as consecutive cells, then add a row separator.
	-- For two columns this is:  \n|%1\n|%2\n|-
	local cells = {}
	for i = 1, column_count do
		cells[i] = "\n|%" .. i
	end
	local replacement_text = table.concat(cells) .. "\n|-"

	local converted = replace(text, target_text, replacement_text)
	-- A leftover {{shared}} marker, in case of table-final placement.
	converted = replace(converted, "\n| *{{shared}} *| *", "")

	return converted
end

--- Extract the lyrics of a song page, one entry per language column.
-- @param page_name Title of the song page.
-- @return A list of tables with 'lang', 'type' ("original", "translation",
--         "romanized" or absent) and 'text'. The list is empty when the page
--         cannot be read, has no "==Lyrics==" section, uses a layout that is
--         not handled, or has a lyrics table with no language header cells.
local function lyrics(page_name)
	local lyrics_final = {}

	-- mw.title.new returns nil for an invalid title, and getContent returns
	-- nil for a page that does not exist.
	local page = mw.title.new(page_name)
	local content = page and page:getContent()
	if not content then
		return lyrics_final
	end
	content = resolve_redirect(content)

	-- Cut the page down to the body of the Lyrics section.
	local section_heading = "\n==Lyrics==\n"
	local lrc_start = find(content, section_heading)
	if not lrc_start then
		return lyrics_final
	end
	local lyrics_text = sub(content, lrc_start + len(section_heading))
	local lrc_end = find(lyrics_text, "\n+==([^\n]+)==\n") -- next heading, if any
	if lrc_end then
		lyrics_text = sub(lyrics_text, 1, lrc_end)
	end

	-- NOTE(review): the two branch conditions below, and the match pattern in
	-- the first branch, consist of single spaces. They look like markup
	-- (possibly tag names) that was lost from this copy of the source; as
	-- written the second branch is unreachable and the table branch only runs
	-- for text without any space. Confirm against the live module.
	if find(lyrics_text, " ") then
		-- Single block of lyrics: take the language from the page's
		-- "<language> songs" category.
		local lang = match(content, "%[%[Category:([^\n]+) songs%]%]")
		local to_iso = {
			['English'] = 'en',
			['Finnish'] = 'fi',
			['Italian'] = 'it',
			['Latin'] = 'la',
			['Spanish'] = 'es',
		}
		lang = to_iso[lang] or lang

		local attribs = {
			['lang'] = lang,
			['type'] = "original",
			['text'] = match(lyrics_text, " (.+) "),
		}
		table.insert(lyrics_final, attribs)
	elseif find(lyrics_text, " ") then
		-- not going to handle this
		lyrics_final = {}
	else
		-- Trim to the lyrics table.
		local table_opening = '{| class="lyrics"'
		local table_start = find(lyrics_text, table_opening)
		if not table_start then
			return lyrics_final
		end
		lyrics_text = sub(lyrics_text, table_start + len(table_opening))

		-- Determine languages from the header cells (lines starting with "!")
		-- and blank those lines out. This looks at every single line.
		-- NOTE(review): assumes every header cell contains a {{...}} template;
		-- a header without one makes the find calls below fail.
		local lang_list = {}
		local text_split = split(lyrics_text, "\n")
		for i, line in ipairs(text_split) do
			if find(line, "^!") then
				local lang = match(line, "{{(.+)}}")
				if find(lang, "head") then lang = replace(lang, "head|", "") end
				if find(lang, "en%-unofficial") then lang = "en-unofficial" end
				if find(lang, "%-r") then lang = "r" end
				if lang == "zh-s" then lang = "zh-simplified" end
				if lang == "zh-t" then lang = "zh-traditional" end
				table.insert(lang_list, lang)
				text_split[i] = ''
			end
		end
		lyrics_text = table.concat(text_split, "\n")

		-- Without any language column the repeated pattern below would be
		-- empty and the row loop would have a step of zero.
		local column_count = #lang_list
		if column_count == 0 then
			return lyrics_final
		end

		lyrics_text = convert_shared_to_normal(lyrics_text, column_count)

		-- Strip the table markup so that every cell is one line.
		lyrics_text = replace(lyrics_text, "\n|%-[^\n]*", "") -- row separators
		lyrics_text = replace(lyrics_text, "\n|}", "") -- table end
		-- A cell holding only a line break spans the row: one empty line per column.
		lyrics_text = replace(lyrics_text, "\n|<[Bb][Rr] ?%/?>", mw.ustring.rep("\n", column_count))
		lyrics_text = replace(lyrics_text, "\n|", "\n") -- cell markers
		lyrics_text = trim(lyrics_text)

		-- Main pass.
		text_split = split(lyrics_text, "\n")

		-- Prepare the tables that will hold each language.
		for lang_i, lang in ipairs(lang_list) do
			local kind = nil

			if lang_i == 1 then
				kind = "original"
			elseif find(lang, "en.+official") then
				lang = "en"
				kind = "translation"
			elseif lang == "r" then
				lang = nil
				kind = "romanized"
			end

			lyrics_final[lang_i] = {
				['lang'] = lang,
				['type'] = kind,
				['text'] = {},
			}
		end

		-- Fill each language table: lines cycle through the columns in order.
		for row_start = 1, #text_split, column_count do
			for lang_i = 1, column_count do
				local cell = text_split[row_start + lang_i - 1]
				if cell then -- the last row may be incomplete
					table.insert(lyrics_final[lang_i]['text'], cell)
				end
			end
		end

		-- Convert tables of lines to blocks of text.
		for lang_i = 1, column_count do
			lyrics_final[lang_i]['text'] = table.concat(lyrics_final[lang_i]['text'], "\n")
		end
	end

	return lyrics_final
end

--- Entry point: serialize a song page into a structured dump.
-- @param frame Scribunto frame. frame.args[1] is the title of the song page;
--              when frame.args['lua'] is set the result is rendered with
--              module:wt/debug's dump, otherwise with module:wt/json's toJSON.
-- @return The rendered text.
-- Raises an error when no page name is given.
function export.show(frame)
	local page_name = frame.args[1]
	if not page_name or page_name == "" then
		error("no page name given (expected as the first unnamed argument)")
	end

	local sb = extract_sb(page_name)

	-- Extract each parameter and set it aside.
	local sb_p = {
		['title'] = extract_sb_param(sb, 'title', 'all'),
		['singers'] = extract_sb_param(sb, 'singers', 'all'),
		['producers'] = extract_sb_param(sb, 'producers', 'all'),
		['links'] = extract_sb_param(sb, 'links', 'all'),
	}

	-- Process each parameter.
	local final_text = {
		['names'] = title(sb_p['title'], page_name),
		['vocalists'] = singers_and_producers(sb_p['singers']),
		['producers'] = singers_and_producers(sb_p['producers']),
		['media'] = links(sb_p['links']),

		['lyrics'] = lyrics(page_name),
		['vocadb'] = vocadb_id(page_name),
	}

	-- NOTE(review): the empty prefix and single-space suffix below look like
	-- wrapper markup that was lost from this copy of the source; confirm
	-- against the live module.
	if frame.args['lua'] then
		return '' .. require("module:wt/debug").dump(final_text) .. ' '
	else
		return '' .. require("module:wt/json").toJSON(final_text) .. ' '
	end
end

return export