Различия

Здесь показаны различия между двумя версиями данной страницы.

--- wiki:moinmoin2dokuwiki [2009/07/22 02:11]
+++ wiki:moinmoin2dokuwiki [2009/12/25 23:53] (текущий)
@@ Строка 15: / Строка 15: @@
 # ----------------------------------------------------------------------------------------------- #
-# Hash for smileys replacement (actual for ubuntu.com wiki)
+# Hash for smileys replacement
 %smileys = (
 	#moin		doku
@@ Строка 60: / Строка 60: @@
 }
-# Link
+# Links
-sub link_replacement {
+sub linkReplacement {
-	die "Oops!\n" if @_ != 5;
+	die "Oops!\n" unless @_ == 5;
 	my ($namespace,$br,$target,$text,$params) = @_;
 	# Attachments links
@@ Строка 81: / Строка 81: @@
 }
-# Block
+# Blocks
-sub block_replacement {
+sub blockReplacement {
 	for (my $i = 0; $i < @_ && $_[$i] =~ /^\$*$/; $i++ ) { shift @_ }		# delete empty lines
 	for (my $i = -1; -$i <= @_ && $_[$i] =~ /^\$*$/; $i-- ) { pop @_ }		# delete empty lines
@@ Строка 96: / Строка 96: @@
 }
-# Table
+# Tables
-sub table_replacement {
+sub tableReplacement {
 	@table = @_;
 	for (@table) {
@@ Строка 107: / Строка 107: @@
 			"||  $2  " . '|' x (length($1) - 2)
 		/ge;
-		# Spanning
+		# Span
 			# TODO
 		# Aligment
@@ Строка 150: / Строка 150: @@
 		} elsif ($is_table) {
 			$is_table = 0;
-			@table = table_replacement(@table);
+			@table = tableReplacement(@table);
 			print OUTFILE @table;
 			@table = ();
@@ Строка 164: / Строка 164: @@
 			next MOINSCAN unless ($+{text} && $+{text} !~ /^\s+$/);
 			$_ = "$+{text}\n";
-		} elsif (/(?<data>.+?)}{$block_sep_len}\s*(?<text>.*)\n/ && $is_block) {
+		} elsif (/(?<data>.+?)?}{$block_sep_len}\s*(?<text>.*)\n/ && $is_block) {
 			$is_block = 0;
 			push @block, "$+{data}\n" if $+{data};
-			@block = block_replacement(@block);
+			@block = blockReplacement(@block);
 			print OUTFILE @block;
 			@block = ();
@@ Строка 204: / Строка 204: @@
 			(??{ &inv($+{br}) x 2 })								# closing brackets
 		/
-			&link_replacement($namespace,$+{br},$+{target},$+{text},$+{params})
+			&linkReplacement($namespace,$+{br},$+{target},$+{text},$+{params})
 		/gxe;
 		# CamelCase links
@@ Строка 228: / Строка 228: @@
 		s{~\+(.+?)\+~}<$1>g;										# larger text
 		# Lists and intends
-		if (/^(?<intend>\s+)\*\s*(?<value>\S.*)\n/) {							# dotted list
+		if (/^(?<intend>\s+)\*\s+(?<value>\S.*)\n/) {							# dotted list
 			if ($is_list) { print OUTFILE "\n" }
 			print OUTFILE " "x(2*length($+{intend})),"* ";
@@ Строка 234: / Строка 234: @@
 			$intend = length $+{intend};
 			$is_list = 1;
-		} elsif (/^(?<intend>\s+)[1aAiI]\.(#\d+)?\s*(?<value>\S.*)\n/) {		# numeric list
+		} elsif (/^(?<intend>\s+)[1aAiI]\.(#\d+)?\s+(?<value>\S.*)\n/) {		# numeric list
 			if ($is_list) { print OUTFILE "\n" }
 			print OUTFILE " "x(2*length($+{intend})),"- ";
@@ Строка 288: / Строка 288: @@
 	}
 	if ($is_table) {
-		@table = table_replacement(@table);
+		@table = tableReplacement(@table);
 		print OUTFILE @table;
 	}
 	if ($is_block) {
-		@block = block_replacement(@block);
+		@block = blockReplacement(@block);
 		print OUTFILE @block;
 	}
@@ Строка 315: / Строка 315: @@
   * http://moinmo.in/HelpOnEditing - проверить, всё ли учли
 </note>
+Абсолютно то же самое, только переписанное на Python — вдруг кому пригодится. Замечания те же: таблицы обрабатываются не до конца, и нужно не забыть заменить <!/code> на %%</code>%%:
+<code python>
+#!/usr/bin/env python
+# coding=utf8
+# Author: Vadim Nevorotin (malamut@ubuntu.ru)
+# License: GPLv3
+# Rus:
+# Скрипт конвертации разметки MoinMoin в разметку DokuWiki
+# Использование: moin2doku.py moin_file doku_file [namespace]
+# namespace - это пространство имён для всех ссылок на вложения (картинки, например)
+# Скрипт не до конца обрабатывает таблицы, не учитывается склейка ячеек и выравнивание
+# Также не добавлена конвертация новой возможности moin: {{{#!wiki caution }}} и прочих выделений (http://moinmo.in/HelpOnAdmonitions)
+# Всё остальное с http://moinmo.in/HelpOnEditing на 11.2009 конвертируется в полном объёме
+# Eng:
+# Usage: ./moin2doku.py moin_file doku_file [namespace]
+# namespace - for all links to images ({{namespace:image.png}})
+import sys
+import re
+from os.path import isfile
+# Hash for smileys replacement
+# Maps each MoinMoin smiley (key) to the DokuWiki text that replaces it (value).
+# Several MoinMoin icons share one DokuWiki value (e.g. ':!:'), so the mapping is not reversible.
+smileys = {
+	#moin		doku
+	'X-('	:	':-X',
+	':D'	:	':-D',
+	'<:('	:	':-?',
+	':o'	:	':-O',
+	':('	:	':-(',
+	':)'	:	':-)',
+	'B)'	:	'8-)',
+	':))'	:	':-P',
+	';)'	:	';-)',
+	'/!\\'	:	':!:',
+	'<!>'	:	':!:',
+	'(!)'	:	':!:',
+	':-?'	:	':-P',
+	':\\'	:	':-\\',
+	'>:>'	:	'^_^',
+	'|)'	:	':-|',
+	':-('	:	':-(',
+	':-)'	:	':-)',
+	'B-)'	:	'8-)',
+	':-))'	:	':-P',
+	';-)'	:	';-)',
+	'|-)'	:	':-|',
+	'(./)'	:	'LOL',
+	'{OK}'	:	':!:',
+	'{X}'	:	':!:',
+	'{i}'	:	':!:',
+	'{1}'	:	'<1>',
+	'{2}'	:	'<2>',
+	'{3}'	:	'<3>',
+	'{*}'	:	'<ubu>',
+	'{o}'	:	'<circ>',
+}
+# Convert functions
+def inv(br):
+	if br == '{': return '}'
+	if br == '[': return ']'
+	return br
+def linkReplacement(namespace,br,target,text):
+	if not target:
+		print >> sys.stderr, "Fatal error"
+		sys.exit(1)
+	m_att = re.match('(?:(?:attachment|drawing):)(?P<att>.+)$',target)
+	m_inter = re.match('(.+?):(?!//)(.+)$',target)
+	# Attachments links
+	if m_att:
+		if m_att.group('att'):
+			att = re.sub('(.+)/','',m_att.group('att'))	# leave only filename, without namespaces
+		else:
+			att = m_att.groups('att')
+		if text:
+			return '{{'+namespace+':'+att+'|'+text+'}}'
+		else:
+			return '{{'+namespace+':'+att+'}}'
+	# InterWiki links
+	if m_inter:
+		if text:
+			return '[['+m_inter.group(1)+'>'+m_inter.group(2)+'|'+text+']]'
+		else:
+			return '[['+m_inter.group(1)+'>'+m_inter.group(2)+']]'
+	# All other links simply returned in doku format
+	if not re.search('://', target):		# if target not an internet link replace all / to : (for namespaces)
+		target = re.sub('/',':',target)
+	if text:
+		return br + br + target + '|' + text + inv(br) * 2
+	else:
+		return br + br + target + inv(br) * 2
+def tableReplacement(table):
+	for i in range(len(table)):
+		line = table[i]
+		line = re.sub('\|\|(?:\s*(<.+?>))?\s*(.+?)\s*(?=\|\|)', lambda m: '||' + m.group(1) + m.group(2) if m.group(1) else '||' + m.group(2), line)
+		line = re.sub('((?:\|\|){2,})(.+?)(?=\|\|)', lambda m: '||  '+m.group(2)+'  ' + '|' * (len(m.group(1)) - 2), line)
+		line = re.sub('\|\|(\s*)(?:<.+?>\s*?)+(.*?)(?=\|\|)','||\\1\\2', line)
+		line = re.sub('\|\|','|', line)
+		table[i] = line
+	size = len(table)
+	i = 0
+	while i < size:
+		if re.match('\|+$',table[i]):
+			table.pop(i)
+			size=size-1
+		else:
+			i=i+1
+	return table
+def blockReplacement(block):
+	if block and not block[0].strip():
+		block.pop(0)
+	if not block[-1].strip():
+		block.pop()
+	if not block:
+		block.append('<code>\n')
+	elif re.match('\s*#!python',block[0]):
+		block.pop(0)
+		block.insert(0,'<code python>\n')
+	elif re.match('\s*#!cplusplus',block[0]):
+		block.pop(0)
+		block.insert(0,'<code cpp>\n')
+	elif re.match('\s*#!java',block[0]):
+		block.pop(0)
+		block.insert(0,'<code java>\n')
+	elif re.match('\s*#!pascal',block[0]):
+		block.pop(0)
+		block.insert(0,'<code pascal>\n')
+	elif re.match('\s*#!.+',block[0]):
+		block.pop(0)
+		block.insert(0,'<code>\n')
+	else:
+		block.insert(0,'<code>\n')
+	block.append('<!/code>')
+	return block
+def ConvertMoinToDoku(moin_file,doku_file,namespace):
+	try:
+		infile = open(moin_file,"r")
+		outfile = open(doku_file,"w")
+	except:
+		print >> sys.stderr, "Open file error!"
+		sys.exit(1)
+	content=infile.readlines()
+	infile.close()
+	# Conversion
+	st = '~@!'												# For headings conversion
+	camel = '(?<![\\[!:])\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'	# CamelCase regexp
+	conversion = (
+		# Processing instructions
+		('^##(.*)\n',''),
+		('^#(pragma|format|redirect|refresh|language)(.*)\n',''),
+		('^#deprecated(.*)\n','<note warning>This page is deprecated<note>\n'),
+		# Other elements
+		('(<<BR>>)|(\[\[BR]])','\\\\ '),
+		('^\s*-{4,}\s*$','----\n'),
+		('^(.*)/\*(.+?)\*/(.*)\n','\\1\n>\\2\n\\3\n'),		# inline comments
+		# Macros and another foolish - simply remove
+		('<<.+?>>',''),
+		# Headings
+		('^\s*=====\s*(.+?)\s*=====\s*$',st*2 + ' \\1 ' + st*2 + '\n'),
+		('^\s*====\s*(.+?)\s*====\s*$',st*3 + ' \\1 ' + st*3 + '\n'),
+		('^\s*===\s*(.+?)\s*===\s*$',st*4 + ' \\1 ' + st*4 + '\n'),
+		('^\s*==\s*(.+?)\s*==\s*$',st*5 + ' \\1 ' + st*5 + '\n'),
+		('^\s*=\s*(.+?)\s*=\s*$',st*6 + ' \\1 ' + st*6 + '\n'),
+		(st,'='),
+		# CamelCase links
+		('('+camel+')/('+camel+')','[[\\1:\\2]]'),
+		('\.\./('+camel+')','[[\\1]]'),
+		('/('+camel+')','[['+namespace+':\\1]]'),
+		('('+camel+')','[[\\1]]'),
+		# Avoid automating linking - simply remove
+		("''''''",''),
+		('``',''),
+		('!([A-Z]\w+)','\\1'),
+		# Text formatting
+		("'''''(.+?)'''''",'**//\\1//**'),
+		("'''(.+?)'''",'**\\1**'),
+		("''(.+?)''",'//\\1//'),
+		('`(.+?)`',"''\\1''"),
+		(',,(.+?),,','<sub>\\1</sub>'),
+		('\^(.+?)\^','<sup>\\1</sup>'),
+		('--\((.+?)\)--','<del>\\1</del>'),
+		# Unsupported text formating - simply remove
+		('~-(.+?)-~','\\1'),
+		('~\+(.+?)\+~','\\1')
+	)
+	# Values for conversion
+	intend = 0
+	is_table = 0
+	is_block = 0
+	block_sep_len = 3
+	is_list = 0
+	table = []
+	block = []
+	for i in range(len(content)):
+		line = content[i]
+		# First of all remove all end whitespaces except \n
+		line = re.sub('\s*$','\n',line)
+		# Tables
+		m = re.search('\s*(?P<line>\|\|.+\|\|\n)',line)
+		if m and not is_block:
+			is_table = 1
+			is_list = 0
+			line = m.group('line')
+		elif is_table:
+			is_table = 0
+			table = tableReplacement(table)
+			outfile.writelines(table)
+			table = []
+		# Code blocks parser
+		if not is_block:
+			line = re.sub('\{{3}(.+?)}{3}','%%\\1%%',line)
+		m_open = re.search('(?P<text>.*?)\s*(?P<sep>\{{3,})(?P<mod>.*\n)',line)
+		m_close = re.search('(?P<data>.+?)?}{'+str(block_sep_len)+'}\s*(?P<text>.*)\n',line)
+		if m_open and not is_block and not is_table:
+			block_sep_len = len(m_open.group('sep'))
+			is_block = 1
+			if is_list:
+				outfile.write('\n')
+			is_list = 0
+			block = [m_open.group('mod')]
+			if not ( m_open.group('text') and not re.match('\s+$',m_open.group('text')) ):
+				continue
+			line = m_open.group('text') + '\n'
+		elif m_close and is_block:
+			is_block = 0
+			if m_close.group('data'):
+				block.append(m_close.group('data') + '\n')
+			block = blockReplacement(block)
+			outfile.writelines(block)
+			block = []
+			if not m_close.group('text'):
+				continue
+			line = m_close.group('text') + '\n'
+		elif is_block:
+			block.append(line)
+			continue
+		# Links
+		# Unfortunately, Python is not a Perl, so we can't convert links using only one regexp...
+		link = ''
+		s = ''
+		br = ''
+		is_link = 0
+		last = len(line)-1
+		for i in range(len(line)):
+			if not is_link and i != last and line[i] == '[' and line[i+1] == '[':
+				is_link = 1
+				link = br = '['
+			elif not is_link and i != last and line[i] == '{' and line[i+1] == '{':
+				is_link = 1
+				link = br = '{'
+			elif is_link and line[i] == inv(br) and line[i-1] == inv(br):
+				is_link = 0
+				link = link + line[i]
+				m = re.match('(?P<target>[^\|]+)(?:\|(?P<text>[^\|]+))?',link[2:-2])
+				link = linkReplacement(namespace,link[0],m.group('target'),m.group('text'))
+				s = s + link
+			elif is_link:
+				link = link + line[i]
+			else:
+				s = s + line[i]
+		line = s
+		# Now convert syntax
+		for r in conversion:
+			reg=re.compile(r[0],re.I)
+			line=reg.sub(r[1],line)
+		# Lists and intends
+		m_dotted = re.match('(?P<intend>\s+)\*\s+(?P<value>\S.*)\n',line)
+		m_numeric = re.match('(?P<intend>\s+)[1aAiI]\.(#\d+)?\s+(?P<value>\S.*)\n',line)
+		m_defs = re.match('(?P<intend>\s+)(?P<key>\S[^\[\{]+)::\s+(?P<value>\S.*)\n',line)
+		m_def = re.match('(?P<intend>\s+)(?P<key>\S[^\[\{]+)::\s*',line)
+		m_desc = re.match('(?P<intend>\s+)::\s*(?P<value>\S.*)\n',line)
+		m_white = re.match('(?P<intend>\s+)(\.\s+)?\S',line)
+		if m_dotted:
+			if is_list:
+				outfile.write('\n')
+			outfile.write(" "*(2*len(m_dotted.group('intend')))+"* ")
+			line = m_dotted.group('value')
+			intend = len(m_dotted.group('intend'))
+			is_list = 1
+		elif m_numeric:
+			if is_list:
+				outfile.write('\n')
+			outfile.write(" "*(2*len(m_numeric.group('intend')))+"- ")
+			line = m_numeric.group('value')
+			intend = len(m_numeric.group('intend'))
+			is_list = 1
+		elif m_defs:
+			if is_list:
+				outfile.write('\n')
+			outfile.write('**'+m_defs.group('key')+'**\n')
+			outfile.write('  * ')
+			line = m_defs.group('value')
+			intend = len(m_defs.group('intend'))
+			is_list = 1
+		elif m_def:
+			if is_list:
+				outfile.write('\n')
+			outfile.write('**'+m_def.group('key')+'**\n')
+			line = ''
+			is_list = 0
+		elif m_desc:
+			if is_list:
+				outfile.write('\n')
+			outfile.write('  * ')
+			line = m_desc.group('value')
+			intend = len(m_desc.group('intend'))
+			is_list = 1
+		elif m_white:
+			curr_intend = len(m_white.groups('intend'))
+			if curr_intend != intend and is_list:
+				outfile.write('\n\n')
+				is_list = 0
+			elif curr_intend != intend and not is_list:
+				outfile.write('\n')
+			elif curr_intend == intend and is_list:
+				outfile.write(' ')
+				line = line.rstrip()
+			intend = curr_intend
+			line = re.sub('^\s*(\.\s*)?(?=\S)','',line)
+		else:
+			if is_list and not re.match('\n',line):
+				outfile.write('\n\n')
+			elif intend:
+				outfile.write('\n')
+			intend = 0
+			is_list = 0
+		# Smileys $)
+		for smile in smileys.keys():
+			line = re.sub('(\s|\A)'+re.escape(smile)+'(\s)',' '+smileys[smile]+'\\2',line)
+		# Finally...
+		if is_block and is_list:
+			line += '\n'
+		if not is_table:
+			outfile.write(line)
+		else:
+			table.append(line)
+	# If we haven't close some things
+	if is_table:
+		table = tableReplacement(table)
+		outfile.writelines(table)
+	if is_block:
+		block = blockReplacement(block)
+		outfile.writelines(block)
+	outfile.close()
+# Main script...
+def PrintHelp():
+	print """Usage: moin2doku moin_file doku_file [namespace]
+Convert MoinMoin page to Doku, using namespace for pictures"""
+	sys.exit(0)
+def PrintParameterError():
+	print >> sys.stderr, "Incorrect parameters! Use --help to read more information."
+	sys.exit(1)
+def CheckParameters(moin_file,doku_file):
+	if not isfile(moin_file):
+		print >> sys.stderr, "Moin file doesn't exists!"
+		sys.exit(1)
+	if not isfile(doku_file):
+		print >> sys.stderr, "Doku file doesn't exists!"
+		sys.exit(1)
+if __name__ == '__main__':
+	if len(sys.argv) > 1:
+		if sys.argv[1] in ('-h', '--help'):
+			PrintHelp()
+		elif len(sys.argv) > 2:
+			moin_file = sys.argv[1]
+			doku_file = sys.argv[2]
+			if len(sys.argv) > 3:
+				namespace = sys.argv[3]
+			else:
+				namespace = '.'
+		else:
+			PrintParameterError()
+	else:
+		PrintParameterError()
+	CheckParameters(moin_file,doku_file)
+	ConvertMoinToDoku(moin_file,doku_file,namespace)
+</code>
+===== Улучшенная версия =====
+Я ж ленивый, поэтому написал «обёртку» к представленной выше функции конвертации — так, чтобы её было удобно вызывать с различными аргументами. Кроме того, этот вариант умеет скачивать страницы и вложения из интернета. Функция конвертации здесь та же, просто она сопровождается удобным способом её вызова.
+Описание см. в POD в коде. Не забудьте заменить <!/code> на </code>.
+<code perl>
+#!/usr/bin/perl -w
+use 5.010;					# Для всяких выкрутасов, очень удобно
+use File::Path;				# Для управления каталогами
+use File::Spec::Functions;	# Для склейки путей
+use Getopt::Long;			# Аргументики, разбираем аргументики
+use Pod::Usage;				# Для документации (всё равно пашет через пень-колоду)
+=begin comment
+Сначала идёт основная функция конвертации одного файла в другой, потом всякая ерунда, связанная с
+выкачкой вложений из интернета и разбором аргументов. Основную функцию как мог перевёл на аглицкий.
+Аргументы описываются ниже в основной программе, всё это написано just for fun, но если найдёте ошибку -
+сообщите мне, или, как вариант, исправьте прям в этом скрипте самостоятельно и не забудьте поместить его
+обратно на help.ubuntu.ru
+=end comment
+=cut
+# ----------------------------------------------------------------------------------------------- #
+#             Convert text file with MoinMoin Wiki syntax to DokuWiki syntax                      #
+#                          by Malamut (malamut@ubuntu.ru) (2009)                                  #
+# ----------------------------------------------------------------------------------------------- #
+# Hash for smileys replacement
+# Maps each MoinMoin smiley (key) to the DokuWiki text that replaces it (value).
+# Package global (no "my"): ConvertMoinToDoku reads it by name.
+%smileys = (
+	#moin		doku
+	'X-('	=>	':-X',
+	':D'	=>	':-D',
+	'<:('	=>	':-?',
+	':o'	=>	':-O',
+	':('	=>	':-(',
+	':)'	=>	':-)',
+	'B)'	=>	'8-)',
+	':))'	=>	':-P',
+	';)'	=>	';-)',
+	'/!\\'	=>	':!:',
+	'<!>'	=>	':!:',
+	'(!)'	=>	':!:',
+	':-?'	=>	':-P',
+	':\\'	=>	':-\\',
+	'>:>'	=>	'^_^',
+	'|)'	=>	':-|',
+	':-('	=>	':-(',
+	':-)'	=>	':-)',
+	'B-)'	=>	'8-)',
+	':-))'	=>	':-P',
+	';-)'	=>	';-)',
+	'|-)'	=>	':-|',
+	'(./)'	=>	'LOL',
+	'{OK}'	=>	':!:',
+	'{X}'	=>	':!:',
+	'{i}'	=>	':!:',
+	'{1}'	=>	'<1>',
+	'{2}'	=>	'<2>',
+	'{3}'	=>	'<3>',
+	'{*}'	=>	'<ubu>',
+	'{o}'	=>	'<circ>',
+);
+# For links regex: returns the closing bracket that pairs with the given
+# opening one ("{" -> "}", "[" -> "]"); any other value is returned unchanged.
+# Written as a hash lookup because given/when is deprecated in recent perls.
+sub inv {
+	my ($bracket) = @_;
+	return $bracket unless defined $bracket;
+	my %closing_for = ( '{' => '}', '[' => ']' );
+	return $closing_for{$bracket} // $bracket;
+}
+# Links
+# Translates one MoinMoin link to DokuWiki syntax and returns it as a string.
+# Arguments (exactly five, otherwise dies):
+#   namespace - DokuWiki namespace put in front of attachment file names
+#   br        - opening bracket of the original link ("[" or "{")
+#   target    - link target
+#   text      - optional caption (ignored when false)
+#   params    - optional link parameters (accepted but not used)
+sub linkReplacement {
+	die "Oops!\n" unless @_ == 5;
+	my ($namespace,$br,$target,$text,$params) = @_;
+	my $caption = $text ? "|$text" : '';
+	# Attachments links
+	if ($target =~ /^(?:(?:attachment|drawing):)(?<att>.+)$/) {
+		(my $att = $+{att}) =~ s{(.+)/}{};		# leave only filename, without namespaces
+		return "{{$namespace:$att$caption}}";
+	}
+	my $is_url = $target =~ m#://# ? 1 : 0;
+	# InterWiki links: all with : except internet links (containing ://).
+	# The explicit $is_url guard keeps URLs with a second colon (a port,
+	# for example) from being mistaken for "wiki:page".
+	if (!$is_url and $target =~ m#^(?<wiki>.+?):(?!//)(?<page>.+)$#) {
+		return "[[$+{wiki}>$+{page}$caption]]";
+	}
+	# All other links simply returned in doku format
+	$target =~ s#/#:#g unless $is_url;	# if target not an internet link replace all / to : (for namespaces)
+	return "$br$br$target$caption" . inv($br) x 2;
+}
+# Blocks
+# Wraps the lines collected from a MoinMoin {{{ }}} block in DokuWiki <code>
+# tags and returns the resulting list. Blank lines at both ends are removed.
+# A leading "#!parser" line is dropped and selects the <code> language for
+# python, cplusplus, java and pascal. The closing tag is written as
+# "<!/code>" on purpose (the page embedding this script asks the reader to
+# replace it with the real closing tag).
+sub blockReplacement {
+	my @lines = @_;
+	# Test the end element each time; stepping an index while shifting/popping
+	# skips lines and removes non-empty ones.
+	shift @lines while @lines && $lines[0]  =~ /^\s*$/;		# delete empty lines
+	pop   @lines while @lines && $lines[-1] =~ /^\s*$/;		# delete empty lines
+	my @languages = (
+		#moin			doku
+		[ 'python',		'python' ],
+		[ 'cplusplus',	'cpp'    ],
+		[ 'java',		'java'   ],
+		[ 'pascal',		'pascal' ],
+	);
+	my $opening = "<code>\n";
+	# Only a "#!" parser line is a header; an ordinary "# comment" stays in the code
+	if (@lines && $lines[0] =~ /^\s*#!.+/) {
+		my $header = shift @lines;
+		for my $pair (@languages) {
+			my ($moin_name, $doku_name) = @$pair;
+			if ($header =~ /^\s*#!\Q$moin_name\E/) {
+				$opening = "<code $doku_name>\n";
+				last;
+			}
+		}
+	}
+	return ($opening, @lines, "<!/code>\n");
+}
+# Tables
+# Takes the buffered MoinMoin table rows and returns them as DokuWiki rows.
+# Cell tags (<...>) are dropped; spanning and alignment are not converted.
+# Rows that end up consisting of "|" only are removed.
+sub tableReplacement {
+	my @table = @_;
+	for (@table) {
+		s/\|\|(?:\s*(<.+?>))?\s*(.+?)\s*(?=\|\|)/							# delete all whitespaces
+			if ($1) { "||$1$2" }
+			else { "||$2" }
+		/ge;
+		s/((?:\|\|){2,})(.+?)(?=\|\|)/										# move few || to another side of cell and center
+			"||  $2  " . '|' x (length($1) - 2)
+		/ge;
+		# Span
+			# TODO
+		# Aligment
+			# TODO
+			#s/\|\|.*?(?:(?:<\(>)|(?:<style="text-align: left">))(?=\|\|)//g;	# left
+			#s/<:>//g;	# center
+			#s/<\)>//g;	# right
+		# Ok, thats all
+		s/\|\|(\s*)(?:<.+?>\s*?)+(.*?)(?=\|\|)/||$1$2/g;					# remove all tags, we really don't need it
+		s/\|\|/|/g;															# finally replace || to |
+	}
+	# Remove all empty strings. grep builds a new list, so no row is skipped
+	# (splicing inside an index loop skips the row after each removal and
+	# then reads past the end of the shortened array).
+	@table = grep { !/^\|+$/ } @table;
+	return @table;
+}
+# Convert moin file (first argument) to doku file (second argument), namespace for attachments is the third argument (default - (.))
+# Dies when the number of arguments is wrong or when a file cannot be opened or closed.
+# The input is processed line by line; table rows and {{{ }}} blocks are buffered
+# and converted by tableReplacement / blockReplacement once they are complete.
+sub ConvertMoinToDoku {
+	die "ConvertMoinToDoku: I need 2 or 3 arguments!\n" if (@_ != 2 and @_ != 3);
+	my $moin = shift @_;
+	my $doku = shift @_;
+	my $namespace = shift @_ // '.';	#/#3rd argument or (.)
+	# Lexical handles: closed automatically if we die, and no clash with other bareword handles
+	open my $in, "<", $moin or die "Can't open '$moin' ($!)!\n";
+	open my $out, ">", $doku or die "Can't open '$doku' ($!)!\n";
+	my $intend = 0;
+	my $is_table = 0;
+	my $is_block = 0;
+	my $block_sep_len = 3;											# length of block separator ({{{), in can be >= 3
+	my $is_list = 0;
+	my @table = ();
+	my @block = ();
+MOINSCAN: while (<$in>) {
+		# First of all remove all end whitespaces except \n
+		s/\s*$/\n/;
+		# Tables
+		if (/\s*(?<line>\|\|.+\|\|\n)/ && !$is_block) {
+			$is_table = 1;
+			$is_list = 0;
+			$_ = $+{line};
+		} elsif ($is_table) {
+			$is_table = 0;
+			@table = tableReplacement(@table);
+			print {$out} @table;
+			@table = ();
+		}
+		# Code blocks parser
+		s/\{{3}(.+?)}{3}/%%$1%%/g unless $is_block;								# first remove all ignored blocks
+		if (/(?<text>.*?)\s*(?<sep>\{{3,})(?<mod>.*\n)/ && !$is_block && !$is_table) {
+			$block_sep_len = length $+{sep};
+			$is_block = 1;
+			if ($is_list) { print {$out} "\n" }
+			$is_list = 0;
+			@block = ($+{mod});
+			next MOINSCAN unless ($+{text} && $+{text} !~ /^\s+$/);
+			$_ = "$+{text}\n";
+		} elsif (/(?<data>.+?)?}{$block_sep_len}\s*(?<text>.*)\n/ && $is_block) {
+			$is_block = 0;
+			push @block, "$+{data}\n" if $+{data};
+			@block = blockReplacement(@block);
+			print {$out} @block;
+			@block = ();
+			next MOINSCAN unless $+{text};
+			$_ = "$+{text}\n";
+		} elsif ($is_block) {
+			push @block, $_;
+			next MOINSCAN;
+		}
+		# Processing instructions
+		# Anchored at line start: unanchored, a "##" or "#format" in the middle
+		# of ordinary text would delete the rest of the line.
+		s/^##(.*)\n//;															# comments
+		s/^#(pragma|format|redirect|refresh|language)(.*)\n//i;					# remove all
+		s/^#deprecated(.*)\n/<note warning>This page is deprecated<\/note>\n/i;	# deprecated
+		# Other elements
+		s/(<<BR>>)|(\[\[BR]])/\\\\ /g;								# break
+		s/^\s*-{4,}\s*$/----\n/g;									# horizontal line
+		s#^(.*)/\*(.+?)\*/(.*)\n#$1\n>$2\n$3\n#;					# inline comments
+		# Macros and another foolish - simply remove
+		s/<<.+?>>//g;												# macros
+		# Headings
+		# $s is a temporary marker: writing "=" directly would let the shorter
+		# heading patterns below match the already converted line again
+		my $s = "~!@>{";
+		s/^\s*=====\s*(.+?)\s*=====\s*$/$s$s $1 $s$s\n/g;			# level 5
+		s/^\s*====\s*(.+?)\s*====\s*$/$s$s$s $1 $s$s$s\n/g;			# level 4
+		s/^\s*===\s*(.+?)\s*===\s*$/$s$s$s$s $1 $s$s$s$s\n/g;		# level 3
+		s/^\s*==\s*(.+?)\s*==\s*$/$s$s$s$s$s $1 $s$s$s$s$s\n/g;		# level 2
+		s/^\s*=\s*(.+?)\s*=\s*$/$s$s$s$s$s$s $1 $s$s$s$s$s$s\n/g;	# level 1
+		s/\Q$s\E/=/g;
+		# Links
+		s/
+			(?<br>[\[\{])\g{br}										# opening brackets
+				(?<target>[^\|]+?)									# target
+				(?:\|(?<text>										# text
+					(?(?=\{\{).+?}}|[^\|]+?)						# test if text is an image link
+				))?
+				(?:\|(?<params>[^\|]+?))?							# parameters
+			(??{ &inv($+{br}) x 2 })								# closing brackets
+		/
+			&linkReplacement($namespace,$+{br},$+{target},$+{text},$+{params})
+		/gxe;
+		# CamelCase links
+		my $camel = '(?<![\\[!:])\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b';	# CamelCase link regex
+		s#($camel)/($camel)#[[$1:$2]]#g;							# CamelCase to namespace
+		s#\.\./($camel)#[[$1]]#g;									# (very)strange CamelCase
+		s#/($camel)#[[$namespace:$1]]#g;							# CamelCase to subpage
+		s#($camel)#[[$1]]#g;										# simlpe CamelCase
+		# Avoid automating linking - simply remove
+		s{''''''}<>g;
+		s{``}<>g;
+		s{!([A-Z]\w+)}<$1>g;
+		# Text formatting
+		s{'''''(.+?)'''''}<**//$1//**>g;							# bold and italic
+		s{'''(.+?)'''}<**$1**>g;									# bold
+		s{''(.+?)''}<//$1//>g;										# italic
+		s{`(.+?)`}<''$1''>g;										# monospaced
+		s{,,(.+?),,}{<sub>$1</sub>}g;								# sub index
+		s{\^(.+?)\^}{<sup>$1</sup>}g;								# sup index
+		s{--\((.+?)\)--}{<del>$1</del>}g;							# strike through text
+		# Unsupported text formating - simply remove
+		s{~-(.+?)-~}<$1>g;											# smaller text
+		s{~\+(.+?)\+~}<$1>g;										# larger text
+		# Lists and intends
+		if (/^(?<intend>\s+)\*\s+(?<value>\S.*)\n/) {							# dotted list
+			if ($is_list) { print {$out} "\n" }
+			print {$out} " "x(2*length($+{intend})),"* ";
+			$_ = $+{value};
+			$intend = length $+{intend};
+			$is_list = 1;
+		} elsif (/^(?<intend>\s+)[1aAiI]\.(#\d+)?\s+(?<value>\S.*)\n/) {		# numeric list
+			if ($is_list) { print {$out} "\n" }
+			print {$out} " "x(2*length($+{intend})),"- ";
+			$_ = $+{value};
+			$intend = length $+{intend};
+			$is_list = 1;
+		} elsif (/^(?<intend>\s+)(?<key>\S[^\[\{]+)::\s+(?<value>\S.*)\n/) {	# definition list
+			if ($is_list) { print {$out} "\n" }
+			print {$out} "**$+{key}**\n";
+			print {$out} "  * ";
+			$_ = $+{value};
+			$intend = length $+{intend};
+			$is_list = 1;
+		} elsif (/^(?<intend>\s+)(?<key>\S[^\[\{]+)::\s*/) {					# definition
+			if ($is_list) { print {$out} "\n" }
+			print {$out} "**$+{key}**\n";
+			$_="";
+			$is_list = 0;
+		} elsif (/^(?<intend>\s+)::\s*(?<value>\S.*)\n/) {						# description
+			if ($is_list) { print {$out} "\n" }
+			print {$out} "  * ";
+			$_ = $+{value};
+			$intend = length $+{intend};
+			$is_list = 1;
+		} elsif (/^(?<intend>\s+)(\.\s+)?\S/) {									# simple leading whitespaces
+			my $curr_intend = length $+{intend};
+			if ($curr_intend != $intend and $is_list) {
+				print {$out} "\n\n";
+				$is_list = 0;
+			}
+			elsif ($curr_intend != $intend and !$is_list) { print {$out} "\n" }
+			elsif ($curr_intend == $intend and $is_list) {
+				print {$out} " ";
+				s/\n$//;
+			}
+			$intend = $curr_intend;
+			s/^\s*(\.\s*)?(?=\S)//;
+		} else {																# string witout leading whitespaces
+			if ($is_list && ! /^\n/) { print {$out} "\n\n" }
+			elsif ($intend) { print {$out} "\n" }
+			$intend = 0;
+			$is_list = 0;
+		}
+		# Smileys ;)
+		foreach my $smile (keys %smileys) {
+			s/(\s|\A)\Q$smile\E(\s)/ $smileys{$smile}$2/g;
+		}
+		# Ok, print all results of abracadabra to file if we need it
+		if ($is_block && $is_list) { $_ .= "\n" }
+		if (!$is_table) { print {$out} $_ }
+		else { push @table, $_ }
+	}
+	# Flush a table or block that was still open at end of file
+	if ($is_table) {
+		@table = tableReplacement(@table);
+		print {$out} @table;
+	}
+	if ($is_block) {
+		@block = blockReplacement(@block);
+		print {$out} @block;
+	}
+	close $in or die "error\n";
+	close $out or die "error\n";
+}
+# -----------------------------  End of Convert Function  --------------------------------------- #
+# -------------------------------------  Функции  ----------------------------------------------- #
+# Creates the directory named by the single argument unless it is already
+# there; dies when the argument count is wrong or mkdir fails.
+sub CreateDir {
+	die "CreateDir: нужен один аргумент!\n" unless @_ == 1;
+	my ($dir) = @_;
+	unless (-d $dir) {
+		mkdir $dir or die "Не могу создать '$dir'! ($!)\n";
+	}
+}
+# Fetches a file from the wiki with wget and writes it to the given place.
+# First parameter - URL to download
+# Second parameter - file the result is saved to
+# Returns 1 on success; warns and returns nothing when wget fails.
+sub GetUrlFile {
+	die "GetUrlFile: Должно быть ровно два аргумента\n" unless @_ == 2;
+	my ($url, $filename) = @_;
+	# List form of system: no shell is involved, so quotes or other shell
+	# metacharacters in the URL or file name cannot break or inject commands.
+	# "--" stops option parsing before the URL.
+	my @command = ('wget', '-nv', '--user-agent', 'Opera', '-O', $filename, '--', $url);
+	if (system(@command) != 0) {
+		warn "Невозможно получить или сохранить $url\n";
+		return;
+	}
+	return 1;
+}
+# Downloads every attachment referenced in the given moin file.
+# First parameter - name of the file with moin markup
+# Second parameter - base internet address of the wiki
+# Third parameter - page address on the wiki, including the namespace if needed
+# Fourth parameter - directory where all downloaded files are put
+sub GetAttachments {
+	die "GetAttachments: Должно быть ровно четыре аргумента\n" unless @_ == 4;
+	my ($moin,$base_url,$page_url,$atts_dir) = @_;
+	open INFILE, "<", $moin or die "Не могу открыть файл '$moin'!\n";
+	while (<INFILE>) {
+		# Each match yields two captures: the opening bracket and the target
+		my @atts = /([\{\[])\1(?:(?:attachment|drawing):)(.*?)(??{ &inv($1) x 2 })/g;
+		next unless @atts;
+		my $index = 1;						# targets sit at the odd positions
+		while ($index < @atts) {
+			my ($namespace, $file) = $atts[$index] =~ m#^(?:(.+)/)?([^/]+?)$#;
+			# an attachment given with a path lives on that page, otherwise on the current one
+			my $owner = $namespace ? $namespace : $page_url;
+			my $url = "$base_url/$owner?action=AttachFile&do=get&target=$file";
+			CreateDir($atts_dir);
+			GetUrlFile($url,catfile($atts_dir,$file));
+			$index += 2;
+		}
+	}
+	close INFILE;
+}
+# ----------------------------  Программа  ------------------------------------------------------- #
+=head1 NAME
+moin2doku.pl - преобразует разметку из MoinMoin в DokuWiki
+=head1 SYNOPSIS
+moin2doku.pl [options] files|urls
+=cut
+# Option variables (not all of them are used yet)
+my $help = 0;					# show help
+my $urls = 0;					# treat the arguments as urls rather than files (the -u switch)
+my $verbose = 0;				# verbose output
+my $working_dir = "";			# directory where all results are saved
+my $use_pagename_dir = 0;		# use the name of each processed page as its $working_dir
+my $attachments_dir = "";		# directory for images and other things from {{attachment}}
+my $links = 1;					# whether internal links are converted
+my $wiki_adress = "localhost";	# wiki address
+my $wiki_namespace = "";		# namespace on the wiki the pages are taken from
+my $moin_file = "";				# name of the file for downloaded moin markup with -u; PageName.moin when not given
+my $doku_file = "";				# name of the file with doku markup; PageName.txt or PageName.doku when not given
+# Sets the options needed for the Russian articles on wiki.ubuntu.com
+# (registered below as the handler of the -r / --ru-official-wiki switch)
+sub ru_official_wiki {
+	$wiki_adress = 'https://wiki.ubuntu.com';
+	$wiki_namespace = 'RussianDocumentation';
+}
+Getopt::Long::Configure ("bundling");					# configure getopt so that bundled short switches are accepted
+GetOptions(
+	"verbose|v+" => \$verbose,							# TODO: verbose output
+	"delete-internal-links|l" => \$links,				# TODO: delete internal links instead of converting them
+	"urls|u" => \$urls,									# treat the arguments as internet links
+	"moin-file|m=s" => \$moin_file,						# file name for moin markup, PageName.moin by default
+	"doku-file|d=s" => \$doku_file,						# file name for doku markup, PageName.txt by default or PageName.doku if taken
+	"use-pagename-dir|n" => \$use_pagename_dir,		# use the name of every page as its working-directory
+	"wiki-adress|w=s" => \$wiki_adress,					# address of the wiki to download from
+	"wiki-namespace|N=s" => \$wiki_namespace,			# namespace on the wiki where the pages are looked up
+	"working-directory|D=s" => \$working_dir,			# directory where all results are saved
+	"attachments-directory|A=s" => \$attachments_dir,	# subdirectory for images and at the same time the namespace for attachments, page name by default
+	"ru-official-wiki|r" => \&ru_official_wiki,			# shortcut that presets the wiki address and namespace
+	"help|h" => \$help);								# show help
+=head1 OPTIONS
+=over 8
+=item B<-A имя, --attachments-directory=имя>
+Имя директории, в которую складываются все вложения, скачанные при B<-u>, а также имя пространства имён, указываемого при конвертировании всех вложений.
+=item B<-d файл, --doku-file=файл>
+Файл для сохранения сконвертированного в DokuWiki разметку результата. По умолчанию используется ИмяСтраницы.txt или, если такой файл уже существует, ИмяСтраницы.doku
+=item B<-D директория, --working-directory=директория>
+Директория в которую будут сложены все результаты. По умолчанию всё складывается в текущую.
+=item B<-l, --delete-internal-links>
+Удалять все внутренние ссылки, а не преобразовывать их (не работает)
+=item B<-m файл, --moin-file=файл>
+Имя файла, в котором будет сохранён скачанный исходный код страницы в moin разметке. По умолчанию для каждой статьи используется ИмяСтатьи.moin
+=item B<-n, --use-pagename-dir>
+Использовать для каждой страницы её имя в качестве директории для сохранения всех результатов.
+=item B<-N имя, --wiki-namespace=имя>
+Путь до пространства имён, в котором будут искаться указанные статьи, от корня вики.
+=item B<-u, --urls>
+Воспринимать все аргументы как имена статей на вики по адресу, заданному ключами B<-N> и B<-w>
+=item B<-v, --verbose>
+Подробный вывод (не работает)
+=item B<-w адрес, --wiki-adress>
+Интернет-адрес вики, с которой брать страницы при указании ключа B<-u>
+=back
+=cut
+# Main program: show usage when asked for or when nothing is given, then
+# process every argument either as a wiki page name (-u) or as a local file.
+if ($help) {
+	pod2usage(-verbose => 1);
+}
+unless (@ARGV) {
+	pod2usage(-verbose => 1);
+}
+if (!$use_pagename_dir && ($moin_file || $doku_file) && @ARGV > 1) {
+	warn "При указании жёстких имён для moin или doku файлов указанный файл будет использован для всех аргументов ".
+	"и будет в итоге содержать текст только для последней обработанной страницы\n";
+}
+if ($urls) {
+	if ($working_dir && !$use_pagename_dir) {
+		CreateDir($working_dir);
+		chdir $working_dir or die "Невозможно сменить директорию на '$working_dir'! ($!)\n";
+	}
+	$wiki_adress =~ s#/$##;		# remove the trailing / if there is one
+	$wiki_namespace =~ s#/$##;	# remove the trailing / if there is one
+	for my $page (@ARGV) {
+		# report the offending page name ($page; $_ is not set by this loop)
+		if ($page =~ m<[/ ]>) { die "Имя страницы не может содержать символы '/', ' ' ($page)\n" }
+		if ($use_pagename_dir) {
+			CreateDir($page);
+		}
+		# Fetch the page source
+		my $source_url = $wiki_namespace ? "$wiki_adress/$wiki_namespace/$page?action=raw" : "$wiki_adress/$page?action=raw";
+		my $moin_source = $moin_file ? $moin_file : "$page.moin";
+		$moin_source = catfile($page,$moin_source) if $use_pagename_dir;
+		GetUrlFile($source_url,$moin_source);
+		# Fetch the attachments
+		my $atts_dir = $attachments_dir ? $attachments_dir : $page;
+		$atts_dir = catdir($page,$atts_dir) if $use_pagename_dir;
+		GetAttachments($moin_source,$wiki_adress, $wiki_namespace ? "$wiki_namespace/$page" : $page, $atts_dir);
+		# Now convert
+		my $doku_result = $doku_file ? $doku_file : "$page.txt";
+		$doku_result = catfile($page,$doku_result) if $use_pagename_dir;
+		unless ($attachments_dir) { ConvertMoinToDoku($moin_source,$doku_result,$page) }
+		else { ConvertMoinToDoku($moin_source,$doku_result,$attachments_dir) }
+	}
+} else {
+	for my $file (@ARGV) {
+		# Take the page name: drop the optional directory part and the file extension.
+		# The directory part must be optional, otherwise a bare "Page.moin" gives no page name.
+		my ($page) = $file =~ m#^(?:.*/)?([^/]+?)(?:\.[^\./]+)?$#;
+		unless (defined $page) {
+			warn "Не могу определить имя страницы для '$file'\n";
+			next;
+		}
+		# Choose the name of the output file
+		my $doku_result = $doku_file ? $doku_file : "$page.txt";
+		if ($use_pagename_dir) {
+			CreateDir($page);
+			$doku_result = catfile($page,$doku_result);
+		} elsif ($working_dir) {
+			CreateDir($working_dir);
+			$doku_result = catfile($working_dir,$doku_result);
+		}
+		if (-e $doku_result) { $doku_result =~ s/\.txt$/.doku/ }
+		# Convert
+		unless ($attachments_dir) { ConvertMoinToDoku($file,$doku_result,$page) }
+		else { ConvertMoinToDoku($file,$doku_result,$attachments_dir) }
+	}
+}
+__END__
+=head1 DESCRIPTION
+описания пожалуй не будет - лень писать. Лучше всего использовать так:
+moin2doku -run статья для url и
+moin2doku -n файл для обычных файлов
+=head1 AUTHOR
+Скрипт написал Malamut just for fun :) Со всеми вопросами и пожеланиями обращайтесь: malamut@ubuntu.ru
+=head1 BUGS
+Некорректно работает параметр -h поскольку весь этот текст написан на русском, вообще что-то Pod::Usage как-то странно работает с русским текстом.
+На данный момент некорректно обрабатываются таблицы - не учитывается выравнивание и склейка через colspan и rowspan.
+Кроме того, не работают параметры B<-v> и B<-l>.
+=cut
+</code>
 {{tag>DokuWiki Perl Программирование}}

MoinMoin to DokuWiki Сравнение версий

Различия