MoinMoin to DokuWiki | Русскоязычная документация по Ubuntu

Это старая версия документа.

Официальная Wiki Ubuntu сидит на движке MoinMoin, мы же используем DokuWiki. Озадачившись вопросами конвертации синтаксиса, нашёл лишь это, что только приблизительно позволяет преобразовать основные конструкции. Поэтому, ради спортивного интереса, набросал скриптик на перле (замените только в нём <!/code> на </code> в одном месте, иначе местный парсер ломается):

#!/usr/bin/perl -w
 
# Usage: ./convert.pl moin_file doku_file [namespace]
# namespace - for all links to images ({{namespace:image.png}})
 
use 5.010;			# We need it
 
# ----------------------------------------------------------------------------------------------- #
#             Convert text file with MoinMoin Wiki syntax to DokuWiki syntax                      #
#                          by Malamut (malamut@ubuntu.ru) (2009)                                  #
# ----------------------------------------------------------------------------------------------- #
 
# Smiley translation table (actual for the ubuntu.com wiki).
# Keys are MoinMoin smiley markup, values are the text written to the DokuWiki
# output in their place. ConvertMoinToDoku walks over every key and substitutes
# it wherever it appears between whitespace (or at the start of the line).
# All strings are single-quoted, so '\\' stands for one literal backslash
# (e.g. '/!\\' is the three characters /!\).
# This is a package global (declared without "my").
%smileys = (
	# MoinMoin	DokuWiki
	'X-('	=>	':-X',
	':D'	=>	':-D',
	'<:('	=>	':-?',
	':o'	=>	':-O',
	':('	=>	':-(',
	':)'	=>	':-)',
	'B)'	=>	'8-)',
	':))'	=>	':-P',
	';)'	=>	';-)',
	'/!\\'	=>	':!:',
	'<!>'	=>	':!:',
	'(!)'	=>	':!:',
	':-?'	=>	':-P',
	':\\'	=>	':-\\',
	'>:>'	=>	'^_^',
	'|)'	=>	':-|',
	':-('	=>	':-(',
	':-)'	=>	':-)',
	'B-)'	=>	'8-)',
	':-))'	=>	':-P',
	';-)'	=>	';-)',
	'|-)'	=>	':-|',
	'(./)'	=>	'LOL',
	'{OK}'	=>	':!:',
	'{X}'	=>	':!:',
	'{i}'	=>	':!:',
	'{1}'	=>	'<1>',
	'{2}'	=>	'<2>',
	'{3}'	=>	'<3>',
	'{*}'	=>	'<ubu>',
	'{o}'	=>	'<circ>',
);
 
# Return the closing counterpart of an opening link bracket (used by the links regex
# and by link_replacement to build the closing "}}" / "]]").
#   $_[0] - bracket character, normally "{" or "["
# Returns "}" for "{", "]" for "[" and any other value unchanged.
# A plain lookup table is used instead of given/when: the "switch" feature is
# experimental, deprecated in recent perls and warns under -w.
sub inv {
	my ($bracket) = @_;
	my %closing_for = ('{' => '}', '[' => ']');
	return $closing_for{$bracket} if defined $bracket && exists $closing_for{$bracket};
	return $bracket;
}
 
# Build the DokuWiki replacement for one MoinMoin link or embed.
#   $namespace - DokuWiki namespace put in front of attachment file names
#   $br        - opening bracket character that was matched ("[" or "{")
#   $target    - link target (page, URL, "attachment:...", "Wiki:Page", ...)
#   $text      - optional link text (may be undef)
#   $params    - optional link parameters (accepted but not used)
# Returns the converted link as a string.
# Dies with "Oops!" unless called with exactly five arguments.
sub link_replacement {
	die "Oops!\n" if @_ != 5;
	my ($namespace,$br,$target,$text,$params) = @_;
	# "|text" suffix shared by every output form. length() is tested instead of
	# truthiness so that a link whose text is "0" keeps its text.
	my $label = (defined $text && length $text) ? "|$text" : '';
	# Attachments links
	if ($target =~ /^(?:attachment|drawing):(?<att>.+)$/) {
		(my $att = $+{att}) =~ s{(.+)/}{};		# leave only filename, without namespaces
		return "{{$namespace:$att$label}}";
	}
	# InterWiki links: everything with a ":" except internet links.
	# A target that starts with "scheme://" is never an InterWiki link, even when
	# it contains a further colon (port number, "Category:Foo" in the path, ...);
	# without this check "http://host:8080/x" was turned into "[[http://host>8080/x]]".
	my $is_url = $target =~ m{^[A-Za-z][A-Za-z0-9+.\-]*://};
	if (!$is_url && $target =~ m#^(.+?):(?!//)(.+)$#) {
		return "[[$1>$2$label]]";
	}
	# All other links simply returned in doku format
	$target =~ s#/#:#g if $target !~ m#://#;	# if target not an internet link replace all / to : (for namespaces)
	return "$br$br$target$label" . inv($br) x 2;
}
 
# Wrap the collected lines of a MoinMoin {{{ ... }}} block into a DokuWiki <code> block.
#   @_ - lines of the block (each ending in "\n"); the first one is whatever followed
#        the opening "{{{" on its line
# Returns the list: opening tag, block lines, closing tag.
# Blank lines at both ends are dropped. If the first remaining line is a
# "#!python" / "#!cplusplus" / "#!java" / "#!pascal" header, it is removed and selects
# the <code LANG> variant; any other first line starting with "#" is removed as well.
# NOTE(review): that last rule also discards a genuine first line such as
# "#!/bin/sh" or "# comment" - kept as in the original, confirm whether intended.
sub block_replacement {
	my @lines = @_;
	# Trim blank lines. The previous index-based loops advanced the index while
	# shifting/popping and therefore skipped elements (deleting real lines), and
	# their pattern /^\$*$/ matched literal "$" characters instead of whitespace.
	shift @lines while @lines && $lines[0]  =~ /^\s*$/;
	pop   @lines while @lines && $lines[-1] =~ /^\s*$/;
	my %doku_lang_for = (
		python		=> 'python',
		cplusplus	=> 'cpp',
		java		=> 'java',
		pascal		=> 'pascal',
	);
	my $open_tag = "<code>\n";
	if (@lines && $lines[0] =~ /^\s*#!(python|cplusplus|java|pascal)/) {
		$open_tag = "<code $doku_lang_for{$1}>\n";
		shift @lines;								# header line is not part of the code
	}
	elsif (@lines && $lines[0] =~ /^\s*#.+/) {
		shift @lines;								# unknown header: plain <code>
	}
	# Real closing tag; "<!/code>" was only a workaround for the wiki page parser.
	return ($open_tag, @lines, "</code>\n");
}
 
# Convert the rows of one MoinMoin table ("||cell||cell||") to DokuWiki rows ("|cell|cell|").
#   @_ - table rows, one string per row, each ending in "\n"
# Returns the converted rows; rows that consist of "|" characters only are dropped.
# Cell spanning and alignment are not converted (see TODO below); all <...> cell
# tags are removed.
sub table_replacement {
	my @rows = @_;									# lexical copy (was an undeclared global)
	for (@rows) {
		# delete all whitespaces around the cell content, keeping a leading <tag>
		s/\|\|(?:\s*(<.+?>))?\s*(.+?)\s*(?=\|\|)/ $1 ? "||$1$2" : "||$2" /ge;
		# move few || to another side of cell and center
		s/((?:\|\|){2,})(.+?)(?=\|\|)/ "||  $2  " . '|' x (length($1) - 2) /ge;
		# Spanning
			# TODO
		# Aligment
			# TODO
			#s/\|\|.*?(?:(?:<\(>)|(?:<style="text-align: left">))(?=\|\|)//g;	# left
			#s/<:>//g;	# center
			#s/<\)>//g;	# right
		# Ok, thats all
		s/\|\|(\s*)(?:<.+?>\s*?)+(.*?)(?=\|\|)/||$1$2/g;					# remove all tags, we really don't need it
		s/\|\|/|/g;															# finally replace || to |
	}
	# Remove all empty rows. grep is used because splicing inside a loop over
	# 0..$#rows skipped the row following each removed one and indexed past the
	# end of the shrunken array.
	return grep { !/^\|+$/ } @rows;
}
 
# Convert moin file (first argument) to doku file (second argument), namespace for attachments is the third argument (default - (.))
#   $_[0] - path of the MoinMoin source file (read)
#   $_[1] - path of the DokuWiki result file (created / overwritten)
#   $_[2] - optional namespace used for attachment and sub-page links, '.' when omitted
# The input is processed line by line. Table rows and the lines of {{{ ... }}} blocks
# are buffered and handed to table_replacement / block_replacement when the table or
# block ends (or at end of file); every other line is rewritten in place and printed.
# Dies when called with a wrong number of arguments or when a file cannot be
# opened or closed.
sub ConvertMoinToDoku {
	die "ConvertMoinToDoku: I need 2 or 3 arguments!\n" if (@_ != 2 and @_ != 3);
	my $moin = shift @_;
	my $doku = shift @_;
	my $namespace = shift @_ // '.';	# 3rd argument or (.)
	# Lexical handles instead of the global barewords INFILE / OUTFILE
	open my $in, "<", $moin or die "Can't open '$moin' ($!)!\n";
	open my $out, ">", $doku or die "Can't open '$doku' ($!)!\n";
	my $intend = 0;
	my $is_table = 0;
	my $is_block = 0;
	my $block_sep_len = 3;											# length of block separator ({{{), in can be >= 3
	my $is_list = 0;
	my @table = ();
	my @block = ();
MOINSCAN: while (<$in>) {
		# First of all remove all end whitespaces except \n
		s/\s*$/\n/;
		# Tables
		if (/\s*(?<line>\|\|.+\|\|\n)/ && !$is_block) {
			$is_table = 1;
			$is_list = 0;
			$_ = $+{line};
		} elsif ($is_table) {
			# first non-table line after a table: flush the buffered rows
			$is_table = 0;
			@table = table_replacement(@table);
			print {$out} @table;
			@table = ();
		}
		# Code blocks parser
		s/\{{3}(.+?)}{3}/%%$1%%/g unless $is_block;								# first remove all ignored blocks
		if (/(?<text>.*?)\s*(?<sep>\{{3,})(?<mod>.*\n)/ && !$is_block && !$is_table) {
			$block_sep_len = length $+{sep};
			$is_block = 1;
			if ($is_list) { print {$out} "\n" }
			$is_list = 0;
			@block = ($+{mod});
			next MOINSCAN unless ($+{text} && $+{text} !~ /^\s+$/);
			$_ = "$+{text}\n";
		} elsif (/(?<data>.*?)}{$block_sep_len}\s*(?<text>.*)\n/ && $is_block) {
			# "data" may be empty: the usual closing line is a bare "}}}", which the
			# former (?<data>.+?) could not match, so such blocks were never closed.
			$is_block = 0;
			push @block, "$+{data}\n" if length $+{data};						# length: keep data that is just "0"
			@block = block_replacement(@block);
			print {$out} @block;
			@block = ();
			next MOINSCAN unless $+{text};
			$_ = "$+{text}\n";
		} elsif ($is_block) {
			push @block, $_;
			next MOINSCAN;
		}
		# Processing instructions
		s/##(.*)\n//;															# comments
		s/#(pragma|format|redirect|refresh|language)(.*)\n//i;					# remove all
		s{#deprecated(.*)\n}{<note warning>This page is deprecated</note>\n}i;	# deprecated (note tag is now closed properly)
		# Other elements
		s/(<<BR>>)|(\[\[BR]])/\\\\ /g;								# break
		s/^\s*-{4,}\s*$/----\n/g;									# horizontal line
		s#^(.*)/\*(.+?)\*/(.*)\n#$1\n>$2\n$3\n#;					# inline comments
		# Macros and another foolish - simply remove
		s/<<.+?>>//g;												# macros
		# Headings
		# MoinMoin level 1 is "=", DokuWiki level 1 is "======". A placeholder is used
		# for the new "=" signs so that a line converted by one rule is not matched
		# again by the following, shorter rules; it is turned into "=" at the end.
		my $s = "~!@>{";
		s/^\s*=====\s*(.+?)\s*=====\s*$/$s$s $1 $s$s\n/g;			# level 5
		s/^\s*====\s*(.+?)\s*====\s*$/$s$s$s $1 $s$s$s\n/g;			# level 4
		s/^\s*===\s*(.+?)\s*===\s*$/$s$s$s$s $1 $s$s$s$s\n/g;		# level 3
		s/^\s*==\s*(.+?)\s*==\s*$/$s$s$s$s$s $1 $s$s$s$s$s\n/g;		# level 2
		s/^\s*=\s*(.+?)\s*=\s*$/$s$s$s$s$s$s $1 $s$s$s$s$s$s\n/g;	# level 1
		s/\Q$s\E/=/g;
		# Links
		s/
			(?<br>[\[\{])\g{br}										# opening brackets
				(?<target>[^\|]+?)									# target
				(?:\|(?<text>										# text
					(?(?=\{\{).+?}}|[^\|]+?)						# test if text is an image link
				))?
				(?:\|(?<params>[^\|]+?))?							# parameters
			(??{ &inv($+{br}) x 2 })								# closing brackets
		/
			&link_replacement($namespace,$+{br},$+{target},$+{text},$+{params})
		/gxe;		
		# CamelCase links
		my $camel = '(?<![\\[!:])\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b';	# CamelCase link regex
		s#($camel)/($camel)#[[$1:$2]]#g;							# CamelCase to namespace
		s#\.\./($camel)#[[$1]]#g;									# (very)strange CamelCase
		s#/($camel)#[[$namespace:$1]]#g;							# CamelCase to subpage
		s#($camel)#[[$1]]#g;										# simlpe CamelCase
		# Avoid automating linking - simply remove
		s{''''''}<>g;
		s{``}<>g;
		s{!([A-Z]\w+)}<$1>g;
		# Text formatting
		s{'''''(.+?)'''''}<**//$1//**>g;							# bold and italic
		s{'''(.+?)'''}<**$1**>g;									# bold
		s{''(.+?)''}<//$1//>g;										# italic
		s{`(.+?)`}<''$1''>g;										# monospaced
		s{,,(.+?),,}{<sub>$1</sub>}g;								# sub index
		s{\^(.+?)\^}{<sup>$1</sup>}g;								# sup index
		s{--\((.+?)\)--}{<del>$1</del>}g;							# strike through text
		# Unsupported text formating - simply remove
		s{~-(.+?)-~}<$1>g;											# smaller text
		s{~\+(.+?)\+~}<$1>g;										# larger text
		# Lists and intends
		# List items are printed without their trailing newline; the newline is
		# emitted later, once it is known whether the next line continues the item.
		if (/^(?<intend>\s+)\*\s*(?<value>\S.*)\n/) {							# dotted list
			if ($is_list) { print {$out} "\n" }
			print {$out} " "x(2*length($+{intend})),"* ";
			$_ = $+{value};
			$intend = length $+{intend};
			$is_list = 1;
		} elsif (/^(?<intend>\s+)[1aAiI]\.(#\d+)?\s*(?<value>\S.*)\n/) {		# numeric list
			if ($is_list) { print {$out} "\n" }
			print {$out} " "x(2*length($+{intend})),"- ";
			$_ = $+{value};
			$intend = length $+{intend};
			$is_list = 1;
		} elsif (/^(?<intend>\s+)(?<key>\S[^\[\{]+)::\s+(?<value>\S.*)\n/) {	# definition list
			if ($is_list) { print {$out} "\n" }
			print {$out} "**$+{key}**\n";
			print {$out} "  * ";
			$_ = $+{value};
			$intend = length $+{intend};
			$is_list = 1;
		} elsif (/^(?<intend>\s+)(?<key>\S[^\[\{]+)::\s*/) {					# definition
			if ($is_list) { print {$out} "\n" }
			print {$out} "**$+{key}**\n";
			$_="";
			$is_list = 0;
		} elsif (/^(?<intend>\s+)::\s*(?<value>\S.*)\n/) {						# description
			if ($is_list) { print {$out} "\n" }
			print {$out} "  * ";
			$_ = $+{value};
			$intend = length $+{intend};
			$is_list = 1;
		} elsif (/^(?<intend>\s+)(\.\s+)?\S/) {									# simple leading whitespaces
			my $curr_intend = length $+{intend};
			if ($curr_intend != $intend and $is_list) {
				print {$out} "\n\n";
				$is_list = 0;
			}
			elsif ($curr_intend != $intend and !$is_list) { print {$out} "\n" }
			elsif ($curr_intend == $intend and $is_list) {
				# same indentation as the open list item: continuation of that item
				print {$out} " ";
				s/\n$//;
			}
			$intend = $curr_intend;

			s/^\s*(\.\s*)?(?=\S)//;
		} else {																# string witout leading whitespaces
			if ($is_list && ! /^\n/) { print {$out} "\n\n" } 
			elsif ($intend) { print {$out} "\n" }
			$intend = 0;
			$is_list = 0;
		}
		# Smileys ;)
		foreach my $smile (keys %smileys) {
			s/(\s|\A)\Q$smile\E(\s)/ $smileys{$smile}$2/g;
		}
		# Ok, print all results of abracadabra to file if we need it
		if ($is_block && $is_list) { $_ .= "\n" }
		if (!$is_table) { print {$out} $_ }
		else { push @table, $_ }
	}
	# Flush whatever is still buffered at end of file
	if ($is_table) {
		@table = table_replacement(@table);
		print {$out} @table;
	}
	if ($is_block) {
		@block = block_replacement(@block);
		print {$out} @block;
	}
	close $in or die "Can't close '$moin' ($!)!\n";
	close $out or die "Can't close '$doku' ($!)!\n";		# buffered write errors show up here
}
 
# -----------------------------  End of Convert Function  --------------------------------------- #
 
# Script entry point: the command-line arguments (moin_file doku_file [namespace])
# are handed over unchanged; ConvertMoinToDoku itself checks their number.
ConvertMoinToDoku(@ARGV);

Используется это дело так:

./convert.pl moin_file doku_file [namespace]

namespace - это пространство имён, которое надо пихать в ссылки на картинки; если его не указать, будет использоваться '.'.

К сожалению, оно не совсем корректно работает с таблицами (не учитывает склейку и выравнивание, первое является достаточно критичным), возможно, потом допишу. Если вы вдруг будете это использовать - напишите мне о найденных ошибках или хотя бы оставьте тут комментарий.

TODO:

таблицы
http://moinmo.in/HelpOnAdmonitions
http://moinmo.in/HelpOnEditing - проверить, всё ли учли

DokuWiki, Perl