Различия

Здесь показаны различия между двумя версиями данной страницы.

--- wiki:moinmoin2dokuwiki [2009/07/24 00:18]
+++ wiki:moinmoin2dokuwiki [2009/12/25 23:53] (текущий)
@@ Строка 15: / Строка 15: @@
 # ----------------------------------------------------------------------------------------------- #
-# Hash for smileys replacement (actual for ubuntu.com wiki)
+# Hash for smileys replacement
 %smileys = (
 	#moin		doku
@@ Строка 60: / Строка 60: @@
 }
-# Link
+# Links
-sub link_replacement {
+sub linkReplacement {
-	die "Oops!\n" if @_ != 5;
+	die "Oops!\n" unless @_ == 5;
 	my ($namespace,$br,$target,$text,$params) = @_;
 	# Attachments links
@@ Строка 81: / Строка 81: @@
 }
-# Block
+# Blocks
-sub block_replacement {
+sub blockReplacement {
 	for (my $i = 0; $i < @_ && $_[$i] =~ /^\$*$/; $i++ ) { shift @_ }		# delete empty lines
 	for (my $i = -1; -$i <= @_ && $_[$i] =~ /^\$*$/; $i-- ) { pop @_ }		# delete empty lines
@@ Строка 96: / Строка 96: @@
 }
-# Table
+# Tables
-sub table_replacement {
+sub tableReplacement {
 	@table = @_;
 	for (@table) {
@@ Строка 107: / Строка 107: @@
 			"||  $2  " . '|' x (length($1) - 2)
 		/ge;
-		# Spanning
+		# Span
 			# TODO
 		# Aligment
@@ Строка 150: / Строка 150: @@
 		} elsif ($is_table) {
 			$is_table = 0;
-			@table = table_replacement(@table);
+			@table = tableReplacement(@table);
 			print OUTFILE @table;
 			@table = ();
@@ Строка 164: / Строка 164: @@
 			next MOINSCAN unless ($+{text} && $+{text} !~ /^\s+$/);
 			$_ = "$+{text}\n";
-		} elsif (/(?<data>.+?)}{$block_sep_len}\s*(?<text>.*)\n/ && $is_block) {
+		} elsif (/(?<data>.+?)?}{$block_sep_len}\s*(?<text>.*)\n/ && $is_block) {
 			$is_block = 0;
 			push @block, "$+{data}\n" if $+{data};
-			@block = block_replacement(@block);
+			@block = blockReplacement(@block);
 			print OUTFILE @block;
 			@block = ();
@@ Строка 204: / Строка 204: @@
 			(??{ &inv($+{br}) x 2 })								# closing brackets
 		/
-			&link_replacement($namespace,$+{br},$+{target},$+{text},$+{params})
+			&linkReplacement($namespace,$+{br},$+{target},$+{text},$+{params})
 		/gxe;
 		# CamelCase links
@@ Строка 228: / Строка 228: @@
 		s{~\+(.+?)\+~}<$1>g;										# larger text
 		# Lists and intends
-		if (/^(?<intend>\s+)\*\s*(?<value>\S.*)\n/) {							# dotted list
+		if (/^(?<intend>\s+)\*\s+(?<value>\S.*)\n/) {							# dotted list
 			if ($is_list) { print OUTFILE "\n" }
 			print OUTFILE " "x(2*length($+{intend})),"* ";
@@ Строка 234: / Строка 234: @@
 			$intend = length $+{intend};
 			$is_list = 1;
-		} elsif (/^(?<intend>\s+)[1aAiI]\.(#\d+)?\s*(?<value>\S.*)\n/) {		# numeric list
+		} elsif (/^(?<intend>\s+)[1aAiI]\.(#\d+)?\s+(?<value>\S.*)\n/) {		# numeric list
 			if ($is_list) { print OUTFILE "\n" }
 			print OUTFILE " "x(2*length($+{intend})),"- ";
@@ Строка 288: / Строка 288: @@
 	}
 	if ($is_table) {
-		@table = table_replacement(@table);
+		@table = tableReplacement(@table);
 		print OUTFILE @table;
 	}
 	if ($is_block) {
-		@block = block_replacement(@block);
+		@block = blockReplacement(@block);
 		print OUTFILE @block;
 	}
@@ Строка 315: / Строка 315: @@
   * http://moinmo.in/HelpOnEditing - проверить, всё ли учли
 </note>
+То же самое, но переписанное на Python — вдруг кому пригодится. Замечания те же: таблицы обрабатываются не до конца, и нужно не забыть заменить <!/code> на %%</code>%%:
+<code python>
+#!/usr/bin/env python
+# coding=utf8
+# Author: Vadim Nevorotin (malamut@ubuntu.ru)
+# License: GPLv3
+# Rus:
+# Скрипт конвертации разметки MoinMoin в разметку DokuWiki
+# Использование: moin2doku.py moin_file doku_file [namespace]
+# namespace - это пространство имён для всех ссылок на вложения (картинки, например)
+# Скрипт не до конца обрабатывает таблицы, не учитывается склейка ячеек и выравнивание
+# Так же не добавлена конвертация новой возможности moin: {{{#!wiki caution }}} и прочих выделений (http://moinmo.in/HelpOnAdmonitions)
+# Всё остальное с http://moinmo.in/HelpOnEditing на 11.2009 конвертируется в полном объёме
+# Eng:
+# Usage: ./moin2doku.py moin_file doku_file [namespace]
+# namespace - for all links to images ({{namespace:image.png}})
+import sys
+import re
+from os.path import isfile
+# Hash for smileys replacement
+# Maps a MoinMoin smiley/icon markup (key) to the DokuWiki text that replaces it
+# (value). The converter only substitutes a key when it is preceded by
+# whitespace or the start of the line and followed by whitespace.
+smileys = {
+	#moin		doku
+	'X-('	:	':-X',
+	':D'	:	':-D',
+	'<:('	:	':-?',
+	':o'	:	':-O',
+	':('	:	':-(',
+	':)'	:	':-)',
+	'B)'	:	'8-)',
+	':))'	:	':-P',
+	';)'	:	';-)',
+	'/!\\'	:	':!:',
+	'<!>'	:	':!:',
+	'(!)'	:	':!:',
+	':-?'	:	':-P',
+	':\\'	:	':-\\',
+	'>:>'	:	'^_^',
+	'|)'	:	':-|',
+	':-('	:	':-(',
+	':-)'	:	':-)',
+	'B-)'	:	'8-)',
+	':-))'	:	':-P',
+	';-)'	:	';-)',
+	'|-)'	:	':-|',
+	'(./)'	:	'LOL',
+	'{OK}'	:	':!:',
+	'{X}'	:	':!:',
+	'{i}'	:	':!:',
+	'{1}'	:	'<1>',
+	'{2}'	:	'<2>',
+	'{3}'	:	'<3>',
+	'{*}'	:	'<ubu>',
+	'{o}'	:	'<circ>',
+}
+# Convert functions
+def inv(br):
+	if br == '{': return '}'
+	if br == '[': return ']'
+	return br
+def linkReplacement(namespace,br,target,text):
+	if not target:
+		print >> sys.stderr, "Fatal error"
+		sys.exit(1)
+	m_att = re.match('(?:(?:attachment|drawing):)(?P<att>.+)$',target)
+	m_inter = re.match('(.+?):(?!//)(.+)$',target)
+	# Attachments links
+	if m_att:
+		if m_att.group('att'):
+			att = re.sub('(.+)/','',m_att.group('att'))	# leave only filename, without namespaces
+		else:
+			att = m_att.groups('att')
+		if text:
+			return '{{'+namespace+':'+att+'|'+text+'}}'
+		else:
+			return '{{'+namespace+':'+att+'}}'
+	# InterWiki links
+	if m_inter:
+		if text:
+			return '[['+m_inter.group(1)+'>'+m_inter.group(2)+'|'+text+']]'
+		else:
+			return '[['+m_inter.group(1)+'>'+m_inter.group(2)+']]'
+	# All other links simply returned in doku format
+	if not re.search('://', target):		# if target not an internet link replace all / to : (for namespaces)
+		target = re.sub('/',':',target)
+	if text:
+		return br + br + target + '|' + text + inv(br) * 2
+	else:
+		return br + br + target + inv(br) * 2
+def tableReplacement(table):
+	for i in range(len(table)):
+		line = table[i]
+		line = re.sub('\|\|(?:\s*(<.+?>))?\s*(.+?)\s*(?=\|\|)', lambda m: '||' + m.group(1) + m.group(2) if m.group(1) else '||' + m.group(2), line)
+		line = re.sub('((?:\|\|){2,})(.+?)(?=\|\|)', lambda m: '||  '+m.group(2)+'  ' + '|' * (len(m.group(1)) - 2), line)
+		line = re.sub('\|\|(\s*)(?:<.+?>\s*?)+(.*?)(?=\|\|)','||\\1\\2', line)
+		line = re.sub('\|\|','|', line)
+		table[i] = line
+	size = len(table)
+	i = 0
+	while i < size:
+		if re.match('\|+$',table[i]):
+			table.pop(i)
+			size=size-1
+		else:
+			i=i+1
+	return table
+def blockReplacement(block):
+	if block and not block[0].strip():
+		block.pop(0)
+	if not block[-1].strip():
+		block.pop()
+	if not block:
+		block.append('<code>\n')
+	elif re.match('\s*#!python',block[0]):
+		block.pop(0)
+		block.insert(0,'<code python>\n')
+	elif re.match('\s*#!cplusplus',block[0]):
+		block.pop(0)
+		block.insert(0,'<code cpp>\n')
+	elif re.match('\s*#!java',block[0]):
+		block.pop(0)
+		block.insert(0,'<code java>\n')
+	elif re.match('\s*#!pascal',block[0]):
+		block.pop(0)
+		block.insert(0,'<code pascal>\n')
+	elif re.match('\s*#!.+',block[0]):
+		block.pop(0)
+		block.insert(0,'<code>\n')
+	else:
+		block.insert(0,'<code>\n')
+	block.append('<!/code>')
+	return block
+def ConvertMoinToDoku(moin_file,doku_file,namespace):
+	try:
+		infile = open(moin_file,"r")
+		outfile = open(doku_file,"w")
+	except:
+		print >> sys.stderr, "Open file error!"
+		sys.exit(1)
+	content=infile.readlines()
+	infile.close()
+	# Conversion
+	st = '~@!'												# For headings conversion
+	camel = '(?<![\\[!:])\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'	# CamelCase regexp
+	conversion = (
+		# Processing instructions
+		('^##(.*)\n',''),
+		('^#(pragma|format|redirect|refresh|language)(.*)\n',''),
+		('^#deprecated(.*)\n','<note warning>This page is deprecated<note>\n'),
+		# Other elements
+		('(<<BR>>)|(\[\[BR]])','\\\\ '),
+		('^\s*-{4,}\s*$','----\n'),
+		('^(.*)/\*(.+?)\*/(.*)\n','\\1\n>\\2\n\\3\n'),		# inline comments
+		# Macros and another foolish - simply remove
+		('<<.+?>>',''),
+		# Headings
+		('^\s*=====\s*(.+?)\s*=====\s*$',st*2 + ' \\1 ' + st*2 + '\n'),
+		('^\s*====\s*(.+?)\s*====\s*$',st*3 + ' \\1 ' + st*3 + '\n'),
+		('^\s*===\s*(.+?)\s*===\s*$',st*4 + ' \\1 ' + st*4 + '\n'),
+		('^\s*==\s*(.+?)\s*==\s*$',st*5 + ' \\1 ' + st*5 + '\n'),
+		('^\s*=\s*(.+?)\s*=\s*$',st*6 + ' \\1 ' + st*6 + '\n'),
+		(st,'='),
+		# CamelCase links
+		('('+camel+')/('+camel+')','[[\\1:\\2]]'),
+		('\.\./('+camel+')','[[\\1]]'),
+		('/('+camel+')','[['+namespace+':\\1]]'),
+		('('+camel+')','[[\\1]]'),
+		# Avoid automating linking - simply remove
+		("''''''",''),
+		('``',''),
+		('!([A-Z]\w+)','\\1'),
+		# Text formatting
+		("'''''(.+?)'''''",'**//\\1//**'),
+		("'''(.+?)'''",'**\\1**'),
+		("''(.+?)''",'//\\1//'),
+		('`(.+?)`',"''\\1''"),
+		(',,(.+?),,','<sub>\\1</sub>'),
+		('\^(.+?)\^','<sup>\\1</sup>'),
+		('--\((.+?)\)--','<del>\\1</del>'),
+		# Unsupported text formating - simply remove
+		('~-(.+?)-~','\\1'),
+		('~\+(.+?)\+~','\\1')
+	)
+	# Values for conversion
+	intend = 0
+	is_table = 0
+	is_block = 0
+	block_sep_len = 3
+	is_list = 0
+	table = []
+	block = []
+	for i in range(len(content)):
+		line = content[i]
+		# First of all remove all end whitespaces except \n
+		line = re.sub('\s*$','\n',line)
+		# Tables
+		m = re.search('\s*(?P<line>\|\|.+\|\|\n)',line)
+		if m and not is_block:
+			is_table = 1
+			is_list = 0
+			line = m.group('line')
+		elif is_table:
+			is_table = 0
+			table = tableReplacement(table)
+			outfile.writelines(table)
+			table = []
+		# Code blocks parser
+		if not is_block:
+			line = re.sub('\{{3}(.+?)}{3}','%%\\1%%',line)
+		m_open = re.search('(?P<text>.*?)\s*(?P<sep>\{{3,})(?P<mod>.*\n)',line)
+		m_close = re.search('(?P<data>.+?)?}{'+str(block_sep_len)+'}\s*(?P<text>.*)\n',line)
+		if m_open and not is_block and not is_table:
+			block_sep_len = len(m_open.group('sep'))
+			is_block = 1
+			if is_list:
+				outfile.write('\n')
+			is_list = 0
+			block = [m_open.group('mod')]
+			if not ( m_open.group('text') and not re.match('\s+$',m_open.group('text')) ):
+				continue
+			line = m_open.group('text') + '\n'
+		elif m_close and is_block:
+			is_block = 0
+			if m_close.group('data'):
+				block.append(m_close.group('data') + '\n')
+			block = blockReplacement(block)
+			outfile.writelines(block)
+			block = []
+			if not m_close.group('text'):
+				continue
+			line = m_close.group('text') + '\n'
+		elif is_block:
+			block.append(line)
+			continue
+		# Links
+		# Unfortunately, Python is not a Perl, so we can't convert links using only one regexp...
+		link = ''
+		s = ''
+		br = ''
+		is_link = 0
+		last = len(line)-1
+		for i in range(len(line)):
+			if not is_link and i != last and line[i] == '[' and line[i+1] == '[':
+				is_link = 1
+				link = br = '['
+			elif not is_link and i != last and line[i] == '{' and line[i+1] == '{':
+				is_link = 1
+				link = br = '{'
+			elif is_link and line[i] == inv(br) and line[i-1] == inv(br):
+				is_link = 0
+				link = link + line[i]
+				m = re.match('(?P<target>[^\|]+)(?:\|(?P<text>[^\|]+))?',link[2:-2])
+				link = linkReplacement(namespace,link[0],m.group('target'),m.group('text'))
+				s = s + link
+			elif is_link:
+				link = link + line[i]
+			else:
+				s = s + line[i]
+		line = s
+		# Now convert syntax
+		for r in conversion:
+			reg=re.compile(r[0],re.I)
+			line=reg.sub(r[1],line)
+		# Lists and intends
+		m_dotted = re.match('(?P<intend>\s+)\*\s+(?P<value>\S.*)\n',line)
+		m_numeric = re.match('(?P<intend>\s+)[1aAiI]\.(#\d+)?\s+(?P<value>\S.*)\n',line)
+		m_defs = re.match('(?P<intend>\s+)(?P<key>\S[^\[\{]+)::\s+(?P<value>\S.*)\n',line)
+		m_def = re.match('(?P<intend>\s+)(?P<key>\S[^\[\{]+)::\s*',line)
+		m_desc = re.match('(?P<intend>\s+)::\s*(?P<value>\S.*)\n',line)
+		m_white = re.match('(?P<intend>\s+)(\.\s+)?\S',line)
+		if m_dotted:
+			if is_list:
+				outfile.write('\n')
+			outfile.write(" "*(2*len(m_dotted.group('intend')))+"* ")
+			line = m_dotted.group('value')
+			intend = len(m_dotted.group('intend'))
+			is_list = 1
+		elif m_numeric:
+			if is_list:
+				outfile.write('\n')
+			outfile.write(" "*(2*len(m_numeric.group('intend')))+"- ")
+			line = m_numeric.group('value')
+			intend = len(m_numeric.group('intend'))
+			is_list = 1
+		elif m_defs:
+			if is_list:
+				outfile.write('\n')
+			outfile.write('**'+m_defs.group('key')+'**\n')
+			outfile.write('  * ')
+			line = m_defs.group('value')
+			intend = len(m_defs.group('intend'))
+			is_list = 1
+		elif m_def:
+			if is_list:
+				outfile.write('\n')
+			outfile.write('**'+m_def.group('key')+'**\n')
+			line = ''
+			is_list = 0
+		elif m_desc:
+			if is_list:
+				outfile.write('\n')
+			outfile.write('  * ')
+			line = m_desc.group('value')
+			intend = len(m_desc.group('intend'))
+			is_list = 1
+		elif m_white:
+			curr_intend = len(m_white.groups('intend'))
+			if curr_intend != intend and is_list:
+				outfile.write('\n\n')
+				is_list = 0
+			elif curr_intend != intend and not is_list:
+				outfile.write('\n')
+			elif curr_intend == intend and is_list:
+				outfile.write(' ')
+				line = line.rstrip()
+			intend = curr_intend
+			line = re.sub('^\s*(\.\s*)?(?=\S)','',line)
+		else:
+			if is_list and not re.match('\n',line):
+				outfile.write('\n\n')
+			elif intend:
+				outfile.write('\n')
+			intend = 0
+			is_list = 0
+		# Smileys $)
+		for smile in smileys.keys():
+			line = re.sub('(\s|\A)'+re.escape(smile)+'(\s)',' '+smileys[smile]+'\\2',line)
+		# Finally...
+		if is_block and is_list:
+			line += '\n'
+		if not is_table:
+			outfile.write(line)
+		else:
+			table.append(line)
+	# If we haven't close some things
+	if is_table:
+		table = tableReplacement(table)
+		outfile.writelines(table)
+	if is_block:
+		block = blockReplacement(block)
+		outfile.writelines(block)
+	outfile.close()
+# Main script...
+def PrintHelp():
+	print """Usage: moin2doku moin_file doku_file [namespace]
+Convert MoinMoin page to Doku, using namespace for pictures"""
+	sys.exit(0)
+def PrintParameterError():
+	print >> sys.stderr, "Incorrect parameters! Use --help to read more information."
+	sys.exit(1)
+def CheckParameters(moin_file,doku_file):
+	if not isfile(moin_file):
+		print >> sys.stderr, "Moin file doesn't exists!"
+		sys.exit(1)
+	if not isfile(doku_file):
+		print >> sys.stderr, "Doku file doesn't exists!"
+		sys.exit(1)
+if __name__ == '__main__':
+	if len(sys.argv) > 1:
+		if sys.argv[1] in ('-h', '--help'):
+			PrintHelp()
+		elif len(sys.argv) > 2:
+			moin_file = sys.argv[1]
+			doku_file = sys.argv[2]
+			if len(sys.argv) > 3:
+				namespace = sys.argv[3]
+			else:
+				namespace = '.'
+		else:
+			PrintParameterError()
+	else:
+		PrintParameterError()
+	CheckParameters(moin_file,doku_file)
+	ConvertMoinToDoku(moin_file,doku_file,namespace)
+</code>
 ===== Улучшенная версия =====
 Я ж ленивый, поэтому я написал 'обёртку' к представленной выше функции конвертации так, чтобы её было удобно вызывать с различными аргументами и, кроме всего прочего, этот вариант умеет скачивать страницы и вложения из интернета. Функция конвертации здесь та же, просто она сопровождается удобным способом её вызывать.
+Описание см. в POD в коде. Не забудьте заменить <!/code> на </code>.
 <code perl>

MoinMoin to DokuWiki Сравнение версий

Различия