ようやくできたかな。まずテスト。
# test_tab2spc.rb
require 'test/unit'
require 'tab2spc'

# Unit tests for the helpers defined in tab2spc.rb.
class Tests < Test::Unit::TestCase
  # fill_ary receives [char, width, column] triples. Tab entries are dropped
  # and replaced by single-space entries up to the column of the entry that
  # follows the tab; every other entry is passed through unchanged.
  def test_fill_ary
    assert_equal([], fill_ary([]))
    assert_equal([["a", 1, 0]], fill_ary([["a", 1, 0]]))
    assert_equal(
      [
        ["a", 1, 0],
        [" ", 1, 1], [" ", 1, 2], [" ", 1, 3], [" ", 1, 4],
        [" ", 1, 5], [" ", 1, 6], [" ", 1, 7],
        ["b", 1, 8] # passed through as-is, so no leading space in the char
      ],
      fill_ary([["a", 1, 0], ["\t", 8, 1], ["b", 1, 8]])
    )
  end
end
つまり、テキストをいったん配列にして、その要素は [文字、文字の長さ、場所] というようにしておく。そうしておいて、いろいろいじったあと、スペースで埋める。上の例では、1文字目が "a"、2文字目がタブ、3文字目が "b" で、"b" は1バイト換算で8番目のカラム(位置8)にある。これを fill_ary に通すと、"a" と "b" の間をスペースで埋めてくれるという話。
次に変換したいサンプルテキストtabsample.txtを作ります。
さて ぺんです # this si comment これはapenんです # this is also a comment 何? #comment 何ですって? #comment おわり # no comment 終わり # end
フォントの関係でちゃんと見えないと思うけど、ゆるしてね。これを、
ruby tab2spc.rb tabsample.txtの結果。
さて ぺんです # this si comment これはapenんです # this is also a comment 何? #comment 何ですって? #comment おわり # no comment 終わり # end
さて、プログラムは以下のとおり。
# tab2spc.rb
#
# Converts the tabs in a line of text to spaces. Each leading (indentation)
# tab becomes a fixed number of spaces (two by default); every other tab is
# replaced by as many spaces as are needed to keep the text that follows it
# at the column it occupied in the original, tab-expanded line.
#
# Internally a line is represented as an array of [char, width, column]
# triples, where width is the number of display columns the character is
# assumed to take and column is its zero-based display column.

# kconv was required by the original script and is still loaded when it is
# available, but nothing below calls into it, so a missing library is not fatal.
begin
  require 'kconv'
rescue LoadError
  # Not needed by the code in this file.
end

# Ruby 1.8 needs $KCODE so that split(//) yields whole Shift_JIS characters.
# Ruby 1.9 and later ignore the variable, so it is only set on 1.8.
$KCODE = 's' if RUBY_VERSION < '1.9'

# Removes every trailing space, tab and newline from str.
#
# @param str [String] a line of text
# @return [String] a copy of str without the trailing run of " ", "\t", "\n"
def chomp_clean(str)
  str.sub(/[ \t\n]+\z/, '')
end

# Replaces each leading tab entry with indent_width single-space entries.
#
# Only the first entry after the indentation gets its column updated here;
# the remaining columns are recomputed by normalize. The argument is not
# modified.
#
# @param ary [Array<Array>] [char, width, column] triples
# @param indent_width [Integer] spaces emitted per leading tab
# @return [Array<Array>] a new array of triples
def indent2spc(ary, indent_width = 2)
  tab_count = ary.take_while { |item| item[0] == "\t" }.size
  indent_columns = tab_count * indent_width
  indent = (0...indent_columns).map { |column| [" ", 1, column] }
  rest = ary.drop(tab_count).map { |item| item.dup }
  rest[0][2] = indent_columns unless rest.empty?
  indent + rest
end

# Splits str into [char, width, column] triples.
#
# Trailing whitespace is removed first. A tab entry stores tabwidth as its
# width but advances the column to the next tab stop. Any other character is
# given width 2 when it is encoded in more than one byte and width 1
# otherwise; this is an approximation of the display width.
#
# @param str [String] a line of text
# @param tabwidth [Integer] distance between tab stops
# @return [Array<Array>] one triple per character
def str2ary(str, tabwidth = 8)
  position = 0
  chomp_clean(str).split(//).map do |char|
    is_tab = (char == "\t")
    width =
      if is_tab
        tabwidth
      elsif char.bytesize > 1
        2
      else
        1
      end
    entry = [char, width, position]
    position += (is_tab ? tabwidth - position % tabwidth : width)
    entry
  end
end

# Recomputes the column of every entry, in place, after the indentation has
# been rewritten.
#
# Columns are assigned sequentially from 0. After a tab, the running column
# jumps to the column the following entry had before normalization, so text
# after a tab stays where it was in the original line.
#
# @param ary [Array<Array>] [char, width, column] triples (entries replaced)
# @return [Array<Array>] the same array
def normalize(ary)
  position = 0
  ary.each_with_index do |item, index|
    char = item[0]
    width = item[1]
    following = ary[index + 1] # still holds its pre-normalization column
    ary[index] = [char, width, position]
    if char == "\t" && following
      # Never move backwards: a tab always advances at least one column.
      position = [following[2], position + 1].max
    else
      position += width
    end
  end
  ary
end

# Drops every tab entry and pads the gap it leaves with single-space entries
# up to the column of the next non-tab entry.
#
# Tabs at the end of the array are simply dropped. The argument is not
# modified.
#
# @param ary [Array<Array>] [char, width, column] triples
# @return [Array<Array>] a new array of triples without tab entries
def fill_ary(ary)
  filled = []
  column = 0 # total width of everything pushed to filled so far
  after_tab = false
  ary.each do |item|
    if item[0] == "\t"
      after_tab = true
      next
    end
    if after_tab
      gap = item[2] - column
      if gap > 0
        gap.times { |offset| filled.push([" ", 1, column + offset]) }
        column += gap
      end
      after_tab = false
    end
    filled.push(item)
    column += item[1]
  end
  filled
end

# Joins the characters of the triples back into a string.
#
# Any tab that is still present is replaced by a single space.
#
# @param ary [Array<Array>] [char, width, column] triples
# @return [String]
def ary2str(ary)
  ary.map { |item| item[0] }.join.gsub(/\t/, " ")
end

# Converts the tabs of one line to spaces.
#
# @param str [String] a line of text, with or without its line terminator
# @param tabwidth [Integer] distance between tab stops in the input
# @return [String] the converted line without trailing whitespace
def tab2spc(str, tabwidth = 8)
  ary = str2ary(str, tabwidth) # trims trailing whitespace, then splits
  ary = indent2spc(ary)        # one leading tab -> two spaces
  ary = normalize(ary)         # recompute columns
  ary = fill_ary(ary)          # drop tabs and pad with spaces
  ary2str(ary)
end

# When run as a script, convert every line from the files named on the
# command line (or from standard input) and print the result.
if $PROGRAM_NAME == __FILE__
  while (line = gets)
    puts tab2spc(line)
  end
end