tab2spc.rb


ようやくできたかな。まずテスト。

# test_tab2spc.rb
require 'test/unit'
require 'tab2spc'
# Unit tests for fill_ary, which is loaded via require 'tab2spc'.
class Tests < Test::Unit::TestCase
  # Items have the form [char, length, position]. A tab item must be replaced
  # by one-column space items up to the position of the next non-tab item,
  # while every non-tab item is passed through unchanged.
  def test_fill_ary
    assert_equal([], fill_ary([]))
    assert_equal([["a", 1, 0]], fill_ary([["a", 1, 0]]))
    # "a" occupies column 0, so columns 1..7 are padded and "b" stays at 8.
    expected = [["a", 1, 0]] + (1..7).map { |column| [" ", 1, column] } + [["b", 1, 8]]
    assert_equal(expected, fill_ary([["a", 1, 0], ["\t", 8, 1], ["b", 1, 8]]))
  end
end

つまり、テキストをいったん配列にして、その要素は、[文字、文字の長さ、場所]というようにしておく。そうしておいて、いろいろいじったあと、スペースで埋める。上の例では、1文字目が"a"で、2文字目がタブで、3文字目が"b"で1バイト換算8個目のコラムにあるのを、fill_aryというのを通すと、"a"と"b"の間をスペースで埋めてくれるという話。
次に変換したいサンプルテキストtabsample.txtを作ります。

さて
	ぺんです		# this si comment
	これはapenんです	# this is also a comment
		何?		#comment
		何ですって?	#comment
	おわり			# no comment
終わり				# end

フォントの関係でちゃんと見えないと思うけど、ゆるしてね。これを、
ruby tab2spc.rb tabsample.txtの結果。

さて
  ぺんです                      # this si comment
  これはapenんです              # this is also a comment
    何?                         #comment
    何ですって?                #comment
  おわり                        # no comment
終わり                          # end

さて、プログラムは以下のとおり。

#tab2spc.rb
require 'kconv'
$KCODE = 's'

# Return +str+ with every trailing space, tab and newline removed.
# Other whitespace (for example "\r") is left in place.
def chomp_clean(str)
  trailing = [" ", "\t", "\n"]
  # Drop one character at a time for as long as the last one is strippable.
  str = str[0..-2] while !str.empty? && trailing.include?(str[-1].chr)
  str
end

# Replace each leading tab item with two one-column space items.
#
# ary     - Array of [char, length, position] items.
# Returns a new Array: two [" ", 1, column] items per leading tab, followed
# by the remaining items. The first remaining item gets its position reset to
# the column right after the new indentation; positions of later items are
# left untouched.
#
# The argument is not modified: the caller's array keeps all of its items and
# the first remaining item is copied before its position is rewritten.
def indent2spc(ary)
  tab_count = ary.take_while { |item| item[0] == "\t" }.length
  width = tab_count * 2
  indent = (0...width).map { |column| [" ", 1, column] }

  rest = ary[tab_count..-1]
  unless rest.empty?
    # Copy before editing so the caller's item is not changed behind its back.
    first = rest[0].dup
    first[2] = width
    rest = [first] + rest[1..-1]
  end

  indent + rest
end

# Split a line into per-character items annotated with display columns.
#
# str      - the line to convert; trailing spaces, tabs and newlines are
#            stripped first via chomp_clean.
# tabwidth - distance between tab stops (default 8).
# Returns an Array of [char, length, position] items where
#   * length is 1 for a single-byte character, 2 for a multi-byte one, and
#     tabwidth (the nominal width) for a tab;
#   * position is the column at which the character starts, with each tab
#     advancing to the next multiple of tabwidth.
def str2ary(str, tabwidth = 8)
  position = 0
  chomp_clean(str).split(//).map do |char|
    if char == "\t"
      length = tabwidth
      advance = tabwidth - position % tabwidth
    else
      # Heuristic: any multi-byte character is treated as two columns wide.
      # Byte size (not character count) is used so the check still works when
      # String#length counts characters.
      length = char.bytesize > 1 ? 2 : 1
      advance = length
    end
    item = [char, length, position]
    position += advance
    item
  end
end

# Recompute the position of every item in +ary+, in place.
#
# Non-tab items advance the running column by their length. A tab item that
# has a successor makes the running column jump to the position that successor
# carried *before* this pass rewrote it; a tab with no successor advances by
# its own length. Returns +ary+ itself.
def normalize(ary)
  column = 0
  ary.each_index do |i|
    char, width = ary[i]
    following = ary[i + 1] # still holds its old position at this point
    ary[i] = [char, width, column]
    if following != nil && char == "\t"
      column = following[2]
    else
      column += width
    end
  end
  ary
end

# Replace runs of tab items with one-column space items.
#
# ary - Array of [char, length, position] items.
# Returns a new Array in which every run of tabs is replaced by
# [" ", 1, column] items, so that the summed length of the items emitted so
# far reaches the position of the next non-tab item. Non-tab items are passed
# through unchanged. If the output is already at or past that position, no
# padding is inserted.
#
# Tabs with no following non-tab item are dropped (there is no target column
# to pad to). The argument is left unmodified.
def fill_ary(ary)
  filled = []
  column = 0          # summed length of everything pushed onto filled
  pending_tab = false # true while inside a run of tabs

  ary.each do |item|
    if item[0] == "\t"
      pending_tab = true
      next
    end

    if pending_tab
      gap = item[2] - column
      if gap > 0
        gap.times { |offset| filled.push([" ", 1, column + offset]) }
        column += gap
      end
      pending_tab = false
    end

    filled.push(item)
    column += item[1]
  end

  filled
end

# Join the characters (first element) of every item in +ary+ into one String,
# then replace each tab that is still present with two spaces.
def ary2str(ary)
  joined = ary.inject("") { |text, item| text + item[0] }
  joined.gsub(/\t/, "  ")
end

# Convert one line of text so that it contains spaces instead of tabs.
#
# str      - the input line.
# tabwidth - distance between tab stops used to locate the original columns
#            (default 8). It is forwarded to str2ary as given.
# Returns the converted String: each leading tab becomes two spaces and each
# later run of tabs is padded with spaces up to the column computed for the
# item that follows it.
def tab2spc(str, tabwidth = 8)
  str = chomp_clean(str)        # strip trailing spaces, tabs and newlines
  ary = str2ary(str, tabwidth)  # split into [char, length, position] items
  ary = indent2spc(ary)         # turn each leading tab into 2 spaces
  ary = normalize(ary)          # recompute positions after the shorter indent
  ary = fill_ary(ary)           # drop the tabs and pad with spaces
  ary2str(ary)
end

# Command-line entry point: when this file is run directly, read every line
# from the files named on the command line (or from standard input) and print
# its converted form.
if __FILE__ == $0
  ARGF.each_line { |line| puts tab2spc(line) }
end