This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# This program "fixes" the type of broken delimiter seperated values output | |
# which is not quoted and contains newlines in columns. It will concatenate | |
# lines together until it has the correct number of separators (or more...) | |
# on the line. *Note* that it will not properly fix files that have such | |
# problems in either the first or last column, as it is not possible to | |
# determine if that broken content properly belongs to the next or prior row. | |
# It defaults to using a separator of |, and counting the number of separators | |
# in the first row to determine the number of columns. | |
require 'optparse' | |
options = {:separator => '|'} | |
option_parser = OptionParser.new do |opts| | |
opts.banner = 'Usage: dsvclean.rb [options]' | |
opts.on('-i', '--input FILE') {|file| options[:in_file] = file} | |
opts.on('-o', '--output FILE') {|file| options[:out_file] = file} | |
opts.on('-s', '--sep SEPARATOR') {|sep| options[:separator] = sep} | |
opts.on('-c', '--count NUM_SEPS') {|c| options[:count] = c.to_i} | |
end | |
option_parser.parse!(ARGV) | |
begin | |
$stdin = File.open(options[:in_file]) if options[:in_file] | |
$stdout = File.open(options[:out_file], 'w') if options[:out_file] | |
separator = options[:separator] | |
count = options[:count] | |
prev_line = '' | |
$stdin.each do |line| | |
count = line.count(separator) if count.nil? | |
concat_line = prev_line + line | |
if concat_line.count(separator) >= count | |
$stdout.puts concat_line | |
prev_line = '' | |
else | |
prev_line = concat_line.chomp | |
end | |
end | |
ensure | |
[$stdin, $stdout].each {|io| io.close rescue nil} | |
end |
This simple script demonstrates optparse and writing a script that uses standard in / out or opens files according to arguments.