How to turn a 300 MB CSV into 3 × 100 MB files?
Cut and slice with head/tail and add the header on top!
Code
require 'rake' # for `sh` helper
require 'shellwords' # for shell-escaping file paths
# Split a large CSV into +file_count+ smaller files, each starting with a copy
# of the original header line.
#
# The slicing is delegated to `wc`, `head` and `tail`, so the source file is
# never loaded into Ruby memory. Splitting is purely line-based: a quoted CSV
# field containing an embedded newline may be cut across two output files.
#
# @param original [String] path to the source CSV (first line is the header)
# @param file_count [Integer] number of output files to create (must be > 0)
# @return [Array<String>] output paths, named "<original>-<index>.csv"
# @raise [ArgumentError] if +file_count+ is not positive, or the line count
#   of +original+ cannot be determined (e.g. the file does not exist)
def split_csv(original, file_count)
  raise ArgumentError, "file_count must be positive, got #{file_count.inspect}" unless file_count.positive?

  header_lines = 1
  # Paths are interpolated into shell commands, so escape them to survive
  # spaces and shell metacharacters.
  source = Shellwords.escape(original)

  # `wc -l < file` prints only the number (no file name to strip).
  total_lines = Integer(`wc -l < #{source}`)
  data_lines = [total_lines - header_lines, 0].max
  # Ceiling division; at least 1 so `head -n` never receives 0 (rejected by
  # some `head` implementations).
  lines_per_file = [(data_lines + file_count - 1) / file_count, 1].max
  header = `head -n #{header_lines} #{source}`

  Array.new(file_count) do |index|
    file = "#{original}-#{index}.csv"
    File.write(file, header)

    # `tail -n +K` starts output at absolute line K (1-based), so each slice
    # begins right after the header plus all rows taken by earlier slices.
    first_line = header_lines + 1 + (index * lines_per_file)
    slice = "tail -n +#{first_line} #{source}"
    # The last file takes everything up to EOF so that a final row without a
    # trailing newline (not counted by `wc -l`) is not lost.
    slice += " | head -n #{lines_per_file}" unless index == file_count - 1

    sh "#{slice} >> #{Shellwords.escape(file)}"
    file
  end
end