How do I split a single text file with 1000 lines into multiple smaller files of, for example, 300 lines apiece? Please keep in mind that the original file may have more or less than a thousand lines.
file1.txt 300 lines -> rest
file2.txt 300 lines -> rest
file3.txt 300 lines -> rest
file4.txt 100 lines
I tried the following but it's not working.
// Asker's attempt: read new_path line by line and append each lower-cased line to a
// timestamp-named output file, switching to a new file name when the counter hits 5000.
// NOTE(review): current_dir, new_path, read_file and ReplaceBackspace are not declared in
// this snippet; they are presumably defined elsewhere in the asker's program.
int counter = 0;
string line;
// Output name is built from the current time at one-second resolution ("HHmmss"),
// so two names computed within the same second are identical.
string lineoutput = (current_dir + "\\" + DateTime.Now.ToString("HHmmss") + ".txt");
System.IO.StreamReader inputfile;
// NOTE(review): inputfile is never closed or disposed in the visible code.
inputfile = new System.IO.StreamReader(new_path);
while ((line = inputfile.ReadLine()) != null)
{
// NOTE(review): a new StreamWriter is opened on every iteration here, but 'file' is never
// written to, closed or disposed in the visible code.
System.IO.StreamWriter file = new System.IO.StreamWriter(current_dir + "\\" + DateTime.Now.ToString("HHmmss") + ".txt", true);
// NOTE(review): reads from a different reader (read_file) and the result is never used below.
string _replaceBackspace = ReplaceBackspace(read_file.ReadLine().ToLower());
// Re-opens the current output file in append mode for every single input line.
using (StreamWriter writer = new StreamWriter(lineoutput, true))
{
// The threshold here is 5000, not the 300 mentioned in the question text.
if (counter == 5000)
{
counter = 0;
// Only the name changes here; 'writer' was already opened on the previous name,
// so the line written below still goes to the old file.
lineoutput = (current_dir + "\\" + DateTime.Now.ToString("HHmmss") + ".txt");
}
writer.WriteLine(line.ToLower());
}
counter++;
}
Simplest case:
// Splits the text file at `infile` into consecutive files named file1.txt, file2.txt, ...
// Each output file receives at most MAX_LINES lines; the last one receives the remainder.
// An empty input file produces no output files.
string outFileName = "file{0}.txt";
int outFileNumber = 1;
const int MAX_LINES = 300;
// `using` closes the reader even if reading or writing throws part-way through.
using (var reader = File.OpenText(infile))
{
    while (!reader.EndOfStream)
    {
        // One writer per chunk; disposed (and flushed) as soon as the chunk is complete.
        using (var writer = File.CreateText(string.Format(outFileName, outFileNumber++)))
        {
            // Stop at MAX_LINES or at end of input, whichever comes first.
            for (int idx = 0; idx < MAX_LINES && !reader.EndOfStream; idx++)
            {
                writer.WriteLine(reader.ReadLine());
            }
        }
    }
}
// Splits the file at new_path into lower-cased chunks of at most maxLinesPerFile lines.
// Output files are named "<current_dir>\<HHmmss>.<n>.txt" with n = 1, 2, 3, ...
const int maxLinesPerFile = 300;
string baseName = current_dir + "\\" + DateTime.Now.ToString("HHmmss") + ".";
StreamWriter writer = null;
try
{
    using (StreamReader inputfile = new System.IO.StreamReader(new_path))
    {
        int count = 0;      // lines written to the current output file
        int fileIndex = 0;  // sequence number of the current output file
        string line;
        while ((line = inputfile.ReadLine()) != null)
        {
            // Roll over to a new file on the first line and whenever the current file is full.
            // (>= rather than > so that each file gets exactly maxLinesPerFile lines, not one extra.)
            if (writer == null || count >= maxLinesPerFile)
            {
                if (writer != null)
                {
                    writer.Close();
                    // Cleared so the finally block never touches a stale writer
                    // if opening the next file throws.
                    writer = null;
                }
                // The name uses a running index; naming by `count` would give every
                // file after the first the same name.
                fileIndex++;
                writer = new System.IO.StreamWriter(baseName + fileIndex.ToString() + ".txt", true);
                count = 0;
            }
            writer.WriteLine(line.ToLower());
            ++count;
        }
    }
}
finally
{
    // Close the last (possibly partially filled) output file, also on failure.
    if (writer != null)
        writer.Close();
}
Loop over File.ReadLines(path) and write each line to a StreamWriter.
Keep a counter, and each time it reaches 300, close the StreamWriter and open a new one.
As well as SLaks' answer, you can also do it using the extension methods Skip and Take in System.Linq:
// Reads the whole file into memory, then writes it back out in slices of `chunksize`
// lines to file1.txt, file2.txt, ... using LINQ's Skip and Take to select each slice.
// An empty input produces no output files.
string[] ss = File.ReadAllLines(@"path to the file");
int chunksize = 300;
int cycle = 1; // 1-based number used in the output file name
// Advance by a fixed offset instead of repeatedly wrapping the remainder in another
// deferred Skip(), which would make every later chunk walk a longer iterator chain.
for (int offset = 0; offset < ss.Length; offset += chunksize)
{
    string filename = "file" + cycle.ToString() + ".txt";
    using (StreamWriter sw = new StreamWriter(filename))
    {
        // Take() simply yields fewer items for the final, shorter slice.
        foreach (string line in ss.Skip(offset).Take(chunksize))
        {
            sw.WriteLine(line);
        }
    }
    cycle++;
}
Following on from the answer by bigtbl, I added preservation of the first row as a header in each file, for the case of generating a series of CSVs. MAX_LINES is inclusive of the header row in the total count, which is the reason for start_idx.
/// <summary>
/// Splits a CSV file into files named filename_1.csv, filename_2.csv, ... of at most
/// MAX_LINES lines each, repeating the first row of the input as a header in every file.
/// Nothing is written when GetFileSize(inputFile) does not exceed MAX_LINES.
/// </summary>
/// <param name="rows">Currently unused; the chunk size is the MAX_LINES constant.</param>
/// <param name="inputFile">Path of the CSV file to split.</param>
public static void SplitFil(int rows, string inputFile) {
    int outFileNumber = 1;
    const int MAX_LINES = 50000;
    string header = "";
    // NOTE(review): GetFileSize is defined elsewhere; presumably it returns a line count
    // (it is compared against MAX_LINES) - confirm.
    if (GetFileSize(inputFile) > MAX_LINES) {
        // `using` releases the file handles even if a read or write throws.
        using (var reader = File.OpenText(inputFile))
        {
            while (!reader.EndOfStream)
            {
                var start_idx = 0;
                using (var writer = File.CreateText($"filename_{outFileNumber}.csv"))
                {
                    if (outFileNumber > 1) {
                        // Later files start with the remembered header; it counts as one
                        // of the MAX_LINES lines, so the row loop starts at 1.
                        writer.WriteLine(header);
                        start_idx = 1;
                    }
                    for (int idx = start_idx; idx < MAX_LINES; idx++)
                    {
                        var row = reader.ReadLine();
                        // The very first row of the input is the header; remember it.
                        if (idx == 0 && outFileNumber == 1) header = row;
                        writer.WriteLine(row);
                        if (reader.EndOfStream) break;
                    }
                }
                outFileNumber++;
            }
        }
    }
}
Full program:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace SplitTexTfileIntoMultiplefiles
{
    /// <summary>
    /// Console utility that splits a text file into numbered parts of at most
    /// MAX_LINES lines each, written next to the input file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Splits the hard-coded input file into parts named
        /// &lt;name&gt;0001&lt;ext&gt;, &lt;name&gt;0002&lt;ext&gt;, ... in the input file's directory,
        /// printing each output path as it is created, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string infile = @"C:\MyProj\file.sql";
            const int MAX_LINES = 20000;

            // The path parts do not change between chunks, so compute them once.
            string directory = Path.GetDirectoryName(infile) ?? string.Empty;
            string baseName = Path.GetFileNameWithoutExtension(infile);
            string extension = Path.GetExtension(infile);

            int outFileNumber = 1;
            Console.WriteLine("Wait...");

            // `using` closes the reader and each writer even if an I/O error occurs.
            using (var reader = File.OpenText(infile))
            {
                while (!reader.EndOfStream)
                {
                    // Path.Combine avoids a stray leading separator when the
                    // directory part is empty (input given as a bare file name).
                    string outfname = Path.Combine(directory, baseName + outFileNumber.ToString("D4") + extension);
                    Console.WriteLine(outfname);
                    using (var writer = File.CreateText(outfname))
                    {
                        // Stop at MAX_LINES or at end of input, whichever comes first.
                        for (int idx = 0; idx < MAX_LINES && !reader.EndOfStream; idx++)
                        {
                            writer.WriteLine(reader.ReadLine());
                        }
                    }
                    outFileNumber++;
                }
            }

            Console.WriteLine("Done.");
            Console.ReadKey();
        }
    }
}
精彩评论