Splitting Text Files

I recently responded to an MSDN forum post about splitting text files. Because the nested operations take an extra level of thought, I enjoyed writing this code. It splits a multi-line text file into a series of new files, each containing a specified number of lines. The last file created holds whatever lines remain.

    ''' <summary>
    ''' Splits a delimited text file into a series of numbered files, each holding at most
    ''' <paramref name="maxLinesPerFile"/> lines. The final file holds whatever lines remain.
    ''' </summary>
    ''' <param name="inFilePath">Path of the text file to split. The whole file is read into memory.</param>
    ''' <param name="outDirectory">Directory the output files are written to.</param>
    ''' <param name="outFileBaseName">Base name of every output file; a five-digit counter starting at 00001 is appended to it.</param>
    ''' <param name="outFileExtension">Extension of the output files, with or without leading periods.</param>
    ''' <param name="maxLinesPerFile">Maximum number of lines per output file. Must be at least 1.</param>
    ''' <param name="lineDelimiter">Exact string that separates lines in the input. It is also written after every line in the output.</param>
    ''' <exception cref="ArgumentOutOfRangeException"><paramref name="maxLinesPerFile"/> is less than 1.</exception>
    ''' <exception cref="ArgumentException"><paramref name="lineDelimiter"/> is Nothing or empty.</exception>
    Private Sub splitFileMulti(inFilePath As String, outDirectory As String, outFileBaseName As String, outFileExtension As String, maxLinesPerFile As Integer, lineDelimiter As String)
        'A chunk size below 1 cannot be used to step through the lines, so reject it up front
        If maxLinesPerFile < 1 Then
            Throw New ArgumentOutOfRangeException("maxLinesPerFile", maxLinesPerFile, "maxLinesPerFile must be at least 1.")
        End If
        'Without a delimiter there is no way to tell where one line ends and the next begins
        If String.IsNullOrEmpty(lineDelimiter) Then
            Throw New ArgumentException("lineDelimiter must not be Nothing or empty.", "lineDelimiter")
        End If

        'Load the file into memory
        Dim fileContent As String = My.Computer.FileSystem.ReadAllText(inFilePath)
        'Split on the delimiter as one whole string rather than on each of its characters:
        'a multi-character delimiter then produces no spurious empty entries, and genuine
        'blank lines in the input are kept instead of being discarded
        Dim allLines As New List(Of String)(fileContent.Split(New String() {lineDelimiter}, StringSplitOptions.None))
        'An empty last entry means the file was empty or ended with the delimiter; it is not a real line
        If allLines.Count > 0 AndAlso allLines(allLines.Count - 1).Length = 0 Then
            allLines.RemoveAt(allLines.Count - 1)
        End If

        'Strip leading periods from the extension only, so periods inside the base name are left alone
        Dim extension As String = If(outFileExtension, String.Empty).TrimStart("."c)
        'Reused buffer for building each output file's content
        Dim contentOut As New System.Text.StringBuilder
        'counter is for file naming
        Dim counter As Integer = 0

        'Walk the lines one chunk at a time; the last chunk may be shorter than maxLinesPerFile
        For startIndex As Integer = 0 To allLines.Count - 1 Step maxLinesPerFile
            Dim lineCount As Integer = Math.Min(maxLinesPerFile, allLines.Count - startIndex)
            counter += 1
            contentOut.Clear()
            'Every line, including the last one of the chunk, is followed by the delimiter
            For lineIndex As Integer = startIndex To startIndex + lineCount - 1
                contentOut.Append(allLines(lineIndex)).Append(lineDelimiter)
            Next
            'Build "<base><00001>.<ext>" and place it in the output directory
            Dim fileName As String = outFileBaseName & counter.ToString("D5") & "." & extension
            Dim outPath As String = System.IO.Path.Combine(outDirectory, fileName)
            'append = False: the content replaces anything already at outPath
            My.Computer.FileSystem.WriteAllText(outPath, contentOut.ToString, False)
        Next
    End Sub
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s