VBA - Read a CSV with utf-8 and write out another

2019-09-03 16:17发布

问题:

I am performing the following steps and running into a problem with UTF-8 characters:
- Read CSV file (delimited with "|") containing UTF-8 characters.
- Parse the file and save a new file based on certain conditions (one of the criteria is to eliminate rows that contain the text Remove_ROW).

The saved file does not preserve the UTF-8 characters; they are written out as garbled characters instead.

' Open the source file and take its first sheet as the working sheet.
Set tdaywb = Workbooks.Open(lbltoday.Caption) 'lbltoday.Caption has the filename
Set tdaySht = tdaywb.Sheets(1)
' Last used row in column A, found by jumping up from the bottom of the sheet.
tdayLastRow = tdaySht.Range("A" & Rows.Count).End(xlUp).Row

' Walk the rows bottom-up. Deleting a row shifts every row below it up by one,
' so a top-down loop would skip the row that moves into the deleted slot
' (e.g. two consecutive Remove_ROW rows). Row 1 is left untouched.
For x = tdayLastRow To 2 Step -1
  If InStr(1, tdaySht.Cells(x, 1), "Remove_ROW") > 0 Then
       tdaySht.Rows(x).EntireRow.Delete
       remCount = remCount + 1           ' number of rows removed so far
       tdayLastRow = tdayLastRow - 1     ' keep the row count in sync for later use
  End If
Next x

tdaySht.Activate

' This save step is the one the question reports as producing garbled
' characters; it is left exactly as posted.
With ActiveWorkbook
    .SaveAs "C:\test.csv"
    .Close 0    ' 0 is passed as the SaveChanges argument
End With

I would appreciate help on how to save this file with the UTF-8 characters preserved.

Regards, Ayush

回答1:

After some research here is what I found:

Sub OpenTextFile()
    ' Entry point: load the pipe-delimited UTF-8 file into a new sheet,
    ' then export the active sheet through WriteCSV.
    Dim strSheetName    ' receives the name of the sheet created by the reader

    strSheetName = ReadUTF8CSVToSheet("C:\file1.csv")
    Call WriteCSV
End Sub

Function ReadUTF8CSVToSheet(file As String)
    ' Loads a UTF-8 encoded, "|"-delimited text file into a newly added sheet
    ' and returns the name of that sheet.
    '
    ' file: full path of the text file to read.
    '
    ' Each non-empty line is written to column A of the next free row and then
    ' split into columns in place with TextToColumns.
    Const adTypeBinary = 1
    Const adTypeText = 2
    Const adReadAll = -1

    Dim ws As Worksheet
    Dim strText As String
    Dim rawLine As Variant      ' For Each over an array needs a Variant
    Dim lineText As String
    Dim intRow As Long

    ' Read the whole file as bytes, then reinterpret the stream as UTF-8 text.
    With CreateObject("ADODB.Stream")
        .Open
        .Type = adTypeBinary
        .LoadFromFile file
        .Type = adTypeText
        .Charset = "utf-8"
        strText = .ReadText(adReadAll)
        .Close                  ' release the file contents held by the stream
    End With

    ' Parse the text into a new sheet, one source line per row.
    Set ws = Sheets.Add()
    intRow = 1
    For Each rawLine In Split(strText, Chr(10))
        lineText = rawLine

        ' Files with CR+LF line endings leave a trailing CR after splitting on
        ' LF; drop it so it does not end up inside the last column.
        If Right$(lineText, 1) = Chr(13) Then
            lineText = Left$(lineText, Len(lineText) - 1)
        End If

        If lineText <> "" Then
            With ws
                .Cells(intRow, 1).Value = lineText
                ' Destination is qualified with the new sheet so the split does
                ' not depend on which sheet happens to be active.
                .Cells(intRow, 1).TextToColumns Destination:=.Cells(intRow, 1), DataType:=xlDelimited, _
                    TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=False, Tab:=False, _
                    Semicolon:=False, Comma:=False, Space:=False, Other:=True, OtherChar:="|"
            End With

            intRow = intRow + 1
        End If
    Next rawLine

    ReadUTF8CSVToSheet = ws.Name
End Function

Public Sub WriteCSV()
    ' Exports the active sheet to a UTF-8 encoded, "|"-delimited text file
    ' chosen by the user in a Save As dialog.
    '
    ' The column count is the number of consecutive non-empty cells in row 1;
    ' the row count is the number of consecutive non-empty cells in column A.
    ' Fields are separated by "|" with no trailing delimiter at the end of a line.
    Const adTypeText = 2
    Const adSaveCreateOverWrite = 2
    Const adWriteLine = 1
    Const adStateOpen = 1
    Const DELIM As String = "|"

    Dim ws As Object
    Dim target As Variant       ' Variant: GetSaveAsFilename returns Boolean False on cancel
    Dim lastCol As Long
    Dim lastRow As Long
    Dim r As Long
    Dim c As Long
    Dim rowText As String
    Dim errText As String
    Dim utf8Stream As Object

    Set ws = ActiveSheet

    target = Application.GetSaveAsFilename("", "CSV File (*.csv), *.csv")
    ' Test the type rather than comparing with the text "False", which is
    ' locale-dependent. Exit Sub (not End) so only this procedure stops.
    If VarType(target) = vbBoolean Then Exit Sub

    On Error GoTo eh

    Set utf8Stream = CreateObject("ADODB.Stream")
    utf8Stream.Charset = "UTF-8"
    utf8Stream.Type = adTypeText
    utf8Stream.Open

    ' Width of the table: consecutive non-empty header cells in row 1.
    lastCol = 0
    Do While Len(ws.Cells(1, lastCol + 1).Value) <> 0
        lastCol = lastCol + 1
    Loop

    ' Height of the table: row 1 plus consecutive non-empty cells below it in column A.
    lastRow = 1
    Do While Len(ws.Cells(lastRow + 1, 1).Value) <> 0
        lastRow = lastRow + 1
    Loop

    ' Write the header and data rows the same way, one line per row.
    For r = 1 To lastRow
        rowText = ""
        For c = 1 To lastCol
            If c > 1 Then rowText = rowText & DELIM
            rowText = rowText & ws.Cells(r, c).Value
        Next c
        utf8Stream.WriteText rowText, adWriteLine
    Next r

    utf8Stream.SaveToFile CStr(target), adSaveCreateOverWrite
    utf8Stream.Close

    MsgBox "CSV generated successfully"
    Exit Sub    ' do not fall through into the error handler

eh:
    ' Capture the message first; the cleanup below resets Err.
    errText = Err.Description
    On Error Resume Next
    If Not utf8Stream Is Nothing Then
        If utf8Stream.State = adStateOpen Then utf8Stream.Close
    End If
    On Error GoTo 0
    MsgBox "CSV export failed: " & errText, vbExclamation
End Sub