Convert text to Latin encoding and decode back pro

2019-07-29 04:04发布

问题:

I'm trying to convert Vietnamese to Latin. It is a requirement to send the byte to ESC/P printer (see C# ESC/POS Print Vietnamese for reason why).

But my question is very simple, look at this code:

Encoding enc = Encoding.GetEncoding(1258); //vietnamese code page
string content = "Cơm chiên với các loại gia vị truyền";
string newStr = Encoding.GetEncoding("Latin1").GetString(enc.GetBytes(content));
string origStr = enc.GetString(Encoding.GetEncoding("Latin1").GetBytes(newStr)); //origStr is becomes "Cơm chiên v?i các lo?i gia v? truy?n"

Why is the origStr becomes Cơm chiên v?i các lo?i gia v? truy?n instead of contain the same content with content? But, it works fine in Chinese or Thai. You can test the code below...

For Chinese Simplified

Encoding enc = Encoding.GetEncoding(936); //chinese simplified code page
string content = "印尼炒饭";
string newStr = Encoding.GetEncoding("Latin1").GetString(enc.GetBytes(content));
string origStr = enc.GetString(Encoding.GetEncoding("Latin1").GetBytes(newStr)); //origStr is correct "印尼炒饭"

For Thai

Encoding enc = Encoding.GetEncoding(874); //Thai code page
string content = "ข้าวผัดอินโดนีเซียกับเครื่องเทศแบบดั้ง";
string newStr = Encoding.GetEncoding("Latin1").GetString(enc.GetBytes(content));
string origStr = enc.GetString(Encoding.GetEncoding("Latin1").GetBytes(newStr)); //origStr is correct "ข้าวผัดอินโดนีเซียกับเครื่องเทศแบบดั้ง"

Any idea why? Why Vietnamese won't work? How to make it work so I can send it to Printer?

Thanks for any help :)

PS. Here is the code I used to send to printer

[DllImport("Winspool.drv", EntryPoint = "ClosePrinter", SetLastError = true, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool ClosePrinter(IntPtr hPrinter);

[DllImport("Winspool.drv", EntryPoint = "EndDocPrinter", SetLastError = true, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool EndDocPrinter(IntPtr hPrinter);

[DllImport("Winspool.drv", EntryPoint = "EndPagePrinter", SetLastError = true, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool EndPagePrinter(IntPtr hPrinter);

[DllImport("Winspool.drv", EntryPoint = "OpenPrinterA", SetLastError = true, CharSet = CharSet.Ansi, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool OpenPrinter([MarshalAs(UnmanagedType.LPStr)] string szPrinter, out IntPtr hPrinter, IntPtr pd);

[DllImport("Winspool.drv", EntryPoint = "StartDocPrinterA", SetLastError = true, CharSet = CharSet.Ansi, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool StartDocPrinter(IntPtr hPrinter, Int32 level, [In, MarshalAs(UnmanagedType.LPStruct)] DOCINFOA di);

[DllImport("Winspool.drv", EntryPoint = "StartPagePrinter", SetLastError = true, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool StartPagePrinter(IntPtr hPrinter);

[DllImport("Winspool.drv", EntryPoint = "WritePrinter", SetLastError = true, ExactSpelling = true, CallingConvention = CallingConvention.StdCall)]
public static extern bool WritePrinter(IntPtr hPrinter, IntPtr pBytes, Int32 dwCount, out Int32 dwWritten);

public static bool SendBytesToPrinter(string printerName, IntPtr pBytes, int dwCount, string docName = null, string dataType = "RAW")
{
    DOCINFOA di = new DOCINFOA();
    di.pDocName = string.IsNullOrWhiteSpace(docName) ? string.Empty : docName;
    di.pDataType = string.IsNullOrWhiteSpace(dataType) ? "RAW" : dataType;

    IntPtr hPrinter = new IntPtr(0); int dwError = 0, dwWritten = 0; bool bSuccess = false;
    if (OpenPrinter(printerName.Normalize(), out hPrinter, IntPtr.Zero))
    {
        if (StartDocPrinter(hPrinter, 1, di))
        {
            if (StartPagePrinter(hPrinter))
            {
                bSuccess = WritePrinter(hPrinter, pBytes, dwCount, out dwWritten);
                EndPagePrinter(hPrinter);
            }
            EndDocPrinter(hPrinter);
        }
        ClosePrinter(hPrinter);
    }

    if (bSuccess == false)
        dwError = Marshal.GetLastWin32Error();
    return bSuccess;
}

public static bool SendBytesToPrinter(string printerName, byte[] bytes, string docName)
{
    int dwCount = bytes.Length;
    IntPtr ptrBytes = Marshal.AllocCoTaskMem(Marshal.SizeOf(typeof(byte)) * bytes.Length);
    try
    {
        Marshal.Copy(bytes, 0, ptrBytes, bytes.Length);
        SendBytesToPrinter(printerName, ptrBytes, dwCount, docName);
    }
    finally { Marshal.FreeCoTaskMem(ptrBytes); }
    return true;
}

public static bool SendStringToPrinter(string printerName, string str, string docName)
{
    int dwCount = str.Length;
    IntPtr ptrBytes = Marshal.StringToCoTaskMemAnsi(str);
    try { SendBytesToPrinter(printerName, ptrBytes, dwCount, docName); }
    finally { Marshal.FreeCoTaskMem(ptrBytes); }
    return true;
}

回答1:

Usually this happens when you provide the encoder with data not present in the codepage.

So if you would e.g. try to convert a character with a byte value > 255 (which is not present in codepage 1258) you would get the fallback which is the ? character in this case. My guess is, that you have invalid data in the input string in the first case (maybe characters that look the same but are not the correct ones).

My next step would be to inspect the characters and their corresponding byte values.