「MFCの基礎 - UTF-8」の版間の差分

📢 Webサイト閉鎖と移転のお知らせ

このWebサイトは2026年9月に閉鎖いたします。

移転先はhttps://mochiu.net/index.php?title=MFCの基礎 - UTF-8です。

新しい記事は移転先で追加しております。（旧サイトでは記事を追加しておりません）

2026年1月31日 (土) 20:27時点における最新版

概要

Windows APIでは、wchar_t型はUTF-16に対応している。
Shift-JIS等のマルチバイト文字列からUTF-8へ直接変換することはできないため、1度UTF-16 (wchar_t型) を経由する必要がある。

※注意
CP_ACPはシステムのデフォルトコードページを示し、日本語のWindowsでは通常Shift-JIS (CP932) だが、環境によって異なる場合がある。

基本的な変換

 #include <windows.h>
 #include <new>
 
 // Source text to convert.
 // NOTE(review): the bytes of this literal depend on the compiler's execution
 // character set; it is presumably Shift-JIS only when the file is compiled
 // that way - confirm for your toolchain.
 const char *pstrShiftJIS = "文字列のテスト";
 // Intermediate UTF-16 buffer (512 wchar_t elements), zero-initialized.
 wchar_t pstrUTF16[512] = {0};
 // Destination UTF-8 buffer (512 bytes), zero-initialized.
 char pstrUTF8[512] = {0};
 
 // Convert from the CP_ACP code page to UTF-16.
 // Passing -1 as the source length means the terminating null is converted too.
 // The return value is not checked in this minimal sample.
 ::MultiByteToWideChar(CP_ACP, 0, pstrShiftJIS, -1, pstrUTF16, 512);
 
 // Convert from UTF-16 to UTF-8 (return value likewise not checked here).
 ::WideCharToMultiByte(CP_UTF8, 0, pstrUTF16, -1, pstrUTF8, 512, NULL, NULL);

Shift-JISからUTF-8への変換

以下の例では、バッファサイズの確認とエラーチェックを含む実装例を示している。

 // Converts a null-terminated string in the CP_ACP code page (normally
 // Shift-JIS on Japanese Windows) to UTF-8, going through UTF-16.
 //
 // pstrShiftJIS    : null-terminated source string.
 // pstrUTF8        : caller-owned destination buffer.
 // nUTF8BufferSize : size of pstrUTF8 in bytes, terminating null included.
 //
 // Returns true on success. Returns false if an argument is unusable, a
 // conversion step fails, memory cannot be allocated, or the destination
 // buffer is too small for the converted text.
 bool ShiftJISToUTF8(const char* pstrShiftJIS, char* pstrUTF8, int nUTF8BufferSize)
 {
    // Reject unusable arguments explicitly instead of relying on the API to fail.
    if(pstrShiftJIS == NULL || pstrUTF8 == NULL || nUTF8BufferSize <= 0)
    {
       return false;
    }
 
    // Query the number of wchar_t elements needed (terminating null included,
    // because the source length is given as -1).
    const int iLengthUTF16 = ::MultiByteToWideChar(CP_ACP, 0, pstrShiftJIS, -1, NULL, 0);
    if(iLengthUTF16 == 0)
    {
       return false; // conversion failed
    }
 
    // Plain new[] throws std::bad_alloc and never returns NULL, so the nothrow
    // form is required for this null check to be meaningful and for the
    // function to keep reporting failure through its bool result.
    wchar_t* pstrUTF16 = new (std::nothrow) wchar_t[iLengthUTF16];
    if(pstrUTF16 == NULL)
    {
       return false; // allocation failed
    }
 
    // From here on there is a single exit so the buffer is released exactly once.
    bool bResult = false;
 
    // Code page -> UTF-16
    if(::MultiByteToWideChar(CP_ACP, 0, pstrShiftJIS, -1, pstrUTF16, iLengthUTF16) != 0)
    {
       // Query the number of bytes the UTF-8 form needs (terminating null included).
       const int iLengthUTF8 = ::WideCharToMultiByte(CP_UTF8, 0, pstrUTF16, -1, NULL, 0, NULL, NULL);
       if(iLengthUTF8 != 0 && iLengthUTF8 <= nUTF8BufferSize)
       {
          // UTF-16 -> UTF-8
          bResult = (::WideCharToMultiByte(CP_UTF8, 0, pstrUTF16, -1, pstrUTF8, nUTF8BufferSize, NULL, NULL) != 0);
       }
    }
 
    delete[] pstrUTF16;
    return bResult;
 }

UTF-8からShift-JISへの変換

 // Converts a null-terminated UTF-8 string to the CP_ACP code page (normally
 // Shift-JIS on Japanese Windows), going through UTF-16.
 //
 // pstrUTF8            : null-terminated source string.
 // pstrShiftJIS        : caller-owned destination buffer.
 // nShiftJISBufferSize : size of pstrShiftJIS in bytes, terminating null included.
 //
 // Returns true on success. Returns false if an argument is unusable, a
 // conversion step fails, memory cannot be allocated, or the destination
 // buffer is too small for the converted text.
 //
 // NOTE(review): characters that do not exist in the target code page are
 // replaced by the API's default character and the call still succeeds, so a
 // true result does not guarantee a lossless conversion.
 bool UTF8ToShiftJIS(const char* pstrUTF8, char* pstrShiftJIS, int nShiftJISBufferSize)
 {
    // Reject unusable arguments explicitly instead of relying on the API to fail.
    if(pstrUTF8 == NULL || pstrShiftJIS == NULL || nShiftJISBufferSize <= 0)
    {
       return false;
    }
 
    // Query the number of wchar_t elements needed (terminating null included,
    // because the source length is given as -1).
    const int iLengthUTF16 = ::MultiByteToWideChar(CP_UTF8, 0, pstrUTF8, -1, NULL, 0);
    if(iLengthUTF16 == 0)
    {
       return false; // conversion failed
    }
 
    // Plain new[] throws std::bad_alloc and never returns NULL, so the nothrow
    // form is required for this null check to be meaningful and for the
    // function to keep reporting failure through its bool result.
    wchar_t* pstrUTF16 = new (std::nothrow) wchar_t[iLengthUTF16];
    if(pstrUTF16 == NULL)
    {
       return false; // allocation failed
    }
 
    // From here on there is a single exit so the buffer is released exactly once.
    bool bResult = false;
 
    // UTF-8 -> UTF-16
    if(::MultiByteToWideChar(CP_UTF8, 0, pstrUTF8, -1, pstrUTF16, iLengthUTF16) != 0)
    {
       // Query the number of bytes the code-page form needs (terminating null included).
       const int iLengthShiftJIS = ::WideCharToMultiByte(CP_ACP, 0, pstrUTF16, -1, NULL, 0, NULL, NULL);
       if(iLengthShiftJIS != 0 && iLengthShiftJIS <= nShiftJISBufferSize)
       {
          // UTF-16 -> code page
          bResult = (::WideCharToMultiByte(CP_ACP, 0, pstrUTF16, -1, pstrShiftJIS, nShiftJISBufferSize, NULL, NULL) != 0);
       }
    }
 
    delete[] pstrUTF16;
    return bResult;
 }

std::string型を使用した変換

std::string型を使用することにより、より安全で使いやすい実装が可能となる。

 #include <string>
 #include <windows.h>
 
 // Converts a string in the CP_ACP code page (normally Shift-JIS on Japanese
 // Windows) to UTF-8 and returns it by value.
 //
 // The input is handed to the API as a null-terminated C string, so only the
 // part before the first null character is converted.
 // An empty string is returned when any conversion step fails.
 std::string ShiftJISToUTF8(const std::string& strShiftJIS)
 {
    const char* const pSource = strShiftJIS.c_str();
 
    // Step 1: measure, then convert code page -> UTF-16.
    // The reported length counts the terminating null because -1 is passed.
    const int nWideCount = ::MultiByteToWideChar(CP_ACP, 0, pSource, -1, NULL, 0);
    if(nWideCount == 0)
    {
       return "";
    }
 
    std::wstring wide(nWideCount, L'\0');
    const int nWideWritten = ::MultiByteToWideChar(CP_ACP, 0, pSource, -1, &wide[0], nWideCount);
    if(nWideWritten == 0)
    {
       return "";
    }
 
    // Step 2: measure, then convert UTF-16 -> UTF-8.
    const int nUtf8Count = ::WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if(nUtf8Count == 0)
    {
       return "";
    }
 
    std::string utf8(nUtf8Count, '\0');
    const int nUtf8Written = ::WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], nUtf8Count, NULL, NULL);
    if(nUtf8Written == 0)
    {
       return "";
    }
 
    // The API wrote its own terminating null into the last element; drop it
    // so the std::string holds only the text.
    utf8.resize(nUtf8Count - 1);
    return utf8;
 }

BOM (Byte Order Mark) 一覧

BOMは、テキストファイルの先頭に配置され、文字エンコーディングやバイトオーダーを示すためのバイト列である。

各エンコーディングのBOM一覧
エンコーディング	BOM (16進数)	バイト数	備考
UTF-8	EF BB BF	3	バイトオーダーの概念がないため、エンコーディング識別用のシグネチャとして使用
UTF-16 BE	FE FF	2	ビッグエンディアン (上位バイトが先)
UTF-16 LE	FF FE	2	リトルエンディアン (下位バイトが先)、Windowsで一般的
UTF-32 BE	00 00 FE FF	4	ビッグエンディアン
UTF-32 LE	FF FE 00 00	4	リトルエンディアン
UTF-7	2B 2F 76 38 / 2B 2F 76 39 / 2B 2F 76 2B / 2B 2F 76 2F	4	先頭3バイト (2B 2F 76) は共通で、4バイト目は複数パターンあり (非推奨エンコーディング)
UTF-1	F7 64 4C	3	ほとんど使用されない
UTF-EBCDIC	DD 73 66 73	4	EBCDIC環境向け、ほとんど使用されない
SCSU	0E FE FF	3	Standard Compression Scheme for Unicode
BOCU-1	FB EE 28	3	Binary Ordered Compression for Unicode
GB 18030	84 31 95 33	4	中国国家規格、BOMはオプション

※注意

UTF-16とUTF-32では、BOMがバイトオーダーの識別に重要である。
UTF-16でBOMが無い場合、仕様上はビッグエンディアンとして扱うことが推奨されている。
UTF-32 LEのBOM (FF FE 00 00) は、UTF-16 LEのBOM (FF FE) で始まるため、判定時は4バイト先読みが必要となる。

下表に、実務上よく使用される4種類の文字コードを示す。

主要なエンコーディングと用途
エンコーディング	用途
UTF-8 (BOM付き / なし)	Web、クロスプラットフォーム開発で最も一般的
UTF-16 LE	Windows API (Win32) で内部的に使用
UTF-16 BE	macOS、Javaで使用されることがある
UTF-32	固定長が必要な特殊用途

以下の例では、ファイル先頭のBOMを読み取り、Unicodeエンコーディングの種類 (UTF-8 / UTF-16 / UTF-32) を判定している。

 #include <fstream>
 #include <cstdint>
 
 // Result of inspecting the first bytes of a file for a Unicode BOM.
 enum class TextEncoding
 {
    Unknown,      // the file could not be opened
    UTF8_BOM,     // starts with EF BB BF
    UTF8_NoBOM,   // no recognized BOM (also returned for plain ASCII etc.)
    UTF16_BE,     // starts with FE FF
    UTF16_LE,     // starts with FF FE
    UTF32_BE,     // starts with 00 00 FE FF
    UTF32_LE      // starts with FF FE 00 00
 };
 
 // Opens `filename` in binary mode, reads up to four leading bytes and maps
 // them to a TextEncoding.
 //
 // Returns Unknown when the file cannot be opened, the matching BOM value when
 // one is recognized, and UTF8_NoBOM for every other content (including an
 // empty file).
 TextEncoding DetectBOM(const std::string& filename)
 {
    // One entry per recognized signature. Order matters: the UTF-32 LE mark
    // begins with the UTF-16 LE mark, so the 4-byte entries must be tried first.
    struct Signature
    {
       unsigned char bytes[4];
       std::streamsize length;
       TextEncoding encoding;
    };
    static const Signature kSignatures[] =
    {
       { {0x00, 0x00, 0xFE, 0xFF}, 4, TextEncoding::UTF32_BE },
       { {0xFF, 0xFE, 0x00, 0x00}, 4, TextEncoding::UTF32_LE },
       { {0xEF, 0xBB, 0xBF, 0x00}, 3, TextEncoding::UTF8_BOM },
       { {0xFE, 0xFF, 0x00, 0x00}, 2, TextEncoding::UTF16_BE },
       { {0xFF, 0xFE, 0x00, 0x00}, 2, TextEncoding::UTF16_LE }
    };
 
    std::ifstream input(filename, std::ios::binary);
    if (!input) {
       return TextEncoding::Unknown;
    }
 
    unsigned char head[4] = {0};
    input.read(reinterpret_cast<char*>(head), 4);
    // A short file leaves fewer than four valid bytes; only those are compared.
    const std::streamsize available = input.gcount();
 
    for (const Signature& candidate : kSignatures) {
       if (available < candidate.length) {
          continue;
       }
 
       bool matches = true;
       for (std::streamsize i = 0; i < candidate.length; ++i) {
          if (head[i] != candidate.bytes[i]) {
             matches = false;
             break;
          }
       }
 
       if (matches) {
          return candidate.encoding;
       }
    }
 
    return TextEncoding::UTF8_NoBOM;  // no BOM (or ASCII etc.)
 }

BOMの検出

以下の例では、ファイルからUTF-8を読み込む際にBOMを検出している。

 #include <fstream>
 #include <cstring>
 
 // Reports whether the file named `filename` begins with the UTF-8 BOM
 // (EF BB BF).
 //
 // Returns false when the file cannot be opened, when fewer than three bytes
 // can be read, or when the first three bytes are not the BOM.
 bool HasUTF8BOM(const std::string& filename)
 {
    static const unsigned char kUtf8Bom[3] = {0xEF, 0xBB, 0xBF};
    const std::streamsize kBomLength = 3;
 
    std::ifstream input(filename, std::ios::binary);
    if(!input)
    {
       return false;
    }
 
    unsigned char head[3] = {0};
    input.read(reinterpret_cast<char*>(head), kBomLength);
 
    // A file shorter than the BOM cannot carry one.
    if(input.gcount() != kBomLength)
    {
       return false;
    }
 
    return std::memcmp(head, kUtf8Bom, sizeof(kUtf8Bom)) == 0;
 }

C++17以降では、std::filesystem を使用してより安全な実装が可能である。

 #include <fstream>
 #include <cstring>
 #include <filesystem>
 #include <vector>
 
 namespace fs = std::filesystem;
 
 // Reports whether the file at `filepath` begins with the UTF-8 BOM (EF BB BF).
 //
 // Returns false when the path does not exist, is not something whose size can
 // be queried (for example a directory), is shorter than three bytes, cannot
 // be opened, or does not start with the BOM.
 //
 // The size is obtained through the std::error_code overload, so a missing or
 // non-regular path is reported as false instead of raising
 // std::filesystem::filesystem_error.
 bool HasUTF8BOM(const fs::path& filepath)
 {
    // A single query replaces the former exists() + file_size() pair: it covers
    // the "does not exist" case too and leaves no gap between the two checks.
    std::error_code ec;
    const auto fileSize = fs::file_size(filepath, ec);
    if(ec || fileSize < 3)
    {
       return false;
    }
 
    std::ifstream ifs(filepath, std::ios::binary);
    if(!ifs)
    {
       return false;
    }
 
    // Three bytes fit on the stack; no heap allocation is needed.
    unsigned char bom[3] = {0};
    ifs.read(reinterpret_cast<char*>(bom), 3);
 
    // gcount() is re-checked because the file may have changed since the size query.
    return (ifs.gcount() == 3 &&
            bom[0] == 0xEF &&
            bom[1] == 0xBB &&
            bom[2] == 0xBF);
 }

注意事項

バッファサイズは、変換後の文字列が収まる十分なサイズを確保する必要がある
UTF-8では、1文字が最大4バイトになる可能性があるため、バッファサイズには余裕を持たせる
変換に失敗する可能性があるため、戻り値の確認が重要
CP_ACPは環境依存のため、特定のコードページを指定する場合は、コードページ番号932（Shift-JIS）などを数値で直接指定する（Windows SDKにCP_932という定数は定義されていない）
MultiByteToWideCharおよびWideCharToMultiByteの第4引数に-1を指定すると、null終端文字を含めた変換が行われる
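変換できない文字がある場合、デフォルトではシステム既定の文字（通常は'?'）に置き換えられる（置換文字はWideCharToMultiByteの第7引数lpDefaultCharで指定でき、置換が発生したかどうかは最後の引数lpUsedDefaultCharで確認できる。ただし、CP_UTF8を指定する場合は両方ともNULLにする必要がある）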

@@ 1行目: / 1行目: @@
 == 概要 ==
-wchar_tはUTF-16に対応しており、Shift-JIS等からUTF-8に直接的で変換できない。<br>
+Windows APIでは、wchar_t型はUTF-16に対応している。<br>
-ここでは、様々な文字コードからUTF-8へ変換する方法を記載する。<br>
+Shift-JIS等のマルチバイト文字列からUTF-8へ直接変換することはできないため、1度UTF-16 (wchar_t型) を経由する必要がある。<br>
+<br>
+<u>※注意</u><br>
+<u>CP_ACPはシステムのデフォルトコードページを示し、日本語のWindowsでは通常Shift-JIS (CP932) だが、環境によって異なる場合がある。</u><br>
 <br><br>
-== サンプルコード ==
+== 基本的な変換 ==
-  <source lang="c++">
+  <syntaxhighlight lang="c++">
   #include <windows.h>
-  char *buf1;
+  const char *pstrShiftJIS = "文字列のテスト";
-  wchar_t buf2[512];
+  wchar_t pstrUTF16[512] = {0};
+ char pstrUTF8[512] = {0};
   // Shift-JISからUTF-16へ変換
-  ::MultiByteToWideChar(CP_ACP, 0, buf1, -1, buf2, 512);
+  ::MultiByteToWideChar(CP_ACP, 0, pstrShiftJIS, -1, pstrUTF16, 512);
   // UTF-16からUTF-8へ変換
-  ::WideCharToMultiByte(CP_UTF8, 0, buf2, -1, buf1, 512, NULL, NULL);
+  ::WideCharToMultiByte(CP_UTF8, 0, pstrUTF16, -1, pstrUTF8, 512, NULL, NULL);
-  </source>
+  </syntaxhighlight>
+<br><br>
+== Shift-JISからUTF-8への変換 ==
+以下の例では、バッファサイズの確認とエラーチェックを含む実装例を示している。<br>
+ <syntaxhighlight lang="c++">
+ bool ShiftJISToUTF8(const char* pstrShiftJIS, char* pstrUTF8, int nUTF8BufferSize)
+ {
+    // UTF-16へ変換するために必要なバッファサイズを取得
+    int iLengthUTF16 = ::MultiByteToWideChar(CP_ACP, 0, pstrShiftJIS, -1, NULL, 0);
+    if(iLengthUTF16 == 0)
+    {
+       return false; // 変換失敗
+    }
+    // UTF-16用のバッファを確保
+    wchar_t* pstrUTF16 = new wchar_t[iLengthUTF16];
+    if(pstrUTF16 == NULL)
+    {
+       return false; // メモリ確保失敗
+    }
+    // Shift-JISからUTF-16へ変換
+    if(::MultiByteToWideChar(CP_ACP, 0, pstrShiftJIS, -1, pstrUTF16, iLengthUTF16) == 0)
+    {
+       delete[] pstrUTF16;
+       return false; // 変換失敗
+    }
+    // UTF-8へ変換するために必要なバッファサイズを取得
+    int iLengthUTF8 = ::WideCharToMultiByte(CP_UTF8, 0, pstrUTF16, -1, NULL, 0, NULL, NULL);
+    if(iLengthUTF8 == 0 || iLengthUTF8 > nUTF8BufferSize)
+    {
+       delete[] pstrUTF16;
+       return false; // 変換失敗またはバッファ不足
+    }
+    // UTF-16からUTF-8へ変換
+    if(::WideCharToMultiByte(CP_UTF8, 0, pstrUTF16, -1, pstrUTF8, nUTF8BufferSize, NULL, NULL) == 0)
+    {
+       delete[] pstrUTF16;
+       return false; // 変換失敗
+    }
+    delete[] pstrUTF16;
+    return true;
+ }
+ </syntaxhighlight>
+<br><br>
+== UTF-8からShift-JISへの変換 ==
+ <syntaxhighlight lang="c++">
+ bool UTF8ToShiftJIS(const char* pstrUTF8, char* pstrShiftJIS, int nShiftJISBufferSize)
+ {
+    // UTF-16へ変換するために必要なバッファサイズを取得
+    int iLengthUTF16 = ::MultiByteToWideChar(CP_UTF8, 0, pstrUTF8, -1, NULL, 0);
+    if(iLengthUTF16 == 0)
+    {
+       return false; // 変換失敗
+    }
+    // UTF-16用のバッファを確保
+    wchar_t* pstrUTF16 = new wchar_t[iLengthUTF16];
+    if(pstrUTF16 == NULL)
+    {
+       return false; // メモリ確保失敗
+    }
+    // UTF-8からUTF-16へ変換
+    if(::MultiByteToWideChar(CP_UTF8, 0, pstrUTF8, -1, pstrUTF16, iLengthUTF16) == 0)
+    {
+       delete[] pstrUTF16;
+       return false; // 変換失敗
+    }
+    // Shift-JISへ変換するために必要なバッファサイズを取得
+    int iLengthShiftJIS = ::WideCharToMultiByte(CP_ACP, 0, pstrUTF16, -1, NULL, 0, NULL, NULL);
+    if(iLengthShiftJIS == 0 || iLengthShiftJIS > nShiftJISBufferSize)
+    {
+       delete[] pstrUTF16;
+       return false; // 変換失敗またはバッファ不足
+    }
+    // UTF-16からShift-JISへ変換
+    if(::WideCharToMultiByte(CP_ACP, 0, pstrUTF16, -1, pstrShiftJIS, nShiftJISBufferSize, NULL, NULL) == 0)
+    {
+       delete[] pstrUTF16;
+       return false; // 変換失敗
+    }
+    delete[] pstrUTF16;
+    return true;
+ }
+ </syntaxhighlight>
+<br><br>
+== std::string型を使用した変換 ==
+std::string型を使用することにより、より安全で使いやすい実装が可能となる。<br>
+ <syntaxhighlight lang="c++">
+ #include <string>
+ #include <windows.h>
+ std::string ShiftJISToUTF8(const std::string& strShiftJIS)
+ {
+    // UTF-16へ変換するために必要なバッファサイズを取得
+    int iLengthUTF16 = ::MultiByteToWideChar(CP_ACP, 0, strShiftJIS.c_str(), -1, NULL, 0);
+    if(iLengthUTF16 == 0)
+    {
+       return ""; // 変換失敗
+    }
+    // UTF-16用のバッファを確保
+    std::wstring wstrUTF16(iLengthUTF16, L'\0');
+    // Shift-JISからUTF-16へ変換
+    if(::MultiByteToWideChar(CP_ACP, 0, strShiftJIS.c_str(), -1, &wstrUTF16[0], iLengthUTF16) == 0)
+    {
+       return ""; // 変換失敗
+    }
+    // UTF-8へ変換するために必要なバッファサイズを取得
+    int iLengthUTF8 = ::WideCharToMultiByte(CP_UTF8, 0, wstrUTF16.c_str(), -1, NULL, 0, NULL, NULL);
+    if(iLengthUTF8 == 0)
+    {
+       return ""; // 変換失敗
+    }
+    // UTF-8用のバッファを確保
+    std::string strUTF8(iLengthUTF8, '\0');
+    // UTF-16からUTF-8へ変換
+    if(::WideCharToMultiByte(CP_UTF8, 0, wstrUTF16.c_str(), -1, &strUTF8[0], iLengthUTF8, NULL, NULL) == 0)
+    {
+       return ""; // 変換失敗
+    }
+    // 末尾のnull文字を削除
+    strUTF8.resize(iLengthUTF8 - 1);
+    return strUTF8;
+ }
+ </syntaxhighlight>
+<br><br>
+== BOM (Byte Order Mark) 一覧 ==
+BOMは、テキストファイルの先頭に配置され、文字エンコーディングやバイトオーダーを示すためのバイト列である。<br>
 <br>
-UTF-8では、ファイルの先頭にUTF-8であることを示す3バイトのデータ0xEF、0xBB、0xBFが付加されている。(BOMという)<br>
+<center>
-UTF-8のファイルを出力する場合、以下のように出力しないと、UTF-8として正しく認識されない。<br>
+{| class="wikitable"
-(BOMが付加されていないUTF-8をUTF-8Nという)<br>
+|+ 各エンコーディングのBOM一覧
-  <source lang="c++">
+|-
-  buf2[0] = 0xEF;
+! エンコーディング !! BOM (16進数) !! バイト数 !! 備考
-  buf2[1] = 0xBB;
+|-
-  buf2[2] = 0xBF;
+| UTF-8 || EF BB BF || 3 || バイトオーダーの概念がないため、エンコーディング識別用のシグネチャとして使用
-  buf2[3] = nullptr;
+|-
-  </source>
+| UTF-16 BE || FE FF || 2 || ビッグエンディアン (上位バイトが先)
+|-
+| UTF-16 LE || FF FE || 2 || リトルエンディアン (下位バイトが先)、Windowsで一般的
+|-
+| UTF-32 BE || 00 00 FE FF || 4 || ビッグエンディアン
+|-
+| UTF-32 LE || FF FE 00 00 || 4 || リトルエンディアン
+|-
+| UTF-7 || 2B 2F 76 38<br>2B 2F 76 39<br>2B 2F 76 2B<br>2B 2F 76 2F || 4 || 4バイト目は複数パターンあり (非推奨エンコーディング)
+|-
+| UTF-1 || F7 64 4C || 3 || ほとんど使用されない
+|-
+| UTF-EBCDIC || DD 73 66 73 || 4 || EBCDIC環境向け、ほとんど使用されない
+|-
+| SCSU || 0E FE FF || 3 || Standard Compression Scheme for Unicode
+|-
+| BOCU-1 || FB EE 28 || 3 || Binary Ordered Compression for Unicode
+|-
+| GB 18030 || 84 31 95 33 || 4 || 中国国家規格、BOMはオプション
+|}
+</center>
+<br>
+<u>※注意</u><br>
+* <u>UTF-16とUTF-32では、BOMがバイトオーダーの識別に重要である。</u>
+* <u>UTF-16でBOMが無い場合、仕様上はビッグエンディアンとして扱うことが推奨されている。</u>
+* <u>UTF-32 LEのBOM (FF FE 00 00) は、UTF-16 LEのBOM (FF FE) で始まるため、判定時は4バイト先読みが必要となる。</u>
+<br>
+下表に、実務上よく使用される4種類の文字コードを示す。<br>
+<br>
+<center>
+{| class="wikitable"
+|+ 主要なエンコーディングと用途
+|-
+! エンコーディング !! 用途
+|-
+| UTF-8<br>(BOM付き / なし) || Web、クロスプラットフォーム開発で最も一般的
+|-
+| UTF-16 LE || Windows API (Win32) で内部的に使用
+|-
+| UTF-16 BE || MacOS、Javaで使用されることがある、
+|-
+| UTF-32 || 固定長が必要な特殊用途
+|}
+</center>
+<br>
+以下の例では、各UNICODEの判定をしている。<br>
+  <syntaxhighlight lang="c++">
+  #include <fstream>
+ #include <cstdint>
+ enum class TextEncoding
+ {
+    Unknown,
+    UTF8_BOM,
+    UTF8_NoBOM,
+    UTF16_BE,
+    UTF16_LE,
+    UTF32_BE,
+    UTF32_LE
+ };
+ TextEncoding DetectBOM(const std::string& filename)
+ {
+    std::ifstream ifs(filename, std::ios::binary);
+    if (!ifs) {
+       return TextEncoding::Unknown;
+    }
+    unsigned char bom[4] = {0};
+    ifs.read(reinterpret_cast<char*>(bom), 4);
+    std::streamsize bytesRead = ifs.gcount();
+    // UTF-32 (4バイト) を先に判定
+    if (bytesRead >= 4) {
+       if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFE && bom[3] == 0xFF) {
+          return TextEncoding::UTF32_BE;
+       }
+       if (bom[0] == 0xFF && bom[1] == 0xFE && bom[2] == 0x00 && bom[3] == 0x00) {
+          return TextEncoding::UTF32_LE;
+       }
+    }
+    // UTF-8 (3バイト)
+    if (bytesRead >= 3) {
+       if (bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF) {
+          return TextEncoding::UTF8_BOM;
+       }
+    }
+    // UTF-16 (2バイト)
+    if (bytesRead >= 2) {
+       if (bom[0] == 0xFE && bom[1] == 0xFF) {
+          return TextEncoding::UTF16_BE;
+       }
+       if (bom[0] == 0xFF && bom[1] == 0xFE) {
+          return TextEncoding::UTF16_LE;
+       }
+    }
+    return TextEncoding::UTF8_NoBOM;  // BOMなし (またはASCII等)
+ }
+  </syntaxhighlight>
 <br><br>
+== BOMの検出 ==
+以下の例では、ファイルからUTF-8を読み込む際にBOMを検出している。<br>
+ <syntaxhighlight lang="c++">
+ #include <fstream>
+ #include <cstring>
+ bool HasUTF8BOM(const std::string& filename)
+ {
+    std::ifstream ifs(filename, std::ios::binary);
+    if(!ifs)
+    {
+       return false;
+    }
+    unsigned char bom[3] = {0};
+    ifs.read(reinterpret_cast<char*>(bom), 3);
+    if(ifs.gcount() == 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
+    {
+       return true; // BOM付きUTF-8
+    }
+    return false; // BOM無し
+ }
+ </syntaxhighlight>
+<br>
+C++ 17以降では、<code>std::filesystem</code> を使用してより安全な実装が可能である。<br>
+ <syntaxhighlight lang="c++">
+ #include <fstream>
+ #include <cstring>
+ #include <filesystem>
+ #include <vector>
+ namespace fs = std::filesystem;
+ bool HasUTF8BOM(const fs::path& filepath)
+ {
+    if(!fs::exists(filepath) || fs::file_size(filepath) < 3)
+    {
+       return false;
+    }
+    std::ifstream ifs(filepath, std::ios::binary);
+    if(!ifs)
+    {
+       return false;
+    }
+    std::vector<unsigned char> bom(3);
+    ifs.read(reinterpret_cast<char*>(bom.data()), 3);
+    return (ifs.gcount() == 3 &&
+            bom[0] == 0xEF &&
+            bom[1] == 0xBB &&
+            bom[2] == 0xBF);
+ }
+ </syntaxhighlight>
+<br><br>
+== 注意事項 ==
+* バッファサイズは、変換後の文字列が収まる十分なサイズを確保する必要がある
+* UTF-8では、1文字が最大4バイトになる可能性があるため、バッファサイズには余裕を持たせる
+* 変換に失敗する可能性があるため、戻り値の確認が重要
+* CP_ACPは環境依存のため、特定のコードページを指定する場合はCP_932（Shift-JIS）などを使用する
+* MultiByteToWideCharおよびWideCharToMultiByteの第4引数に-1を指定すると、null終端文字を含めた変換が行われる
+* 変換できない文字がある場合、デフォルトでは'?'に置き換えられる（WideCharToMultiByteの最後の引数で制御可能）
+<br><br>
 __FORCETOC__
 [[カテゴリ:MFC]]