#include #include #include #include #include #include #include std::vector extractUTF16HexValues(const std::string& input) { std::vector utf16HexValues; std::stringstream ss(input); std::string token; // Iterate over tokens separated by "\\u" while (std::getline(ss, token, '\\')) { if (token.empty()) continue; // Check if the token starts with "u" and has exactly 4 characters following it if (token[0] == 'u' && token.length() == 5) { // Extract the hexadecimal part std::string hexValue = token.substr(1, 4); utf16HexValues.push_back(hexValue); } } return utf16HexValues; } std::string convertUTF16HexToUTF8Hex(const std::vector& utf16HexValues) { try { std::wstring_convert, char16_t> converter; std::stringstream utf8HexStream; std::u16string utf16String; // Combine all UTF-16 hexadecimal values into a single u16string for (const std::string& utf16Hex : utf16HexValues) { uint16_t utf16Value = std::stoi(utf16Hex, nullptr, 16); utf16String.push_back(utf16Value); } // Convert the UTF-16 u16string to UTF-8 std::string utf8Encoded = converter.to_bytes(utf16String); // Convert each byte to hexadecimal representation and append to the result for (unsigned char c : utf8Encoded) { utf8HexStream << std::hex << std::setw(2) << std::setfill('0') << static_cast(c); } return utf8HexStream.str(); } catch (const std::invalid_argument& e) { std::cerr << "Error: " << e.what() << std::endl; return ""; } catch (const std::out_of_range& e) { std::cerr << "Error: Invalid UTF-16 hexadecimal value." << std::endl; return ""; } } std::string convertUTF8HexToUTF8Bytes(const std::string& utf8Hex) { std::stringstream utf8ByteStream; // Iterate over pairs of characters representing hexadecimal values for (size_t i = 0; i < utf8Hex.length(); i += 2) { // Get the next two characters std::string byteHex = utf8Hex.substr(i, 2); // Convert hexadecimal string to integer int byteValue; std::stringstream(byteHex) >> std::hex >> byteValue; // Append the byte as a character to the string utf8ByteStream << static_cast(byteValue); } return utf8ByteStream.str(); } int main() { std::string input = "\\uD83E\\uDDA0"; std::vector utf16HexValues = extractUTF16HexValues(input); //std::vector utf16HexValues = {"D83E", "DDA0"}; std::string utf8Hex = convertUTF16HexToUTF8Hex(utf16HexValues); if (!utf8Hex.empty()) { std::cout << "UTF-8 Hex: " << utf8Hex << std::endl; // Output: F09FA6A0 std::string utf8Bytes = convertUTF8HexToUTF8Bytes(utf8Hex); std::cout << "UTF-8 Bytes: " << utf8Bytes << std::endl; // Output: ? } return 0; }