#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
// Collects the hexadecimal payload of every "\uXXXX" escape found in `input`.
//
// An escape is a backslash, the letter 'u', and exactly four hexadecimal
// digits. Whatever surrounds the escape is ignored, so escapes embedded in
// other text (e.g. "a\u0041b") are still found.
//
// Returns the four-digit payloads in order of appearance, with the input's
// letter case preserved. Sequences that are too short or contain non-hex
// characters are skipped. No escape processing is done for doubled
// backslashes: "\\u0041" still yields "0041".
std::vector<std::string> extractUTF16HexValues(const std::string& input) {
    constexpr std::size_t kPrefixLength = 2;  // the backslash and the 'u'
    constexpr std::size_t kHexDigitCount = 4;
    constexpr std::size_t kEscapeLength = kPrefixLength + kHexDigitCount;

    std::vector<std::string> utf16HexValues;

    std::size_t pos = 0;
    while ((pos = input.find("\\u", pos)) != std::string::npos) {
        // Any later match would have even fewer characters left, so stop here.
        if (input.size() - pos < kEscapeLength) {
            break;
        }

        const std::string hexValue = input.substr(pos + kPrefixLength, kHexDigitCount);
        const bool allHex = std::all_of(
            hexValue.begin(), hexValue.end(),
            // Take the character as unsigned char: std::isxdigit is undefined
            // for negative values.
            [](unsigned char c) { return std::isxdigit(c) != 0; });

        if (allHex) {
            utf16HexValues.push_back(hexValue);
            pos += kEscapeLength;
        } else {
            // Not a valid escape; resume after the "\u" so that a backslash
            // inside the rejected payload can still start a new escape.
            pos += kPrefixLength;
        }
    }

    return utf16HexValues;
}
// Appends the UTF-8 encoding of `codePoint` to `out`.
// The caller must pass a Unicode scalar value (at most 0x10FFFF and not a
// surrogate); no validation is performed here.
static void appendCodePointAsUtf8(char32_t codePoint, std::string& out) {
    if (codePoint < 0x80) {
        out.push_back(static_cast<char>(codePoint));
    } else if (codePoint < 0x800) {
        out.push_back(static_cast<char>(0xC0 | (codePoint >> 6)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    } else if (codePoint < 0x10000) {
        out.push_back(static_cast<char>(0xE0 | (codePoint >> 12)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    } else {
        out.push_back(static_cast<char>(0xF0 | (codePoint >> 18)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    }
}

// Converts a sequence of UTF-16 code units, each given as a hexadecimal
// string, into the lowercase hexadecimal dump of the equivalent UTF-8 bytes
// (two hex digits per byte, no separators).
//
// Each element is parsed with std::stoi in base 16, so the forms std::stoi
// accepts (such as an optional "0x" prefix) are allowed, but the whole string
// must be consumed and the value must fit in 16 bits. Surrogate pairs are
// combined into a single code point before encoding.
//
// Returns an empty string, after writing a message to std::cerr, when an
// element is not a valid 16-bit hexadecimal value or when the sequence
// contains an unpaired surrogate. An empty input also yields an empty string.
std::string convertUTF16HexToUTF8Hex(const std::vector<std::string>& utf16HexValues) {
    // Step 1: parse every element into a UTF-16 code unit.
    std::u16string utf16String;
    utf16String.reserve(utf16HexValues.size());
    try {
        for (const std::string& utf16Hex : utf16HexValues) {
            std::size_t parsedLength = 0;
            const int value = std::stoi(utf16Hex, &parsedLength, 16);
            // Reject trailing garbage ("12G4") and values that would be
            // silently truncated when narrowed to 16 bits ("1F9A0", "-1").
            if (parsedLength != utf16Hex.size() || value < 0 || value > 0xFFFF) {
                std::cerr << "Error: Invalid UTF-16 hexadecimal value." << std::endl;
                return "";
            }
            utf16String.push_back(static_cast<char16_t>(value));
        }
    } catch (const std::invalid_argument& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return "";
    } catch (const std::out_of_range&) {
        std::cerr << "Error: Invalid UTF-16 hexadecimal value." << std::endl;
        return "";
    }

    // Step 2: decode UTF-16 (pairing surrogates) and encode as UTF-8.
    std::string utf8Encoded;
    for (std::size_t i = 0; i < utf16String.size(); ++i) {
        char32_t codePoint = utf16String[i];
        const bool isHighSurrogate = codePoint >= 0xD800 && codePoint <= 0xDBFF;
        const bool isLowSurrogate = codePoint >= 0xDC00 && codePoint <= 0xDFFF;

        if (isHighSurrogate) {
            // A high surrogate is only meaningful when a low surrogate follows.
            const bool hasLowPartner = i + 1 < utf16String.size() &&
                                       utf16String[i + 1] >= 0xDC00 &&
                                       utf16String[i + 1] <= 0xDFFF;
            if (!hasLowPartner) {
                std::cerr << "Error: Unpaired UTF-16 surrogate." << std::endl;
                return "";
            }
            const char32_t low = utf16String[++i];
            codePoint = static_cast<char32_t>(
                0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00));
        } else if (isLowSurrogate) {
            // A low surrogate that was not consumed as part of a pair.
            std::cerr << "Error: Unpaired UTF-16 surrogate." << std::endl;
            return "";
        }

        appendCodePointAsUtf8(codePoint, utf8Encoded);
    }

    // Step 3: render every UTF-8 byte as two lowercase hex digits.
    static const char kHexDigits[] = "0123456789abcdef";
    std::string utf8Hex;
    utf8Hex.reserve(utf8Encoded.size() * 2);
    for (const unsigned char byte : utf8Encoded) {
        utf8Hex.push_back(kHexDigits[byte >> 4]);
        utf8Hex.push_back(kHexDigits[byte & 0x0F]);
    }
    return utf8Hex;
}
// Decodes a hexadecimal dump (two hex digits per byte, upper or lower case,
// no separators) into the raw bytes it describes.
//
// The bytes are returned as-is; this function does not check that they form
// well-formed UTF-8.
//
// Returns an empty string, after writing a message to std::cerr, when the
// input has an odd number of characters or contains a character that is not a
// hexadecimal digit. An empty input also yields an empty string.
std::string convertUTF8HexToUTF8Bytes(const std::string& utf8Hex) {
    // Maps one hex digit to its numeric value, or -1 if it is not a hex digit.
    const auto nibbleValue = [](char c) -> int {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };

    // Every byte needs exactly two digits; a dangling digit is malformed input.
    if (utf8Hex.length() % 2 != 0) {
        std::cerr << "Error: Invalid UTF-8 hexadecimal string." << std::endl;
        return "";
    }

    std::string utf8Bytes;
    utf8Bytes.reserve(utf8Hex.length() / 2);
    for (std::size_t i = 0; i < utf8Hex.length(); i += 2) {
        const int high = nibbleValue(utf8Hex[i]);
        const int low = nibbleValue(utf8Hex[i + 1]);
        if (high < 0 || low < 0) {
            std::cerr << "Error: Invalid UTF-8 hexadecimal string." << std::endl;
            return "";
        }
        utf8Bytes.push_back(static_cast<char>((high << 4) | low));
    }
    return utf8Bytes;
}
- int main() {
- std::string input = "\\uD83E\\uDDA0";
- std::vector<std::string> utf16HexValues = extractUTF16HexValues(input);
- //std::vector<std::string> utf16HexValues = {"D83E", "DDA0"};
- std::string utf8Hex = convertUTF16HexToUTF8Hex(utf16HexValues);
- if (!utf8Hex.empty()) {
- std::cout << "UTF-8 Hex: " << utf8Hex << std::endl; // Output: F09FA6A0
- std::string utf8Bytes = convertUTF8HexToUTF8Bytes(utf8Hex);
- std::cout << "UTF-8 Bytes: " << utf8Bytes << std::endl; // Output: ?
- }
- return 0;
- }