Facebook
From aaaaaaaaaa, 1 Month ago, written in Plain Text.
Embed
Download Paste or View Raw
Hits: 169
  1. #include <iostream>
  2. #include <string>
  3. #include <codecvt>
  4. #include <stdexcept>
  5. #include <sstream>
  6. #include <iomanip>
  7. #include <vector>
  8.  
  /// Extracts the hexadecimal payload of every `\uXXXX` escape found in `input`.
  ///
  /// An escape is recognised only when a backslash is immediately followed by
  /// 'u' and exactly four hexadecimal digits. Any other text (before, between,
  /// or after escapes) is ignored, so an escape directly followed by literal
  /// characters is still picked up.
  ///
  /// @param input Text that may contain `\uXXXX` escape sequences.
  /// @return The four-digit hex strings in order of appearance, with their
  ///         original letter case; empty when no well-formed escape is present.
  std::vector<std::string> extractUTF16HexValues(const std::string& input) {
      static const char kHexDigits[] = "0123456789abcdefABCDEF";
      const std::string::size_type kPrefixLength = 2;  // length of "\u"
      const std::string::size_type kHexLength = 4;     // digits per UTF-16 code unit

      std::vector<std::string> utf16HexValues;
      std::string::size_type pos = 0;

      while ((pos = input.find("\\u", pos)) != std::string::npos) {
          const std::string::size_type hexStart = pos + kPrefixLength;

          // Fewer than four characters remain: neither this escape nor any
          // later one can be complete.
          if (input.size() - hexStart < kHexLength) {
              break;
          }

          const std::string hexValue = input.substr(hexStart, kHexLength);
          if (hexValue.find_first_not_of(kHexDigits) == std::string::npos) {
              utf16HexValues.push_back(hexValue);
              pos = hexStart + kHexLength;
          } else {
              // Not an escape after all; resume right after the "\u" so that a
              // following backslash can still start a valid escape.
              pos = hexStart;
          }
      }

      return utf16HexValues;
  }
  29.  
  /// Converts a sequence of UTF-16 code units, each given as a hexadecimal
  /// string, into the hexadecimal dump of the equivalent UTF-8 encoding.
  ///
  /// @param utf16HexValues One hex string per UTF-16 code unit (e.g. "D83E").
  /// @return Lowercase hex, two digits per UTF-8 byte. Returns an empty string
  ///         when the input is empty or when any value is not a valid 16-bit
  ///         code unit or the units do not form well-formed UTF-16; in the
  ///         failure cases a message is written to std::cerr.
  std::string convertUTF16HexToUTF8Hex(const std::vector<std::string>& utf16HexValues) {
      try {
          // Parse every hex string into one UTF-16 code unit.
          std::u16string utf16String;
          utf16String.reserve(utf16HexValues.size());
          for (const std::string& utf16Hex : utf16HexValues) {
              std::size_t consumed = 0;
              const int codeUnit = std::stoi(utf16Hex, &consumed, 16);
              // Reject trailing garbage and anything that would be silently
              // truncated when narrowed to 16 bits.
              if (consumed != utf16Hex.size() || codeUnit < 0 || codeUnit > 0xFFFF) {
                  throw std::out_of_range("value is not a 16-bit code unit");
              }
              utf16String.push_back(static_cast<char16_t>(codeUnit));
          }

          // Check surrogate pairing up front so malformed input is reported the
          // same way regardless of how the library converter reacts to it.
          for (std::size_t i = 0; i < utf16String.size(); ++i) {
              const char16_t unit = utf16String[i];
              const bool isHigh = unit >= 0xD800 && unit <= 0xDBFF;
              const bool isLow = unit >= 0xDC00 && unit <= 0xDFFF;
              if (isHigh) {
                  const bool hasLow = i + 1 < utf16String.size() &&
                                      utf16String[i + 1] >= 0xDC00 &&
                                      utf16String[i + 1] <= 0xDFFF;
                  if (!hasLow) {
                      throw std::range_error("unpaired high surrogate");
                  }
                  ++i;  // skip the low half of the pair
              } else if (isLow) {
                  throw std::range_error("unpaired low surrogate");
              }
          }

          // Convert the UTF-16 string to UTF-8.
          std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
          const std::string utf8Encoded = converter.to_bytes(utf16String);

          // Dump each byte as two hex digits. std::hex and the fill character
          // are sticky; std::setw must be re-applied before every insertion.
          std::stringstream utf8HexStream;
          utf8HexStream << std::hex << std::setfill('0');
          for (const unsigned char byte : utf8Encoded) {
              utf8HexStream << std::setw(2) << static_cast<int>(byte);
          }

          return utf8HexStream.str();
      } catch (const std::invalid_argument& e) {
          // std::stoi found no hexadecimal digits at all.
          std::cerr << "Error: " << e.what() << std::endl;
          return "";
      } catch (const std::out_of_range&) {
          std::cerr << "Error: Invalid UTF-16 hexadecimal value." << std::endl;
          return "";
      } catch (const std::range_error&) {
          // Raised by the surrogate check above, or by to_bytes() itself.
          std::cerr << "Error: Invalid UTF-16 sequence." << std::endl;
          return "";
      }
  }
  59.  
  /// Decodes a hexadecimal dump (two digits per byte) back into raw bytes.
  ///
  /// @param utf8Hex Hex string such as "f09fa6a0"; upper- and lowercase digits
  ///                are both accepted.
  /// @return The decoded bytes. Returns an empty string when the input is
  ///         empty, has an odd number of characters, or contains a character
  ///         that is not a hexadecimal digit; in the two failure cases a
  ///         message is written to std::cerr.
  std::string convertUTF8HexToUTF8Bytes(const std::string& utf8Hex) {
      // Maps one hex digit to its value, or -1 when the character is not hex.
      const auto nibbleValue = [](char digit) -> int {
          if (digit >= '0' && digit <= '9') return digit - '0';
          if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10;
          if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10;
          return -1;
      };

      // A dangling nibble cannot be a whole byte; refuse rather than guess.
      if (utf8Hex.length() % 2 != 0) {
          std::cerr << "Error: Hex string has an odd number of digits." << std::endl;
          return "";
      }

      std::string utf8Bytes;
      utf8Bytes.reserve(utf8Hex.length() / 2);

      // Walk the input two digits (one byte) at a time.
      for (size_t i = 0; i < utf8Hex.length(); i += 2) {
          const int high = nibbleValue(utf8Hex[i]);
          const int low = nibbleValue(utf8Hex[i + 1]);
          if (high < 0 || low < 0) {
              std::cerr << "Error: Invalid hexadecimal digit in input." << std::endl;
              return "";
          }
          utf8Bytes.push_back(static_cast<char>((high << 4) | low));
      }

      return utf8Bytes;
  }
  78.  
  79. int main() {
  80.     std::string input = "\\uD83E\\uDDA0";
  81.     std::vector<std::string> utf16HexValues = extractUTF16HexValues(input);
  82.    
  83.     //std::vector<std::string> utf16HexValues = {"D83E", "DDA0"};
  84.     std::string utf8Hex = convertUTF16HexToUTF8Hex(utf16HexValues);
  85.     if (!utf8Hex.empty()) {
  86.         std::cout << "UTF-8 Hex: " << utf8Hex << std::endl; // Output: F09FA6A0
  87.  
  88.         std::string utf8Bytes = convertUTF8HexToUTF8Bytes(utf8Hex);
  89.         std::cout << "UTF-8 Bytes: " << utf8Bytes << std::endl; // Output: ?
  90.     }
  91.     return 0;
  92. }
  93.