提问人:Unforgiven 提问时间:10/5/2023 最后编辑:StayOnTargetUnforgiven 更新时间:10/6/2023 访问量:123
如何在VB6中将字符串转换或解码为可读格式?
How to convert or decoded string to a readable format in VB6?
问:
下面的字符串是对 json 文件请求的结果。
StrResult ="\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D9\u0085
\u00D8\u00AF\u00D9\u0086\u00DB\u008C\u00D8\u00A7"
如何将此字符串转换为可读字符?
提示:解码后应该接收的字符串是,在英语中等效于“Hello World”。سلام دنیا
其他语言有很多示例代码,包括 Python、.Net 等,但我找不到 VB6 的任何代码。
答:
您提供的字符串不会解码为 ,而是解码为 。你可以在这里确认。سلام دنیا
سلام دنیا
字符串实际包含的是单个 UTF-8 字节,而不是 Unicode 代码点。这使您的任务更加困难,因为 VB6 字符串通常在内存中采用 UTF-16 编码。
我最近开发了一个库,其中包含用于 VBA 的大量字符串功能,但我认为有关转义和取消转义 Unicode 文字的部分应该可以作为 VB6 代码使用。你可以在这里找到GitHub上的整个库,但我可以在这里直接包含应该解决你的问题的部分。
使用我在下面提供的库中的函数,您应该能够实现您想要的结果,如下所示:
StrResult = DecodeUTF8(EncodeANSI(UnescapeUnicode(StrResult)))
这些是必需的功能:
Public Enum UnicodeEscapeFormat
[_efNone] = 0
efPython = 1 '\uXXXX \u00XXXXXX (4 or 8 hex digits, 8 for chars outside BMP)
efRust = 2 '\u{X} \U{XXXXXX} (1 to 6 hex digits)
efUPlus = 4 'u+XXXX u+XXXXXX (4 or 6 hex digits)
efMarkup = 8 '&#ddddddd; (1 to 7 decimal digits)
efAll = 15
[_efMin] = efPython
[_efMax] = efAll
End Enum
Private Type EscapeSequence
ueFormat As UnicodeEscapeFormat
ueSignature As String
letSngSurrogate As Boolean
buffPosition As Long
currPosition As Long
sigSize As Long
escSize As Long
codepoint As Long
unEscSize As Long
End Type
Private Type TwoCharTemplate
s As String * 2
End Type
Private Type LongTemplate
l As Long
End Type
'Replaces all occurences of unicode characters outside the codePoint range
'defined by maxNonEscapedCharCode with literals of the following formats
'specified by `escapeFormat`:
' efPython = 1 ... \uXXXX \u00XXXXXX (4 or 8 hex digits, 8 for chars outside BMP)
' efRust = 2 ... \u{XXXX} \U{XXXXXX} (1 to 6 hex digits)
' efUPlus = 4 ... u+XXXX u+XXXXXX (4 or 6 hex digits)
' efMarkup = 8 ... &#ddddddd; (1 to 7 decimal digits)
'Where:
' - prefixes \u is case insensitive
' - Xes are the digits of the codepoint in hexadecimal. (X = 0-9 or A-F/a-f)
'Note:
' - Avoid u+XXXX syntax if string contains literals without delimiters as it
' can be misinterpreted if adjacent to text starting with 0-9 or a-f.
' - This function accepts all combinations of UnicodeEscapeFormats:
' If called with, e.g. `escapeFormat = efRust Or efPython`, every character
' in the scope will be escaped with in either format, efRust or efPython,
' chosen at random for each replacement.
' - If `escapeFormat` is set to efAll, it will replace every character in the
' scope with a randomly chosen format of all available fotrmats.
' - To escape every character, set `maxNonEscapedCharCode = -1`
Public Function EscapeUnicode(ByRef str As String, _
Optional ByVal maxNonEscapedCharCode As Long = &HFF, _
Optional ByVal escapeFormat As UnicodeEscapeFormat _
= efPython) As String
Const methodName As String = "EscapeUnicode"
If maxNonEscapedCharCode < -1 Then Err.Raise 5, methodName, _
"`maxNonEscapedCharCode` must be greater or equal -1."
If escapeFormat < [_efMin] Or escapeFormat > [_efMax] Then _
Err.Raise 5, methodName, "Invalid escape type."
If Len(str) = 0 Then Exit Function
Dim i As Long
Dim j As Long: j = 1
Dim result() As String: ReDim result(1 To Len(str))
Dim copyChunkSize As Long
Dim rndEscapeFormat As Boolean
rndEscapeFormat = ((escapeFormat And (escapeFormat - 1)) <> 0) 'eFmt <> 2^n
Dim numescapeFormats As Long
If rndEscapeFormat Then
Dim escapeFormats() As Long
For i = 0 To (Log(efAll + 1) / Log(2)) - 1
If 2 ^ i And escapeFormat Then
ReDim Preserve escapeFormats(0 To numescapeFormats)
escapeFormats(numescapeFormats) = 2 ^ i
numescapeFormats = numescapeFormats + 1
End If
Next i
End If
For i = 1 To Len(str)
Dim codepoint As Long: codepoint = AscU(Mid$(str, i, 2))
If codepoint > maxNonEscapedCharCode Then
If copyChunkSize > 0 Then
result(j) = Mid$(str, i - copyChunkSize, copyChunkSize)
copyChunkSize = 0
j = j + 1
End If
If rndEscapeFormat Then
escapeFormat = escapeFormats(Int(numescapeFormats * Rnd))
End If
Select Case escapeFormat
Case efPython
If codepoint > &HFFFF& Then 'Outside BMP
result(j) = "\u" & "00" & Right$("0" & Hex(codepoint), 6)
Else 'BMP
result(j) = "\u" & Right$("000" & Hex(codepoint), 4)
End If
Case efRust
result(j) = "\u{" & Hex(codepoint) & "}"
Case efUPlus
If codepoint < &H1000& Then
result(j) = "u+" & Right$("000" & Hex(codepoint), 4)
Else
result(j) = "u+" & Hex(codepoint)
End If
Case efMarkup
result(j) = "&#" & codepoint & ";"
End Select
If rndEscapeFormat Then
If Int(2 * Rnd) = 1 Then result(j) = UCase(result(j))
End If
j = j + 1
Else
If codepoint < &H10000 Then
copyChunkSize = copyChunkSize + 1
Else
copyChunkSize = copyChunkSize + 2
End If
End If
If codepoint > &HFFFF& Then i = i + 1
Next i
If copyChunkSize > 0 Then _
result(j) = Mid$(str, i - copyChunkSize, copyChunkSize)
EscapeUnicode = Join(result, "")
End Function
'Replaces all occurences of unicode literals
'Accepts the following formattings `escapeFormat`:
' efPython = 1 ... \uXXXX \u000XXXXX (4 or 8 hex digits, 8 for chars outside BMP)
' efRust = 2 ... \u{XXXX} \U{XXXXXXX} (1 to 6 hex digits)
' efUPlus = 4 ... u+XXXX u+XXXXXX (4 or 6 hex digits)
' efMarkup = 8 ... &#ddddddd; (1 to 7 decimal digits)
'Where:
' - prefixes \u is case insensitive
' - Xes are the digits of the codepoint in hexadecimal. (X = 0-9 or A-F/a-f)
'Example:
' - "abcd au+0062\U0063xy\u{64}", efAll returns "abcd abcxyd"
'Notes:
' - Avoid u+XXXX syntax if string contains literals without delimiters as it
' can be misinterpreted if adjacent to text starting with 0-9 or a-f.
' - This function also accepts all combinations of UnicodeEscapeFormats:
' E.g.:
'UnescapeUnicode("abcd au+0062\U0063xy\u{64}", efMarkup Or efRust)
' will return:
'"abcd au+0062\U0063xyd"
' - By default, this function will not invalidate UTF-16 strings if they are
' currently valid, but this can happen if `allowSingleSurrogates = True`
' E.g.: EscapeUnicode(ChrU(&HD801&, True)) returns "\uD801", but this string
' can no longer be un-escaped with UnescapeUnicode because "\uD801"
' represents a surrogate halve which is invalid unicode on its own.
' So UnescapeUnicode("\uD801") returns "\uD801" again, unless called with
' the optional parameter `allowSingleSurrogates = False` like this
' `UnescapeUnicode("\uD801", , True)`. This will return invalid UTF-16.
Public Function UnescapeUnicode(ByRef str As String, _
Optional ByVal escapeFormat As UnicodeEscapeFormat = efAll, _
Optional ByVal allowSingleSurrogates As Boolean = False) _
As String
If escapeFormat < [_efMin] Or escapeFormat > [_efMax] Then
Err.Raise 5, "EscapeUnicode", "Invalid escape format"
End If
Dim escapes() As EscapeSequence: escapes = NewEscapes()
Dim lb As Long: lb = LBound(escapes)
Dim ub As Long: ub = UBound(escapes)
Dim i As Long
For i = lb To ub 'Find first signature for each wanted format
With escapes(i)
If escapeFormat And .ueFormat Then
.buffPosition = InStr(1, str, .ueSignature, vbBinaryCompare)
.letSngSurrogate = allowSingleSurrogates
End If
End With
Next i
UnescapeUnicode = str 'Allocate buffer
Const posByte As Byte = &H80
Const buffSize As Long = 1024
Dim buffSignaturePos(1 To buffSize) As Byte
Dim buffFormat(1 To buffSize) As UnicodeEscapeFormat
Dim buffEscIndex(1 To buffSize) As Long
Dim posOffset As Long
Dim diff As Long
Dim highSur As Long
Dim lowSur As Long
Dim remainingLen As Long: remainingLen = Len(str)
Dim posChar As String: posChar = ChrB$(posByte)
Dim outPos As Long: outPos = 1
Dim inPos As Long: inPos = 1
Do
Dim upperLimit As Long: upperLimit = posOffset + buffSize
For i = lb To ub 'Find all signatures within buffer size
With escapes(i)
Do Until .buffPosition = 0 Or .buffPosition > upperLimit
.buffPosition = .buffPosition - posOffset
buffSignaturePos(.buffPosition) = posByte
buffFormat(.buffPosition) = .ueFormat
buffEscIndex(.buffPosition) = i
.buffPosition = .buffPosition + .sigSize + posOffset
.buffPosition = InStr(.buffPosition, str, .ueSignature)
Loop
End With
Next i
Dim temp As String: temp = buffSignaturePos
Dim nextPos As Long: nextPos = InStrB(1, temp, posChar)
Do Until nextPos = 0 'Unescape all found signatures from buffer
i = buffEscIndex(nextPos)
escapes(i).currPosition = nextPos + posOffset
Select Case buffFormat(nextPos)
Case efPython: TryPythonEscape escapes(i), str
Case efRust: TryRustEscape escapes(i), str
Case efUPlus: TryUPlusEscape escapes(i), str
Case efMarkup: TryMarkupEscape escapes(i), str
End Select
With escapes(i)
If .unEscSize > 0 Then
diff = .currPosition - inPos
If outPos > 1 Then
Mid$(UnescapeUnicode, outPos) = Mid$(str, inPos, diff)
End If
outPos = outPos + diff
If .unEscSize = 1 Then
Mid$(UnescapeUnicode, outPos) = ChrW$(.codepoint)
Else
.codepoint = .codepoint - &H10000
highSur = &HD800& Or (.codepoint \ &H400&)
lowSur = &HDC00& Or (.codepoint And &H3FF&)
Mid$(UnescapeUnicode, outPos) = ChrW$(highSur)
Mid$(UnescapeUnicode, outPos + 1) = ChrW$(lowSur)
End If
outPos = outPos + .unEscSize
inPos = .currPosition + .escSize
nextPos = nextPos + .escSize - .sigSize
End If
nextPos = InStrB(nextPos + .sigSize, temp, posChar)
End With
Loop
remainingLen = remainingLen - buffSize
posOffset = posOffset + buffSize
Erase buffSignaturePos
Loop Until remainingLen < 1
If outPos > 1 Then
diff = Len(str) - inPos + 1
If diff > 0 Then
Mid$(UnescapeUnicode, outPos, diff) = Mid$(str, inPos, diff)
End If
UnescapeUnicode = Left$(UnescapeUnicode, outPos + diff - 1)
End If
End Function
Private Function NewEscapes() As EscapeSequence()
Static escapes(0 To 6) As EscapeSequence
If escapes(0).ueFormat = [_efNone] Then
InitEscape escapes(0), efPython, "\U"
InitEscape escapes(1), efPython, "\u"
InitEscape escapes(2), efRust, "\U{"
InitEscape escapes(3), efRust, "\u{"
InitEscape escapes(4), efUPlus, "U+"
InitEscape escapes(5), efUPlus, "u+"
InitEscape escapes(6), efMarkup, "&#"
End If
NewEscapes = escapes
End Function
Private Sub InitEscape(ByRef escape As EscapeSequence, _
ByVal ueFormat As UnicodeEscapeFormat, _
ByRef ueSignature As String)
With escape
.ueFormat = ueFormat
.ueSignature = ueSignature
.sigSize = Len(ueSignature)
End With
End Sub
Private Sub TryPythonEscape(ByRef escape As EscapeSequence, ByRef str As String)
Const H As String = "[0-9A-Fa-f]"
Const PYTHON_ESCAPE_PATTERN_NOT_BMP = "00[01]" & H & H & H & H & H
Const PYTHON_ESCAPE_PATTERN_BMP As String = H & H & H & H & "*"
Dim potentialEscape As String
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 2, 8) 'Exclude leading \[Uu]
If potentialEscape Like PYTHON_ESCAPE_PATTERN_NOT_BMP Then
.escSize = 10 '\[Uu]00[01]HHHHH
.codepoint = CLng("&H" & potentialEscape) 'No extra Mid$ needed
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then
.unEscSize = 1
Exit Sub
End If
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
Exit Sub
End If
End If
If potentialEscape Like PYTHON_ESCAPE_PATTERN_BMP Then
.escSize = 6 '\[Uu]HHHH
.codepoint = CLng("&H" & Left$(potentialEscape, 4))
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
End If
End With
End Sub
Private Function IsValidBMP(ByVal codepoint As Long, _
ByVal letSingleSurrogate As Boolean) As Boolean
IsValidBMP = (codepoint < &HD800& Or codepoint >= &HE000& Or letSingleSurrogate)
End Function
Private Sub TryRustEscape(ByRef escape As EscapeSequence, ByRef str As String)
Static rustEscPattern(1 To 6) As String
Static isPatternInit As Boolean
Dim potentialEscape As String
Dim nextBrace As Long
If Not isPatternInit Then
Dim i As Long
rustEscPattern(1) = "[0-9A-Fa-f]}*"
For i = 2 To 6
rustEscPattern(i) = "[0-9A-Fa-f]" & rustEscPattern(i - 1)
Next i
isPatternInit = True
End If
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 3, 7) 'Exclude leading \[Uu]{
nextBrace = InStr(2, potentialEscape, "}", vbBinaryCompare)
If nextBrace = 0 Then Exit Sub
If Not potentialEscape Like rustEscPattern(nextBrace - 1) Then Exit Sub
.codepoint = CLng("&H" & Left$(potentialEscape, nextBrace - 1))
.escSize = nextBrace + 3
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
End If
End With
End Sub
Private Sub TryUPlusEscape(ByRef escape As EscapeSequence, _
ByRef str As String)
Const H As String = "[0-9A-Fa-f]"
Const UPLUS_ESCAPE_PATTERN_4_DIGITS = H & H & H & H & "*"
Const UPLUS_ESCAPE_PATTERN_5_DIGITS = H & H & H & H & H & "*"
Const UPLUS_ESCAPE_PATTERN_6_DIGITS = H & H & H & H & H & H
Dim potentialEscape As String
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 2, 6) 'Exclude leading [Uu]+
If potentialEscape Like UPLUS_ESCAPE_PATTERN_6_DIGITS Then
.escSize = 8
.codepoint = CLng("&H" & potentialEscape)
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then
.unEscSize = 1
Exit Sub
End If
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
Exit Sub
End If
End If
If potentialEscape Like UPLUS_ESCAPE_PATTERN_5_DIGITS Then
.escSize = 7
.codepoint = CLng("&H" & Left$(potentialEscape, 5))
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then
.unEscSize = 1
Exit Sub
End If
Else
.unEscSize = 2
Exit Sub
End If
End If
If potentialEscape Like UPLUS_ESCAPE_PATTERN_4_DIGITS Then
.escSize = 6
.codepoint = CLng("&H" & Left$(potentialEscape, 4))
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
End If
End With
End Sub
Private Sub TryMarkupEscape(ByRef escape As EscapeSequence, _
ByRef str As String)
Static mEscPattern(1 To 7) As String
Static isPatternInit As Boolean
Dim potentialEscape As String
Dim nextSemicolon As Long
If Not isPatternInit Then
Dim i As Long
For i = 1 To 6
mEscPattern(i) = String$(i, "#") & ";*"
Next i
mEscPattern(7) = "1######;"
isPatternInit = True
End If
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 2, 8) 'Exclude leading &[#]
nextSemicolon = InStr(2, potentialEscape, ";", vbBinaryCompare)
If nextSemicolon = 0 Then Exit Sub
If Not potentialEscape Like mEscPattern(nextSemicolon - 1) Then Exit Sub
.codepoint = CLng(Left$(potentialEscape, nextSemicolon - 1))
.escSize = nextSemicolon + 2
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
End If
End With
End Sub
'Returns the given unicode codepoint as standard VBA UTF-16LE string
Public Function ChrU(ByVal codepoint As Long, _
Optional ByVal allowSingleSurrogates As Boolean = False) As String
Const methodName As String = "ChrU"
Static st As TwoCharTemplate
Static lt As LongTemplate
If codepoint < &H8000 Then Err.Raise 5, methodName, "Codepoint < -32768"
If codepoint < 0 Then codepoint = codepoint And &HFFFF& 'Incase of uInt input
If codepoint < &HD800& Then
ChrU = ChrW$(codepoint)
ElseIf codepoint < &HE000& And Not allowSingleSurrogates Then
Err.Raise 5, methodName, "Range reserved for surrogate pairs"
ElseIf codepoint < &H10000 Then
ChrU = ChrW$(codepoint)
ElseIf codepoint < &H110000 Then
lt.l = (&HD800& Or (codepoint \ &H400& - &H40&)) _
Or (&HDC00 Or (codepoint And &H3FF&)) * &H10000 '&HDC00 with no &
LSet st = lt
ChrU = st.s
Else
Err.Raise 5, methodName, "Codepoint outside of valid Unicode range."
End If
End Function
'Returns a given characters unicode codepoint as long.
'Note: One unicode character can consist of two VBA "characters", a so-called
' "surrogate pair" (input string of length 2, so Len(char) = 2!)
Public Function AscU(ByRef char As String) As Long
AscU = AscW(char) And &HFFFF&
If Len(char) > 1 Then
Dim lo As Long: lo = AscW(Mid$(char, 2, 1)) And &HFFFF&
If &HDC00& > lo Or lo > &HDFFF& Then Exit Function
AscU = (AscU - &HD800&) * &H400& + (lo - &HDC00&) + &H10000
End If
End Function
'Function transcoding a VBA-native UTF-16LE encoded string to an ANSI string
'Note: Information will be lost for codepoints > 255!
Public Function EncodeANSI(ByRef utf16leStr As String) As String
Dim i As Long
Dim j As Long: j = 0
Dim utf16le() As Byte: utf16le = utf16leStr
Dim ansi() As Byte
ReDim ansi(1 To Len(utf16leStr))
For i = LBound(ansi) To UBound(ansi)
If utf16le(j + 1) = 0 Then
ansi(i) = utf16le(j)
j = j + 2
Else
ansi(i) = &H3F 'Chr(&H3F) = "?"
j = j + 2
End If
Next i
EncodeANSI = ansi
End Function
'Function transcoding an UTF-8 encoded string to the VBA-native UTF-16LE
'Function transcoding an VBA-native UTF-16LE encoded string to UTF-8
Public Function DecodeUTF8(ByRef utf8Str As String, _
Optional ByVal raiseErrors As Boolean = False) As String
Const methodName As String = "DecodeUTF8native"
Dim i As Long
Dim numBytesOfCodePoint As Byte
Static numBytesOfCodePoints(0 To 255) As Byte
Static mask(2 To 4) As Long
Static minCp(2 To 4) As Long
If numBytesOfCodePoints(0) = 0 Then
For i = &H0& To &H7F&: numBytesOfCodePoints(i) = 1: Next i '0xxxxxxx
'110xxxxx - C0 and C1 are invalid (overlong encoding)
For i = &HC2& To &HDF&: numBytesOfCodePoints(i) = 2: Next i
For i = &HE0& To &HEF&: numBytesOfCodePoints(i) = 3: Next i '1110xxxx
'11110xxx - 11110100, 11110101+ (= &HF5+) outside of valid Unicode range
For i = &HF0& To &HF4&: numBytesOfCodePoints(i) = 4: Next i
For i = 2 To 4: mask(i) = (2 ^ (7 - i) - 1): Next i
minCp(2) = &H80&: minCp(3) = &H800&: minCp(4) = &H10000
End If
Dim codepoint As Long
Dim currByte As Byte
Dim utf8() As Byte: utf8 = utf8Str
Dim utf16() As Byte: ReDim utf16(0 To (UBound(utf8) - LBound(utf8) + 1) * 2)
Dim j As Long: j = 0
Dim k As Long
i = LBound(utf8)
Do While i <= UBound(utf8)
codepoint = utf8(i)
numBytesOfCodePoint = numBytesOfCodePoints(codepoint)
If numBytesOfCodePoint = 0 Then
If raiseErrors Then Err.Raise 5, methodName, "Invalid byte"
GoTo insertErrChar
ElseIf numBytesOfCodePoint = 1 Then
utf16(j) = codepoint
j = j + 2
ElseIf i + numBytesOfCodePoint - 1 > UBound(utf8) Then
If raiseErrors Then Err.Raise 5, methodName, _
"Incomplete UTF-8 codepoint at end of string."
GoTo insertErrChar
Else
codepoint = utf8(i) And mask(numBytesOfCodePoint)
For k = 1 To numBytesOfCodePoint - 1
currByte = utf8(i + k)
If (currByte And &HC0&) = &H80& Then
codepoint = (codepoint * &H40&) + (currByte And &H3F)
Else
If raiseErrors Then _
Err.Raise 5, methodName, "Invalid continuation byte"
GoTo insertErrChar
End If
Next k
'Convert the Unicode codepoint to UTF-16LE bytes
If codepoint < minCp(numBytesOfCodePoint) Then
If raiseErrors Then Err.Raise 5, methodName, "Overlong encoding"
GoTo insertErrChar
ElseIf codepoint < &HD800& Then
utf16(j) = CByte(codepoint And &HFF&)
utf16(j + 1) = CByte(codepoint \ &H100&)
j = j + 2
ElseIf codepoint < &HE000& Then
If raiseErrors Then Err.Raise 5, methodName, _
"Invalid Unicode codepoint.(Range reserved for surrogate pairs)"
GoTo insertErrChar
ElseIf codepoint < &H10000 Then
If codepoint = &HFEFF& Then GoTo nextCp '(BOM - will be ignored)
utf16(j) = codepoint And &HFF&
utf16(j + 1) = codepoint \ &H100&
j = j + 2
ElseIf codepoint < &H110000 Then 'Calculate surrogate pair
Dim m As Long: m = codepoint - &H10000
Dim loSurrogate As Long: loSurrogate = &HDC00& Or (m And &H3FF)
Dim hiSurrogate As Long: hiSurrogate = &HD800& Or (m \ &H400&)
utf16(j) = hiSurrogate And &HFF&
utf16(j + 1) = hiSurrogate \ &H100&
utf16(j + 2) = loSurrogate And &HFF&
utf16(j + 3) = loSurrogate \ &H100&
j = j + 4
Else
If raiseErrors Then Err.Raise 5, methodName, _
"Codepoint outside of valid Unicode range"
insertErrChar: utf16(j) = &HFD
utf16(j + 1) = &HFF
j = j + 2
If numBytesOfCodePoint = 0 Then numBytesOfCodePoint = 1
End If
End If
nextCp: i = i + numBytesOfCodePoint 'Move to the next UTF-8 codepoint
Loop
DecodeUTF8 = MidB$(utf16, 1, j)
End Function
注意:该函数可以像这样被“滥用”,因为转义字符串中的 UTF-8 字节将始终被解码为单字节 UTF-16 字符,因为根据定义,它们是单字节。这意味着该函数仅用于从字符串中删除每秒一个字节(由于 UTF-16 表示单字节字符的方式,这些字节都是 null。生成的字符串是所需字符串的 UTF-8 表示形式,然后我们对其进行“解码”(转换为 UTF-16),因为这是 vb6 表示 Unicode 字符串的原生方式。EncodeANSI
EncodeANSI
我也包含了这个函数,所以你可以看到你的字符串实际上应该是什么样子的转义的 unicode 代码点:EscapeUnicode
actualEscapeSequence = EscapeUnicode(DecodeUTF8(EncodeANSI(UnescapeUnicode(StrResult))))
actualEscapeSequence
将等于“\u0633\u0644\u0627\u0645 \u062F\u0646\u06CC\u0627”,您可以在此处确认它是正确的 unicode 转义序列。
评论