1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
// Copyright 2017-2025 WIT.COM Inc. All rights reserved.
// Use of this source code is governed by the GPL 3.0
package bugpb
import (
"bytes"
"fmt"
"io"
"reflect"
"unicode/utf8"
"golang.org/x/text/encoding/charmap"
"google.golang.org/protobuf/proto"
)
// ValidateProtoUTF8 checks all string fields in a proto.Message recursively.
func ValidateProtoUTF8(msg proto.Message) error {
return validateValue(reflect.ValueOf(msg), "")
}
func validateValue(val reflect.Value, path string) error {
if !val.IsValid() {
return nil
}
if val.Kind() == reflect.Ptr {
if val.IsNil() {
return nil
}
return validateValue(val.Elem(), path)
}
switch val.Kind() {
case reflect.Struct:
for i := 0; i < val.NumField(); i++ {
field := val.Field(i)
fieldType := val.Type().Field(i)
fieldPath := fmt.Sprintf("%s.%s", path, fieldType.Name)
if err := validateValue(field, fieldPath); err != nil {
return err
}
}
case reflect.String:
s := val.String()
if !utf8.ValidString(s) {
return fmt.Errorf("invalid UTF-8 string at %s: %q", path, s)
}
case reflect.Slice:
if val.Type().Elem().Kind() == reflect.Uint8 {
return nil // skip []byte
}
for i := 0; i < val.Len(); i++ {
if err := validateValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
return err
}
}
case reflect.Map:
for _, key := range val.MapKeys() {
valItem := val.MapIndex(key)
if err := validateValue(valItem, fmt.Sprintf("%s[%v]", path, key)); err != nil {
return err
}
}
}
return nil
}
// SanitizeProtoUTF8 fixes all invalid UTF-8 strings in a proto.Message recursively.
func SanitizeProtoUTF8(msg proto.Message) error {
return sanitizeValue(reflect.ValueOf(msg), "")
}
func sanitizeValue(val reflect.Value, path string) error {
if !val.IsValid() {
return nil
}
if val.Kind() == reflect.Ptr {
if val.IsNil() {
return nil
}
return sanitizeValue(val.Elem(), path)
}
switch val.Kind() {
case reflect.Struct:
for i := 0; i < val.NumField(); i++ {
field := val.Field(i)
fieldType := val.Type().Field(i)
if !field.CanSet() {
continue
}
if err := sanitizeValue(field, fmt.Sprintf("%s.%s", path, fieldType.Name)); err != nil {
return err
}
}
case reflect.String:
s := val.String()
if !utf8.ValidString(s) {
utf8Str, err := latin1ToUTF8(s)
if err != nil {
return fmt.Errorf("failed to convert %s to UTF-8: %v", path, err)
}
val.SetString(utf8Str)
}
case reflect.Slice:
if val.Type().Elem().Kind() == reflect.Uint8 {
return nil // skip []byte
}
for i := 0; i < val.Len(); i++ {
if err := sanitizeValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
return err
}
}
case reflect.Map:
for _, key := range val.MapKeys() {
valItem := val.MapIndex(key)
newItem := reflect.New(valItem.Type()).Elem()
newItem.Set(valItem)
if err := sanitizeValue(newItem, fmt.Sprintf("%s[%v]", path, key)); err != nil {
return err
}
val.SetMapIndex(key, newItem)
}
}
return nil
}
func latin1ToUTF8(input string) (string, error) {
reader := charmap.ISO8859_1.NewDecoder().Reader(bytes.NewReader([]byte(input)))
result, err := io.ReadAll(reader)
if err != nil {
return "", err
}
return string(result), nil
}
|