Merge pull request #721 from ttys3/fixup-isBinary
fixup func isBinary to handle CJK runes correctly
Former-commit-id: 08c8613759eed7d4790c9fea99273f60e88531a0 [formerly 82af4df6a155423555865a4d16a62c74befcefd1] [formerly 20751c4cd90807c51145c5284573509f26e5c934 [formerly 22bbad84fb
]]
Former-commit-id: 4561311363fd52b51838e71408ff270c2f376c97 [formerly e6452a14dd39fa07dc8d77a2b47b7a03b58c0029]
Former-commit-id: aca07de59fcdb2f079b839572cb431c792719158
This commit is contained in:
commit
1f8ec36eef
|
@ -164,7 +164,7 @@ func (i *FileInfo) detectType(modify, saveContent bool) error {
|
|||
case strings.HasPrefix(mimetype, "image"):
|
||||
i.Type = "image"
|
||||
return nil
|
||||
case isBinary(string(buffer[:n])) || i.Size > 10*1024*1024: // 10 MB
|
||||
case isBinary(buffer[:n], n) || i.Size > 10*1024*1024: // 10 MB
|
||||
i.Type = "blob"
|
||||
return nil
|
||||
default:
|
||||
|
|
|
@ -1,12 +1,46 @@
|
|||
package files
|
||||
|
||||
func isBinary(content string) bool {
|
||||
for _, b := range content {
|
||||
// 65533 is the unknown char
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func isBinary(content []byte, n int) bool {
|
||||
maybeStr := string(content)
|
||||
runeCnt := utf8.RuneCount(content)
|
||||
runeIndex := 0
|
||||
gotRuneErrCnt := 0
|
||||
firstRuneErrIndex := -1
|
||||
|
||||
for _, b := range maybeStr {
|
||||
// 8 and below are control chars (e.g. backspace, null, eof, etc)
|
||||
if b <= 8 || b == 65533 {
|
||||
if b <= 8 {
|
||||
return true
|
||||
}
|
||||
|
||||
// 0xFFFD(65533) is the "error" Rune or "Unicode replacement character"
|
||||
// see https://golang.org/pkg/unicode/utf8/#pkg-constants
|
||||
if b == 0xFFFD {
|
||||
//if it is not the last (utf8.UTFMax - x) rune
|
||||
if runeCnt > utf8.UTFMax && runeIndex < runeCnt-utf8.UTFMax {
|
||||
return true
|
||||
} else {
|
||||
//else it is the last (utf8.UTFMax - x) rune
|
||||
//there maybe Vxxx, VVxx, VVVx, thus, we may got max 3 0xFFFD rune (asume V is the byte we got)
|
||||
//for Chinese, it can only be Vxx, VVx, we may got max 2 0xFFFD rune
|
||||
gotRuneErrCnt++
|
||||
|
||||
//mark the first time
|
||||
if firstRuneErrIndex == -1 {
|
||||
firstRuneErrIndex = runeIndex
|
||||
}
|
||||
}
|
||||
}
|
||||
runeIndex++
|
||||
}
|
||||
|
||||
//if last (utf8.UTFMax - x ) rune has the "error" Rune, but not all
|
||||
if firstRuneErrIndex != -1 && gotRuneErrCnt != runeCnt-firstRuneErrIndex {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue