diff --git a/Software/Development/Language/Python/Python_str_与_bytes_之间的转换.md b/Software/Development/Language/Python/Python_str_与_bytes_之间的转换.md index 093d1d9..0ffa9c2 100644 --- a/Software/Development/Language/Python/Python_str_与_bytes_之间的转换.md +++ b/Software/Development/Language/Python/Python_str_与_bytes_之间的转换.md @@ -1,22 +1,42 @@ -# Python str 与 bytes 之间的转换 +# Python string 与 bytes 之间的转换 + +总的来说,bytes 和 string 的关系是: + +bytes ---decode--> string +bytes <--encode--- string + +常见的几种编码及格式: + +* utf8:形如\xe4\xbb\x8a\xe5\xa4\xa9 +* unicode:形如\u4eca\u5929\u5929\u6c14\u4e0d\u9519 + +如果 "\" 变成了 "\\" 说明原字符串是编码后的格式,变成 "\\" 是因为转换成了 bytes。 + +## 1.string 转 bytes ```python -# bytes object -b = b"example" - -# str object -s = "example" - -# str to bytes -sb = bytes(s, encoding = "utf8") - -# bytes to str -bs = str(b, encoding = "utf8") - -# an alternative method -# str to bytes -sb2 = str.encode(s) - -# bytes to str -bs2 = bytes.decode(b) +s = "abc" # string +s = "abc".encode() # bytes,encode 默认编码方式是 utf-8 +s = b"abc" # bytes +# 或 +s = "abc" # string +s = bytes(s, encoding = "utf8") # bytes +``` + +## 2.bytes 转 string + +```python +s = b"abc" # bytes +s = b"abc".decode() # string,decode 默认编码方式是 utf-8 +s = str(b"") # string,但不指定 encoding 时得到的是 repr 形式 "b''",并非解码结果 +# 或 +s = b"abc" # bytes +s = str(s, encoding = "utf8") # string +``` + +## 3.bytes 类型的 unicode(中文)输出 + +```python +s = '\\u4eca\\u5929\\u5929\\u6c14\\u4e0d\\u9519' # 中文是:今天天气不错 +new_s = s.encode().decode('unicode_escape') # 输出为:今天天气不错 ```