>>> out = open('/tmp/spam', 'w')
>>> inp = open('cow_names', 'r')
>>> inp = open('cow_names')
>>> inp = open('logfile', 'a')
>>> inp = open('dingdong', 'r+')
>>> inp.readline()
'I am name number 1\n'
>>> lines = inp.readlines()
['I am name number 1\n', 'I am name number 1, stupid!\n',→
'I am name number 2, or 3!\n']
>>> inp.read()
'I am name number 1\nI am name number 1, stupid!\n→
I am name number 2, or 3!\n'
>>> inp.read(4)
'I am'
>>> sys.stdin.read(4)
'I am'
>>> inp.close()
>>> for line in open('cow_names'):
print 'I want a %d-tone %s!' %
(random.randint(1, 4), line.strip())
I want a 4-tone Gregorius!
I want a 1-tone Calvin!
I want a 3-tone Shlyushltushlyu!
I want a 3-tone Yabadaduuuu!
>>> print >>out, "Another One Bites the Dust"
>>> out.write("Another One Bites the Dust")
>>> out.writelines(["Gargantua\n", "42 is my lucky birth date!\n"])
>>> print >>sys.stdout, "Bingo was here!"
>>> out.flush()
>>> out.close()
Unicode provides a unique number for every character,
no matter what the platform,
no matter what the program,
no matter what the language.
От | До | Двоична поредица по байтове |
---|---|---|
000000 | 00007F | 0xxxxxxx |
000080 | 0007FF | 110xxxxx 10xxxxxx |
000800 | 00FFFF | 1110xxxx 10xxxxxx 10xxxxxx |
010000 | 10FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
str срещу unicode
>>> s = open('cow_names').read()
>>> s # поредица от байтове
'\xd0\xbf\xd0\xb8\xd0\xbb\xd0\xb5'
>>> u = unicode(s)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 0: ordinal not in range(128)
>>> u = unicode(s, 'utf8') # същото като u = s.decode('utf8')
>>> len(s), len(u)
(8, 4)
>>> s[0], u[0]
('\xd0', u'\u043f')
>>> ord(s[0]), ord(u[0])
(208, 1087)
u = bytestring.decode(encoding)
u = unicode(bytestring, encoding)
\uxxxx
: u'\u03bb' → λ
, само ако сте поставили коментар като # -*- coding: encoding -*-
>>> out = open('cow_names', 'w')
>>> print >>out, u'баба'
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-3:→
ordinal not in range(128)
>>> print >>out, u'баба'.encode('utf8')
>>> print >>out, u'баба'.encode('cp1251')
>>> out = codecs.open('cow_names', 'w', encoding='utf8')
>>> out.write(u'баба')
>>> out.close()
>>> inp = codecs.open('cow_names', 'r', encoding='utf8')
>>> inp.readline()
u'\u0431\u0430\u0431\u0430'