# stringdemo.py

import unicodedata as uu  # properties of Unicode characters
from string import Template
from string import capwords

# Ordinary string literals in Python. Escape sequences (such as \t for
# a tab) can be used to embed special characters inside string literals.

s1 = "Hello there,\tworld!"

# Python 3 strings are sequences of arbitrary Unicode characters. Sample
# every 500th code point, report its Unicode properties, and collect the
# sampled characters into s2.

sampled = []
for code in range(100, 10100, 500):
    cc = chr(code)
    sampled.append(cc)
    # Without a default, unicodedata.name raises ValueError for a code
    # point that has no name, which would abort the whole demonstration.
    name, cat = uu.name(cc, "<unnamed>"), uu.category(cc)
    print(f"Character {cc} at code point {code} is {name}, in category {cat}.")

# Join once at the end instead of growing the string piece by piece.
s2 = "".join(sampled)

# Old time byte strings hold raw bytes (values 0-255). Only ASCII
# characters may be typed directly into a bytes literal; any other byte
# value must be written with an escape. The \x60 below is a backtick.

sb = b'Only ASCII characters allowed here!\x60'

print(f"String s1 is <{s1}>.")
print(f"String s2 is <{s2}>.")
print(f"String sb is {sb}.")

# Two sample values for the formatting demonstrations below.
a, b = 42, 123.45678

# Python formatted string. There is a whole minilanguage available:
# here hexadecimal, fixed-point and scientific notation. The two adjacent
# literals are glued together into a single string.

s3 = (
    f"a is now {a} (which is {a:x} in hex), and b is now {b:.3f} "
    f"which is {b:e} in scientific notation."
)
print(s3)   # note the correct rounding of floating point number b

# The finished string is an ordinary string; it does not remember
# which variables its characters came from.

a = 99
print(s3)   # still says that a is 42

# With no presentation type after the precision, the 3 counts
# significant digits, so b is rendered as 1.23e+02 here.
s4 = f"a is now {a}, b is now {b:7.3}."
print(s4)   # Here a is 99

# Each placeholder is evaluated once, when the f-string itself is
# evaluated. The placeholder expressions are compiled to Python bytecode
# together with the rest of the script, so a syntax error inside one is
# revealed before the script starts running. A malformed format
# specifier, like the one below, is instead rejected with a ValueError
# when the line is executed. Uncomment to see this happen.

# print(f"a is now {a:!4&^#}")

a = 42
print(s4)  # a is still 99 inside the string

# string.Template is yet another way to compose strings from smaller
# pieces. Very few examples of people actually using this scheme have
# been found. The mystery deepens.

tt = Template("$who likes $food.")

# Python's flexibility to allow arbitrary keyword arguments in functions
# really displays its power and usefulness here: each mapping below is
# unpacked into the keyword arguments that fill in the placeholders.
for fan in ({"who": "Bob", "food": "sushi"}, {"who": "Jack", "food": "steak"}):
    print(tt.substitute(**fan))

# The old-timey string interpolation operator % behaves like printf in C.
# Note that this rebinds the name s2.

s2 = "a is now %3d, b is now %.4f." % (a, b)
print(s2)

# Strings are iterable: an explicit iterator can be created over one
# and advanced a character at a time.

it = iter(s1)
print(next(it), next(it), sep="\n")  # H and e, each on its own line
for x in it:  # the loop continues from where the next() calls stopped
    print(f"{ord(x)} ", end="")  # Unicode code points of the rest
print()  # line break

# Raw strings leave backslashes exactly as written, which is handy with
# regexes and other systems that treat special characters differently
# from Python.

s5 = r"Bunch of % \ special characters as they are."
print(s5)

# Unicode has lots of crazy stuff.
# https://mortoray.com/2013/11/27/the-string-type-is-broken/

# The letter e is followed by a separate combining diaeresis (U+0308),
# so the string holds five code points although it displays as four.
s6 = "noe\u0308l"
print(s6)
# Reversing unicode is tricksy: both forms below reverse code points,
# so the combining mark ends up attached to a different letter.
print(*reversed(s6), sep="")
print(s6[::-1])  # how to reverse a Python string

# Taken from "WTF Python". What is going on here? The two names below
# look the same but are distinct identifiers: the second one ends in a
# look-alike non-ASCII letter (presumably Cyrillic U+0435 -- confirm with
# a hex dump), so the second assignment creates a separate variable
# instead of overwriting the first.
value = 11
valuе = 32    # Linters in PyCharm and such reveal the subterfuge
print(value)  # 11
print(valuе)  # 32

# Some handy methods on strings. Each one returns a new string with the
# letter case changed; the methods are called here through the str class.

for recase in (str.title, str.upper, str.lower, str.capitalize):
    print(recase(s1))

# Add whitespace to both sides, to the right, or to the left to reach
# the given length.

for pad in (str.center, str.ljust, str.rjust):
    print(pad('1234', 10))

# In principle every string operation could be built from the methods
# that we already have, but it's pointless to reinvent the wheel
# unless you feel like exercising.

s = "let us capitalize every word of this sentence."
print(capwords(s))
# This one would not be too difficult to do on our own: split into
# words, capitalize each, and glue them back with single spaces.
print(" ".join(map(str.capitalize, s.split())))