Foundation Summary

  • Print
  • Calculation Function
  • Variable
  • While loop
  • For loop
  • If/Elif/Else Condition
  • Function Definition[Def] with/without parameters
  • Global or Local Variable
  • Read or Write files
    • readlines() and readline()
  • Class
    • __init__ constructor
  • input
  • Tuple & List
    • Both are iterable
  • List
    • append insert remove
  • Multi-dimensional List
  • Dictionary
    • del (can also be used for lists)
  • Import
  • Continue & Break
  • Error processing[Try/Except]
  • Zip

    • Output is an object
      1
      2
      3
      4
      5
      6
      7
      a = [1, 2, 3]
      b = [4, 5]
      # zip() returns an iterator object that stops at the shortest input;
      # convert it to a list to see the pairs
      pairs = list(zip(a, b))
      # Output of list(zip(a, b)) is: [(1, 4), (2, 5)]
      # We can also use a for loop to iterate over each pair in the zip object
      for i, j in zip(a, b):
          print(i, j)
  • Lambda
    Example:

    1
    2
    3
    4
    def fun1(x, y):
        """Return the result of x + y."""
        return x + y

    # A lambda is an anonymous single-expression function
    fun2 = lambda x, y: x + y
    # fun1 is the same as fun2
  • Map

    • Output is an object
      Example:
      1
      2
      3
      4
      5
      def fun1(x, y):
          """Return x + y (a sum for numbers, a concatenation for lists)."""
          return x + y

      # map() calls fun1 on the inputs element by element and stops at the
      # shortest input, so the unpaired 3 is ignored
      mapped = list(map(fun1, [1, 2, 3], [4, 5]))
      # Output is: [5, 7]
      # Note: The output of fun1([1], [2]) is: [1, 2]
  • Copy & Deepcopy

    • Python objects share an address (pointer) when assigned without copying
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      # ********Copy********:
      import copy

      a = [1, 2, 3]
      b = a  # plain assignment: b is a second name for the SAME list
      b[0] = 11
      # now a == b == [11, 2, 3]
      # id(a) == id(b) is True
      c = copy.copy(a)  # shallow copy: a new outer list
      # id(a) == id(c) is False
      # Note:
      a = [1, 2, [3, 4]]
      d = copy.copy(a)
      # id(a) == id(d) is False
      # id(a[2]) == id(d[2]) is True
      # Because a shallow copy only copies the outer list; a[2] and d[2] are the same inner list object
      # Note2 (separate names are used so the nested list `a` stays available below):
      x = 2
      y = x
      x = 3
      # y is still 2: rebinding x does not change what y refers to
      # ********Deepcopy ********:
      e = copy.deepcopy(a)  # copies the nested list as well
      # id(a[2]) == id(e[2]) is False

Multi-Thread

Lead to Improve Efficiency

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import threading
import time

# check the number of threads
print(threading.active_count())
# check all the details of threads
print(threading.enumerate())
# check which thread is running this code
print(threading.current_thread())


# ********Extend********:
def thread_job():
    """Announce the current thread, sleep 10 x 0.1 s, then report that T1 finished."""
    print("MSG : This is a new Thread, number = %s\n" % threading.current_thread())
    for i in range(10):
        time.sleep(0.1)
    print("MSG : T1 Finished.\n")


# The keyword argument is lowercase `name`; `Name` raises a TypeError
new_thread = threading.Thread(target=thread_job, name='T1')


def thread_job2():
    """Print the start and finish messages of T2 without doing any work."""
    print("MSG : T2 Start.\n")
    print("MSG : T2 Finished.\n")


new2_thread = threading.Thread(target=thread_job2, name='T2')
new_thread.start()
new2_thread.start()
# ********Join********:
# print("MSG : Done.\n")
# Without join(), "Done" may be printed before "T2 Finished"
new2_thread.join()  # wait for T2 only
print("MSG : Done.\n")
# T1 is slower than T2 and is not joined, so "Done" will show before "T1 Finished"

Example of using a queue with threads:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# ********Queue********:
import threading
import time
from queue import Queue


def job(l, q, delay=1):
    """Square every element of `l` in place and put the list on `q`.

    A thread target cannot hand a value back with `return`, so the result
    is passed to the caller through the queue instead.

    l: list of numbers; it is modified in place.
    q: queue that receives the squared list.
    delay: seconds to sleep before publishing the result (simulates a slow job).
    """
    for i, value in enumerate(l):
        l[i] = value ** 2
    time.sleep(delay)
    q.put(l)


def multithreading():
    """Run job() on three lists in parallel threads and print the collected results."""
    q = Queue()
    threads = []
    data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    for chunk in data:
        t = threading.Thread(target=job, args=(chunk, q))
        t.start()
        print("MSG : Number of thread is %s" % threading.active_count())
        threads.append(t)
    # wait for every worker before reading the queue
    for t in threads:
        t.join()
    # one result per thread; the order depends on which thread finished first
    results = [q.get() for _ in threads]
    print(results)


if __name__ == '__main__':
    multithreading()

Global Interpreter lock(GIL):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# ********GIL********:
# The GIL lets only one thread execute Python bytecode at a time
# Therefore, for example, 4 threads do not make a CPU-bound job 4 times faster
import threading
from queue import Queue
import copy
import time


def job(l, q):
    """Put the sum of `l` on queue `q`."""
    result = sum(l)
    q.put(result)


def multi(l):
    """Sum a copy of `l` in each of four threads and print the grand total."""
    q = Queue()
    threads = []
    for i in range(4):
        t = threading.Thread(target=job, args=(copy.copy(l), q), name='T%i' % i)
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    total = 0
    for _ in range(4):
        total += q.get()
    print(total)


def normal(l):
    """Sum `l` in the calling thread and print the total."""
    total = sum(l)
    print(total)


if __name__ == '__main__':
    l = list(range(1000000))
    current_time = time.time()
    normal(l * 4)
    print('MSG : normal time: ', time.time() - current_time)
    current_time = time.time()
    multi(l)
    print('MSG : multithreading time: ', time.time() - current_time)

Lock example (sequential operation across multiple threads)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# ********Lock********:
import threading


def job1():
    """Add 1 to the shared global A ten times while holding the global lock."""
    global A
    # `with` releases the lock even if the body raises
    with lock:
        for i in range(10):
            A += 1
            print('MSG : job1 ', A)


def job2():
    """Add 10 to the shared global A ten times while holding the global lock."""
    global A
    with lock:
        for i in range(10):
            A += 10
            print('MSG : job2 ', A)


if __name__ == '__main__':
    lock = threading.Lock()
    A = 0
    t1 = threading.Thread(target=job1)
    t2 = threading.Thread(target=job2)
    t1.start()
    t2.start()
    t1.join()
    t2.join()

Multiprocessing

Create a process

1
2
3
4
5
6
7
8
9
10
11
12
# ********Extend********:
import multiprocessing as mp
import threading as td


def job(a, b):
    """Print the sum of a and b."""
    print(a + b)


# processes must be started from inside the __main__ guard
if __name__ == '__main__':
    new_process = mp.Process(target=job, args=(1, 2))
    new_process.start()
    new_process.join()

Example of using a queue with processes:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# ********Queue********:
import multiprocessing as mp


def job(q):
    """Put the sum of i + i**2 + i**3 for i in range(1000) on queue `q`.

    A process target cannot hand a value back with `return`, so the
    result is passed through the queue instead.
    """
    result = 0
    for i in range(1000):
        result += i + i ** 2 + i ** 3
    q.put(result)


if __name__ == '__main__':
    q = mp.Queue()
    # Don't forget the ',' after the argument: args must be a tuple even for one parameter
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    result1 = q.get()
    result2 = q.get()
    print(result1 + result2)

Efficiency Comparison(normal, multithreading, multiprocessing)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# ********Efficiency Comparison********:
import multiprocessing as mp
import threading as td
from queue import Queue
import time


def job(q):
    """Put the sum of i + i**2 + i**3 for i in range(100000) on queue `q`."""
    result = 0
    for i in range(100000):
        result += i + i ** 2 + i ** 3
    q.put(result)


def normal():
    """Do the work of two job() calls sequentially and print the total."""
    result = 0
    for _ in range(2):
        for i in range(100000):
            result += i + i ** 2 + i ** 3
    print('MSG : normal ', result)


def multiprocess():
    """Run job() in two processes at the same time and print the combined result."""
    q = mp.Queue()
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    # start both before joining; joining p1 first would run the processes one after another
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    result1 = q.get()
    result2 = q.get()
    print("MSG : Processing ", result1 + result2)


def multithread():
    """Run job() in two threads at the same time and print the combined result."""
    q = Queue()
    t1 = td.Thread(target=job, args=(q,))
    t2 = td.Thread(target=job, args=(q,))
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    result1 = q.get()
    result2 = q.get()
    print("MSG : Threading ", result1 + result2)


if __name__ == '__main__':
    current_time = time.time()
    normal()
    print("MSG : normal time: ", time.time() - current_time)
    current_time = time.time()
    multithread()
    print("MSG : multithread time: ", time.time() - current_time)
    current_time = time.time()
    multiprocess()
    print("MSG : multiprocess time: ", time.time() - current_time)

Processing Pool

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# ********Pool********:
import multiprocessing as mp


def job(x):
    """Return x squared."""
    return x * x


def multiprocess():
    """Demonstrate Pool.map and Pool.apply_async with a pool of three workers."""
    # the context manager shuts the worker processes down when the block ends
    with mp.Pool(processes=3) as pool:
        # map applies job to every item and returns a list
        result = pool.map(job, range(10))
        print(result)
        # apply_async submits ONE call; get() returns its value (an int here)
        result = pool.apply_async(job, (2,))
        print(result.get())
        # Note: to process several inputs with apply_async, submit one call per input
        # each element of multi_result is a result object; call get() to read it
        multi_result = [pool.apply_async(job, (i,)) for i in range(10)]
        print([res.get() for res in multi_result])


if __name__ == '__main__':
    multiprocess()

Shared memory

1
2
3
4
5
6
7
# ********Shared memory********:
import multiprocessing as mp

# An Array is limited to a single dimension; typecode 'i' with initial items 1, 2, 3
array = mp.Array('i', [1, 2, 3])
# A Value holds a single item; typecode 'd' with initial value 1
value = mp.Value('d', 1)
# Value and Array can be shared among multiple cores

Lock example(avoid different cores processing out of order with shared variable)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# ********Lock********:
import multiprocessing as mp
import time


def job(v, num, l, delay=0.1):
    """Add `num` to the shared value `v` ten times while holding lock `l`.

    v: object with a numeric `value` attribute (an mp.Value in multiprocess()).
    num: amount added on each of the ten steps.
    l: lock held for the whole loop so the additions of one worker stay together.
    delay: seconds to sleep before each addition.
    """
    # `with` releases the lock even if the body raises
    with l:
        for _ in range(10):
            time.sleep(delay)
            v.value += num
            print(v.value)


def multiprocess():
    """Run two job() processes that update one shared integer under a shared lock."""
    l = mp.Lock()
    v = mp.Value('i', 0)
    p1 = mp.Process(target=job, args=(v, 1, l))
    p2 = mp.Process(target=job, args=(v, 3, l))
    p1.start()
    p2.start()
    p1.join()
    p2.join()


if __name__ == '__main__':
    multiprocess()

Data Processing

Numpy

Numpy Foundation

1
2
3
4
5
6
7
8
9
# ********Numpy Foundation********:
import numpy as np

rows = [[1, 2, 3], [2, 3, 4]]
array = np.array(rows)
print(array)
# For a one-dimensional array, shape is (num, )
for label, attribute in (
    ("number of dims", array.ndim),
    ("shape", array.shape),
    ("size", array.size),
):
    print("MSG : %s= " % label, attribute)

Numpy Array

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# ********Numpy Array********:
import numpy as np

# np.int was removed from NumPy; the builtin int selects the default integer type
a = np.array([[2, 3, 4], [5, 6, 7]], dtype=int)
print(a.dtype)
b = np.zeros((3, 4), dtype=np.int32)
print(b)
c = np.ones((3, 4), dtype=np.int32)
print(c)
# np.empty does not initialise its memory: the printed values are arbitrary
# (often close to zero, but never rely on them)
d = np.empty((3, 4), dtype=np.int32)
print(d)
# start, stop (exclusive), step
e = np.arange(10, 20, 2)
f = np.arange(12).reshape((3, 4))
print(e)
print(f)
# 20 evenly spaced points from 1 to 10, both ends included
g = np.linspace(1, 10, 20)
print(g)

Some Useful Numpy Calculation Formula

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# ********Numpy Calculation********:
import numpy as np

a = np.array([10, 20, 30, 40])
b = np.arange(4)
c = a - b
# Output array composed of int numbers
print(c)
# Output array composed of boolean elements
print(b < 3)
np.dot(a, b)
rd = np.arange(2, 6)
# rd is one-dimensional and only has axis 0, so the axis examples use a 2 x 2 view of it
rd2 = rd.reshape((2, 2))
# axis = 1 gives one result per row; axis = 0 gives one result per column
np.sum(rd2, axis=1)
np.min(rd2, axis=0)
np.max(rd)
np.argmin(rd)
np.mean(rd)
np.average(rd)
np.median(rd)
# Output is [2, 5, 9, 14]
np.cumsum(rd)
# Output is [1, 1, 1]
np.diff(rd)
# Output is a tuple of index arrays, one per dimension, giving the positions of all nonzero elements
np.nonzero(rd)
# sort works along the last axis, i.e. each row is sorted independently
np.sort(rd)
np.sort(rd2)
# transpose; rd.T does the same directly
np.transpose(rd)
# dot of two 1-D arrays is their inner product; for 2-D arrays it is matrix multiplication
(rd.T).dot(rd)
np.clip(rd, 2, 4)
# Note: use axis to choose the direction of a calculation (0 -> one result per column, 1 -> one result per row)

Search From Numpy Array

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# ********Numpy Index Search********:
import numpy as np

A = np.arange(3, 15).reshape((3, 4))
A[2]  # Output is [11, 12, 13, 14]
A[2][1]
# the same as (row index first, then column index)
A[2, 1]
A[:, 1]
A[1, 1:2]
for row in A:
    print(row)
# Trick: iterate over the transpose to walk through the columns
for column in A.T:
    print(column)
# A.flat yields the elements of A one by one, like a generator
# Note: A.flat is different from A.flatten()
# the former is an iterator object, the latter returns a new one-dimensional array
for item in A.flat:
    print(item)

Merge Numpy Array

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# ********Merge Numpy Array********:
import numpy as np

A = np.array([1, 1, 1])
B = np.array([2, 2, 2])
# vertical stack with output [[1, 1, 1], [2, 2, 2]]
C = np.vstack((A, B))
print(A.shape, C.shape)
# horizontal stack with output [1, 1, 1, 2, 2, 2]
D = np.hstack((A, B))
# Note: transposing cannot turn shape (3,) into a column of shape (3, 1)
A_ = A[:, np.newaxis]  # newaxis adds a dimension: shape (3,) becomes (3, 1)
B_ = B[:, np.newaxis]
# To merge column-wise and get [[1, 2], [1, 2], [1, 2]] we can use:
E = np.hstack((A[:, np.newaxis], B[:, np.newaxis]))
# the same as:
F = np.concatenate((A_, B_), axis=1)
# Note: np.concatenate((A, B), axis = 1) raises an error because A and B are one-dimensional and have no axis 1

Split Numpy Array

1
2
3
4
5
6
7
8
9
10
11
12
13
# ********Split Numpy Array********:
import numpy as np

A = np.arange(12).reshape(3, 4)
# np.split: all pieces must have the same length (here 2 pieces along axis 1)
equal_pieces = np.split(A, 2, axis=1)
# np.array_split: use it when the pieces may have different sizes
# (here 4 columns are divided into 3 pieces)
uneven_pieces = np.array_split(A, 3, axis=1)
# Vertical split into 3 pieces
row_pieces = np.vsplit(A, 3)
# Horizontal split into 2 pieces
column_pieces = np.hsplit(A, 2)

Numpy Array Copy

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# ********Numpy Array Copy********:
import copy

import numpy as np

a = np.arange(4, dtype=np.float32)
# Plain assignment only creates more names for the same array object
b = c = d = a
a[0] = 0.3
# now a = b = c = d = [0.30000001, 1., 2., 3.]
b is a  # result is True
# Note: without copying, every name points at the same object
b = a.copy()  # a separate array with the same contents
# or
b = copy.copy(a)
a is b  # result is False

Pandas

Pandas Foundation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# ********Pandas Data Representation********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np

s = pd.Series([1, 3, 6, np.nan, 44, 1])
dates = pd.date_range('20170101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
# Scalars are repeated to match the length of the longest column (4 rows)
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
print(df2.dtypes)
print(df2.index)
print(df2.columns)
print(df2.values)
# Only applies to numeric columns
print(df2.describe())  # result index includes count, mean, std, min, 25%, 50%, 75%, max...
print(df2.T)
# Sort by column labels
print(df2.sort_index(axis=1, ascending=False))
# Sort rows by the values of one column
print(df2.sort_values(by='E'))

Pandas Data Sampling

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# ********Data Sampling********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np  # np.arange is used below

df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=pd.date_range('20130101', periods=6), columns=['A', 'B', 'C', 'D'])
print(df['A'])
# the same as :
print(df.A)
print(df[0: 3], '\n', df['20130101': '20130103'])
# select by label:
print(df.loc['20130102'])
print(df.loc['20130102', ['A', 'B']])
# select by position:
print(df.iloc[3, 1])
print(df.iloc[[1, 3, 5], 1: 3])
# mixed selection (df.ix was removed from pandas): turn the positions into labels first
print(df.loc[df.index[:3], ['A', 'C']])
# Boolean indexing selection:
print(df[df.A > 8])
# multi-conditions (cannot use 'and'): combine the masks with & and parenthesise each one
# columns are addressed by label: position 2 is 'C' and position 1 is 'B'
print(df[(df['C'] > 3) & (df['B'] < 2)])

Pandas Value Config

1
2
3
4
5
6
7
8
9
10
# ********Pandas change value********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np  # np.arange and np.nan are used below

df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=pd.date_range('20130101', periods=6), columns=['A', 'B', 'C', 'D'])
# change by position
df.iloc[2, 2] = 1111
# change by label
df.loc['20130101', 'B'] = 2222
# every row whose 'A' value is positive is set to 0 in all columns
df[df['A'] > 0] = 0
# add a new column
df['F'] = np.nan
# a Series is aligned to the frame by its index
df['E'] = pd.Series(np.arange(6, dtype=np.int32) + 1, index=pd.date_range('20130101', periods=6))

Pandas Handling Nan

1
2
3
4
5
6
7
8
9
10
11
12
13
# ********Pandas NaN********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np  # np.arange and np.nan are used below

# a float frame, so that NaN can be stored without changing the column dtype
df = pd.DataFrame(np.arange(24, dtype=float).reshape((6, 4)), index=pd.date_range('20130101', periods=6), columns=['A', 'B', 'C', 'D'])
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
# 'any' means we drop the row as long as nan exist, 'all' means we drop the row if all the elements are nan
print(df.dropna(axis=0, how='any'))  # how = {'any', 'all'}
print(df.drop('A', axis=1))
# replace nan
print(df.fillna(value=0))
print(df.isnull())  # result is a DataFrame of True / False values
# True if at least one value in the frame is nan
has_nan = df.isnull().values.any()
print(has_nan)

Pandas Read and Write

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# ********Pandas Read and Write********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses

# Some useful function like: read_csv, read_excel, read_sql, read_json ...
# read_csv takes no file mode; a second positional 'r' would be read as the separator
df = pd.read_csv('Sample.csv')
# Sample.csv
# A,B,C,D
# 0,1,2,3
# 4,5,6,7
# 8,9,10,11
# Save as pickle file
df.to_pickle('Sample.pickle')

Pandas Concatenating

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# ********Pandas Concatenating********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np

df1 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)), columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
# ignore_index will reset the index from top to bottom
result1 = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# concat-join, ['inner', 'outer']
df4 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df5 = pd.DataFrame(np.ones((3, 4)), columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
# with the default join = 'outer', missing cells are filled with NaN
result2 = pd.concat([df4, df5], ignore_index=True, join='inner')  # 'inner' keeps only the shared columns
# keep only df4's index (join_axes was removed from pandas; reindex the result instead)
result3 = pd.concat([df4, df5], axis=1).reindex(df4.index)  # result's index is only the index of df4
# append (DataFrame.append was removed from pandas; pd.concat does the same job)
df6 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'])
df7 = pd.DataFrame(np.ones((3, 4)), columns=['a', 'b', 'c', 'd'])
df8 = pd.DataFrame(np.ones((3, 4)), columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
result4 = pd.concat([df6, df7], ignore_index=True)
result5 = pd.concat([df6, df7, df8])
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# a Series is appended as one row: turn it into a one-row frame first
result6 = pd.concat([df6, s1.to_frame().T], ignore_index=True)

Pandas Merge(concat without the same parts)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# ********Pandas Merge********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np

# merge on a column named 'key' (like a join in a database)
df1 = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']})
df2 = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']})
result = pd.merge(df1, df2, on='key')  # both frames must contain a column named 'key'
# consider two keys
df1 = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], 'key2': ['K0', 'K1', 'K0', 'K1'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']})
df2 = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'], 'key2': ['K0', 'K0', 'K0', 'K0'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']})
# default how = 'inner'
result = pd.merge(df1, df2, on=['key1', 'key2'])
result2 = pd.merge(df1, df2, on=['key1', 'key2'], how='outer')  # how = {'left', 'right', 'outer', 'inner'}
# consider indicator (adds a column that records where each row came from)
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
result = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
result1 = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')  # rename the indicator column
# merged by index
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']}, index=['K0', 'K1', 'K2'])
df2 = pd.DataFrame({'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']}, index=['K0', 'K2', 'K3'])
result = pd.merge(df1, df2, left_index=True, right_index=True, how='outer')
# the second call shows the inner variant; the original repeated the outer merge
result1 = pd.merge(df1, df2, left_index=True, right_index=True, how='inner')
# handle overlapping column names with suffixes
df1 = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
df2 = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
result = pd.merge(df1, df2, on='k', suffixes=('_boy', '_girl'), how='inner')

Pandas Plot(View)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# ********Pandas Plot********:
import pandas as pd  # the alias must be `pd`, which the rest of the snippet uses
import numpy as np
import matplotlib.pyplot as plt

# Series
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()
# plt.plot(x = horizontal_value1, y = vertical_value)
data.plot()
plt.show()
# DataFrame: 1000 random numbers arranged as 250 rows x 4 columns
data = pd.DataFrame(np.random.randn(1000).reshape((250, 4)), index=np.arange(250), columns=list("ABCD"))
data = data.cumsum()
data.plot()
plt.show()
# scatter -> plt.scatter(x = .., y = ..)
# plot methods = {'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'}
a = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')  # one scatter call takes a single x / y pair
# pass the first axes as `ax` to draw the second class on the same plot
data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class 2', ax=a)
plt.show()

Matplotlib

Matplotlib Foundation

1
2
3
4
5
6
7
8
# ********Matplotlib Foundation********:
import matplotlib.pyplot as plt
import numpy as np

# 50 evenly spaced sample points between -1 and 1
x = np.linspace(-1, 1, num=50)
# the line y = 2x + 1 evaluated at every sample point
y = 2 * x + 1
plt.plot(x, y)
plt.show()

Matplotlib Figure

1
2
3
4
5
6
7
8
9
10
11
12
13
# ********Matplotlib Figure********:
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y1, y2 = 2 * x + 1, x ** 2

# first figure: the straight line only
plt.figure()
plt.plot(x, y1)

# second figure: explicit number and size, holding both curves
plt.figure(num=3, figsize=(8, 5))
plt.plot(x, y1)
dashed_red = {'color': 'red', 'linewidth': 1.0, 'linestyle': '--'}
plt.plot(x, y2, **dashed_red)

# show() displays every figure created above
plt.show()

Matplotlib Setting

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# ********Matplotlib Setting********:
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y1 = 2 * x + 1
y2 = x ** 2
new_ticks = np.linspace(-1, 2, 5)  # steps


def _apply_limits_labels_ticks():
    """Apply the axis limits, axis labels and tick marks shared by both figures."""
    plt.xlim((-1, 2))
    plt.ylim((-2, 3))
    plt.xlabel('I am X')
    plt.ylabel('I am Y')
    plt.xticks(new_ticks)
    # alpha need write as '\alpha'
    plt.yticks(
        [-2, -1.8, 0, 1.22, 3],
        [r'$really\ bad$', r'$bad$', r'$normal$', r'$good$', r'$really\ good$'],
    )


# First figure: move the axes so that they cross at the data origin
plt.figure()
plt.plot(x, y2)
plt.plot(x, y1, color='red', linewidth=1.0, linestyle='--')
_apply_limits_labels_ticks()
# gca = 'get current axis'
ax = plt.gca()
for hidden_side in ('right', 'top'):  # hide the right and top boundaries
    ax.spines[hidden_side].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
for moved_side in ('bottom', 'left'):  # 'data' can set to 'outward' , 'axes'...
    ax.spines[moved_side].set_position(('data', 0))

# Legend
plt.figure()
_apply_limits_labels_ticks()
l1, = plt.plot(x, y2, label='up')  # Don't forget ','
l2, = plt.plot(x, y1, color='red', linewidth=1.0, linestyle='--', label='down')
# loc = {'best', 'upper', 'lower right', 'center'...}
plt.legend(handles=[l1, l2], labels=['line 1', 'line 2'], loc='best')
plt.show()

Matplotlib Annotation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# ********Matplotlib Annotation********:
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y = 2 * x + 1
plt.figure(num=1, figsize=(8, 5))
plt.plot(x, y)

# Hide the right/top boundaries and make the remaining axes cross at the data origin
ax = plt.gca()
for hidden_side in ('right', 'top'):
    ax.spines[hidden_side].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))

# The point on the line that gets annotated
X0 = 1
Y0 = 2 * X0 + 1
# Point
plt.scatter(X0, Y0, s=50, color='b')
# Line from the point down to the x axis
plt.plot([X0, X0], [Y0, 0], 'k--', lw=2.5)
# Choice one: an arrow annotation placed relative to the point
arrow_style = dict(arrowstyle='->', connectionstyle='arc3, rad = .2')
plt.annotate(
    r'$2x+1=%s$' % Y0,
    xy=(X0, Y0),
    xycoords='data',
    xytext=(+30, -30),
    textcoords='offset points',
    fontsize=16,
    arrowprops=arrow_style,
)
# Choice two: free text at the given coordinates
font_settings = {'size': 16, 'color': 'r'}
plt.text(-3.7, 3, r'$This\ is\ some\ text.\ \mu\ \sigma_i\ \alpha_t$', fontdict=font_settings)
plt.show()