# AI basics: an easy introduction to data visualization (Matplotlib and Seaborn)

## 0 Introduction

Matplotlib is a Python 2D plotting library that produces publication-quality figures in a variety of hardcopy formats and in interactive environments across platforms.

With Matplotlib, developers can generate plots, histograms, power spectra, bar graphs, error graphs, scatter plots, etc. with just a few lines of code.

Seaborn is a very popular Python visualization library. It is a higher-level wrapper built on top of Matplotlib that makes plotting quicker and more convenient. Even people with no prior background can create attractive, analytically useful graphics with minimal code.

Previously, I wrote quick-start guides for NumPy and Pandas. This article is a quick start for data visualization. The earlier articles are:

AI Basics: Getting Started with Numpy

AI Basics: Simple Introduction to Pandas

github.com/fengdu78/Da...

## 1.Matplotlib

### 1.1 Create a canvas through the figure() function

import matplotlib.pyplot as plt
%matplotlib inline
Copy code
import numpy as np
data_one = np.arange( 100 , 201 ) # Generate an array containing 100 ~ 200
plt.plot(data_one) # draw data1 line chart
plt.show()
Copy code

# Create a new blank canvas and return the Figure instance
figure_obj = plt.figure()
Copy code
data_two = np.arange( 200 , 301 ) # Generate an array containing 200 ~ 300
plt.figure(facecolor = 'gray' ) # Create a new canvas with a gray background
plt.plot(data_two) # Draw a line graph through data2
plt.show()
Copy code

### 1.2 Create a single subplot through the subplot() function

nums = np.arange( 0 , 101 ) # Generate an array from 0 to 100
# Divide the canvas into a 2 * 2 grid and select region 1, i.e. the subplot in row 1, column 1
plt.subplot( 221 )
# Draw on the selected subgraph
plt.plot(nums, nums)
# Divide the canvas into a 2 * 2 grid and select region 2, i.e. the subplot in row 1, column 2
plt.subplot( 222 )
# Draw on the selected subgraph
plt.plot(nums, -nums)
# Divide into a matrix area of 2 * 1 , occupying the area numbered 2 , which is the subgraph in row 2
plt.subplot( 212 )
# Draw on the selected subgraph
plt.plot(nums, nums** 2 )
# Display graphics on this machine
plt.show()
Copy code

### 1.3 Create multiple subplots through the subplots() function

# Generate an array containing all integers between 1 and 100
nums = np.arange( 1 , 101 )
# Divide into a 2 * 2 matrix area and return the subgraph array axes
fig, axes = plt.subplots( 2 , 2 )
ax1 = axes[ 0 , 0 ] # Get subplot 1 from the Axes array by index [ 0 , 0 ]
ax2 = axes[ 0 , 1 ] # Get subplot 2 from the Axes array by index [ 0 , 1 ]
ax3 = axes[ 1 , 0 ] # Get subplot 3 from the Axes array by index [ 1 , 0 ]
ax4 = axes[ 1 , 1 ] # Get subplot 4 from the Axes array by index [ 1 , 1 ]
# Draw on the selected subgraph
ax1.plot(nums, nums)
ax2.plot(nums, -nums)
ax3.plot(nums, nums** 2 )
ax4.plot(nums, np.log(nums))
plt.show()
Copy code

# Import the matplotlib package
import matplotlib.pyplot as plt
import numpy as np
# Create a Figure instance
fig = plt.figure()
fig.add_subplot( 2 , 2 , 1 )
fig.add_subplot( 2 , 2 , 2 )
fig.add_subplot( 2 , 2 , 4 )
fig.add_subplot( 2 , 2 , 3 )
# Draw on the subgraph
random_arr = np.random.randn( 100 )
# The default is to draw at the position where the subplot was last used, that is , the position numbered 3
plt.plot(random_arr)
plt.show()
Copy code

import numpy as np
data = np.arange( 0 , 1.1 , 0.01 )
plt.title( "Title" ) # add title
plt.xlabel( "x" ) # Add the name of the x axis
plt.ylabel( "y" ) # Add the name of the y axis
# Set the scale of the x and y axis
plt.xticks([ 0 , 0.5 , 1 ])
plt.yticks([ 0 , 0.5 , 1.0 ])
plt.plot(data, data** 2 ) # draw y=x^ 2 curve
plt.plot(data, data** 3 ) # draw y=x^ 3 curve
plt.legend([ "y=x^2" , "y=x^3" ]) # Add legend
plt.show() # Display graphics on this machine
Copy code

import numpy as np
x=np.linspace( -3 , 3 , 50 )# Generate 50 points between -3 and 3
y1 = 2 *x + 1 #define function
y2 = x** 2
Copy code
# num = 3 sets the figure number, so the window title becomes "Figure 3"; figsize=(width, height) sets the figure size in inches
plt.figure(num = 3 , figsize=( 8 , 5 ))
plt.plot(x, y2)
# Draw a red dashed line with a line width of 1.0
plt.plot(x, y1, color = 'red' , linewidth = 1.0 , linestyle = '--' )

plt.xlim(( -1 , 2 )) #Set the x-axis range
plt.ylim(( -2 , 3 )) #Set the axis y range

# Set the axis labels. Note: English text can be written directly; Chinese text needs the fontproperties argument so that it renders correctly
plt.xlabel(u'price ' , fontproperties = 'SimHei' , fontsize = 16 )
plt.ylabel(u'profit ' , fontproperties = 'SimHei' , fontsize = 16 )

# Set the x-axis scale
# Interval from -1 to 2, 5 points, 4 equal sub-intervals, evenly spaced: [ -1. , -0.25 , 0.5 , 1.25 , 2. ]
new_ticks = np.linspace( -1 , 2 , 5 )
print (new_ticks)
plt.xticks(new_ticks)

# Set the y-axis scale
'''
Tick labels can be written in Chinese or English; the fontproperties argument makes Chinese text render correctly instead of as garbled characters.
Wrapping English text in $...$ renders it as math text, and the r prefix marks a raw string so that backslashes are kept literally. This gives a better-looking font.
To display special characters such as alpha, use a backslash escape (backslash followed by alpha); a backslash followed by a space produces an escaped space.
'''
plt.yticks([ -2 , -1.8 , -1 , 1.22 , 3. ],
[ 'Very bad' , 'bad' , r'$good\ \alpha$' , r'$really\ good$' , 'super good' ],
fontproperties = 'SimHei' ,
fontsize = 12 )
plt.show()
Copy code
[ -1.    -0.25   0.5    1.25   2.   ]
Copy code

### 1.6 Draw common types of charts

arr_random = np.random.randn( 100 ) # Create a random array
plt.hist(arr_random, bins = 8 , color = 'g' , alpha = 0.7 ) # draw a histogram
plt.show() # Display graphics
Copy code

# Create an array containing integers 0 ~ 50 to represent the data of the x-axis
x = np.arange( 51 )
# Create another array to represent the y-axis data
y = np.random.rand( 51 ) * 10
plt.scatter(x, y) # draw a scatter chart
plt.show()
Copy code

# Create a one-dimensional array containing 0 ~ 4
x = np.arange( 5 )
# Draw random integers from [ 1 , 31 ) into an array with 2 rows and 5 columns, then unpack its two rows into y1 and y2
y1, y2 = np.random.randint( 1 , 31 , size=( 2 , 5 ))
width = 0.25                                 # The width of the bar
ax = plt.subplot( 1 , 1 , 1 ) # Create a subplot
ax.bar(x, y1, width, color = 'r' ) # draw a red bar chart
ax.bar(x+width, y2, width, color = 'g' ) # draw another green bar chart
ax.set_xticks(x+width) # Set the scale of the x-axis
# Set the scale label of the x-axis
ax.set_xticklabels([ 'January' , 'February' , 'March' , 'April ' , 'May' ])
plt.show() # Display graphics
Copy code

data = np.arange( 1 , 3 , 0.3 )
# Draw a straight line: the color is cyan, the marker is "x", and the line style is dashed
plt.plot(data, color= "c" , marker= "x" , linestyle= "--" )
# Draw a straight line: the color is magenta, the marker is a filled circle, and the line style is dotted
plt.plot(data+ 1 , color= "m" , marker= "o" , linestyle= ":" )
# Draw a straight line: the color is black, the marker is a pentagon, and the line style is dash-dot
plt.plot(data+ 2 , color= "k" , marker= "p" , linestyle= "-." )
# You can also use the following method to draw three straight lines with different colors, marks and line types
# plt.plot(data, 'cx--' , data+ 1 , 'mo:' , data+ 2 , 'kp-.' )
plt.show()
Copy code

### 1.7 Save graphics locally

# Create a random array containing 100 values
import numpy as np
random_arr = np.random.randn( 100 )
Copy code
random_arr
Copy code
array([ -2.02009735 , -1.21035005 ,   0.57679581 , -0.00584516 ,   0.59612158 ,
-0.31118333 , -0.67245832 , -0.56589637 ,   0.25570972 ,   0.68256563 ,
-0.45816656 ,   0.34956566 ,   0.51020863 , 0.34956566 , 0.51020863 , -0.15307388 573 ,   499 ,
0.15 ,   0.15 , 0.15 , and   0.15,387 ,   0.39895018 ,
-1.86154032, -1.23949979 , -0.63471999 ,   1.09811855 ,   0.02552633 ,
-0.16804823 ,   0.34956809 ,   0.93485716 ,   0.37747537 , -0.16523647 ,
-1.04335227 , -0.01702448 ,   1.60924259 ,   1.15294223 , -0.15174045 ,
-0.03772519 ,   1.090792   ,   0.65279282 ,   0.38186503 , -1.3393988 ,
0.10098444 , -0.67411024, -2.39433996 , -0.43594683 , -0.155494   ,
0.54676898 , -0.97705035 , -1.34799225 ,   1.64568965 , -1.30594202 ,
-0.30704745 , -0.61612604 ,   1.09322798 ,   0.88921527 , -0.22512233 ,
-1.10477607 , -0.61717627 ,   0.73952416 ,   0.30252205 ,   0.60808863 ,
-0.3400892 , -2.01174842 , -0.46480751,   1.54980369 ,   1.74610516 ,
-0.53146867 , -0.70904096 ,   1.73856111 , -0.09254733 ,   0.43490467 ,
-0.87201768 , -0.73685075 , -0.65868507 , -0.18305015 ,   0.62559549 ,
0.30743734 , -0.78680136 , -0.05808801 , -0.23935035 , -1.14580197 ,
0.99154585 ,   0.07974613 ,   0.61315198 ,   0.93667393,   0.76542518 ,
1.90500996 ,   0.0306359 , -2.53801425 ,   0.17371482 ,   1.75721226 ,
0.25076371 , -1.00032227 ,   0.20617839 ,   0.81751139 ,   0.64920089 ,
1.3145223 ,   1.05360644 ,   2.06404062 ,   1.7208791 , -0.09375516 ])
Copy the code
# Draw a line graph of the data of a random array
plt.plot(random_arr)
plt.show()
Copy code

## 2 Seaborn: drawing statistical graphics

### 2.1 Visualization of the distribution of data

import seaborn as sns
%matplotlib inline
import numpy as np
sns.set() # Explicitly call set() to get the default drawing
np.random.seed( 0 ) # Determine the seed of the random number generator
arr = np.random.randn( 100 ) # Generate random array
ax = sns.distplot(arr, bins = 10 ) # Draw a histogram
copy the code

# Create a random array containing 500 integers between [ 0 , 100 ]
array_random = np.random.randint( 0 , 100 , 500 )
# Draw the kernel density estimation curve
sns.distplot(array_random, hist=False, rug=True)
Copy code

# Create a DataFrame object
import pandas as pd
dataframe_obj = pd.DataFrame({ "x" : np.random.randn( 500 ), "y" : np.random.randn( 500 )})
dataframe_obj
Copy code
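|     | x | y |
| --- | --- | --- |
| 0 | 0.478215 | 1.246931 |
| 1 | -0.053906 | 0.187860 |
| 2 | -1.241901 | 1.281412 |
| 3 | -1.658495 | 1.375265 |
| 4 | -0.353372 | 1.420608 |
| 5 | 1.656508 | -0.557275 |
| 6 | 1.511913 | 1.657975 |
| 7 | -0.906804 | 0.452821 |
| 8 | -0.777217 | -0.368433 |
| 9 | -0.739228 | -1.286740 |
| 10 | 0.987989 | -1.634521 |
| 11 | -0.026473 | -0.010277 |
| 12 | -1.262669 | -0.256035 |
| 13 | -1.561165 | 0.918040 |
| 14 | -0.939354 | -0.127256 |
| 15 | 0.335453 | 0.217671 |
| 16 | -1.489752 | 0.432434 |
| 17 | -1.066911 | -0.515731 |
| 18 | 1.035863 | -0.297603 |
| 19 | 0.631313 | -0.653702 |
| 20 | -1.894367 | 1.868757 |
| 21 | 0.036571 | 0.237410 |
| 22 | -0.312502 | -1.319956 |
| 23 | 0.814248 | -0.811489 |
| 24 | 0.382404 | -0.449499 |
| 25 | 1.646666 | 0.410724 |
| 26 | 0.227553 | 0.313078 |
| 27 | -1.399875 | 0.431041 |
| 28 | -2.161313 | -1.314429 |
| 29 | 0.280750 | 2.321291 |
| ... | ... | ... |
| 470 | -1.266559 | -0.595866 |
| 471 | -0.766566 | 0.096873 |
| 472 | 0.205730 | -1.270893 |
| 473 | -0.608373 | -1.875642 |
| 474 | -0.323170 | 0.336776 |
| 475 | -1.615268 | -1.565554 |
| 476 | 0.433679 | 1.887319 |
| 477 | -0.217975 | -0.728759 |
| 478 | 1.023324 | 0.201026 |
| 479 | -0.134135 | -0.746496 |
| 480 | 0.046724 | 1.299394 |
| 481 | -0.595088 | -0.641203 |
| 482 | -1.949716 | -0.520380 |
| 483 | -0.530026 | -0.348830 |
| 484 | -1.060356 | -0.013075 |
| 485 | -0.908488 | -0.981377 |
| 486 | -0.034975 | -1.450624 |
| 487 | -1.426397 | 0.320157 |
| 488 | -1.302537 | 1.746811 |
| 489 | -1.190758 | 0.407325 |
| 490 | -0.170543 | 0.311181 |
| 491 | 0.814052 | 0.299761 |
| 492 | -0.520146 | 0.591630 |
| 493 | 1.934602 | -0.165131 |
| 494 | -0.052196 | -0.524848 |
| 495 | -1.057486 | 0.939177 |
| 496 | -0.158090 | -1.588747 |
| 497 | -0.238412 | 1.627092 |
| 498 | 0.279500 | -0.218554 |
| 499 | 1.962078 | -0.956771 |

500 rows × 2 columns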

# Draw a scatter diagram
sns.jointplot(x= "x" , y= "y" , data=dataframe_obj)
copying the code

# Draw a two-dimensional histogram
sns.jointplot(x= "x" , y= "y" , data=dataframe_obj, kind= "hex" )
copy the code

\

# Kernel density estimation
sns.jointplot(x= "x" , y= "y" , data=dataframe_obj, kind= "kde" )
copy the code

# Load the data set in seaborn
dataset = sns.load_dataset( "tips" )
# Draw multiple paired bivariate distributions
sns.pairplot(dataset)
Copy code

### 2.2 Plotting with categorical data

tips = sns.load_dataset( "tips" )
sns.stripplot(x= "day" , y= "total_bill" , data=tips)
copying the code

tips = sns.load_dataset( "tips" )
sns.stripplot(x= "day" , y= "total_bill" , data=tips, jitter=True)
copying the code

sns.swarmplot(x= "day" , y= "total_bill" , data=tips)
Copy code

sns.boxplot(x= "day" , y= "total_bill" , data=tips)
copying the code

sns.violinplot(x= "day" , y= "total_bill" , data=tips)
copying the code

sns.barplot(x= "day" , y= "total_bill" , data=tips)
copying the code

sns.pointplot(x= "day" , y= "total_bill" , data=tips)
copying the code

Wonderful review of past issues
copy the code