Notice

Recent Posts

Recent Comments

Link

« 2024/05 »
일	월	화	수	목	금	토
			1	2	3	4
5	6	7	8	9	10	11
12	13	14	15	16	17	18
19	20	21	22	23	24	25
26	27	28	29	30	31

Tags more

Archives

Today

Total

관리 메뉴

LiJell's 성장기

18.python_visualization 본문

Bigdata/파이썬_Python

18.python_visualization

All_is_LiJell 2022. 1. 10. 22:37

Modified on Jan 10 2022
# -- coding: utf-8 --
"""
Created on Wed Dec 29 14:14:18 2021

@author: hanju
"""

시각화( visualization)

df1 = pd.read_csv("./data/cancer_test.csv")
df1
'''
           id  diagnosis  ...  symmetry_worst  fractal_dimension_worst
0      842302  Malignant  ...          0.4601                  0.11890
1      842517  Malignant  ...          0.2750                  0.08902
2    84300903  Malignant  ...          0.3613                  0.08758
3    84348301  Malignant  ...          0.6638                  0.17300
4    84358402  Malignant  ...          0.2364                  0.07678
..        ...        ...  ...             ...                      ...
564    926424  Malignant  ...          0.2060                  0.07115
565    926682  Malignant  ...          0.2572                  0.06637
566    926954  Malignant  ...          0.2218                  0.07820
567    927241  Malignant  ...          0.4087                  0.12400
568     92751     Benign  ...          0.2871                  0.07039

[569 rows x 32 columns]
'''

df1.columns
'''
Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave_points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave_points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave_points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')
'''

df1.dtypes
'''
id                           int64
diagnosis                   object
radius_mean                float64
texture_mean               float64
perimeter_mean             float64
area_mean                  float64
smoothness_mean            float64
compactness_mean           float64
concavity_mean             float64
concave_points_mean        float64
symmetry_mean               object
fractal_dimension_mean     float64
radius_se                  float64
texture_se                 float64
perimeter_se               float64
area_se                    float64
smoothness_se              float64
compactness_se             float64
concavity_se               float64
concave_points_se          float64
symmetry_se                float64
fractal_dimension_se       float64
radius_worst               float64
texture_worst              float64
perimeter_worst            float64
area_worst                 float64
smoothness_worst           float64
compactness_worst          float64
concavity_worst            float64
concave_points_worst       float64
symmetry_worst             float64
fractal_dimension_worst    float64
dtype: object
'''

선그래프 : plot

import matplotlib.pyplot as plt
plt.plot([1,2,3,4],
         [10,20,30,40],
         marker='^',
         linestyle = '--',
         color = 'r')

s1 = Series([10,20,30,40])
s1.plot()

s1.plot(xticks=[0,1,2,3], # x 눈금 좌표
        ylim=[0,100],     # y 축 범위
        xlabel='x name',  # x 축 라벨
        ylabel='y name',  # y 축 라벨
        rot=90,           # rot (=rotation 회전) 90도 x 눈끔 글 각도
        title='name',     # title plot의 제목
        marker='^',       # 마커
        linestyle='--',   # 선 스타일
        color='red')      # 색

plt.xticks(ticks=[0,1,2,3], labels=['a','b','c','d'], rotation = 180)

plt.xticks(ticks=[0,1,2,3], labels=['a','b','c','d'])
plt.ylim([0,100])

fontdict

font1 = {'family' : 'Malgun Gothic',
         'weight' : 'bold',
         'size' : 15,
         'color' : 'red',
         'style' : 'italic'}

plt.ylabel('y_name', rotation = 0, loc='top', labelpad = 30, fontdict= font1)

global option 변경

plt.rc('font', family= 'Malgun Gothic')

데이터 프레임의 선 그래프 출력

df2 = DataFrame({'apple': [10,20,30,40],'banana':[49,39,30,12],'mango':[10,32,43,40]})
df2
'''
   apple  banana  mango
0     10      49     10
1     20      39     32
2     30      30     43
3     40      12     40
'''

df2.index #RangeIndex(start=0, stop=4, step=1)
df2.index = ['a','b','c','d']
df2
'''
   apple  banana  mango
a     10      49     10
b     20      39     32
c     30      30     43
d     40      12     40
'''
df2.index.name = '지점'
df2
'''
    apple  banana  mango
지점                      
a      10      49     10
b      20      39     32
c      30      30     43
d      40      12     40
'''
df2.columns.name = '과일명'
df2
'''
과일명  apple  banana  mango
지점                       
a       10      49     10
b       20      39     32
c       30      30     43
d       40      12     40
'''

df2.plot()
plt.legend(fontsize=9, loc='best', title = '과일 이름') # best는 범례를 제일 좋은 위치로 옮겨줌

import pandas as pd
import numpy as np
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
plt.rc('font', family = "Malgun Gothic")
plt.rcParams['axes.unicode_minus'] = False

Bar Plot

kimchi = pd.read_csv('./data/kimchi_test.csv' , encoding = 'cp949')
kimchi = kimchi.pivot_table(index = '판매월', columns = '제품', values = '수량', aggfunc='sum')

kimchi.plot(kind='bar')
plt.title('김치별 판매수량 비교')
plt.legend(fontsize=9, loc='best', title ='김치별')
plt.title('김치별 판매수량 비교')
plt.ylim(([0,300000])
plt.ylabel('판매수량')
plt.xticks(rotation = 0) # x축세우기
plot.show()
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------


ratio = [34, 32, 16, 18]
labels = ['Apple', 'Banana', 'Melon', 'Grapes']
colors = ['#d96353', '#53d98b', '#53a1d9', '#fab7fa']   
colors = ['#ff9999', '#ffc000', '#8fd9b6', '#d395d0']
explode = [0.1, 0.1, 0.1, 0.1]
wedgeprops={'width': 0.7, 'edgecolor': 'w', 'linewidth': 5}

plt.pie(ratio,                  # 각 파이 숫자
        labels=labels,          # 각 파이 이름
        autopct='%.1f%%',       # 값의 표현 형태(소수점 첫째자리)
        startangle=260,         # 시작위치
        radius = 0.8,           # 파이 크기
        counterclock=False,     # 시계방향 진행 여부
        explode = explode,      # 중심에서 벗어나는 정도 설정(각각 서로 다른 숫자 전달 가능)
        colors=colors,          # 컬러맵 전달 가능
        shadow=False,           # 그림자 설정
        wedgeprops=wedgeprops)  # 부채꼴 모양 설정

import matplotlib.pyplot as plt
import numpy as np

plt.style.use('ggplot')

make data

x = [1, 2, 3, 4]
colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))

plot

fig, ax = plt.subplots()
ax.pie(x, colors=colors, radius=3, center=(4, 4),
       wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=True)

ax.set(xlim=(0, 8), xticks=np.arange(1, 8),
       ylim=(0, 8), yticks=np.arange(1, 8))

plt.show()

hist : 히스토그램 (밀도 표현, 전체 합 = 1)

s1 = Series(np.random.rand(1000)) # 정해진 숫자에서 무작위 추출 (균등하게: uniform distribution)
s1.hist()

s1 = Series(np.random.randn(1000)) # 정규분포(normal distribution)에서 무작위 추출
s1.hist()
s1.hist(bins=4) # 막대의 개수 또는 계급의 구간 전달 

plt.hist(s1, density = True)  # True 로 설정시, 막대 아래 총 면적이 1이 되는 밀도함수 출력
                              # , 즉 Y축 값이 확률로 변경되어 출력됨

plt.hist(s1, density = False) # 확률 값으로 출력
s1.plot(kind'kde')            # 커널 밀도 함수(kernel density estimation) 출력
                              # (연속형 히스토그램)

scatter (산점도)

# iris date loading
from sklearn.datasets import load_iris

iris = load_iris()
iris.keys()
# dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

iris['DESCR']
iris_x = iris['data']
x_name = iris ['feature_names']
'''
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']
'''
# color 설정 안할 시, 모두 동일한 색

plt.subplot(2,2,1) # 2*2 그래프 중 첫번째
plt.scatter(iris_x[:,0],        #x축 좌표 (첫번째 설명변수)
            iris_x[:,1])        #y축 봐표 (두번째 설명변수)


# 서로 다른 숫자 전달 시, 서로 다른 색으로 표현이 됨 (채도)

plt.subplot(2,2,1) # 2*2 그래프 중 첫번째
plt.scatter(iris_x[:,0],        #x축 좌표 (첫번째 설명변수)
            iris_x[:,1],        #y축 봐표 (두번째 설명변수)
            c = iris_x[:,1])
x_name
plt.subplot(2,2,1) # 2*2 그래프 중 첫번째
plt.scatter(iris_x[:,0], iris_x[:,1], c = iris_x[:,1])
plt.spring() # 입력하면 spring 색으로 바뀜
plt.xlabel(x_name[0])
plt.xlabel(x_name[1])
plt.colorbar()

plt.subplot(2,2,2) # 2*2 그래프 중 두번째
plt.scatter(iris_x[:,1], iris_x[:,2], c = iris_x[:,2])
plt.summer()
plt.xlabel(x_name[1])
plt.ylabel(x_name[2])
plt.colorbar()

plt.subplot(2,2,3) #2*2 그래프 중 3번째
plt.scatter(iris_x[:,2], iris_x[:,3], c = iris_x[:,3])
plt.autumn()
plt.xlabel(x_name[2])
plt.ylabel(x_name[3])
plt.colorbar()

plt.subplot(2,2,4) #2*2 그래프 중 4번째
plt.scatter(iris_x[:,3], iris_x[:,0], c = iris_x[:,0])
plt.winter()
plt.xlabel(x_name[3])
plt.ylabel(x_name[0])
plt.colorbar()

boxplot

plt.boxplot(iris_x)
plt.xticks(ticks=[1,2,3,4], labels_xname)

plt.boxplot(iris_x)
plt.xticks(ticks = ['a','b','c','d'], labels=x_names)

'Bigdata > 파이썬_Python' 카테고리의 다른 글

Pandas Styling: 판다스 스타일링 (2)	2022.02.11
19.python_pandas_practice (0)	2022.01.10
17.python_datetime_expression (0)	2022.01.10
16.python_NA_missing_duplicate_value (0)	2022.01.10
15.python_stack_unstack_pivot_table (0)	2022.01.08

'Bigdata/파이썬_Python' Related Articles

Comments