Part 02. 데이터 과학을 위한 파이썬
넘파이(NumPy): 행렬/선형대수/통계
판다스(pandas): 데이터 프로세싱
matplotlib, seaborn: 시각화
SciPy: 수학/과학/통계 분석
scikit-learn: 머신러닝
# Practice: building NumPy arrays from plain Python lists.
import numpy as np

# Inputs: a flat list, a nested list of ints, and a nested list mixing ints and floats.
data_1 = [1, 2, 3, 4, 5]
data_2 = [[1, 2, 3], [4, 5, 6]]
data_3 = [[1, 2.0, 3], [4.0, 5, 6.0]]

vector_1 = np.array(data_1)
matrix_1 = np.array(data_2)
matrix_2 = np.array(data_3)
matrix_3 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Show each array in the order it was created.
for created in (vector_1, matrix_1, matrix_2, matrix_3):
    print(created)
[1 2 3 4 5]
[[1 2 3]
[4 5 6]]
[[1. 2. 3.]
[4. 5. 6.]]
[[1 2 3]
[4 5 6]
[7 8 9]]
# Practice: NumPy helper functions that create arrays.
import numpy as np

zeros_1d = np.zeros(5)  # five elements, all set to 0
print(zeros_1d)

zeros_2d = np.zeros((2, 5))  # 2x5 array filled with 0
print(zeros_2d)

ones_1d = np.ones(5)  # five elements, all set to 1
print(ones_1d)

uninitialized = np.empty((2, 2, 3))  # only the shape is given; contents are arbitrary
print(uninitialized)

counting = np.arange(5)  # like range(), but returns a NumPy array instead of a list
print(counting)

sevens = np.full((3, 3), 7)  # array of the given shape filled with the given value
print(sevens)
[0. 0. 0. 0. 0.]
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
[1. 1. 1. 1. 1.]
[[[1.06444847e-311 2.81617418e-322 0.00000000e+000]
[0.00000000e+000 0.00000000e+000 4.47032019e-038]]
[[5.44736344e-090 1.63198008e+185 1.26900865e-076]
[1.28925108e+165 6.48224659e+170 4.93432906e+257]]]
[0 1 2 3 4]
[[7 7 7]
[7 7 7]
[7 7 7]]
# Inspect the type, dtype, number of dimensions, shape and size of several arrays.
import numpy as np

data_1 = [1, 2, 3, 4, 5]
data_2 = [[1, 2, 3], [4, 5, 6]]
data_3 = [[1, 2.0, 3], [4.0, 5, 6.0]]
vector_1 = np.array(data_1)
matrix_1 = np.array(data_2)
matrix_2 = np.array(data_3)
matrix_3 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Every attribute below is reported for the arrays in this fixed order.
arrays = (vector_1, matrix_1, matrix_2, matrix_3)

print(type(vector_1))  # Python type of a NumPy array object

# Element data type of each array.
for arr in arrays:
    print(arr.dtype)

# Number of dimensions of each array.
for arr in arrays:
    print(arr.ndim)

# Shape (rows x columns) of each array; each array reports its own shape.
shapes = [arr.shape for arr in arrays]
for shape in shapes:
    print(shape)

# Total number of elements in each array.
for arr in arrays:
    print(arr.size)
<class 'numpy.ndarray'>
int32
int32
float64
int32
1
2
2
2
(2, 3)
# Indexing and slicing on a 1-D vector and a 2-D matrix.
import numpy as np

vector_1 = np.array([1, 2, 3, 4, 5])
matrix_2 = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
    [11, 12, 13, 14, 15],
    [16, 17, 18, 19, 20],
    [21, 22, 23, 24, 25],
])

print(vector_1[1])       # element at index 1
print(vector_1[-1])      # last element (index -1)
print(matrix_2[1, 1])    # element at row 1, column 1
print(matrix_2[2, -1])   # last element of row 2
print(vector_1[:])       # every element
print(vector_1[:4])      # indices 0 through 3
print(vector_1[1:3])     # indices 1 through 2
print(vector_1[2:])      # index 2 through the end
print(vector_1[2:-1])    # index 2 up to, but not including, the last element
2
5
7
15
[1 2 3 4 5]
[1 2 3 4]
[2 3]
[3 4 5]
[3 4]
# 2-D slicing: compare the shapes produced by integer indices versus slices.
import numpy as np

matrix = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
    [11, 12, 13, 14, 15],
    [16, 17, 18, 19, 20],
    [21, 22, 23, 24, 25],
])

print(matrix[:2, 3:])           # first two rows, columns 3 onward
print(matrix[:2, 3:].shape)
# An integer index drops that axis; a slice keeps it (with length 1 if needed).
print(matrix[4], matrix[4].shape)
print(matrix[4, :], matrix[4, :].shape)
print(matrix[4:, :], matrix[4:, :].shape)
print(matrix[:, :2], matrix[:, :2].shape)
print(matrix[1, :2], matrix[1, :2].shape)
print(matrix[1:2, :2], matrix[1:2, :2].shape)
[[ 4 5]
[ 9 10]]
(2, 2)
[21 22 23 24 25] (5,)
[21 22 23 24 25] (5,)
[[21 22 23 24 25]] (1, 5)
[[ 1 2]
[ 6 7]
[11 12]
[16 17]
[21 22]] (5, 2)
[6 7] (2,)
[[6 7]] (1, 2)
import numpy as np

# Aggregate statistics over a 3x3 matrix: whole-array first, then per axis.
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Function form and method form give the same result.
total_via_function = np.sum(matrix)
total_via_method = matrix.sum()
print("sum:", total_via_function)
print("sum:", total_via_method)
print("mean:", matrix.mean())
print("mean:", np.mean(matrix))
print()

print("std:", np.std(matrix))
print("variance:", np.var(matrix))
print("max:", np.max(matrix))
print("min:", np.min(matrix))
print()

# axis=0 reduces down the rows, giving one value per column.
col_max = np.max(matrix, axis=0)
col_min = np.min(matrix, axis=0)
col_avg = np.mean(matrix, axis=0)
print("axis=0 , max:", col_max)
print("axis=0 , min:", col_min)
print("axis=0 , avg:", col_avg)

# axis=1 reduces across the columns, giving one value per row.
row_max = np.max(matrix, axis=1)
row_min = np.min(matrix, axis=1)
row_avg = np.mean(matrix, axis=1)
print("axis=1 , max:", row_max)
print("axis=1 , min:", row_min)
print("axis=1 , avg:", row_avg)
sum: 45
sum: 45
mean: 5.0
mean: 5.0
std: 2.581988897471611
variance: 6.666666666666667
max: 9
min: 1
axis=0 , max: [7 8 9]
axis=0 , min: [1 2 3]
axis=0 , avg: [4. 5. 6.]
axis=1 , max: [3 6 9]
axis=1 , min: [1 4 7]
axis=1 , avg: [2. 5. 8.]
# Random number generation with np.random, followed by in-place sorting.
import numpy as np

np.random.seed(0)  # fix the seed so the random draws are reproducible

print("0 to 1.0 rand num 10 : ", np.random.random(10))
# randint's upper bound is exclusive, so (1, 46) yields integers 1..45 as the label says.
lotto = np.random.randint(1, 46, 6)
print("1 to 45 rand num 6 : ", lotto)
print("avg=0.0 and std=1.0 normal distribution num 5: ", np.random.normal(0.0, 1.0, 5))
print("1.0< x < 5.0 num 5: ", np.random.uniform(1.0, 5.0, 5))

# Three ways to draw a 3x3 array of uniform values.
print("0.0 < x < 1.0 num 3 : ", np.random.random_sample((3, 3)))
print("0.0 < x < 1.0 num 3 : ", np.random.uniform(0.0, 1.0, (3, 3)))
print("0.0 < x < 1.0 num 3 : ", np.random.rand(3, 3))

# Three ways to draw a 3x3 array from the standard normal distribution.
print("normal distribution rand num avg=0 and std=1 : \n", np.random.standard_normal((3, 3)))
print("normal distribution rand num avg=0 and std=1 : \n", np.random.normal(0.0, 1.0, (3, 3)))
print("normal distribution rand num avg=0 and std=1 : \n", np.random.randn(3, 3))

print("1~3 choice random:", np.random.choice([1, 2, 3], 3))

# shuffle() reorders the sequence in place.
vector = [1, 2, 3, 4, 5]
np.random.shuffle(vector)
print(vector)
print("\n\n\n")

# ndarray.sort() sorts in place and returns None.
vector = np.random.randn(5)
print(vector)
vector.sort()
print("after sort : ", vector)
print()

# With no axis argument, sort() uses the last axis, so each row is sorted.
matrix_1 = np.random.rand(5, 5)
print("before sort matrix 1-->", matrix_1)
matrix_1.sort()
print("after sort matrix1-->", matrix_1)
print()

# axis=0 sorts the values within each column.
matrix_2 = np.random.rand(5, 5)
print("before sort matrix 2-->", matrix_2)
matrix_2.sort(axis=0)
print("after sort axis=0 matrix 2-->", matrix_2)
print()

# axis=1 sorts the values within each row.
matrix_3 = np.random.rand(5, 5)
print("before sort matrix 3-->", matrix_3)
matrix_3.sort(axis=1)
print("after sort axis=1 matrix 3-->", matrix_3)
print()

matrix_4 = np.random.randn(5, 5)
print("before sort 1st col:", matrix_4)
# matrix_4[:, 0] is a view of the first column, so sorting it in place updates
# matrix_4 itself. (matrix_4[:0] is an empty row slice and would sort nothing.)
matrix_4[:, 0].sort()
print("after sort-->", matrix_4)
0 to 1.0 rand num 10 : [0.5488135 0.71518937 0.60276338 0.54488318 0.4236548 0.64589411
0.43758721 0.891773 0.96366276 0.38344152]
1 to 45 rand num 6 : [38 39 23 24 17 37]
avg=0.0 and std=1.0 normal distribution num 5: [-0.470771 0.973016 -1.27814912 1.43737068 -0.07770457]
1.0< x < 5.0 num 5: [4.20364301 3.08190992 3.71551812 3.88253062 3.32807917]
0.0 < x < 1.0 num 3 : [[0.53737323 0.75861562 0.10590761]
[0.47360042 0.18633234 0.73691818]
[0.21655035 0.13521817 0.32414101]]
0.0 < x < 1.0 num 3 : [[0.14967487 0.22232139 0.38648898]
[0.90259848 0.44994999 0.61306346]
[0.90234858 0.09928035 0.96980907]]
0.0 < x < 1.0 num 3 : [[0.65314004 0.17090959 0.35815217]
[0.75068614 0.60783067 0.32504723]
[0.03842543 0.63427406 0.95894927]]
normal distribution rand num avg=0 and std=1 :
[[ 1.08963016 1.25441407 1.41910204]
[-0.74385608 -2.5174371 -1.50709602]
[ 1.14907613 -1.19357825 1.14104245]]
normal distribution rand num avg=0 and std=1 :
[[ 1.50944508 1.06777513 -0.68658948]
[ 0.01487332 -0.3756659 -0.03822364]
[ 0.36797447 -0.0447237 -0.30237513]]
normal distribution rand num avg=0 and std=1 :
[[-2.2244036 0.72400636 0.35900276]
[ 1.07612104 0.19214083 0.85292596]
[ 0.01835718 0.42830357 0.99627783]]
1~3 choice random: [3 1 3]
[1, 2, 5, 3, 4]
[ 0.92525075 -0.90478616 1.84369153 1.52550724 -1.44553558]
after sort : [-1.44553558 -0.90478616 0.92525075 1.52550724 1.84369153]
before sort matrix 1--> [[0.95274901 0.44712538 0.84640867 0.69947928 0.29743695]
[0.81379782 0.39650574 0.8811032 0.58127287 0.88173536]
[0.69253159 0.72525428 0.50132438 0.95608363 0.6439902 ]
[0.42385505 0.60639321 0.0191932 0.30157482 0.66017354]
[0.29007761 0.61801543 0.4287687 0.13547406 0.29828233]]
after sort matrix1--> [[0.29743695 0.44712538 0.69947928 0.84640867 0.95274901]
[0.39650574 0.58127287 0.81379782 0.8811032 0.88173536]
[0.50132438 0.6439902 0.69253159 0.72525428 0.95608363]
[0.0191932 0.30157482 0.42385505 0.60639321 0.66017354]
[0.13547406 0.29007761 0.29828233 0.4287687 0.61801543]]
before sort matrix 2--> [[0.56996491 0.59087276 0.57432525 0.65320082 0.65210327]
[0.43141844 0.8965466 0.36756187 0.43586493 0.89192336]
[0.80619399 0.70388858 0.10022689 0.91948261 0.7142413 ]
[0.99884701 0.1494483 0.86812606 0.16249293 0.61555956]
[0.12381998 0.84800823 0.80731896 0.56910074 0.4071833 ]]
after sort axis=0 matrix 2--> [[0.12381998 0.1494483 0.10022689 0.16249293 0.4071833 ]
[0.43141844 0.59087276 0.36756187 0.43586493 0.61555956]
[0.56996491 0.70388858 0.57432525 0.56910074 0.65210327]
[0.80619399 0.84800823 0.80731896 0.65320082 0.7142413 ]
[0.99884701 0.8965466 0.86812606 0.91948261 0.89192336]]
before sort matrix 3--> [[0.069167 0.69742877 0.45354268 0.7220556 0.86638233]
[0.97552151 0.85580334 0.01171408 0.35997806 0.72999056]
[0.17162968 0.52103661 0.05433799 0.19999652 0.01852179]
[0.7936977 0.22392469 0.34535168 0.92808129 0.7044144 ]
[0.03183893 0.16469416 0.6214784 0.57722859 0.23789282]]
after sort axis=1 matrix 3--> [[0.069167 0.45354268 0.69742877 0.7220556 0.86638233]
[0.01171408 0.35997806 0.72999056 0.85580334 0.97552151]
[0.01852179 0.05433799 0.17162968 0.19999652 0.52103661]
[0.22392469 0.34535168 0.7044144 0.7936977 0.92808129]
[0.03183893 0.16469416 0.23789282 0.57722859 0.6214784 ]]
before sort 1st col: [[ 0.37716061 0.1666735 0.63503144 2.38314477 0.94447949]
[-0.91282223 1.11701629 -1.31590741 -0.4615846 -0.06824161]
[ 1.71334272 -0.74475482 -0.82643854 -0.09845252 -0.66347829]
[ 1.12663592 -1.07993151 -1.14746865 -0.43782004 -0.49803245]
[ 1.92953205 0.94942081 0.08755124 -1.22543552 0.84436298]]
after sort--> [[ 0.37716061 0.1666735 0.63503144 2.38314477 0.94447949]
[-0.91282223 1.11701629 -1.31590741 -0.4615846 -0.06824161]
[ 1.71334272 -0.74475482 -0.82643854 -0.09845252 -0.66347829]
[ 1.12663592 -1.07993151 -1.14746865 -0.43782004 -0.49803245]
[ 1.92953205 0.94942081 0.08755124 -1.22543552 0.84436298]]
import numpy as np

# Reorder the columns of a matrix by the values in its first row.
matrix = np.random.randn(5, 5)
print(matrix)

# Overwrite row 0 with known keys so the column reordering is easy to follow.
vector = np.array([5, 3, 1, 2, 4])
matrix[0] = vector
print("insert vector in matrix[0]:\n", matrix)

# argsort() gives the column indices that put row 0 in ascending order;
# indexing every row with them moves whole columns together.
column_order = matrix[0].argsort()
print("sort with argsort()\n", matrix[:, column_order])
[[-1.00021535 -1.5447711 1.18802979 0.31694261 0.92085882]
[ 0.31872765 0.85683061 -0.65102559 -1.03424284 0.68159452]
[-0.80340966 -0.68954978 -0.4555325 0.01747916 -0.35399391]
[-1.37495129 -0.6436184 -2.22340315 0.62523145 -1.60205766]
[-1.10438334 0.05216508 -0.739563 1.5430146 -1.29285691]]
insert vector in matrix[0]:
[[ 5. 3. 1. 2. 4. ]
[ 0.31872765 0.85683061 -0.65102559 -1.03424284 0.68159452]
[-0.80340966 -0.68954978 -0.4555325 0.01747916 -0.35399391]
[-1.37495129 -0.6436184 -2.22340315 0.62523145 -1.60205766]
[-1.10438334 0.05216508 -0.739563 1.5430146 -1.29285691]]
sort with argsort()
[[ 1. 2. 3. 4. 5. ]
[-0.65102559 -1.03424284 0.85683061 0.68159452 0.31872765]
[-0.4555325 0.01747916 -0.68954978 -0.35399391 -0.80340966]
[-2.22340315 0.62523145 -0.6436184 -1.60205766 -1.37495129]
[-0.739563 1.5430146 0.05216508 -1.29285691 -1.10438334]]
import numpy as np

# Reshape a 5x4 matrix in several ways; -1 asks NumPy to infer that dimension.
data = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16],
    [17, 18, 19, 20],
]
matrix = np.array(data)
print("matrix.shape:", matrix.shape)

two_by_ten = matrix.reshape(2, 10)
print("reshape 2x10 :", two_by_ten)
print("size:", matrix.size)
print("reshape 1 , -1 --->", matrix.reshape(1, -1))
print("reshape 2 , -1 --->", matrix.reshape(2, -1))
print("reshape 4 , -1 --->", matrix.reshape(4, -1))
print("reshape 5 , -1 --->", matrix.reshape(5, -1))
print("reshape -1 --->", matrix.reshape(-1))

# Rebuild the same matrix, then show three equivalent transposes and flatten().
data = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16],
    [17, 18, 19, 20],
]
matrix = np.array(data)

transposed_attr = matrix.T
transposed_method = matrix.transpose()
transposed_func = np.transpose(matrix)
print("matrix.T:\n", transposed_attr)
print("matrix.transpose()\n", transposed_method)
print("np.transpose(matrix)\n", transposed_func)
print("flatten", matrix.flatten())
matrix.shape: (5, 4)
reshape 2x10 : [[ 1 2 3 4 5 6 7 8 9 10]
[11 12 13 14 15 16 17 18 19 20]]
size: 20
reshape 1 , -1 ---> [[ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]]
reshape 2 , -1 ---> [[ 1 2 3 4 5 6 7 8 9 10]
[11 12 13 14 15 16 17 18 19 20]]
reshape 4 , -1 ---> [[ 1 2 3 4 5]
[ 6 7 8 9 10]
[11 12 13 14 15]
[16 17 18 19 20]]
reshape 5 , -1 ---> [[ 1 2 3 4]
[ 5 6 7 8]
[ 9 10 11 12]
[13 14 15 16]
[17 18 19 20]]
reshape -1 ---> [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]
matrix.T:
[[ 1 5 9 13 17]
[ 2 6 10 14 18]
[ 3 7 11 15 19]
[ 4 8 12 16 20]]
matrix.transpose()
[[ 1 5 9 13 17]
[ 2 6 10 14 18]
[ 3 7 11 15 19]
[ 4 8 12 16 20]]
np.transpose(matrix)
[[ 1 5 9 13 17]
[ 2 6 10 14 18]
[ 3 7 11 15 19]
[ 4 8 12 16 20]]
flatten [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]
import numpy as np

# Element-wise arithmetic, broadcasting, and the matrix product.
matrix_1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
matrix_2 = np.array([
    [2, 4, 6],
    [8, 10, 12],
    [14, 16, 18],
])

# Array with scalar: the scalar is applied to every element.
print(matrix_1 + 2)
print(matrix_1 - 2)
print()
print(matrix_1 / 2)
print(matrix_1 * 2)
print()

# Array with array of the same shape: element-wise (* is NOT the matrix product).
print(matrix_1 + matrix_2)
print(matrix_1 - matrix_2)
print(matrix_1 / matrix_2)
print(matrix_1 * matrix_2)
print()

# Broadcasting: a length-3 row is added to every row, a 3x1 column to every column.
row_offset = [10, 10, 10]
column_offset = [[10], [10], [10]]
print(matrix_1 + row_offset)
print(matrix_1 + column_offset)

# Function forms of + and -, followed by the true matrix product.
print(np.add(matrix_1, matrix_2))
print(np.subtract(matrix_1, matrix_2))
print(np.dot(matrix_1, matrix_2))
[[ 3 4 5]
[ 6 7 8]
[ 9 10 11]]
[[-1 0 1]
[ 2 3 4]
[ 5 6 7]]
[[0.5 1. 1.5]
[2. 2.5 3. ]
[3.5 4. 4.5]]
[[ 2 4 6]
[ 8 10 12]
[14 16 18]]
[[ 3 6 9]
[12 15 18]
[21 24 27]]
[[-1 -2 -3]
[-4 -5 -6]
[-7 -8 -9]]
[[0.5 0.5 0.5]
[0.5 0.5 0.5]
[0.5 0.5 0.5]]
[[ 2 8 18]
[ 32 50 72]
[ 98 128 162]]
[[11 12 13]
[14 15 16]
[17 18 19]]
[[11 12 13]
[14 15 16]
[17 18 19]]
[[ 3 6 9]
[12 15 18]
[21 24 27]]
[[-1 -2 -3]
[-4 -5 -6]
[-7 -8 -9]]
[[ 60 72 84]
[132 162 192]
[204 252 300]]
import pandas as pd

# A Series built from a list gets a default integer index.
series_list_1 = pd.Series([1, 3, 5, 7, 9])
print(series_list_1)
print(series_list_1.index)
print(series_list_1.values)

# The same kind of Series, this time with explicit string labels as the index.
labels = ['a', 'b', 'c', 'd', 'e']
series_list_2 = pd.Series([2, 4, 6, 8, 10], index=labels)
print(series_list_2)
print(series_list_2.index)
print(series_list_2.values)
0 1
1 3
2 5
3 7
4 9
dtype: int64
RangeIndex(start=0, stop=5, step=1)
[1 3 5 7 9]
a 2
b 4
c 6
d 8
e 10
dtype: int64
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
[ 2 4 6 8 10]
import pandas as pd

# Build a Series from a dict: the keys become the index, the values the data.
dict_data = {
    'chicken': 16000,
    'pizza': 20000,
    'hambuger': 10000,
    'friedMeat': 25000,
}
series_list_a = pd.Series(dict_data)
print(series_list_a)
print()

# With an explicit index, only the listed labels are kept; a label missing
# from the dict ('jajangmyen') becomes NaN.
delivery_foods = ['chicken', 'pizza', 'friedMeat', 'jajangmyen']
series_list_b = pd.Series(dict_data, index=delivery_foods)
print(series_list_b)
print()

# Lookup by one label, by a list of labels, and by a boolean mask.
print(series_list_a['chicken'])
print(series_list_a[['chicken', 'pizza']])
over_19000 = series_list_a > 19000
print(series_list_a[over_19000])

# `in` tests membership against the index labels.
print('chicken' in series_list_a)
chicken 16000
pizza 20000
hambuger 10000
friedMeat 25000
dtype: int64
chicken 16000.0
pizza 20000.0
friedMeat 25000.0
jajangmyen NaN
dtype: float64
16000
chicken 16000
pizza 20000
dtype: int64
pizza 20000
friedMeat 25000
dtype: int64
True
import pandas as pd

# Start from an empty DataFrame and attach the columns one at a time, in order.
df_profile = pd.DataFrame()
profile_columns = {
    'Sex': ['M', 'M', 'W', 'W'],
    'Age': [21, 25, 23, 20],
    'Name': ['Kim', 'Han', 'Park', 'Lee'],
}
for column_name, column_values in profile_columns.items():
    df_profile[column_name] = column_values

# Bare expression: shown as the cell result when run in a notebook.
df_profile
|   | Sex | Age | Name |
|---|-----|-----|------|
| 0 | M | 21 | Kim |
| 1 | M | 25 | Han |
| 2 | W | 23 | Park |
| 3 | W | 20 | Lee |
import pandas as pd
import numpy as np

# Build DataFrames from a nested list, a NumPy array, and a dict of columns.
list_col = ['Sex', 'Age', 'Name']
list_a = [
    ['M', 21, 'Kim'],
    ['M', 25, 'Han'],
    ['W', 23, 'Park'],
]
array_a = np.array(list_a)

df_list = pd.DataFrame(list_a, columns=list_col)    # DataFrame from a list of rows
df_array = pd.DataFrame(array_a, columns=list_col)  # DataFrame from a NumPy array
print(df_list)
print(df_array)
print(df_list.shape, df_array.shape)

# Dict form: each key is a column name and each value is that column's data.
data_dic = {
    'category': ['chicken', 'pizza', 'hamburger', 'ramen', 'photato'],
    'price': [20000, 14000, 16000, 5000, 1000],
    'size': [13, 12, 9, 5, 2],
}
df_food = pd.DataFrame(data_dic)
print(df_food)
Sex Age Name
0 M 21 Kim
1 M 25 Han
2 W 23 Park
Sex Age Name
0 M 21 Kim
1 M 25 Han
2 W 23 Park
(3, 3) (3, 3)
category price size
0 chicken 20000 13
1 pizza 14000 12
2 hamburger 16000 9
3 ramen 5000 5
4 photato 1000 2
import pandas as pd

# Load the CSV file, decoding it as cp949.
csv_path = 'testFile.csv'
df_t = pd.read_csv(csv_path, encoding='cp949')

# Bare expressions: a notebook displays only the last one in a cell as its result.
df_t
print()
df_t.head()
import pandas as pd

# Load the CSV file (decoded as cp949) and print its basic metadata.
df_t = pd.read_csv('testFile.csv', encoding='cp949')

print(df_t.index)    # row labels
print(df_t.columns)  # column labels
print(df_t.values)   # underlying data as a NumPy array
print(df_t.shape)    # (rows, columns)

# info() writes its report directly and returns None, so it is called without
# print() to avoid a stray "None" line after the report.
df_t.info()
RangeIndex(start=0, stop=27, step=1)
Index(['시.도', '시군구', '측정소명', '측정소코드', '장비\n가동률\n(%)', '유효\n자료\n획득률\n(%)',
'유효\n측정\n일수\n(day)', '유효\n측정\n시간\n(hour)', '월평균\n(ppm)', '최저\n(ppm)',
'최고\n(ppm)', '최고일시', '기준초과\n(회)', '초과율\n(%)', '최저\n(ppm).1',
'최고\n(ppm).1', '최고일'],
dtype='object')
[['인천' '강화군' '석모리' 831481 100.0 77.63 22 559 '0.0013' 0.001 0.0037
2022060112 nan nan 0.0011 0.0019 20220601]
['인천' '옹진군' '덕적도' 831491 100.0 95.55 28 688 '0.0015' 0.001 0.0067
2022061003 nan nan 0.0012 0.0023 20220605]
['경기' '이천' '설성면' 131441 100.0 97.22 29 700 '0.0015' 0.0009 0.0041
2022061111 nan nan 0.0011 0.0021 20220602]
['경기' '파주' '파주' 131373 100.0 97.5 30 702 '0.0012' 0.0009 0.0024
2022060319 nan nan 0.001 0.0015 20220601]
['경기' '포천' '관인면' 131451 100.0 97.91 30 705 '0.0011' 0.0005 0.0021
2022061710 nan nan 0.0006 0.0013 20220608]
['경기' '연천' '연천(DMZ)' 131991 100.0 97.63 29 703 '0.0011' 0.0007 0.0024
2022061710 nan nan 0.0009 0.0015 20220601]
['강원' '양구' '방산면' 132401 100.0 97.5 29 702 '0.0008' 0.0007 0.0022
2022060211 nan nan 0.0007 0.0012 20220601]
['강원' '고성' '간성읍' 632421 100.0 91.11 27 656 '0.0011' 0.0008 0.0021
2022060210 nan nan 0.0008 0.0015 20220601]
['강원' '고성' '인제(DMZ)' 132993 100.0 97.5 29 702 '0.0017' 0.0012 0.0025
2022060211 nan nan 0.0015 0.002 20220601]
['강원' '고성' '고성(DMZ)' 132994 100.0 97.91 30 705 '0.0011' 0.0007 0.0022
2022060202 nan nan 0.001 0.0015 20220601]
['강원' '정선' '북평면' 632371 100.0 97.36 29 701 '0.0016' 0.0013 0.0025
2022060109 nan nan 0.0014 0.002 20220602]
['강원' '횡성' '치악산' 632431 96.49 91.8 27 661 '0.002' 0.0015 0.0026
2022060118 nan nan 0.0018 0.0022 20220602]
['강원' '철원' '철원(DMZ)' 132991 100.0 97.91 29 705 '0.0015' 0.0012 0.003
2022061711 nan nan 0.0014 0.0018 20220625]
['강원' '화천' '화천(DMZ)' 132992 100.0 99.44 30 716 '0.0013' 0.001 0.0018
2022060107 nan nan 0.0011 0.0016 20220601]
['충북' '괴산' '청천면' 633361 100.0 96.8 29 697 '0.001' 0.0007 0.0021
2022060815 nan nan 0.0008 0.0012 20220601]
['충북' '음성' '금왕' 633461 100.0 96.8 29 697 '0.001' 0.0006 0.0056
2022060111 nan nan 0.0007 0.0018 20220601]
['충남' '태안' '파도리' 534461 100.0 97.63 30 703 '0.0012' 0.001 0.0036
2022060512 nan nan 0.001 0.0017 20220610]
['충남' '공주' '사곡면' 534341 100.0 50.41 14 363 '0.0012*' 0.001 0.0019
2022060217 nan nan 0.0011 0.0014 20220602]
['전북' '임실' '운암면' 735351 100.0 96.38 29 694 '0.0018' 0.0009 0.0028
2022061012 nan nan 0.0017 0.002 20220610]
['전북' '부안' '새만금' 735172 100.0 97.08 29 699 '0.0016' 0.0005 0.0029
2022061013 nan nan 0.0006 0.0026 20220601]
['전남' '화순' '송단리' 336451 100.0 97.5 29 702 '0.002' 0.0014 0.0081
2022060420 nan nan 0.0017 0.0041 20220604]
['경북' '영덕' '강구면' 437202 100.0 95.27 28 686 '0.001' 0.0004 0.0064
2022060121 nan nan 0.0007 0.0021 20220601]
['경북' '영천' '화북면' 437401 100.0 97.36 29 701 '0.0009' 0.0007 0.0028
2022062020 nan nan 0.0007 0.0013 20220620]
['경북' '의성' '안계면' 437411 100.0 96.38 29 694 '0.0008' 0.0001 0.0021
2022062023 nan nan 0.0003 0.0012 20220621]
['경남' '창원' '대산면' 238241 100.0 97.22 30 700 '0.0011' 0.0007 0.0037
2022060210 nan nan 0.0008 0.0019 20220602]
['경남' '거제' '저구리' 238191 100.0 98.61 30 710 '0.0014' 0.0001 0.0054
2022061111 nan nan 0.0012 0.002 20220611]
['경남' '거창' '남상면' 238481 92.3 90.0 27 648 '0.0021' 0.0017 0.0055
2022060120 nan nan 0.0018 0.0028 20220601]]
(27, 17)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 시.도 27 non-null object
1 시군구 27 non-null object
2 측정소명 27 non-null object
3 측정소코드 27 non-null int64
4 장비
가동률
(%) 27 non-null float64
5 유효
자료
획득률
(%) 27 non-null float64
6 유효
측정
일수
(day) 27 non-null int64
7 유효
측정
시간
(hour) 27 non-null int64
8 월평균
(ppm) 27 non-null object
9 최저
(ppm) 27 non-null float64
10 최고
(ppm) 27 non-null float64
11 최고일시 27 non-null int64
12 기준초과
(회) 0 non-null float64
13 초과율
(%) 0 non-null float64
14 최저
(ppm).1 27 non-null float64
15 최고
(ppm).1 27 non-null float64
16 최고일 27 non-null int64
dtypes: float64(8), int64(5), object(4)
memory usage: 3.7+ KB
None
import pandas as pd

# Load the CSV file (decoded as cp949) and request summary statistics.
csv_path = 'testFile.csv'
df_t = pd.read_csv(csv_path, encoding='cp949')

# Bare expression: shown as the cell result when run in a notebook.
df_t.describe()
| 27.000000 | 27.000000 | 27.000000 | 27.000000 | 27.000000 | 27.000000 | 27.000000 | 2.700000e+01 | 0.0 | 0.0 | 27.000000 | 27.000000 | 2.700000e+01 |
| 404457.296296 | 99.584815 | 94.126296 | 28.148148 | 677.740741 | 0.000859 | 0.003452 | 2.022061e+09 | NaN | NaN | 0.001063 | 0.001870 | 2.022061e+07 |
| 250800.529171 | 1.604751 | 9.712634 | 3.254626 | 69.926111 | 0.000382 | 0.001706 | 6.589991e+02 | NaN | NaN | 0.000399 | 0.000611 | 6.877645e+00 |
| 131373.000000 | 92.300000 | 50.410000 | 14.000000 | 363.000000 | 0.000100 | 0.001800 | 2.022060e+09 | NaN | NaN | 0.000300 | 0.001200 | 2.022060e+07 |
| 132992.500000 | 100.000000 | 95.965000 | 28.500000 | 691.000000 | 0.000700 | 0.002200 | 2.022060e+09 | NaN | NaN | 0.000750 | 0.001500 | 2.022060e+07 |
| 437202.000000 | 100.000000 | 97.220000 | 29.000000 | 700.000000 | 0.000900 | 0.002800 | 2.022060e+09 | NaN | NaN | 0.001000 | 0.001800 | 2.022060e+07 |
| 632426.000000 | 100.000000 | 97.565000 | 29.500000 | 702.500000 | 0.001000 | 0.003900 | 2.022061e+09 | NaN | NaN | 0.001300 | 0.002050 | 2.022061e+07 |
| 831491.000000 | 100.000000 | 99.440000 | 30.000000 | 716.000000 | 0.001700 | 0.008100 | 2.022062e+09 | NaN | NaN | 0.001800 | 0.004100 | 2.022062e+07 |
import pandas as pd

# Load the CSV file (decoded as cp949) and select three columns by name.
csv_path = 'testFile.csv'
df_t = pd.read_csv(csv_path, encoding='cp949')

# Indexing with a list of labels returns a DataFrame holding just those columns.
selected_columns = ['최고일', '측정소코드', '최고\n(ppm)']
df_t[selected_columns]
최고일측정소코드최고\n(ppm)01234567891011121314151617181920212223242526
| 20220601 | 831481 | 0.0037 |
| 20220605 | 831491 | 0.0067 |
| 20220602 | 131441 | 0.0041 |
| 20220601 | 131373 | 0.0024 |
| 20220608 | 131451 | 0.0021 |
| 20220601 | 131991 | 0.0024 |
| 20220601 | 132401 | 0.0022 |
| 20220601 | 632421 | 0.0021 |
| 20220601 | 132993 | 0.0025 |
| 20220601 | 132994 | 0.0022 |
| 20220602 | 632371 | 0.0025 |
| 20220602 | 632431 | 0.0026 |
| 20220625 | 132991 | 0.0030 |
| 20220601 | 132992 | 0.0018 |
| 20220601 | 633361 | 0.0021 |
| 20220601 | 633461 | 0.0056 |
| 20220610 | 534461 | 0.0036 |
| 20220602 | 534341 | 0.0019 |
| 20220610 | 735351 | 0.0028 |
| 20220601 | 735172 | 0.0029 |
| 20220604 | 336451 | 0.0081 |
| 20220601 | 437202 | 0.0064 |
| 20220620 | 437401 | 0.0028 |
| 20220621 | 437411 | 0.0021 |
| 20220602 | 238241 | 0.0037 |
| 20220611 | 238191 | 0.0054 |
| 20220601 | 238481 | 0.0055 |
import pandas as pd

# Load the CSV file (decoded as cp949) and look at the raw values behind it.
csv_path = 'testFile.csv'
df_t = pd.read_csv(csv_path, encoding='cp949')

# .values exposes one column's data; its type is printed on the next line.
column_values = df_t['최고\n(ppm)'].values
print(column_values)
print(type(column_values))

# Rows at positions 1 through 6, as raw values (bare expression for notebook display).
df_t[1:7].values
[0.0037 0.0067 0.0041 0.0024 0.0021 0.0024 0.0022 0.0021 0.0025 0.0022
0.0025 0.0026 0.003 0.0018 0.0021 0.0056 0.0036 0.0019 0.0028 0.0029
0.0081 0.0064 0.0028 0.0021 0.0037 0.0054 0.0055]
<class 'numpy.ndarray'>
array([['인천', '옹진군', '덕적도', 831491, 100.0, 95.55, 28, 688, '0.0015',
0.001, 0.0067, 2022061003, nan, nan, 0.0012, 0.0023, 20220605],
['경기', '이천', '설성면', 131441, 100.0, 97.22, 29, 700, '0.0015',
0.0009, 0.0041, 2022061111, nan, nan, 0.0011, 0.0021, 20220602],
['경기', '파주', '파주', 131373, 100.0, 97.5, 30, 702, '0.0012', 0.0009,
0.0024, 2022060319, nan, nan, 0.001, 0.0015, 20220601],
['경기', '포천', '관인면', 131451, 100.0, 97.91, 30, 705, '0.0011',
0.0005, 0.0021, 2022061710, nan, nan, 0.0006, 0.0013, 20220608],
['경기', '연천', '연천(DMZ)', 131991, 100.0, 97.63, 29, 703, '0.0011',
0.0007, 0.0024, 2022061710, nan, nan, 0.0009, 0.0015, 20220601],
['강원', '양구', '방산면', 132401, 100.0, 97.5, 29, 702, '0.0008',
0.0007, 0.0022, 2022060211, nan, nan, 0.0007, 0.0012, 20220601]],
dtype=object)
import pandas as pd

# Load the CSV file (decoded as cp949) and take a positional slice of its rows.
csv_path = 'testFile.csv'
df_t = pd.read_csv(csv_path, encoding='cp949')

# Rows at positions 1 through 6 (bare expression for notebook display).
row_window = slice(1, 7)
df_t[row_window]
시.도시군구측정소명측정소코드장비\n가동률\n(%)유효\n자료\n획득률\n(%)유효\n측정\n일수\n(day)유효\n측정\n시간\n(hour)월평균\n(ppm)최저\n(ppm)최고\n(ppm)최고일시기준초과\n(회)초과율\n(%)최저\n(ppm).1최고\n(ppm).1최고일123456
| 인천 | 옹진군 | 덕적도 | 831491 | 100.0 | 95.55 | 28 | 688 | 0.0015 | 0.0010 | 0.0067 | 2022061003 | NaN | NaN | 0.0012 | 0.0023 | 20220605 |
| 경기 | 이천 | 설성면 | 131441 | 100.0 | 97.22 | 29 | 700 | 0.0015 | 0.0009 | 0.0041 | 2022061111 | NaN | NaN | 0.0011 | 0.0021 | 20220602 |
| 경기 | 파주 | 파주 | 131373 | 100.0 | 97.50 | 30 | 702 | 0.0012 | 0.0009 | 0.0024 | 2022060319 | NaN | NaN | 0.0010 | 0.0015 | 20220601 |
| 경기 | 포천 | 관인면 | 131451 | 100.0 | 97.91 | 30 | 705 | 0.0011 | 0.0005 | 0.0021 | 2022061710 | NaN | NaN | 0.0006 | 0.0013 | 20220608 |
| 경기 | 연천 | 연천(DMZ) | 131991 | 100.0 | 97.63 | 29 | 703 | 0.0011 | 0.0007 | 0.0024 | 2022061710 | NaN | NaN | 0.0009 | 0.0015 | 20220601 |
| 강원 | 양구 | 방산면 | 132401 | 100.0 | 97.50 | 29 | 702 | 0.0008 | 0.0007 | 0.0022 | 2022060211 | NaN | NaN | 0.0007 | 0.0012 | 20220601 |
import pandas as pd

# Load the CSV file (decoded as cp949) and filter its rows with a boolean mask.
csv_path = 'testFile.csv'
df_t = pd.read_csv(csv_path, encoding='cp949')

# Keep only the rows whose '최고일' column equals 20220601
# (bare expression for notebook display).
matches_target = df_t['최고일'] == 20220601
df_t[matches_target]
시.도시군구측정소명측정소코드장비\n가동률\n(%)유효\n자료\n획득률\n(%)유효\n측정\n일수\n(day)유효\n측정\n시간\n(hour)월평균\n(ppm)최저\n(ppm)최고\n(ppm)최고일시기준초과\n(회)초과율\n(%)최저\n(ppm).1최고\n(ppm).1최고일0356789131415192126
| 인천 | 강화군 | 석모리 | 831481 | 100.0 | 77.63 | 22 | 559 | 0.0013 | 0.0010 | 0.0037 | 2022060112 | NaN | NaN | 0.0011 | 0.0019 | 20220601 |
| 경기 | 파주 | 파주 | 131373 | 100.0 | 97.50 | 30 | 702 | 0.0012 | 0.0009 | 0.0024 | 2022060319 | NaN | NaN | 0.0010 | 0.0015 | 20220601 |
| 경기 | 연천 | 연천(DMZ) | 131991 | 100.0 | 97.63 | 29 | 703 | 0.0011 | 0.0007 | 0.0024 | 2022061710 | NaN | NaN | 0.0009 | 0.0015 | 20220601 |
| 강원 | 양구 | 방산면 | 132401 | 100.0 | 97.50 | 29 | 702 | 0.0008 | 0.0007 | 0.0022 | 2022060211 | NaN | NaN | 0.0007 | 0.0012 | 20220601 |
| 강원 | 고성 | 간성읍 | 632421 | 100.0 | 91.11 | 27 | 656 | 0.0011 | 0.0008 | 0.0021 | 2022060210 | NaN | NaN | 0.0008 | 0.0015 | 20220601 |
| 강원 | 고성 | 인제(DMZ) | 132993 | 100.0 | 97.50 | 29 | 702 | 0.0017 | 0.0012 | 0.0025 | 2022060211 | NaN | NaN | 0.0015 | 0.0020 | 20220601 |
| 강원 | 고성 | 고성(DMZ) | 132994 | 100.0 | 97.91 | 30 | 705 | 0.0011 | 0.0007 | 0.0022 | 2022060202 | NaN | NaN | 0.0010 | 0.0015 | 20220601 |
| 강원 | 화천 | 화천(DMZ) | 132992 | 100.0 | 99.44 | 30 | 716 | 0.0013 | 0.0010 | 0.0018 | 2022060107 | NaN | NaN | 0.0011 | 0.0016 | 20220601 |
| 충북 | 괴산 | 청천면 | 633361 | 100.0 | 96.80 | 29 | 697 | 0.001 | 0.0007 | 0.0021 | 2022060815 | NaN | NaN | 0.0008 | 0.0012 | 20220601 |
| 충북 | 음성 | 금왕 | 633461 | 100.0 | 96.80 | 29 | 697 | 0.001 | 0.0006 | 0.0056 | 2022060111 | NaN | NaN | 0.0007 | 0.0018 | 20220601 |
| 전북 | 부안 | 새만금 | 735172 | 100.0 | 97.08 | 29 | 699 | 0.0016 | 0.0005 | 0.0029 | 2022061013 | NaN | NaN | 0.0006 | 0.0026 | 20220601 |
| 경북 | 영덕 | 강구면 | 437202 | 100.0 | 95.27 | 28 | 686 | 0.001 | 0.0004 | 0.0064 | 2022060121 | NaN | NaN | 0.0007 | 0.0021 | 20220601 |
| 경남 | 거창 | 남상면 | 238481 | 92.3 | 90.00 | 27 | 648 | 0.0021 | 0.0017 | 0.0055 | 2022060120 | NaN | NaN | 0.0018 | 0.0028 | 20220601 |