Indian states

This post explores how much control we have over plotting in matplotlib.
Here we'll plot four features of Indian states in a single chart: life expectancy, GDP, literacy rate, and population density.

I took all the data below from Wikipedia; in a future post we may try crawling Wikipedia pages to fetch this information instead of entering it manually.

In [3]:
import matplotlib.pyplot as plt
import numpy as np
In [4]:
# State names, in the same order as the value lists defined in the cells below.
# Each string is used verbatim as a point label in the bubble chart, so the
# leading spaces in the Telangana entry are intentional and must be kept
# (presumably they shift its label away from the Andhra Pradesh label, which
# sits at the same life expectancy and a nearby GDP).
states = [
    'Kerala', 'Delhi', 'Jammu and Kashmir', 'Uttarakhand',
    'Himachal Pradesh', 'Punjab', 'Maharashtra', 'Tamil Nadu',
    'West Bengal', 'Karnataka', 'Gujarat', 'Haryana',
    'Andhra Pradesh', '               Telangana', 'Bihar', 'Rajasthan',
    'Jharkhand', 'Odisha', 'Chhattisgarh', 'Madhya Pradesh',
    'Uttar Pradesh', 'Assam',
]
In [5]:
# Life expectancy in years, one value per state (same order as `states`).
life = [
    74.9, 73.2, 72.6, 71.7, 71.6, 71.6,
    71.6, 70.6, 70.2, 68.8, 68.7, 68.6,
    68.5, 68.5, 68.1, 67.7, 66.6, 65.8,
    64.8, 64.2, 64.1, 63.9,
]
In [6]:
# GDP in USD billions, one value per state (same order as `states`).
GDP = [
    115, 96, 23, 35, 19, 72,
    390, 210, 141, 200, 199, 95,
    105, 115, 95, 115, 43, 61,
    45, 110, 225, 38,
]
In [7]:
# Population density per sq. km, one value per state (same order as `states`).
pop_den = [
    859, 11297, 57, 189, 123, 550,
    365, 555, 1029, 319, 308, 573,
    303, 307, 1102, 201, 414, 269,
    189, 236, 828, 397,
]
In [8]:
# Literacy rate in percent, one value per state (same order as `states`).
lit_rate = [
    93.91, 86.34, 68.74, 79.63, 83.78, 76.68,
    82.91, 80.33, 77.08, 75.6, 79.31, 76.64,
    67.4, 66.5, 63.82, 67.06, 67.63, 73.45,
    71.04, 70.63, 69.72, 73.18,
]
In [9]:
# Sanity check: the five parallel lists should all have the same length.
tuple(len(column) for column in (lit_rate, pop_den, GDP, life, states))
Out[9]:
(22, 22, 22, 22, 22)
In [10]:
# Marker areas for the bubble chart: population densities scaled up by 3.
np_pop = np.array(pop_den) * 3
In [11]:
# Peek at the first five raw (unscaled) population densities.
pop_den[:5]
Out[11]:
[859, 11297, 57, 189, 123]
In [12]:
# Same five entries after scaling; each should be 3x the raw value.
np_pop[:5]
Out[12]:
array([ 2577, 33891,   171,   567,   369])
In [13]:
# Manual spot check of the scaling for the largest density (Delhi, index 1).
11297*3
Out[13]:
33891
In [14]:
# Bubble chart of Indian states:
#   x position  -> GDP
#   y position  -> life expectancy
#   bubble area -> population density (pre-scaled, see np_pop)
#   colour      -> literacy rate
# The figure size is fixed when the figure is created, and everything is drawn
# through the explicit Axes object rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(28.5, 20.5))

sc = ax.scatter(GDP, life, s=np_pop, c=lit_rate, alpha=0.5)
ax.set_xlabel('GDP in USD Billions', fontsize=20)
ax.set_ylabel('Life expectancy in years', fontsize=20)
ax.set_title('Indian States Bubble Chart', fontsize=30)
ax.grid(True)

# The colour bar explains the literacy-rate colour mapping of the bubbles.
cb = fig.colorbar(sc, ax=ax)
cb.set_label('Literacy rate in percentage', fontsize=20)

# Label every bubble with its state name, anchored at the data point.
for txt, x, y in zip(states, GDP, life):
    ax.annotate(txt, (x, y))

# Size legend. Each entry is (marker area, label). The areas are the labelled
# densities multiplied by 3, matching the scaling applied to build np_pop.
# The zero-area entry draws nothing; it is presumably a spacer that leaves
# room in front of the largest marker.
legend_entries = [(300, '100'), (3000, '1000'), (0, ' '), (30000, '10000')]
s1, s2, s3, s4 = [
    ax.scatter([], [], s=area, marker='o', color='#555555')
    for area, _ in legend_entries
]
ax.legend(
    (s1, s2, s3, s4),
    [label for _, label in legend_entries],
    scatterpoints=1,
    loc='upper right',
    ncol=4,
    fontsize=20,
)

# Caption for the size legend, positioned in data coordinates.
ax.text(230, 74, 'Size by population density per sq.km', fontsize=30)
plt.show()

The cell below is only for notebook styling; you can ignore it.

In [16]:
# Notebook-only styling: stretch the page container to the full browser width.
# `IPython.display` is the public import path; importing `display` from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))