The shapefiles for the UK postcode boundaries are provided by http://www.opendoorlogistics.com
In this notebook, there are a number of choropleth plots, showing regional patterns of vehicles having MOT tests.
# Import required libraries
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import datetime
import numpy as np
# Read the shapefile of UK postcode-district boundaries into a GeoDataFrame
uk_shape_file = "data/UK-postcode-boundaries-Jan-2015/Districts.shp"
uk = gpd.read_file(uk_shape_file)
# Reproject to EPSG:3395. The code is passed through the `epsg` keyword because
# the {'init': 'epsg:...'} dictionary form is deprecated by pyproj 2 and later.
uk = uk.to_crs(epsg=3395)
# Extract the Postcode Area: the first run of capital letters found in the
# district name. expand=False returns a Series, which is what a single column
# assignment expects (expand=True returns a one-column DataFrame).
uk['Postcode Area'] = uk['name'].str.extract('([A-Z]+)', expand=False)
# Read Postcode Area file
postcode_area_file = "data/Postcode Area.csv"
postcode_area = pd.read_csv(postcode_area_file)
# Merge the Postcode Area data into the GeoDataFrame on the shared
# 'Postcode Area' column (default inner join).
uk = uk.merge(postcode_area, on='Postcode Area')
uk.head()
# Load the MOT test results, parsing both date columns as datetimes
training_file = "data/MOT_result_2016_training.csv"
mot_results = pd.read_csv(
    training_file,
    parse_dates=['test_date', 'first_use_date'],
)
# Age of the vehicle at the time of the test, in units of 365 days
mot_results['age'] = (
    (mot_results['test_date'] - mot_results['first_use_date'])
    / datetime.timedelta(days=365)
)
# Consolidate some categories to keep the analysis simple:
#   test_result: PRS (pass after a minor fix within 1 hour) is counted as a Pass (P)
#   fuel_type:   EL, FC and HY are all classified as G(reen)
replace_dict = {
    'test_result': {'PRS': 'P'},
    'fuel_type': {'EL': 'G', 'FC': 'G', 'HY': 'G'},
}
mot_results = mot_results.replace(replace_dict)
mot_results.head()
# Calculate percentage rates of a given category
def calc_rates(summary_filter, column, results_filter="all"):
    """Return the percentage share of one category of `column` per postcode area.

    Reads the module-level ``mot_results`` DataFrame.

    Parameters
    ----------
    summary_filter : str
        Query expression selecting the category to keep, e.g.
        'test_result=="P"'. It is evaluated on a frame indexed by
        ('postcode_area', `column`).
    column : str
        Column of ``mot_results`` whose categories are counted.
    results_filter : str, default "all"
        Query expression applied to ``mot_results`` before counting, or
        "all" to use every row.

    Returns
    -------
    DataFrame
        Columns 'postcode_area', `column` and 'rate', where 'rate' is the
        category's share (0-100) of the tests counted in that postcode area.
    """
    # Apply the optional row filter before counting
    if results_filter == "all":
        results = mot_results
    else:
        results = mot_results.query(results_filter)
    # Number of tests for each (postcode area, category) pair
    counts = results.groupby(['postcode_area', column])[['test_id']].count()
    # Total for each postcode area, broadcast back onto every row. transform
    # keeps the original index, unlike groupby.apply which may prepend the
    # group key as an extra level and make reset_index() fail.
    area_totals = counts.groupby(level=0).transform('sum')
    mot_rates = 100 * counts / area_totals
    # Keep only the category of interest (e.g. vehicles that passed)
    mot_rates = mot_rates.query(summary_filter).rename(columns={'test_id': 'rate'})
    return mot_rates.reset_index()
# Generate choropleth plot
def choropleth(ax, mot_data, column, intervals=10, cmap='viridis', region="all"):
    """Draw a choropleth of `column` by postcode area onto `ax`.

    Uses the module-level ``uk`` GeoDataFrame for the geometry.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    mot_data : DataFrame with a 'postcode_area' column and `column`.
    column : str
        Name of the value column in `mot_data` that sets the colour.
    intervals : int, default 10
        Number of equal-width classes used for colouring.
    cmap : colormap name or object passed through to the plot call.
    region : str, default "all"
        Value of the 'Country_Region' column to restrict the map to, or
        "all" for no restriction.
    """
    # Left-merge so every geometry is kept, even those with no MOT data
    uk_mot = uk.merge(mot_data, left_on='Postcode Area', right_on='postcode_area', how='left')
    # Filter by region if required. A boolean mask is used instead of a
    # string-built query so region names containing quotes cannot break it.
    if region != 'all':
        uk_mot = uk_mot[uk_mot['Country_Region'] == region]
    # Split into rows with a value and rows without (no data for postcode area)
    has_data = uk_mot[column].notnull()
    uk_mot_zero = uk_mot[~has_data]
    uk_mot = uk_mot[has_data]
    # Create choropleth
    uk_mot.plot(ax=ax, column=column, scheme='equal_interval', k=intervals, cmap=cmap,
                legend=False, linewidth=0.1, edgecolor='grey')
    # Fill in areas with no data as white
    if not uk_mot_zero.empty:
        uk_mot_zero.plot(ax=ax, linewidth=0.1, edgecolor='grey', color='white')
# Choropleth subplot
def sub_choropleth(mot_data, min_rate, max_rate, min_scale, max_scale, fig, location, title,
                   intervals=10):
    """Add one choropleth subplot to `fig`, coloured on a scale shared across subplots.

    The colormap is built by ``custom_cmap`` from the module-level ``st_cols``
    and ``end_cols`` colours, trimmed to the part of the common scale
    [min_scale, max_scale] that this subplot's [min_rate, max_rate] covers.

    Parameters
    ----------
    mot_data : DataFrame with 'postcode_area' and 'rate' columns.
    min_rate, max_rate : range of the colour scale for this subplot.
    min_scale, max_scale : range of the colour scale common to all subplots.
    fig : figure the subplot is added to.
    location : subplot position passed to ``fig.add_subplot`` (e.g. 131).
    title : str, subplot title.
    intervals : int, default 10
        Number of classes drawn on the map. It should match the interval
        count used when `min_rate` / `max_rate` were computed.
    """
    cmap = custom_cmap(min_rate, max_rate, min_scale, max_scale, st_cols, end_cols)
    ax = fig.add_subplot(location)
    # Act on this subplot explicitly rather than on pyplot's current axes,
    # which is only the same object while `fig` is the current figure.
    ax.axis('equal')
    ax.axis('off')
    # Title
    ax.set_title(title, fontdict={'fontsize': 20})
    # Generate choropleth
    choropleth(ax=ax,
               mot_data=mot_data,
               column='rate',
               intervals=intervals,
               cmap=cmap)
# Calculate range of scale required for colorbar
def scale_required(mot_column, intervals):
    """Return the (min, max) values the colorbar scale should span.

    The data range of `mot_column` is divided into `intervals` equal-width
    intervals. The returned range runs from the midpoint of the lowest
    interval to the midpoint of the highest one, i.e. the data range pulled
    in by half an interval width at each end.
    """
    lowest, highest = mot_column.min(), mot_column.max()
    # Half the width of one interval (assuming equal intervals)
    half_width = (highest - lowest) / intervals / 2
    return lowest + half_width, highest - half_width
# Display colorbar scale (GeoPandas doesn't do this)
def colorbar(ax, min_value, max_value, intervals, percent=True, cmap='viridis'):
    """Draw a colorbar spanning [min_value, max_value] into the axes `ax`.

    Parameters
    ----------
    ax : axes that will hold the colorbar (passed as ``cax``).
    min_value, max_value : ends of the colour scale.
    intervals : int
        Number of evenly spaced levels generated between the two ends.
    percent : bool, default True
        If true, tick labels are suffixed with '%'.
    cmap : colormap name or object, default 'viridis'.
    """
    # Range required for colorbar, colour should match midpoint of each interval
    levels = np.linspace(min_value, max_value, intervals)
    zeros = np.zeros(levels.shape)
    # Zero-size dummy scatter, used only to carry the value-to-colour mapping
    mappable = plt.scatter(x=zeros, y=zeros, s=zeros, c=levels, cmap=cmap)
    # Let the colorbar's formatter add the '%' suffix. Reading the tick-label
    # text and re-setting it by hand depends on the labels already being
    # populated and freezes them against later tick changes.
    tick_format = '%g%%' if percent else None
    plt.colorbar(mappable, cax=ax, format=tick_format)
# Custom colormap adjusted for range of data within a given subplot to ensure consistent scales
def custom_cmap(min_rate, max_rate, min_scale, max_scale, st_scale, end_scale):
    """Build a colormap covering only a subplot's share of a common colour scale.

    The common scale runs linearly from colour `st_scale` at `min_scale` to
    colour `end_scale` at `max_scale`. The returned colormap starts at the
    colour the common scale has at `min_rate` and ends at the colour it has
    at `max_rate`, so subplots with different data ranges use consistent
    colours.

    `st_scale` and `end_scale` are (red, green, blue) sequences.
    """
    # Width of scale for all subplots
    scale_width = max_scale - min_scale
    # Positions of this subplot's ends as fractions along the common scale
    lower_frac = (min_rate - min_scale) / scale_width
    upper_frac = 1.0 - (max_scale - max_rate) / scale_width

    def blend(frac):
        # Colour at the given fraction between the start and end colours
        return [(1 - frac) * st + frac * end for st, end in zip(st_scale, end_scale)]

    adj_st_scale = blend(lower_frac)
    adj_end_scale = blend(upper_frac)
    # Colour dictionary: one two-point linear ramp per channel
    cdict = {
        channel: ((0.0, adj_st_scale[i], adj_st_scale[i]),
                  (1.0, adj_end_scale[i], adj_end_scale[i]))
        for i, channel in enumerate(('red', 'green', 'blue'))
    }
    # Create the custom color map
    return LinearSegmentedColormap('custom_cmap', cdict)
# Percentage of tests with result "P" (pass) for each Postcode Area
mot_data = calc_rates(summary_filter='test_result=="P"', column='test_result')
# Choropleth appearance
intervals = 20
cmap = 'Blues'
# Figure with a single subplot for the map: equal aspect, axes hidden
fig = plt.figure(figsize=(10, 15))
ax = fig.add_subplot(111)
plt.axis('equal')
plt.axis('off')
# Draw the map
choropleth(ax=ax, mot_data=mot_data, column='rate', intervals=intervals, cmap=cmap)
# Title
plt.title(
    'MOT (Vehicle Safety Test) Pass Rates by Postcode Area\n Cars, Small passenger vehicles etc.. in 2016',
    fontdict={'fontsize': 16},
)
# Range of values the colorbar has to span
min_value, max_value = scale_required(mot_data['rate'], intervals)
# Separate axes for the colorbar, then the colorbar itself
cbax = fig.add_axes([0.8, 0.65, 0.03, 0.15])
colorbar(ax=cbax, min_value=min_value, max_value=max_value,
         intervals=intervals, percent=True, cmap=cmap)
plt.show()
There does appear to be some significant regional variation in MOT pass rates. This is not surprising given the diversity of the various regions of the UK.
# Mean age of vehicles having an MOT, one row per postcode area
mot_age = mot_results.groupby('postcode_area')[['age']].mean()
mot_age = mot_age.reset_index()
# Choropleth appearance
intervals = 20
cmap = 'Reds'
# Figure with a single subplot for the map: equal aspect, axes hidden
fig = plt.figure(figsize=(10, 15))
ax = fig.add_subplot(111)
plt.axis('equal')
plt.axis('off')
# Draw the map
choropleth(ax=ax, mot_data=mot_age, column='age', intervals=intervals, cmap=cmap)
# Title
plt.title(
    'Average Age of Cars having MOT in UK during 2016',
    fontdict={'fontsize': 16},
)
# Range of values the colorbar has to span
min_value, max_value = scale_required(mot_age['age'], intervals)
# Separate axes for the colorbar, then the colorbar itself (ages are not percentages)
cbax = fig.add_axes([0.8, 0.65, 0.03, 0.15])
colorbar(ax=cbax, min_value=min_value, max_value=max_value,
         intervals=intervals, percent=False, cmap=cmap)
plt.show()
While this is only the age of vehicles having an MOT, not of all vehicles, it is an indicator of vehicle ages in the various regions. Urban areas tend to have younger vehicles (among those having an MOT test).
However, the younger age of cars having an MOT in Scotland was a surprising result for me at first. This suggested further investigation, so in the next plot I have compared MOT pass rates conditioned on vehicle age.
# Number of intervals for the choropleths. sub_choropleth draws each map with
# choropleth's default of 10 classes, so the scale ranges below must be
# computed for the same number, otherwise the half-interval adjustment in
# scale_required does not match the classes actually drawn.
intervals = 10
# Colour scale to be used across all subplots (RGB: white to blue)
st_cols = [1,1,1]
end_cols = [0,0,1]
# Create figure for plot and add subplot for choropleth
fig = plt.figure(figsize=(18,10))
fig.suptitle("MOT pass rates by Postcode Area and Age of Vehicle", fontsize=24)
# Calculate pass rates for cars 3-6 years
mot_data_1 = calc_rates(results_filter="age > 3 and age < 6",
                        summary_filter='test_result=="P"',
                        column='test_result')
# Calculate range for colorscale of subplot
min_rate_1, max_rate_1 = scale_required(mot_data_1['rate'], intervals)
# Calculate pass rates for cars 6-9 years
mot_data_2 = calc_rates(results_filter="age > 6 and age < 9",
                        summary_filter='test_result=="P"',
                        column='test_result')
# Calculate range for colorscale of subplot
min_rate_2, max_rate_2 = scale_required(mot_data_2['rate'], intervals)
# Calculate pass rates for cars 9-12 years
mot_data_3 = calc_rates(results_filter="age > 9 and age < 12",
                        summary_filter='test_result=="P"',
                        column='test_result')
# Calculate range for colorscale of subplot
min_rate_3, max_rate_3 = scale_required(mot_data_3['rate'], intervals)
# Find the common range covering all three subplots
min_scale = min(min_rate_1, min_rate_2, min_rate_3)
max_scale = max(max_rate_1, max_rate_2, max_rate_3)
# 1st Map
sub_choropleth(mot_data_1, min_rate_1, max_rate_1, min_scale, max_scale, fig, 131, '3-6 Years')
# 2nd map
sub_choropleth(mot_data_2, min_rate_2, max_rate_2, min_scale, max_scale, fig, 132, '6-9 Years')
# 3rd Map
sub_choropleth(mot_data_3, min_rate_3, max_rate_3, min_scale, max_scale, fig, 133, '9-12 Years')
# Color bar scale using full range across all maps
cmap = custom_cmap(min_scale, max_scale, min_scale, max_scale, st_cols, end_cols)
# Add axes for colorbar
cbax = fig.add_axes([0.9, 0.65, 0.02, 0.25])
colorbar(cbax, min_scale, max_scale, intervals, percent=True, cmap=cmap)
plt.show()
Vehicles that are 3-6 years old seem to have a fairly consistent pass rate across the country.
There is some regional variation for vehicles that are 6-9 years old, with vehicles in Scotland having a slightly lower pass rate.
This difference becomes more pronounced for vehicles that are 9-12 years old.
One possible explanation for this is that Scotland experiences colder conditions than the rest of the UK, requiring more frequent gritting of the roads. The salt in the grit is known to cause rust.
As the data this is based on is only for cars having an MOT, the following is only an approximate indicator of the numbers of Diesel cars in various parts of the UK.
# Percentage of vehicles having an MOT whose fuel type is diesel ("DI"), by Postcode Area
mot_data = calc_rates(summary_filter='fuel_type=="DI"', column='fuel_type')
# Choropleth appearance
intervals = 20
cmap = 'YlOrBr'
# Figure with a single subplot for the map: equal aspect, axes hidden
fig = plt.figure(figsize=(10, 15))
ax = fig.add_subplot(111)
plt.axis('equal')
plt.axis('off')
# Draw the map
choropleth(ax=ax, mot_data=mot_data, column='rate', intervals=intervals, cmap=cmap)
# Title
plt.title(
    'Percentage of Vehicles having a MOT that are Diesel by Postcode Area\n Cars, Small passenger vehicles etc.. in 2016',
    fontdict={'fontsize': 16},
)
# Range of values the colorbar has to span
min_value, max_value = scale_required(mot_data['rate'], intervals)
# Separate axes for the colorbar, then the colorbar itself
cbax = fig.add_axes([0.8, 0.65, 0.03, 0.15])
colorbar(ax=cbax, min_value=min_value, max_value=max_value,
         intervals=intervals, percent=True, cmap=cmap)
plt.show()
There is a clear regional variation. Areas having higher numbers of Diesel vehicles tend to be rural (Mid Wales, the Highlands etc..).
As the data this is based on is only for cars having an MOT, the following is only an approximate indicator of the numbers of Electric, Fuel Cell or Hybrid cars.
As numbers across the UK were very low, I have concentrated on London which had the highest numbers.
# Calculate percentages of vehicles having an MOT that are Electric or Hybrid
# (fuel types consolidated as "G")
mot_data = calc_rates(summary_filter='fuel_type=="G"',
                      column='fuel_type')
# Region shown on the map
region = 'London'
# Create figure for plot and add subplot for choropleth
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
plt.axis('equal')
plt.axis('off')
# Choropleth appearance
intervals=20
cmap = 'YlGn'
# Generate choropleth
choropleth(ax=ax,
           mot_data=mot_data,
           column='rate',
           region=region,
           intervals=intervals,
           cmap=cmap)
# Title
plt.title('''Percentage of vehicles having a MOT that are Electric/Hybrid by Postcode Area
Cars, Small passenger vehicles etc.., London Area, in 2016''',
          fontdict={'fontsize':16})
# Calculate range for colorbar from the postcode areas actually drawn. The map
# is restricted to one region, so using the UK-wide rates would give a
# colorbar that does not match the colours on the map.
region_areas = uk.loc[uk['Country_Region'] == region, 'Postcode Area']
region_rates = mot_data.loc[mot_data['postcode_area'].isin(region_areas), 'rate']
min_value, max_value = scale_required(region_rates, intervals)
# Add axes for colorbar
cbax = fig.add_axes([0.9,0.5, 0.03, 0.15])
# Create colorbar
colorbar(ax=cbax,
         min_value=min_value,
         max_value=max_value,
         intervals=intervals,
         percent=True,
         cmap=cmap)
plt.show()
Cars have to be 3 years old before they have their first MOT, so this plot does not show current rates of Electric/Hybrid vehicles. However, it does indicate emerging patterns. The highest rates occur in West London, which is fairly affluent.