All of these charts show the negative sentiment of Republican voters in Utah toward the caucus-convention system.
The first chart shows the percentage of Republican voters in each county who disapprove of the caucus-convention system. Green indicates low negative sentiment and red indicates high negative sentiment.
import pandas as pd
import numpy as np
import folium
import branca.colormap as cmc
import matplotlib.pyplot as plt
import seaborn as sns
import math
from scipy.stats import bootstrap
from scipy import stats
import statistics
import sys
import nltk
from nltk.tokenize import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.ticker as mtick
import warnings
import csv
#warnings.filterwarnings('ignore')
#nltk.download('vader_lexicon');
# --- Load the survey export and build the per-voter / per-region tables ---

# VADER sentiment analyzer instance (not referenced again in this part of the
# notebook).
sid = SentimentIntensityAnalyzer()
#path = '\\U:\\Final\\'
#path = 'c:/users/pptallon/desktop/ad/'
datafile = 'caucus_feedback.csv'
# Raw string: '\F' and '\c' are not valid escape sequences, so the plain
# literal only worked by accident and triggers a warning on newer Pythons.
caucus_df = pd.read_csv(r'U:\Final\caucus_feedback.csv', skiprows=0)
#caucus_df = pd.read_csv('c:/users/pptallon/desktop/ad/caucus_feedback.csv', skiprows = 0)

# Keep Republican respondents only. The explicit copy makes the column
# assignments below write to an independent frame instead of a filtered view.
caucus_df = caucus_df[caucus_df['party'] == 'Republican'].copy()

# Upper-cased county name; used as the join key against the GeoJSON 'NAME'
# property when the county map is built.
caucus_df['capCounty'] = caucus_df['county'].str.upper()

# Every county not listed here is labelled 'rural'.
urban_counties = ['Utah', 'Salt Lake', 'Davis', 'Weber']
caucus_df['county_type'] = np.where(
    caucus_df['county'].isin(urban_counties), 'urban', 'rural'
)

caucus_df = (
    caucus_df
    .sort_values(by=['voter_id', 'no'], axis=0, ascending=False)
    .reset_index(drop=True)
)

# Collapse to one row per voter (and per combination of the other listed
# attributes), keeping the maximum of 'score', 'yes' and 'no'.
# NOTE(review): groupby drops rows with a missing value in any key column by
# default - confirm that is intended for respondents with incomplete profiles.
voter_keys = ['voter_id', 'city', 'cd', 'ld', 'sd', 'gender', 'age', 'agg_score',
              'precinct', 'county', 'caucus', 'p_turnout', 'years_rep',
              'capCounty', 'county_type']
caucus_deduped = caucus_df.groupby(voter_keys)[['score', 'yes', 'no']].max().reset_index()

# Mean of the 'no' flag (share of negative responses) and respondent count per
# county, legislative district ('ld'), senate district ('sd') and city.
county_df = caucus_deduped.groupby(['county', 'capCounty'])['no'].agg(['mean', 'count']).reset_index()
ld_df = caucus_deduped.groupby(['ld'])['no'].agg(['mean', 'count']).reset_index()
sd_df = caucus_deduped.groupby(['sd'])['no'].agg(['mean', 'count']).reset_index()
cty_df = caucus_deduped.groupby(['city'])['no'].agg(['mean', 'count']).reset_index()

model_df = caucus_deduped[['years_rep', 'score', 'yes', 'no', 'gender', 'age',
                           'agg_score', 'caucus', 'p_turnout', 'county_type']]
county_df_indexed = county_df.set_index('capCounty')
ld_df['LD'] = ld_df['ld'].astype(int)

# Colour-bin edges from 0.20 to 0.70 in 0.01 steps.
granules = [item/100 for item in range(20, 71, 1)]
# --- Chart 1: county choropleth of the share of negative responses ---
#filename = "U:\Final\UtahCountyBoundaries.geojson"
utah_geo = 'u:/Final/UtahCountyBoundaries.geojson'
#utah_geo = 'c:/users/pptallon/desktop/ad/UtahCountyBoundaries.geojson'
center_of_map = [39.555845, -111.733526]
my_map2 = folium.Map(location=center_of_map,
                     zoom_start=7,
                     tiles='cartodbpositron',
                     width='90%',
                     height='100%',
                     left='5%',
                     right='5%',
                     top='0%')

# 'fill_opacity' is the keyword folium.Choropleth actually reads; the previous
# spelling 'fillopacity' never reached the style function, so the default
# opacity was used instead of the requested 0.9.
# NOTE(review): 'labels' is not a documented Choropleth argument and looks like
# a leftover; it is kept unchanged here - confirm it can be removed.
ch_map = folium.Choropleth(geo_data=utah_geo,
                           name='counties',
                           data=county_df,
                           columns=['capCounty', 'mean'],
                           key_on='feature.properties.NAME',
                           fill_color='RdYlGn_r',
                           bins=[0, *granules, 1],
                           labels='aaa',
                           fill_opacity=.9,
                           line_opacity=0.4,
                           legend_name='Presidential Preference Poll Negative Sentiment by County',
                           highlight=True).add_to(my_map2)

# Copy the display values into each GeoJSON feature so the tooltip can read
# them. Counties that appear in the boundary file but have no survey rows get
# placeholder values instead of raising KeyError, because the tooltip fields
# are looked up on every feature.
for feature in ch_map.geojson.data['features']:
    props = feature['properties']
    county_key = props['NAME']
    if county_key in county_df_indexed.index:
        props['mean'] = str(round(county_df_indexed.loc[county_key, 'mean']*100, 1)) + '%'
        props['county'] = str(county_df_indexed.loc[county_key, 'county'])
    else:
        props['mean'] = 'n/a'
        props['county'] = str(county_key).title()

ch_map.geojson.add_child(
    folium.features.GeoJsonTooltip(fields=['county', 'mean'],
                                   aliases=['County: ', 'Opposition: '],
                                   labels=True,
                                   style=('background-color: black; color: white')))
my_map2.save('U:/Final/1st_chart.html')
#my_map2.save('c:/users/pptallon/desktop/ad/1st_chart.html');
# you'll need to pip install selenium
import io
from PIL import Image
#img_data = my_map2._to_png(5)
#img = Image.open(io.BytesIO(img_data))
#img.save('U:/Final/1st_chart.png')
The second chart shows caucus sentiment statewide. Red is negative sentiment, green is positive sentiment and blue is indifferent.
The inner ring of the chart shows the raw survey responses when voters were asked to rate the Presidential Preference Poll experience on a scale from 1 to 5. Some responses provided feedback without a sentiment score; those are listed as “text responses”.
# --- Chart 2 data prep: normalise scores and classify each respondent ---
# NOTE: pie_df is the same object as caucus_deduped (no copy is made), so the
# score clean-up and the new 'sentiment' column are visible through both
# names. This matches the original behaviour.
pie_df = caucus_deduped
slices = [item/10 for item in range(20, 71, 1)]

# Snap out-of-scale and half-point scores onto the reporting scale.
score_fixups = {-1.0: 0.0, 1.5: 2.0, 2.5: 2.0, 3.5: 4.0, 4.5: 4.0}
pie_df['score'] = pie_df['score'].replace(score_fixups)

# First-pass label: a 'no' flag wins, then a score of 3 means indifferent,
# everything else counts as a supporter.
pie_df['sentiment'] = np.select(
    [pie_df['no'] == 1.0, pie_df['score'] == 3.0],
    ['Detractor', 'Indifferent'],
    default='Supporter',
)

# A label that contradicts the numeric score (a detractor scoring 3-5, or a
# supporter scoring 1-2) is reclassified as indifferent with a score of 3.
contradictory = (
    ((pie_df['sentiment'] == 'Detractor') & pie_df['score'].isin([3.0, 4.0, 5.0]))
    | ((pie_df['sentiment'] == 'Supporter') & pie_df['score'].isin([1.0, 2.0]))
)
pie_df.loc[contradictory, 'sentiment'] = 'Indifferent'
pie_df.loc[contradictory, 'score'] = 3.0

# Rows with neither flag set and a score of 0 carry no numeric rating; mark
# the score as missing so they form their own group in the chart.
unscored = (pie_df['yes'] == 0) & (pie_df['no'] == 0) & (pie_df['score'] == 0)
pie_df.loc[unscored, 'score'] = float('nan')
# --- Chart 2 aggregation and colour set-up ---
# One row per (sentiment, score) pair; dropna=False keeps the rows whose score
# is missing as their own group.
pie_df = pie_df.groupby(['sentiment', 'score'], dropna=False).size().reset_index(name='count')
pie_df['sentiment'] = pd.Categorical(pie_df['sentiment'], ["Detractor", "Indifferent", "Supporter"])
# Sort on score as well as sentiment: the hard-coded 'labels' list below is
# matched to the rows purely by position, so the order inside each sentiment
# group must be deterministic (missing scores sort last).
pie_df = pie_df.sort_values(['sentiment', 'score'], ascending=True).reset_index(drop=True)

number_outside_colors = len(pie_df.sentiment.unique())
outside_color_ref_number = np.arange(number_outside_colors)*6
number_inside_colors = len(pie_df)
all_color_ref_number = np.arange(number_outside_colors + number_inside_colors)
inside_color_ref_number = [ref for ref in all_color_ref_number
                           if ref not in outside_color_ref_number]

my_colors_main = {'0.0': '4',
                  '1.0': '5',
                  '2.0': '6',
                  '3.0': '9',
                  '4.0': '2',
                  '5.0': '1',
                  'nan': '7'}
my_colors_score = {'For': '0',
                   'Against': '4',
                   'Indifferent': '8'}
my_scores = {'nan': 'Written',
             '0.0': 'Zero!',
             '1.0': '1',
             '2.0': '2',
             '3.0': '3',
             '4.0': '4',
             '5.0': '5'}

# matplotlib.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap takes
# the same (name, lut) arguments and exists in older releases too.
top = plt.get_cmap('Reds_r', 10)
bottom = plt.get_cmap('Blues_r', 10)
middle = plt.get_cmap('Greens_r', 10)
# Stacked 30-colour map: entries 0-9 reds, 10-19 greens, 20-29 blues.
newcolors = np.vstack((top(np.linspace(0, 1, 10)),
                       middle(np.linspace(0, 1, 10)),
                       bottom(np.linspace(0, 1, 10))))
newcmp = ListedColormap(newcolors, name='RedBlue')

# Inner-ring labels, matched to the rows of pie_df by position.
# NOTE(review): this assumes exactly eight (sentiment, score) groups in this
# order - verify against the data if the survey export changes.
labels = ['R < 1', 'R = 1', 'R = 2', 'Text', 'R = 3',
          'R = 4', 'R = 5', 'Text']
def label(score):
    """Return the display label for a numeric survey score.

    A NaN score (feedback given without a rating) yields 'Text Feedback';
    any other value yields 'score = N', with N truncated to an integer.
    """
    return 'Text Feedback' if math.isnan(score) else 'score = ' + str(int(score))
# --- Chart 2: nested pie (outer ring = sentiment, inner ring = raw score) ---
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)
colormap = newcmp
# One colour per sentiment, in categorical order
# (Detractor, Indifferent, Supporter).
outer_colors = colormap([3, 23, 13])
all_feedback = pie_df['count'].sum()

# observed=False spells out the grouping behaviour for the categorical key
# (every declared category gets a wedge) instead of relying on the pandas
# default; ax=ax draws on the axes created above rather than the implicit
# current axes.
pie_df.groupby(['sentiment'], observed=False)['count'].sum().plot(
    kind='pie',
    ax=ax,
    radius=1,
    colors=outer_colors,
    pctdistance=.85,
    labeldistance=1.1,
    wedgeprops={'edgecolor': 'white'},
    textprops={'fontsize': 16},
    # Show both the percentage and the response count it corresponds to.
    autopct=lambda p: '{:.2f}%\n({:,.0f})'.format(p, (p/100)*all_feedback),
    startangle=90,
    counterclock=False,
)

# One colour per row of pie_df, matched by position.
inner_colors = colormap([5, 6, 7, 8, 25, 15, 16, 18])
pie_df['count'].plot(
    kind='pie',
    ax=ax,
    radius=.7,
    colors=inner_colors,
    pctdistance=.55,
    labeldistance=.65,
    wedgeprops={'edgecolor': 'white'},
    textprops={'fontsize': 12},
    labels=labels,
    #autopct = '%1.1f%%',
    startangle=90,
    counterclock=False,
)

# White disc in the centre turns the nested pies into a donut.
hole = plt.Circle((0, 0), 0.3, fc='white')
fig1 = plt.gcf()
ax.add_artist(hole)
ax.yaxis.set_visible(False)
plt.suptitle('Presidential Preference Poll Sentiment', fontsize=22)
# Fixed the 'stronly' typo in the user-facing subtitle.
plt.title('5 = strongly support, 1 = strongly oppose\n("text" = no numeric score provided)')
ax.text(0, 0, 'Responses\n' + str(round(all_feedback, 1)), ha='center', va='center', size=18)
ax.axis('equal')
plt.show()
The third chart shows survey responses by city. Larger circles are cities with more responses. Color shows the share of negative responses (green = low, red = high; yellow marks the midpoint of the 20%–70% color scale, i.e. about 45%).
# Green -> yellow -> red colour ramp for the city map, spanning values from
# 0.2 to 0.7.
#index=[0,*granules,1],# Colors
custom_colormap = cmc.LinearColormap(
    colors=['green', 'yellow', 'red'],
    vmin=.2,
    vmax=.7,
    caption='Custom Color Scale',
)
def comments(value):
    """Return the tooltip fragment with the opposition share, or '' for zero.

    `value` is a fraction; it is shown as a percentage rounded to one
    decimal place, prefixed with an HTML line break.
    """
    if value == 0:
        return ''
    return f"<br>Opposed: {round(value*100, 1)}%"
# --- Chart 3: one circle per city, sized by response count ---
#filename = 'city3.csv'
# Raw string: '\F' and '\c' are not valid escape sequences.
city_df = pd.read_csv(r'U:\Final\city3.csv', skiprows=0)
#city_df = pd.read_csv('c:/users/pptallon/desktop/ad/city3.csv', skiprows = 0)
#cty_df = map_df.groupby('city')['no'].agg(['mean', 'count']).reset_index()

# Inner join: cities present in only one of the two tables are dropped.
city_df_merged = pd.merge(cty_df, city_df, on=['city'])
center_of_map = [40.4957, -111.8605]
my_map5 = folium.Map(location=center_of_map,
                     zoom_start=9,
                     width='90%',
                     height='100%',
                     left='5%',
                     right='5%',
                     top='0%')

for _, city_row in city_df_merged.iterrows():
    try:
        shade = custom_colormap(city_row['mean'])
        responses = int(city_row['count'])
        folium.Circle(
            location=[city_row['lat'], city_row['long']],
            tooltip=('City: ' + city_row['city']
                     + '<br>Responses: ' + str(responses)
                     + comments(city_row['mean'])),
            popup='Date:',
            radius=responses*12,
            color=shade,
            fill=True,
            fill_color=shade,
            opacity=0.9,
            fill_opacity=0.5,
        ).add_to(my_map5)
    except (ValueError, TypeError) as err:
        # Still best-effort: a city with unusable values (e.g. missing
        # coordinates or a missing mean) is skipped, but the skip is now
        # reported and unrelated errors are no longer hidden.
        print(f"Skipping city {city_row['city']!r}: {err}")

my_map5.save('U:/Final/third_chart.html')
#my_map5.save('c:/users/pptallon/desktop/ad/third_chart.html');
The fourth chart shows the percentage of negative sentiment by senate district, with a 95% confidence interval. As you can see, all senate districts have negative sentiment, but only SD28 has a confidence interval that includes 50%.
# --- Chart 4 data prep: bootstrap confidence intervals per region ---
sd_df['SD'] = sd_df['sd'].astype(int)
sd_df_indexed = sd_df.set_index('SD')
#sd_df_indexed
datafile_sen = 'senators.csv'
# Raw string: '\F' is not a valid escape sequence.
sen_df = pd.read_csv(r'U:\Final\senators.csv', skiprows=0)
#sen_df = pd.read_csv('c:/users/pptallon/desktop/ad/senators.csv', skiprows = 0)
sen_df = sen_df.set_index('SD')
#sen_df
sd_df_merged = pd.merge(sd_df, sen_df, on=['SD'])
sd_df_merged = sd_df_merged.set_index('SD')

# Respondents with a missing 'no' flag cannot contribute to the mean.
caucus_deduped_clean = caucus_deduped.dropna(subset=['no'])


def _bootstrap_mean_ci(values):
    """Bootstrap the mean of `values` and return (low, high, stdev) in percent.

    `low` / `high` are the bounds of the 95% confidence interval rounded to
    one decimal; `stdev` is the bootstrap standard error rounded to two.
    Uses 1000 resamples and a fixed seed so repeated runs agree. Returns NaNs
    when fewer than two observations are available, because
    scipy.stats.bootstrap rejects such samples.
    """
    if len(values) < 2:
        missing = float('nan')
        return missing, missing, missing
    result = bootstrap((values,), statistic=np.mean, n_resamples=1000,
                       confidence_level=0.95, random_state=1, axis=0)
    return (round(result.confidence_interval.low*100, 1),
            round(result.confidence_interval.high*100, 1),
            round(result.standard_error*100, 2))


# Senate districts.
for index, row in sd_df_merged.iterrows():
    detractors = caucus_deduped_clean[caucus_deduped_clean['sd'] == row['sd']]['no'].to_numpy()
    low, high, stdev = _bootstrap_mean_ci(detractors)
    sd_df_merged.loc[index, 'low'] = low
    sd_df_merged.loc[index, 'high'] = high
    sd_df_merged.loc[index, 'stdev'] = stdev

# Counties.
for index, row in county_df.iterrows():
    detractors = caucus_deduped_clean[caucus_deduped_clean['capCounty'] == row['capCounty']]['no'].to_numpy()
    low, high, stdev = _bootstrap_mean_ci(detractors)
    county_df.loc[index, 'low'] = low
    county_df.loc[index, 'high'] = high
    county_df.loc[index, 'stdev'] = stdev
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable
def to_percent(y, position):
    """Format the fraction `y` as a whole-number percentage, e.g. 0.5 -> '50%'.

    Has the (value, position) signature expected by
    matplotlib.ticker.FuncFormatter; `position` is unused.

    The value is rounded rather than truncated: 0.29 * 100 evaluates to
    28.999999999999996 in floating point, which truncation rendered as '28%'.
    """
    return str(int(round(y * 100))) + '%'
# --- Chart 4: horizontal bars of negative share per senate district ---
sd_df_sorted = sd_df_merged.sort_values(by='mean', ascending=False)

# 'low' / 'high' are stored in percent while 'mean' is a fraction, so the
# bounds are scaled back before taking the distance from the mean.
e_low = sd_df_sorted['mean'] - sd_df_sorted['low']/100
e_high = sd_df_sorted['high']/100 - sd_df_sorted['mean']
asymmetric_error = [e_low, e_high]

# Colour each bar by its mean on a 0.2-0.7 scale.
norm = Normalize(vmin=.2, vmax=.7)
cmap = plt.cm.RdYlGn_r
colors = [cmap(norm(value)) for value in sd_df_sorted['mean']]

plt.figure(figsize=(8, 10))
bar_order = sd_df_sorted.sort_values(by='mean', ascending=False).index
ax1 = sns.barplot(sd_df_sorted, x='mean', y='sd', xerr=asymmetric_error, orient="h",
                  palette=colors, color="#d9d9d9", order=bar_order)
for bar in ax1.patches:
    bar.set_alpha(0.6)  # make the bars partly transparent

# Write each district's percentage near the left edge of its bar; white text
# for means of 0.65 and above, black otherwise.
c = 0
for mean_value in sd_df_sorted['mean']:
    clr = 'white' if mean_value >= .65 else 'black'
    ax1.text(.04, c, str(round(mean_value*100, 1)) + '%',
             color=clr, va='center', ha='center', fontsize=9)
    c += 1

ax1.xaxis.set_major_formatter(mtick.FuncFormatter(to_percent))
# Dashed reference line at 50%.
plt.axvline(x=.5, color='#c4c4c4', linestyle='--')
plt.show()
The fifth chart shows the relationship between caucus sentiment and age, with a fitted trend line (a 4th-degree polynomial). As you can see, negative sentiment bottoms out at about age 60. Negative sentiment is higher among younger voters (under age 50) and the elderly (over age 70).
# --- Chart 5: negative share by age with a polynomial trend line ---
# Ages above 100 are excluded before averaging the 'no' flag per age.
plausible_age = caucus_deduped_clean['age'] <= 100
age_df = (
    caucus_deduped_clean[plausible_age]
    .groupby(['age'])['no']
    .agg(['mean', 'count'])
    .reset_index()
)
norm = Normalize(vmin=.2, vmax=.7)
cmap = plt.cm.RdYlGn_r

ages = age_df['age']
pct_negative = age_df['mean']*100

# Marker size is the number of respondents at that age; colour follows the mean.
myplot = plt.scatter(ages, pct_negative, s=age_df['count'], alpha=0.5,
                     c=age_df['mean'], cmap='OrRd')

# Degree-4 polynomial fit of percent-negative against age.
z = np.polyfit(ages, pct_negative, 4)
p = np.poly1d(z)
plt.plot(ages, p(ages), 'r--')
plt.ylim(50, 100)
plt.show()