Mike Conway, 04 Jan 2022
This notebook utilizes and extends code generated by Cole Citrenbaum
We will compare Google Trends search interest for the following less stigmatizing and more stigmatizing terms, respectively:
"alcohol user", "alcohol users", "alcohol use"
and
"alcohol abuser", "alcohol abusers", "alcohol abuse"
First, import relevant Python3 libraries:
from pytrends.request import TrendReq
import datetime as dt
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [30, 30]
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
Second, query Google Trends through the pytrends library with the relevant terms and load the result into a pandas DataFrame:
# Open a Google Trends session; hl is the host language and tz the timezone
# offset handed to the service.
pytrends = TrendReq(tz=360, hl='en-US')

# Each list entry is ONE query. Inside a query the quoted phrases are joined
# with " + ", so the three spellings are reported as a single combined series
# (the plot legend further down describes the same grouping as "OR").
kw_list = [
    '"alcohol abuser" + "alcohol abusers" + "alcohol abuse"',
    '"alcohol user" + "alcohol users" + "alcohol use"',
]

# Register the queries: all categories, US only, 2004 through 2021,
# default search property.
pytrends.build_payload(
    kw_list,
    cat=0,
    timeframe='2004-01-01 2021-12-31',
    geo='US',
    gprop='',
)

# Interest over time as a DataFrame: one column per query plus `isPartial`,
# indexed by date.
trendinfo = pd.DataFrame(pytrends.interest_over_time())

# To list every row instead of a short preview, enable the next line:
# pd.set_option('display.max_rows', trendinfo.shape[0] + 1)

# Preview the first five rows.
trendinfo.head()
"alcohol abuser" + "alcohol abusers" + "alcohol abuse" | "alcohol user" + "alcohol users" + "alcohol use" | isPartial | |
---|---|---|---|
date | |||
2004-01-01 | 62 | 11 | False |
2004-02-01 | 64 | 21 | False |
2004-03-01 | 50 | 23 | False |
2004-04-01 | 75 | 34 | False |
2004-05-01 | 64 | 26 | False |
Third, plot search interest over time for both term groups and save the figure:
# Observation dates (the DataFrame's datetime index); used as the horizontal axis.
indices = trendinfo.index

# Search-interest series as column vectors of shape (n, 1).
#   x -> the "alcohol abuse" term group
#   y -> the "alcohol use" term group
# (The previous inline comments had these two labels swapped.)
x = np.array(
    trendinfo['"alcohol abuser" + "alcohol abusers" + "alcohol abuse"']
).reshape((-1, 1))
y = np.array(
    trendinfo['"alcohol user" + "alcohol users" + "alcohol use"']
).reshape((-1, 1))

# One scatter series per term group: green for "use", red for "abuse".
fig, ax = plt.subplots()
ax.scatter(indices, y, color='g', s=1,
           label='"alcohol use" OR "alcohol user" OR "alcohol users"')
ax.scatter(indices, x, color='r', s=1,
           label='"alcohol abuse" OR "alcohol abuser" OR "alcohol abusers"')

# Pin the legend just inside the upper-left corner of the axes.
ax.legend(bbox_to_anchor=(.02, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel('Search Interest')
ax.set_xlabel('Year')

# Save BEFORE show(): a blocking show() closes the figure once its window is
# dismissed, so writing the file first does not depend on which backend is
# active and does not wait for the viewer to be closed.
fig.savefig("alcohol_use___alcohol_abuse.png", dpi=300)
plt.show()