Mike Conway, 04 Jan 2021
This notebook utilizes and extends code (pytrends.py) generated by Cole Citrenbaum
We will compare search interest in the following less stigmatizing terms:
"opioid user", "opioid users", "opioid use"
and the following more stigmatizing terms:
"opioid abuser", "opioid abusers", "opioid abuse"
First, import relevant Python3 libraries:
from pytrends.request import TrendReq
import datetime as dt
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [30, 30]
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
Second, call pytrends library with relevant terms and generate pandas data frame
# Open a Google Trends session through pytrends.
pytrends = TrendReq(hl='en-US', tz=360)

# Two composite queries; each joins three quoted phrases with "+".
kw_list = [
    '"opioid abuser" + "opioid abusers" + "opioid abuse"',
    '"opioid user" + "opioid users" + "opioid use"',
]

# Register both queries for geo='US' over 2004-01-01 .. 2021-12-31.
pytrends.build_payload(
    kw_list,
    cat=0,
    timeframe='2004-01-01 2021-12-31',
    geo='US',
    gprop='',
)

# Interest over time, held as a pandas DataFrame (one column per query).
trendinfo = pd.DataFrame(pytrends.interest_over_time())

# To display every row rather than only the head, uncomment:
# pd.set_option('display.max_rows', trendinfo.shape[0] + 1)

# Preview the first 5 rows of the DataFrame.
trendinfo.head()
"opioid abuser" + "opioid abusers" + "opioid abuse" | "opioid user" + "opioid users" + "opioid use" | isPartial | |
---|---|---|---|
date | |||
2004-01-01 | 0 | 0 | False |
2004-02-01 | 0 | 0 | False |
2004-03-01 | 0 | 0 | False |
2004-04-01 | 0 | 0 | False |
2004-05-01 | 0 | 0 | False |
Third, plotting
# Time axis: the DataFrame index (datetime values).
indices = trendinfo.index

# Column vectors of shape (n, 1): x is the "abuse" series, y is the "use" series.
x = np.array(
    trendinfo['"opioid abuser" + "opioid abusers" + "opioid abuse"']
).reshape(-1, 1)
y = np.array(
    trendinfo['"opioid user" + "opioid users" + "opioid use"']
).reshape(-1, 1)

fig, ax = plt.subplots()

# "use" terms are drawn in green, "abuse" terms in red.
ax.scatter(
    indices, y, color='g', s=1,
    label='"opioid use" OR "opioid user" OR "opioid users"',
)
ax.scatter(
    indices, x, color='r', s=1,
    label='"opioid abuse" OR "opioid abuser" OR "opioid abusers"',
)

# ax is the current axes here, so these Axes methods target the same
# axes the pyplot-level legend/ylabel/xlabel calls would.
ax.legend(bbox_to_anchor=(.08, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel('Search Interest')
ax.set_xlabel('Year')

plt.show()
fig.savefig("opioid_use___opioid_abuse.png", dpi=300)
Because the plot generated above contains many overlapping zero values, rows in which either series has a value of 0 are removed below to improve clarity.
# Keep only the rows in which BOTH composite queries have non-zero interest.
abuse_nonzero = trendinfo['"opioid abuser" + "opioid abusers" + "opioid abuse"'] > 0
use_nonzero = trendinfo['"opioid user" + "opioid users" + "opioid use"'] > 0
trendinfo_filtered = trendinfo[abuse_nonzero & use_nonzero]

# Display the filtered DataFrame.
trendinfo_filtered
"opioid abuser" + "opioid abusers" + "opioid abuse" | "opioid user" + "opioid users" + "opioid use" | isPartial | |
---|---|---|---|
date | |||
2007-12-01 | 9 | 9 | False |
2008-07-01 | 15 | 8 | False |
2009-01-01 | 7 | 7 | False |
2009-03-01 | 6 | 6 | False |
2009-10-01 | 6 | 6 | False |
... | ... | ... | ... |
2021-08-01 | 12 | 55 | False |
2021-09-01 | 11 | 72 | False |
2021-10-01 | 20 | 73 | False |
2021-11-01 | 18 | 76 | False |
2021-12-01 | 8 | 46 | False |
120 rows × 3 columns
Generate plot
# Time axis for the zero-filtered data: the index of trendinfo_filtered (datetime values).
indices = trendinfo_filtered.index

# Column vectors of shape (n, 1): x is the "abuse" series, y is the "use" series.
x = np.array(trendinfo_filtered['"opioid abuser" + "opioid abusers" + "opioid abuse"']).reshape((-1, 1))
y = np.array(trendinfo_filtered['"opioid user" + "opioid users" + "opioid use"']).reshape((-1, 1))

fig, ax = plt.subplots()

# "use" terms are drawn in green, "abuse" terms in red.
ax.scatter(indices, y, color='g', s=1, label='"opioid use" OR "opioid user" OR "opioid users"')
# The legend label previously listed "opioid abuser" twice; the third term is
# "opioid abusers", matching the column that is actually plotted.
ax.scatter(indices, x, color='r', s=1, label='"opioid abuse" OR "opioid abuser" OR "opioid abusers"')

plt.legend(bbox_to_anchor=(.08, 1), loc='upper left', borderaxespad=0.)
plt.ylabel('Search Interest')
plt.xlabel('Year')
plt.show()

# Save the zero-filtered figure to disk.
fig.savefig("opioid_use___opioid_abuse_zero_removed.png", dpi=300)
# Redraw the zero-filtered scatter plot on a fresh figure, reusing the
# `indices`, `x` and `y` values computed for the preceding plot.
fig, ax = plt.subplots()

# "use" terms are drawn in green, "abuse" terms in red.
ax.scatter(
    indices, y, color='g', s=1,
    label='"opioid use" OR "opioid user" OR "opioid users"',
)
ax.scatter(
    indices, x, color='r', s=1,
    label='"opioid abuse" OR "opioid abuser" OR "opioid abusers"',
)

# Legend anchored at x=.07 here (the preceding plot uses .08).
ax.legend(bbox_to_anchor=(.07, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel('Search Interest')
ax.set_xlabel('Year')

plt.show()

# NOTE: same filename as the preceding zero-filtered figure, so this save
# replaces that file on disk.
fig.savefig("opioid_use___opioid_abuse_zero_removed.png", dpi=300)