In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
In [4]:
data = {"Wealth Quartile": ["Q1", "Q2", "Q3", "Q4", "Q5"], 
        "Children 0.5-5 years old with Anemia in LMIC": [25335,20834,18287,15743,11263], 
        "Children 0.5-5 years old with Severe Anemia in LMIC": [1443,1139,1017,729,363],
        "Nonpregnant Women Aged 15-49 years with Anemia in LMIC": [22342,21057,20641,20915,21731],
        "Nonpregnant Women Aged 15-49 years with Severe Anemia in LMIC": [446,395,329,358,343],
    } # source: doi:10.1001/jamanetworkopen.2018.2899
lmianemia = pd.DataFrame.from_dict(data).set_index("Wealth Quartile")
display(lmianemia)
print("And then let's normalize that:")
lmianemia = lmianemia/lmianemia.sum()
display(lmianemia)
print("and... take a sneak peak at the data:")
lmianemia.plot()
print("Data collected from 2016-2018")
Children 0.5-5 years old with Anemia in LMICChildren 0.5-5 years old with Severe Anemia in LMICNonpregnant Women Aged 15-49 years with Anemia in LMICNonpregnant Women Aged 15-49 years with Severe Anemia in LMIC
Wealth Quartile
Q125335144322342446
Q220834113921057395
Q318287101720641329
Q41574372920915358
Q51126336321731343
And then let's normalize that:
Children 0.5-5 years old with Anemia in LMICChildren 0.5-5 years old with Severe Anemia in LMICNonpregnant Women Aged 15-49 years with Anemia in LMICNonpregnant Women Aged 15-49 years with Severe Anemia in LMIC
Wealth Quartile
Q10.2770000.3076100.2094180.238375
Q20.2277890.2428050.1973740.211117
Q30.1999410.2167980.1934740.175842
Q40.1721260.1554040.1960430.191342
Q50.1231440.0773820.2036910.183324
and... take a sneak peak at the data:
Data collected from 2016-2018
In [5]:
data = {"Wealth Bracket": ["High", "Middle High", "Middle Low", "Low"],
        "Odds Ratio of Anemia among Korean Adolescent Girls": [1,1.63,2.12,5.11],
        "Odds Ratio of Iron Deficiency Anemia among Korean Adolescent Girls": [1,1.68,1.86,3.24],
        "Odds Ratio of Anemia among Korean Adolescent Girls (adjusting for read meat intake)": [1,2.48,3.35,7.1],
        "Odds Ratio of Iron Deficiency Anemia among Korean Adolescent Girls (adjusting for read meat intake)": [1,2.76,3.32,5.83]
    } # source: https://www.nature.com/articles/ejcn2013241 
# using model 2, which adjusts for age, BMI, WBC count and red meat intake
kGirlAnemia = pd.DataFrame.from_dict(data).set_index("Wealth Bracket").iloc[::-1]
display(kGirlAnemia)
print("And then let's normalize that:")
kGirlAnemia = kGirlAnemia/kGirlAnemia.sum()
kGirlAnemia.rename(columns = {col: "Normalized "+col for col in kGirlAnemia}, inplace=True)
display(kGirlAnemia)
print("and... take looksies at the data:")
kGirlAnemia.plot()
print("Data collected from 2008–2011")
Odds Ratio of Anemia among Korean Adolescent GirlsOdds Ratio of Iron Deficiency Anemia among Korean Adolescent GirlsOdds Ratio of Anemia among Korean Adolescent Girls (adjusting for read meat intake)Odds Ratio of Iron Deficiency Anemia among Korean Adolescent Girls (adjusting for read meat intake)
Wealth Bracket
Low5.113.247.105.83
Middle Low2.121.863.353.32
Middle High1.631.682.482.76
High1.001.001.001.00
And then let's normalize that:
Normalized Odds Ratio of Anemia among Korean Adolescent GirlsNormalized Odds Ratio of Iron Deficiency Anemia among Korean Adolescent GirlsNormalized Odds Ratio of Anemia among Korean Adolescent Girls (adjusting for read meat intake)Normalized Odds Ratio of Iron Deficiency Anemia among Korean Adolescent Girls (adjusting for read meat intake)
Wealth Bracket
Low0.5182560.4164520.5096910.451588
Middle Low0.2150100.2390750.2404880.257165
Middle High0.1653140.2159380.1780330.213788
High0.1014200.1285350.0717880.077459
and... take looksies at the data:
Data collected from 2008–2011
In [6]:
data = {"Education": ["Illiterate", "Primary School", "Junior High School", "High School", "College or Above"], 
        "% School Children with Anemia by Mothers Education": [27.3, 24.2, 19.8, 17.2, np.nan],
        "% School Children with Anemia by Fathers Education": [28.8, 26.4, 22.1, 19.8, 18.9]
    } # source: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3225109/
chineseStudentAnemia = pd.DataFrame.from_dict(data).set_index("Education")
display(chineseStudentAnemia)
ax = chineseStudentAnemia.plot()
ax.set_xlabel("Education of Parent")
ax.set_ylabel("% of Children that are Anemic")
ax.set_title("Rual China's Elementry Students Rates of Anemia")
% School Children with Anemia by Mothers Education% School Children with Anemia by Fathers Education
Education
Illiterate27.328.8
Primary School24.226.4
Junior High School19.822.1
High School17.219.8
College or AboveNaN18.9
Out[6]:
Text(0.5, 1.0, "Rual China's Elementry Students Rates of Anemia")

anemia-ses.PNG Data below exerpted from above graph (data translation done by hand)

In [7]:
data = {"Birth Year": list(range(75,85)),
       "Higher SES": [5.3, 4.6, 4.7, 3.8, 4, 3.6, 3.8, 3.0, 3.7, 2],
       "Upper Middle SES":[6.4,5.6,5.3,5.1,4.1,4.7,3.9,3.7,3.3,3.1],
       "Lower Middle SES":[6.7,6.3,6.3,5.2,5,4.9,4.1,3.8,3.3,3.1],
       "Lower SES": [7.6,6.8,6.3,5.3,5.4,5.2,5.4,4.7,3.9,3.3]} # source: https://jamanetwork.com/journals/jama/fullarticle/368324
tennAnemia = pd.DataFrame.from_dict(data).set_index("Birth Year")
display(tennAnemia)
ax = tennAnemia.plot()
ax.set_ylabel("% Anemic")
ax.set_xlabel("Birth Year")
ax.set_title("Anemia Trends of Tennessee Children") # enrolled in Woman, Infants, and Children Program
Higher SESUpper Middle SESLower Middle SESLower SES
Birth Year
755.36.46.77.6
764.65.66.36.8
774.75.36.36.3
783.85.15.25.3
794.04.15.05.4
803.64.74.95.2
813.83.94.15.4
823.03.73.84.7
833.73.33.33.9
842.03.13.13.3
Out[7]:
Text(0.5, 1.0, 'Anemia Trends of Tennessee Children')
In [8]:
tennAnemia.mean().iloc[::-1].plot()
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x219ac0f78c8>
In [49]:
# let's bring it all together:
kGirlAnemiaCopy = kGirlAnemia.copy()
knames = {"Normalized Odds Ratio of Anemia among Korean Adolescent Girls": "Adolecent Girls, 10-18yo",
    "Normalized Odds Ratio of Iron Deficiency Anemia among Korean Adolescent Girls": "Adolecent Girls**, 10-18yo",
    "Normalized Odds Ratio of Anemia among Korean Adolescent Girls (adjusting for read meat intake)": "with anemia",
    "Normalized Odds Ratio of Iron Deficiency Anemia among Korean Adolescent Girls (adjusting for read meat intake)": "with severe anemia"}
kGirlAnemiaCopy.rename(columns=knames, inplace=True)
kGirlAnemiaCopy["SES"] = [1,2,3,4]
kGirlAnemiaCopy.set_index("SES", inplace=True)
kGirlAnemiaCopy.drop(columns=["Adolecent Girls**, 10-18yo", "Adolecent Girls, 10-18yo"], inplace=True)
display("Korean Adolecent Girls 2008-2011", kGirlAnemiaCopy)

anemiaNormCopy = lmianemia.copy()
anemiaNormCopy["SES"] = [4/5,8/5,12/5,16/5,20/5]
anemiaNormCopy.set_index("SES", inplace=True)
lmicnames = {"Children 0.5-5 years old with Anemia in LMIC": "with anemia",
       "Children 0.5-5 years old with Severe Anemia in LMIC": "with severe anemia",
       "Nonpregnant Women Aged 15-49 years with Anemia in LMIC": "Nonpregnant Women",
       "Nonpregnant Women Aged 15-49 years with Severe Anemia in LMIC": "Nonpregnant Women**"}
anemiaNormCopy.rename(columns=lmicnames, inplace=True)
anemiaNormCopy.drop(columns=["Nonpregnant Women", "Nonpregnant Women**"], inplace=True)
display("Low and Middle Income Countries 2016-2018", anemiaNormCopy)

chineseStudentAnemiaCopy = chineseStudentAnemia.copy()
csaNames = {"% School Children with Anemia by Mothers Education": "with anemia (by Mother SES)", 
            "% School Children with Anemia by Fathers Education": "with anemia (by Father SES)"}
chineseStudentAnemiaCopy.rename(columns=csaNames, inplace=True)
chineseStudentAnemiaCopy["SES"] = [4/5,8/5,12/5,16/5,20/5]
chineseStudentAnemiaCopy.set_index("SES", inplace=True)
display("Rual Chinese Children (Ningxia and Qinghai Counties) 2011", chineseStudentAnemiaCopy)

tennAnemiaCopy = tennAnemia.mean().iloc[::-1].to_frame()
tennAnemiaCopy.rename(columns={0:"with anemia"}, inplace=True)
tennAnemiaCopy["SES"] = [1,2,3,4]
tennAnemiaCopy.set_index("SES", inplace=True)
display("Tennasee 0.5-5yo Children 1975-1984", tennAnemiaCopy)
'Korean Adolecent Girls 2008-2011'
with anemiawith severe anemia
SES
10.5096910.451588
20.2404880.257165
30.1780330.213788
40.0717880.077459
'Low and Middle Income Countries 2016-2018'
with anemiawith severe anemia
SES
0.80.2770000.307610
1.60.2277890.242805
2.40.1999410.216798
3.20.1721260.155404
4.00.1231440.077382
'Rual Chinese Children (Ningxia and Qinghai Counties) 2011'
with anemia (by Mother SES)with anemia (by Father SES)
SES
0.827.328.8
1.624.226.4
2.419.822.1
3.217.219.8
4.0NaN18.9
'Tennasee 0.5-5yo Children 1975-1984'
with anemia
SES
15.39
24.87
34.52
43.85
In [67]:
dataSources = ["Children in Low and Middle Income Countries, 0.5-5yo (2016-2018)",
               "Korean Adolecent Girls, 10-18yo (2008-2011)",
               "Rual Chinese Children, ~9-11yo (2011)",
               "Tennasee Children, 0.5-5yo (1975-1984)"]
pallets = [sns.color_palette("Blues_r"),
           sns.color_palette("Reds_r"),
           sns.color_palette("Greens_r")[3:],
           sns.dark_palette("purple")]
heights = [0.2, 0.42, 0.64, 0.83]

k=0
ax = False
for j, dataSet in enumerate([kGirlAnemiaCopy, anemiaNormCopy, chineseStudentAnemiaCopy, tennAnemiaCopy]):
    if ax:
        ax = ax.twinx()
    else:
        fig, ax = plt.subplots()
        ax.set_xlabel("Socioeconomic Status")
        ax.set_ylabel("Relative Prevelence")
        plt.xticks([1,2,3,4], ("Low", "Lower-Middle", "Upper-Middle", "High"))
    ax.set_yticks([], [])
    for i, col in enumerate(dataSet):
        pallette = pallets[j]
        ax.plot(dataSet.index, dataSet[col], 
                linestyle = "-.",
                color=pallette[i],
                label=col,
               marker=".")
        k=k+1
    ax.legend(loc='center left', bbox_to_anchor=(1.05, heights[j]), 
              fancybox=True, ncol=1, title=dataSources[j])
plt.title("Rate of Anemia vs Socioeconomic Status")
fig.set_size_inches(7, 5)
plt.savefig("anemiaRates.png", dpi=600, bbox_inches="tight")
In [59]:
filename = "Nutrition__Physical_Activity__and_Obesity_-_Behavioral_Risk_Factor_Surveillance_System.csv"
nutri = pd.read_csv(filename)
nutri.set_index("YearStart", inplace=True)
noveggis = 'Percent of adults who report consuming vegetables less than one time daily'
nofruit  = 'Percent of adults who report consuming fruit less than one time daily'
ax = sns.distplot(nutri[nutri.Question == noveggis]["Data_Value"].dropna(), label=noveggis)
sns.distplot(nutri[nutri.Question == nofruit]["Data_Value"].dropna(), ax=ax, label=nofruit)
plt.legend() 
plt.xlabel("Percent of Americans in 2017")
# Put a legend below current axis
ax.legend(loc='upper center', bbox_to_anchor=(0.55, 1.2), fancybox=True, ncol=1)
print("Why did I make this?")
print("Don't ask questions. Just enjoy plot.")
Why did I make this?
Don't ask questions. Just enjoy plot.