Replication of original calculations

In [1]:
import pandas as pd
import seaborn as sns
from scipy.stats import linregress as linearRegression
In [2]:
extStandard = pd.read_excel("134-data-raw.xlsx", index_col="X", sheet_name="134-external-standard-data-raw")
display(extStandard)
Sample Concentration (mg/mL)Peak AreaPredResiduals
X
0.250.007275152888.620.0074800.000205
0.500.014550488388.330.012338-0.002212
1.000.0291001850244.000.0320590.002959
2.000.0582003589760.130.057248-0.000952
In [3]:
slope, intercept, r_value, p_value, std_err = linearRegression(x=extStandard["Sample Concentration (mg/mL)"],
                                                               y=extStandard["Peak Area"])
display(f"slope: {slope}, intercept: {intercept}, "
        f"r_value: {r_value}, p_value: {p_value}, "
        f"std_err: std_err: {std_err}")
'slope: 69058028.4834902, intercept: -363669.069565217, r_value: 0.9952385535122216, p_value: 0.004761446487778432, std_err: std_err: 4782322.440242962'
In [4]:
p = sns.regplot(x="Sample Concentration (mg/mL)", y="Peak Area", data=extStandard)
p.set_title("External Standard Curve (R$^2$ = {})".format(round(r_value, 4)))
Out[4]:
Text(0.5, 1.0, 'External Standard Curve (R$^2$ = 0.9952)')
In [5]:
pills = pd.read_excel("134-data-raw.xlsx", index_col="Pill", sheet_name="134-pill-data-raw")
display(pills)
GCMS AreaConcentration (mg/mL)Powdered Pill in Sample (g)Initial Pill WeightEstimated Estradiol Valerate in Pill (mg)Confidence Interval
Pill
16761580.640.1031780.09770.14451.5269410.052061
26107657.140.0937090.10930.13951.1971720.044926
36153476.160.0943720.09510.13131.3090730.048599
46179034.760.0947420.10170.13601.2719560.047072
56975218.590.1062710.12070.13941.2286660.040654
67355210.590.1117740.12040.13551.2610720.039615
78286964.050.1252660.10690.12881.5101050.042411
810433601.480.1563510.12860.14281.7443090.039087
97987559.160.1209310.12420.14091.3733710.039933
107206660.820.1096230.12670.13611.1781360.037812
In [6]:
# let's re-estimate the ev in each pill to confirm the model was used correctly
pills["Estimated Estradiol Valerate in Pill (mg) (RECALCULATED)"] = (
    (
    (pills["GCMS Area"]-intercept)/slope
    )*(
    pills["Initial Pill Weight"]/pills["Powdered Pill in Sample (g)"]
    )
)
display(pills)
# There seems to be an off-by-10 issues that I can't explain a year after the fact.
# Additionally, in the original model I controlled for methanol added, which I didn't do here.
GCMS AreaConcentration (mg/mL)Powdered Pill in Sample (g)Initial Pill WeightEstimated Estradiol Valerate in Pill (mg)Confidence IntervalEstimated Estradiol Valerate in Pill (mg) (RECALCULATED)
Pill
16761580.640.1031780.09770.14451.5269410.0520610.152602
26107657.140.0937090.10930.13951.1971720.0449260.119601
36153476.160.0943720.09510.13131.3090730.0485990.130295
46179034.760.0947420.10170.13601.2719560.0470720.126695
56975218.590.1062710.12070.13941.2286660.0406540.122736
67355210.590.1117740.12040.13551.2610720.0396150.125792
78286964.050.1252660.10690.12881.5101050.0424110.150929
810433601.480.1563510.12860.14281.7443090.0390870.173615
97987559.160.1209310.12420.14091.3733710.0399330.137191
107206660.820.1096230.12670.13611.1781360.0378120.117756
In [7]:
pills["Estimated Estradiol Valerate in Pill (mg) (RECALCULATED)"] = pills["Estimated Estradiol Valerate in Pill (mg) (RECALCULATED)"]*10
display(pills)
GCMS AreaConcentration (mg/mL)Powdered Pill in Sample (g)Initial Pill WeightEstimated Estradiol Valerate in Pill (mg)Confidence IntervalEstimated Estradiol Valerate in Pill (mg) (RECALCULATED)
Pill
16761580.640.1031780.09770.14451.5269410.0520611.526016
26107657.140.0937090.10930.13951.1971720.0449261.196005
36153476.160.0943720.09510.13131.3090730.0485991.302949
46179034.760.0947420.10170.13601.2719560.0470721.266955
56975218.590.1062710.12070.13941.2286660.0406541.227359
67355210.590.1117740.12040.13551.2610720.0396151.257920
78286964.050.1252660.10690.12881.5101050.0424111.509287
810433601.480.1563510.12860.14281.7443090.0390871.736149
97987559.160.1209310.12420.14091.3733710.0399331.371910
107206660.820.1096230.12670.13611.1781360.0378121.177558
In [8]:
# Remaking this as a sanity check
sns.scatterplot(x=pills.index, y="Estimated Estradiol Valerate in Pill (mg) (RECALCULATED)", data=pills)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd140379880>
In [9]:
uv = pd.read_excel("134-data-raw.xlsx", index_col="wavelength (nm)", sheet_name="134-uv-vis-data-raw")
display(uv)
0.5x1x2x4xPill 6
wavelength (nm)
2790.006535-0.00010.06370.06700.0778
2800.0067200.00330.06540.06900.0697
2810.0090400.01550.08670.09410.0580
2820.0096650.01590.09200.10130.0701
2830.0109200.01930.09860.11980.0696
..................
4160.000130-0.00070.0037-0.00110.1019
4170.000135-0.00090.0037-0.00100.1027
4180.000170-0.00040.0041-0.00070.0964
4190.000115-0.00080.0035-0.00120.0941
4200.000085-0.00110.0033-0.00160.0934

142 rows × 5 columns

In [10]:
uv[["0.5x", "1x", "2x", "4x"]].loc[279:300].plot()
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd14027c520>
In [11]:
x = list(uv[["0.5x", "1x", "2x", "4x"]].loc[289])
y = [0.5, 1, 2, 4]
slope, intercept, r_value, p_value, std_err = linearRegression(x, y)
display(f"slope: {slope}, intercept: {intercept}, "
        f"r_value: {r_value}, p_value: {p_value}, "
        f"std_err: std_err: {std_err}")
display("R2 = {}".format(round(r_value,4)))
'slope: 7.6020581954683095, intercept: 0.09167117821606663, r_value: 0.9965645063107781, p_value: 0.0034354936892219134, std_err: std_err: 0.44673196380136687'
'R2 = 0.9966'