18/04/03 00:15:01.95 wYZjtgN6.net
FPRPP.F <- function(r1,r2,n1,n2, prior=0.5){
p.value=fisher.test(matrix(c(r1,n1-r1,r2,n2-r2),2))$p.value
p1=r1/n1
p2=r2/n2
power=statmod::power.fisher.test(p1,p2,n1,n2,alpha=p.value)
FP=(1-prior)*p.value
TP=prior*power
c(p.value=p.value,'false positive rate' = FP/(TP+FP))
}
r1=10;n1=19;r2=11;n2=12
FPRPP.F(r1,r2,n1,n2,0.5)
433:卵の名無しさん
18/04/03 14:26:09.59 wYZjtgN6.net
標準偏差の信頼区間
f247 <- function(n,conf.level=0.95){
df=n-1
lwr=qchisq((1-conf.level)/2,df)
upr=qchisq((1-conf.level)/2,df,lower.tail=FALSE)
c('Lower limit'=sqrt((n-1)/upr),'Upper limit'=sqrt((n-1)/lwr))
}
sd.ci=sapply(c(2,3,4,5,10,25,50,100,500,1000),f247)
colnames(sd.ci)=paste('n =',c(2,3,4,5,10,25,50,100,500,1000))
t(round(sd.ci,2))
434:卵の名無しさん
18/04/04 22:30:14.42 rn9pqwFF.net
URLリンク(www.rdocumentation.org)
π=P(H0=TRUE)=P(association)は
π=P(HA=TRUE)=P(association)の間違いじゃないだろうか?
435:卵の名無しさん
18/04/05 14:14:03.67 xAGVH6hI.net
N=100
xy=rnorm(N)
index=1:N
f254 <- function(r1=5,K=75,PRINT=TRUE){
x0=sample(index,r1) ; y0=sample(index,r1)
PV=numeric()
PV[1]=t.test(xy[x0] , xy[y0])$p.value
x.index=x0 ; y.index=y0
for(i in 2:(K-r1+1)){
add.x=sample(index[-x.index],1)
x.index=append(x.index,add.x)
add.y=sample(index[-y.index],1)
y.index=append(y.index,add.y)
PV[i]=t.test(xy[x.index],xy[y.index])$p.value
i=i+1
}
if(PRINT){
plot(r1:K,PV,type='l',lwd=2, log='y', ylim=c(0.001,1),
xlab='n',ylab='p-value')
abline(h=0.05,lty=3)
}
FPR=mean(PV<0.05)
return(FPR)
}
x11() ; par(mfrow=c(1,1)) ;f254()
x11() ;par(mfrow=c(3,3)) ; replicate(9,f254())
436:卵の名無しさん
18/04/05 14:43:46.32 xAGVH6hI.net
The simulated data were sampled from populations with Gaussian distribution and identical means and standard deviations, An unpaired t test was computed with n=5 in each group,
and the resulting P value is plotted at the left of the graph.Then the t test was repeated with one more value added to each group.
Then one more value was added to each group(n=7),and the P value was computed again. This was continued up to n=75.
こういうのを読むと自分でシミュレーションせずにはいられなくなる。
URLリンク(i.imgur.com)
Rのコードはこれ。
スレリンク(hosp板:410番)
437:卵の名無しさん
18/04/06 19:26:15.51 I1oriqts.net
Appendix Table 1. Joint probability of significance of test and truth of hypothesis
Truth of alternative hypothesis
Significance of test
Significant Not significant Total
True association (1 ) [True positive] [False negative]
No association -
(1 ) [False positive] (1 -
) (1 ) [True negative] 1
Total (1 )-
(1 ) (1 -
) (1 ) 1
440 COMMENTARY Journal of the National Cancer Institute, Vol. 96, No. 6, March 17, 2004
438:卵の名無しさん
18/04/07 13:38:05.89 4kW+Nj56.net
f<- function(d=1,beta=.80,alpha=.05) 2*(abs((qnorm(1-alpha/2))+abs(qnorm(1-beta)))/d)^2
f(5/10)
power.t.test(delta=0.5,power=0.80)$n
# power 50%->80%
dd=seq(0.01,0.99,by=0.01)
g<- function(x) f(x,.50)/f(x,.80)
plot(dd,sapply(dd,g))
(2*(abs(1.96+abs(qnorm(1-0.80))))^2)/(2*(abs(1.96+abs(qnorm(1-0.50))))^2)
(1.96+0.84)^2/(1.96+0)^2
dd=seq(0.01,0.99,by=0.01)
h<-function (x,pow1=0.80,pow2=0.50){
power.t.test(d=x, power=pow1)$n/power.t.test(d=x, power=pow2)$n
}
res=sapply (dd,h)
plot(dd,res)
summary (res)
439:卵の名無しさん
18/04/07 13:41:51.63 4kW+Nj56.net
N=10^5
D=rbeta(N,2,1)
K=rbeta(N,1,2)
par (mfrow=c(2,2))
hist(D)
hist(D-K)
hist (K)
hist(log(D/K))
quantile (D-K, probs=c(0.025,0.5,0.975))
quantile (D/K, probs=c(0.025,0.5,0.975))
summary (D-K)
summary (D/K)
exp(mean (log(D/K)))
median (D/K)
BEST::plotPost (D-K)
BEST::plotPost (D/K)
BEST::plotPost(log(D/K))
440:卵の名無しさん
18/04/07 13:46:36.71 4kW+Nj56.net
N=10^5
D=rbeta(N,2,1)
K=rbeta(N,1,2)
par (mfrow=c(2,2))
hist(D)
hist(D-K)
hist (K)
hist(log(D/K))
quantile (D-K, probs=c(0.025,0.5,0.975))
quantile (D/K, probs=c(0.025,0.5,0.975))
summary (D-K)
summary (D/K)
exp(mean (log(D/K)))
median (D/K)
NNT=1/(D-K)
BEST::plotPost (D-K)
BEST:: plotPost (NNT)
BEST::plotPost (D/K)
BEST::plotPost(log(D/K))
441:卵の名無しさん
18/04/07 14:23:06.29 4kW+Nj56.net
f3.6 <- function(delta,n){
pnorm(-qnorm(.975)-sqrt(n)*delta)+pnorm(qnorm(.975)-sqrt(n)*delta,lower=FALSE)
}
f3.6(0.6,9)
curve (f3.6(x,25),-2,2,xlab=quote(Delta), ylab='power')
curve (f3.6(x,16),add=TRUE)
curve (f3.6(x, 9),add=TRUE)
442:卵の名無しさん
18/04/08 11:09:58.60 vtyfqsbT.net
# HDI for chisq
curve(dchisq(x,9),0,30)
conf.level=0.95
n=10
f <- function(x,df=n-1,conf.level=0.95){
d <- function(d) pchisq(x+d,df) - pchisq(x,df) - conf.level
uniroot(d,c(0,df*3))$root
}
qchisq(1-conf.level,n-1)
low=seq(0,floor(qchisq(1-conf.level,n-1)),by=0.01)
plot(low,sapply(low,f),type='l',lwd=2)
opt=optimise(f,c(0,floor(qchisq(1-conf.level,n-1)))) ; opt
opt[[1]] ; opt[[1]]+opt[[2]]
pchisq(opt[[1]]+opt[[2]],n-1) - pchisq(opt[[1]],n-1)
qchisq(0.025,n-1) ; qchisq(0.975,n-1)
pchisq(qchisq(0.975,n-1),n-1)-pchisq(qchisq(0.025,n-1),n-1)
443:卵の名無しさん
18/04/08 13:30:22.84 vtyfqsbT.net
# 不偏分散u2が重要なのは(ランダムサンプリングでは)
# 不偏分散の期待値が母分散と一致するからです。
# 定理:E[u2]=σ2
N=1000
n=10
X=rpois(N3)
hist(X)
var(X)
k=10^4
mean(replicate(k,var(sample(X,n))))
VAR <- function(x){
n.x=length(x)
var(x)*(n.x-1)/n.x
}
mean(replicate(k,VAR(sample(X,n))))
444:卵の名無しさん
18/04/09 08:33:45.55 v1O7hrCx.net
# 6割以上正解 0.522
sum(dbinom(240:400,400,0.6))
binom.test(240,400,0.6, alt='greater')$p.value
# 6割未満正解 0.478
sum(dbinom(0:239,400,0.6))
binom.test(239,400,0.6,alt='less')$p.value
n=5
# 禁忌枝3個以上選択0.317
binom.test(3,n,1-0.6,alt='g')
sum(dbinom(3:n,n,1-0.6))
0.522*(1-0.317)
# 一般問題を1問1点、臨床実地問題を1問3点とし、
#(1)?(3)のすべての合格基準を満たした者を合格とする。
#(1)必修問題 160点以上/200点
#(2)一般問題及び臨床実地問題 208点以上/299点
#(3)禁忌肢問題選択数 3問以下
p=0.6
binom.test(160,200,p,alt='greater')$p.value
binom.test(200,290,p,alt='greater')$p.value
n=5
445:卵の名無しさん
18/04/09 11:00:00.53 8EAvIhhV.net
現状の国試の合格基準は
一般問題を1問1点、臨床実地問題を1問3点とし、
(1)~(3)のすべての合格基準を満たした者を合格とする。
(1)必修問題 160点以上/200点
(2)一般問題及び臨床実地問題 208点以上/299点
(3)禁忌肢問題選択数 3問以下
来年から400問に減るらしい。
合格基準の正解率は同じ、即ち、(160+208)/499*400=295以上正解を要し、かつ、禁忌肢選択も3問以下とする。
正解率80%の受験生集団の合格率が50%であった�
446:ニき 禁忌肢問題は何問出題されたか答えよ。 f<-function (n,p){ (a=binom.test(295,400,p,alt='greater')$p.value) (b=binom.test(n-3,n,p,alt='greater')$p.value) a*b } nn=3:40 p=0.80 plot (nn,sapply(nn, function (n)f(n,p)))
447:卵の名無しさん
18/04/09 20:23:30.66 wJm8c5Cc.net
# F ~ F(Φ,∞) => Φ*F ~ χ2(Φ1)
fp.100 <- function(){
N=10^4
phi=sample(1:100,1)
rF=rf(N,phi,Inf)
hist(rF,breaks=25,freq=FALSE,col=sample(colours(),1),main=paste('Φ1 = ',phi1,', Φ2 = Inf'),
xlab=' F ')
curve(df(x,phi,Inf),add=TRUE)
hist(phi*rF,breaks=25,freq=FALSE,col=sample(colours(),1),main=paste('Φ1 = ',phi1,', Φ2 = Inf'),
xlab=' Φ*F ')
curve(dchisq(x,phi),add=TRUE,lwd=2)
}
x11() ; par(mfrow=c(3,3)) ; for(i in 1:9) fp.100()
448:卵の名無しさん
18/04/10 08:46:18.28 sK+H3q9V.net
XOF <- function (shape, scale,n=375,med=53.5,lwr=48.0,upr=58.5){
f <- function (v){
sh=v[1]
sc=v[2]
xof=rgamma(n,sh,sc) # sh: shape, sc: scale
(median(xof)-med)^2+ (qgamma(0.025, sh,sc) - lwr)^2 + (qgamma(0.975,sh,sc) - upr)^2
}
opt=optim (c(shape,scale), f, method='N')$par
print (opt)
curve(dgamma(x,opt [1],opt[2]),0,100)
sim=rgamma (n,opt [1],opt[2])
print(quantile (sim, probs=c(0.025,0.5,0.975)))
invisible (opt)
}
XOF(400,7)
449:卵の名無しさん
18/04/10 10:02:47.35 sK+H3q9V.net
XOF <- function (shape, scale,n=375,med=53.5,lwr=48.0,upr=58.5){
f <- function (v){
sh=v[1]
sc=v[2]
N=10^4
xof=rgamma(N,sh,sc) # sh: shape, sc: scale
(median(xof)-med)^2+ (qgamma(0.025, sh,sc) - lwr)^2 + (qgamma(0.975,sh,sc) - upr)^2
}
opt=optim (c(shape,scale), f, method='N')$par
print (opt)
curve(dgamma(x,opt [1],opt[2]),0,100)
sim=rgamma (n,opt [1],opt[2])
print(quantile (sim, probs=c(0.025,0.5,0.975)))
invisible (opt)
}
xof=XOF(400,7,375,53.5,48.0,58.5)
tam=XOF(400,7,377,53.8,50.2,56.4)
X=rgamma (375,xof[1],xof[2])
T=rgamma (377,tam[1],tam[2])
wilcox.test (X,T)
t.test(X,T)
bss=10^3
f1 <- function (){
XO=sample (X,bss, replace=TRUE)
TA=sample (T,bss, replace=TRUE)
mean(XO-TA)
}
dif=replicate (10^3,f1())
quantile (dif,probs=c(.024,.5,.975))
URLリンク(imagizer.imageshack.com)
450:卵の名無しさん
18/04/10 12:35:44.49 sK+H3q9V.net
XOF <- function (shape, scale,n=375,med=53.5,lwr=48.0,upr=58.5){
f <- function (v){
sh=v[1]
sc=v[2]
N=10^4
xof=rgamma(N,sh,sc) # sh: shape, sc: scale
(median(xof)-med)^2+ (qgamma(0.025, sh,sc) - lwr)^2 + (qgamma(0.975,sh,sc) - upr)^2
}
opt=optim (c(shape,scale), f, method='L-BFGS-B')
opt_par=opt$par
print (opt_par)
if(opt$convergence!=0) print(opt)
x11()
curve(dgamma(x,opt_par[1],opt_par[2]),40,70,xlab='time (h)',ylab='',bty='l')
sim=rgamma (n,opt_par[1],opt_par[2])
print(quantile(sim, probs=c(.025,.5,.975)),digits=3)
invisible (opt_par)
}
xof=XOF(400,7,375,53.5,48.0,58.5) ; tam=XOF(1000,20,377,53.8,50.2,56.4)
X=rgamma (375,xof[1],xof[2]) ; T=rgamma (377,tam[1],tam[2])
wilcox.test (X,T)
t.test(X,T)
bss=10^3
f1 <- function (){
XO=sample (X,bss, replace=TRUE)
TA=sample (T,bss, replace=TRUE)
mean(XO-TA)
}
dif=replicate (10^3,f1())
print(quantile (dif,probs=c(.024,.5,.975)),digits=3)
451:卵の名無しさん
18/04/10 15:18:27.99 sK+H3q9V.net
xo=data.frame(n=375,med=53.5,l=48.0,u=58.5)
ta=data.frame(n=377,med=53.8,l=50.2,u=56.4)
(xo$l+xo$u)/2 ; (ta$l+ta$u)/2
sqrt(xo$l*xo$u) ; sqrt(ta$l*ta$u)
ci2sd.t <- function(n,L,U){
(U-L)/2/qt(.975,n-1)*sqrt(n)
}
ci2sd.n <- function(n,L,U){
(U-L)/2/qnorm(.975)*sqrt(n)
}
(sd.xo=ci2sd.n(xo$n,xo$l,xo$u)) ; ci2sd.t(xo$n,xo$l,xo$u)
(sd.ta=ci2sd.n(ta$n,ta$l,ta$u)) ; ci2sd.t(ta$n,ta$l,ta$u)
s1=sd.xo ; s2=sd.ta
n1=xo$n ; n2=ta$n
sd=sqrt(((n1-1)*s1^2 + (n2-1)*s2)/(n1 + n2 -2))
sd
# 両側検定
f7.11n <- function(n,delta,alpha=0.05){ # n1 = n2
df=n+n-2
ncp=delta/sqrt(1/n + 1/n) # ⊿= (μ1-μ0)/σ
pt(qt(alpha/2,df),df,ncp)+
pt(-qt(alpha/2,df),df,ncp,lower=FALSE)
}
f7.17n <- function(power,delta,alpha=0.05){
uniroot(function(n)f7.11n(n,delta,alpha)-power,c(3,10^6))$root
}
f7.17n(0.8,0.3/sd)
452:卵の名無しさん
18/04/10 15:27:03.10 sK+H3q9V.net
# 平均値信頼区間から必要なサンプルサイズを計算
xo=data.frame(n=375,med=53.5,l=48.0,u=58.5)
ta=data.frame(n=377,med=53.8,l=50.2,u=56.4)
(xo$l+xo$u)/2 ; (ta$l+ta$u)/2
sqrt(xo$l*xo$u) ; sqrt(ta$l*ta$u)
ci2sd.t <- function(n,L,U){
(U-L)/2/qt(.975,n-1)*sqrt(n)
}
ci2sd.n <- function(n,L,U){
(U-L)/2/qnorm(.975)*sqrt(n)
}
(sd.xo=ci2sd.t(xo$n,xo$l,xo$u)) ; ci2sd.n(xo$n,xo$l,xo$u)
(sd.ta=ci2sd.t(ta$n,ta$l,ta$u)) ; ci2sd.n(ta$n,ta$l,ta$u)
s1=sd.xo ; s2=sd.ta
n1=xo$n ; n2=ta$n
sd=sqrt(((n1-1)*s1^2 + (n2-1)*s2)/(n1 + n2 -2))
sd
453:卵の名無しさん
18/04/10 15:27:18.84 sK+H3q9V.net
# 両側検定
f7.11n <- function(n,delta,alpha=0.05){ # n1 = n2
df=n+n-2
ncp=delta/sqrt(1/n + 1/n) # ⊿= (μ1-μ0)/σ
pt(qt(alpha/2,df),df,ncp)+
pt(-qt(alpha/2,df),df,ncp,lower=FALSE)
}
f7.17n <- function(power,delta,alpha=0.05){
uniroot(function(n)f7.11n(n,delta,alpha)-power,c(3,10^6))$root
}
f7.17n(0.8,0.3/sd)
# 片側検定 H1: mean(xo) < mean(ta)
f7.16n <- function(n,delta,alpha=0.05){ # n1=n2
df=n+n-2
ncp=delta/sqrt(1/n+1/n) # ⊿= (μ1-μ0)/σ
pt(qt(alpha,df),df,ncp)
}
f7.19n <- function(power,delta,alpha=0.05){
uniroot(function(n)f7.16n(n,delta,alpha)-power,c(3,10^6))$root
}
f7.19n(0.80,-0.3/sd)
454:卵の名無しさん
18/04/10 20:33:17.78 sK+H3q9V.net
xo=c(455,med=53.7,49.5,58.5)
pl=c(230,med=80.2,72.6,87.1)
n1=xo[1] ;n2=pl[1]
ci2sd.t <- function(n,L,U){
(U-L)/2/qt(.975,n-1)*sqrt(n)
}
ci2sd.n <- function(n,L,U){
(U-L)/2/qnorm(.975)*sqrt(n)
}
pooled.sd <- function(sd1,sd2,n1,n2){
sqrt((sd1^2*(n1-1)+
455:sd2^2*(n2-1))/(n1+n2-2)) } sd1=ci2sd.t(n1,xo[3],xo[4]) ; sd1 sd2=ci2sd.t(n2,pl[3],pl[4]) ; sd2 sd.p=pooled.sd(sd1,sd2,n1,n2) ; sd.p pwr::pwr.t2n.test(n1,n2,d=-26.5/sd.p,alt='less') pwr::pwr.t2n.test(n1=NULL,n2=n2,d=-26.5/sd.p,power=0.80,alt='less') # t検定(生データなし,等分散不問,両側検定) Welch.test=function(n1,n2,m1,m2,sd1,sd2){ T=(m1-m2)/sqrt(sd1^2/n1+sd2^2/n2) df=(sd1^2/n1+sd2^2/n2)^2 / (sd1^4/n1^2/(n1-1)+sd2^4/n2^2/(n2-1)) p.value=2*pt(abs(T),df,lower.tail = FALSE) return(p.value) } m1=(xo[3]+xo[4])/2 ; m2=(pl[3]+pl[4])/2 Welch.test(n1,n2,m1,m2,sd1,sd2) T=(m1-m2)/sqrt(sd1^2/n1+sd2^2/n2) df=(sd1^2/n1+sd2^2/n2)^2 / (sd1^4/n1^2/(n1-1)+sd2^4/n2^2/(n2-1)) pt(T,df)
456:卵の名無しさん
18/04/11 11:22:50.41 KzjAFvro.net
# False Positive Report Probability for Proportion
FPRPP <- function(r1,r2,n1,n2, prior=0.5){
p.value=prop.test(c(r1,r2),c(n1,n2))$p.value
p1=r1/n1
p2=r2/n2
h=pwr::ES.h(p1,p2)
power=pwr::pwr.2p2n.test(h=h,n1=n1,n2=n2,sig.level=p.value)$power
FP=(1-prior)*p.value # p-less-than case
TP=prior*power
c(p.value=p.value,'false positive rate' = FP/(TP+FP))
}
457:卵の名無しさん
18/04/11 11:25:58.50 KzjAFvro.net
FPRPP.F <- function(r1,r2,n1,n2, prior=0.5){
p.value=fisher.test(matrix(c(r1,n1-r1,r2,n2-r2),2))$p.value
p1=r1/n1
p2=r2/n2
power=statmod::power.fisher.test(p1,p2,n1,n2,alpha=p.value)
FP=(1-prior)*p.value
TP=prior*power
c(p.value=p.value,'false positive rate' = FP/(TP+FP))
}
r1=18;n1=18;r2=6;n2=18
FPRPP.F(r1,r2,n1,n2,0.5)
p.value false positive rate
2.966259e-05 8.016273e-05
458:卵の名無しさん
18/04/11 21:24:11.18 gNeb3efk.net
# 非心カイ二乗分布
f9.22 <- function(k){
mui=runif(k)
X2.dash=0
f <- function(){
for(i in 1:k){
X2.dash=X2.dash+rnorm(1,mui[i])^2
}
return(X2.dash)
}
chisq.dash=replicate(10^3,f())
hist(chisq.dash,freq=FALSE,col=sample(colours(),1),xlab=quote(chi),main='')
ncp=sum(mui^2)
curve(dchisq(x,k,ncp),add=TRUE,lwd=2)
}
graphics.off()
x11() ; par(mfrow=c(3,3)) ; for(i in 1:9) f9.22(sample(3:10,1))
459:卵の名無しさん
18/04/12 00:50:35.65 GLkdI2IH.net
# S/σ2 ~ χ'(n-1,λ)
fp.142c <- function(n){
mui=rnorm(n)
sigma=runif(1)
mu=mean(mui)
xi=numeric(n)
f <- function(){
for(i in 1:n) xi[i]=rnorm(1,mui[i],sigma)
S=var(xi)*(n-1)
S/sigma^2
}
chisq.dash=replicate(10^3,f())
hist(chisq.dash,freq=FALSE,col=sample(colours(),1),xlab=quote(chi),main='')
ncp=sum((mui-mu)^2/sigma^2)
curve(dchisq(x,n,ncp),add=TRUE,lwd=2)
}
graphics.off()
fp.142c(7)
x11() ; par(mfrow=c(3,3)) ; for(i in 1:9) fp.142c(sample(3:10,1))
graphics.off()
460:卵の名無しさん
18/04/12 10:37:13.22 ZOlQ3FWn.net
URLリンク(imagizer.imageshack.com)
461:卵の名無しさん
18/04/12 18:48:37.23 GLkdI2IH.net
clt <- function(FUN,n=1000,k=10^4){
graphics.off()
x11()
par(mfrow=c(2,1))
hist(FUN(n),col='skyblue',freq=FALSE)
dat=replicate(k,FUN(n))
hist(apply(dat,1,sum),col=sample(colours(),1),freq=FALSE)
}
clt(FUN=function(x) rbeta(x,0.5,0.5))
clt(FUN=function(x) rexp(x))
clt(FUN=function(x) rgamma(x,1,1))
462:卵の名無しさん
18/04/15 01:25:53.84 2+v6Urh8.net
White=c(2.0477, 247, 1.8889)
Black=c(6.3772, 29, 1.4262)
Other=c(12.0832, 37, 3.7964)
# Totalc(3.6351 313 3.9770)
dat=rbind(White,Black,Other)
colnames(dat)=c("Mean","N","SD")
mean.G=sum(dat[,"Mean"]*dat[,"N"])/sum(dat[,"N"])
SS.bit=sum((dat[,"Mean"]-mean.G)^2*dat[,"N"])
SS.wit=sum(dat[,"SD"]^2*(dat[,"N"]-1))
df.bit=nrow(dat)-1
df.wit=sum(dat[,"N"]-1)
MS.bit=SS.bit/df.bit
MS.wit=SS.wit/df.wit
F.ratio=MS.bit/MS.wit
pf(F.ratio,df.bit,df.wit,lower.tail=FALSE)
(η2=(SS.bit)/(SS.bit+SS.wit))
Mean=dat[,'Mean'] ; N=dat[,'N'] ; SD=dat[,'SD']
sqrt(sum((N-1)*SD^2 + N*(Mean-sum(Mean*N)/sum(N))^2)/(sum(N)-1))
463:卵の名無しさん
18/04/15 10:56:43.94 2+v6Urh8.net
total.Mean <- function(Mean,N) sum(N*Mean)/sum(N)
total.SD <- function(Mean,N,SD) sqrt(sum((N-1)*SD^2 + N*(Mean-sum(Mean*N)/sum(N))^2)/(sum(N)-1))
(uniroot(function(x) total.Mean(c(60,65,x),c(20,5,95)) - 45,c(0,100))$root)
total.Mean(c(60,65,40.79),c(20,5,95))
(uniroot(function(x) total.SD(c(60,65,40.79),c(20,5,95),c(3,2,x)) - 10 , c(0,100))$root)
total.SD(c(60,65,40.79),c(20,5,95),c(3,2,6.13))
辞退者=c(60,20,3)
特待生=c(65, 5, 2)
裏口バカ=c(40.79, 95, 6.13)
# Totalc(45 120 10)
dat=rbind(辞退者,特待生,裏口バカ)
colnames(dat)=c("Mean","N","SD")
mean.G=sum(dat[,"Mean"]*dat[,"N"])/sum(dat[,"N"])
SS.bit=sum((dat[,"Mean"]-mean.G)^2*dat[,"N"])
SS.wit=sum(dat[,"SD"]^2*(dat[,"N"]-1))
df.bit=nrow(dat)-1
df.wit=sum(dat[,"N"]-1)
MS.bit=SS.bit/df.bit
MS.wit=SS.wit/df.wit
(F.ratio=MS.bit/MS.wit)
pf(F.ratio,df.bit,df.wit,lower.tail=FALSE)
(η2=(SS.bit)/(SS.bit+SS.wit)) # eta^2
> pf(F.ratio,df.bit,df.wit,lower.tail=FALSE)
[1] 2.789672e-30
> (η2=(SS.bit)/(SS.bit+SS.wit)) # eta^2
[1] 0.687539
>
464:卵の名無しさん
18/04/15 11:13:24.82 2+v6Urh8.net
辞退=scale(rnorm(20))*3+60
特待=scale(rnorm(5))*2+65
裏口=scale(rnorm(95))*6.1+40.79
score=c(辞退,特待,裏口)
group=c(rep('辞退',20),rep('特待',5),rep('裏口',95))
boxplot(score~group)
data=data.frame(score,group)
res=aov(score~group)
re=summary(res) ; re
str(re)
re2=re[[1]] ; re2
F.ratio=(re2$`Sum Sq`[1]/re2$`Df`[1])/(re2$`Sum Sq`[2]/re2$`Df`[2]) ; F.ratio
pf(F.ratio,re2$Df[1],re2$Df[2],lower.tail = FALSE)
465:卵の名無しさん
18/04/15 11:58:07.84 2+v6Urh8.net
##
Jonckheere<-function(L){
f<-function(A,B){
a<-length(A)
b<-length(B)
det<-0
for(i in 1:a){
for(j in 1:b){
det<- det+ifelse(A[i]==B[j],0.5,A[i]>B[j])
}
}
return(det)
}
g<-function(L){ #L=list(A1,,,,Aa),A1 > A2 > A3,..,> Aa : vector
a<-length(L)
comb<-combn(1:a,2)
con<-ncol(comb)
J=0
for(i in 1:con){
J<-J+f(L[[comb[1,i]]],L[[comb[2,i]]])
}
return(J)
}
466:卵の名無しさん
18/04/15 11:58:18.63 2+v6Urh8.net
J<-g(L)
a<-length(L)
n=double(a)
for(i in 1:a){
n[i]<-length(L[[i]])
}
N<-sum(n)
EJ <- (N^2-sum(n^2))/4
VJ <- (N^2*(2*N+3)-sum(n^2*(2*n+3)))/72
Z <- abs(J-EJ)/sqrt(VJ)
p.value<-pnorm(Z,lower.tail=FALSE)
return(p.value)
}
467:卵の名無しさん
18/04/15 16:37:48.33 2+v6Urh8.net
特待=scale(rnorm(5))*2+65
辞退=scale(rnorm(20))*3+60
裏口=scale(rnorm(95))*6.1+40.79
score=c(特待,辞退,裏口)
group=as.factor(c(rep(1,5),rep(2,20),rep(3,95)))
levels(group) <- c('特待','辞退','裏口')
boxplot(score~group)
data=data.frame(score,group)
res=aov(score~group)
re=summary(res) ; re
str(re)
re2=re[[1]] ; re2
F.ratio=(re2$`Sum Sq`[1]/re2$`Df`[1])/(re2$`Sum Sq`[2]/re2$`Df`[2]) ; F.ratio
pf(F.ratio,re2$Df[1],re2$Df[2],lower.tail = FALSE)
oneway.test(score~group)
kruskal.test(score~group,data=data)
Tk=TukeyHSD(aov(score~group,data=data)) ; Tk[[1]]
pairwise.t.test(score,group)
pairwise.t.test(score,group,p.adj='none')
pairwise.t.test(score,group,p.adj='bon')
pairwise.t.test(score,group,p.adj='bon',pool.sd = FALSE)
pairwise.wilcox.test(score,group)
#
library(PMCMR)
jonckheere.test(data$score,data$group,alternative = 'dec')
468:卵の名無しさん
18/04/16 08:27:01.84 9hc3WnzC.net
FPRP = Pr(H0|y)
= BF*PO/(BF*PO+1) BF = Pr(y|H0)/Pr(y | H1) : Bayes factor PO = π0/(1-π0) the prior odds of no association(true null hypothesis)
469:卵の名無しさん
18/04/16 11:44:56.69 j1Crkpch.net
αエラー、βエラーでのコストをCα,βCとすると
Pr(H0|y) < (Cβ/Cα)/(1+(Cβ/Cα))
A Bayesian Measure of the Probability of False Discovery
in Genetic Epidemiology Studies
470:卵の名無しさん
18/04/16 11:45:23.85 j1Crkpch.net
αエラー、βエラーでのコストをCα,Cβとすると
Pr(H0|y) < (Cβ/Cα)/(1+(Cβ/Cα))
が成立するとのこと。
A Bayesian Measure of the Probability of False Discovery
in Genetic Epidemiology Studies
471:卵の名無しさん
18/04/18 20:27:56.69 OaKjeXLn.net
f.nxP0<-function(x,n,P0,Ignore=FALSE){
P.hat=x/n
if((!Ignore)&(n*P.hat<5 | n*(1-P.hat)<5 | n*P0<5 | n*(1-P0)<5)){
warning('unfit for asymptotics')
return(NA)
}
u0= (P.hat-P0)/(sqrt(P0*(1-P0)/n))
p.value=pnorm(-abs(u0))
data.frame(u0,p.value)
}
> f.nxP0(x=3,n=100,P0=0.10)
[1] NA
Warning message:
In f.nxP0(x = 3, n = 100, P0 = 0.1) : unfit for asymptotics
> f.nxP0(3,100,0.10,Ignore=TRUE)
u0 p.value
1 -2.333333 0.009815329
472:卵の名無しさん
18/04/21 17:35:11.37 Hyq4QfSG.net
fisher.testと超幾何分布
x = 2 # of white balls drawn (<=m)
m = 6 # of white balls in urn
n = 4 # of black balls in urn
k = 5 # drawn
sum(dhyper(0:k,m,n,k)[dhyper(0:k,m,n,k)<=dhyper(x,m,n,k)])
fisher.test(matrix(c(x,k-x,m-x,n-k+x),ncol=2,byrow=TRUE))$p.value
473:卵の名無しさん
18/04/21 20:09:41.70 Hyq4QfSG.net
# URLリンク(oku.edu.mie-u.ac.jp)
?dhyper
x=2
wd=x # white drawn : x < td (= k)
wu=6 # white in urn : m
bu=4 #
474:blac in urn : n td=5 # total drawn : k wd2p <- function(wd) dhyper(wd,wu,bu,td) #white drawn to probability (probs=sapply(0:td,wd2p)) plot(0:td,probs,type='h') sum(probs[probs<=wd2p(x)]) fisher.test(matrix(c(wd,td-wd,wu-wd,bu-(td-wd)),ncol=2,byrow=TRUE))$p.value td=4 (probs=sapply(0:td,wd2p)) plot(0:td,probs,type='h') wd=1 sum(probs[probs<=wd2p(wd)]) fisher.test(matrix(c(wd,td-wd,wu-wd,bu-(td-wd)),ncol=2,byrow=TRUE))$p.value
475:卵の名無しさん
18/04/21 20:10:40.58 Hyq4QfSG.net
ex1 = matrix(c(6,12,12,5), nrow=2)
fisher.test(ex1)
exact2x2::fisher.exact(ex1)
476:卵の名無しさん
18/04/21 21:23:35.57 Hyq4QfSG.net
Event no Event
exposure a b
no exposure c d
OR=(a/c)/(b/d) = ad/bc ; ad=bc*OR
RR=a/(a+b) ÷ c/(c+d)=a(c+d)÷c(a+b)=(ac+ad)/(ac+bc)
=(ac+bc*OR)/(ac+bc)
=(b*OR+a)/(a+b)
=OR*b/(a+b)+a/(a+b)
=OR*( 1-a/(a+b) ) + a/(a+b)
=OR*(1-P1) + P1 # P1:暴露群でのイベント発生率=a/(a+b)
477:卵の名無しさん
18/04/21 21:25:20.08 Hyq4QfSG.net
Event no Event
exposure a b
no exposure c d
OR=(a/c)/(b/d) = ad/bc ; bc=ad/OR
RR=a/(a+b) ÷ c/(c+d)=a(c+d)÷c(a+b)=(ac+ad)/(ac+bc)
=(ac+ad)/(ac+ad/OR)
=(c+d) /(c + d/OR) # (c+d) で割る
=1 / {c/(c+d) + d/[(c+d)/OR]}
=OR/{OR*c/(c+d) + d/(c+d)} # ORを掛ける
=OR/{OR*c/(c+d) + (c+d)/(c+d)-c/(c+d)}
=OR/(OR*P0 + 1-P0) # P0:非暴露群でのイベント発生率=c/(c+d)
####
RR=OR*(1-P1) + P1
OR=(RR-P1)/(1-P1) # P1:暴露群でのイベント発生率=a/(a+b)
RR=OR/(OR*P0 + 1-P0)
OR=(1-P0)*RR/(1-P0*RR) # P0:非暴露群でのイベント発生率=c/(c+d)
####
OR2RRp0=function(OR,p0) OR/(OR*p0+1-p0) #p0:非暴露群でのイベント発生率
OR2RRp1=function(OR,p1) OR*(1-p1)+p1 #P1:暴露群でのイベント発生率
478:卵の名無しさん
18/04/21 21:32:57.14 Hyq4QfSG.net
##
RR=OR*(1-P1) + P1
OR=(RR-P1)/(1-P1) # P1:暴露群でのイベント発生率=a/(a+b)
P1=(OR-RR)/(OR-1)
RR=OR/(OR*P0 + 1-P0)
OR=(1-P0)*RR/(1-P0*RR) # P0:非暴露群でのイベント発生率=c/(c+d)
P0=(OR-RR)/(OR-1)/RR
RR2ORp1 <- function(RR,P1) (RR-P1)/(1-P1) # P1:暴露群でのイベント発生率=a/(a+b)
RR2ORp0 <- function(RR,P0) (1-P0)*RR/(1-P0*RR) # P0:非暴露群でのイベント発生率=c/(c+d)
R2p1 <- function(RR,OR) (OR-RR)/(OR-1)
R2p0 <- function(RR,OR) (OR-RR)/(OR-1)/RR
479:卵の名無しさん
18/04/22 08:01:09.15 TOqm84tJ.net
URLリンク(swada.net)
480:卵の名無しさん
18/04/22 23:30:16.86 TOqm84tJ.net
# 女子大生が全部で100人いるとする。
# 身長は170cmか,150cmのどちらかである。
# 身長170cmの人の胸囲は90cm, 身長150cmの人の胸囲は75cm.
# そして,胸囲90cmの人は確率0.9でクラス1へ,確率0.1でクラス2に振り分けられる.
# 一方で,胸囲75cmの人は確率0.8でクラス2へ,確率0.2でクラス1に振り分けられる.
# そして,胸囲が90cmと75cmの人はそれぞれ30,70人である。
# クラス1とクラス2の胸囲調整済み平均身長を求めよ。
# Class1
(170*30*0.9 + 150*70*0.2)/(30*0.9 + 70*0.2) # 27+14人
mean(c(rep(170,30*0.9),rep(150,70*0.2)))
# Class2
(170*30*0.1 + 150*70*0.8)/(30*0.1 + 70*0.8) # 3+56人
mean(c(rep(170,30*0.1),rep(150,70*0.8)))
# average
mean(c(rep(170,30),rep(150,70)))
# Inverse Probability Weighting(IPW)
# Class 1 adjusted
(170*1/0.9*27 + 150*1/0.2*14)/sum(1/0.9*27+1/0.2*14)
weighted.mean(c(rep(170,30*0.9),rep(150,70*0.2)),c(rep(1/0.9,30*0.9),rep(1/0.2,70*0.2)))
# Class 2 adjusted
(170*1/0.1*3 + 150*1/0.8*56)/sum(1/0.1*3+1/0.8*56)
weighted.mean(c(rep(170,30*0.1),rep(150,70*0.8)),c(rep(1/0.1,30*0.1),rep(1/0.8,70*0.8)))
481:卵の名無しさん
18/04/23 19:00:36.74 4SvCBV2R.net
Inverse Propensity score Weighting
IPW8 <- function(Y,Tr,ps) weighted.mean(Y,Tr/ps) # Y:dead or alive, Tr:treated or control, ps:propensity score
IPW8(Y,Tr,ps) - IPW8(Y,1-Tr,1-ps)
482:卵の名無しさん
18/04/24 12:13:34.39 6fgOc8MP.net
回帰分析による推定では,傾向スコアと,目的変数が線形な関係になる必要があるが,傾向スコア自体は(ロジスティック回帰による処置群に含まれる確率であるため)0-1の間の値をとるので,線形性を仮定するのは論理的におかしい
483:卵の名無しさん
18/04/24 22:47:05.61
484:5RekAiwJ.net
485:卵の名無しさん
18/04/25 10:26:10.03 5SphbBIi.net
# Lindner Center data on 996 PCI patients analyzed by Kereiakes et al. (2000)
library(MatchLinReg)
data("lindner")
head(lindner)
formula.stent=stent ~ abcix + height + female + diabetic + acutemi + ejecfrac + ves1proc-1
re.glm=glm(stent ~ abcix + height + female + diabetic + acutemi +ejecfrac + ves1proc-1,family=binomial,data=lindner)
summary(re.glm)
Epi::ROC(form=stent~.-1,data=lindner)$AUC
rms::lrm( stent ~ abcix + height + female + diabetic + acutemi +
ejecfrac + ves1proc, data=lindner)$stat['C']
ps=re.glm$fitted.values
Y=lindner$lifepres
Tr=lindner$stent
weighted.mean(Y,Tr/ps) - weighted.mean(Y,(1-Tr)/(1-ps))
486:卵の名無しさん
18/04/25 10:26:32.95 5SphbBIi.net
library(Matching)
m.out=Match(Y,Tr,ps)
summary(m.out)
## (Doubly Robust: DR)
n <- nrow(lindner)
y <- lindner['lifepres']
z <- lindner['stent']
lindner1 <- lindner[lindner['stent']==1,]
lindner0 <- lindner[lindner['stent']==0,]
model1=lm(formula=lifepres ~ abcix + height + female + diabetic
+ acutemi +ejecfrac + ves1proc, data=lindner1)
model0=lm(formula=lifepres ~ abcix + height + female + diabetic
+ acutemi +ejecfrac + ves1proc, data=lindner0)
fitted1 <- predict(model1, lindner)
fitted0 <- predict(model0, lindner)
dre1 <- (1/n)*sum(y+((z-ps)/ps)*(y-fitted1))
dre0 <- (1/n)*sum(((1-z)*y)/(1-ps)+(1-(1-z)/(1-ps))*fitted0)
c(treated=dre1,control=dre0,ATE=dre1-dre0)
#
487:卵の名無しさん
18/04/25 10:26:42.47 5SphbBIi.net
match0 <- function(Y,Tr,X,caliper=0.05){
interval = seq(0,1,by=caliper)
n=length(interval)
res = numeric(n)
for(i in 1:(n-1)){
temp0 = Y[ (Tr==0) & (interval[i]<X) & (X<interval[i+1]) ]
temp1 = Y[ (Tr==1) & (interval[i]<X) & (X<interval[i+1]) ]
res[i] = mean(temp1) - mean(temp0)
}
mean(res,na.rm=TRUE)
}
match0(Y=lindner$lifepres,Tr=lindner$stent,X=ps)
summary(Match(Y=lindner$lifepres,Tr=lindner$stent,X=ps, caliper=0.05))
MatchBalance(formula.stent,data=lindner)
488:卵の名無しさん
18/04/25 10:27:40.82 5SphbBIi.net
>>455
臨床統計のRスクリプトを公開するスレ。
489:卵の名無しさん
18/04/26 18:44:43.66 UTg/BRA4.net
ではHPVワクチン由来と断定できる重篤例に的を絞った質の高いエビデンスは得られるのでしょうか。
上記の相対リスク=1.82を論文記載の対照群のリスク(36人/10万人)等を基に有意なエビデンスとして検出するには約19万人を対象とする臨床試験が必要です。
WHOにより検証された世界の質の高い臨床試験の総人数が7万3697人)であることを考えると,現時点で因果関係を高い質で問うのは困難であるとわかります
URLリンク(www.igaku-shoin.co.jp)
とあるが、
俺の計算では(Rのpackage、Epiで計算)
各群約46000人で
p=0.05
Relative Risk: 1.8200 1 3.3125
> Epi::twoby2(mat)
2 by 2 table analysis:
------------------------------------------------------
Outcome : Col 1
Comparing : Row 1 vs. Row 2
Col 1 Col 2 P(Col 1) 95% conf. interval
Row 1 30.1927 46051.50 7e-04 5e-04 9e-04
Row 2 16.5894 46065.11 4e-04 2e-04 6e-04
95% conf. interval
Relative Risk: 1.8200 1 3.3125
Sample Odds Ratio: 1.8205 1 3.3144
Probability difference: 0.0003 0 0.0006
Asymptotic P-value: 0.05
------------------------------------------------------
>
490:卵の名無しさん
18/04/26 18:58:06.22 UTg/BRA4.net
# URLリンク(www.igaku-shoin.co.jp)
# 相対リスク=1.82を論文記載の対照群のリスク(36人/10万人)
# 1.82(95%信頼区間0.79-4.20)
rr=1.82
r0=36/10^5
r1=rr*r0
nn=seq(10^4,10^5,by=100)
rr2p.e <- function(n){
Epi::twoby2(matrix(c(n*r1,n*r0,n-n*r1,n-n*r0),ncol=2),print=FALSE)$p.value
}
plot(nn,sapply(nn,rr2p.e),type='l') ; abline(h=0.05, lty=3)
n=uniroot(function(n) rr2p.e(n)-0.05, c(10^4,10^5))$root
mat=matrix(c(n*r1,n*r0,n-n*r1,n-n*r0),ncol=2)
Epi::twoby2(mat)
491:卵の名無しさん
18/04/28 09:44:39.14 w/IREkFx.net
# 2 6 12 54 56+ 68 89 96 96 125+ 128+ 131+ 140+ 141+ 143 145+ 146 148+ 162+ 168 173+ 181+
time=c(2,6,12,54,56,68,89,96,96,125,128,131,140,141,143,145,146,148,162,168,173,181)
y=c(21/22,20/21,19/20,18/19,1,16/17,15/16,14/15,13/14,1,1,1,1,1,7/8,1,5/6,1,1,2/3,1,1)
survival=cumprod(y)
plot(time,survival,type='S',ylim=c(0,1))
censored=c(0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1)
event=1-censored
library(survival)
Surv(time,event)
plot(survfit(Surv(time,event)~1))
492:卵の名無しさん
18/04/28 11:58:57.59 w/IREkFx.net
カプランマイヤーのproduct limit法を理解していたらパッケージに頼らずにグラフも書けるな。
打ち切りポイントも描画できるように変更。
time=c(2,6,12,54,56,68,89,96,96,125,128,131,140,141,143,145,146,148,162,168,173,181)
y=c(21/22,20/21,19/20,18/19,1,16/17,15/16,14/15,13/14,1,1,1,1,1,7/8,1,5/6,1,1,2/3,1,1)
survival=cumprod(y)
censored=c(0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1)
plot(time,survival,type='s',ylim=c(0,1))
for(i in 1:22) if(censored[i]) points(time[i],survival [i],pch='+')
493:卵の名無しさん
18/04/30 11:29:32.46 T13xGV6f.net
# rule of thumb
# サンプルサイズと分散が同等な正規分布する集団からの無作為抽出で標本平均±標準誤差区間が重ならないときは母平均の有意差有無の判断はできない。
N=1000
f <- function(mx){
set.seed(123)
x1=rnorm(N,0 ,1)
x2=rnorm(N,mx,1)
t.test(x1,x2,var.equal = TRUE)$p.value
}
xx=seq(0,0.5,length=1000)
plot(xx,sapply(xx,f))
abline(h=0.05,lty=3)
uniroot(function(x) f(x)-0.05,c(0,1))$root
mx=0.0615 # 0.615以上で有意
g <- function(mx){
set.seed(123)
x1=rnorm(N,0 ,1)
x2=rnorm(N,mx,1)
n1=length(x1) ; n2=length(x2)
m1=mean(x1) ; m2=mean(x2)
s1=sd(x1) ; s2=sd(x2)
se1=s1/sqrt(n1) ; se2=s2/sqrt(n1)
(m1+se1) > (m2-se2) # overlap?
}
xx=seq(0,0.5,length=1000)
plot(xx,sapply(xx,g))
abline(h=0,lty=3)
uniroot(g,c(0,1))$root # non-overlap には0.0369以上
f(0.07) # 有意差あり
g(0.07) # overlapなし
f(0.05) # 有意差なし
g(0.05) # overlapなし
494:卵の名無しさん
18/04/30 12:22:04.07 T13xGV6f.net
# rule of thumb
# サンプルサイズと分散が同等な正規分布する集団からの無作為抽出で
# 標本平均±標準誤差区間が重なるときは母平均に有意差はない。
##
495:卵の名無しさん
18/04/30 14:24:19.80 T13xGV6f.net
# rule of thumb
# サンプルサイズと分散が同等な正規分布する集団からの無作為抽出で
# 標本平均±標準誤差区間が重なるときは母平均に有意差はない(p>0.05)。
#
x1=rnorm(n,m1,s)
x2=rnorm(n,m2,s) # m1 < m2
t.stat=(m1-m2)/(sqrt(2/n)*s) # < 0
=(m1-m2)*sqrt(n)/(sqrt(2)*s)
m1 + se > m2 -se # overlap
m1 - m2 > -2*se = -2*s/sqrt(n)
m1 - m2 = t.stat*(sqrt(2)*s)/sqrt(n) > -2*s/sqrt(n)
t.stat > -2/sqrt(2)=-sqrt(2)
df=198 # 例
curve(pt(x,df),-5,0) # x < 0で増加関数
pt(t.stat,df) > pt(-sqrt(2),df)
pt(-sqrt(2),df)
# dfを変化させる
curve(pt(-sqrt(2),x),200,ylim=c(0,0.5),xlab='df',ylab='p.value',lwd=2)
abline(h=0.05,lty=3)
pt(-sqrt(2),Inf) ; pnorm(-sqrt(2))
text(0,0.05,'0.05')
496:卵の名無しさん
18/04/30 14:24:33.69 T13xGV6f.net
# 平均値の95%信頼区間が重なった場合にはそれで有意差判定はできない。
set.seed(123)
x1=rnorm(100,0 ,1)
x2=rnorm(100,0.3 , 1)
t.test(x1,x2,var.equal = TRUE)$p.value # > 0.05
t.test(x1)$conf[2] > t.test(x2)$conf[1] # overlap TRUE
plot(NULL,NULL,ylim=c(0,0.05),xlim=c(-0.75,0.75),yaxt='n',ann=FALSE,bty='l')
segments(t.test(x1)$conf[1],0,t.test(x1)$conf[2],0,lwd=5,col=1)
segments(t.test(x2)$conf[1],0.01,t.test(x2)$conf[2],0.01,lwd=5,col=2)
set.seed(123)
x1=rnorm(100,0 ,1)
x2=rnorm(100,0.5 ,1)
t.test(x1,x2,var.equal = TRUE)$p.value # < 0.05
t.test(x1)$conf[2] > t.test(x2)$conf[1] # overlap TRUE
plot(NULL,NULL,ylim=c(0,0.05),xlim=c(-0.75,0.75),yaxt='n',ann=FALSE,bty='l')
segments(t.test(x1)$conf[1],0,t.test(x1)$conf[2],0,lwd=5,col=1)
segments(t.test(x2)$conf[1],0.01,t.test(x2)$conf[2],0.01,lwd=5,col=2)
497:卵の名無しさん
18/05/01 08:02:38.81 v/zhryAm.net
T.test2=function(n,dm,var1,var2){
SE12=sqrt((1/n+1/n)*((n-1)*var1+(n-1)*var2)/((n-1)+(n-1)))
T=dm/SE12
2*pt(abs(T),n-1+n-1,lower.tail = FALSE)
}
m1-m2=dm
n1=n2=n
T.test3=function(n,dm,var1,var2){
SE12=sqrt((2/n)*(var1+var2)/2))
T=dm/SE12
2*pt(abs(T),n-1+n-1,lower.tail = FALSE)
}
T.test4=function(x,n=1000,dm=1,var1=1){
SE=sqrt((2/n)*(var1+x)/2)
T=dm/SE
2*pt(abs(T),n-1+n-1,lower.tail = FALSE)
}
curve(T.test4(x),0,100,xlab='variance=x against variance=1',ylab='p.value')
498:卵の名無しさん
18/05/01 08:23:44.83 v/zhryAm.net
# t検定(生データなし,等分散不問)
Welch.test=function(n1,n2,m1,m2,var1,var2){
T=(m1-m2)/sqrt(var1/n1+var2/n2)
df=(var1/n1+var2/n2)^2 / (var1^2/n1^2/(n1-1)+var2^2/n2^2/(n2-1))
p.value=2*pt(abs(T),df,lower.tail = FALSE)
return(p.value)
}
n1=n2=n
m1-m2=dm
var2=x
Welch.test2=function(x,n=1000,dm=1,var1=1){
T=dm/sqrt(var1/n+x/n)
df=(var1/n+x/n)^2 / (var1^2/n^2/(n-1)+x^2/n^2/(n-1))
p.value=2*pt(abs(T),df,lower.tail = FALSE)
return(p.value)
}
curve(Welch.test2(x),0,100,xlab='variance=x against variance=1',ylab='p.value')
499:卵の名無しさん
18/05/01 09:37:21.50 UP3hBRO4.net
N=1000
n1=40
n2=60
SDR=10
set.seed(1234)
A=scale(rnorm(N))
B=scale(rnorm(N))*SDR
f2.b <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=TRUE)$p.value
}
p.t=replicate (10^4,f2.b())
hist (p.t)
f2.bW <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=FALSE)$p.value
}
p.W=replicate (10^4,f2.bW())
hist (p.W)
500:卵の名無しさん
18/05/01 10:15:19.84 UP3hBRO4.net
N=1000
n1=40
n2=60
SDR=1
set.seed(1234)
A=scale(rnorm(N))
B=scale(rnorm(N))*SDR
f2.b <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=TRUE)$p.value
}
p.t=replicate (10^4,f2.b())
hist (p.t)
f2.bW <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=FALSE)$p.value
}
p.W=replicate (10^4,f2.bW())
hist (p.W)
501:卵の名無しさん
18/05/01 10:21:41.36 UP3hBRO4.net
N=1000
n1=40
n2=60
SDR=1
dm=0.1
set.seed(1234)
A=scale(rnorm(N))
B=scale(rnorm(N))*SDR+dm
f2.b <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=TRUE)$p.value
}
p.t=replicate (10^4,f2.b())
hist (p.t)
f2.bW <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=FALSE)$p.value
}
p.W=replicate (10^4,f2.bW())
hist (p.W)
502:卵の名無しさん
18/05/01 12:51:04.77 UP3hBRO4.net
T.test4=function(var2,n1,n2,dm=0.5,var1=1){
SE12=sqrt((1/n1+1/n2)*((n1-1)*var1+(n2-1)*var2)/((n1-1)+(n2-1)))
T=dm/SE12
2*pt(abs(T),n1-1+n2-1,lower.tail = FALSE)
}
curve(T.test4(x,40,160,0.1),0,100,xlab='variance=x against variance=1',ylab='p.value')
503:卵の名無しさん
18/05/01 14:51:52.52 W1Ln0POm.net
# URLリンク(www.rips-irsp.com)
# Why Psychologists Should by Default Use Welch’s t-test
# Instead of Student’s t-test
par(mfrow=c(2,1))
N=1000
n1=10
n2=90
SDR=10
dm=1
k=10^4
set.seed(1234)
A=scale(rnorm(N))
B=scale(rnorm(N))*SDR+dm
f2.b <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=TRUE)$p.value
}
p.t=replicate (k,f2.b())
hist (p.t,main='Student\'s t.test',col=sample(colours(),1))
mean(p.t < 0.05) # power(when dm!=0) or Type I error(when dm=0)
f2.bW <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=FALSE)$p.value
}
p.W=replicate (k,f2.bW())
hist (p.W,main='Welch\'s t.test',col=sample(colours(),1))
mean(p.W < 0.05) # power(when dm!=0) or Type I error(when dm=0)
504:卵の名無しさん
18/05/01 14:57:05.14 W1Ln0POm.net
Why Psychologists Should by Default Use Welch’s t-test Instead of Student’s t-test
URLリンク(www.rips-irsp.com)
に触発されてシミュレーションの条件(サンプルサイズや分散比)を変えて追試してみた。
結果にびっくり
URLリンク(i.imgur.com)
あたかもド底辺シリツ医大卒を最高学府を履修したと呼ぶほどの差異に等しい。
Welchのrobustnessに再度感銘した。
505:卵の名無しさん
18/05/01 19:34:12.07 W1Ln0POm.net
par(mfrow=c(2,1))
N=1000
n1=50
n2=25
SDR=5
dm=0 # null hypothesis is true when dm==0
k=10^4
set.seed(123)
A=scale(rnorm(N))
B=scale(rnorm(N))*SDR+dm
f2.b <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=TRUE)$p.value
}
p.t=replicate (k,f2.b())
hist (p.t,main='Student\'s t.test',col=sample(colours(),1),
xlab=paste('n1 = ',n1,', sd1 = 1 ; n2 = ',n2,' , sd2 = ',SDR))
mean(p.t < 0.05) # power(when dm!=0) or Type I error(when dm=0)
f2.bW <- function(){
a=sample(A,n1)
b=sample(B,n2)
t.test(a,b, var=FALSE)$p.value
}
p.W=replicate (k,f2.bW())
hist (p.W,main='Welch\'s t.test',col=sample(colours(),1),
xlab=paste('n1 = ',n1,', sd1 = 1 ; n2 = ',n2,' , sd2 = ',SDR))
mean(p.W < 0.05) # power(when dm!=0) or Type I error(when dm=0)
506:卵の名無しさん
18/05/04 16:46:09.21 6BNcb2/S.net
x=log.nor=seq(-8.0,-3.0,by=0.5) ; n=length(x)
y=relax=c(2.6,10.5,15.8,21.1,36.8,57.9,73.7,89.5,94.7,100.0,100.0)
(re.nls=nls(y ~ Top/(1+10^((LogEC50-x)*HillSlope)),
start=c(Top=100,LogEC50=-5,HillSlope=1),algo='port'))
nls2R2(re.nls,y)
plot(x,y,bty='l',pch=19)
lines(x,predict(re.nls))
#
R2 <- function(dat){
n = nrow(dat)
sm = summary(
nls(dat$y ~ Top/(1+10^((LogEC50-dat$x)*HillSlope)),
start=c(Top=100,LogEC50=-5,HillSlope=0.5),algorithm = 'port')
)
SSalt = sum(sm$residuals^2)
SSnull = var(dat$y)*(n-1)
Rsq=1-SSalt/SSnull
Rsq
# k=sm$df[1]
# AdjRsq=1-(1-Rsq)*(n-1)/(n-k-1)
# data.frame(Rsq,AdjRsq)
}
xy
507:=data.frame(x,y) R2(xy) library(boot) re=boot(xy,function(data,indices) R2(data[indices,]),R=1000) re$t0 BEST::plotPost(re$t) ; HDInterval::hdi(re$t) quantile(re$t,c(.025,.975)) nls2R2(re.nls,y)
508:卵の名無しさん
18/05/04 17:55:57.07 6BNcb2/S.net
anova(lm)$Pr[1])
min( pf(summary(lm)$fstatistic[1],
summary(lm)$fstatistic[2],summary(lm)$fstatistic[2],lower=FALSE),
pf(summary(lm)$fstatistic[1],
summary(lm)$fstatistic[2],summary(lm)$fstatistic[2] )
509:卵の名無しさん
18/05/04 18:01:55.70 6BNcb2/S.net
lm = lm(formula,data)
lm2p <- function(lm){ # p_value=anova(lm)$Pr[1]
f=summary(lm)$fstatistic
min(
pf(f[1],f[2l,f[3],lower=FALSE),
pf(f[1],f[2l,f[3])
)
}
510:卵の名無しさん
18/05/04 20:28:04.76 6BNcb2/S.net
## http://www.public.asu.edu/~gasweete/crj604/readings/1983-Freedman%20(Screening%20Regression%20Equations).pdf
noise2sig <- function(seed,sequential=FALSE){
set.seed(seed)
Noise=matrix(rnorm(100*51),ncol=51,nrow=100)
# first pass
lm1=lm(Noise[,51] ~ Noise[,-51]) # Noise[,51] : 目的変数Y
cat('\nR2 (1st) = ',summary(lm1)$r.squared)
cat('\np.value (1st) = ',anova(lm1)$Pr[1])
coefs1=summary(lm1)$coef[,'Pr(>|t|)']
length(coefs1)
coefs1[1]
coeffs1=coefs1[-1]
cat('\ncoef < 0.25 =',sum(coeffs1<0.25))
cat('\ncoef < 0.05 =',sum(coeffs1<0.05))
indx25=which(coeffs1<0.25)
511:卵の名無しさん
18/05/04 20:28:19.24 6BNcb2/S.net
# second pass
lm2=lm(Noise[,51] ~ Noise[,indx25])
cat('\n\nR2 (2nd) = ',summary(lm2)$r.squared)
cat('\np.value(2nd) = ',anova(lm2)$Pr[1])
coefs2=summary(lm2)$coef[,'Pr(>|t|)']
length(coefs2)
coefs2[1]
coeffs2=coefs2[-1]
cat('\ncoef < 0.25 (2nd) =',sum(coeffs2<0.25))
cat('\ncoef < 0.05 (2nd) =',sum(coeffs2<0.05))
cat('\n\n')
if(sequential){
cat('Hit Return Key in console window')
no_save <- scan(n=1, what=character(), quiet=TRUE)
}
}
noise2sig(1)
for(i in 2:5) noise2sig(i,seq=TRUE)
512:卵の名無しさん
18/05/07 08:41:59.73 /y3oEDZQ.net
UG.Purge <- function(alt='two.sided'){
pv=numeric(10)
for(j in 1:10){
L=list()
tmp=UG[-j,]
for(i in 1:6) L=append(L,list(tmp[,i]))
pv[j]=jonckheere(L,cat=FALSE,alternative = alt)
}
return(pv)
}
which(UG.Purge('two') < 0.05)
which(UG.Purge('increasing') < 0.05)
513:卵の名無しさん
18/05/08 17:13:30.89 4Ru4wKk7.net
# http://file.scratchhit.pazru.com/TkySchBnf.txt
## fitted is a sequence of means
## nis is a corresponding sequence of sample sizes for each mean
## df is the residual df from the ANOVA table
## MSE = mean squared error from the ANOVA table
## conf.level is the family-wise confidence level, defaults to .95
pairwise.scheffe.t�
514:�st <- function(re.aov, conf.level = 0.95){ model = re.aov$model colnames(model) = c('score','group') fitted = with(model,tapply(score,group,mean)) nis = with(model,tapply(score,group,length)) df = re.aov$df.residual MSE = summary(re.aov)[[1]]['Residuals','Mean Sq'] r = length(fitted) pairs = combn(1:r,2) diffs = fitted[pairs[1,]] - fitted[pairs[2,]]
515:卵の名無しさん
18/05/08 17:13:57.38 4Ru4wKk7.net
T = sqrt((r-1)*qf(conf.level,r-1,df))
hwidths = T*sqrt(MSE*(1/nis[pairs[1,]] + 1/nis[pairs[2,]]))
fvs = (diffs^2)/(MSE*(1/nis[pairs[1,]] + 1/nis[pairs[2,]]))/(r-1)
pvs = 1-pf(fvs, r-1, df)
val = cbind(diffs - hwidths, diffs, diffs + hwidths, fvs, r-1, df, pvs)
dimnames(val) = list(paste("subject",pairs[1,]," - subject", pairs[2,],
sep = ""), c("Lower", "Diff","Upper", "F Value", "nmdf", "dndf", "Pr(>F)"))
Sc = as.matrix(val)
p.Sc = Sc[,'Pr(>F)']
n = re.aov$rank - 1
mat.Sc = matrix(rep(NA,n*n),n)
for(k in 1:n) mat.Sc[,k] = c(rep(NA,k-1), p.Sc[((k-1)*(n+1-k/2)+1):((k-1)*(n+1-k/2)+1+n-k)])
colnames(mat.Sc) = model[[2]][1:n]
rownames(mat.Sc) = model[[2]][2:(n+1)]
d = round(mat.Sc,5)
d[is.na(d)] = '-'
print(d,quote = FALSE)
invisible(val)
}
516:卵の名無しさん
18/05/09 14:26:52.13 L5gMx8M1.net
# disese, No disease
# positive test TP(A) FP(B)
# negative test FN(C) TN(D)
PV <- function(prevalence=c(10^-4,1),sn=0.80,sp=0.95,...){
plot(NULL,NULL,xlim=prevalence,ylim=c(0,1),xlab='Prevalence',
ylab='Predicative Value(Test Credibility)',type='n',bty='l',
main=paste0('感度 = ',sn, ' 特異度 = ',sp), ...)
legend('center',bty='n',lwd=2,lty=1:3,legend=c('陽性適中率','陰性適中率','偽陽性率'),
col=c('red','darkgreen','brown'))
ppv<-function(prevalence,sensitivity=sn,specificity=sp){
PPV=prevalence*sensitivity/
(prevalence*sensitivity+(1-prevalence)*(1-specificity))
return(PPV)
}
curve(ppv(x),lty=1,lwd=2,col='red', add=TRUE)
npv<-function(prevalence,sensitivity=sn,specificity=sp){
NPV=(1-prevalence)*specificity/
( (1-prevalence)*specificity + prevalence*(1-sensitivity) )
return(NPV)
}
curve(npv(x),lty=2,lwd=2,col='darkgreen',add=TRUE)
false_negative_rate <- function(prevalence,sensitivity=sn,specificity=sp){
FNR <- prevalence*(1-sensitivity)/
( (1-prevalence)*specificity + prevalence*(1-sensitivity) )
return(FNR)
}
curve(false_negative_rate(x), lty=3,lwd=2,col='brown',add=TRUE)
}
517:卵の名無しさん
18/05/11 20:34:40.05 MdgPLO2C.net
zodiac=c('Aquarius','Aries','Cancer','Capricorn','Gemini','Leo','Libra','Pisces','Sagittarius','Scorpio','Taurus','Virgo')
N=c(856301,888348,917553,844635,937615,903009,897503,893332,846813,850128,918512,921196)
X=c(1433,1476,1496,1343,1553,1497,1350,1522,1277,1297,1534,1445)
iP=which(zodiac=='Pisces')
p.Pisces=prop.test(c(X[iP],sum(X[-iP])),c(N[iP],sum(N[-iP])))$p.value
p0=sum(X)/sum(N)
se
518:t.seed(16062006) p.max=numeric() for(i in 1:10^4){ xi=rbinom(length(N),N,p0) I=which.max(xi/N) re=prop.test(c(xi[I],sum(xi[-I])),c(N[I],sum(N[-I]))) p.max[i]=re$p.value } mean(p.max <= p.Pisces) cat("P-value for Pisces and CHF: ", p.max, file="Pisces.txt",fill=TRUE, append=TRUE)
519:卵の名無しさん
18/05/13 14:56:25.30 lYhicRHD.net
TRAP9p <- function(seed=NULL){
set.seed(seed)
A0=rnorm(10,50,10)
A1=A0+rnorm(10,0,5)+5
B0=rnorm(10,45,10)
B1=B0+rnorm(10,0,5)+5
A0=round(A0)
B0=round(B0)
A1=round(A1)
B1=round(B1)
a=t.test(A0,A1,paired=TRUE)$p. # 外部有意差なし
b=t.test(B0,B1,paired=TRUE)$p. # 内部有意差あり
ab=t.test(A1-A0,B1-B0)$p. # 加点差有意差なし
a0b0=t.test(A0,B0)$p.# base差なし
a1b1=t.test(A1,B1)$p.# final差
list(p=c(外部=a,内部=b,差異=ab,base差=a0b0,final差=a1b1),A0=A0,B0=B0,A1=A1,B1=B1)
}
TRAP9p()$p
rep=replicate(1000,TRAP9p()$p)
mean(rep['外部',]>0.05 & rep['内部',]<0.05 & rep['差異',]>0.05
& re['base差',] > 0.05 & re['final差',]<0.05)
seeds=c(4799, 10638, 12173, 12908, 13671, 17145, 18955)
re=sapply(seeds,function(x) TRAP9p(x)$p) # wait several ten seconds
idx=which(re['外部',]>0.05 & re['内部',]<0.05 & re['差異',]>0.05
& re['base差',] > 0.05 & re['final差',]<0.05)
idx ; re[,idx]
Tx=as.data.frame(TRAP9p(4799)[2:5]) # 4799 10638 12173 12908 13671 17145 18955
U=with(Tx,data.frame(外部.無=A0,外部.有=A1,内部.無=B0,内部.有=B1))
520:卵の名無しさん
18/05/13 14:57:15.49 lYhicRHD.net
TRAP9p()$p
rep=replicate(1000,TRAP9p()$p)
mean(rep['外部',]>0.05 & rep['内部',]<0.05 & rep['差異',]>0.05
& re['base差',] > 0.05 & re['final差',]<0.05)
seeds=c(4799, 10638, 12173, 12908, 13671, 17145, 18955)
re=sapply(seeds,function(x) TRAP9p(x)$p) # wait several ten seconds
idx=which(re['外部',]>0.05 & re['内部',]<0.05 & re['差異',]>0.05
& re['base差',] > 0.05 & re['final差',]<0.05)
idx ; re[,idx]
Tx=as.data.frame(TRAP9p(4799)[2:5]) # 4799 10638 12173 12908 13671 17145 18955
U=with(Tx,data.frame(外部.無=A0,外部.有=A1,内部.無=B0,内部.有=B1))
U
.plot = function(){
plot (NULL,xlim=c(0,5),ylim=c(30,80),bty='l',type='n',
xaxt='n',ylab='score',xlab='',main='「任意」の寄付金効果')
axis(side=1, at=c(1.5,3.5), labels=c('外部入学','内部進学'))
points(rep(1,10),U[,1])
points(rep(2,10),U[,2])
segments(rep(1,10),U[,1],rep(2,10),U[,2])
points(rep(3,10),U[,3],pch=19)
points(rep(4,10),U[,4],pch=19)
segments(rep(3,10),U[,3],rep(4,10),U[,4])
} ; .plot()
with(U,t.test(外部.無,外部.有,paired=TRUE))$p.value # 外部での効果
with(U,t.test(内部.無,内部.有,paired=TRUE))$p.value # 内部での効果
with(U,t.test(外部.有-外部.無,内部.有-内部.無)$p.value) # 内外部効果比較
with(U,t.test(log(外部.有/外部.無),log(内部.有/内部.無))$p.value)
521:卵の名無しさん
18/05/14 08:43:54.12 R8CQudRE.net
alpha=0.05 # Pr(significantH0)
power=0.80 # Pr(significant|H1)
N=100
prior=0.10
disease=prior*N
TP=disease*power
FP=N*(1-prior)*alpha
(FPRP=FP/(TP+FP))
alpha.L=0.045
alpha.U=0.055
522:卵の名無しさん
18/05/14 14:11:13.07 elEQ53K0.net
sensitivity=0.80 # Pr(reject|H1)
specificity=1-0.05
alpha=1-specificity. # Pr(reject|H0)
n=1000
prior=0.1
prop.p<- function (){
a=rbinom(1,n, prior)
b=rbinom(1,n, prior)
prop.test(c(a,b),c(n,n))$p.value
}
k=1000
re=replicate (k,prop.p()1)
hist(re)
mean(re<0.05)
mean (0.045<re & re<0.050)
TP=sensitivfity*prior
do qFP=(1-prior)*(1-sensitivity)
FPRP=FP/(TP+FP)
523:卵の名無しさん
18/05/15 08:47:26.31 NGCOKTk3.net
knowing that the data are ‘rare’ when there is no true difference is of little use unless one determines whether
or not they are also ‘rare’ when there is a true difference.
524:卵の名無しさん
18/05/15 18:26:03.26 NGCOKTk3.net
BayesFactor=Pr(sig|H0)/Pr(sig|H1)=alpha/power=(1-specificity)/sensitivity=FP/TP=1/pLR
pLR=TP/FP=sensitivity/(1-specificity)=1/BayesFactor
nLR=FN/TN=(1-sensitivity)/specificity
525:卵の名無しさん
18/05/16 07:16:23.92 nUz5W2k6.net
calc.FPR.p <- function(r1,r2,n1,n2,alpha=0.05){ #
526:n1=n2 p.val=prop.test(c(r1,r2),c(n1,n2))$p.value k=2 df=k-1 Pi=c(r1/n1,r2/n2) n=mean(n1,n2) theta.i=asin(sqrt(Pi)) delta=4*var(theta.i)*(k-1) ncp=n*delta power=pchisq(qchisq(1-alpha,df),df,ncp,lower=FALSE) qcrit=qchisq(max(p.val,1-p.val),df,ncp=0) curve(dchisq(x,df),0,15,ann=FALSE,bty='n') # H0 curve(dchisq(x,df,ncp),add=TRUE, lty=2) # H1 abline(v=qcrit,lty=3) x0=qcrit y0=dt(x0,df,0) x1=x0 y1=dchisq(x1,df,ncp=ncp) FPR=y0/(y0+y1) print(c(p.value=p.val,FPR=FPR),digits=3) } calc.FPR.p(85,95,100,100)
527:卵の名無しさん
18/05/16 20:15:48.59 nUz5W2k6.net
calc.FPR.chisq <- function(r,alpha=0.05){ # r=c(37,21,21,21) vs n=c(25,25,25,25)
m=length(r)
n=rep(mean(r),m)
p.val=chisq.test(rbind(r,n))$p.value
df=m-1
P0=n/sum(n)
P1=(r/n)/sum(r/n)
N=sum(r) # == sum(n)
ncp1=0
for(i in 1:m) ncp1=ncp1+N*(P1[i]-P0[i])^2/P0[i]
qcrit=qchisq(1-alpha,df,ncp=0)
curve(dchisq(x,df),0,20,xlab=quote(chi),ylab='Density',bty='n') # H0
curve(dchisq(x,df,ncp1),add=TRUE,lty=2,col=2) # H1
abline(v=qcrit,col='gray') ; text(qcrit,0,round(qcrit,2))
legend('topright',bty='n',legend=c('H0:Central','H1:Noncentral'),col=1:2,lty=1:2)
power=pchisq(qcrit,df,ncp1,lower=FALSE)
y0=dchisq(qcrit,df,0)
y1=dchisq(qcrit,df,ncp=ncp1)
FPR=y0/(y0+y1)
print(c(power=power,p.value=p.val,FPR=FPR),digits=3)
}
calc.FPR.chisq(c(10,16,34,9,10,26))
calc.FPR.chisq(c(1,0,9,2,1,1))
528:卵の名無しさん
18/05/17 06:32:52.54 1zxWbYbO.net
サンプルサイズが異なっても算出できるように改造した。
パーッケージpwrのpwr.2p2n.testの.コードを参考にした。
calc.FPR.p2 <- function(r1,r2,n1,n2,alpha=0.05){
p.val=prop.test(c(r1,r2),c(n1,n2))$p.value
k=length(c(r1,r2))
df=k-1
p1=r1/n1 ; p2=r2/n2
Pi=c(r1/n1,r2/n2)
theta.i=asin(sqrt(Pi))
delta=4*var(theta.i)*(k-1)
N=2/(1/n1+1/n2)
ncp1=N*delta
power.chi=pchisq(qchisq(1-alpha,df),df,ncp1,lower=FALSE)
qcrit=qchisq(max(p.val,1-p.val),df,ncp=0)
curve(dchisq(x,df),0,20,xlab='ChiSquare',ylab='Density',bty='n') # H0
curve(dchisq(x,df,ncp1),add=TRUE,lty=2,col=2) # H1
abline(v=qcrit,col='gray')
legend('top',bty='n',legend=c('H0','H1'),col=1:2,lty=1:2)
y0=dchisq(qcrit,df,0)
y1=dchisq(qcrit,df,ncp=ncp1)
FPR=y0/(y0+y1)
VAL=c(power=power.chi,p.value=p.val,FPR=FPR)
print(VAL,digits=3)
invisible(VAL)
}
529:卵の名無しさん
18/05/17 07:09:30.35 1zxWbYbO.net
calc.FPR.p2 <- function(r1,r2,n1,n2,alpha=0.05){
p.val=prop.test(c(r1,r2),c(n1,n2))$p.value
k=length(c(r1,r2))
df=k-1
p1=r1/n1 ; p2=r2/n2
Pi=c(r1/n1,r2/n2)
theta.i=asin(sqrt(Pi))
delta=4*var(theta.i)*(k-1)
N=2/(1/n1+1/n2)
ncp1=N*delta
power.chi=pchisq(qchisq(1-alpha,df),df,ncp1,lower=FALSE)
qcrit=qchisq(max(p.val,1-p.val),df,ncp=0)
curve(dchisq(x,df),0,ncp1*3,xlab=quote(chi),ylab='Density',bty='n') # H0
curve(dchisq(x,df,ncp1),add=TRUE,lty=2,col=2) # H1
abline(v=qcrit,col='gray')
legend('top',bty='n',legend=c('H0','H1'),col=1:2,lty=1:2)
y0=dchisq(qcrit,df,0)
y1=dchisq(qcrit,df,ncp=ncp1)
FPR.equal=y0/(y0+y1)
FPR.less=p.val/(p.val+power.chi)
VAL=c(power=power.chi,p.value=p.val,FPR.equal=FPR.equal,FPR.less=FPR.less)
print(VAL,digits=3)
invisible(VAL)
}
calc.FPR.p2(95,85,100,110)
530:卵の名無しさん
18/05/17 10:16:58.64 Z8D8umCF.net
calc.FPR.p2 <- function(r1,r2,n1,n2,alpha=0.05){
p.val=prop.test(c(r1,r2),c(n1,n2))$p.value
k=length(c(r1,r2))
df=k-1
p1=r1/n1 ; p2=r2/n2
Pi=c(p1,p2)
theta.i=asin(sqrt(Pi)) # arcsine conversion
delta=4*var(theta.i)*(k-1) # sum of squares
N=2/(1/n1+1/n2) # harmonic mean, subcontrary mean
ncp1=N*delta # non-central parameter
power.chi=pchisq(qchisq(1-alpha,df),df,ncp1,lower=FALSE)
# qchisq(1-0.05,1): 3.841
qcrit=qchisq(max(p.val,1-p.val),df,ncp=0)
# qcrit = prop.test(c(r1,r2),c(n1,n2))$statistic
curve(dchisq(x,df),0,2*qcrit,xlab=quote(chi),ylab='Density',bty='n') # H0
curve(dchisq(x,df,ncp1),add=TRUE,lty=2,col=2) # H1
# power.chi=AUC of right half of the H1 curve
abline(v=qcrit,col='gray')
legend('top',bty='n',legend=c('H0','H1'),col=1:2,lty=1:2)
text(qcrit,0,round(qcrit,2))
y0=dchisq(qcrit,df,0)
y1=dchisq(qcrit,df,ncp=ncp1)
FPR.equal=y0/(y0+y1) # length ratio
FPR.less=p.val/(p.val+power.chi) # area ratio
FPR.alpha=alpha/(alpha+power.chi) # FPR before analysis
VAL=c(power=power.chi,p.value=p.val,FPR.equal=FPR.equal,
FPR.less=FPR.less,FPR.alpha=FPR.alpha)
print(VAL,digits=3)
invisible(VAL)
}
calc.FPR.p2(85,95,100,100, alpha=0.05)
calc.FPR.p2(85,95,100,100, alpha=0.01)
531:卵の名無しさん
18/05/17 11:17:41.13 Z8D8umCF.net
calc.FPR.p2 <- function(r1,r2,n1,n2,alpha=0.05){
p.val=prop.test(c(r1,r2),c(n1,n2))$p.value
k=length(c(r1,r2))
df=k-1
p1=r1/n1 ; p2=r2/n2
Pi=c(p1,p2)
theta.i=asin(sqrt(Pi)) # arcsine conversion
delta=4*var(theta.i)*(k-1) # sum of squares
N=2/(1/n1+1/n2) # harmonic mean, subcontrary mean
ncp1=N*delta # non-central parameter
power.chi=pchisq(qchisq(1-alpha,df),df,ncp1,lower=FALSE)
q.alpha=qchisq(1-alpha,df) # 3.841
qcrit=qchisq(max(p.val,1-p.val),df,ncp=0)
# qcrit = prop.test(c(r1,r2),c(n1,n2))$statistic
curve(dchisq(x,df),0,2*qcrit,xlab=quote(chi),ylab='Density',bty='n',lwd=2) # H0
curve(dchisq(x,df,ncp1),add=TRUE,lty=2,col=2,lwd.2) # H1
# power.chi=AUC of right half of the H1 curve
abline(v=qcrit)
abline(v=q.alpha,col='gray',lty=3)
legend('topright',bty='n',legend=c('H0','H1','chisq@p.value','chisq@alpha'),col=c(1,2,1,'gray'),lty=c(1,2,1,3),lwd=c(2,2,1,1))
text(qcrit,0,round(qcrit,2))
y0=dchisq(qcrit,df,0)
y1=dchisq(qcrit,df,ncp=ncp1)
FPR.equal=y0/(y0+y1) # length ratio
FPR.less=p.val/(p.val+power.chi) # area ratio
FPR.alpha=alpha/(alpha+power.chi) # FPR before analysis
VAL=c(power=power.chi,p.value=p.val,FPR.equal=FPR.equal,
FPR.less=FPR.less,FPR.alpha=FPR.alpha)
print(VAL,digits=3)
invisible(VAL)
}
532:卵の名無しさん
18/05/19 07:58:23.94 rrC4yXIM.net
PPV2prevalence <- function(sensitivity,specificity,PPV) {
(1-specificity)*PPV/((1-specificity)*PPV)+(1-PPV)*sensitivity))
}
FPP2prior <- function(power,FPR=0.05,pval=0.05){
pval*(1-FPR)/(pval*(1-FPR)+FPR*power)
}
533:卵の名無しさん
18/05/19 10:40:37.24 oTDRH91u.net
PPV2prevalence <- function(sensitivity,specificity,PPV) {
(1-specificity)*PPV/((1-specificity)*PPV+(1-PPV)*sensitivity)
}
PPV2prevalence(0.75,0.99,0.9)
FPP2prior <- function(power,FPR=0.05,pval=0.05){
pval*(1-FPR)/(pval*(1-FPR)+FPR*power)
}
534:卵の名無しさん
18/05/20 11:45:44.17 n2fbjQMc.net
# These date were used in 1908 by W. S. Gosset ('Student')
# as an example to illustrate the use of his t test,
# in the paper in which the test was introduced.
A=c(0.7,-
535:1.6,-0.2,-1.2,-0.1,3.4,3.7,0.8,0.0,2.0) B=c(1.9,0.8,1.1,0.1,-0.1,4.4,5.5,1.6,4.6,3.4) t.test(A,B,var.equal = TRUE) mean(A) ; mean(B) sd(A) ; sd(B) (E=mean(B)-mean(A)) nA=length(A) ; nB=length(B) # The pooled estimate of the error within groups SEpooled=sqrt(weighted.mean(c(var(A),var(B)),c(nA-1,nB-1))) # Standard deviation of effect size SE=sqrt(SEpooled^2/nA+SEpooled^2/nB) (t.statistic=E/SE) 2*pt(t.statistic,nA+nB-2,lower=FALSE) # two-sided
536:卵の名無しさん
18/05/20 18:46:26.99 n2fbjQMc.net
# URLリンク(www.nejm.org)
r1=14
n1=840
r2=73 # placebo
n2=829
calc.FPR.p2(r1,r2,n1,n2)
calc.FPR0.p2(r1,r2,n1,n2)
prop.test(c(r1,r2),c(n1,n2))
prior.needed <- function(r1,r2,n1,n2,FPR=0.05){
pval=prop.test(c(r1,r2),c(n1,n2))$p.value
ES=pwr::ES.h(r1/n1,r2/n2)
power=pwr::pwr.2p2n.test(ES,n1,n2,sig.level=pval)$power
prior=pval*(1-FPR)/(pval*(1-FPR)+FPR*power)
return(prior)
}
prior.needed(r1,r2,n1,n2,FPR=0.05)
537:卵の名無しさん
18/05/21 00:13:06.30 r/Pmsrg8.net
# URLリンク(www.ncbi.nlm.nih.gov)
# p.508
# When p is a p-value with n1 samples, 95% ci of p-value of next experiment with n2 samples
# is supposed to be estimated by p2ci-function below.
p2ci <- function(p,n1=100,n2=100,sig.level=0.95){
lwr=pnorm(qnorm(p)*sqrt(n2/n1)-qnorm(1-(1-sig.level)/2)*sqrt(1+n2/n1))
# pnorm(qnorm(p1)-2.771808) # when n1=n2, 1.96 *s qrt(2) = 2.77
upr=pnorm(qnorm(p)*sqrt(n2/n1)+qnorm(1-(1-sig.level)/2)*sqrt(1+n2/n1))
# pnorm(qnorm(p1)+2.771808)
c(lwr,upr)
}
p2ci(0.05)
p2ci(0.001)
graphics.off()
pp=seq(0,1,length=1001)
plot(pp,sapply(pp,function(p)p2ci(p)[1]),type='l',bty='n',
xlab='initial p-value',ylab='C.I. of next p-value')
lines(pp,sapply(pp,function(p)p2ci(p)[2]))
###
(opt=optimise(function(p)p2ci(p)[1],c(0,1)))
p2ci(opt$minimum)
f <- function(n1=10,n2=10)c(t.test(rnorm(n1))$p.value,t.test(rnorm(n1))$p.value)
re=replicate(10^4,f())
points(re[1,],re[2,],col=rgb(0.01,0.01,0.01,0.01))
538:卵の名無しさん
18/05/22 15:20:40.05 pgTq+n72.net
等分散を仮定しないWelch法でのt検定からでも算出できるように
スクリプトを変更。
# URLリンク(www.physiology.org)
# Explorations in statistics: statistical facets of reproducibility Douglas Curran-Everett
p2p2w <- function(p.value,n1=10,n2=10,sd1=1,sd2=1,alpha=0.05){
p=p.value/2 # two.sided comparison
# Z-test
z=qnorm(p)
d.z=z*sqrt(sd1^2/n1+sd2^2/n2) # estimated difference of means by z.test
p2.z=pnorm(-qnorm(alpha/2)+z,lower=FALSE)
# Student
df=n1-1+n2-1
t=qt(p,df)
d.t=t*sqrt((1/n1+1/n2)*((n1-1)*sd1^2+(n2-1)*sd2^2)/((n1-1)+(n2-1)))
p2.t=pt(-qt(alpha/2,df)+t,df,lower=FALSE)
# Welch
var1=sd1^2 ; var2=sd2^2
dfw=(var1/n1+var2/n2)^2 / (var1^2/n1^2/(n1-1)+var2^2/n2^2/(n2-1))
t.w=qt(p,dfw)
d.w=t.w*sqrt(var1/n1+var2/n2)
p2.w=pt(-qt(alpha/2,dfw)+t.w,dfw,lower=FALSE)
data.frame(p2.w, p2.t, p2.z)
}
539:卵の名無しさん
18/05/24 21:03:43.35 R1FtDNpg.net
# simulation on the assumption of the event ~ poisson distribution
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
stanString='
data{
int<lower=0> r1;
int<lower=0> r2;
int<lower=0> n1;
int<lower=0> n2;
}
parameters{
real<lower=0> lambda1;
real<lower=0> lambda2;
}
model{
r1 ~ poisson(lambda1) T[0,n1];
r2 ~ poisson(lambda2) T[0,n2];
}
'
# execute only on first occa
540:tion # stanModel=stan_model(model_code = stanString) # saveRDS(stanModel,'2x2p.rds')
541:卵の名無しさん
18/05/24 21:05:13.78 R1FtDNpg.net
prop.poisson <- function(r1,r2,n1,n2, alpha=0.05){
data <- list(r1=r1,r2=r2,n1=n1,n2=n2)
stanModel=readRDS('2x2p.rds')
fit=sampling(stanModel,data=data,chain=1,iter=10000)
print(fit)
ms=rstan::extract(fit)
k=length(ms$lambda1)
p.sim=numeric(k)
for(i in 1:k){
p.sim[i]=prop.test(c(ms$lambda1[i],ms$lambda2[i]),c(n1,n2))$p.value
}
BEST::plotPost(p.sim)
mean(p.sim<alpha)
cat('Pr(p.value < ',alpha,') = ', mean(p.sim<alpha),'\n')
cat('Original p.value = ',prop.test(c(r1,r2),c(n1,n2))$p.value,'\n')
}
542:卵の名無しさん
18/05/25 22:02:04.22 8Ot6XASs.net
URLリンク(www.ncbi.nlm.nih.gov)
543:卵の名無しさん
18/05/30 10:36:17.99 5L7BTTj3.net
r=3
f<- function(x) (1-x)^(r-1)*x
curve (f(x))
auc=integrate (f,0,1)$value
pdf <- function (x) f(x)/auc
integrate(pdf, 0.5,1)$value
integrate (function (x)x*pdf(x),0,1)$value
2/(r+2)
z=3;N=9
f <- function (x) choose(N,z)*x^z*(1-x)^(N-z)
curve (f(x))
auc=integrate (f,0,1)$value
pdf <- function (x) f(x)/auc
integrate(pdf, 0.5,1)$value
integrate (function (x)x*pdf(x),0,1)$value
(z+1)/(N+2)
544:卵の名無しさん
18/05/30 20:54:56.95 w7KIivv+.net
f <- function(x) 1/sqrt(x*(1-x))
z <- function(x) integrate(f,0,x)$value
z(0.1)
z(0.5)
curve(f(x))
xx=seq(0,1,0.01)
plot(xx,sapply(xx,z))
545:卵の名無しさん
18/05/30 21:02:47.29 w7KIivv+.net
f <- function(x) 1/sqrt(x*(1-x))
z <- function(x) integrate(f,0,x)$value/pi
z(0.1)
z(0.5)
curve(f(x))
x=seq(.001,.999,by=.001)
plot(x,sapply(x,z))
abline(a=0,b=1)
546:卵の名無しさん
18/05/30 21:11:44.87 w7KIivv+.net
curve(pbeta(x,0.5,0.5))
547:卵の名無しさん
18/05/30 21:31:43.66 w7KIivv+.net
f <- function(x)dbeta(x,0.5,0.5)
zz <- function(x) integrate(f,0,x)$value
x=seq(.001,.999,by=.001)
plot(x,sapply(x,zz))
abline(a=0,b=1)
548:卵の名無しさん
18/06/01 11:46:08.93 UJHj2xQL.net
# marginal likelihoo of Fixed Effect Model
m.FE <- function(r1,n1,r2,n2,shape1=1,shape2=1){
choose(n1,r1)*choose(n2,r2)*beta(r1+r2+shape1,n1-r1+n2-r2+shape2)/beta(shape1,shape2)
}
# marginal likelihoo of Random Effect Model θ1~B(a1,ba),θ2~B(a2,b2)
m.REJ <- function(r1,n1,r2,n2,a1=1,b1=1,a2=1,b2=1){
2*choose(n1,r1)*choose(n2,r2)*integrate(
function(x){
dbeta(x,r1+a1,n1-r1+b1)*beta(r1+a1,n1-r1+b1)/beta(a1,b1)*
pbeta(x,r2+a2,n2-r2+b2,lower=FALSE)*beta(r2+a2,n2-r2+b2)/beta(a2,b2)
}
,0,1)$value
}
549:卵の名無しさん
18/06/02 14:59:28.21 iq0fCXui.net
Haldane <- function(x) 1/(x*(1-x))
curve(Haldane(x),0,1)
f <- function(x) integrate(Haldane,0.5,x)$value
x=seq(0.001,0.999,by=0.001)
plot(x,sapply(x,f),col=3)
curve(log(x/(1-x)),add=T)
550:卵の名無しさん
18/06/04 15:43:10.18 UVDFwwqx.net
エビデンス=周辺尤度= p(D |M1)=
∫
p(D | θ,M1)p(θ |M1)dθ
551:卵の名無しさん
18/06/04 15:43:29.56 UVDFwwqx.net
エビデンス=周辺尤度
= p(D |M1)=
∫
p(D | θ,M1)p(θ |M1)dθ
552:卵の名無しさん
18/06/04 18:46:43.68 UVDFwwqx.net
marginal likelihood of your model Mx is given by
p(D |Mx)= p(ξ1)p(D | ξ1)+p(ξ2)p(D | ξ2)+p(ξ3)p(D | ξ3)
=0.6×0.001+0.3×0.002+0.1×0.003
=0.0015.
The marginal likelihood is computed
553:卵の名無しさん
18/06/04 21:45:19.03 UVDFwwqx.net
stanStringJ='
data{
int<lower=0> N1;
int<lower=0> N2;
real Y1[N1];
real Y2[N2];
}
parameters{
real mu;
real<lower=0> sigma;
real alpha;
}
transformed parameters{
real mu1;
real mu2;
real delta;
real<lower=0> precision;
delta = alpha/sigma;
mu1 = mu + alpha/2;
mu2 = mu - alpha/2;
precision = 1/(sigma^2);
}
model{
Y1 ~ normal(mu1,sigma);
Y2 ~ normal(mu2,sigma);
mu ~ uniform(-9999,9999);
precision ~ gamma(0.5,0.5);
delta ~ cauchy(0,1);
}
'
554:卵の名無しさん
18/06/04 21:46:02.48 UVDFwwqx.net
N1=n1=137 ; m1=23.8 ; sd1=10.4 ; N2=n2=278 ; m2=25.2 ; sd2=12.3
U=scale(rnorm(n1))*sd1+m1 ; E=scale(rnorm(n2))*sd2+m2
Y1=as.vector(U) ; Y2=as.vector(E)
data=list(N1=N1,N2=N2,Y1=Y1,Y2=Y2)
# execute only on first occation
# JZS.model=stan_model(model_code = stanStringJ)
# JZS.model=stan_model('JZS.stan')
# saveRDS(JZS.model
555:,'JZS.rds') JZS.model=readRDS('JZS.rds') fitJ=sampling(JZS.model,data=data, iter=20000) print(fitJ,probs = c(.025,.5,.975)) ms=rstan::extract(fitJ) dES=density(ms$delta) plot(dES,lwd=2, xlab='Effect Size (δ)',main='The Savage?Dickey method',bty='l') curve(dcauchy(x),lty=3, add=TRUE) abline(v=0,col=8) (d1=dES$y[which.min(dES$x^2)]) # posterior density at ES=0 (d0=dcauchy(0)) # prior density at ES=0 points(0,d0,cex=2) points(0,d1,pch=19,cex=2) legend('topleft',bty='n',legend=c('prior','posterior'),lty=c(3,1),lwd=c(1,2)) (BF10=d1/d0) text(0,0,paste('BF10=',round(BF10,2))) library(BayesFactor) 1/exp(ttestBF(Y1,Y2,rscale = 1)@bayesFactor[['bf']])
556:卵の名無しさん
18/06/05 09:10:30.95 Ob/ggqKh.net
N=10000
x=rbeta(N,0.5,0.5)
y=rbeta(N,0.5,0.5)
z=x-y
hist(z,freq=F)
dz=density(z)
dz$y[which.min(dz$x^2)]
z=x/y
hist(log10(z),freq=F)
dz=density(z)
min=1
dz$y[which.min((dz$x-min)^2)]
557:卵の名無しさん
18/06/05 09:14:46.94 Ob/ggqKh.net
data{ //binomBF.stan
int r1;
int r2;
int n1;
int n2;
real shape1;
real shape2;
}
parameters{
real <lower=0, upper=1> theta1;
real <lower=0, upper=1> theta2;
real <lower=0, upper=1> th1;
real <lower=0, upper=1> th2;
}
transformed parameters{
real rd;
real rr;
real rd0;
real rr0;
rd = theta1 - theta2;
rd0 = th1 - th2;
rr = theta1/theta2;
rr0 = th1/th2
}
model{
r1 ~ binomial(n1,theta1);
r2 ~ binomial(n2,theta2);
theta1 ~ beta(shape1,shape2);
theta2 ~ beta(shape1,shape2);
th1 ~ beta(shape1,shape2);
th2 ~ beta(shape1,shape2);
}
558:卵の名無しさん
18/06/06 21:19:25.17 IpiYYmjt.net
ある大学の入学者男女の比率は1であるという帰無仮説を検定する課題が花子と太郎に課された。
花子は50人を調査できたら終了として入学者を50人をみつけて18人が女子であるという結果を得た。
帰無仮説のもとで
50人中18人が女子である確率は 0.01603475
これ以下になるのは50人中0~18人と32~50人が女子の場合なので
両側検定して
> sum(dbinom(c(0:18,32:50),50,0.5))
[1] 0.06490865
> binom.test(18,50,0.5)$p.value
[1] 0.06490865
で帰無仮説は棄却できないと結論した。
URLリンク(i.imgur.com)
559:卵の名無しさん
18/06/06 21:19:31.46 IpiYYmjt.net
一方、十八という数字が好きな太郎は一人ずつ調べて18人めの女子がみつかったところで調査を終えることにした。
18人めがみつかったのは花子と同じく50人めであった。
帰無仮説のもとで
18人がみつかるのが50人めである確率は0.005772512
これ以下になるのは23人以下50人以上番めで女子18人めがみつかった場合なので
両側検定して
pnb=dnbinom(0:999,18,0.5)
> 1 - sum(pnb[-which(pnb<=dnbinom(50-18,18,0.5))]) # < 0.05
[1] 0.02750309
URLリンク(i.imgur.com)
で帰無仮説は棄却される。
どちらの検定が正しいか、どちらも正しくないか?
検定する意図によってp値が変わるのは頻度主義統計の欠陥といえるか?
花子の横軸は女子数、太郎の横軸はサンプル数なので
サンプルでの女子の割合を横軸にして95%信頼区間を示す。
花子の検定での信頼区間は0.36~0.72で18/50を含む、p=0.06491
URLリンク(i.imgur.com)
太郎の検定での信頼区間は0.375~0.72で18/50を含まない、p= 0.0275
URLリンク(i.imgur.com)
主観である、検定の中止の基準の差でp値や信頼区間が変化するのは変だという批判である。
560:卵の名無しさん
18/06/07 07:36:58.84 6l5aJg03.net
50C18 * 0.5^18 * 0.5^32 �
561:ニ 49C17 * 0.5^17 * 0.5^32 * 0.5 の違いでしょう 18人目を見つけた人数を調べるというデザインがおかしいよね これ事前確率0.5で50人調査して女が18人っていうのを ベイズ更新していったらどうなる?
562:卵の名無しさん
18/06/07 07:47:39.40 Ni5wt/sw.net
>>524
酷いのになるとp<0.05になったらやめるとかいうのもあるな。
p-hackingと呼ばれる
563:卵の名無しさん
18/06/08 22:06:59.94 dTNUKiNw.net
r=21
N=20
a=0.5
b=0.5
p=a/(a+b+r+(1:N))
q=cumsum(p)
q
plot(1:N,p,ann=F)
plot(1:N,q,ann=F)
​
564:卵の名無しさん
18/06/12 00:02:23.51 XLL1LdWn.net
N=50
z=40
FP=0.01
shape1=1
shape2=1
data = list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
stanString=paste0('
data{
int N;
int z;
real FP;
real shape1;
real shape2;
}
parameters{
real<lower=0,upper=1> TP;
}
transformed parameters{
real<lower=0,upper=1> theta;
theta=((z*1.0)/(N*1.0)-FP)/(TP-FP);
}
model{
z ~ binomial(N,theta);
TP ~ beta(shape1,shape2);
}
')
565:卵の名無しさん
18/06/12 00:34:11.64 XLL1LdWn.net
N=50
z=40
FP=0.01
shape1=1
shape2=1
data = list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
stanString=paste0('
data{
int N;
int z;
real FP;
real shape1;
real shape2;
}
parameters{
real<lower=0,upper=1> TP;
}
transformed parameters{
real<lower=0,upper=1> theta;
theta=((z*1.0)/(N*1.0)-FP)/(TP-FP);
}
model{
z ~ binomial(N,theta);
TP ~ beta(shape1,shape2) T[0.5,];
}
')
model=stan_model(model_code = stanString)
fit=sampling(model,data=data,control=list(adapt_delta=0.99),iter=10000)
print(fit,digits=3,probs=c(.025,.50,.975))
566:卵の名無しさん
18/06/12 19:09:54.64 XLL1LdWn.net
# pdfからcdfの逆関数を作ってHDIを表示させて逆関数を返す
pdf2hdi <- function(pdf,xMIN=0,xMAX=1,cred=0.95,Print=TRUE){
nxx=1001
xx=seq(xMIN,xMAX,length=nxx)
xx=xx[-nxx]
xx=xx[-1]
xmin=xx[1]
xmax=xx[nxx-2]
AUC=integrate(pdf,xmin,xmax)$value
PDF=function(x)pdf(x)/AUC
cdf <- function(x) integrate(PDF,xmin,x)$value
ICDF <- function(x) uniroot(function(y) cdf(y)-x,c(xmin,xmax))$root
hdi=HDInterval::hdi(ICDF,credMass=cred)
print(c(hdi[1],hdi[2]),digits=5)
if(Print){
par(mfrow=c(3,1))
plot(xx,sapply(xx,PDF),main='pdf',type='h',xlab='x',ylab='Density',col='lightgreen')
legend('top',bty='n',legend=paste('HDI:',round(hdi,3)))
plot(xx,sapply(xx,cdf),main='cdf',type='h',xlab='x',ylab='Probability',col='lightblue')
pp=seq(0,1,length=nxx)
pp=pp[-nxx]
pp=pp[-1]
plot(pp,sapply(pp,ICDF),type='l',xlab='p',ylab='x',main='ICDF')
par(mfrow=c(1,1))
}
invisible(ICDF)
}
567:卵の名無しさん
18/06/12 19:12:46.61 XLL1LdWn.net
.n=50
.r=40
b=0.01
f = function(x,a,b,n,r){ # x:prevalence, a:TP, b:FP=0.01
p=x*a+(1-x)*b
choose(n,r)*p^r*(1-p)^(n-r)
}
f2 = function(x,n=.n,r=.r){
cubature::adaptIntegrate(function(ab)f(x,ab[1],b,n,r),
c(0,0),c(1,1))$integral
}
vf2=Vectorize(function(x)f2(x,n=.n,r=.r))
curve(vf2(x),ylab='',yaxs='i',axes=FALSE,lwd=2,xlab='prevalence') ; axis(1)
# points(0:10/10,vf2(0:10/10),pch=19)
optimise(function(x) vf2(x),c(0,1),maximum = TRUE)
auc=integrate(function(x)vf2(x),0,1)$value
pdf<-function(x)vf2(x)/auc
vpdf=Vectorize(pdf)
integrate(function(x)x*pdf(x),0,1)$value # mean
cdf=function(x)integrate(pdf,0,x)$value
vcdf=Vectorize(cdf)
# time consuming processes
# curve(vcdf(x),bty='l',n=64)
inv_cdf <- function(u){
uniroot(function(x,u0=u)vcdf(x)-u0,c(0,1),tol = 1e-18)$root
}
vinv_cdf=Vectorize(inv_cdf)
# curve(vinv_cdf(x),lwd=2,n=64)
hdi=HDInterval::hdi(vinv_cdf) ; hdi
# lower upper
# 0.7456652 1.0000000
568:卵の名無しさん
18/06/12 19:23:42.87 XLL1LdWn.net
# random numbers following PDF by John von Neuman's method
vonNeumann2 <- function(PDF,xmin
569:=0,xmax=1,N=10000,Print=TRUE,...){ xx=seq(xmin,xmax,length=N+1) xx=xx[-(N+1)] xx=xx[-1] ymax=max(PDF(xx)) Ux=runif(N,xmin,xmax) Uy=runif(N,0,ymax) Rand=Ux[which(Uy<=PDF(Ux))] if(Print){ hist(Rand,xlim=c(xmin,xmax),freq=FALSE,col=sample(colors(),1),main='',...) AUC=integrate(PDF,xmin,xmax)$value lines(xx,sapply(xx,function(x)PDF(x)/AUC)) } hdi=HDInterval::hdi(Rand) print(c(hdi[1],hdi[2]),digits=5) invisible(Rand) }
570:卵の名無しさん
18/06/12 20:53:05.85 Ex3k8fq/.net
>>531
ここでもうりゅう先輩が迷惑掛けてんのか?
ウリュウなあ
こいつはなあ、生まれついてのビッグマウスであちこちに自分を売り込むが、
卒業しても国試浪人で医師免許ない50過ぎでは相手にされない
国試対策塾で非常識講師で細々と食つなぐが学生に馬鹿にされる
自分の医師コンプを隠すために医学生たちを「底辺」などという
実は自分が凄まじい底辺なのだが気づいていない
こんな嘘つきデブがのさばっているスレだな
ご苦労なこったよ、うりゅうのおっさん
わからねえとでも思ってんだろどーせ
571:卵の名無しさん
18/06/13 08:53:38.73 rcG4xYvl.net
library(rjags)
N=50
z=40
FP=0.01
shape1=1
shape2=1
dataList=list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
modelstring <- paste0("
model{
theta=TP*x+FP*(1-x)
z ~ dbinom(theta,N)
TP ~ dbeta(shape1,shape2)
x ~ dbeta(shape1,shape2)
}"
)
writeLines( modelstring , con="TEMPmodel.txt" )
jagsModel = jags.model( file="TEMPmodel.txt" , data=dataList, quiet=TRUE)
update(jagsModel)
codaSamples = coda.samples( jagsModel ,
variable=c("TP","x","theta"), n.iter=100000 )
js=as.matrix(codaSamples)
head(js)
BEST::plotPost(js[,'TP'],xlab='sensitivity')
BEST::plotPost(js[,'x'],xlab='prevalence')
BEST::plotPost(js[,'theta'],xlab='positive result',showMode = TRUE)
572:卵の名無しさん
18/06/16 16:23:17.07 V7A3qxKV.net
経理課の須藤は着服をやめろ!
勤務実態もないのに、グループ病院内から管理手当て(10万円)をもらうな!!!
意図的な給与操作、どうにかしろ!
573:卵の名無しさん
18/06/17 05:51:35.25 ifh2AARM.net
seqN <- function(N=100,K=5){
a=numeric(N)
for(i in 1:K) a[i]=2^(i-1)
for(i in K:(N-1)){
a[i+1]=0
for(j in 0:(K-1)){
a[i+1]=a[i+1]+a[i-j] # recursion formula
}
}
P0=numeric(N)
for(i in 1:N) P0[i]=a[i]/2^i # P0(n)=a(n)/2^n
P0
MP=matrix(rep(NA,N*K),ncol=K)
colnames(MP)=paste0('P',0:(K-1))
MP[,1]=P0
head(MP);tail(MP)
MP[1,2]=1/2
for(i in (K-2):K) MP[1,i]=0
for(k in 2:K){
for(i in 1:(N-1)) MP[i+1,k]=1/2*MP[i,k-1]
} # Pk(n+1)=1/2*P(k-1)(n)
ret=1-apply(MP,1,sum)
ret[N]
}
seqN(100,5)
seqN(1000,10)
574:卵の名無しさん
18/06/17 07:58:05.56 ifh2AARM.net
## 表の出る確率がpであるとき、N回コインを投げて K回以上表が連続する確率
seqNp <- function(N=100,K=5,p=0.5){
q=1-p
a=numeric(N) # a(n)=P0(n)/p^n , P0(n)=a(n)*p^n
for(i in 1:K) a[i]=q/p^i # P0(i)=q
for(i in K:(N-1)){ # recursive formula
a[i+1]=0
for(j in 0:(K-1)){
a[i+1]=(a[i+1]+a[i-j])
}
a[i+1]=q/p*a[i+1]
575: } P0=numeric(N) for(i in 1:N) P0[i]=a[i]*p^i # P0(n)=a(n)*p^n P0 MP=matrix(rep(NA,N*K),ncol=K) colnames(MP)=paste0('P',0:(K-1)) MP[,'P0']=P0 head(MP);tail(MP) MP[1,'P1']=p for(i in (K-2):K) MP[1,i]=0 for(k in 2:K){ for(i in 1:(N-1)) MP[i+1,k]=p*MP[i,k-1] } # Pk(n+1)=p*P(k-1)(n) ret=1-apply(MP,1,sum) ret[N] }
576:卵の名無しさん
18/06/17 08:33:04.03 LIEnVEKd.net
p:表
q=1-p
# Pk(n) (k=0,1,2,3,4)を途中、5連続して表が出ていなくて
# 最後のk回は連続して表が出ている確率とする。
#
P0(1)=q
P1(1)=p
P2(1)=P3(1)=P4(1)=0
P(k+1)(n+1)=p*Pk(n)
P0(n+1)=q*{P0(n)+P1(n)+P2(n)+P3(n)+P4(n)}
=q*{P0(n)+p*P0(n-1)+p^2*P0(n-2)+p^3*P0(n-3)+p^4*P0(n-4)}
P0(n)=a(n)*p^n
# a(n+1)p^(n+1)=q*p^n{a(n)+a(n-1)+a(n-2)+a(n-3)+a(n-4)}
# a(n+1)=q/p1*(a(n)+a(n-1)+a(n-2)+a(n-3)+a(n-4))
a(n)=P0(n)/p^n
577:卵の名無しさん
18/06/17 09:43:49.13 ifh2AARM.net
>>532
統計くらいできるのが国立卒の普通の臨床医。
おい、ド底辺
統計処理からはおまえは
都外のド底辺シリツ医大卒と推測されたが、あってるか?
578:卵の名無しさん
18/06/17 09:45:16.20 ifh2AARM.net
## 表の出る確率がpであるとき、N回コインを投げて K回以上表が連続する確率に一般化してみた。
seqNp <- function(N=100,K=5,p=0.5){
q=1-p
a=numeric(N) # a(n)=P0(n)/p^n , P0(n)=a(n)*p^n
for(i in 1:K) a[i]=q/p^i # P0(i)=q
for(i in K:(N-1)){ # recursive formula
a[i+1]=0
for(j in 0:(K-1)){
a[i+1]=(a[i+1]+a[i-j])
}
a[i+1]=q/p*a[i+1]
}
P0=numeric(N)
for(i in 1:N) P0[i]=a[i]*p^i # P0(n)=a(n)*p^n
MP=matrix(rep(NA,N*K),ncol=K)
colnames(MP)=paste0('P',0:(K-1))
MP[,'P0']=P0
head(MP);tail(MP)
MP[1,'P1']=p
for(i in (K-2):K) MP[1,i]=0
for(k in 2:K){
for(i in 1:(N-1)) MP[i+1,k]=p*MP[i,k-1]
} # Pk(n+1)=p*P(k-1)(n)
ret=1-apply(MP,1,sum)
ret[N]
}
579:卵の名無しさん
18/06/17 22:30:10.45 ifh2AARM.net
# pdfからcdfの逆関数を作ってHDIを表示させて逆関数を返す
pdf2hdi <- function(pdf,xMIN=0,xMAX=1,cred=0.95,Print=TRUE){
nxx=1001
xx=seq(xMIN,xMAX,length=nxx)
xx=xx[-nxx]
xx=xx[-1]
xmin=xx[1]
xmax=xx[nxx-2]
AUC=integrate(pdf,xmin,xmax)$value
PDF=function(x)pdf(x)/AUC
cdf <- function(x) integrate(PDF,xmin,x)$value
ICDF <- function(x) uniroot(function(y) cdf(y)-x,c(xmin,xmax))$root
hdi=HDInterval::hdi(ICDF,credMass=cred)
print(c(hdi[1],hdi[2]),digits=5)
if(Print){
par(mfrow=c(3,1))
plot(xx,sapply(xx,PDF),main='pdf',type='h',xlab='x',ylab='Density',col='lightgreen')
legend('top',bty='n',legend=paste('HDI:',round(hdi,3)))
plot(xx,sapply(xx,cdf),main='cdf',type='h',xlab='x',ylab='Probability',col='lightblue')
pp=seq(0,1,length=nxx)
pp=pp[-nxx]
pp=pp[-1]
plot(pp,sapply(pp,ICDF),type='l',xlab='p',ylab='x',main='ICDF')
par(mfrow=c(1,1))
}
invisible(ICDF)
}
580:卵の名無しさん
18/06/18 20:41:20.64 G5tabFnb.net
library(rjags)
modelstring <- paste0("
model{
theta=TP*x+FP*(1-x)
z ~ dbinom(theta,N)
TP ~ dbeta(shape1,shape2)
x ~ dbeta(shape1,shape2)
}"
)
writeLines( modelstring , con="TEMPmodel.txt" )
N=100 ; FP=0.01 ; shape1=1 ; shape2=1
guess.TP <- function(z){
dataList=list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
jagsModel = jags.model( file="TEMPmodel.txt" , data=dataList, quiet=TRUE)
update(jagsModel)
codaSamples = coda.samples(jagsModel,variable=c("TP","x"), n.iter=10000)
js=as.matrix(codaSamples)
m.TP=mean(js[,'TP'])
ci.TP=HPDinterval(as.mcmc(js[,'TP']))
m.x=mean(js[,'x'])
ci.x=HPDinterval(as.mcmc(js[,'x']))
c(m.TP=m.TP,ci.TP=ci.TP,m.x=m.x,ci.x=ci.x)
}
zz=1:20*5
re=sapply(zz,guess.TP)
head(re[,1:4])
re=as.matrix(re)
plot(zz,re['m.TP',],bty='l',ylim=c(0,1),type='n',las=1,
xlab='n : positives out of 100',ylab='sensitivity')
segments(zz,re[2,],zz,re[3,],col=8,lwd=3)
points(zz,re['m.TP',],pch=16)
581:卵の名無しさん
18/06/18 22:45:56.48 G5tabFnb.net
guess.TP2 <- function(z,FP){
dataList=list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
jagsModel = jags.model( file="TEMPmodel.txt" , data=dataList, quiet=TRUE)
update(jagsModel)
codaSamples = coda.samples(jagsModel,variable=c("TP"), n.iter=10000,thin=5)
js=as.matrix(codaSamples)
mean(js[,'TP'])
}
vG=Vectorize(guess.TP2)
n=1:20*5
FP=seq(0,0.25,length=20)
TP=outer(n,FP,vG) # wait several minutes
contour(n,FP,TP, col='navy',
xlab='n : positives out of 100',ylab='FP : 1-specificity',bty='l',nlevels=64)
points(50,0.10,pch='+',col='red',cex=1.5)
582:卵の名無しさん
18/06/19 22:41:00.16 ant1u+bV.net
n=5 # prime number
nn=1:(n-1)
tasu <- function(x,y) (x+y)%%n
hiku <- function(x,y) (x-y)%%n # row - col
kake <- function(x,y) (x*y)%%n
g=function(x) nn[which(x==1)]
.M=outer(nn,nn,kake)
G=apply(.M,2,g)
gyaku <- function(x) nn[which(G==(x%%n))]
waru <- function(x,y) (x*gyaku(y))%%n # row / col
waru(3,2)
xx=yy=c(0,nn)
names(xx)=paste0('x',c(0,nn))
names(yy)=paste0('y',c(0,nn))
outer(xx,yy,tasu) # x + y
outer(xx,yy,hiku) # x - y
outer(xx,yy,kake) # x * y
X=Y=nn
outer(X,Y,waru) # WRONG!!
outer(X,Y,Vectorize(waru))
a=expand.grid(X,Y)
b=matrix(mapply(waru,a[,1],a[,2]),ncol=length(X))
rownames(b)=paste0('x',nn)
colnames(b)=paste0('y',nn)
b # x / y
583:卵の名無しさん
18/06/20 19:25:59.38 myhyWcyK.net
rule3 <- function(n,confidence.level=0.95){
p=1/n
q=1-p # q^n.sample > 1-confidence.level
n.sample = log(1-confidence.level)/log(q)
return(n.sample)
}
n.sample=log(0.05)/log(0.999)
584:卵の名無しさん
18/06/22 14:47:07.52 ETsHYxXe.net
shuffle <- function(Cards){
n=length(Cards)
n1=ceiling(n/2)
n2=n-n1
C1=Cards[1:n1]
C2=Cards[(n1+1):n]
ret=NULL
for(i in 1:n1){
ret=c(ret,C1[i],C2[i])
}
ret[is.na(ret)==F]
}
x=as.character(c('A',2:10,'J','Q','K'))
cat(x,'\n') ; cat(shuffle(x))
Shuffles <- function(x){
tmp=shuffle(x)
i=1
while(!identical(x,tmp)){
tmp=shuffle(tmp)
i=i+1
}
return(i)
}
f =function(x)Shuffles(1:x)
nn=1:53
y=sapply(nn,f)
plot(nn,y,pch=16,bty='l',xlab='cards',ylab='shuffles')
cbind(nn,y)
585:卵の名無しさん
18/06/22 20:07:08.88 ETsHYxXe.net
URLリンク(000013.blogspot.com)
inversion <- function(x){ # 転倒数
n=length(x)
ret=numeric(n)
for(i in 1:(n-1)){
ret[i] = sum(x[i] > x[(i+1):n])
}
sum(ret)
}
x=c(4, 3, 5, 2, 1)
inversion(x)
is.even= function(x) !inversion(x)%%2
is.even(x)
prisoner99 <- function(n=100){
indx=sample(1:n,1) # defective number
X=sample((1:n)[-indx]) ; is.even(X)
Y=numeric(n-1)
for (i in 1:(n-1)){
x1=X[-i]
x2=(1:n)[!(1:n) %in% x1] # 囚人iが見えない番号
tmp=X
tmp[i]=x2[1] ; tmp[n]=x2[2]
Y[i]=ifelse(is.even(tmp), x2[1],x2[2]) # 偶順列になるように選択
}
all(X==Y)
}
mean(replicate(1e3,prisoner99()))
586:卵の名無しさん
18/06/26 07:03:32.73 kT/81/Gu.net
# URLリンク(000013.blogspot.com)
inversion <- function(x){
n=length(x)
ret=numeric(n)
for(i in 1:(n-1)){
ret[i] = sum(x[i] > x[(i+1):n])
}
sum(ret) # inversion number
}
is.even= function(x) !inversion(x)%%2 # is inverion number even?
prisoner99 <- function(n=100){
indx=sample(1:n,1) # defective number
X=sample((1:n)[-indx])
Y=numeric(n-1)
for (i in 1:(n-1)){ # select as even permutation
x1=X[-i]
x2=(1:n)[!(1:n) %in% x1] # two numbers unseen for i-th prisoner
tmp=X
tmp[i]=x2[1] ; tmp[n]=x2[2]
Y[i]=ifelse(is.even(tmp), x2[1],x2[2])
}
all(X==Y)
}
mean(replicate(1e3,prisoner99()))
587:卵の名無しさん
18/06/26 10:42:11.82 kT/81/Gu.net
inversion <- function(x){ #inversion number
n=length(x)
ret=numeric(n)
for(i in 1:(n-1)){
ret[i] = sum(x[i] > x[(i+1):n])
}
sum(ret)
}
is.even <- function(x) !inversion(x)%%2 # even inversion?
even.perm <- function(n=100){
indx=sample(1:n,1) # defective number
X=sample((1:n)[-indx]) # row of 99 prisoner numbers
is.even(X)
}
mean(replicate(1e3,even.perm())) # probability of even permutation
588:卵の名無しさん
18/06/27 14:01:31.44 gge/PUDl.net
#ある大学の学生数は500以上1000人以下であることはわかっている。
#無作為に2人を抽出して調べたところ
#二人とも女子学生である確率は1/2であった。
#この大学の女子学生数と男子学生数は何人か?
girlsboys <- function(g,b) g*(g-1)/(g+b)/(g+b-1)==1/2
gr=expand.grid(1:1000,1:1000)
(re=gr[which(mapply(girlsboys,gr[,1],gr[,2])),])
girlsboys(re[nrow(re),1],re[nrow(re),2])
589:卵の名無しさん
18/06/27 14:03:56.49 gge/PUDl.net
# ある大学の学生数は500以上1000人以下であることはわかっている。
# 無作為に2人を抽出して調べたところ
# 二人とも女子学生である確率は1/2であった。
# この大学の女子学生数と男子学生数は何人か?
girlsboys <- function(g,b) g*(g-1)/(g+b)/(g+b-1)==1/2
gr=expand.grid(1:1000,1:1000)
(re=gr[which(mapply(girlsboys,gr[,1],gr[,2])),])
# 検証
Vectorize(girlsboys)(re[,1],re[,2])
590:卵の名無しさん
18/06/27 17:53:09.08 gge/PUDl.net
N=120
r=10
D=c(rep(1,r),rep(0,N-r))
hiseiki <- function(m){
found=0
for(i in 1:(N-r+m)){
found=found+sample(D,1)
if(found==m) break
}
return(i)
}
re=replicate(1e4,hiseiki(3))
mean(re)
sd(re)
BEST::plotPost(re,breaks=30)
591:卵の名無しさん
18/06/29 07:33:02.38 zpGshx3p.net
cereals <- function(n=5){
coupons=NULL
while(!all((1:n) %in% coupons)){
coupons=append(sample(1:n,1),coupons)
}
return(length(coupons))
}
re=replicate(100,mean(replicate(1e3,cereals(5))))
> mean(re)
[1] 11.43503
592:卵の名無しさん
18/06/29 08:13:57.68 zpGshx3p.net
p=4:1
cereals <- function(p){
n=length(p)
coupons=NULL
while(!all((1:n) %in% coupons)){
coupons=append(sample(1:n,1,p=p),coupons)
}
return(length(coupons))
}
mean(replicate(1e3,cereals(p)))
re=replicate(100,mean(replicate(1e3,cereals(p))))
mean(re)
593:卵の名無しさん
18/06/29 16:00:32.65 zpGshx3p.net
blood.type <- function(p,need){
n=length(p)
ABO=NULL
enough <- function(x){
pool=numeric(n)
for(i in 1:n) pool[i]=sum(ABO==i)
all(pool >= need)
}
while(!enough(ABO)){
ABO=append(sample(1:n,1,p=p),ABO)
}
return(length(ABO))
}
p=4:1
need=c(10,10,5,2)
re=replicate(1e4,blood.type(p,need))
BEST::plotPost(re)
594:卵の名無しさん
18/06/30 05:47:26.70 BDmYWstD.net
BT <- function (a,b,c,d) 1/a + 1/b + 1/c + 1/d - 1/(a+b) - 1/(a+c) - 1/(b+c) - 1/(a+d) - 1/(b+d) - 1/(c+d) + 1/(a+b
595:+c) + 1/(d+a+b) + 1/(c+d+a) + 1/(b+c+d) - 1/(a+b+c+d) a=4 b=3 c=2 d=1 s =a+b+c+d a=a/s b=b/s c=c/s d=d/s BT(a,b,c,d)
596:卵の名無しさん
18/06/30 13:48:31.26 j2CU1Lw0.net
even.tally <- function(a=3 , b=2){
idx=combn(1:(a+b),a)
n=ncol(idx)
mat=matrix(0,nrow=n,ncol=a+b)
for(i in 1:n) mat[i,idx[,i]]=1
tally <- function (x) any(cumsum(x)==cumsum(1-x))
mean(apply (mat,1,tally))
}
even.tally()
even.tally(5,10)
597:卵の名無しさん
18/06/30 15:09:55.32 j2CU1Lw0.net
a=750 ; b=250
v=c(rep(1,a),rep(0,b))
f <- function(v){
x=sample(v)
any(cumsum(x)==cumsum(1-x))
}
mean(replicate(1e5,f(v)))
598:卵の名無しさん
18/07/01 10:28:46.39 F3KIhVUK.net
date=1:366
p=c(97/400,rep(1,365))
same.birth <- function(n,lwr=2,upr=1e6){
x=sample(date,n,replace=TRUE,prob=p)
di=max(table(x)
lwr<=di & di<=upr
}
birth <- function(n,lwr=2,upr=1e6,k=1e4){
mean(replicate(k,same.birth(n,lwr,upr)))
}
#
birth(100, 3)
vrb=Vectorize(birth)
x=1:50
y=vrb(x)
plot(x,y,pch=19)
abline(h=0.5,lty=3,col=4)
min(x[whic(y > 0.5)])
599:卵の名無しさん
18/07/01 11:53:44.57 F3KIhVUK.net
date=1:366
p=c(97/400,rep(1,365))
same.birth <- function(n,lwr=2,upr=1e6){
x=sample(date,n,replace=TRUE,prob=p)
di=max(table(x))
lwr<=di & di<=upr
}
birth <- function(n,lwr=2,upr=1e6,k=1e4){
mean(replicate(k,same.birth(n,lwr,upr)))
}
#
birth(100, 3)
vrb=Vectorize(birth)
x=1:50
y=vrb(x)
plot(x,y,pch=19)
abline(h=0.5,lty=3,col=4)
min(x[whic(y > 0.5)])
600:卵の名無しさん
18/07/01 12:03:30.64 F3KIhVUK.net
date=1:366
p=c(97/400,rep(1,365))
same.birth <- function(n,lwr=2,upr=1e6){
x=sample(date,n,replace=TRUE,prob=p)
di=max(table(x)
lwr<=di & di<=upr
}
birth <- function(n,lwr=2,upr=1e6,k=1e4){
mean(replicate(k,same.birth(n,lwr,upr)))
}
#
birth(100, 3)
vrb=Vectorize(birth)
x=1:50
y=vrb(x)
plot(x,y,pch=19)
abline(h=0.5,lty=3,col=4)
min(x[whic(y > 0.5)])
601:卵の名無しさん
18/07/01 12:08:34.56 F3KIhVUK.net
インフルエンザの迅速キットは特異度は高いが感度は検査時期によって左右される。
ある診断キットが開発されたとする。
このキットは特異度は99%と良好であったが、
感度については確かな情報がない。
事前確率分布として一様分布を仮定する。
50人を無作為抽出してこの診断キットで診断したところ40人が陽性であった。
この母集団の有病率の期待値と95%信用区間はいくらか?
またこの診断キットの感度の期待値と95%信用区間はいくらか
暇つぶしにこれをMCMCを使わずに解く方法を考えていた。
偽陽性率FP=0.01として
陽性確率p=TP*x+(1-x)*FP
尤度が50C40*p^40*(1-p)^10
TPは一様分布なので積分消去して
確率密度関数に比例する関数を作ってarea under the curveで割って確率密度関数化したのち積分して累積密度関数をつくる。この累積密度関数の逆関数を作って95%区間が最短になる区間を計算すれば信頼区間が算出できる。
この結果がstanでのシミュレーションの結果と一致すればよし。
602:卵の名無しさん
18/07/01 22:00:46.73 lSk1MYzX.net
# # choose(n,r) == gamma(n+1) / (gamma(r+1) * gamma(n-r+1))
same.birthday <- function(n) 1-choose(365+97/400,n)*factorial(n)/(365+97/400)^n
plot(x,y,bty='l',xlab='subjects',ylab='probability')
curve(same.birthday(x),add = TRUE)
abline(h=0.5,col=8)
same.birthday(22:23)
603:卵の名無しさん
18/07/02 01:26:48.74 pp47QgIN.net
library(rjags)
N=50
z=40
FP=0.01
shape1=1
shape2=1
dataList=list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
modelstring <- paste0("
model{
theta=TP*x+FP*(1-x)
z ~ dbinom(theta,N)
TP ~ dbeta(shape1,shape2)
x ~ dbeta(shape1,shape2)
}"
)
writeLines( modelstring , con="TEMPmodel.txt" )
jagsModel = jags.model( file="TEMPmodel.txt" , data=dataList, quiet=TRUE)
update(jagsModel)
codaSamples = coda.samples( jagsModel ,
variable=c("TP","x","theta"), n.iter=100000 )
js=as.matrix(codaSamples)
head(js)
BEST::plotPost(js[,'TP'],xlab='sensitivity')
BEST::plotPost(js[,'x'],xlab='prevalence')
BEST::plotPost(js[,'theta'],xlab='positive result',showMode = TRUE)
604:卵の名無しさん
18/07/02 01:28:46.92 pp47QgIN.net
N=50
z=40
FP=0.01
shape1=1
shape2=1
data = list(N=N,z=z,FP=FP,shape1=shape1,shape2=shape2)
stanString=paste0('
data{
int N;
int z;
real FP;
real shape1;
real shape2;
}
parameters{
real<lower=0,upper=1> TP;
real<lower=0,upper=1> x; //prevalence
}
transformed parameters{
real<lower=0,upper=1> theta;
theta=TP*x+FP*(1-x);
}
model{
z ~ binomial(N,theta);
TP ~ beta(shape1,shape2); // T[0.5,];
x ~ beta(shape1,shape2);
}
')
605:卵の名無しさん
18/07/02 01:29:10.57 pp47QgIN.net
# model=stan_model(model_code = stanString)
# saveRDS(model,'quick_kit.rds')
model=readRDS('quick_kit.rds')
fit=sampling(model,data=data,iter=10000)
print(fit,digits=3,probs=c(.025,.50,.975))
stan_trace(fit)
# stan_diag(fit)
stan_ac(fit)
stan_dens(fit,separate_chains = TRUE)
stan_hist(fit,fill='skyblue',bins=15,pars=c('x','TP'))
ms=rstan::extract(fit)
BEST::plotPost(ms$TP,showMode = TRUE,xlab='TP')
BEST::plotPost(ms$x,showMode = FALSE,xlab='prevalence',col=sample(colours(),1))
606:卵の名無しさん
18/07/02 21:40:12.88 tMXA02ZR.net
escape.cliff <- function(p=2/3,k=1000){
pos=1
while(pos<k){
if(pos==0) return(FALSE)
pos=pos+sample(c(1,-1)
607:,1,prob=c(p,1-p)) } return(TRUE) } mean(replicate(1e2,escape.cliff()))
608:卵の名無しさん
18/07/02 22:58:14.27 tMXA02ZR.net
escape.cliff <- function(pos=1,p=2/3,k=1000){
while(pos<k){
if(pos==0) return(FALSE)
pos=pos+sample(c(1,-1),1,prob=c(p,1-p))
}
return(TRUE)
}
mean(replicate(1e3,escape.cliff(10,0.5,100)))
#
totter <- function(p=2/3,pos=1,pos0=NULL,k=1e3){
if(is.null(pos0)) pos0=pos
for(i in 1:k) {
if(pos==0) return(FALSE)
pos=pos+sample(c(1,-1),1,prob=c(p,1-p))
}
return(pos>pos0)
}
mean(replicate(1e3,totter(0.5,10,5 )))