# sampleMr: draw a sample of up to 30 MRs from "<i>.all" and write them to
# "<i>.mr" as anchored patterns ("^<mr>;", one per line) for use with grep -f.
#
# Arguments:
#   i    file-name prefix; "<i>.all" is read, "<i>.mr" is written.
#   ppp  FALSE (default): take up to 10 records from each class of interest
#        (6 from the unclassified ones) and top up at random to 30.
#        TRUE: draw one weighted sample in which every record of a class
#        gets weight 1 / (size of that class).
#
# Classes are defined by the bnc2 code (c, i, n, b, 0) and by the two
# disagreement patterns between imrBN and bnc2.
#
# Fixes relative to the earlier version:
#   * fewer than 30 available records no longer makes sample() fail;
#   * an empty stratified pick no longer empties the top-up pool
#     (ss[-integer(0)] selects nothing);
#   * NA codes (the preprocessing turns "-1" into "NA") no longer produce NA
#     masks or NA weights, because masks are built with %in%.
sampleMr <- function(i, ppp = FALSE) {
  x <- scan(
    paste0(i, ".all"),
    list(mr = "", comm = "", gen = "", log = "", stMr = 0, stImr = 0,
         proj = "", imrBN = "", found = "", pri = 0, origin = "", type = "",
         su = "", deltaFrom = 0, deltaTo = 0, nDelta = 0,
         lAdd = 0, lDel = 0, lSame = 0, mods = "", files = "", exts = "",
         bnc = "", bnc2 = ""),
    sep = ";"
  )
  n <- length(x$mr)
  ss <- seq_len(n)
  target <- 30

  # Class masks; %in% never yields NA.
  is_c <- x$bnc2 %in% "c"            # where cleanup belongs
  is_i <- x$bnc2 %in% "i"            # can inspection be recognized
  is_n <- x$bnc2 %in% "n"
  is_b <- x$bnc2 %in% "b"
  is_0 <- x$bnc2 %in% "0"            # bias in non classification
  is_nb <- (x$imrBN %in% "n") & is_b
  is_bn <- (x$imrBN %in% "b") & (is_n | is_i)

  # Keep `pool` unchanged when it is small enough, otherwise pick k of it.
  # Indexing through sample(length(pool), k) avoids sample()'s special case
  # for a length-one numeric first argument.
  cap <- function(pool, k) {
    if (length(pool) > k) pool[sample(length(pool), k)] else pool
  }

  if (n == 0) {
    mrs <- integer(0)
  } else if (isTRUE(as.logical(ppp))) {
    prob <- rep(1 / n, n)
    # Later classes deliberately override earlier ones (same order as before).
    for (cls in list(is_c, is_i, is_n, is_b, is_nb, is_bn, is_0)) {
      prob[cls] <- 1 / max(1, sum(cls))
    }
    mrs <- ss[sample(n, min(target, n), prob = prob)]
  } else {
    cc <- cap(ss[is_c], 10)
    ii <- cap(ss[is_i], 10)
    typeNB <- cap(ss[is_nb], 10)
    typeBN <- cap(ss[is_bn], 10)
    bias <- cap(ss[is_0], 6)
    mrs <- unique(c(cc, ii, typeNB, typeBN, bias))
    if (length(mrs) > target) {
      mrs <- cap(mrs, target)
    } else if (length(mrs) < target) {
      # setdiff() stays correct when mrs is empty, unlike ss[-mrs].
      mrs <- c(mrs, cap(setdiff(ss, mrs), target - length(mrs)))
    }
  }

  # paste() would turn a zero-length selection into the single pattern "^;".
  patterns <- if (length(mrs) > 0) {
    paste0("^", sort(x$mr[mrs]), ";")
  } else {
    character(0)
  }
  write(patterns, file = paste0(i, ".mr"), ncolumns = 1)
}
################
#latest
##### doris claa dkbrink ccc jsc kjs

# shell: for each login, pull the lines containing ';<login>;' out of the
# three compressed dumps into <login>.b, <login>.imr1 and <login>.delta1.
for i in tom cuervo 
do gunzip < allmrs2.clean.bb.gz | grep  ';'$i';' > $i.b 
gunzip < allimr1.gz | grep ';'$i';' > $i.imr1
gunzip < decay/alldelta2.clean.gz | grep ';'$i';' > $i.delta1
done

# shell: build <login>.all (the file sampleMr() reads): keep lines whose
# field 14 exceeds 788940000 and that start with 'oa', then normalise codes
# ('unknown' -> -1 -> NA; a trailing bb/ii/cc/nn becomes b/i/c/n).
# NOTE(review): 788940000 looks like a Unix-time cutoff -- confirm the date.
# NOTE(review): s/-1/NA/g rewrites every "-1" substring on the line, not only
# whole fields -- confirm no other field can contain "-1".
for i in tom cuervo
do awk -F\; '{if($14 >788940000) print $0;}' $i.b | grep '^oa' | \
  sed 's/unknown/-1/g' | sed 's/;bb$/;b/' |  sed 's/;ii$/;i/'  |  sed 's/;cc$/;c/' \
 | sed 's/;nn$/;n/' | sed 's/-1/NA/g' > $i.all
done
#############
# shell: pipe a sampleMr("<login>",ppp=T) call into S for each login;
# sampleMr writes the sampled patterns to <login>.mr.
for i in tom cuervo 
do echo "sampleMr"'("'$i'",ppp=T)' | S
done
#table(x$imrBN[mrs], x$bnc2[mrs])

#####
# shell: use the patterns in <login>.mr to select the sampled records from
# ALLMR.gz and from <login>.all.
for i in tom cuervo; do gunzip < ALLMR.gz | grep -f $i.mr > $i.MR;done
# shell: assemble <login>.final and render it to HTML with experiment.perl.
# tmp4.1 and tmp4.2 are not created in this loop; presumably exp.S derives
# them from tmp3.1 / tmp3.2 -- verify against exp.S.
# NOTE(review): the paste steps join files line by line, so tmp1 (sorted
# <login>.exp) and tmp2 (from sorted <login>.MR) must list the MRs in the
# same order -- confirm both sort on the same key.
for i in tom cuervo 
do grep -f $i.mr $i.all > $i.exp
cut -d\; -f1,3,5,15,20,21 $i.exp | sort > tmp1
sort   $i.MR | cut -d\; -f4 > tmp2
cut -d\; -f3 tmp1 > tmp3.1
cut -d\; -f4 tmp1 > tmp3.2
echo 'source("exp.S")' | S
cut -d\; -f1,2,5- tmp1 | paste -d\; - tmp4.1 | \
paste -d\; - tmp4.2 | paste -d\; - tmp2 |\
awk -F\; '{ print $1 ";" $2 ";" $5";" $6 ";" $7 ";" $3 ";" $4;}'>$i.final
perl experiment.perl < $i.final > $i.html
done
##########################################
###########################################

##cut -d\; -f1 jsc.final
# Next step (manual): collect the developers' answers, enter them in $i.a,
# and sort the lines of each $i.a file.
 
# shell: for each developer, join the answer file <login>.a with the matching
# MR records into <login>.dat.
#   tmp  : first field of each answer followed by ';', used as fgrep patterns
#   tmp2 : the answers with ;e / ;m / ;h recoded to ;1 / ;2 / ;3
#   tmp1 : records from mrdata.gz that contain any of the patterns
# NOTE(review): paste joins `sort tmp1` with tmp2 line by line, so <login>.a
# must already be sorted in the same order and match exactly one record per
# line -- confirm.
# NOTE(review): each sed replaces the first ";e" / ";m" / ";h" on a line,
# wherever it occurs -- confirm no earlier field starts with those letters.
for i in ccc claa doris dkbrink jsc kjs tom; do cat $i.a | cut -d\; -f1 | awk '{print $1 ";";}' > tmp
sed  's/;e/;1/' $i.a | sed 's/;m/;2/' | sed 's/;h/;3/' > tmp2
#gunzip < allmrs2.clean.bb.gz | fgrep -f tmp > tmp1
gunzip < decay/dicer/mrdata.gz | fgrep -f tmp > tmp1
sort tmp1 | paste -d\; - tmp2 | sed 's/unknown/-1/g' \
 | sed 's/;bb;/;b;/' |  sed 's/;ii;/;i;/'  |  sed 's/;cc;/;c;/' \
 | sed 's/;nn;/;n;/' | sed 's/-1/NA/g' > $i.dat
done

##########################################
#try different classifications
##########################################
#prep data
# shell: all records whose line starts with 'oa'.
gunzip < decay/dicer/mrdata.gz | grep '^oa' > /tmp/oamrdata

# shell: pool the answers of all developers.
#   tmp  : sorted "<first field>;" patterns
#   tmp2 : sorted answers with e/m/h recoded to 1/2/3 and ;nn; to ;n;
for i in ccc claa doris dkbrink jsc kjs tom
do cat $i.a | cut -d\; -f1 | awk '{print $1 ";";}'
done | sort > tmp
for i in ccc claa doris dkbrink jsc kjs tom
do sed  's/;e/;1/' $i.a | sed 's/;m/;2/' | sed 's/;h/;3/' | sed 's/;nn;/;n;/' 
done | sort > tmp2
# records that match any answered MR, sorted so they line up with tmp2
cat /tmp/oamrdata | fgrep -f tmp | sort > experimentMrs
#now edit things to avoid two logins!

# shell: append answer fields 2-3 to each record and run the three classifier
# scripts; the outputs are the inputs of the S session further down.
cut -d\; -f2,3 tmp2 | paste -d\; experimentMrs - | perl testClass.perl >test.dat
cut -d\; -f2,3 tmp2 | paste -d\; experimentMrs - | perl testClass1.perl >test1.dat
cut -d\; -f2,3 tmp2 | paste -d\; experimentMrs - | perl testClass2.perl low >test2.dat
# tmpm: for fields 11 and 12 of test2.dat, the number of ':'-separated items
# (split is evaluated in scalar context by the '.' concatenation).
cut -d\; -f11-12 test2.dat | perl -ane '@x=split(/\;/);print split(/\:/,$x[0]) .";".split(/\:/,$x[1])."\n";'>tmpm

# shell: classify the full 'oa' data set.
# NOTE(review): oamrdata2 and oamrdata1 are produced by the same command.
cat /tmp/oamrdata |perl testClass2.perl low >/tmp/oamrdata3
cat /tmp/oamrdata |perl testClass2.perl >/tmp/oamrdata2
cat /tmp/oamrdata |perl testClass2.perl >/tmp/oamrdata1
# Variant A of /tmp/tmp: six fields (3, 6-9, 15) per record.
cut -d\; -f3,6-9,15 /tmp/oamrdata1 > /tmp/tmp

# Variant B of /tmp/tmp: word count of field 2 plus field 15. This overwrites
# variant A, so the two S scans of /tmp/tmp that follow are alternatives, not
# a sequence.
cut -d\; -f2 /tmp/oamrdata1 | awk '{print NF;}' > /tmp/tmp1
cut -d\; -f15 /tmp/oamrdata1 > /tmp/tmp2
paste -d\; /tmp/tmp1 /tmp/tmp2 > /tmp/tmp
# S session (`_` is the S assignment operator).
# Mean number of words per class, from the two-field variant of /tmp/tmp.
# NOTE(review): the grouping uses x$t, but x is only read two statements
# later; y$t is probably intended -- confirm.
y_scan ("/tmp/tmp",list(nw=0,t=""),sep=";");
tapply(y$nw,x$t,mean);

# Share of unchanged lines (nSam) per class, from the six-field variant of
# /tmp/tmp.
x_scan ("/tmp/tmp",list(bnc="",nd=0,nAdd=0,nDel=0,nSam=0,t=""),sep=";");
z_tapply(x$nSam,x$t,sum);
sum(z[c("b","B")])/sum(z); sum(z[c("n","N")])/sum(z); sum(z[c("c","C")])/sum(z); 
z[c("i","0")]/sum(z);sum(z);
sum(z[c("b","n","c","i")])/sum(z);

# Same shares computed on record counts; this reuses the name y.
y_table(x$t);sum(y[c("b","n","c","i")])/sum(y);
sum(y[c("b","B")])/sum(y); sum(y[c("n","N")])/sum(y); sum(y[c("c","C")])/sum(y); 
y["0"]/sum(y);sum(y);

# Start the S interpreter (shell command); everything below is S code.
S
# nmf: the two per-MR counts written to tmpm.
nmf _ data.frame(scan ("tmpm", list(m=0,f=0),sep=";"))
# all: output of testClass.perl (test.dat); the last three fields are the
# developer's answers devBN / devEMH and the automatic class t.
# NOTE(review): all2 (and all1), used below, are not created anywhere in this
# file; presumably they are read the same way from test2.dat / test1.dat --
# confirm.
all _ data.frame(scan ("test.dat",
list(mr="",comm="",bnc="", stImr=0, endImr=0,
nDelta=0,lAdd=0,lDel=0,lSame=0,login="", 
mods="",files="",imrBN="",imrFound="",
 devBN="",devEMH=0,t=""), sep=";",flush=T));

#preliminary survey
# Cross-tabulate automatic class (rows) against developer class (columns)
# for doris, for claa, and for both together; `ind` keeps the last mask.
ind_as.character(all2$login) == "doris";table(all2$t[ind],all2$devBN[ind])[,c("0","b","n")];
ind_as.character(all2$login) == "claa";table(all2$t[ind],all2$devBN[ind])[,c("0","b","n")];
ind_as.character(all2$login) == "claa" | as.character(all2$login) == "doris";table(all2$t[ind],all2$devBN[ind])[,c("0","b","n")];
#next survey
# Developer class (rows) against automatic class (columns) for everyone
# except claa/doris, then for all records.
table(all2$devBN[!ind],all2$t[!ind])[c("b","n","c","i","0","o"),c("b","n","c","i","0")]
table(all2$devBN,all2$t)[c("b","n","c","i","0","o"),c("b","n","c","i","0")]

#dealing with unclassified
# tt is t with the unclassified code "0" counted as "b".
tmp _ as.character(all2$t) 
tmp[tmp == "0"] _ "b"
all2$tt _ as.factor(tmp)

#proportion of correctly classified, out of classified, proportion classified
# ff() and logLin() are defined further down in this file; define them first.
ff(all2$tt)
logLin (all2$tt, all2$devBN)

# cl: the automatic-vs-developer table as 16 cell counts for Poisson models.
# as.vector() unrolls the table column by column, so `auto` (table rows)
# varies fastest and `devel` (table columns) slowest. bb/cc/ii/nn flag the
# four diagonal cells and id flags the whole diagonal.
# NOTE(review): the hand-written auto labels assume the table has exactly the
# four rows b, c, i, n in that order -- confirm the levels of all2$tt.
cl_data.frame(Count=as.vector(table(all2$tt,all2$devBN)[,c("b","c","i","n")]),
auto= c("b","c","i","n","b","c","i","n","b","c","i","n","b","c","i","n"),
devel=c("b","b","b","b","c","c","c","c","i","i","i","i","n","n","n","n"),
bb=c(T,F,F,F,F,F,F,F,F,F,F,F,F,F,F,F),
cc=c(F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F),
ii=c(F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F),
nn=c(F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,T),
id=c(T,F,F,F,F,T,F,F,F,F,T,F,F,F,F,T)
)

# Poisson models: main effects for auto and devel plus different groupings of
# the diagonal (agreement) cells. mod00 has main effects only; mod0 uses one
# common agreement term.
# NOTE: `mod` is assigned twice in a row here and again at the end of this
# section; only the most recent fit is kept.
mod_glm(Count~auto + devel+I(bb == TRUE | cc == TRUE) + I(auto=="i"&devel=="i")
+ I(auto=="n"&devel=="n"), family=poisson,data=cl)
mod_glm(Count~auto + devel+bb + cc  + ii + nn, family=poisson,data=cl)
mod0_glm(Count~auto + devel+id, family=poisson,data=cl)
mod00_glm(Count~auto + devel, family=poisson,data=cl)
mod1_glm(Count~auto + devel+I(ii == TRUE | cc == TRUE)+I(bb == TRUE | nn == TRUE), family=poisson,data=cl)
mod3_glm(Count~auto + devel+bb+nn+I(ii == TRUE | cc == TRUE),family=poisson,data=cl)
mod6_glm(Count~auto + devel+cc+I(ii==TRUE|bb==TRUE|nn==TRUE),
family=poisson,data=cl)
mod7_glm(Count~auto + devel+I(cc==TRUE|ii==TRUE|bb==TRUE|nn==TRUE),
family=poisson,data=cl)

mod2_glm(Count~auto + devel+ii+cc+I(bb==TRUE|nn==TRUE), family=poisson,data=cl)
mod4_glm(Count~auto + devel+ii+I(cc==TRUE|bb==TRUE)+nn, family=poisson,data=cl)
mod5_glm(Count~auto + devel+ii+I(cc==TRUE|bb==TRUE|nn==TRUE), family=poisson,data=cl)

# Same terms as mod7 plus a separate ii term.
mod51_glm(Count~auto + devel+I(ii==TRUE|cc==TRUE|bb==TRUE|nn==TRUE)+ii, family=poisson,data=cl)
summary(mod51)



# The author's verdicts on the candidate models ("no" / "yes").
#no
summary(mod0)
summary(mod1)
summary(mod3)
summary(mod6)
summary(mod7)

#yes
summary(mod2)
summary(mod4)
summary(mod5)

# Observed counts minus fitted counts; predict() is on the log scale here,
# hence exp().
summary(mod)
cl$Count-exp(predict(mod))

# Same terms as mod0, written in a different order; overwrites mod.
mod_glm(Count~id + auto + devel, family=poisson,data=cl)

# Agreement rates for each candidate classification. ff() is defined just
# below; run the definition before this line.
ff(all2$t);ff(all1$t);ff(all$t);ff(all$bnc);
# ff: agreement between a classification and the reference classes.
#
# Arguments:
#   var  classification to evaluate (factor or character); "0" means
#        "not classified".
#   ref  reference classes, same length as var. Defaults to all2$devBN,
#        which is what earlier versions of this function always used.
#
# Value: numeric vector of length 3:
#   [1] proportion of records where var equals ref,
#   [2] the same proportion among the records that var classified
#       (var != "0"),
#   [3] proportion of records that var classified.
ff <- function(var, ref = all2$devBN) {
  v <- as.character(var)
  agree <- as.character(ref) == v
  classified <- v != "0"
  c(sum(agree) / length(v),
    sum(agree & classified) / sum(classified),
    sum(classified) / length(v))
}

#FM (Fowlkes-Mallows) and Jaccard statistics
# fJ() is defined just below; run the definition before this line.
fJ(all2$tt);fJ(all2$t);fJ(all1$t);fJ(all$t);fJ(all$bnc);
# fJ: Fowlkes-Mallows and Jaccard agreement between two classifications.
#
# Arguments:
#   var        classification to evaluate.
#   ref        reference classes, same length as var. Defaults to all$devBN,
#              which is what earlier versions of this function always used.
#   min_count  cells / classes with fewer members are ignored. The default 3
#              reproduces the earlier hard-coded "> 2" filter; use 0 for the
#              textbook statistics.
#              NOTE(review): the reason for ignoring counts of 2 is not
#              recorded -- confirm the filter is wanted.
#
# Value: c(FM, Jaccard). With
#   both   = pairs of records placed together by var and by ref,
#   in_var = pairs placed together by var,
#   in_ref = pairs placed together by ref,
# FM = both / sqrt(in_var * in_ref) and Jaccard = both / (in_var + in_ref - both).
#
# The earlier version used in_var and in_ref as if they were the
# disagreement counts (in_var - both, in_ref - both), so two identical
# classifications scored 0.5 and 1/3 instead of 1 and 1.
fJ <- function(var, ref = all$devBN, min_count = 3) {
  # number of unordered pairs within groups of the given sizes
  n_pairs <- function(sizes) {
    sizes <- sizes[sizes >= min_count]
    sum(sizes * (sizes - 1) / 2)
  }
  both <- n_pairs(as.vector(table(var, ref)))
  in_var <- n_pairs(as.vector(table(var)))
  in_ref <- n_pairs(as.vector(table(ref)))
  c(both / sqrt(in_var * in_ref), both / (in_var + in_ref - both))
}

##########################################
#Profile

#Effort
# Developer-reported class against reported effort (devEMH), leaving out the
# records the developer marked "o" or "0".
ind_as.character(all2$devBN) != "o" & as.character(all2$devBN) != "0"
table(as.character(all2$devBN[ind]), all2$devEMH[ind])

# logLin: observed minus expected counts for the two-way table of a and b,
# where "expected" is the fit of the independence model (margins 1 and 2
# fitted separately).
#
# Arguments:
#   a, b  two classifications of the same records (factors or vectors).
#
# Value: a table with the dimensions of table(a, b). Positive cells occur
# more often than independence predicts, negative cells less often.
#
# R's loglin() takes the margins as a list and returns a list of results, so
# the fitted counts have to be requested (fit = TRUE) and extracted before
# subtracting. The earlier call, margin = c(1, 0, 2) with the result
# subtracted directly, appears to follow an older S interface --
# NOTE(review): confirm.
logLin <- function(a, b) {
  observed <- table(a, b)
  expected <- loglin(observed, margin = list(1, 2), fit = TRUE,
                     print = FALSE)$fit
  observed - expected
}
# Residuals from independence: class against reported effort.
# NOTE: the third call uses all2$imrF, which is only created further down
# (the "some effect on imrFound" step); run that step first.
logLin(all2$devBN, all2$devEMH)#effect for the bug MRs
logLin(all2$tt, all2$devEMH)
logLin(all2$imrF[all2$devBN=="b"], all2$devEMH[all2$devBN=="b"])


# Effort explained by number of deltas, developer class and login.
anova(glm(devEMH ~ nDelta+devBN+login,  data=all2))
# bf: 0/1 indicator of devBN == "b". It is a free-standing vector, not a
# column of all2, so the formulas below pick it up from the workspace.
bf _ all2$devBN == "b";bf[bf==F]_0;bf[bf==T]_1;
coplot(devEMH~  nDelta| bf , data=all2)
anova(lm(devEMH ~ nDelta + bf + login,  data=all2))


anova(lm(devEMH ~ nDelta + devBN + login,  data=all2))
#all significant
anova(lm(devEMH ~ nDelta+tt+login,  data=all2))

#some effect on imrFound
# imrF: imrFound with the listed codes pooled into a single level "na".
tmp_as.character(all2$imrFound)
tmp[tmp == "-1" | tmp == "av" | tmp == "dw" | tmp == "lb" | tmp
=="rf" |tmp =="rt"|tmp =="rv"|tmp =="sv"|tmp =="ca"|tmp =="ot"] _ "na"
all2$imrF_as.factor(tmp) 
anova(lm(devEMH ~ nDelta+tt+login+imrF,  data=all2))

# stImr converted from seconds to years, plus 70 (result is only printed).
all2$stImr/3600/24/365.25 + 70
# int: length of the endImr - stImr interval in days; like bf it lives in the
# workspace, not in all2.
int_(all2$endImr-all2$stImr)/3600/24

summary(lm(devEMH ~ nDelta+int+devBN+login+imrF,  data=all2))$r.square
summary(lm(devEMH ~ nDelta+int+devBN+login+imrF,  data=all2))$coefficients
step(lm(devEMH ~ nDelta+int+devBN+login+imrF+imrBN,  data=all2))

summary(lm(devEMH ~ nDelta+int+I(devBN=="b")+login+imrF,  data=all2))$coefficients
summary(lm(devEMH ~ nDelta+int+I(devBN=="b")+I(devBN=="n")+login+imrF,  data=all2))$coefficients
step(lm(devEMH ~nDelta+int+I(devBN=="b")+I(devBN=="n")+I(devBN=="i")+I(devBN=="c")+login+imrF,data=all2))
summary(lm(devEMH ~nDelta+int+I(devBN=="b")+I(devBN=="c")+login+imrF,data=all2))

# i runs over 0.1, 0.2, ..., 3.0.
# NOTE(review): the model inside the loop does not use i, so every iteration
# prints the same R^2; presumably i was meant to enter the transform of int
# -- confirm.
for (i in 1:30/10){
v_summary(lm(devEMH~nDelta+I(exp(int))+I(devBN=="b")+I(devBN=="c")+login,data=all2))$r.square
print (paste (i, v));
}
# NOTE(review): the next statement does not parse as written: a "+" is
# missing between I(devBN=="b") and I(devBN=="n"), and "++" precedes
# I(devBN=="c").
summary(lm(devEMH~nDelta+I(log(int+1))+I(devBN=="b")I(devBN=="n")+I(devBN=="i")++I(devBN=="c")+login,data=all2,subset=(mr != "oa581685bI")
))$r.square

#Full
# Full model on log(int + 1), leaving out the MR "oa581685bI".
mod_lm(devEMH~nDelta+I(log(int+1))+I(devBN=="b")+I(devBN=="n")+I(devBN=="c")+I(devBN=="i")+login,data=all2,subset=(mr != "oa581685bI"))
summary(mod)
step(mod)
mod1_lm(devEMH~nDelta+I(log(int+1))+I(devBN=="b")+I(devBN=="c")+login,data=all2)
# NOTE(review): the next two lines are a broken fragment (summary() has no
# model argument and the parentheses do not balance); they look like a
# leftover of the subset=... variant of mod1.
summary(,subset=(mr != "oa581685bI")
))$r.square



##############################
#Size
# oa: automatically classified 'oa' records (no developer answers, so the
# record ends with the automatic class t). scan() with a list returns a
# plain list, not a data frame.
oa _ scan("/n/notmafia/tmp/oamrdata2", 
list(mr="",comm="",bnc="", stImr=0, endImr=0,
nDelta=0,lAdd=0,lDel=0,lSame=0,login="", 
mods="",files="",imrBN="",imrFound="",t=""), sep=";");

# pquants: draw percentile curves, one line per column of `val`, on a
# logarithmic x axis.
#
# Arguments:
#   val    numeric matrix; row k holds the (k - 1)-th percentile and each
#          column is one group. Callers pass the transposed output of
#          quants() restricted to rows 1:100. Negative values are set to 0.
#   labx   x-axis label.
#   title  plot title.
#
# Column j is drawn with line type j, so a legend can use lty = 1, 2, ...
# Called for its side effect on the current graphics device.
#
# Changes from the earlier version: the empty frame now spans exactly
# [1, max(val)] by [0, nrow(val) - 1]. Before, it was built from ten helper
# points that stopped short of max(val) on x and, when max(val) was below
# 100, short of 99 on y. The number of rows is no longer assumed to be 100,
# and a plain vector is accepted as a single column.
pquants <- function(val, labx, title) {
  val <- as.matrix(val)
  val[val < 0] <- 0
  top <- max(val)
  pct <- seq_len(nrow(val)) - 1
  # NOTE(review): xaxs = "e" is an S axis style; R's par() documentation lists
  # only "r" and "i" as implemented -- confirm before running this under R.
  plot(c(1, top), range(pct), type = "n",
       xaxp = c(1, top, 5), xaxs = "e", log = "x",
       ylab = "Probability", xlab = labx, main = title)
  for (j in seq_len(ncol(val))) {
    lines(val[, j], pct, lty = j)
  }
}

# quants: percentile profiles of change size for several subsets of MRs.
#
# Arguments:
#   conds  character vector; each element is the text of an expression that
#          evaluates to a logical (or index) vector selecting records.
#          Inside an expression `oa` is the data passed as second argument;
#          every other name is looked up in the caller's environment.
#   oa     list or data frame with components nDelta, stImr, endImr, lAdd,
#          lDel and lSame.
#
# Value: a list with components nDelta, interval (endImr - stImr), added,
# deleted, changed (lAdd + lDel) and same. Each component is a matrix with
# one row per condition, in the order given, and 101 columns holding the
# 0%, 1%, ..., 100% quantiles of that measure over the selected records.
#
# Changes from the earlier version: the result no longer starts life as the
# logical placeholder FALSE, a single condition now yields one-row matrices
# like any other call, and the helper variables of this function can no
# longer shadow names used in the condition strings.
quants <- function(conds, oa) {
  probs <- 0:100 / 100
  measures <- list(
    nDelta = oa$nDelta,
    interval = oa$endImr - oa$stImr,
    added = oa$lAdd,
    deleted = oa$lDel,
    changed = oa$lAdd + oa$lDel,
    same = oa$lSame
  )

  # The conditions arrive as text, so they have to be parsed and evaluated;
  # only `oa` is bound explicitly.
  caller <- parent.frame()
  selections <- lapply(conds, function(cond) {
    eval(parse(text = cond), list(oa = oa), caller)
  })

  lapply(measures, function(values) {
    rows <- lapply(selections, function(sel) quantile(values[sel], probs))
    do.call(rbind, rows)
  })
}

# Percentile profiles for four groups of automatic classes, in this order:
# b/B/0, n/N, c/C, i. The legends below label them Corrective, Adaptive,
# Perfective and Inspection.
res_quants(c("oa$t == \"b\" | oa$t == \"B\" | oa$t == \"0\"",
"oa$t == \"n\" | oa$t == \"N\"",
"oa$t == \"c\" | oa$t == \"C\"",
"oa$t == \"i\""), oa)
# Three stacked panels written to bncprof.ps. The bare strings after some
# statements ("b,n great", ...) are the author's notes and have no effect.
# NOTE(review): no dev.off() follows, so the file may stay open until the
# session ends -- confirm.
postscript("bncprof.ps")
par(mfrow=c(3,1))
labx _ "Days"
title_ "Change Interval"
# Seconds -> days (+1 so the log axis starts at 1); t() puts the groups in
# columns and [1:100,] keeps the 0%..99% percentiles.
val _ t(1+res$interval/3600/24)[1:100,];"b,n great"
pquants (val[,],labx, title);
legend(mean(val), 50,legend=c("Corrective","Adaptive",  "Perfective","Inspection"), lty=c(1,2,3,4)) 

labx _ "number of lines"
title_ "Lines Added"
val _ (t(res$added)[1:100,]); "b,i great, b,n OK"
pquants (val[,],labx, title);
legend(mean(val), 50,legend=c("Corrective","Adaptive",  "Perfective","Inspection"), lty=c(1,2,3,4)) 

labx _ "number of lines"
title_ "Lines Deleted"
val _ t(res$deleted)[1:100,]; "b, i" 
pquants (val[,],labx, title);
legend(mean(val), 50,legend=c("Corrective","Adaptive",  "Perfective","Inspection"), lty=c(1,2,3,4)) 

#investigate cleanup
# testCleanup.perl (shell command) adds a 16th field, read here as cl.
perl testCleanup.perl < /tmp/oamrdata2 > testcl
oacl _ data.frame(scan("testcl", 
list(mr="",comm="",bnc="", stImr=0, endImr=0,
nDelta=0,lAdd=0,lDel=0,lSame=0,login="", 
mods="",files="",imrBN="",imrFound="",t="",cl=""), sep=";"));
# NOTE: from here on `oa` is the cleanup data frame, not the scan() list.
oa _ oacl
postscript("tmp.ps")
# NOTE(review): pquants() takes (val, labx, title); condition strings plus a
# data set are the arguments of quants(). Presumably quants() followed by
# plotting was intended -- confirm.
pquants(c(
"oacl$cl == \"clean up\" | oa$cl == \"cleanup\"",
"oacl$cl == \"delete\"",
"oacl$cl == \"lint\"",
"oacl$cl == \"unneeded\"",
"oacl$cl == \"carry forward\""
),oacl) 


########################################################
#validating on fc
# shell: the same preparation as for 'oa', for records starting with 'fc'.
gunzip < decay/dicer/mrdata.gz | grep '^fc' > /tmp/fcmrdata
cat /tmp/fcmrdata |perl testClass2.perl >/tmp/fcmrdata2
# NOTE(review): the file is written to /tmp/fcmrdata2 above but read from
# ../fcmrdata2 here -- confirm the path.
fc _ scan("../fcmrdata2", 
list(mr="",comm="",bnc="", stImr=0, endImr=0,
nDelta=0,lAdd=0,lDel=0,lSame=0,login="", 
mods="",files="",imrBN="",imrFound="",t=""), sep=";");

# try: oa and fc stacked (oa records first); ioa marks the oa part.
# NOTE: in R the name `try` masks the base function of the same name.
# NOTE(review): if the cleanup step above was run, oa is a data frame and
# oa$t may be a factor, in which case c(oa$t, fc$t) would not keep the class
# letters -- confirm oa is the scan() list at this point.
try_list();
try$nDelta_c(oa$nDelta,fc$nDelta);
try$endImr_c(oa$endImr,fc$endImr);
try$stImr_c(oa$stImr,fc$stImr);
try$lAdd_c(oa$lAdd,fc$lAdd);
try$lDel_c(oa$lDel,fc$lDel);
try$lSame_c(oa$lSame,fc$lSame);
try$t_c(oa$t,fc$t);
ioa _ rep(F, length(try$nDelta));ioa[1:length(oa$nDelta)]_T;
# NOTE(review): pquants() takes (val, labx, title); these look like quants()
# arguments -- confirm.
pquants(c("ioa == T","ioa == F"),try)#similar!!!!!
# Condition strings per class and data set: the plain name selects the oa
# records (ioa == T), the name ending in 1 selects the fc records.
bb _ "ioa == T & (try$t == \"b\" | try$t == \"B\" |  try$t == \"0\")";
bb1_ "ioa == F & (try$t == \"b\" | try$t == \"B\" |  try$t == \"0\")";
nn _ "ioa == T & (try$t == \"n\" | try$t == \"N\")";
nn1_ "ioa == F & (try$t == \"n\" | try$t == \"N\")";
cc _ "ioa == T & (try$t == \"c\" | try$t == \"C\")";
cc1_ "ioa == F & (try$t == \"c\" | try$t == \"C\")";
ii _ "ioa == T & try$t == \"i\"";
ii1 _"ioa == F & try$t == \"i\"";
# Result rows / plot columns: 1 bb, 2 bb1, 3 nn, 4 nn1, 5 cc, 6 cc1, 7 ii,
# 8 ii1.
res_quants(c(bb,bb1,nn,nn1,cc,cc1,ii,ii1), try)

# Three stacked panels comparing the two data sets, written to oafc.ps.
postscript("oafc.ps")
par (mfrow=c(3,1))
labx _ "Days"
title_ "Change Interval"
val _ t(1+res$interval/3600/24)[1:100,];"b,n great"
pquants (val[,c(1,2,3,4)],labx, title);
legend(mean(val), 50,
legend=c("Product 1, Corrective","Product 2, Corrective", 
          "Product 1,Adaptive","Product 2, Adaptive"), 
lty=c(1,2,3,4)) 
labx _ "number of lines"
title_ "Lines Added"
val _ (t(res$added)[1:100,]); "b,i great, b,n OK"
pquants (val[,1:4],labx, title);
legend(mean(val), 50,
legend=c("Product 1, Corrective","Product 2, Corrective", 
          "Product 1, Adaptive","Product 2, Adaptive"), 
lty=c(1,2,3,4)) 
labx _ "number of lines"
title_ "Lines Deleted"
val _ t(res$deleted)[1:100,]; "b, i" 
# Columns 1, 2, 7, 8 are the corrective and inspection groups, matching the
# legend.
pquants (val[,c(1,2,7,8)],labx, title);
legend(mean(val), 50,
legend=c("Product 1, Corrective","Product 2, Corrective", 
          "Product 1, Inspection","Product 2, Inspection"), 
lty=c(1,2,3,4)) 


#ndelta   
#val _ log(2+t(res$nDelta)[1:100,]);"b,i great, b,n OK"




###########################
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Priority number increases (i.e. priority decreases) with the IMR interval.
# Effort model on the data frame `all`, with rows containing NA dropped.
# NOTE(review): the interval is written stImr-endImr here but endImr-stImr
# where `int` is computed; if endImr is later than stImr by more than 100 the
# log() argument is negative -- confirm the sign.
# NOTE(review): deltaTo and deltaFrom are not among the columns read into
# `all` from test.dat -- confirm where they come from.
a_lm(devEMH~devBN+bnc+log(nDelta)+log(stImr-endImr+1e2)+log(deltaTo-deltaFrom+1e2),na.action= na.omit, data=all)
summary(a)
anova(a, test="F")
!!!!!!!!!!!!!!!!!!!!!!!!!!
Hardness increases with the number of deltas.

##########################################
# shell: write the records of each developer (lines containing ';<login>;')
# to a file named after the login. The extractions run in parallel.
# Fixes: `for i in ( ... )` is not valid shell syntax, and the loop body
# always extracted danf instead of the current login.
for i in danf pyrce kgr ogd; do
gunzip < allmrs.clean.gz | grep ';'$i';' > $i &
done
# wait for the background jobs before the files are read below
wait

# shell: build the printable survey sheets for each login.
all="danf pyrce kgr ogd"
# <login>.prn: fields 1-3 and 12-16 of each record.
for i in $(echo $all); do cut -d\; -f1,2,3,12,13,14,15,16 $i > $i.prn; done
# <login>.y: fields 8 and 9 converted from seconds since 1970 to fractional
# calendar years (using 365-day years).
for i in $(echo $all); do cut -d\; -f8,9 $i | awk -F\; '{print (($1/3600/24/365) + 1970), (($2/3600/24/365) +1970);}'> $i.y; done

# <login>.rep: years and record side by side, each followed by a "( )" line
# (presumably the box in which the developer writes the mark -- confirm).
for i in $(echo $all); do paste $i.y $i.prn |perl -ane 'chop($_);print "$_\n( )\n";'> $i.rep; done 



# NOTE(review): this prints kgr.prn, not the .rep file built above -- confirm
# which file was meant to be printed.
nenscript -r -p tmp kgr.prn


Please mark each MR according to the purpose of the change:
add new features (n)
correct existing problems (b)
improve the structure of the code (add or fix comments, remove trash, etc.) (c)
code inspection rework (i)
other (o) -- please explain the type of activity.

The MR records show the years of the first and last change,
the MR number, abstract, release,
lines added and lines deleted,
and the lists of files, modules, and file extensions.

 Thank you, Audris






