% This R code does the PCAN approach based on two counts data
#% Model: contains Rstan model
#% 
#%

library("Heatplus")
library(CCA)
library(mvtnorm)
library(lattice)
library(rstan)
library(edgeR)
library(PMA)

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%% Model contains the Stan Model
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#set_cppo("fast")  # for best running speed
Model <-'data {
  
  int<lower=1> N; // sample size
  int<lower=1> n1;
  int<lower=1> n2;
  int<lower=1> d;
  int X[N,n1];
  int Y[N,n2];
  row_vector<lower=0>[N] S1;
  row_vector<lower=0>[N] S2;
  row_vector<lower=0>[n2] genl;
}

parameters {
  
  real<lower=0> bet[d];
  real<lower=0> sig_te;
  real<lower=0> sig_lb;
  matrix[N,d] Z;
  cholesky_factor_cov[n1,d] A; 
  cholesky_factor_cov[n2,d] B; 
  matrix[N,n1] teta;
  matrix[N,n2] lambd;
  row_vector[n1] mux;
  row_vector[n2] muy;
}

transformed parameters {
  
  matrix[N,n1] mu_te;
  matrix[N,n2] mu_lb;
  real<lower=0> sigte_sr;
  real<lower=0> siglb_sr;
  real<lower=0> beta_sqt[d];
  mu_te <- Z*A';
  mu_lb <- Z*B';
  siglb_sr <- sqrt(sig_lb);
  sigte_sr <- sqrt(sig_te);
  for(i in 1:d)
    beta_sqt[i] <- sqrt(bet[i]);
}

model{
  mux ~ normal(0, 10);
  muy ~ normal(0, 10);
  bet ~ inv_gamma(2, 1);
  sig_lb ~ scaled_inv_chi_square(10, .05);
  sig_te ~ scaled_inv_chi_square(10, .05);
  
  for(i in 1:n1)
    A[i] ~ normal(0, beta_sqt);
  
  for(i in 1:n2)
    B[i] ~ normal(0, beta_sqt);
  
  for(i in 1:N)
  {
    Z[i] ~normal(0, 1);
    teta[i] ~ normal(mu_te[i] + mux, sigte_sr);
    X[i] ~ poisson_log(teta[i] + log(S1[i]));
    lambd[i] ~ normal(mu_lb[i] + muy, siglb_sr);
    Y[i] ~ poisson_log(lambd[i] + log(S2[i]) +  log(genl));
  }
  
}
'
#******************************************************#
X =   ## Store the X data sets
Y =   ## Store the Y data sets

Y_fac <- calcNormFactors(Y)
#Xn <- scale(
X_fac <- calcNormFactors(X)
#stop()
#***************** what is above depected the new data generation approach ***************************#

#**********************************************************************************************
############################################################################
norm_vec <- function(x){ sqrt(sum(x^2))} ## computes the euclidean norm
norm_l1 <- function(x){ sum(abs(x))} ## computes the l1 norm

##########################################################################

Poste_summaries<-function(out,d,n1,n2) ### take in the MCMC output
{
  
  A=matrix(out[1:(d*n1)],nrow=n1,ncol=d,byrow=FALSE)
  
  ul=n1*d+(1:(n2*d)) 
  
  B=matrix(out[ul],nrow=n2,ncol=d,byrow=FALSE)
  
  #cx=length(ul)
  
  siglb = out[d*n1+n2*d+2]
  
  sigte = out[d*n1+n2*d+1]
  
  out_all_log =as.vector(Post_cor_logn(A,B,sigte,siglb))  
  
  return(out_all_log)    
  
} 

#################################################################################

Post_cor_logn <-function(A,B,sigte,siglb) ## A(n1*d), B(n2*d) this function returns the  correlation estimates
{
  
  n1=nrow(A)
  
  n2=nrow(B)
  
  zxl=rbind(tcrossprod(A) + diag(sigte,n1), tcrossprod(B, A))
  
  zxu=rbind(tcrossprod(A,B) , tcrossprod(B) + diag(siglb,n2)) 
  
  mat_r <- cbind(zxl,zxu)
  
  #out = cov2cor(mat_r) #[1:n1,(n1+1):(n1+n2)]
  
  #ouf=list("cov"=mat_r,"cor_all"=cov2cor(mat_r))
  
  ouf=cov2cor(mat_r)
  
  return(ouf)   
  
}

############################################################################
#stop()
n.samp=20000

d = d_val[idf]        # norm_mi_target_",n1,"/d_"

n1=nrow(X)

n2=nrow(Y)

N=ncol(X)


cat("\n Compute the normalizing factors \n")

X_fac <- calcNormFactors(X)
Y_fac <- calcNormFactors(Y) ## vector of length n2


cat("\n Make the data sets! \n")
bcca_dat<- list("X"=round(t(X)),"Y"=round(t(Y)),"n1"=n1,"n2"=n2,"N"=N,"d"=d,"S1"=X_fac,"S2"=Y_fac,"genl"=rep(1,n2)) ### correct_poi_scale4.stan 29 bet~ inv_gamma(2,1) -- shrinking

cat("\n Fit the model to the data \n")

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
fit <- stan(model_name = "Model",data = bcca_dat, thin=1, iter = n.samp, chains=2, pars=c("A","B","sig_te","sig_lb","bet"))

cat("  Extract the samples ..... \n")
outval=as.matrix(fit) ## convert the draws in a matrix n * (numb parms)
dim(outval)
head(outval[,1:10])


cat("### Compute Posterior Summaries --log \n")
out_cor = NULL
out_cor = t(apply(X=outval,MARGIN=1,FUN=Poste_summaries,n1=n1,n2=n2,d=d)) #Poste_summaries


dim(out_cor)
sum_cor=NULL
cor_mn=colMeans(out_cor)
sum_cor=rbind(sum_cor,cor_mn)
cor_med=apply(out_cor,2,quantile,probs=.5,na.rm = T)
sum_cor=rbind(sum_cor, cor_med)
cor_up=apply(out_cor,2,quantile,probs=.975,na.rm = T)
sum_cor=rbind(sum_cor,cor_up)
cor_lw=apply(out_cor,2,quantile,probs=.025,na.rm = T)
sum_cor=rbind(sum_cor,cor_lw)