#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "programming.h"
#include "constantunits.h"

#include "errors.h"
#include "outputs.h"
#include "interface.h"
#include "coordinates.h"
//#include "cartesian.h"
#include "calculate.h"

int calculatemmatrix(int m,double *w,double *mm,double *mi)	// m=nc u=f->m mm=h->m mi=h->mi
{
	int i,j;
	for(j=0;j<m;j++)
		for(i=0;i<=j;i++)
			mi[uindex(i,j)]=(mm[uindex(i,j)]=0);
	for(j=0;j<m;j++)
		mi[uindex(j,j)]=1/(mm[uindex(j,j)]=w[j]);
	return(m);
}


// Clear the bt array and calculate the transpose of the B matrix array anew 

int calculatebtmatrix(int nr,int nc,ZMT *z,double *r,double *x,double *bt)
{
	int i;
	ZMT *zi;
	fDCLEAR(nc*nr,bt,1);	
	for(i=0;i<nr;i++)
	{
		zi=z+i;
		zi->r=zi->f*(r[i]=calculatebtcolumn(zi->type,zi->id,x,bt+i*nc));
	}
	return(nr);
}


// calculate the transpose of an m (rows) X n (columns) matrix

int calculatetranspose(int m,int n,double *a,double *t) // m=nc n=nr a=h->bt t=h->b
{
	int i,j;
	for(j=0;j<m;j++)
		for(i=0;i<n;i++)
			t[rindex(i,j,n,m)]=a[rindex(j,i,m,n)];
	return(m*n);
}


// this function calculates matrix product M^-1 B^T = (B M^-1^T)^T = (B M^-1)^T
// M is a diagonal matrix whose elements are the atomic masses.

int calculatemibtmatrix(int m,int n,double *bt,double *w,double *mibt)	// m=nc n=nr w=f->m
{
	int i,j;
	for(j=0;j<n;j++)
		for(i=0;i<m;i++)
			mibt[rindex(i,j,m,n)]=bt[rindex(i,j,m,n)]/w[i];
			//	fDCOPY(n*m,bt,1,mibt,1);	// another definition of B^-1
	return(m);
}


// this function calculates the transpose of the G-Matrix
// G^T = G = (B^T)^T (M^-1 B^T)

int calculategtmatrix(int m,int n,double *bt,double *mibt,double *g) // m=nc n=nr 
{
	fDGEMM('T','N',n,n,m,1.0,bt,m,mibt,m,0.0,g,n);
	return(0);
}


// this function calculates the G-Matrix
// G = B (M^-1 B^T)

int calculategmatrix(int m,int n,double *b,double *mibt,double *g) // m=nc n=nr 
{
	fDGEMM('N','N',n,n,m,1.0,b,n,mibt,m,0.0,g,n);
	return(0);
}

int calculategimatrix(int n,double *g,double *gi) // n=nr
{
	int j,nz=0,INFO=0;
	double *reciprocal=NULL,*eigenvalue=NULL,*eigenvector=NULL,*WORK=NULL;
	double TOLERANCE=1E-10*sqrt((double)(n));
	if(NULL==(WORK=(double *)calloc(3*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(reciprocal=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvalue=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvector=(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");

	calculatetriangular(n,g,gi);
	fDSPEV('V','U',n,gi,eigenvalue,eigenvector,n,WORK,&INFO);
	for(j=0;j<n;j++)	// retrieve symmetry
	{
		if(eigenvalue[j]<TOLERANCE) { nz++; reciprocal[j]=0; }	// SMALL = 1E-6
		else
			reciprocal[j]=1/eigenvalue[j];
	}
	evsimilartransform(n,reciprocal,eigenvector,gi);

	free(eigenvector);
	free(eigenvalue);
	free(reciprocal);
	free(WORK);
	return(nz);
}


// diagonalize the G-matrix and calculate its inverse, its square root and its inverse square root
// the function return the number of eigenvalues of G with are below the tolerance threshold 

int calculategsmatrix(int n,double *g,double *gi,double *gs,double *gq) // n=nr
{
	int j,nz=0,INFO=0;
	double *reciprocal=NULL,*eigenvalue=NULL,*eigenvector=NULL,*WORK=NULL;
	double TOLERANCE=1E-10*sqrt((double)(n));
	if(NULL==(WORK=(double *)calloc(3*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(reciprocal=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvalue=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvector=(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");

	calculatetriangular(n,g,gi);					// gi is temporary storage
	fDSPEV('V','U',n,gi,eigenvalue,eigenvector,n,WORK,&INFO);	// compute the eigenvector matrix and the eigenvalues, convert gi into tridiagonal form
	for(j=0;j<n;j++)
		if(eigenvalue[j]<TOLERANCE) nz++;			// count the number of eigenvalue below the tolerance level	
		// fprintf(stderr,"%5d: %12.8lf\n",j+1,eigenvalue[j]);

	for(j=0;j<n;j++)	// retrieve symmetry
		if(eigenvalue[j]<TOLERANCE) reciprocal[j]=0; 
		else reciprocal[j]=1/eigenvalue[j];
	evsimilartransform(n,reciprocal,eigenvector,gi);		// gi = (eigenvector)(1/eigenvalue)(eigenvector)^T  
									// G is real and symmetric, so the matrix of eigenvectors is orthogonal
	for(j=0;j<n;j++)
		if(eigenvalue[j]<TOLERANCE) reciprocal[j]=0; 
		else reciprocal[j]=sqrt(eigenvalue[j]);
	evsimilartransform(n,reciprocal,eigenvector,gs);

	for(j=0;j<n;j++)
		if(eigenvalue[j]<TOLERANCE) reciprocal[j]=0; 
		else reciprocal[j]=1/sqrt(eigenvalue[j]);
	evsimilartransform(n,reciprocal,eigenvector,gq);

	free(eigenvector);
	free(eigenvalue);
	free(reciprocal);
	free(WORK);
	return(nz);
}


// calculate the tridiagonal form, gi, of a symmetric matrix, g. 

int calculategeigenvalue(int n,double *g,double *gi) // n=nr
{
	int j,INFO=0;
	double *eigenvalue=NULL,*eigenvector=NULL,*WORK=NULL;
	double TOLERANCE=1E-10*sqrt((double)(n));
	if(NULL==(WORK=(double *)calloc(3*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvalue=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvector=(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");

	calculatetriangular(n,g,gi);					// gi is temporary strorage
	fDSPEV('V','U',n,gi,eigenvalue,eigenvector,n,WORK,&INFO);	// convert gi onto tridiagonal form
//	fprintf(stderr,"printing eigenvalues of G matrix\n");
//	for(j=0;j<n;j++)
//		fprintf(stderr,"%5d: %12.8lf\n",j+1,eigenvalue[j]);

	free(eigenvector);
	free(eigenvalue);
	free(WORK);
	return(0);
}


// this function calculates the inverse of the B-Matrix, B^-1 = (M^-1 B^T) G^-1

int calculatebimatrix(int m,int n,double *mibt,double *gi,double *bi)	// m=nc n=nr
{
	fDGEMM('N','N',m,n,n,1.0,mibt,m,gi,n,0.0,bi,m); 
	return(0);
}


// calculate the upper triangular half of a symmetric n X n matrix, r, and save it in t

int calculatetriangular(int n,double *r,double *t)	// n=nr r=h->g t=h->gi
{
	int i,j;
	for(j=0;j<n;j++)
		for(i=0;i<=j;i++)
			t[uindex(i,j)]=0.5*(r[rindex(i,j,n,n)]+r[rindex(j,i,n,n)]);
	return(n);
}


// calculate the matrix product T = U V U^T , (V is a diagonal matrix) 

int evsimilartransform(int n,double *v,double *u,double *t)
{
	int i,j,k;
	double e;
	for(i=0;i<n;i++)
	{
		for(j=0;j<n;j++)
		{
			e=0;
			for(k=0;k<n;k++)
				e+=u[rindex(i,k,n,n)]*v[k]*u[rindex(j,k,n,n)];
			t[rindex(i,j,n,n)]=e;
		}
	}
	return(0);
}


// check if B B^-1 = I is true for a non-square matrix B

int checkvalidinverse(int m,int n,double *b,double *a,char *s) // m=nc n=nr b=b a=bi
{
	double *u;
	if(NULL==(u=(double *)calloc(n*n,sizeof(double)))) ued3error("memory");
	fDGEMM('N','N',n,n,m,1.0,b,n,a,m,0.0,u,n); 
	fprintf(stderr,"%s %s^-1 = I\n",s,s);
	printrectangularmatrix(n,n,u);
	free(u);
	return(0);
}

int calculategmatrix_direct(int m,int n,double *mf,double *b,double *g)
{
	int i,j,k;
	double e;
	for(i=0;i<n;i++)
	{
		for(j=0;j<n;j++)
		{
			e=0;
			for(k=0;k<m;k++)
				e+=b[rindex(k,i,m,n)]/mf[k]*b[rindex(k,j,m,n)];
			g[rindex(j,i,n,m)]=e;
		}
	}
	return(n);
}


// this function calculates the inverted hessian matrix

int calculatehimatrix(int n,double *h,double *hi) // same as calculategimatrix
{
	int j,nz=0,INFO=0;
	double *reciprocal=NULL,*eigenvalue=NULL,*eigenvector=NULL,*WORK=NULL;
	double TOLERANCE=1E-10*sqrt((double)(n));
	if(NULL==(WORK=(double *)calloc(3*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(reciprocal=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvalue=(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(eigenvector=(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");

	calculatetriangular(n,h,hi);					// hi is temporary storage		
	fDSPEV('V','U',n,hi,eigenvalue,eigenvector,n,WORK,&INFO);
	for(j=0;j<n;j++)	// retrieve symmetry
	{
		if(eigenvalue[j]<TOLERANCE) { nz++; reciprocal[j]=0; }	// reject small eigenvalues for the inverse operation
		else
			reciprocal[j]=1/eigenvalue[j];
	}
	evsimilartransform(n,reciprocal,eigenvector,hi);		// hi = (eigenvector)(1/eigenvalue)(eigenvector)^T

	free(eigenvector);
	free(eigenvalue);
	free(reciprocal);
	free(WORK);
	return(nz);							// return the number of eigenvalues below the threshold
}


// Assign the linear correction terms to the structure internal

int calculatelinearcorrection(int m,int n,double *x0,ZMT *z,double *lc)
{
	int i;
	for(i=0;i<n;i++)
		lc[i]=calculatenonlinearity(z[i].type,z[i].id,x0);
	return n;
}


// this function calculates the transformation matrices on the fly without saving them to memory
// it points its last argument pointer to G^-1

void instantaneousgi(int m,int n,ZMT *z,double *x,double *w,double *r,double *gi)
{
	double *bt=NULL,*mibt=NULL,*g=NULL,*gs=NULL,*gq=NULL;

	if(NULL==(bt =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(mibt =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(g =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gs =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gq =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(r==NULL)
	{
		double *ri=NULL;
		if(NULL==(ri =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
		calculatebtmatrix(n,m,z,ri,x,bt);	// B^T
		free(ri);
	}
	else
	{
		calculatebtmatrix(n,m,z,r,x,bt);
	}
	calculatemibtmatrix(m,n,bt,w,mibt);		// M^-1 B^T = (B M^-1^T)^T = (B M^-1)^T
	calculategtmatrix(m,n,bt,mibt,g);		// G^T = G = (B^T)^T (M^-1 B^T)
	calculategsmatrix(n,g,gi,gs,gq);		// G^-1, G^(1/2), G^-(1/2)

	free(gq);
	free(gs);
	free(g);
	free(mibt);
	free(bt);
	return;
}


// this function calculates the transformation matrices on the fly without saving them to memory
// it points its last argument pointer to B^-1

void instantaneous(int m,int n,ZMT *z,double *x,double *w,double *r,double *bi)
{
	double *bt=NULL,*mibt=NULL,*g=NULL,*gs=NULL,*gi=NULL,*gq=NULL;

	if(NULL==(bt =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(mibt =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(g =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gi =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gs =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gq =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(r==NULL)
	{
		double *ri=NULL;
		if(NULL==(ri =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
		calculatebtmatrix(n,m,z,ri,x,bt);	// B^T
		free(ri);
	}
	else
	{
		calculatebtmatrix(n,m,z,r,x,bt);
	}

	calculatemibtmatrix(m,n,bt,w,mibt);	// M^-1 B^T = (B M^-1^T)^T = (B M^-1)^T
	calculategtmatrix(m,n,bt,mibt,g);	// G^T = G = (B^T)^T (M^-1 B^T)
	calculategsmatrix(n,g,gi,gs,gq);	// G^-1, G^(1/2), G^-(1/2)
	calculatebimatrix(m,n,mibt,gi,bi);	// B^-1 = (M^-1 B^T) G^-1

	free(gq);
	free(gs);
	free(gi);
	free(g);
	free(mibt);
	free(bt);
	return;
}


// this function calculates d(G^-1)/dr exactly

int calculatedgidrtensor(int m,int n,ZMT *z,double *x0,double *w,double *gi2)
{
	int i,j,k;
	double s,*dgidx=NULL,*bi=NULL,*gip=NULL,*gin=NULL,*x=NULL;

	if(NULL==(dgidx =(double *)calloc(m*n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gip =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(gin =(double *)calloc(n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bi =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(x =(double *)calloc(m,sizeof(double)))) ued3error("lack of memory");

	for(k=0;k<m;k++)				// evaluate d(G^-1)/dx numerically
	{
		fDCOPY(m,x0,1,x,1);
		x[k]+= DIFFSTEP*Angstrom;
		instantaneousgi(m,n,z,x,w,NULL,gip);
		fDCOPY(m,x0,1,x,1);
		x[k]-= DIFFSTEP*Angstrom;
		instantaneousgi(m,n,z,x,w,NULL,gin);
		for(j=0;j<n;j++)
			for(i=0;i<n;i++)
				dgidx[cindex(k,j,i,m,n,n)]
				 =(gip[rindex(j,i,n,n)]-gin[rindex(j,i,n,n)])
				  /(2*DIFFSTEP*Angstrom);
	}
	instantaneous(m,n,z,x0,w,NULL,bi);		// evaluate B^-1
	
	for(j=0;j<n;j++)				// evaluate d(G^-1)/dr = dx/dr*d(G^-1)/dx = B^-1*dgidx
		for(i=0;i<n;i++)
			fDGEMV('T',m,n,1.0,bi,m,dgidx+cindex(0,j,i,m,n,n),1,0.0,gi2+cindex(0,j,i,n,n,n),1);

//	for(i=0;i<n;i++)
//		for(j=0;j<n;j++)
//			for(k=0;k<j;k++)
//			{
//				s=0.5*(gi2[cindex(i,j,k,n,n,n)]+gi2[cindex(i,k,j,n,n,n)]);
//				gi2[cindex(i,j,k,n,n,n)]=s;
//				gi2[cindex(i,k,j,n,n,n)]=s;
//			}	// NOT NECESSARY //

//	// to eliminate nonlinearity artifact		// doesn't work
//	for(j=0;j<n;j++) gi2[cindex(j,j,j,n,n,n)]=0;

#if(defined(SQUARE_ONLY))
	for(i=0;i<n;i++)
		for(j=0;j<n;j++)
			for(k=0;k<j;k++)
				gi2[cindex(k,j,i,n,n,n)]=gi2[cindex(j,k,i,n,n,n)]=0;	// too small
			//	gi2[cindex(j,i,k,n,n,n)]=gi2[cindex(k,i,j,n,n,n)]=0;
			//	gi2[cindex(i,j,k,n,n,n)]=gi2[cindex(i,k,j,n,n,n)]=0;
#endif

	free(x);
	free(bi);
	free(gin);
	free(gip);
	free(dgidx);
	return(0);
}


// this function calculates d(G^-1)/dr by subtraction of a linear correction term

#include "intrinsic.h"
int calculatedgidrlineartensor(int m,int n,ZMT *z,double *x0,double *w,double *gi2)
{
	int i,j,k;
	double *dbidx=NULL,*dbidr=NULL,*dgidr=NULL,*bi=NULL,*bip=NULL,*bin=NULL,*x=NULL,*lc=NULL;

	if(NULL==(dbidx =(double *)calloc(m*n*m,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(dbidr =(double *)calloc(m*n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(dgidr =(double *)calloc(n*n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bip =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bin =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bi =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(x =(double *)calloc(m,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(lc =(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");

	for(k=0;k<m;k++)			// evaluate d(B^-1)/dx numerically
	{
		fDCOPY(m,x0,1,x,1);
		x[k]+=DIFFSTEP*Angstrom;
		instantaneous(m,n,z,x,w,NULL,bip);
		fDCOPY(m,x0,1,x,1);
		x[k]-=DIFFSTEP*Angstrom;
		instantaneous(m,n,z,x,w,NULL,bin);
		for(j=0;j<n;j++)
			for(i=0;i<m;i++)
				dbidx[cindex(k,j,i,m,n,m)]
				 =(bip[rindex(i,j,m,n)]-bin[rindex(i,j,m,n)])
				  /(2*DIFFSTEP*Angstrom);
	}
	instantaneous(m,n,z,x0,w,NULL,bi);	// evaluate B^-1
	for(j=0;j<n;j++)
		for(i=0;i<m;i++)
			fDGEMV('T',m,n,1.0,bi,m,dbidx+cindex(0,j,i,m,n,m),1,0.0,dbidr+cindex(i,j,0,m,n,n),m*n);

	for(k=0;k<n;k++)
		lc[k]=calculateallinearity(z[k].type,z[k].id,x0);
	// z = f(r) , dz/dr = f' , d2z/dr2 = f"
	// BI = dx/dz = dx/dr*dr/dz
	// dBI/dz = dx/dr*d2r/dz2 + d2x/dzdr*dr/dz = dx/dr*d2r/dz2 + d2x/dr2*(dr/dz)^2
	for(k=0;k<n;k++)
		fDAXPY(m,-lc[k],bi+rindex(0,k,m,n),1,dbidr+cindex(0,k,k,m,n,n),1);

	for(j=0;j<n;j++)
		for(i=0;i<m;i++)
			bip[rindex(i,j,m,n)]=bi[rindex(i,j,m,n)]*w[i];	// B^-1 M

	for(k=0;k<n;k++)
		fDGEMM('T','N',n,n,m,1.0,bip,m,dbidr+cindex(0,0,k,m,n,n),m,0.0,dgidr+cindex(0,0,k,n,n,n),n);
	for(k=0;k<n;k++)
		fDGEMM('T','N',n,n,m,1.0,dbidr+cindex(0,0,k,m,n,n),m,bip,m,1.0,dgidr+cindex(0,0,k,n,n,n),n);

	for(k=0;k<n;k++)
		for(j=0;j<n;j++)
			for(i=0;i<n;i++)
				gi2[cindex(k,i,j,n,n,n)]=dgidr[cindex(i,j,k,n,n,n)];

	free(lc);
	free(x);
	free(bi);
	free(bin);
	free(bip);
	free(dgidr);
	free(dbidr);
	free(dbidx);
	return(0);
}


// this function calculates d(G^-1)/dr by a different algorythm for numerical differentiation.

int calculatedgibitensor(int m,int n,ZMT *z,double *x0,double *w,double *gi2)
{
	fprintf(stderr," & evaluating dG^-1/dr approximately\n");
	int i,j,k;
	double mult=0,step=0,*dgidr=NULL,*dbidr=NULL,*bi=NULL,*bip=NULL,*bin=NULL,*x=NULL;

	if(NULL==(dgidr =(double *)calloc(n*n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(dbidr =(double *)calloc(m*n*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bip =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bin =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bi =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(x =(double *)calloc(m,sizeof(double)))) ued3error("lack of memory");

	instantaneous(m,n,z,x0,w,NULL,bi);	// evaluate B^-1
	for(k=0;k<n;k++)
	{
		switch(z[k].type)
		{
		case DIHEDRAL_ANGLE:
		case DIHEDRAL_360:
			step = DIFFSTEP;
			mult = 0;
			break;
		case BENDING_ANGLE:
		case BENDING2_ANGLE:
			step = DIFFSTEP;
			mult = 1;
			break;
		default:
			step = DIFFSTEP*Angstrom;
			mult = 1;
			break;
		}
		fDCOPY(m,x0,1,x,1);
		fDAXPY(m, step,bi+rindex(0,k,m,n),1,x,1);
		instantaneous(m,n,z,x,w,NULL,bip);
		fDCOPY(m,x0,1,x,1);
		fDAXPY(m,-step,bi+rindex(0,k,m,n),1,x,1);
		instantaneous(m,n,z,x,w,NULL,bin);
		for(j=0;j<n;j++)
			for(i=0;i<m;i++)
				dbidr[cindex(i,j,k,m,n,n)]
				 =(bip[rindex(i,j,m,n)]-bin[rindex(i,j,m,n)])
				  /(2*step)*mult;
	}
	for(j=0;j<n;j++)
		for(i=0;i<m;i++)
			bip[rindex(i,j,m,n)]=bi[rindex(i,j,m,n)]*w[i];	// B^-1 M
#if(defined(FURTHER_APPROXIMATE_GINVERSE_DERIVATIVE))
	fprintf(stderr," & evaluating dG^-1/dr approximately with single differentiation\n");
	// resulting dG^-1/dr is not symmetric, but centrifugal force ends up the same
	for(k=0;k<n;k++)
		fDGEMM('T','N',n,n,m,2.0,bip,m,dbidr+cindex(0,0,k,m,n,n),m,0.0,dgidr+cindex(0,0,k,n,n,n),n);
#else
	fprintf(stderr," & evaluating dG^-1/dr approximately with double differentiation\n");
	// resulting dG^-1/dr is exactly same as the exact one in above, except torsion
	for(k=0;k<n;k++)
		fDGEMM('T','N',n,n,m,1.0,bip,m,dbidr+cindex(0,0,k,m,n,n),m,0.0,dgidr+cindex(0,0,k,n,n,n),n);
	for(k=0;k<n;k++)
		fDGEMM('T','N',n,n,m,1.0,dbidr+cindex(0,0,k,m,n,n),m,bip,m,1.0,dgidr+cindex(0,0,k,n,n,n),n);
#endif

	for(k=0;k<n;k++)
		for(j=0;j<n;j++)
			for(i=0;i<n;i++)
				gi2[cindex(k,i,j,n,n,n)]=dgidr[cindex(i,j,k,n,n,n)];

	if(0) for(k=0;k<n;k++) gi2[cindex(k,k,k,n,n,n)]=0;
	
	free(x);
	free(bi);
	free(bin);
	free(bip);
	free(dbidr);
	free(dgidr);
	return(0);
}


// this function calculates a second order tensor

int calculatebi2tensor(int m,int n,ZMT *z,double *x0,double *w,double *bi2)
{
	int i,j,k;
	double s,*dbidx=NULL,*bip=NULL,*bin=NULL,*x=NULL,*lc=NULL;

	if(NULL==(dbidx =(double *)calloc(m*m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bip =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(bin =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(x =(double *)calloc(m,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(lc =(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");

	for(k=0;k<m;k++)		// evaluate d(B^-1)/dx numerically
	{
		fDCOPY(m,x0,1,x,1);
		x[k]+= DIFFSTEP*Angstrom;
		instantaneous(m,n,z,x,w,NULL,bip);
		fDCOPY(m,x0,1,x,1);
		x[k]-= DIFFSTEP*Angstrom;
		instantaneous(m,n,z,x,w,NULL,bin);
		for(j=0;j<n;j++)
			for(i=0;i<m;i++)
				dbidx[cindex(k,j,i,m,n,m)]
				 =(bip[rindex(i,j,m,n)]-bin[rindex(i,j,m,n)])
				  /(2*DIFFSTEP*Angstrom);
	}
	instantaneous(m,n,z,x0,w,NULL,bip);	// evaluate B^-1
	for(j=0;j<n;j++)
		for(i=0;i<m;i++)
			fDGEMV('T',m,n,1.0,bip,m,dbidx+cindex(0,j,i,m,n,m),1,0.0,bi2+cindex(0,j,i,n,n,m),1);  // (B^-1)^T*d(B^-1)/dx = ???
	
	for(i=0;i<m;i++)			// correct for numerical error
		for(j=0;j<n;j++)
			for(k=0;k<j;k++)
			{
				s=0.5*(bi2[cindex(k,j,i,n,n,m)]+bi2[cindex(j,k,i,n,n,m)]);
#if(defined(SQUARE_ONLY))
				s=0;
#endif
				bi2[cindex(k,j,i,n,n,m)]=s;
				bi2[cindex(j,k,i,n,n,m)]=s;
			}

//#if(defined(SQUARE_ONLY))
//	for(i=0;i<m;i++)
//		for(j=0;j<n;j++)
//			for(k=0;k<j;k++)
//				bi2[cindex(k,j,i,n,n,m)]=bi2[cindex(j,k,i,n,n,m)]=0;
//#endif

//L	for(k=0;k<n;k++)
//L		lc[k]=calculatenonlinearity(z[k].type,z[k].id,x0);
//L	for(k=0;k<n;k++)
//L		fDAXPY(m,-lc[k],bip+rindex(0,k,m,n),1,bi2+cindex(k,k,0,n,n,m),n*n);

	free(lc);
	free(x);
	free(bin);
	free(bip);
	free(dbidx);
	return(0);
}

// calculate the C^T tensor, i.e. the derivative of the B^T-Matrix numerically

int calculateb2ttensor(int m,int n,ZMT *z,double *x0,double *w,double *ct)
{
	int i,j,k;
	double *btp=NULL,*btn=NULL,*x=NULL,*r=NULL,s;

	if(NULL==(btp =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(btn =(double *)calloc(m*n,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(x =(double *)calloc(m,sizeof(double)))) ued3error("lack of memory");
	if(NULL==(r =(double *)calloc(n,sizeof(double)))) ued3error("lack of memory");

	for(k=0;k<m;k++)				// differentiate wrt to each cartesian coordinate
	{
		fDCOPY(m,x0,1,x,1);
		x[k]+= DIFFSTEP*Angstrom;
		calculatebtmatrix(n,m,z,r,x,btp);
		fDCOPY(m,x0,1,x,1);
		x[k]-= DIFFSTEP*Angstrom;
		calculatebtmatrix(n,m,z,r,x,btn);
		for(j=0;j<m;j++)
			for(i=0;i<n;i++)
				ct[cindex(k,j,i,m,m,n)]
				 =(btp[rindex(j,i,m,n)]-btn[rindex(j,i,m,n)])
				  /(2*DIFFSTEP*Angstrom);
	}
	for(i=0;i<n;i++)				// correct for numerical error
		for(j=1;j<m;j++)
			for(k=0;k<j;k++)
			{
				s = 0.5*(ct[cindex(k,j,i,m,m,n)]+ct[cindex(j,k,i,m,m,n)]);
#if(defined(SQUARE_ONLY))
				s=0;
#endif
				ct[cindex(k,j,i,m,m,n)] = s;
				ct[cindex(j,k,i,m,m,n)] = s;
			}

	free(r);
	free(x);
	free(btn);
	free(btp);
	return(0);
}

