[gmx-developers] MDAnalysis' libxdrfile2 vs. GROMACS' xdrfile

Dominik 'Rathann' Mierzejewski dominik at greysector.net
Mon May 11 23:23:12 CEST 2015


Dear developers,
there seems to be a fork of GROMACS' xdrfile by the MDAnalysis project:
https://github.com/MDAnalysis/mdanalysis/tree/develop/package/src/xdrfile

What are the plans for xdrfile? Would you be open to incorporating
MDAnalysis' fork changes into your library?

They have added proper python bindings (via swig) and some new C APIs.
There are also small changes in API, for example in read_trr (new
parameter) and xdrstdio_(get|set)pos() functions (parameter types).
I'm attaching a sanitized diff of the C code. Python changes are too
extensive to make a diff of.

The reason I'm asking is that I'm trying to package MDAnalysis for
Fedora (and I became the GROMACS maintainer in Fedora recently as well).
Our guidelines forbid bundling of third-party libraries - system
versions are to be used. xdrfile is already packaged in Fedora, but
MDAnalysis bundles its fork - libxdrfile2 which I have to unbundle
and package separately. It would make everyone's life much easier if
xdrfile could be simply updated to include the new python bindings and
APIs so that MDAnalysis could use them.

Regards,
Dominik
-- 
Fedora http://fedoraproject.org/wiki/User:Rathann
RPMFusion http://rpmfusion.org
"Faith manages."
        -- Delenn to Lennier in Babylon 5:"Confessions and Lamentations"
-------------- next part --------------
diff -up xdrfile-1.1.4/include/xdrfile.h.xdr2 xdrfile-1.1.4/include/xdrfile.h
--- xdrfile-1.1.4/include/xdrfile.h.xdr2	2014-06-24 16:34:15.000000000 +0200
+++ xdrfile-1.1.4/include/xdrfile.h	2015-05-11 15:27:18.240834520 +0200
@@ -626,6 +626,11 @@ extern "C"
 									double *     precision,
 									XDRFILE *    xfp);
 
+	int64_t
+	xdr_tell(XDRFILE *xd);
+
+	int
+	xdr_seek(XDRFILE *xd, int64_t pos, int whence);
 
 
 #ifdef __cplusplus
diff -up xdrfile-1.1.4/include/xdrfile_trr.h.xdr2 xdrfile-1.1.4/include/xdrfile_trr.h
--- xdrfile-1.1.4/include/xdrfile_trr.h.xdr2	2014-06-24 16:34:11.000000000 +0200
+++ xdrfile-1.1.4/include/xdrfile_trr.h	2015-05-11 15:27:18.240834520 +0200
@@ -46,16 +46,27 @@ extern "C" {
   /* This function returns the number of atoms in the xtc file in *natoms */
   extern int read_trr_natoms(char *fn,int *natoms);
   
+  /* Skip through trajectory, reading headers, obtain the total number of frames in the trr */ 
+  extern int read_trr_numframes(char *fn, int *numframes, int64_t **offsets);
+
   /* Read one frame of an open xtc file. If either of x,v,f,box are
      NULL the arrays will be read from the file but not used.  */
   extern int read_trr(XDRFILE *xd,int natoms,int *step,float *t,float *lambda,
-		      matrix box,rvec *x,rvec *v,rvec *f);
+		      matrix box,rvec *x,rvec *v,rvec *f, int *has_prop);
 
   /* Write a frame to xtc file */
   extern int write_trr(XDRFILE *xd,int natoms,int step,float t,float lambda,
 		       matrix box,rvec *x,rvec *v,rvec *f);
 
+/* Minimum TRR header size. It can have 8 bytes more if we have double time and lambda. */
+#define TRR_MIN_HEADER_SIZE 54
+#define TRR_DOUBLE_XTRA_HEADER 8
   
+/* Flags to signal the update of pos/vel/forces */
+#define HASX 1
+#define HASV 2
+#define HASF 4
+
 #ifdef __cplusplus
 }
 #endif
diff -up xdrfile-1.1.4/include/xdrfile_xtc.h.xdr2 xdrfile-1.1.4/include/xdrfile_xtc.h
--- xdrfile-1.1.4/include/xdrfile_xtc.h.xdr2	2014-06-24 16:34:07.000000000 +0200
+++ xdrfile-1.1.4/include/xdrfile_xtc.h	2015-05-11 15:27:18.240834520 +0200
@@ -46,16 +46,28 @@ extern "C" {
    
   /* This function returns the number of atoms in the xtc file in *natoms */
   extern int read_xtc_natoms(char *fn,int *natoms);
+
+  /* Seek through trajectory counting and indexing frames */
+  extern int read_xtc_numframes(char *fn, int *numframes, int64_t **offsets);
   
   /* Read one frame of an open xtc file */
   extern int read_xtc(XDRFILE *xd,int natoms,int *step,float *time,
 		      matrix box,rvec *x,float *prec);
-  
+
   /* Write a frame to xtc file */
   extern int write_xtc(XDRFILE *xd,
 		       int natoms,int step,float time,
 		       matrix box,rvec *x,float prec);
   
+/* XTC header fields until coord floats: *** only for trajectories of less than 10 atoms! ***  */
+/* magic natoms step time DIM*DIM_box_vecs natoms */
+#define XTC_SHORTHEADER_SIZE (20 + DIM*DIM*4)
+/* Short XTCs store each coordinate as a 32-bit float. */
+#define XTC_SHORT_BYTESPERATOM 12
+/* XTC header fields until frame bytes: *** only for trajectories of more than 9 atoms! ***  */
+/* magic natoms step time DIM*DIM_box_vecs natoms prec DIM_min_xyz DIM_max_xyz smallidx */
+#define XTC_HEADER_SIZE (DIM*DIM*4 + DIM*2 + 46)
+
 #ifdef __cplusplus
 }
 #endif
diff -up xdrfile-1.1.4/src/xdrfile.c.xdr2 xdrfile-1.1.4/src/xdrfile.c
--- xdrfile-1.1.4/src/xdrfile.c.xdr2	2014-06-24 16:33:48.000000000 +0200
+++ xdrfile-1.1.4/src/xdrfile.c	2015-05-11 15:28:00.042496896 +0200
@@ -42,8 +42,6 @@
 #include <math.h>
 #include <limits.h>
 
-#define _FILE_OFFSET_BITS  64
-
 /* get fixed-width types if we are using ANSI C99 */
 #ifdef HAVE_STDINT_H
 #  include <stdint.h>
@@ -131,7 +129,7 @@ struct XDR
 		int (*x_putbytes) (XDR *__xdrs, char *__addr, unsigned int __len);
 		/* two next routines are not 64-bit IO safe - don't use! */
 		unsigned int (*x_getpostn) (XDR *__xdrs); 
-		int (*x_setpostn) (XDR *__xdrs, unsigned int __pos);
+		int (*x_setpostn) (XDR *__xdrs, off_t __pos);
 		void (*x_destroy) (XDR *__xdrs); 
 	} 
     *x_ops;	    
@@ -2517,8 +2515,8 @@ static int xdrstdio_getlong (XDR *, int3
 static int xdrstdio_putlong (XDR *, int32_t *);
 static int xdrstdio_getbytes (XDR *, char *, unsigned int);
 static int xdrstdio_putbytes (XDR *, char *, unsigned int);
-static unsigned int xdrstdio_getpos (XDR *);
-static int xdrstdio_setpos (XDR *, unsigned int);
+static off_t xdrstdio_getpos (XDR *);
+static int xdrstdio_setpos (XDR *, off_t, int);
 static void xdrstdio_destroy (XDR *);
 
 /*
@@ -2599,19 +2597,34 @@ xdrstdio_putbytes (XDR *xdrs, char *addr
 	return 1;
 }
 
-/* 32 bit fileseek operations */
-static unsigned int
+static off_t
 xdrstdio_getpos (XDR *xdrs)
 {
-	return (unsigned int) ftell ((FILE *) xdrs->x_private);
+    return ftello((FILE *) xdrs->x_private);
 }
 
 static int
-xdrstdio_setpos (XDR *xdrs, unsigned int pos)
+xdrstdio_setpos (XDR *xdrs, off_t pos, int whence)
+{
+    return fseeko((FILE *) xdrs->x_private, pos, whence) < 0 ? exdrNR : exdrOK;
+}
+
+
+int64_t xdr_tell(XDRFILE *xd)
+/* Reads position in file */
 {
-	return fseek ((FILE *) xdrs->x_private, pos, 0) < 0 ? 0 : 1;
+    return (int64_t)xdrstdio_getpos(xd->xdr);
 }
 
+int xdr_seek(XDRFILE *xd, int64_t pos, int whence)
+/* Seeks to position in file */
+{
+    int result;
+    if ((result = xdrstdio_setpos(xd->xdr, (off_t) pos, whence)) != exdrOK)
+        return result;
+
+    return exdrOK;
+}
 
 
 #endif /* HAVE_RPC_XDR_H not defined */
diff -up xdrfile-1.1.4/src/xdrfile_trr.c.xdr2 xdrfile-1.1.4/src/xdrfile_trr.c
--- xdrfile-1.1.4/src/xdrfile_trr.c.xdr2	2014-06-24 16:33:34.000000000 +0200
+++ xdrfile-1.1.4/src/xdrfile_trr.c	2015-05-11 15:27:18.241834512 +0200
@@ -425,7 +425,7 @@ static int do_htrn(XDRFILE *xd,mybool bR
 }
 
 static int do_trn(XDRFILE *xd,mybool bRead,int *step,float *t,float *lambda,
-				  matrix box,int *natoms,rvec *x,rvec *v,rvec *f)
+				  matrix box,int *natoms,rvec *x,rvec *v,rvec *f, int *has_prop)
 {
     t_trnheader *sh;
     int result;
@@ -452,6 +452,13 @@ static int do_trn(XDRFILE *xd,mybool bRe
         *step   = sh->step;
         *t      = sh->td;
         *lambda = sh->lambdad;
+        /* Flag what we read */
+        if (sh->x_size)
+            *has_prop |= HASX;
+        if (sh->v_size)
+            *has_prop |= HASV;
+        if (sh->f_size)
+            *has_prop |= HASF;
     }
     if ((result = do_htrn(xd,bRead,sh,box,x,v,f)) != exdrOK)
         return result;
@@ -484,15 +491,92 @@ int read_trr_natoms(char *fn,int *natoms
 	return exdrOK;
 }
 
+int read_trr_numframes(char *fn, int *numframes, int64_t **offsets)
+{
+    XDRFILE *xd;
+    t_trnheader sh;
+    float time, lambda;
+    int result, framebytes, est_nframes, totalframebytes;
+    int64_t filesize, frame_offset;
+
+    if ((xd = xdrfile_open(fn,"r"))==NULL)
+        return exdrFILENOTFOUND;
+    if (xdr_seek(xd, 0L, SEEK_END) != exdrOK)
+    {
+        xdrfile_close(xd);
+        return exdrNR;
+    }
+    filesize = xdr_tell(xd);
+    if (xdr_seek(xd, 0L, SEEK_SET) != exdrOK)
+    {
+        xdrfile_close(xd);
+        return exdrNR;
+    }
+
+    if ((result = do_trnheader(xd,1,&sh)) != exdrOK)
+    {
+        xdrfile_close(xd);
+        return result;
+    }
+
+    framebytes = sh.ir_size + sh.e_size + sh.box_size +
+                 sh.vir_size + sh.pres_size + sh.top_size +
+                 sh.sym_size + sh.x_size + sh.v_size + sh.f_size;
+
+    est_nframes = (int) (filesize/((int64_t) (framebytes + TRR_MIN_HEADER_SIZE)) + 1); // add one because it'd be easy to underestimate low frame numbers. 
+    est_nframes += est_nframes/5;
+
+    /* Allocate memory for the frame index array */
+    if ((*offsets=(int64_t *)malloc(sizeof(int64_t)*est_nframes))==NULL)
+    {
+        xdrfile_close(xd);
+        return exdrNOMEM;
+    }
+
+    (*offsets)[0] = 0L;
+    *numframes = 1;
+    while (1)
+    {
+        if (xdr_seek(xd, (int64_t) (framebytes), SEEK_CUR) != exdrOK) {
+            free(*offsets);
+            xdrfile_close(xd);
+            return exdrNR;
+        }
+        frame_offset = xdr_tell(xd); /* Store it now, before we read the header */
+        if ((result = do_trnheader(xd,1,&sh)) != exdrOK) /* Interpreting as EOF */
+            break;
+        /* Read was successful; this is another frame */
+        /* Check if we need to enlarge array */
+        if (*numframes == est_nframes){
+            est_nframes += est_nframes/5 + 1; // Increase in 20% stretches
+            if ((*offsets = realloc(*offsets, sizeof(int64_t)*est_nframes))==NULL)
+            {
+                xdrfile_close(xd);
+                return exdrNOMEM;
+            }
+        }
+        (*offsets)[*numframes] = frame_offset;
+        (*numframes)++;
+        /* Calculate how much to skip this time */
+        framebytes = sh.ir_size + sh.e_size + sh.box_size +
+                     sh.vir_size + sh.pres_size + sh.top_size +
+                     sh.sym_size + sh.x_size + sh.v_size + sh.f_size;
+    }
+    xdrfile_close(xd);
+    return exdrOK;
+}
+
+
 int write_trr(XDRFILE *xd,int natoms,int step,float t,float lambda,
 			  matrix box,rvec *x,rvec *v,rvec *f)
 {
-	return do_trn(xd,0,&step,&t,&lambda,box,&natoms,x,v,f);
+	int *plcholder;
+	return do_trn(xd,0,&step,&t,&lambda,box,&natoms,x,v,f, plcholder);
 }
 
 int read_trr(XDRFILE *xd,int natoms,int *step,float *t,float *lambda,
-			 matrix box,rvec *x,rvec *v,rvec *f)
+			 matrix box,rvec *x,rvec *v,rvec *f, int *has_prop)
 {
-	return do_trn(xd,1,step,t,lambda,box,&natoms,x,v,f);
+	return do_trn(xd,1,step,t,lambda,box,&natoms,x,v,f,has_prop);
 }
 
diff -up xdrfile-1.1.4/src/xdrfile_xtc.c.xdr2 xdrfile-1.1.4/src/xdrfile_xtc.c
--- xdrfile-1.1.4/src/xdrfile_xtc.c.xdr2	2014-06-24 16:33:42.000000000 +0200
+++ xdrfile-1.1.4/src/xdrfile_xtc.c	2015-05-11 15:27:18.241834512 +0200
@@ -118,6 +118,101 @@ int read_xtc(XDRFILE *xd,
 	return exdrOK;
 }
 
+int read_xtc_numframes(char *fn, int *numframes, int64_t **offsets)
+{
+    XDRFILE *xd;
+    int framebytes, natoms, step;
+    float time;
+    int64_t filesize;
+
+    if ((xd = xdrfile_open(fn,"r"))==NULL)
+        return exdrFILENOTFOUND;
+
+    if (xtc_header(xd,&natoms,&step,&time,TRUE) != exdrOK)
+    {
+        xdrfile_close(xd);
+        return exdrHEADER;
+    }
+
+    if (xdr_seek(xd, 0L, SEEK_END) != exdrOK)
+    {
+        xdrfile_close(xd);
+        return exdrNR;
+    }
+    filesize = xdr_tell(xd);
+
+    /* Case of fewer than 10 atoms. Framesize known. */
+    if (natoms < 10)
+    {
+        int i;
+        xdrfile_close(xd);
+        framebytes = XTC_SHORTHEADER_SIZE + XTC_SHORT_BYTESPERATOM*natoms;
+        *numframes = filesize/framebytes; /* Should we complain if framesize doesn't divide filesize? */
+        /* Allocate memory for the frame index array */
+        if ((*offsets=(int64_t *)malloc(sizeof(int64_t)*(*numframes)))==NULL)
+            return exdrNOMEM;
+        for (i=0; i<*numframes; i++)
+        {
+            (*offsets)[i] = i*framebytes;
+        }
+        return exdrOK;
+    }
+    else /* No easy way out. We must iterate. */
+    {
+        int est_nframes;
+        /* Estimation of number of frames, with 20% allowance for error. */
+        if (xdr_seek(xd, (int64_t) XTC_HEADER_SIZE, SEEK_SET) != exdrOK)
+        {
+            xdrfile_close(xd);
+            return exdrNR;
+        }
+        if (xdrfile_read_int(&framebytes,1,xd) == 0)
+        {
+            xdrfile_close(xd);
+            return exdrENDOFFILE;
+        }
+        framebytes = (framebytes + 3) & ~0x03; //Rounding to the next 32-bit boundary
+        est_nframes = (int) (filesize/((int64_t) (framebytes+XTC_HEADER_SIZE)) + 1); // add one because it'd be easy to underestimate low frame numbers. 
+        est_nframes += est_nframes/5;
+
+        /* Allocate memory for the frame index array */
+        if ((*offsets=(int64_t *)malloc(sizeof(int64_t)*est_nframes))==NULL)
+        {
+            xdrfile_close(xd);
+            return exdrNOMEM;
+        }
+        (*offsets)[0] = 0L;
+        *numframes = 1;
+        while (1)
+        {
+            if (xdr_seek(xd, (int64_t) (framebytes+XTC_HEADER_SIZE), SEEK_CUR) != exdrOK) {
+                free(*offsets);
+                xdrfile_close(xd);
+                return exdrNR;
+            }
+            if (xdrfile_read_int(&framebytes,1,xd) == 0)
+                break;
+            /* Read was successful; this is another frame */
+            /* Check if we need to enlarge array */
+            if (*numframes == est_nframes){
+                est_nframes += est_nframes/5 + 1; // Increase in 20% stretches
+                if ((*offsets = realloc(*offsets, sizeof(int64_t)*est_nframes))==NULL)
+                {
+                    free(*offsets);
+                    xdrfile_close(xd);
+                    return exdrNOMEM;
+                }
+            }
+            (*offsets)[*numframes] = xdr_tell(xd) - 4L - (int64_t) (XTC_HEADER_SIZE); //Account for the header and the nbytes bytes we read.
+            (*numframes)++;
+            framebytes = (framebytes + 3) & ~0x03; //Rounding to the next 32-bit boundary
+        }
+        xdrfile_close(xd);
+        return exdrOK;
+    }
+}
+
+
 int write_xtc(XDRFILE *xd,
 			  int natoms,int step,float time,
 			  matrix box,rvec *x,float prec)


More information about the gromacs.org_gmx-developers mailing list