#!/usr/bin/perl -w # gnom_Dmax_scan.pl # ---------------------------------------------------------------- # This is a script to run series of regularized Fourier transforms # for small-angle X-ray scattering (SAXS) data with different # values for the maximum intermolecular distance Dmax. # # Author: Jan Lipfert # Date: 11/1/2007 # ### --- ACKNOWLEDGEMENT AND CITATION --- ### # If you find this script useful, please cite it as # # Jan Lipfert, Rhiju Das, Vincent B. Chu, Madhuri Kudaravalli, Nathan Boyd, # Daniel Herschlag, and Sebastian Doniach # "Structural Transitions and Thermodynamics of a Glycine-Dependent Riboswitch from Vibrio cholerae" # J. Mol. Biol. 365:1393-1406 (2007) # # # # The script calls the program GNOM by D. I. Svergun # Svergun, D.I., J. Appl. Cryst. 25:495-503 (1992). # # ###--- BEFORE THE SCRIPT CAN BE USED ---### # The script requires a working copy of GNOM to be installed # and executable on the system. GNOM can be downloaded from # http://www.embl-hamburg.de/ExternalInfo/Research/Sax/software.html # # Furthermore, a number of variables need to be set in the variable # block below. The description of the variables is as follows: # # $GNOM_exec is the name of the GNOM executable, it depends on the # operating system used. See the GNOM documentation for details. # If the GNOM executable is not in the path, give the full path. # Examples: # $GNOM_exec = 'gnom45.osx'; # (For use with the Mac-OS and with the executable in the path) # $GNOM_exec = '/home/user/lipfert/software/gnom45_ux/gnom47.l86'; # (For use on a Linux operating system, with specification of the full path) # # $min_q is the minimum q vector that should be used in the analysis. # This is to cut of the very low q data close to the beam stop that tends # to be very noisy due to parasitic scattering. $min_q should be determined for # each measurement set-up, e.g. from analysis of a scattering standard. # $min_q is defined as 4 pi sin(theta)/lambda, # where 2 theta is the total scattering and lambda the X-ray wavelength, # in units of Angstroem^-1. # # $n_points is the number of points from the scattering profile to use, # with q values higher than $min_q. # # Finally, there are three variables that control which values of Dmax # are used in the series of runs with different Dmax values. The script # runs GNOM for Dmax values (in units of Angstroem) between # $Dmax_low and $Dmax_high, in steps of $Dmax_step # It is recommended to at least initially scan a rather large range of Dmax values, # with $Dmax_high - $Dmax_low >= 100 A. # # ###--- USAGE OF THE SCRIPT AND OUTPUT ---### # When all variables described above are set, # the script can be run from the command line with two arguments: # # 1) is the filename of the SAXS profile for which # the inverse transforms should be computed. # The file should be an ascii file with two columns: # The first column should be q (q = 4 pi sin(theta)/lambda in Angstroem^-1) # and the second column is the intensity. # # 2) is an arbitrary name. The output files will be called # .Dmax.extension # ### --- OUTPUT FILES --- ### # Several output files are written, the extensions are: # *.exp is the experimental scattering profile, truncated using the parameters # $min_q and $n_points # # *.ireg is the regularized profile, i.e. the fit of the P(r) derived scattering function # to the experimental data file # # *.out is the GNOM output file. This file can be used as an input to a number of # other programs by the Svergun group. # # *.pr contains the P(r) function with two columns: r (in Abgstroems) and P(r) # # These output files are written for every value of Dmax used in the loop from # $Dmax_low to $Dmax_high in steps of $Dmax_step. # # A log file is written for the entire series of transformations, which is called # .param # This file contains the goodness of fit parameters that GNOM outputs. # For more information about these parameter and their interpretation, see the GNOM # documentation: http://www.embl-hamburg.de/ExternalInfo/Research/Sax/manual_gnom.html # # Every row of the log file has the information for the transformation # with one particular value of Dmax. The file has 8 columns that contain: # Dmax - the Dmax value used # Discrepancy (between fit and data; Ideal: 0.7) # Oscillations (of the P(r) solution; Ideal: 1.1) # Stability (of the solution; Ideal: 0.0) # Systematic deviations (between fit and data; Ideal: 1.0) # Positivity (of the solution; Ideal: 1.0) # Validity (of the chosen interval in real space; Ideal: 1.0) # Real space Rg (in Angstroem) # ### --- FURTHER TOOLS --- ### # The log file can be conveniently read and visualized using the matlab script: # eval_GNOM_transform.m # # To inspect the experimental data and the scattering profiles computed # from the P(r) solutions, a convenient matlab script is: # check_Ireg_Pr.m # # These scripts are distributed, free of charge, from the website # drizzle.stanford.edu/scripts.html # ###--- Variables that need to be set before use ---### ###---(see header for a description of the variables)---### #$GNOM_exec = 'foo'; # GNOM executable $GNOM_exec = 'gnom45.osx'; # GNOM executable $min_q = 0.01889; # The minimal q value to use $n_points = 500; # Number of points from the profile to use $Dmax_low = 50; # Lowest Dmax value to use $Dmax_high = 200; # Highest Dmax value to use $Dmax_step = 5; # Increase Dmax with this step size ###--- START OF SCRIPT ---### if( ($GNOM_exec =~ /foo/) || $Dmax_low == 0) { print "### --- gnom_Dmax_scan.pl --- ###\n"; print "You need to define several variables before the first use!\n"; print "See the top of the file for further information. \n(Open gnom_Dmax_scan.pl with any text viewer).\n"; die "\n"; } if(!( defined($ARGV[0])) || !( defined($ARGV[1]))) { print "Usage: ./gnom_Dmax_scan.pl \n"; print " - SAXS scattering profile, 1st column q, 2nd column I(q) \n"; print " - Output files will be named .Dmax.* \n"; die "\n"; } ### Read in the SAXS profile from file ### $projectname = $ARGV[1]; if(-e "$projectname.param") { system("mv $projectname.param $projectname.param.old"); } $infile = $ARGV[0]; open IN, "$infile" or die "Can't open $infile \n"; $Nentries =0; while ($currLin = ) { chomp $currLin; # if($currLin =~ /^(0\.\d+|1\.\d+)\s+(\S+)/) if($currLin =~ /(\S+)\s+(\S+)/) { $Nentries++; push @qs, $1; push @Is, $2; } } close IN; ### Comment this in to print out SAXS profile info ### if(0) { for($i=0; $i<$Nentries; $i++) { print "$qs[$i] $Is[$i]\n"; } } print "Found $Nentries q and I(q) values \n"; ### Print a cropped profile to file ### open OUT, ">temp.profile" or die "Can't open temp.profile \n"; if($qs[0] > $min_q) { die "First entry has q=$qs[0], requested minimal q is $min_q \n ... Exiting...\n"; } $n_written = 0; for($i=0; $i<$Nentries; $i++) { if($qs[$i] >= $min_q) { print OUT "$qs[$i] $Is[$i]\n"; $n_written++; } if($n_written >= $n_points) { last; } } close OUT; ### Now run GNOM for a range of Dmax parameter values ### if($Dmax_low > $Dmax_high) { die "Dmax_low (= $Dmax_low) is larger than Dmax_high (= $Dmax_high)! ... Exiting ... \n"; } $Dmax_curr = $Dmax_low; while($Dmax_curr <= $Dmax_high) { print "Running GNOM with Dmax = $Dmax_curr \n"; $basename = $projectname . "." . "$Dmax_curr"; ### Create an input file for GNOM ### open FP, ">temp.parameterfile"; print FP "postscr \n"; # Print setting to use print FP "temp.profile \n"; # SAXS profile to use print FP "$basename.out \n"; # Output file name print FP "0 \n"; # Number of initial points to skip print FP "none \n"; # No second data set print FP "0 \n"; # Number of final points to skip print FP "0.0 \n"; # Estimate standard deviations from smoothing print FP "1 \n"; # Angle should be in q = 4 pi sin(theta)/lambda in 1/Angstroem print FP "No \n"; # Do not print input data print FP "none \n"; # No file with expert parameters print FP "No \n"; # Kernel is not already calculated print FP "0 \n"; # Monodisperse system print FP "Yes \n"; # Zero condition at r=rmin print FP "Yes \n"; # Zero condition at r=rmax print FP "$Dmax_curr \n"; # Run with the current Dmax value! print FP "\n"; # Use default number of points in real space print FP "kern.bin \n"; # Use default kernel file print FP "0 \n"; # Experimental set-up without smearing print FP "0.0 \n"; # Inital alpha print FP "\n"; # Continue after alpha plot ### For the last options it somehow does not want the answers. Just say "Enter" to all... print FP " \n"; # Do print results print FP " \n"; # ... and go on print FP " \n"; # Do evaluate errors print FP " \n"; # Do print print FP " \n"; # ... and go on print FP " \n"; # No more runs close FP; system("$GNOM_exec < temp.parameterfile"); ### Now capture different outputs ### system("cp temp.profile $basename.exp"); open PARA, ">>$projectname.param" or die "Can't open file $projectname.param \n"; open IREG, ">$basename.ireg" or die "Can't open $basename.ireg \n"; open PR, ">$basename.pr" or die "Can't open $basename.pr \n"; open IN, "$basename.out" or die "Can't open $basename.out \n"; $read_Ireg_flag = 0; $read_Pr_flag = 0; while ($currLin = ) { chomp $currLin; ### Find the goodness-of-fit parameter and print if($currLin =~ /Current\s\s\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/) { print PARA "$Dmax_curr $1 $2 $3 $4 $5 $6 "; } ### Extract the regularized profile ### if($read_Ireg_flag == 1) { if($currLin =~ /(\S+)\s\s\s\s+(\S+)/) { print IREG "$1 $2\n"; } elsif($currLin =~ /(\S+) \S+ \S+ \S+ (\S+)/) { print IREG "$1 $2\n"; } } if($currLin =~ / S J EXP ERROR J REG I REG/) { $read_Ireg_flag = 1; } if($currLin =~ / Distance distribution function of particle /) { $read_Ireg_flag = 0; $read_Pr_flag = 1; } if($read_Pr_flag == 1) { if($currLin =~ /(\S+) (\S+) \S+/) { print PR "$1 $2\n"; } } if($currLin =~ /Reciprocal/) { $read_Pr_flag = 0; } if($currLin =~ /Real space: Rg =\s+(\S+)/) { print PARA "$1\n"; } } close IN; close PARA; close IREG; close PR; $Dmax_curr = $Dmax_curr + $Dmax_step; } ### Clean up ### system("rm temp.profile temp.parameterfile"); system("rm kern.bin gnu-capture"); sub makeRightLen { local $inLin = $_[0]; local $absVal = abs($inLin); local $absurdLen = $absVal + 0.0000001; local $corrLen = substr $absurdLen, 0, 6; local $retStr; if ($inLin < 0) { $retStr = "-".$corrLen; } else { $retStr = " ".$corrLen; } }