Below I enclose the PCA software, as used in various of my papers. Please feel free to borrow, modify and distribute as you see free. I only ask that any published work using this program cites my 1992 paper (Francis et al. 1992, ApJ 398, 476) PROGRAM SPCA *- * SPCA * * Reads in a list of spectra, sends them to their rest frame, * rebins part of them, normalised by its mean. This then has * the mean spectrum subtracted from it, and each of the residuals * is optionally divided by the variance for that bin. * * SVD is then used to find the principal components of these, * which are output, along with the strength of each component * in each spectrum. * * Input:- List of spectra and redshifts (an ascii file containing * spectrum names in the first column and redshifts in the second. The * input format is fixed - edit the program if you have longer or * shorter file names) * * Calls:- RDATA, REBIN, SVDCMP (from Numerical Recipes) * * Returns: * Two files - one for the principle components, a second for the * component weights in each spectrum * * Components file returns as a first column the rest-frame * wavelength, then the mean spectrum, the standard deviation * spectrum, followed by the first ten principle components * * The weights file returns with the spectrum names as the first * column, then the redshifts, then the weights of the first ten * principle components * * Paul Francis, IoA Cambridge, March 1990. Modified extensively * this version 28th June 1996 * * Paul Francis, University of Melbourne * pfrancis@physics.unimelb.edu.au *- IMPLICIT NONE INTEGER I, J, RNUM, BMAX, SMAX, MINMN INTEGER IFAIL, SCREEN, KEYBD, INFILE, OUTFIL INTEGER COUNT, OSTART INTEGER MAXSIZ, NBIN, NUM PARAMETER (SMAX=800, BMAX=800, SCREEN=6, : KEYBD=5, INFILE=7, OUTFIL=8, MAXSIZ=4000) **************************************************************** * SMAX is the maximum number of spectra likely to be used. * BMAX the maximum number of bins in each spectrum after the * trimming and rebinning has been performed by the program. * SMAX HAS TO BE AT LEAST AS LARGE AS RMAX. The smaller you * can make these numbers, the less memory your program will * take and the faster it will run. * MAXNUM is the maximum number of bins in each input spectrum * before trimming and rebinning **************************************************************** REAL D(SMAX,BMAX), LAMBDA(SMAX) REAL V(BMAX,SMAX) REAL WAV(MAXSIZ), SIG(MAXSIZ) REAL START, END, BINSIZ, TEMP, RBDAT(MAXSIZ) REAL RBWAV(MAXSIZ), MEAN, SCALE, MNSPEC(MAXSIZ) REAL MNVAR(MAXSIZ), RED(BMAX), TRED CHARACTER*20 SPCNAM, CONAME, SPNAME(SMAX), TNAME, FILNAM CHARACTER*1 ANS * Find the list of spectra: WRITE (SCREEN,'(A)')' List of spectra:' READ (KEYBD,'(A)') FILNAM OPEN (INFILE, FILE=FILNAM, STATUS='OLD') * Set the rebinning parameters. WRITE(SCREEN,'(A)')' Start of wanted rest-wavelength region?' READ(KEYBD,*) START WRITE(SCREEN,'(A)')' End of wanted region?' READ(KEYBD,*) END WRITE(SCREEN,'(A)')' Wavelength binsize to use?' READ(KEYBD,*) BINSIZ WRITE(SCREEN,'(A)')' Divide by the standard deviation? (Y/N)' READ(KEYBD,'(A)') ANS WRITE(SCREEN,'(A)')' Output file name for components?' READ(KEYBD,'(A)') CONAME WRITE(SCREEN,'(A)')' Output file for weights?' READ(KEYBD,'(A)') SPCNAM * Prepare the rebinned wavelength scale NBIN = INT((END-START)/BINSIZ) TEMP=START DO 10 I=1,NBIN RBWAV(I)=TEMP TEMP=TEMP+BINSIZ MNSPEC(I) = 0.0 MNVAR(I) = 0.0 10 CONTINUE * Loop through all the spectra in the list: COUNT=0 DO 80 I=1,SMAX WRITE(SCREEN,*) I * Read in spectrum name: ******************************************************************* * The line below reads spectrum names from an ASCII file, using * a fixed format read - tinker with the format if necessary ******************************************************************* READ(INFILE,20,END=90) TNAME,TRED 20 FORMAT(A10,F9.4) * Check the redshift ******************************************************************* * The line below is used to do a quick "first pass" rejection of * spectra at inappropriate redshifts - modify if necessary if IR * and/or UV data is avaliable ******************************************************************* IF((START.GT.(3200.0/(1.0+TRED))).AND. : (END.LT.(10000.0/(1.0+TRED)))) THEN * Read in the spectrum CALL RDATA(TNAME, NUM, WAV, SIG, MAXSIZ) * Double check the wavelength range IF((WAV(1).LE.(START*(1.0+TRED))).AND. : (WAV(NUM).GE.(END*(1.0+TRED)))) THEN COUNT = COUNT + 1 SPNAME(COUNT)=TNAME RED(COUNT) = TRED * Bring the spectrum to its rest frame. DO 30 J=1,NUM SIG(J) = SIG(J)*1.0E7 WAV(J) = WAV(J)/(1.0+TRED) 30 CONTINUE CALL REBIN(NUM,NBIN,WAV,SIG,RBWAV,RBDAT) * Find the mean flux MEAN = 0.0 DO 40 J=1, NBIN MEAN = MEAN + RBDAT(J) 40 CONTINUE MEAN = MEAN/REAL(NBIN) * Scale by the mean, and construct the bin mean and varience DO 50 J=1, NBIN D(COUNT,J) = RBDAT(J)/MEAN MNSPEC(J) = MNSPEC(J) + D(COUNT,J) MNVAR(J) = MNVAR(J) + D(COUNT,J)**2 50 CONTINUE END IF END IF 80 CONTINUE 90 CLOSE(INFILE) WRITE(SCREEN,'(A)')' Number of spectra used:-' WRITE(SCREEN,*) COUNT WRITE(SCREEN,'(A)')' Loading finished, mean subtraction starting.' * Find the mean spectrum, and subtract it off each. * Loop through the spectra, subtracting the means DO 100 I=1, NBIN MNSPEC(I) = MNSPEC(I)/REAL(COUNT) MNVAR(I) = SQRT(MNVAR(I)/COUNT - MNSPEC(I)**2) 100 CONTINUE DO 120 I=1, NBIN DO 110 J=1, COUNT D(J,I) = D(J,I) - MNSPEC(I) IF (ANS.EQ.'Y' .OR. ANS .EQ. 'y') THEN D(J,I) = D(J,I)/MNVAR(I) END IF 110 CONTINUE 120 CONTINUE WRITE(SCREEN,'(A)')' Mean subtraction finished, SVD starting.' * Perform the Singular Value Decomposition on D IFAIL = 0 MINMN = MIN(COUNT,NBIN) * Set excess rows to zero if COUNT .lt. NBIN RNUM = COUNT IF (COUNT .LT. NBIN) THEN DO 140 I= 1, NBIN DO 130 J=COUNT+1, NBIN D(J,I) = 0.0 130 CONTINUE 140 CONTINUE RNUM = NBIN ENDIF CALL SVDCMP(D,RNUM,NBIN,SMAX,BMAX,LAMBDA,V) * Turn the values in Lambda into proportions of the variation. SCALE = 0.0 DO 150 I=1,MINMN SCALE = SCALE + LAMBDA(I) 150 CONTINUE * Output the components found, with their strengths WRITE(SCREEN,'(A)') ' Component Eigenvalues:' DO 155 I=1, 10 WRITE(SCREEN,*) I, LAMBDA(I)/SCALE 155 CONTINUE OSTART=1 OPEN(OUTFIL, FILE=CONAME, STATUS='NEW') DO 170 J=1,NBIN IF (ANS.EQ.'Y' .OR. ANS .EQ. 'y') THEN WRITE(OUTFIL,180) RBWAV(J),MNSPEC(J),MNVAR(J), : (V(J,I)*MNVAR(J),I=OSTART,OSTART+9) ELSE WRITE(OUTFIL,180) RBWAV(J),MNSPEC(J),MNVAR(J), : (V(J,I),I=OSTART,OSTART+9) END IF 170 CONTINUE 180 FORMAT(1X,F6.1,12F10.6) CLOSE(OUTFIL) * Output the component weights in each spectrum OPEN(OUTFIL, FILE=SPCNAM, STATUS='NEW') DO 200 I=1,COUNT WRITE(OUTFIL,210) SPNAME(I),RED(I), : (LAMBDA(J)*D(I,J),J=OSTART,OSTART+9) 200 CONTINUE 210 FORMAT(1X,A10,F10.4,F11.5,9F9.5) CLOSE(OUTFIL) END SUBROUTINE REBIN(NUM,NUMB,WAVI,DATI,WAVB,RBDAT) *- * REBIN * Rebins the spectrum in WAVI, DATI, to the wavelength scale * of WAVB, putting the result in RBDAT. *- INTEGER NUM,NUMB,K,I,J,LOLIM,HILIM REAL WAVI(NUM),DATI(NUM), : WAVB(NUMB),RBDAT(NUMB),A,B,C,D, : INI,INB,STI,STB * Find start and interval of both spectra. STI=WAVI(1) INI=(WAVI(NUM)-WAVI(1))/(NUM-1) STB=WAVB(1) INB=(WAVB(NUMB)-WAVB(1))/(NUMB-1) * Prepare array to hold rebinned first spectrum. DO 5 K=1,NUMB RBDAT(K)=0.0 5 CONTINUE * Perform rebinning. DO 7 I=1,NUM LOLIM=INT((WAVI(I)-0.5*(INI+INB)-STB)/INB) HILIM=INT((WAVI(I)+0.5*(INI+INB)-STB)/INB)+2 DO 6 J=LOLIM,HILIM IF ((J.GE.1).AND.(J.LE.NUMB)) THEN A=WAVI(I)-0.5*INI B=WAVI(I)+0.5*INI C=WAVB(J)-0.5*INB D=WAVB(J)+0.5*INB IF ((C.LE.B).AND.(A.LE.D)) THEN IF (C.LT.A) THEN IF (D.LT.B) THEN RBDAT(J)=RBDAT(J)+(D-A)*DATI(I)/(B-A) ELSE RBDAT(J)=RBDAT(J)+DATI(I) ENDIF ELSE IF (D.LT.B) THEN RBDAT(J)=RBDAT(J)+(D-C)*DATI(I)/(B-A) ELSE RBDAT(J)=RBDAT(J)+(B-C)*DATI(I)/(B-A) ENDIF ENDIF ENDIF ENDIF 6 CONTINUE 7 CONTINUE * Eliminate edge effects. DO 20 I=1,NUMB IF (((WAVB(I)-0.5*INB).LT.(STI+0.5*INI)).OR. : ((WAVB(I)+0.5*INB).GT.(STI+NUM*INI-0.5*INI))) THEN RBDAT(I)=0.0 ENDIF 20 CONTINUE END SUBROUTINE RDATA(TNAME, NUM, WAV, SIG, MAXSIZ) *- * RDATA * Given a character string TNAME, this subroutine reads into the * arrays WAV, SIG the ascii file * * Paul Francis, University of Melbourne, Dec 1993 *- INTEGER I, FILNUM, NUM PARAMETER (FILNUM=9) REAL WAV(MAXSIZ), SIG(MAXSIZ) REAL WAVE, FLUX CHARACTER TNAME*20 * Open the file for read access only. OPEN(FILNUM, FILE=TNAME, STATUS='OLD') * Read in the spectrum, discarding zero wavelength rows. NUM = 0 DO 10 I=1,10000 READ(FILNUM,*,END=20) WAVE,FLUX IF (WAVE .GT. 1200.0 .AND. WAVE .LT. 10000.) THEN NUM = NUM + 1 WAV(NUM) = WAVE SIG(NUM) = FLUX END IF 10 CONTINUE * Close the file. 20 CLOSE(FILNUM) END SUBROUTINE SVDCMP(A,M,N,MP,NP,W,V) PARAMETER (NMAX=800) DIMENSION A(MP,NP),W(NP),V(NP,NP),RV1(NMAX) G=0.0 SCALE=0.0 ANORM=0.0 DO 25 I=1,N L=I+1 RV1(I)=SCALE*G G=0.0 S=0.0 SCALE=0.0 IF (I.LE.M) THEN DO 11 K=I,M SCALE=SCALE+ABS(A(K,I)) 11 CONTINUE IF (SCALE.NE.0.0) THEN DO 12 K=I,M A(K,I)=A(K,I)/SCALE S=S+A(K,I)*A(K,I) 12 CONTINUE F=A(I,I) G=-SIGN(SQRT(S),F) H=F*G-S A(I,I)=F-G IF (I.NE.N) THEN DO 15 J=L,N S=0.0 DO 13 K=I,M S=S+A(K,I)*A(K,J) 13 CONTINUE F=S/H DO 14 K=I,M A(K,J)=A(K,J)+F*A(K,I) 14 CONTINUE 15 CONTINUE ENDIF DO 16 K= I,M A(K,I)=SCALE*A(K,I) 16 CONTINUE ENDIF ENDIF W(I)=SCALE *G G=0.0 S=0.0 SCALE=0.0 IF ((I.LE.M).AND.(I.NE.N)) THEN DO 17 K=L,N SCALE=SCALE+ABS(A(I,K)) 17 CONTINUE IF (SCALE.NE.0.0) THEN DO 18 K=L,N A(I,K)=A(I,K)/SCALE S=S+A(I,K)*A(I,K) 18 CONTINUE F=A(I,L) G=-SIGN(SQRT(S),F) H=F*G-S A(I,L)=F-G DO 19 K=L,N RV1(K)=A(I,K)/H 19 CONTINUE IF (I.NE.M) THEN DO 23 J=L,M S=0.0 DO 21 K=L,N S=S+A(J,K)*A(I,K) 21 CONTINUE DO 22 K=L,N A(J,K)=A(J,K)+S*RV1(K) 22 CONTINUE 23 CONTINUE ENDIF DO 24 K=L,N A(I,K)=SCALE*A(I,K) 24 CONTINUE ENDIF ENDIF ANORM=MAX(ANORM,(ABS(W(I))+ABS(RV1(I)))) 25 CONTINUE DO 32 I=N,1,-1 IF (I.LT.N) THEN IF (G.NE.0.0) THEN DO 26 J=L,N V(J,I)=(A(I,J)/A(I,L))/G 26 CONTINUE DO 29 J=L,N S=0.0 DO 27 K=L,N S=S+A(I,K)*V(K,J) 27 CONTINUE DO 28 K=L,N V(K,J)=V(K,J)+S*V(K,I) 28 CONTINUE 29 CONTINUE ENDIF DO 31 J=L,N V(I,J)=0.0 V(J,I)=0.0 31 CONTINUE ENDIF V(I,I)=1.0 G=RV1(I) L=I 32 CONTINUE DO 39 I=N,1,-1 L=I+1 G=W(I) IF (I.LT.N) THEN DO 33 J=L,N A(I,J)=0.0 33 CONTINUE ENDIF IF (G.NE.0.0) THEN G=1.0/G IF (I.NE.N) THEN DO 36 J=L,N S=0.0 DO 34 K=L,M S=S+A(K,I)*A(K,J) 34 CONTINUE F=(S/A(I,I))*G DO 35 K=I,M A(K,J)=A(K,J)+F*A(K,I) 35 CONTINUE 36 CONTINUE ENDIF DO 37 J=I,M A(J,I)=A(J,I)*G 37 CONTINUE ELSE DO 38 J= I,M A(J,I)=0.0 38 CONTINUE ENDIF A(I,I)=A(I,I)+1.0 39 CONTINUE DO 49 K=N,1,-1 DO 48 ITS=1,30 DO 41 L=K,1,-1 NM=L-1 IF ((ABS(RV1(L))+ANORM).EQ.ANORM) GO TO 2 IF ((ABS(W(NM))+ANORM).EQ.ANORM) GO TO 1 41 CONTINUE 1 C=0.0 S=1.0 DO 43 I=L,K F=S*RV1(I) IF ((ABS(F)+ANORM).NE.ANORM) THEN G=W(I) H=SQRT(F*F+G*G) W(I)=H H=1.0/H C= (G*H) S=-(F*H) DO 42 J=1,M Y=A(J,NM) Z=A(J,I) A(J,NM)=(Y*C)+(Z*S) A(J,I)=-(Y*S)+(Z*C) 42 CONTINUE ENDIF 43 CONTINUE 2 Z=W(K) IF (L.EQ.K) THEN IF (Z.LT.0.0) THEN W(K)=-Z DO 44 J=1,N V(J,K)=-V(J,K) 44 CONTINUE ENDIF GO TO 3 ENDIF IF (ITS.EQ.30) PAUSE 'No convergence in 30 iterations' X=W(L) NM=K-1 Y=W(NM) G=RV1(NM) H=RV1(K) F=((Y-Z)*(Y+Z)+(G-H)*(G+H))/(2.0*H*Y) G=SQRT(F*F+1.0) F=((X-Z)*(X+Z)+H*((Y/(F+SIGN(G,F)))-H))/X C=1.0 S=1.0 DO 47 J=L,NM I=J+1 G=RV1(I) Y=W(I) H=S*G G=C*G Z=SQRT(F*F+H*H) RV1(J)=Z C=F/Z S=H/Z F= (X*C)+(G*S) G=-(X*S)+(G*C) H=Y*S Y=Y*C DO 45 NM=1,N X=V(NM,J) Z=V(NM,I) V(NM,J)= (X*C)+(Z*S) V(NM,I)=-(X*S)+(Z*C) 45 CONTINUE Z=SQRT(F*F+H*H) W(J)=Z IF (Z.NE.0.0) THEN Z=1.0/Z C=F*Z S=H*Z ENDIF F= (C*G)+(S*Y) X=-(S*G)+(C*Y) DO 46 NM=1,M Y=A(NM,J) Z=A(NM,I) A(NM,J)= (Y*C)+(Z*S) A(NM,I)=-(Y*S)+(Z*C) 46 CONTINUE 47 CONTINUE RV1(L)=0.0 RV1(K)=F W(K)=X 48 CONTINUE 3 CONTINUE 49 CONTINUE RETURN END