How can I profile a program

Here is an example of program instrumentation and time profiling as I often do it. The example makes use on a simple instrumentation library, the source code of which I also include directly in this memo.

Starting from the source code:

      program trivial

      REAL a(10),b(10)

      DO 100 k=1,5

      DO 10 i=1,10
         a(i)=i
 10   ENDDO

      DO 20 j=1,10
         b(j)=a(j)
 20   ENDDO

      print *,b

100   CONTINUE
      END
I enclose each loop with a "start-timer/stop-timer" pair. In addition, there is an "init" and "finalize" call at the beginning and end of the program, resp. The init call initializes the library, the finalize call writes statistics about the collected times to a file.

The instrumented program looks like this:

      PROGRAM trivial
      REAL a(10), b(10)
      CALL instrument()

      DO 100 k=1,5

      CALL start_interval(1)
C  LOOPLABEL 'TRIVIAL_do10'
      DO 10 i = 1, 10
        a(i) = i
10    ENDDO
      CALL end_interval(1)
      CALL start_interval(2)
C  LOOPLABEL 'TRIVIAL_do20'
      DO 20 j = 1, 10
        b(j) = a(j)
20    ENDDO
      CALL end_interval(2)
      PRINT *, b

100   CONTINUE

      CALL exit_intervals('TRIVIAL.sum')
      END

      SUBROUTINE instrument
c This subroutine maps loop names to loop numbers. This info is used by 
c exit_intervals() to generate the printable summary.
      CALL init_intervals('')
      CALL enter_interval(1, 'TRIVIAL_do10')
      CALL enter_interval(2, 'TRIVIAL_do20')
      END
This kind of instrumentation can also be generated automatically by the Polaris compiler. Polaris can be run through the web at http://punch.ecn.purdue.edu/ParHub/

Now, you need to compile the program together with the instrumentation library functions, such as:

f77 trivial.f interval.f -o trivial
Then, when you run the program "trivial" it generates a file TRIVIAL.sum with the following content:
  TRIVIAL_do10 5 AVE: 0.000026 MIN: 0.000022 MAX: 0.000041 TOT: 0.000132 
  TRIVIAL_do20 5 AVE: 0.000017 MIN: 0.000017 MAX: 0.000017 TOT: 0.000087 

 OVERALL time -    0.022050 - - - - - - 
This gives you, for each instrumented program section, the number of invocations and the average, minimum, maximum, and total execution time.

The source code of the library (the file interval.f, above) is this:

	subroutine init_intervals(filename)
	character*(*) filename

	common /intvldata/ start(1000),count(1000),
     *                     total(1000), overall_start,
     *                     min(1000), max(1000),
     *                     nintervals, intvlname(1000)
	character*30 intvlname
	real start, total, overall_start, min, max, tt(2)
	integer*4 count, nintervals

	integer int_number
	character*30 int_name

	if (filename .eq. ' ') then
	  nintervals = 0
	  return
	endif

	open(file=filename,status='old',unit=83)

	nintervals = 0
100	read(83,*,end=200) int_number, int_name

	nintervals = nintervals + 1

	if (nintervals .ne. int_number) then
		print *, 'Warning: Interval number .ne.  record number: ', 
     *			  int_name
	endif	
	intvlname(int_number)(:) = int_name(:)
	count(int_number) = 0
	total(int_number) = 0
	min(int_number) = 1e31
	max(int_number) = 0
	goto 100

200	overall_start = etime(tt)

	close(unit=83)
	return
	end


c--------------------------------
       subroutine enter_interval ( number, name )
       character*(*) name
       integer number

       common /intvldata/ start(1000),count(1000),
     *                     total(1000), overall_start,
     *                     min(1000), max(1000),
     *                     nintervals, intvlname(1000)
       character*30 intvlname
       real start, total, overall_start, min, max, tt(2)
       integer*4 count, nintervals
       nintervals = nintervals + 1
       intvlname(number) = name
       count(number) = 0
       total(number) = 0
       min(number) = 1e31
       max(number) = 0
       end

c--------------------------------
	subroutine start_interval ( interval )

	integer interval

	common /intvldata/ start(1000),count(1000),
     *                     total(1000), overall_start,
     *                     min(1000), max(1000),
     *                     nintervals, intvlname(1000)
	character*30 intvlname
	real start, total, overall_start, min, max, tt(2)
	integer*4 count, nintervals

	start(interval) = etime(tt)

	return
	end

c--------------------------------
	subroutine end_interval ( interval )

	integer interval

	common /intvldata/ start(1000),count(1000),
     *                     total(1000), overall_start,
     *                     min(1000), max(1000),
     *                     nintervals, intvlname(1000)
	character*30 intvlname
	real start, total, overall_start, min, max, tt(2)
	integer*4 count, nintervals

	real period

        period = etime(tt) - start(interval)
        total(interval) = total(interval) + period
        count(interval) = count(interval) + 1
	if (period.lt.min(interval)) min(interval)=period
	if (period.gt.max(interval)) max(interval)=period
	return
	end


c--------------------------------
	subroutine exit_intervals(filename)

	character*(*) filename

	common /intvldata/ start(1000),count(1000),
     *                     total(1000), overall_start,
     *                     min(1000), max(1000),
     *                     nintervals, intvlname(1000)
	character*30 intvlname
        parameter(overhead_etime=0.71E-6)
	real start, total, overall_start, min, max, tt(2)
	real overhead_etime
	integer*4 count, nintervals

	real overall_end
	character*200 buffer, output_line


	overall_end = etime(tt)

	open(file=filename, unit=83, status='unknown')

       do i=1,nintervals
           if (count(i) .ne. 0) then
                buffer(:) = ' '
                write(buffer,10) intvlname(i)(:),
     *                  count(i),
     *                  (total(i)-overhead_etime*count(i))/count(i),
     *                  min(i)-overhead_etime,
     *                  max(i)-overhead_etime,
     *                  total(i)-overhead_etime*count(i)
10              format(1x,a30,1x,i7,' AVE: ',f12.6,' MIN: ', f12.6,
     *                 ' MAX: ', f12.6,' TOT: ',f12.6)
C               call xqueexe(buffer, output_line, length)
C               write(83,15) output_line(1:length)
                write(83,15) buffer
15              format(1x,a)
           endif
        end do

	write(83,20)
20	format(1x)
	write(83,30) overall_end-overall_start-overhead_etime
30	format(1x,'OVERALL time - ', f11.6,' - - - - - - ')
	return
	end

	subroutine xqueexe(string_in, string_out, length)
	character*(*) string_in
	character*(*) string_out
	integer length

	integer length_in, out, in
	logical inblanks

	length_in = len(string_in)

	inblanks = .false.
	out = 0
	do in=1,length_in
		if (string_in(in:in) .ne. ' ') then
			out = out + 1
			string_out(out:out) = string_in(in:in)
			inblanks = .false.
		elseif (.not. inblanks) then
			out = out + 1
			string_out(out:out) = string_in(in:in)
			inblanks = .true.
		endif
	end do

	length = out
	return
	end