#! shebang goes here

# Program   :   trend.awk
# Purpose   :   Gather information from multiple files and create a trend table (for eventual charting)
# Author    :   Bob Jonkman   bjonkman@sobac.com
# Date      :   14 August 2007

# Usage     :   gawk -f trend.awk -f library.awk -v SOURCEFIELD=fieldname_to_tabulate -v SOURCEHEADER=fieldname_for_headerfields inputfiles > outputfile.csv


BEGIN       {   # Both variables are mandatory and are supplied with -v on the command line, e.g.
                #     -v SOURCEFIELD="Space remaining on volume" -v SOURCEHEADER="Volume"
                # When one is missing, the message is recorded in `error` and the script
                # exits; the END rule still runs after an exit in BEGIN and reports it.
                if(!SOURCEFIELD)  { error = "SOURCEFIELD not specified"  ; exit }
                if(!SOURCEHEADER) { error = "SOURCEHEADER not specified" ; exit }

                # ORS is emptied because the print statements in this script each write a
                # fragment of an output line and append "\n" themselves where a line ends.
                QUOTE = "\"" ;
                ORS   = ""   ;
                OFS   = ","  ;
}


# A line consisting of exactly five "=" characters ends the useful part of an
# input file: abandon the rest of that file and continue with the next one.
/^=====$/   {   nextfile }


FNR == 1    {   # First line of every input file (the header row).
                # getheaders() is defined in library.awk; from the way its results are used
                # in this script, headername appears to map a header name to its column
                # index -- TODO confirm against library.awk.
                numheaders = getheaders($0, headername, headernumber) ;
#               for(i in headername) print("##### DEBUG ##### FILENAME=" FILENAME "; headername[" i "]=" headername[i] "\n") ;


                # ARGIND (gawk) is the ARGV index of the file being read, so ARGIND == 2 on a
                # first line means the first input file has just been completed.
                if(ARGIND == 2) # Print output headers once first file is complete
                {
                    print(printcsv(SOURCEFIELD) OFS);

                    for(i=1; i<numoutheaders; i++)
                        print(printcsv(outheadernumber[i]) OFS) ;

                    print(printcsv(outheadernumber[numoutheaders]) "\n") ; # print last item
                }


                # Emit the output record collected from the PREVIOUS file: its date (or
                # filename) followed by one value per known output column.
                if(ARGIND > 1)
                {
                    print(printcsv(datefromfilename[0]) OFS) ;

                    for(i=1; i< numoutheaders; i++)
                        print(printcsv(xout[outheadernumber[i]]) OFS) ;

                    print(printcsv(xout[outheadernumber[numoutheaders]]) "\n") ;
                }

                # extract date (yyyy-mm-dd) from filename, otherwise use filename.
                # match() turns datefromfilename into an array (left empty when nothing
                # matches), so the fallback has to be stored in element 0 -- the element
                # that is printed above.  Assigning to the bare name would be a fatal
                # "array in scalar context" error in gawk.
                if(!match(FILENAME,/[[:digit:]][[:digit:]][[:digit:]][[:digit:]]-[[:digit:]][[:digit:]]-[[:digit:]][[:digit:]]/, datefromfilename))
                    datefromfilename[0] = FILENAME ;

                # Prepare for next input file (next output record)
                delete xout ;
                rownum = 0 ;
}


FNR > 1     {   # Data line: split it with the same library routine used for the header row.
                # NOTE(review): fieldnumber is assumed to map a column index to that
                # column's content on this line -- confirm against library.awk.
                numfields = getheaders($0, fieldcontent, fieldnumber) ;

                trendcolumn = fieldnumber[headername[SOURCEHEADER]] ;   # value that names the output column
                trendvalue  = fieldnumber[headername[SOURCEFIELD]] ;    # value to tabulate under that column

                # First time this column value is seen: give it the next output position
                # and remember the mapping in both directions.
                if(!(trendcolumn in outheadername))
                {
                    outheadername[trendcolumn] = ++numoutheaders ;
                    outheadernumber[numoutheaders] = trendcolumn ;
                }

#     for(i=1; i<=numoutheaders; i++) print("##### DEBUG ##### FNR=" FNR "; outheadernumber[" i "]=" outheadernumber[i] " ;FILENAME=" FILENAME "\n") ;
#     for(i in outheadername)         print("##### DEBUG ##### FNR=" FNR "; outheadername[" i "]="   outheadername[i] "\n") ;

                rownum++ ;
                xout[trendcolumn] = trendvalue ;
}

END         {   # A problem recorded in `error` (set by the BEGIN rule) is reported on stderr
                # and the script stops with a non-zero status so callers can detect it.
                if(error)
                {
                    print("Error: " error) > "/dev/stderr"
                    exit 1 ;
                }


                # complete the last line

                # The FNR == 1 rule prints the header row when the SECOND file starts, so
                # here the headers are still missing only if exactly one file was read.
                # (Testing ARGIND == 2 here would print them a second time for two files.)
                if(ARGIND == 1)
                {
                    print(printcsv(SOURCEFIELD) OFS);

                    for(i=1; i<numoutheaders; i++)
                        print(printcsv(outheadernumber[i]) OFS) ;

                    print(printcsv(outheadernumber[numoutheaders]) "\n") ; # print last item
                }


                # The record for the last file has not been written yet, because records are
                # only flushed when the following file begins; this includes the case of a
                # single input file.
                if(ARGIND >= 1)
                {
                    print(printcsv(datefromfilename[0]) OFS) ;

                    for(i=1; i< numoutheaders; i++)
                        print(printcsv(xout[outheadernumber[i]]) OFS) ;

                    print(printcsv(xout[outheadernumber[numoutheaders]]) "\n") ;
                }

                # print the headers again, in case there are more of them
                # (columns first seen in later files are missing from the header row
                # printed at the top; "=====" separates this trailer from the data)
                print("=====\n") ;
                print(SOURCEFIELD OFS) ;

                for(i=1; i<numoutheaders; i++)
                    print(outheadernumber[i] OFS) ;

                print(outheadernumber[numoutheaders] "\n") ;

}




# EOF: trend.awk

