# Program   : LIBRARY.AWK
# Purpose   : Contains functions common to many AWK scripts
# Author    : Bob Jonkman <bjonkman@sobac.com>

# Copyright 2008 Bob Jonkman and/or SOBAC Microcomputer Services

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Date      : 19 July 2005

# Contents  :
#             rstring()         - Returns the right-most n characters of string
#             max()             - Maximum of two values
#             min()             - Minimum of two values
#             trim()            - Trim whitespace from both ends of a string
#             ltrim()           - Trim whitespace from beginning (left side) of a string
#             rtrim()           - Trim whitespace from end (right side) of a string
#             gwid2smtp()       - escape reserved SMTP characters in GW Object
#                           see http://www.novell.com/documentation/gw55/index.html?page=/documentation/gw55/gw55ia/data/a30135u.html#a30135u 
#             smtp2gwid()       - un-escape reserved SMTP characters (reverse of gwid2smtp)
#             reverse()         - reverse order of delimited string, eg. a.b.c -> c_b_a
#             txt2html()        - escape reserved HTML characters
#             txt2xml()         - escape reserved XML characters
#             html2txt()        - restore reserved HTML characters
#             txt2uri()         - escape reserved URI characters
#             uri2txt()         - restore reserved URI characters
#             makehtml()        - Create href links (http:// file:// mailto:) from text  (mailto: was microformatted with class="vcard" on 2006-08-04)
#             parsecsv()        - Split Comma-Quote fields in string into an array
#             printcsv()        - Determine whether to print a field with delimiters
#             valnameid()       - Validate Name and ID token names (see http://www.w3.org/TR/html4/types.html#type-id )
#             printhtmlhead()   - print headers for an HTML file
#             parsecgi()        - Parse QUERY_STRING or POST data into an array
#             txt2gwapi()       - escape reserved characters in GW API files
#                           (see pg 42,43 "Keyword Ordering Requirements and Delimiters" in GroupWise API Gateway documentation)
#             getheaders()      - Create reverse lookup array for input string


# rstring(string, n)
#   Returns the right-most n characters of string.
#   The start position is computed as length(string) - n + 1 and substr()
#   is called without a length, so everything from there to the end of the
#   string is returned.
function rstring(string,n,    start)    {
    start = length(string) - n + 1 ;
    return(substr(string, start)) ;
}



# max(a, b)
#   Returns a when a > b, otherwise b (b is returned when the two compare equal).
#   The comparison is awk's ordinary ">" operator.
function max(a,b)   {
    if (a > b)
        return(a)
    return(b)
}

# min(a, b)
#   Returns a when a < b, otherwise b (b is returned when the two compare equal).
#   The comparison is awk's ordinary "<" operator.
function min(a,b)   {
    if (a < b)
        return(a)
    return(b)
}

# trim(string)
#   Returns string with whitespace removed from both ends.
#   The right side is trimmed first (rtrim), then the left side (ltrim).
function trim(string)   {
    return(ltrim(rtrim(string))) ;
}

##### End trim #####




# ltrim(string)
#   Trim left-side whitespace: removes the leading run of spaces and tabs
#   and returns the result.
#   The pattern is anchored at the start of the string, so it can match at
#   most once and a single sub() is sufficient.
function ltrim(string)	{
    sub(/^[ \t]+/, "", string)
    return(string)
}
##### End ltrim #####
        




# rtrim(string)
#   Trim right-side whitespace: removes the trailing run of spaces and tabs
#   and returns the result.
#   The pattern is anchored at the end of the string, so it can match at
#   most once and a single sub() is sufficient.
function rtrim(string)  {
    sub(/[ \t]+$/, "", string)
    return(string)
}

##### End rtrim #####





# gwid2smtp(text)
#   Perform GroupWise character translation: escape characters that are
#   reserved in SMTP addresses so a GroupWise object name can be used there.
#   http://www.novell.com/documentation/gw55/index.html?page=/documentation/gw55/gw55ia/data/a30135u.html#a30135u
#
#   Mapping:   #  -> #h#     _  -> #u#     space -> _
#              (  -> #l#     )  -> #r#     ,     -> #m#
#              :  -> #c#     \  -> #b#     =     -> #e#     /  -> #s#
#
#   Returns the translated copy; the argument itself is not modified.
function gwid2smtp(text,   smtptext)
{
    smtptext = text ;

    # '#' must be escaped first: every later replacement inserts '#'
    # characters that must not be escaped again.
    gsub(/#/   ,"#h#",smtptext) ;

    # '_' must be escaped before spaces are turned into '_'.
    gsub(/_/   ,"#u#",smtptext) ;
    gsub(/ /   ,"_"  ,smtptext) ;

    gsub(/\(/  ,"#l#",smtptext) ;
    # The right parenthesis is escaped in the regex: a bare /)/ is rejected
    # as an unbalanced group by some awk implementations.
    gsub(/\)/  ,"#r#",smtptext) ;
    gsub(/,/   ,"#m#",smtptext) ;
    gsub(/:/   ,"#c#",smtptext) ;
    gsub(/\\/  ,"#b#",smtptext) ;
    gsub(/=/   ,"#e#",smtptext) ;
    gsub(/\//  ,"#s#",smtptext) ;

    return(smtptext) ;
}

##### End gwid2smtp #####

# smtp2gwid(text)
#   Un-escape the reserved-character sequences used in SMTP addresses back
#   to the GroupWise object form:
#
#       #s# -> /    #e# -> =    #b# -> \    #c# -> :    #m# -> ,
#       #r# -> )    #l# -> (    #u# -> _    #h# -> #    _   -> space
#
#   The text is decoded in a single left-to-right scan.  Decoding with one
#   gsub() per sequence is not reliable because the '#' that ends one
#   escape can pair with the following letter and the '#' that starts the
#   next escape: "#h#s#h#" (the escaped form of "#s#") would be decoded
#   as "#h/h#" instead of "#s#".
#
#   Sequences that are not in the table above are left untouched.
#   RSTART and RLENGTH are restored before returning, so callers that use
#   match() themselves are not disturbed.
#
#   Returns the decoded copy; the argument itself is not modified.
function smtp2gwid(text,   gwidtext,rest,token,pos,savestart,savelength)
{
    savestart  = RSTART ;
    savelength = RLENGTH ;

    rest     = text ;
    gwidtext = "" ;

    while (match(rest, /#[sebcmrluh]#|_/))
    {
        token    = substr(rest, RSTART, RLENGTH) ;
        gwidtext = gwidtext substr(rest, 1, RSTART - 1) ;

        if (token == "_")
            gwidtext = gwidtext " " ;
        else
        {
            # The n-th escape letter maps to the n-th character of the
            # second string ("\\" is a single backslash).
            pos      = index("sebcmrluh", substr(token, 2, 1)) ;
            gwidtext = gwidtext substr("/=\\:,)(_#", pos, 1) ;
        }

        rest = substr(rest, RSTART + RLENGTH) ;
    }

    RSTART  = savestart ;
    RLENGTH = savelength ;

    return(gwidtext rest) ;
}

##### End smtp2gwid #####




# reverse(instring, inseparator, outseparator)
#   Splits instring on inseparator and returns the pieces joined in reverse
#   order with outseparator between them, e.g.
#       reverse("a,b,c", ",", "_")  ->  "c_b_a"
#   An input that splits into zero pieces yields the empty string.
function reverse(instring,inseparator,outseparator,       numelements,array,i,outstring)
{
    numelements = split(instring, array, inseparator) ;
    outstring   = "" ;

    # Walk forwards and prepend each piece, so the last piece ends up first.
    for (i = 1; i <= numelements; i++)
    {
        if (i == 1)
            outstring = array[i] ;
        else
            outstring = array[i] outseparator outstring ;
    }

    return outstring ;
}

##### End reverse #####





# txt2html(text)
#   Escape characters that are reserved in HTML and return the escaped copy;
#   the argument itself is not modified.
#
#       &  -> &amp;     >  -> &gt;     <  -> &lt;
#       "  -> &quot;    '  -> &#39;
#       bytes 0x91..0x96 -> &lsquo; &rsquo; &ldquo; &rdquo; &ndash; &mdash;
#
#   In every replacement string "\\&" produces a literal '&'; an unescaped
#   '&' in a gsub() replacement would stand for the matched text.
#
#   NOTE(review): in Windows-1252, 0x95 is the bullet, 0x96 the en dash and
#   0x97 the em dash, so the dash mappings below look shifted by one.  The
#   byte-to-entity assignment is left as it was; confirm against the real
#   input data before changing it.
function txt2html(text,   htmltext)
{
    htmltext = text ;

    # '&' must be escaped first; every later replacement inserts '&'.
    gsub(/\&/  , "\\&amp;"  ,htmltext)
    gsub(/>/   , "\\&gt;"   ,htmltext)
    gsub(/</   , "\\&lt;"   ,htmltext)
    gsub(/"/   , "\\&quot;" ,htmltext)
    gsub(/'/   , "\\&#39;"  ,htmltext) # ' or tick appears to be invalid in XML UTF-8 documents, substitute numeric entity
    gsub(/\x91/, "\\&lsquo;",htmltext) # left-single-quote
    gsub(/\x92/, "\\&rsquo;",htmltext) # right-single-quote
    gsub(/\x93/, "\\&ldquo;",htmltext) # left-double-quote
    gsub(/\x94/, "\\&rdquo;",htmltext) # right-double-quote

    # The terminating ';' was missing from these two entities, which
    # produced the malformed references "&ndash" and "&mdash".
    gsub(/\x95/, "\\&ndash;",htmltext)
    gsub(/\x96/, "\\&mdash;",htmltext)

# print("\n##### DEBUG ##### txt2html: text=" text "; htmltext=" htmltext "\n")

        ##### DEBUG #####    gsub(/[^a-zA-Z0-9 \!\#\$\%\&\(\)\*\+\,\-\.\/\:\;\=\?\@\[\\\]\^\_\`\{\|\}\~\n\t]/,"",htmltext) # remove all other non-ascii characters

    return(htmltext) ;
}

##### End txt2html #####

# txt2xml(text)
#   Converts plain text (possibly containing a few named HTML entities) to
#   text that is safe inside XML, and returns the converted copy; the
#   argument itself is not modified.
#
#   Step 1 - typographic characters are reduced to plain ASCII:
#       &lsquo; &rsquo; and bytes 0x91 0x92   -> '
#       &ldquo; &rdquo; and bytes 0x93 0x94   -> "
#       &ndash;         and byte  0x95        -> -
#       &mdash;         and byte  0x96        -> --
#   Step 2 - the XML-reserved characters are escaped:
#       &  -> &amp;   >  -> &gt;   <  -> &lt;   "  -> &quot;   '  -> &#39;
#
#   The named entities have to be handled BEFORE '&' is escaped.  When the
#   '&' escape ran first, "&lsquo;" had already become "&amp;lsquo;" and
#   the entity patterns could never match.
#
#   NOTE(review): in Windows-1252, 0x95 is the bullet, 0x96 the en dash and
#   0x97 the em dash, so the byte mappings for the dashes look shifted by
#   one.  They are left as they were; confirm against the real input data.
function txt2xml(text,   xmltext)
{
    xmltext = text ;

    # Step 1: normalise typographic quotes and dashes to ASCII.
    gsub(/&lsquo;/, "'"   ,xmltext) # left-single-quote entity  becomes tick
    gsub(/\x91/   , "'"   ,xmltext) # left-single-quote byte    becomes tick
    gsub(/&rsquo;/, "'"   ,xmltext) # right-single-quote entity becomes tick
    gsub(/\x92/   , "'"   ,xmltext) # right-single-quote byte   becomes tick
    gsub(/&ldquo;/, "\""  ,xmltext) # left-double-quote entity  becomes double-quote
    gsub(/\x93/   , "\""  ,xmltext) # left-double-quote byte    becomes double-quote
    gsub(/&rdquo;/, "\""  ,xmltext) # right-double-quote entity becomes double-quote
    gsub(/\x94/   , "\""  ,xmltext) # right-double-quote byte   becomes double-quote
    gsub(/&ndash;/, "-"   ,xmltext) # N-dash entity becomes hyphen
    gsub(/\x95/   , "-"   ,xmltext) # N-dash byte   becomes hyphen
    gsub(/&mdash;/, "--"  ,xmltext) # EM dash entity becomes two hyphens
    gsub(/\x96/   , "--"  ,xmltext) # EM dash byte   becomes two hyphens

    # Step 2: escape the reserved characters.  '&' goes first because the
    # later replacements insert '&'; "\\&" yields a literal '&'.
    gsub(/\&/     , "\\&amp;"  ,xmltext)
    gsub(/>/      , "\\&gt;"   ,xmltext)
    gsub(/</      , "\\&lt;"   ,xmltext)
    gsub(/"/      , "\\&quot;" ,xmltext)
    gsub(/'/      , "\\&#39;"  ,xmltext) # ' or tick appears to be invalid in XML UTF-8 documents, substitute numeric entity

        ##### DEBUG #####            gsub(/[^a-zA-Z0-9 \!\#\$\%\&\(\)\*\+\,\-\.\/\:\;\=\?\@\[\\\]\^\_\`\{\|\}\~\n\t]/,"",xmltext) # remove all other non-ascii characters

    return(xmltext) ;
}


# html2txt(htmltext)
#   Restore reserved HTML characters: replaces a fixed set of named and
#   numeric character references with plain characters and returns the
#   result; the argument itself is not modified.
#
#   The substitutions are grouped by the character they produce.  None of
#   the replacement characters before the final step can form part of a
#   character reference, so the order within those groups does not matter.
#   "&amp;" must be decoded last: decoding it earlier would let text such
#   as "&amp;lt;" be decoded twice.
function html2txt(htmltext,   plain)
{
    plain = htmltext ;

    # hyphen / double hyphen
    gsub(/\&ndash;/ , "-"   , plain)
    gsub(/\&#8211;/ , "-"   , plain)
    gsub(/\&mdash;/ , "--"  , plain)
    gsub(/\&#8212;/ , "--"  , plain)

    # apostrophe / tick
    gsub(/\&lsquo;/ , "'"   , plain) # left-single-quote entity
    gsub(/\&#8216;/ , "'"   , plain) # left-single-quote value
    gsub(/\&rsquo;/ , "'"   , plain) # right-single-quote entity
    gsub(/\&#8217;/ , "'"   , plain) # right-single-quote value
    gsub(/\&apos;/  , "'"   , plain) # apostrophe entity
    gsub(/\&#39;/   , "'"   , plain) # apostrophe value

    # double quote
    gsub(/\&ldquo;/ , "\""  , plain) # left-double-quote entity
    gsub(/\&#8220;/ , "\""  , plain) # left-double-quote value
    gsub(/\&rdquo;/ , "\""  , plain) # right-double-quote entity
    gsub(/\&#8221;/ , "\""  , plain) # right-double-quote value
    gsub(/\&quot;/  , "\""  , plain)

    # space and angle brackets
    gsub(/\&nbsp;/  , " "   , plain)
    gsub(/\&gt;/    , ">"   , plain)
    gsub(/\&lt;/    , "<"   , plain)

    # ampersand - always last; "\\&" yields a literal '&'
    gsub(/\&amp;/   , "\\&" , plain)

    return(plain) ;
}

##### End html2txt #####



# txt2uri(text)
#   Percent-encode reserved URI characters (see RFC3986 / STD0066 section
#   2.2) plus a handful of other punctuation characters, and return the
#   encoded copy; the argument itself is not modified.
#
#   Only the characters listed below are encoded; everything else is
#   passed through unchanged.
function txt2uri(text,   uritext)
{
    uritext = text ;

    # '%' must be encoded first: every later replacement inserts '%'.
    gsub(/%/   ,"%25",uritext)

    gsub(/ /   ,"%20",uritext)
    gsub(/:/   ,"%3A",uritext)
    gsub(/\//  ,"%2F",uritext)
    gsub(/\?/  ,"%3F",uritext)
    gsub(/#/   ,"%23",uritext)
    gsub(/\[/  ,"%5B",uritext)
    gsub(/]/   ,"%5D",uritext)
    gsub(/@/   ,"%40",uritext)
    gsub(/!/   ,"%21",uritext)
    gsub(/\$/  ,"%24",uritext)
    gsub(/&/   ,"%26",uritext)
    gsub(/'/   ,"%27",uritext)
    gsub(/\(/  ,"%28",uritext)
    # The right parenthesis is escaped in the regex: a bare /)/ is rejected
    # as an unbalanced group by some awk implementations.
    gsub(/\)/  ,"%29",uritext)
    gsub(/\*/  ,"%2A",uritext)
    gsub(/\+/  ,"%2B",uritext)
    gsub(/,/   ,"%2C",uritext)
    gsub(/;/   ,"%3B",uritext)
    gsub(/=/   ,"%3D",uritext)
    gsub(/\|/  ,"%7C",uritext)  # Not part of RFC3986 (I think)
    gsub(/~/   ,"%7E",uritext)  # Not part of RFC3986 (I think)
    gsub(/\^/  ,"%5E",uritext)  # Not part of RFC3986 (I think)
    gsub(/`/   ,"%60",uritext)  # Not part of RFC3986 (I think)
    gsub(/\{/  ,"%7B",uritext)  # Not part of RFC3986 (I think)
    gsub(/\}/  ,"%7D",uritext)  # Not part of RFC3986 (I think)
    gsub(/\"/  ,"%22",uritext)  # Not part of RFC3986 (I think)
    gsub(/</   ,"%3C",uritext)  # Not part of RFC3986 (I think)
    gsub(/>/   ,"%3E",uritext)  # Not part of RFC3986 (I think)
    gsub(/\\/  ,"%5C",uritext)  # Not part of RFC3986 (I think)

    return(uritext) ;
}
##### End txt2uri #####


# uri2txt(uri)
#   Restore reserved URI characters: decodes the percent-escapes listed
#   below and returns the decoded copy; the argument itself is not
#   modified.  Escapes that are not listed are left untouched.
#
#   The hexadecimal digits are matched in either case ("%3A" and "%3a" are
#   both decoded), since RFC3986 section 2.1 treats them as equivalent.
#
#   "%25" is decoded last so that an encoded percent sign followed by two
#   hex digits (e.g. "%2520") is not decoded a second time.
function uri2txt(uri,  text)
{
    text = uri
    gsub(/%20/,       " "   ,text)
    gsub(/%3[Aa]/,    ":"   ,text)
    gsub(/%2[Ff]/,    "/"   ,text)
    gsub(/%3[Ff]/,    "?"   ,text)
    gsub(/%23/,       "#"   ,text)
    gsub(/%5[Bb]/,    "["   ,text)
    gsub(/%5[Dd]/,    "]"   ,text)
    gsub(/%40/,       "@"   ,text)
    gsub(/%21/,       "!"   ,text)
    gsub(/%24/,       "$"   ,text)
    gsub(/%26/,       "\\&" ,text) # unescaped '&' would indicate "replacement text"
    gsub(/%27/,       "'"   ,text)
    gsub(/%28/,       "("   ,text)
    gsub(/%29/,       ")"   ,text)
    gsub(/%2[Aa]/,    "*"   ,text)
    gsub(/%2[Bb]/,    "+"   ,text)
    gsub(/%2[Cc]/,    ","   ,text)
    gsub(/%3[Bb]/,    ";"   ,text)
    gsub(/%3[Dd]/,    "="   ,text)
    gsub(/%7[Cc]/,    "|"   ,text)
    gsub(/%7[Ee]/,    "~"   ,text)
    gsub(/%5[Ee]/,    "^"   ,text)
    gsub(/%60/,       "`"   ,text)
    gsub(/%7[Bb]/,    "{"   ,text)
    gsub(/%7[Dd]/,    "}"   ,text)
    gsub(/%22/,       "\""  ,text)
    gsub(/%3[Cc]/,    "<"   ,text)
    gsub(/%3[Ee]/,    ">"   ,text)

    # NOTE(review): the replacement "\\n" is the two characters backslash
    # and 'n', not a newline character - confirm this is what callers expect.
    gsub(/%0[Dd]%0[Aa]/, "\\n" ,text)

    gsub(/%5[Cc]/,    "\\"  ,text)   # not part of RFC3986 ?

    gsub(/%25/,       "%"   ,text)
    return(text)
}


# makehtml(string)
#   Wraps string in an HTML link when it looks like a link target, and
#   returns the resulting HTML:
#
#     - starts with two backslashes : <a href="file:///..."> link; the path
#                                     is passed through txt2uri()
#     - starts with "http://"       : <a href="http://..."> link; the URL is
#                                     assumed to be txt2uri-converted already
#     - contains "@"                : mailto: link inside a
#                                     <span class="vcard"> microformat wrapper
#     - anything else               : the text escaped with txt2html()
#
#   Uses the global QUOTE as the attribute quote character.  QUOTE is given
#   its default (a double quote) here when it is not set, in the same way
#   as printhtmlhead() does, so the attributes are quoted even when
#   makehtml() is the first function called.
#
#   atpos is declared as a local; it was previously assigned without a
#   declaration and overwrote any global variable of that name.
#
#   NOTE(review): gsub(/\\\\/, "/", path) replaces each PAIR of backslashes
#   with one slash; single backslashes later in the path are left for
#   txt2uri() to encode as %5C.  Confirm this is the intended file: URL.
function makehtml(string,   htmlstring,path,gwiduri,atpos,escaped)
{
    if (!QUOTE)
        QUOTE = "\""

    if (substr(string,1,2) == "\\\\")
    {
        path = string ;
        gsub(/\\\\/,"/",path) ;
        htmlstring = "<a href=" QUOTE "file:///" txt2uri(path) QUOTE ">" txt2html(string) "</a>" ;
    }
    else if (substr(string,1,7) == "http://")  # If the string starts with http:// then assume it is already txt2uri converted
    {
        htmlstring = "<a href=" QUOTE "http://" substr(string,8) QUOTE ">" txt2html(string) "</a>"
    }
    else if ((atpos = index(string, "@")) > 0)
    {
        # E-mail address is not cleaned with txt2uri() so @ stays as symbol -- but txt2uri() may be necessary!
        escaped    = txt2html(string) ;
        htmlstring = "<span class=" QUOTE "vcard" QUOTE "><a class=" QUOTE "email fn" QUOTE ;
        htmlstring = htmlstring " href=" QUOTE "mailto:" escaped QUOTE ;
        htmlstring = htmlstring " title=" QUOTE "E-mail to " escaped QUOTE ">" escaped "</a>" ;
#                    if ((substr(string,atpos+1) == "sobac.com") && !NOEGUIDE)  
#                    {   gwiduri = txt2uri(smtp2gwid(substr(string,1,atpos-1))) ;
#                        htmlstring = htmlstring " <a class=" QUOTE "url" QUOTE " href=" QUOTE "http://eguide.sobac.com/eGuide/servlet/eGuide?Action=Detail.get&amp;User.dn=cn%3d" gwiduri "%2cou%3dStaff%2co%3dToronto&amp;Directory.uid=Staff" QUOTE " title=" QUOTE "Look up " gwiduri " in eGuide Staff Container" QUOTE " target=" QUOTE "ldap" QUOTE ">S</a>"
#                        htmlstring = htmlstring " <a class=" QUOTE "url" QUOTE " href=" QUOTE "http://eguide.sobac.com/eGuide/servlet/eGuide?Action=Detail.get&amp;User.dn=cn%3d" gwiduri "%2cou%3dNon%2dStaff%2co%3dToronto&amp;Directory.uid=Non_Staff" QUOTE " title=" QUOTE "Look up " gwiduri " in eGuide Non-Staff Container" QUOTE " target=" QUOTE "ldap" QUOTE ">N</a>"
#                    }
        htmlstring = htmlstring "</span>"
    }
    else
        htmlstring = txt2html(string) ;

    return(htmlstring) ;
}

##### End makehtml #####




# parsecsv(rawfield, fieldarray)
#   Split the Comma-Quote fields in rawfield into fieldarray[1..n] and
#   return n, the number of fields.  fieldarray is emptied first.
#
#   Globals (given defaults here when not set):
#       FIELD_SEPARATOR  - character between fields       (default ,)
#       FIELD_DELIMITER  - character that quotes a field  (default ")
#
#   Rules:
#     - a delimiter toggles "inside quotes"; separators inside quotes are
#       part of the field
#     - two adjacent delimiters stand for one literal delimiter character
#     - exception: when the two adjacent delimiters are the ENTIRE field
#       (they start the field and are followed by a separator or the end of
#       the input) they are an empty quoted field, so "a","","c" gives an
#       empty second field rather than a field holding one delimiter
#     - fields that turn out empty get no fieldarray entry; reading them
#       yields the empty string
#
#   Note that parsecsv() does not currently handle embedded CRLF
#         eg. "data","data<CRLF>data","data"
function parsecsv(rawfield,fieldarray,    fieldnum,qflag,i,char,len,fieldstart)
{
    delete fieldarray ;
    qflag      = 0 ;        # TRUE if inside field delimiters
    fieldnum   = 1 ;
    fieldstart = 1 ;        # position of the first character of the current field

    if (!FIELD_SEPARATOR)
        FIELD_SEPARATOR = "," ;

    if (!FIELD_DELIMITER)
        FIELD_DELIMITER = "\"" ;

    len = length(rawfield) ;    # loop-invariant, so computed once

    for (i = 1; i <= len; i++)
    {
        char = substr(rawfield,i,1) ;

        if (char == FIELD_DELIMITER)
        {
            if (substr(rawfield,i+1,1) == FIELD_DELIMITER)
            {
                if (!qflag && i == fieldstart && (i + 2 > len || substr(rawfield,i+2,1) == FIELD_SEPARATOR))
                {
                    # the whole field is a pair of delimiters: empty quoted field
                    i++ ;
                }
                else
                {
                    # two adjacent field delimiters; treat as one character
                    i++ ;   # skip over the next character, and add the delimiter to the output string
                    fieldarray[fieldnum] = fieldarray[fieldnum] FIELD_DELIMITER ;
                }
            }
            else
                qflag = !qflag ;
        }
        else if (char == FIELD_SEPARATOR)
        {
            if (qflag)
                fieldarray[fieldnum] = fieldarray[fieldnum] FIELD_SEPARATOR ;
            else
            {
                fieldnum++ ;
                fieldstart = i + 1 ;
            }
        }
        else
        {
            fieldarray[fieldnum] = fieldarray[fieldnum] char ;
        }
    }

    return(fieldnum) ;
}



#####  End of parsecsv() #####


# printcsv(field)
#   Prepare one field for CSV output and return it:
#     - every field delimiter inside the field is doubled to escape it
#     - the field is wrapped in field delimiters when it contains the
#       field separator
#
#   Globals (given defaults here when not set):
#       FIELD_SEPARATOR  - character between fields       (default ,)
#       FIELD_DELIMITER  - character that quotes a field  (default ")
#
#   Separator and delimiter are matched as literal text with index().
#   Using them as regular expressions (gsub / ~) breaks for values such as
#   "." or "|", and a delimiter of "&" or "\" would be mangled when used
#   as a gsub() replacement.
#
#   NOTE(review): a field that contains a delimiter but no separator is
#   returned with doubled delimiters and no surrounding delimiters; this
#   matches what parsecsv() accepts but is not standard CSV quoting.
function printcsv(field,    out,rest,pos,dlen)    # Determine whether to print a field with delimiters
{
    if (!FIELD_SEPARATOR)
        FIELD_SEPARATOR = "," ;

    if (!FIELD_DELIMITER)
        FIELD_DELIMITER = "\"" ;

# Double field delimiters to escape them
    out  = "" ;
    rest = field ;
    dlen = length(FIELD_DELIMITER) ;
    while ((pos = index(rest, FIELD_DELIMITER)) > 0)
    {
        out  = out substr(rest, 1, pos - 1) FIELD_DELIMITER FIELD_DELIMITER ;
        rest = substr(rest, pos + dlen) ;
    }
    field = out rest ;

# Apply field delimiters when field separator is in field
    if (index(field, FIELD_SEPARATOR) > 0)
        field = FIELD_DELIMITER field FIELD_DELIMITER

    return(field)
}
            


# valnameid(text)
#   Validate NAME and ID token names
#   (see http://www.w3.org/TR/html4/types.html#type-id ).
#   Every character other than a letter, digit, hyphen, underscore, colon
#   or period is replaced by "_"; when the result does not begin with a
#   letter, "a" is put in front of it.  Returns the adjusted copy.
function valnameid(text,   valtext,first)
{
    valtext = text
    gsub(/[^A-Za-z0-9\-_:\.]/, "_", valtext)

    first = substr(valtext,1,1)
    if (first ~ /[A-Za-z]/)
        return(valtext)

    # covers the empty string as well: the result is then just "a"
    return("a" valtext)
}

##### End of valnameid() #####



# printhtmlhead(title)
#   Print the headers for an XHTML 1.0 Strict page: an optional HTTP
#   Content-type header, the XML declaration, the DOCTYPE, the opening
#   <html> tag and a complete <head> element.  title is escaped with
#   txt2html() and used for both the description meta tag and <title>.
#
#   Globals read:
#       QUOTE    - attribute quote character; set to a double quote here
#                  when it is not set
#       NOHTTP   - when set, the HTTP Content-type header is not printed
#       LINKREL  - array of extra lines printed inside <head>; they are
#                  printed in "for (i in LINKREL)" order, which awk does
#                  not guarantee to be any particular order
#
#   The loop index i is declared as a local; it was previously a global
#   and overwrote the caller's variable i.
function printhtmlhead(title,    i)
{
    if (!QUOTE)
        QUOTE = "\""

    if (!NOHTTP)
        print("Content-type: text/html; charset=UTF-8\n")   # "\n" plus print's own newline ends the HTTP header block

    print("<?xml version=" QUOTE "1.0" QUOTE " encoding=" QUOTE "utf-8" QUOTE "?>") ;
    print("<!DOCTYPE html PUBLIC " QUOTE "-//W3C//DTD XHTML 1.0 Strict//EN" QUOTE ) ;
    print("    " QUOTE "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" QUOTE ">") ;
    print("<html xmlns=" QUOTE "http://www.w3.org/1999/xhtml" QUOTE ) ;
    print("      xml:lang=" QUOTE "en" QUOTE ) ;
    print("      lang=" QUOTE "en" QUOTE ">") ;
    print(" <head>") ;
    print("  <meta http-equiv=" QUOTE "Content-Type" QUOTE ) ;
    print("           content=" QUOTE "text/html;charset=utf-8" QUOTE " />") ;
    print("  <link rel=" QUOTE "stylesheet" QUOTE ) ;
    print("        href=" QUOTE "/default.css" QUOTE ) ;
    print("        type=" QUOTE "text/css" QUOTE " />") ;

    for (i in LINKREL)
        print("  " LINKREL[i])

    print("  <meta name=" QUOTE "generator" QUOTE ) ;
    print("        content=" QUOTE "AWK tools for HTML by Bob Jonkman" QUOTE " />") ;
    print("  <meta name=" QUOTE "author" QUOTE ) ;
    print("     content=" QUOTE "Bob Jonkman  bjonkman@sobac.com;" QUOTE " />") ;
    print("  <meta name=" QUOTE "description" QUOTE ) ;
    print("        content=" QUOTE txt2html(title) QUOTE " />") ;
    print("  <title>" txt2html(title) "</title>") ;
    print(" </head>") ;
}

##### End of printhtmlhead() #####



# parsecgi(inputstring, outputarray)
#   Parse QUERY_STRING or POST data ("name=value&name=value...") into
#   outputarray[name] = value and return the number of distinct names.
#   outputarray is emptied first.
#
#     - '+' is turned into a space before the percent-escapes are decoded,
#       so an encoded plus sign (%2B) survives as '+'
#     - names and values are decoded with uri2txt()
#     - a name that occurs more than once gets all its values, joined with
#       FS, in the order in which they appear in inputstring
#     - only the text between the first and second '=' of a pair is used
#       as the value
#
#   Changes from the earlier version:
#     - the loop index is a local (it used to overwrite the global i)
#     - pairs are processed in input order; "for (i in array)" order is
#       not defined in awk, so repeated names could be joined in any order
#     - a repeated name is detected with "name in outputarray"; testing
#       the stored value treated an earlier empty value as "not seen yet"
function parsecgi(inputstring,outputarray,     querystring,paramstring,numparam,i,numpairs,name,value)
{
    delete querystring
    delete outputarray

    numpairs = split(inputstring,querystring,"&")
    numparam = numpairs

    for (i = 1; i <= numpairs; i++)
    {
        gsub(/\+/," ",querystring[i])            # remove + as space substitute

        split(querystring[i],paramstring,"=")
        name  = uri2txt(paramstring[1])
        value = uri2txt(paramstring[2])

# allow multi-value parameters, separate with FS
        if (name in outputarray)
        {
            outputarray[name] = outputarray[name] FS value ;
            numparam-- ;                         # repeated name: not a new parameter
        }
        else
            outputarray[name] = value ;
    }

    return(numparam)
}

##### End of parsecgi() #####



# txt2gwapi(text)
#   Escape reserved characters in GW API files: puts a backslash in front
#   of every semicolon and every double quote, and returns the result.
#   (see pg 42,43 "Keyword Ordering Requirements and Delimiters" in
#   GroupWise API Gateway documentation)
function txt2gwapi(text,   apitext)
{
    apitext = text
    gsub(/;/  ,"\\;"  , apitext)    # semicolon    -> backslash semicolon
    gsub(/\"/ , "\\\"", apitext)    # double quote -> backslash double quote
    return(apitext)
}

##### End of txt2gwapi()



# Results of getheaders("Alpha,Beta,Gamma",namearray,numberarray) 
#                           namearray["Alpha"] == 1
#                           namearray["Beta"]  == 2
#                           namearray["Gamma"] == 3
#                           numberarray[1] == "Alpha"
#                           numberarray[2] == "Beta"
#                           numberarray[3] == "Gamma"

# getheaders(instring, namearray, numberarray)
#   Splits instring with parsecsv() into numberarray[1..n] and records the
#   position of each field in namearray[field text] = position.
#   Returns n, the number of fields reported by parsecsv().
#
#   Fields are processed in ascending order, so when the same text occurs
#   more than once namearray holds the highest position.
#   namearray is not emptied here; entries it already contains are kept.
function    getheaders(instring,namearray,numberarray,   numfields,i)    {
    numfields = parsecsv(instring,numberarray)

    i = 0
    while (++i <= numfields)
        namearray[numberarray[i]] = i ;

    return(numfields)
}

##### End of getheaders()

# EOF: LIBRARY.AWK
