/*
program: ih2merge.cmd
type:    REXXSAA-OS/2
purpose: produce a HTML-version which contains *all* HTML-files in one *big*
         file which allows for printing the entire document e.g. via
         WebExplorer; allow for merging just a subset of the HTML-files, if
         starting and ending file-number is given

usage:   ih2merge prefix [start [end]]
         ... prefix     ... prefix of HTML-files, e.g. "CMDREF"
         ... start      ... number of the first HTML-file to merge (prefix.2.START.html)
         ... end        ... number of the last  HTML-file to merge (prefix.2.END.html)

>> hint: in order to find the starting and ending numbers for chunks, load
>>       the TOC-file (table of contents), point to the first link & look up
>>       the chunk-number and point to the last link & look up the chunk number


         ... giving file:

                prefix.9.all.html   ... HTML-file containing *all*
                                        HTML-files (order: toc, files, idx)

                prefix.9.range.START-END.html   ... HTML-file containing
                                         all HTML-files within given range,
                                         starting with START (prefix.2.START.html),
                                         ending with END (prefix.2.END.html)


version: 1.0
date:    1995-04-12
changed: 1995-05-29, merely changed docs, ---rgf


needs:   HTML-files produced by INFHTML-package of Peter Childs,
         HPFS-partition to work on

author:  Rony G. Flatscher,
         Wirtschaftsuniversitaet/Vienna
         Rony.Flatscher@wu-wien.ac.at

All rights reserved, copyrighted 1995, no guarantee that it works without
errors, etc. etc.

donated to the public domain granted that you are not charging anything
(money etc.) for it and derivates based upon it, as you did not write it,
etc. if that holds you may bundle it with commercial programs too

Please, if you find an error, post me a message describing it, I will
try to fix and rerelease it to the net.
*/

/* ---- main program --------------------------------------------------------
   Parses "prefix [start [end]]", collects the HTML-files to merge with
   SysFileTree, runs every file through MERGE into a temporary file and
   finally renames that temporary file to the target name.
   ------------------------------------------------------------------------ */
PARSE ARG Prefix Start End .

gHeaders.  = 0          /* stem to keep the forward references for chunks which don't have a heading;
                           the default of 0 makes every not yet used "gHeaders.file.0" a zero count */

/* no prefix at all (this also covers a blank argument string) or help requested */
IF Prefix = "" | POS("?", Prefix) > 0 THEN
DO
   PARSE SOURCE . . befehl
   SAY "Syntax:" FILESPEC("Name", befehl) "Prefix of files to merge [start# [end#]]"
   EXIT
END

bRange = 0              /* default to all file-chunks to be merged */

IF Start <> "" & DATATYPE(Start, "W") THEN
DO
   bRange = 1           /* just a range of file-chunks to be merged */

   IF \DATATYPE(End, "W") THEN End = 9999       /* default to the highest number of chunks */
   NewName       = Prefix || ".9.range." || Start || "-" || End || ".html"
   StartName     = Prefix || ".2." || RIGHT(Start, 4, "0") || ".html"
   EndName       = Prefix || ".2." || RIGHT(End,   4, "0") || ".html"
   SearchPattern = Prefix || ".2.*.html"           /* just search file-chunks, no toc, no idx */
END
ELSE                    /* if all file-chunks to be merged */
DO
   NewName       = Prefix || ".9.all.html"
   SearchPattern = Prefix || "*.html"
END

IF RxFuncQuery('SysLoadFuncs') THEN     /* if REXX-utils not loaded, then load all of them */
DO
  Call RxFuncAdd 'SysLoadFuncs', 'REXXUTIL', 'SysLoadFuncs'
  Call SysLoadFuncs
END

CALL SysFileDelete NewName              /* delete old file */

Files.0 = 0                             /* defined count, even if SysFileTree should fail */
rc = SysFileTree(SearchPattern, "Files", "FO")   /* search files only ("F"), return fully qualified names only ("O") */

IF \bRange THEN         /* wildcard also matches results of earlier runs (prefix.9.xxx), drop them */
DO
   upcOutputStem = TRANSLATE(Prefix) || ".9."
   kept = 0
   DO i = 1 TO Files.0
      IF ABBREV(TRANSLATE(FILESPEC("Name", Files.i)), upcOutputStem) THEN ITERATE
      kept = kept + 1
      Files.kept = Files.i              /* compact the stem, order of remaining files is preserved */
   END
   Files.0 = kept
END

IF Files.0 = 0 THEN
DO
   SAY "No files found ! ["SearchPattern"]"
   EXIT
END

IF bRange THEN          /* just a range of chunks, find index into Files.-stem */
DO
   Start = binary_search(StartName)
   IF Start = 0 THEN
   DO
      SAY "Beginning of Range not found ! ["StartName"]"
      EXIT
   END

   End = binary_search(EndName)
   If End = 0 THEN End = Files.0                /* not found, then include up to last file-chunk */

   IF End < Start THEN                          /* nothing to merge, don't produce an empty file */
   DO
      SAY "End of Range precedes beginning of Range ! ["EndName"]"
      EXIT
   END
END
ELSE                    /* all chunks */
DO
   Start = 1
   End   = Files.0
END


OutFile = SysTempFileName("tmp?????.tmp")
CALL STREAM OutFile, "C", "OPEN WRITE"

DO i = Start TO End                     /* loop over given range */
   FileName = FILESPEC("Name", Files.i)
   SAY RIGHT(i, 4) || "/" || RIGHT(Files.0, 4) "["FileName"]"
   CALL merge FileName, Prefix, OutFile, (i = Start), (i = End), bRange
END

CALL STREAM OutFile, "C", "CLOSE"

ADDRESS CMD "@REN" OutFile NewName
IF rc <> 0 THEN                         /* rc is set by the REN command */
   SAY "Could not rename ["OutFile"] to ["NewName"] ! Merged text was left in ["OutFile"]"


RETURN


MERGE: PROCEDURE EXPOSE gHeaders.
   /* Append one HTML-chunk to the merged output.

      arguments:
         InFile     ... name of the chunk to read; also used as the name of the
                        anchor written around the chunk's header
         Prefix     ... prefix of the HTML-files; a href-target starting with it
                        (compared in uppercase) gets a "#" inserted, i.e. becomes
                        a reference local to the merged file
         OutFile    ... name of the output stream; opened and closed by the caller
         bFirstFile ... 1 if this is the first chunk of the run
         bLastFile  ... 1 if this is the last chunk of the run
         bRange     ... 1 if merely a range of chunks is merged (not used in here)

      exposed stem gHeaders., filled while the href's of a chunk are processed:
         gHeaders.target.0               ... number of references to "target"
         gHeaders.target.n.eHRefSource   ... chunk containing the n-th reference
         gHeaders.target.n.eSourceText   ... text of the referring anchor
         gHeaders.target.n.eSourceH2     ... header in effect at the referring anchor
      A chunk starting with <pre> instead of a header uses these entries to
      build a header and a "Referenced by:" list.
   */
   PARSE ARG InFile, Prefix, OutFile, bFirstFile, bLastFile, bRange

   upcPrefix = TRANSLATE(Prefix)
   bLeaveAlone = bFirstFile | bLastFile /* leave very first and very last chunk intact */

   leftPattern  = 'a href="'
   rightPattern = '"'

   CALL STREAM InFile,  "C", "OPEN READ"

   bJustStarted = 1                     /* just started to read file */
   bReferenceInserted = 0               /* no reference inserted as of yet */
   bhrWrite = 0                         /* <hr> written ?       */
   PresentHeader = InFile               /* fallback for links showing up before the first header; without
                                           it the name of the uninitialized variable would be recorded */

   DO WHILE CHARS(InFile) > 0
      tmpLine = LINEIN(InFile)            /* read input file */

      PARSE VAR tmpLine "<" tag ">" .     /* text within the first pair of angle brackets */

      IF bJustStarted & \bLeaveAlone THEN /* strip the leading lines of a chunk */
      DO
         SELECT
            WHEN tag = "html" THEN
                 DO
                    CALL LINEOUT OutFile, "<html>"
                    ITERATE
                 END

            WHEN tag = "h1" | tag = "hr" THEN ITERATE

            WHEN tag = "p align=""center""" THEN        /* last line to be stripped */
                 DO
                    bJustStarted = 0
                    ITERATE
                 END

            OTHERWISE NOP
         END
      END

      /* drop horizontal rules until the header of this chunk was written */
      IF tag = "hr" & \(bhrWrite | bLeaveAlone) THEN ITERATE

      /* first header (or <pre>) of the chunk: becomes the target of the local references */
      IF \bReferenceInserted & WORDPOS(tag, "h1 h2 h3 h4 h5 h6 pre") > 0 THEN
      DO
         opentag  = "<"tag">"
         closetag = "</"tag">"

         PARSE VAR tmpLine (opentag) header (closetag)       /* </pre> on same line ? */

         IF tag = "pre" THEN                    /* no header given, supply a <h3>-Tag ! */
         DO
            bNoCloseTag = ( POS(closetag, tmpLine) = 0 )
            IF header = "" | LENGTH(header) > 72 THEN           /* insert infos from first source file */
            DO
               IF gHeaders.InFile.0 = 0 THEN                    /* no references ever made to this file */
                  header = "<strong>*** no references ever made to this section ! ***</strong>"
               ELSE                                             /* use file which made first reference to build header-info */
                  header = gHeaders.InFile.1.eSourceH2 "[" || gHeaders.InFile.1.eSourceText || "]"
            END
            ELSE                                /* header available, no need to write line in hand unaltered */
            DO
               tmpLine = ""
            END

            CALL LINEOUT OutFile, '<h3><a name="' || InFile || '">' || header || '</a></h3>'

            IF tmpLine <> "" THEN               /* line was not processed, write it; it carries its own open tag */
               CALL LINEOUT OutFile, tmpLine
            ELSE IF bNoCloseTag THEN            /* line was used up as header: write open tag, because the
                                                   matching endtag will show up later */
               CALL LINEOUT OutFile, opentag

            IF gHeaders.InFile.0 = 0 THEN       /* build references to source files */
            DO
               CALL LINEOUT OutFile, "<cite><strong>*** no references ever made to this section ! ***</strong></cite>"
            END
            ELSE                                /* build internal references to source[s] of link */
            DO
               sProcessed. = 0                  /* stem to memorize which files were already processed in the following loop */
               tmpRefHint = 'Referenced by:'    /* hint for very first reference */

               DO i = 1 TO gHeaders.InFile.0
                  tmpReference = gHeaders.InFile.i.eHRefSource

                  IF \sProcessed.tmpReference THEN
                  DO
                     CALL LINEOUT OutFile, tmpRefHint '<cite><'leftPattern ||,
                                           "#"  || tmpReference || rightPattern || ">",
                                           gHeaders.InFile.i.eSourceH2 || '</a></cite>'
                     sProcessed.tmpReference = 1                /* don't write multiple references, if one was written already */
                     tmpRefHint = LEFT("", LENGTH(tmpRefHint))  /* replace hint with blanks */
                  END
               END
               DROP sProcessed.
            END
         END
         ELSE   /* normal header supplied, *that* was easy :-) */
         DO
            IF tag <> "h1" THEN                 /* a <h1>-line is not copied to the output */
               CALL LINEOUT OutFile, opentag'<a name="' || InFile || '">' || header || "</a>"closetag
         END

         PresentHeader = header                 /* memorize header */
         bReferenceInserted = 1
         bhrWrite = 1                           /* writing horizontal lines now o.k. */
         ITERATE
      END


      /* only write message at last file */
      IF \bLastFile & tmpLine = "<p>Inf-HTML End Run - Successful</p>" THEN
         ITERATE

      IF POS(leftPattern, tmpLine) > 0 THEN     /* line contains at least one href, rebuild it piece by piece */
      DO
         tmp = tmpLine

         tmpLine = ""
         DO WHILE tmp <> ""
            /* left: text up to the next href, tmpFile: its target, tmp: remainder of the line */
            PARSE VAR tmp left (leftPattern) tmpFile (rightPattern) tmp

            IF tmpFile <> "" THEN
            DO
               IF ABBREV(TRANSLATE(tmpFile), upcPrefix) THEN    /* insert localize character "#" */
                  tmpLine = tmpLine || left || leftPattern || "#"  || tmpFile || rightPattern
               ELSE
                  tmpLine = tmpLine || left || leftPattern || tmpFile || rightPattern

               /* try to get text within anchor; should be used on unnamed "articles" as header and href-target */
               PARSE VAR tmp tmpText "</a>"

               textBeforeAnchor = STRIP(SUBSTR(tmpText, LASTPOS(">", tmpText) + 1))
               IF textBeforeAnchor <> "" THEN
               DO
                  IF \DATATYPE(gHeaders.tmpFile.0, "W") THEN
                     gHeaders.tmpFile.0 = 0

                  tmpIndex = gHeaders.tmpFile.0 + 1                             /* could be a target in more than one source */
                  gHeaders.tmpFile.tmpIndex.eHRefSource = InFile                /* source of href */
                  gHeaders.tmpFile.tmpIndex.eSourceText = textBeforeAnchor      /* text found in source, to be used as header text, just in case */
                  gHeaders.tmpFile.tmpIndex.eSourceH2   = PresentHeader         /* value of Header to use */
                  gHeaders.tmpFile.0 = tmpIndex                                 /* memorize number of references */
               END

            END
            ELSE                                /* no (further) href found, keep the text as is */
               tmpLine = tmpLine || left
         END
      END

      CALL LINEOUT OutFile, tmpLine /* write output file */
   END

   CALL STREAM InFile,        "C", "CLOSE"

   RETURN



/* locate a file-chunk by name within the exposed stem "files." by halving
   the search interval; the names in files.1 ... files.(files.0) have to be
   in ascending order for this to work
      ARG(1)  ... file name to look for (compared in uppercase, without path)
      returns ... index into "files." or 0, if there is no such entry */
BINARY_SEARCH: PROCEDURE EXPOSE files.
    wanted = TRANSLATE(ARG(1))                  /* uppercase, comparisons ignore case */

    lo = 1
    hi = files.0

    DO WHILE lo <= hi
       mid = (lo + hi) % 2                      /* integer division gives the middle entry */
       candidate = TRANSLATE(FILESPEC("N", files.mid))         /* name part only, uppercase */

       SELECT
          WHEN candidate > wanted THEN hi = mid - 1     /* continue in lower half */
          WHEN candidate < wanted THEN lo = mid + 1     /* continue in upper half */
          OTHERWISE RETURN mid                          /* match found ! */
       END
    END

    RETURN 0                                    /* interval exhausted, no match */


