/***************************************************************************** * * * Copyright (C) 1988 by ETAK, Inc. * * All Rights Reserved * * * * INTSTRING - International string processing for TRAVELPILOT * * * * Module Contains: * * skip_alt * * numcmp * * numncmp * * int_ncmp * * int_strcmp (public) * * int_strncmp (public) * * int_strcpy (public) * * int_strncpy (public) * * int_thmcmp (public) * * * * Original code 03-MAY-1988 mth * * Modified for sorting numbers 06-MAY-1988 mth * * mn_table=>external; fixes to satisfy lint 06-MAY-1988 tsk * * Modified to skip everything except letters and numbers 23-JAN-1989 mth * * Fixed ordering of most numbers with slashes 24-JAN-1989 mth * * Fixed problem with AU AM RHEIN, AU"DLENGERNHEIDE 12-MAR-1989 mth * * Changed far to __FAR__, fixed lint complaints 10-OCT-1989 mth * * int_strncmp no longer needs 1st string to be compressed 10-APR-1990 mth * * Include prototype file, add test main 14-NOV-1990 mth * * Fixed problem with umlauts in thumb index 14-FEB-1991 mth * * Added numbers to mn_table for space(32) and dash(45), * * and made ' and ` have the same number as a space, 32. 26-SEP-1995 cmb * * * *****************************************************************************/ #include #include "include.h" #include #include /* Needed for isdigit() */ #include /* Needed for size_t */ #include "idxtyp.h" /* Needed for pchar, puchar */ #include "internat.h" /* Needed for internal constants */ #ifdef PROTOTYPES #include "utlproto.h" /* Prototype file */ #endif /* Declare static function prototypes. */ static int skip_alt( #ifdef PROTOTYPES puchar * #endif ); static int numcmp( #ifdef PROTOTYPES puchar, puchar #endif ); static int numncmp( #ifdef PROTOTYPES puchar, puchar, puchar, puchar, short #endif ); static int int_ncmp( #ifdef PROTOTYPES pchar, pchar, int, short #endif ); /* ESC, SO, and SI should be set equal to their value if alternate character set sequences are to be skipped. */ unsigned char mn_table[256] = { /* multinational ordering of letters/digits */ /* NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI */ 0,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */ SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP, /* SP ! " # $ % & ' ( ) * + , - . / */ 32,SKP,SKP,SKP,SKP,SKP,SKP,32,SKP,SKP,SKP,SKP,SKP,45,SKP,SKP, /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,SKP,SKP,SKP,SKP,SKP,SKP, /* @ A B C D E F G H I J K L M N O */ SKP, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, /* P Q R S T U V W X Y Z [ \ ] ^ _ */ 95, 97, 99,101,103,105,107,109,111,113,115,SKP,SKP,SKP,SKP,SKP, /* ` a b c d e f g h i j k l m n o */ 32, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, /* p q r s t u v w x y z { | } ~ DEL */ 95, 97, 99,101,103,105,107,109,111,113,115,SKP,SKP,SKP,SKP,SKP, /* X80 X81 X82 X83 IND NEL SSA ESA HTS HTJ VTS PLD PLU RI SS2 SS3 */ SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP, /* DCS PU1 PU2 STS CCH MW SPA EPA X98 X99 X9A CSI ST OSC PM APC */ SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP, /* XA0 !! ct lb XA4 yen XA6 sec cur cop foi << XAC XAD XAE XAF */ SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP, /* deg +- 2^ 3^ XB4 mu pp .^ XB8 1^ moi >> 1/4 1/2 XBE ?? */ SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP,SKP, /* A` A' A^ A~ A" Ao AE C, E` E' E^ E" I` I' I^ I" */ 65, 65, 65, 65,65, 65,A_U, 69, 73, 73, 73, 73, 81, 81, 81, 81, /* XD0 N~ O` O' O^ O~ O" OE O/ U` U' U^ U" Y" XDE ss */ SKP, 91, 93, 93, 93, 93,93,O_U,O_U,105,105,105,105,113,SKP,SKP, /* a` a' a^ a~ a" ao ae c, e` e' e^ e" i` i' i^ i" */ 67, 65, 65, 65,65, 65,A_U, 67, 73, 73, 73, 73, 81, 81, 81, 81, /* XF0 n~ o` o' o^ o~ o" oe o/ u` u' u^ u" y" XFE XFF */ SKP, 91, 93, 93, 93, 93,93,O_U,O_U,105,105,105,105,113,SKP,SKP }; int num_compare(s1,s2) unsigned char *s1; unsigned char *s2; { return ((atoi(s1) < atoi(s2)) ? -1: +1); } int simple_compare(s1,s2) unsigned char *s1; unsigned char *s2; { for(; mn_table[*s1] == mn_table[*s2]; ++s1, ++s2) if (*s1 == (unsigned char) '\0') return 0; return ((mn_table[*s1] < mn_table[*s2]) ? -1: +1); } /*-------------------------------------------------------------------------*/ /* Alternate character set sequences begin with $ , where is a single character specifying the character set chosen. The alternate character set is then invoked by a and returned to normal by a */ static int skip_alt (str) puchar *str; { int skipped = 0; /* Skip alternate character set sequences and other ignored characters. */ while (**str) { switch (mn_table[**str]) { /* The alternate sequence skipping code is turned off, since it is likely that comparisons may be made on sequences that are entirely escaped. */ /* case ESC: if (*((*str)+1) != '$') return (skipped); else if (*((*str)+2)) { (*str) += 3; skipped = 3; } break; case SI: skipped++; while (*((*str)++) != SO) { skipped++; if (!(**str)) return (skipped); } skipped++; break; */ case SKP: /* Characters with this code are not compared. */ skipped++; (*str)++; break; default: return (skipped); } } return (skipped); } /*-------------------------------------------------------------------------*/ static int numcmp (str1, str2) puchar str1; /* first input string */ puchar str2; /* second input string */ { int leading_order; /* the leading digits of str1 and str2 are always different, they will determine which number is greater if the digit-sequences are the same length */ if (*str1 > *str2) leading_order = 1; else leading_order = -1; for (str1++, str2++; *str1 && *str2; str1++, str2++) { /* check for digits */ if (isdigit(*str1)) { if (isdigit(*str2)) continue; else /* str1 has more digits than str2, and hence is larger */ return (1); } else if (isdigit(*str2)) /* str1 has fewer digits than str2, and hence is smaller */ return (-1); else return (leading_order); } /* one (or both) of the numbers terminates the string */ if (*str1) { if (isdigit(*str1)) /* str1 has more digits than str2, and hence is larger */ return (1); else /* both numbers have the same number of digits */ return (leading_order); } else { if (isdigit(*str2)) /* str1 has fewer digits than str2, and hence is smaller */ return (-1); else /* both numbers have the same number of digits */ return (leading_order); } } /*-------------------------------------------------------------------------*/ static int numncmp (str1, str2, str1_end, str2_end, flag) puchar str1; /* first input string */ puchar str2; /* second input string */ puchar str1_end; /* first input string end */ puchar str2_end; /* second input string end */ short flag; /* set to 0 if numncmp is to return equality when it can't decide the value of the comparison */ { int leading_order; puchar s; short len1, len2; if (flag) { leading_order = (*str1 > *str2) ? 1 : -1; } for (s = str1; (s < str1_end) && isdigit (*s); s++) { ; } str1_end = s; for (s = str2; (s < str2_end) && isdigit (*s); s++) { ; } str2_end = s; len1 = str1_end - str1; len2 = str2_end - str2; return ((len1 != len2) ? ((len1 > len2) ? 1 : -1) : (flag ? leading_order : 0)); } #define STR1_BROKE (1) #define STR2_BROKE (2) #define BOTH_BROKE (STR1_BROKE | STR2_BROKE) /*-------------------------------------------------------------------------*/ static int int_ncmp (pstr1, pstr2, leng, flag) pchar pstr1; /* first input string */ pchar pstr2; /* second input string */ int leng; /* where to stop if a null isn't reached */ short flag; /* set to 0 if numncmp is to return equality when it can't decide the value of the comparison */ { puchar str1 = (puchar) pstr1; /* first input string */ puchar str2 = (puchar) pstr2; /* second input string */ unsigned char last_match = 0; /* Last matching character */ puchar str1_end; /* First input string end */ puchar str2_end; /* Second input string end */ int skipped; /* Number of characters skipped in skip_alt */ int number_break = 0; /* Broken number flag (e.g., 1 1/2) */ str1_end = str1+leng; str2_end = str2+leng; for (; (*str1 && *str2) && (str1 < str1_end || str2 < str2_end); str1++, str2++) { /* Skip over any imbedded alternate character set sequences. */ number_break = ((skipped = skip_alt (&str1)) != 0) ? STR1_BROKE : 0; str1_end += skipped; number_break |= ((skipped = skip_alt (&str2)) != 0) ? STR2_BROKE : 0; str2_end += skipped; /* Stop comparing if one of the strings terminates after an alternate character set sequence. */ if (!(*str1) || !(*str2)) break; /* Also stop if both pointers have gone beyond the length. */ if (str1 >= str1_end && str2 >= str2_end) break; /* See whether next characters are equal */ if (mn_table[*str1] == mn_table[*str2]) { /* Check for broken numbers, which are only equal if broken in the same place. (e.g., 1 1/2 == 1 1 2 != 1/12). */ if (number_break ? (isdigit (last_match) && isdigit (*str1)) : 0) { switch (number_break) { case BOTH_BROKE: /* The two breaks were in the same place. */ break; case STR1_BROKE: /* The number sequence in string 1 broke, therefore it has fewer digits, and is less than string 2. */ return (-1); case STR2_BROKE: /* The number sequence in string 2 broke, therefore it has fewer digits, and is less than string 1. */ return (1); } } last_match = *str1; } /* Check for trailing numbers. */ else if (isdigit(*str1)) { if (isdigit(*str2)) { if (number_break ? isdigit (last_match) : 0) { switch (number_break) { case BOTH_BROKE: /* The two breaks were in the same place. */ return (numncmp (str1, str2, str1_end, str2_end, flag)); case STR1_BROKE: /* The number sequence in string 1 broke, therefore it has fewer digits, and is less than string 2. */ return (-1); case STR2_BROKE: /* The number sequence in string 2 broke, therefore it has fewer digits, and is less than string 1. */ return (1); } } else return (numncmp (str1, str2, str1_end, str2_end, flag)); } else { if (isdigit(last_match)) /* The number in str2 has fewer digits than str1. */ return (1); /* A number is beginning in str1, but not str2. */ else if (mn_table[*str1] < mn_table[*str2]) return (-1); else return (1); } } else if (isdigit(*str2)) { if (isdigit(last_match)) /* The number in str1 has fewer digits than str2. */ return (-1); /* A number is beginning in str2, but not str1. */ else if (mn_table[*str1] < mn_table[*str2]) return (-1); else return (1); } /* Check for umlaut ordering only when the difference is right. */ else if ((((int) mn_table[*str2]) - ((int) mn_table[*str1])) == COMPOUND_DIFF) { switch (mn_table[*str2]) { case A_U: case O_U: case U_U: if (!(*(++str1))) { /* str1 ended, making it effectively shorter than str2 */ if (str1 < str1_end) return (-1); else return (0); } if (skip_alt (&str1)) { if (!(*str1)) { if (str1 < str1_end) return (-1); else return (0); } } if (mn_table[*str1] == mn_table['E']) { /* the appropriate char+E diphthong matched */ last_match = *str1; continue; } /* Check for ordering of this character relative to E. */ else if (mn_table[*str1] < mn_table['E']) return (-1); else return (1); /* break; */ default: return (-1); } } else if ((((int) mn_table[*str1]) - ((int) mn_table[*str2])) == COMPOUND_DIFF) { switch (mn_table[*str1]) { case A_U: case O_U: case U_U: if (!(*(++str2))) { /* str2 ended, making it effectively shorter than str1 */ if (str2 < str2_end) return (1); else return (0); } if (skip_alt (&str2)) { if (!(*str2)) { if (str2 < str2_end) return (1); else return (0); } } if (mn_table[*str2] == mn_table['E']) { /* The appropriate char+E diphthong matched. */ last_match = *str2; continue; } /* Check for ordering of this character relative to E. */ else if (mn_table[*str2] < mn_table['E']) return (1); else return (-1); /* break; */ default: return (1); } } else if (mn_table[*str1] < mn_table[*str2]) return (-1); else return (1); } /* The strings are equal up to the point where one (or both) terminates, determine whether either string is not yet terminated. This logic does not work well when both strings end with a digit and they have been truncated. */ if (*str1 && str1 < str1_end) { skip_alt (&str1); if (*str1 && str1 < str1_end && str2 != str2_end) return (1); else return (0); } else if (*str2 && str2 < str2_end) { skip_alt (&str2); if (*str2 && str2 < str2_end && str1 != str1_end) return (-1); else return (0); } else return (0); } /*-------------------------------------------------------------------------*/ int int_strcmp (pstr1, pstr2) pchar pstr1; /* first input string */ pchar pstr2; /* second input string */ { puchar str1 = (puchar) pstr1; /* first input string */ puchar str2 = (puchar) pstr2; /* second input string */ unsigned char last_match = 0; /* Last matching character */ int number_break = 0; /* Broken number flag (e.g., 1 1/2) */ for (; *str1 && *str2; str1++, str2++) { /* Skip over any imbedded alternate character set sequences. */ number_break = skip_alt (&str1) ? STR1_BROKE : 0; number_break |= skip_alt (&str2) ? STR2_BROKE : 0; /* Stop comparing if one of the strings terminates after an alternate character set sequence. */ if (!(*str1) || !(*str2)) break; /* See whether next characters are equal. */ if (mn_table[*str1] == mn_table[*str2]) { /* Check for broken numbers, which are only equal if broken in the same place. (e.g., 1 1/2 == 1 1 2 != 1/12). */ if (number_break ? (isdigit (last_match) && isdigit (*str1)) : 0) { switch (number_break) { case BOTH_BROKE: /* The two breaks were in the same place. */ break; case STR1_BROKE: /* The number sequence in string 1 broke, therefore it has fewer digits, and is less than string 2. */ return (-1); case STR2_BROKE: /* The number sequence in string 2 broke, therefore it has fewer digits, and is less than string 1. */ return (1); } } last_match = *str1; } /* Check for trailing numbers. */ else if (isdigit(*str1)) { if (isdigit(*str2)) { if (number_break ? isdigit (last_match) : 0) { switch (number_break) { case BOTH_BROKE: /* The two breaks were in the same place. */ return (numcmp (str1, str2)); case STR1_BROKE: /* The number sequence in string 1 broke, therefore it has fewer digits, and is less than string 2. */ return (-1); case STR2_BROKE: /* The number sequence in string 2 broke, therefore it has fewer digits, and is less than string 1. */ return (1); } } else return (numcmp (str1, str2)); } else { if (isdigit (last_match)) /* the number in str2 has fewer digits than str1 */ return (1); /* a number is beginning in str1, but not str2 */ else if (mn_table[*str1] < mn_table[*str2]) return (-1); else return (1); } } else if (isdigit(*str2)) { if (isdigit (last_match)) /* the number in str1 has fewer digits than str2 */ return (-1); /* a number is beginning in str2, but not str1 */ else if (mn_table[*str1] < mn_table[*str2]) return (-1); else return (1); } /* check for umlaut ordering only when the difference is right */ else if ((((int) mn_table[*str2]) - ((int) mn_table[*str1])) == COMPOUND_DIFF) { switch (mn_table[*str2]) { case A_U: case O_U: case U_U: if (!(*(++str1))) /* str1 ended, making it effectively shorter than str2 */ return (-1); if (skip_alt (&str1)) { if (!(*str1)) return (-1); } if (mn_table[*str1] == mn_table['E']) /* the appropriate char+E diphthong matched */ continue; /* check for ordering of this character relative to E */ else if (mn_table[*str1] < mn_table['E']) return (-1); else return (1); /* break; */ default: return (-1); } } else if ((((int) mn_table[*str1]) - ((int) mn_table[*str2])) == COMPOUND_DIFF) { switch (mn_table[*str1]) { case A_U: case O_U: case U_U: if (!(*(++str2))) /* str2 ended, making it effectively shorter than str1 */ return (1); if (skip_alt (&str2)) { if (!(*str2)) return (1); } if (mn_table[*str2] == mn_table['E']) /* the appropriate char+E diphthong matched */ continue; /* check for ordering of this character relative to E */ else if (mn_table[*str2] < mn_table['E']) return (1); else return (-1); /* break; */ default: return (1); } } else if (mn_table[*str1] < mn_table[*str2]) return (-1); else return (1); } /* the strings are equal up to the point where one (or both) terminates, determine whether either string is not yet terminated (this logic also works for numbers) */ if (*str1) { skip_alt (&str1); if (*str1) return (1); else return (0); } else if (*str2) { skip_alt (&str2); if (*str2) return (-1); else return (0); } else return (0); } /*-------------------------------------------------------------------------*/ int int_strncmp (pstr1, pstr2, leng) pchar pstr1; /* first input string */ pchar pstr2; /* second input string */ int leng; /* where to stop if a null isn't reached */ { return (int_ncmp (pstr1, pstr2, leng, 1)); } /*-------------------------------------------------------------------------*/ /* The following routine copies one string to another, leaving out the characters that are skipped in the comparison. Alternate character sequences are copied. This type of copy operation is necessary to insure that numbered comparisons (a la int_strncmp) are done correctly. A NULL terminator is placed at the end of the output string. */ void int_strcpy(out_str, in_str) pchar out_str; /* place to copy input string to */ pchar in_str; /* input string */ { puchar ostr = (puchar) out_str; puchar istr = (puchar) in_str; while (*istr) { switch (mn_table[*istr]) { case SKP: istr++; break; default: *ostr++ = *istr++; break; } } *ostr = 0; } /*-------------------------------------------------------------------------*/ /* The following routine copies one string to another, leaving out the characters that are skipped in the comparison. Alternate character sequences are copied. This type of copy operation is necessary to insure that numbered comparisons (a la int_strncmp) are done correctly. The input length specifies the maximum number of characters that will be copied from the input string if no NULL character is first encountered in the input string. If a NULL is encountered before the requested number of characters are copied to the output string, then the output string is padded with NULLs out to the length requested. Otherwise, no NULLs are put into the output string. */ void int_strncpy(out_str, in_str, leng) pchar out_str; /* place to copy input string to */ pchar in_str; /* input string */ int leng; /* number of chars to copy (if no NULL) */ { puchar out_end; /* the end of the output string */ puchar ostr = (puchar) out_str; puchar istr = (puchar) in_str; out_end = ostr+leng; while (*istr && ostr < out_end) { switch (mn_table[*istr]) { case SKP: istr++; break; default: *ostr++ = *istr++; break; } } while (ostr < out_end) *ostr++ = 0; } /*-------------------------------------------------------------------------*/ int int_thmcmp (pstr1, pstr2, leng) pchar pstr1; /* first input string */ pchar pstr2; /* second input string */ int leng; /* where to stop if a null isn't reached */ { return (int_ncmp (pstr1, pstr2, leng, 0)); } #ifdef TEST #define compare(A) ((A) ? (((A) < 0) ? '<' : '>') : '=') #include static int read_string (str, prompt) char *str; /* String read from input */ char *prompt; /* Prompt string */ { /* This function prompts for and reads a string from standard input, returning zero when only RETURN was entered. */ printf ("%s", prompt); return (gets (str) ? *str : 0); } main () { char str1[80], str2[80], str3[80]; int cmp, cmpn; int leng; /* The following tests the comparison functions int_strcmp and int_strncmp, providing an indication of the sense of the comparison. */ while (read_string (str1, "Enter string 1: ")) { if (read_string (str2, "Enter string 2: ")) { while (read_string (str3, "Enter comparison length: ")) { sscanf(str3, "%d", &leng); if (leng > 0) { cmpn = int_strncmp ((pchar) str1, (pchar) str2, leng); printf("%.*s %c %.*s\n", leng, str1, compare (cmpn), leng, str2); break; } else printf ("Invalid length: %d\n", leng); } cmp = int_strcmp ((pchar) str1, (pchar) str2); printf("%s %c %s\n", str1, compare (cmp), str2); } else break; } } #endif /* TEST */