fix up hyphen 2.8.2/2.8.3 conflicts

2011-10-24 15:38:01 +01:00
parent 161a463b81
commit e894342d6b
2 changed files with 1 addition and 326 deletions
--- a/hyphen/hyphen-2.7.1-2.8.2.patch
+++ b/hyphen/hyphen-2.7.1-2.8.2.patch
@@ -1,325 +0,0 @@
 --- misc/build/hyphen-2.7.1/hyphen.c.old	2011-10-07 15:51:25.883686906 +0200
 +++ misc/build/hyphen-2.7.1/hyphen.c	2011-10-07 15:51:59.363686900 +0200
@@ -242,99 +242,45 @@
 }
 #endif
 -HyphenDict *
 -hnj_hyphen_load (const char *fn)
 -{
 -  HyphenDict *dict[2];
 -  HashTab *hashtab;
 -  FILE *f;
 -  char buf[MAX_CHARS];
 +void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
 +  int i, j;
   char word[MAX_CHARS];
   char pattern[MAX_CHARS];
   char * repl;
   signed char replindex;
   signed char replcut;
 -  int state_num = 0, last_state;
 -  int i, j, k;
 +  int state_num = 0;
 +  int last_state;
   char ch;
   int found;
 -  HashEntry *e;
 -  int nextlevel = 0;
 -
 -  f = fopen (fn, "r");
 -  if (f == NULL)
 -    return NULL;
 -// loading one or two dictionaries (separated by NEXTLEVEL keyword)
 -for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
 -  hashtab = hnj_hash_new ();
 -#ifdef VERBOSE
 -  global = hashtab;
 -#endif
 -  hnj_hash_insert (hashtab, "", 0);
 -  dict[k] = hnj_malloc (sizeof(HyphenDict));
 -  dict[k]->num_states = 1;
 -  dict[k]->states = hnj_malloc (sizeof(HyphenState));
 -  dict[k]->states[0].match = NULL;
 -  dict[k]->states[0].repl = NULL;
 -  dict[k]->states[0].fallback_state = -1;
 -  dict[k]->states[0].num_trans = 0;
 -  dict[k]->states[0].trans = NULL;
 -  dict[k]->nextlevel = NULL;
 -  dict[k]->lhmin = 0;
 -  dict[k]->rhmin = 0;
 -  dict[k]->clhmin = 0;
 -  dict[k]->crhmin = 0;
 -  dict[k]->nohyphen = NULL;
 -  dict[k]->nohyphenl = 0;
 -
 -  /* read in character set info */
 -  if (k == 0) {
 -    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
 -    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
 -    for (i=0;i<MAX_NAME;i++)
 -      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
 -        dict[k]->cset[i] = 0;
 -    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
 -  } else {
 -    strcpy(dict[k]->cset, dict[0]->cset);
 -    dict[k]->utf8 = dict[0]->utf8;
 -  }
 -
 -  while (fgets (buf, sizeof(buf), f) != NULL)
 -    {
 -      if (buf[0] != '%')
 -	{
 -	  if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
 -	    nextlevel = 1;
 -	    break;
 -	  } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
 -	    dict[k]->lhmin = atoi(buf + 13);
 -	    continue;
 +	  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
 +	    dict->lhmin = atoi(buf + 13);
 +	    return;
 	  } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
 -	    dict[k]->rhmin = atoi(buf + 14);
 -	    continue;
 +	    dict->rhmin = atoi(buf + 14);
 +	    return;
 	  } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
 -	    dict[k]->clhmin = atoi(buf + 21);
 -	    continue;
 +	    dict->clhmin = atoi(buf + 21);
 +	    return;
 	  } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
 -	    dict[k]->crhmin = atoi(buf + 22);
 -	    continue;
 +	    dict->crhmin = atoi(buf + 22);
 +	    return;
 	  } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
 	    char * space = buf + 8;
 	    while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
 -	    if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
 -	    if (dict[k]->nohyphen) {
 -	        char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
 +	    if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
 +	    if (dict->nohyphen) {
 +	        char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
 	        *nhe = 0;
 -	        for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
 +	        for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
 	                if (*nhe == ',') {
 -	                    dict[k]->nohyphenl++;
 +	                    dict->nohyphenl++;
 	                    *nhe = 0;
 	                }
 	        }
 	    }
 -	    continue;
 +	    return;
 	  } 
 	  j = 0;
 	  pattern[j] = '0';
@@ -379,7 +325,7 @@
           } else {
             if (*word == '.') i++;
             /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
 -            if (dict[k]->utf8) {
 +            if (dict->utf8) {
                 int pu = -1;        /* unicode character position */
                 int ps = -1;        /* unicode start position (original replindex) */
                 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
@@ -403,14 +349,14 @@
 	  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
 #endif
 	  found = hnj_hash_lookup (hashtab, word);
 -	  state_num = hnj_get_state (dict[k], hashtab, word);
 -	  dict[k]->states[state_num].match = hnj_strdup (pattern + i);
 -	  dict[k]->states[state_num].repl = repl;
 -	  dict[k]->states[state_num].replindex = replindex;
 +	  state_num = hnj_get_state (dict, hashtab, word);
 +	  dict->states[state_num].match = hnj_strdup (pattern + i);
 +	  dict->states[state_num].repl = repl;
 +	  dict->states[state_num].replindex = replindex;
           if (!replcut) {
 -            dict[k]->states[state_num].replcut = (signed char) strlen(word);
 +            dict->states[state_num].replcut = (signed char) strlen(word);
           } else {
 -            dict[k]->states[state_num].replcut = replcut;
 +            dict->states[state_num].replcut = replcut;
           }
 	  /* now, put in the prefix transitions */
@@ -420,11 +366,81 @@
 	      ch = word[j - 1];
 	      word[j - 1] = '\0';
 	      found = hnj_hash_lookup (hashtab, word);
 -	      state_num = hnj_get_state (dict[k], hashtab, word);
 -	      hnj_add_trans (dict[k], state_num, last_state, ch);
 +	      state_num = hnj_get_state (dict, hashtab, word);
 +	      hnj_add_trans (dict, state_num, last_state, ch);
 	    }
 -	}
 +}
 +
 +HyphenDict *
 +hnj_hyphen_load (const char *fn)
 +{
 +  HyphenDict *dict[2];
 +  HashTab *hashtab;
 +  FILE *f;
 +  char buf[MAX_CHARS];
 +  int nextlevel = 0;
 +  int i, j, k;
 +  HashEntry *e;
 +  int state_num = 0;
 +
 +  f = fopen (fn, "r");
 +  if (f == NULL)
 +    return NULL;
 +
 +// loading one or two dictionaries (separated by NEXTLEVEL keyword)
 +for (k = 0; k < 2; k++) { 
 +  hashtab = hnj_hash_new ();
 +#ifdef VERBOSE
 +  global = hashtab;
 +#endif
 +  hnj_hash_insert (hashtab, "", 0);
 +  dict[k] = hnj_malloc (sizeof(HyphenDict));
 +  dict[k]->num_states = 1;
 +  dict[k]->states = hnj_malloc (sizeof(HyphenState));
 +  dict[k]->states[0].match = NULL;
 +  dict[k]->states[0].repl = NULL;
 +  dict[k]->states[0].fallback_state = -1;
 +  dict[k]->states[0].num_trans = 0;
 +  dict[k]->states[0].trans = NULL;
 +  dict[k]->nextlevel = NULL;
 +  dict[k]->lhmin = 0;
 +  dict[k]->rhmin = 0;
 +  dict[k]->clhmin = 0;
 +  dict[k]->crhmin = 0;
 +  dict[k]->nohyphen = NULL;
 +  dict[k]->nohyphenl = 0;
 +
 +  /* read in character set info */
 +  if (k == 0) {
 +    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
 +    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
 +    for (i=0;i<MAX_NAME;i++)
 +      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
 +        dict[k]->cset[i] = 0;
 +    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
 +  } else {
 +    strcpy(dict[k]->cset, dict[0]->cset);
 +    dict[k]->utf8 = dict[0]->utf8;
 +  }
 +
 +  if (k == 0 || nextlevel) {
 +    while (fgets (buf, sizeof(buf), f) != NULL) {
 +      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
 +	nextlevel = 1;
 +	break;
 +      } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
     }
 +  } else if (k == 1) {
 +    /* default first level: hyphen and ASCII apostrophe */
 +    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN -,'\n", dict[k], hashtab);
 +    else hnj_hyphen_load_line("NOHYPHEN -,',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
 +    hnj_hyphen_load_line("1-1\n", dict[k], hashtab); /* hyphen */
 +    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
 +    if (dict[0]->utf8) {
 +      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
 +      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
 +    }
 +  }
   /* Could do unioning of matches here (instead of the preprocessor script).
      If we did, the pseudocode would look something like this:
@@ -476,7 +492,15 @@
   state_num = 0;
 }
   fclose(f);
 -  if (k == 2) dict[0]->nextlevel = dict[1];
 +  if (nextlevel) dict[0]->nextlevel = dict[1];
 +  else {
 +    dict[1] -> nextlevel = dict[0];
 +    dict[1]->lhmin = dict[0]->lhmin;
 +    dict[1]->rhmin = dict[0]->rhmin;
 +    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 2);
 +    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 2);
 +    return dict[1];
 +  }
   return dict[0];
 }
@@ -527,8 +551,13 @@
   j = 0;
   prep_word[j++] = '.';
 -  for (i = 0; i < word_size; i++)
 +  for (i = 0; i < word_size; i++) {
 +    if (word[i] <= '9' && word[i] >= '0') {
 +      prep_word[j++] = '.';
 +    } else {
       prep_word[j++] = word[i];
 +    }
 +  }
   prep_word[j++] = '.';
   prep_word[j] = '\0';
@@ -670,6 +699,9 @@
       i += hnj_ligature(word[2]);
     }
 +    // ignore numbers
 +    for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
 +
     for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
       // check length of the non-standard part
       if (*rep && *pos && *cut && (*rep)[j]) {
@@ -696,9 +728,13 @@
 int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
 	char *** rep, int ** pos, int ** cut, int rhmin)
 {
 -    int i;
 -    int j = word_size - 2;    
 -    for (i = 1; i < rhmin && j > 0; j--) {
 +    int i = 1;
 +    int j;
 +
 +    // ignore numbers
 +    for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
 +
 +    for (j = word_size - 2; i < rhmin && j > 0; j--) {
       // check length of the non-standard part
       if (*rep && *pos && *cut && (*rep)[j]) {
         char * rh = strchr((*rep)[j], '=');
@@ -756,8 +792,15 @@
   j = 0;
   prep_word[j++] = '.';
 -  for (i = 0; i < word_size; i++)
 +  for (i = 0; i < word_size; i++) {
 +    if (word[i] <= '9' && word[i] >= '0') {
 +      prep_word[j++] = '.';
 +    } else {
       prep_word[j++] = word[i];
 +    }
 +  }
 +
 +
   prep_word[j++] = '.';
   prep_word[j] = '\0';
@@ -1093,8 +1136,10 @@
 	char *hyphword, char *** rep, int ** pos, int ** cut,
 	int lhmin, int rhmin, int clhmin, int crhmin)
 {
 -  lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
 -  rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
 +  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
 +  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
 +  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
 +  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
     clhmin, crhmin, 1, 1);
   hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
--- a/hyphen/makefile.mk
+++ b/hyphen/makefile.mk
@@ -44,7 +44,7 @@ ADDITIONAL_FILES += makefile.mk
 PATCH_FILES= \
    hyphen-2.7.1.patch \
    hyphen-2.7.1-read-charset.patch \
-    hyphen-2.7.1-2.8.2.patch
+    hyphen-2.7.1-2.8.3.patch
 .IF "$(GUI)"=="UNX"
 CONFIGURE_DIR=$(BUILD_DIR)