| File: | libs/tiff-4.0.2/contrib/iptcutil/iptcutil.c |
| Location: | line 395, column 3 |
| Description: | Value stored to 'length' is never read |
| 1 | /* $Id: iptcutil.c,v 1.8 2011-05-08 00:44:18 fwarmerdam Exp $ */ |
| 2 | |
| 3 | #include "tif_config.h" |
| 4 | |
| 5 | #include <stdio.h> |
| 6 | #include <stdlib.h> |
| 7 | #include <string.h> |
| 8 | #include <ctype.h> |
| 9 | |
| 10 | #ifdef HAVE_STRINGS_H1 |
| 11 | # include <strings.h> |
| 12 | #endif |
| 13 | |
| 14 | #ifdef HAVE_IO_H |
| 15 | # include <io.h> |
| 16 | #endif |
| 17 | |
| 18 | #ifdef HAVE_FCNTL_H1 |
| 19 | # include <fcntl.h> |
| 20 | #endif |
| 21 | |
/* Length-limited, case-insensitive compare; the Windows CRT names it strnicmp. */
#if defined(WIN32)
#  define STRNICMPstrncasecmp strnicmp
#else
#  define STRNICMPstrncasecmp strncasecmp
#endif
| 27 | |
/* Associates an IPTC record number with the label used for it in text output. */
typedef struct _tag_spec
{
  short id;    /* record number, compared with the record byte read from the stream */
  char *name;  /* human-readable label written before the '=' */
} tag_spec;

/* Records this tool knows a label for; the table is searched linearly by id. */
static tag_spec tags[] = {
  {   5, "Image Name" },
  {   7, "Edit Status" },
  {  10, "Priority" },
  {  15, "Category" },
  {  20, "Supplemental Category" },
  {  22, "Fixture Identifier" },
  {  25, "Keyword" },
  {  30, "Release Date" },
  {  35, "Release Time" },
  {  40, "Special Instructions" },
  {  45, "Reference Service" },
  {  47, "Reference Date" },
  {  50, "Reference Number" },
  {  55, "Created Date" },
  {  60, "Created Time" },
  {  65, "Originating Program" },
  {  70, "Program Version" },
  {  75, "Object Cycle" },
  {  80, "Byline" },
  {  85, "Byline Title" },
  {  90, "City" },
  {  95, "Province State" },
  { 100, "Country Code" },
  { 101, "Country" },
  { 103, "Original Transmission Reference" },
  { 105, "Headline" },
  { 110, "Credit" },
  { 115, "Source" },
  { 116, "Copyright String" },
  { 120, "Caption" },
  { 121, "Local Caption" },
  { 122, "Caption Writer" },
  { 200, "Custom Field 1" },
  { 201, "Custom Field 2" },
  { 202, "Custom Field 3" },
  { 203, "Custom Field 4" },
  { 204, "Custom Field 5" },
  { 205, "Custom Field 6" },
  { 206, "Custom Field 7" },
  { 207, "Custom Field 8" },
  { 208, "Custom Field 9" },
  { 209, "Custom Field 10" },
  { 210, "Custom Field 11" },
  { 211, "Custom Field 12" },
  { 212, "Custom Field 13" },
  { 213, "Custom Field 14" },
  { 214, "Custom Field 15" },
  { 215, "Custom Field 16" },
  { 216, "Custom Field 17" },
  { 217, "Custom Field 18" },
  { 218, "Custom Field 19" },
  { 219, "Custom Field 20" }
};
| 91 | |
/*
 * Write the first len bytes of s to ofile as one double-quoted line.
 *
 * The output uses HTML conventions to preserve control characters and
 * such: '&' and '"' are written as named entities ('<' and '>' too when
 * HANDLE_GT_LT is defined), control characters are written as decimal
 * "&#N;" references, and every other byte is copied through unchanged.
 * The line is finished with a closing quote and a newline.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /* <ctype.h> functions need a value representable as unsigned char,
       so never hand them a (possibly negative) plain char. */
    int c = (unsigned char) *s;

    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}
| 126 | |
/* One named HTML entity and the single character it stands for. */
typedef struct _html_code
{
  short len;         /* length of code, from the '&' through the ';' */
  const char *code;  /* entity text */
  const char val;    /* character that replaces the entity */
} html_code;

/* Named entities understood by convertHTMLcodes. */
static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
  { 4, "&lt;", '<' },
  { 4, "&gt;", '>' },
#endif
  { 5, "&amp;", '&' },
  { 6, "&quot;", '"' }
};

/*
 * This routine converts an HTML escape sequence at the start of s back
 * to the original character, in place: the sequence is overwritten by
 * that single character and the rest of the string is moved down.
 *
 *   s    NUL-terminated, writable text expected to start with '&'
 *   len  number of characters available at s
 *
 * Handles decimal references "&#N;" whose ';' lies at offset 3..5 and
 * the named entities in html_codes (matched without regard to case).
 *
 * - returns the number of characters dropped, or 0 when s is NULL,
 *   empty, or does not start with a complete recognised sequence (s is
 *   then left untouched).
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <= 0 || s == NULL || *s == '\0')
    return 0;

  if (s[1] == '#')
    {
      int val, o;

      if (sscanf(s, "&#%d;", &val) == 1)
        {
          /* Find the terminating ';' without walking past the end of
             the text or beyond the longest reference accepted. */
          for (o = 3; o <= 5 && o < len && s[o] != '\0' && s[o] != ';'; o++)
            ;
          if (o > 5 || o >= len || s[o] != ';')
            return 0;

          /* Source and destination overlap, so strcpy is not allowed here. */
          memmove(s + 1, s + 1 + o, strlen(s + 1 + o) + 1);
          *s = (char) val;
          return o;
        }
    }
  else
    {
      size_t i;
      const size_t codes = sizeof(html_codes) / sizeof(html_codes[0]);

      for (i = 0; i < codes; i++)
        {
          const html_code *hc = &html_codes[i];

          if (hc->len <= len &&
              STRNICMPstrncasecmp(s, hc->code, (size_t) hc->len) == 0)
            {
              memmove(s + 1, s + hc->len, strlen(s + hc->len) + 1);
              *s = hc->val;
              return hc->len - 1;
            }
        }
    }

  return 0;
}
| 194 | |
| 195 | int formatIPTC(FILE *ifile, FILE *ofile) |
| 196 | { |
| 197 | unsigned int |
| 198 | foundiptc, |
| 199 | tagsfound; |
| 200 | |
| 201 | unsigned char |
| 202 | recnum, |
| 203 | dataset; |
| 204 | |
| 205 | char |
| 206 | *readable, |
| 207 | *str; |
| 208 | |
| 209 | long |
| 210 | tagindx, |
| 211 | taglen; |
| 212 | |
| 213 | int |
| 214 | i, |
| 215 | tagcount = sizeof(tags) / sizeof(tag_spec); |
| 216 | |
| 217 | char |
| 218 | c; |
| 219 | |
| 220 | foundiptc = 0; /* found the IPTC-Header */ |
| 221 | tagsfound = 0; /* number of tags found */ |
| 222 | |
| 223 | c = getc(ifile)_IO_getc (ifile); |
| 224 | while (c != EOF(-1)) |
| 225 | { |
| 226 | if (c == 0x1c) |
| 227 | foundiptc = 1; |
| 228 | else |
| 229 | { |
| 230 | if (foundiptc) |
| 231 | return -1; |
| 232 | else |
| 233 | continue; |
| 234 | } |
| 235 | |
| 236 | /* we found the 0x1c tag and now grab the dataset and record number tags */ |
| 237 | dataset = getc(ifile)_IO_getc (ifile); |
| 238 | if ((char) dataset == EOF(-1)) |
| 239 | return -1; |
| 240 | recnum = getc(ifile)_IO_getc (ifile); |
| 241 | if ((char) recnum == EOF(-1)) |
| 242 | return -1; |
| 243 | /* try to match this record to one of the ones in our named table */ |
| 244 | for (i=0; i< tagcount; i++) |
| 245 | { |
| 246 | if (tags[i].id == recnum) |
| 247 | break; |
| 248 | } |
| 249 | if (i < tagcount) |
| 250 | readable = tags[i].name; |
| 251 | else |
| 252 | readable = ""; |
| 253 | |
| 254 | /* then we decode the length of the block that follows - long or short fmt */ |
| 255 | c = getc(ifile)_IO_getc (ifile); |
| 256 | if (c == EOF(-1)) |
| 257 | return 0; |
| 258 | if (c & (unsigned char) 0x80) |
| 259 | { |
| 260 | unsigned char |
| 261 | buffer[4]; |
| 262 | |
| 263 | for (i=0; i<4; i++) |
| 264 | { |
| 265 | c = buffer[i] = getc(ifile)_IO_getc (ifile); |
| 266 | if (c == EOF(-1)) |
| 267 | return -1; |
| 268 | } |
| 269 | taglen = (((long) buffer[ 0 ]) << 24) | |
| 270 | (((long) buffer[ 1 ]) << 16) | |
| 271 | (((long) buffer[ 2 ]) << 8) | |
| 272 | (((long) buffer[ 3 ])); |
| 273 | } |
| 274 | else |
| 275 | { |
| 276 | unsigned char |
| 277 | x = c; |
| 278 | |
| 279 | taglen = ((long) x) << 8; |
| 280 | x = getc(ifile)_IO_getc (ifile); |
| 281 | if ((char)x == EOF(-1)) |
| 282 | return -1; |
| 283 | taglen |= (long) x; |
| 284 | } |
| 285 | /* make a buffer to hold the tag data and snag it from the input stream */ |
| 286 | str = (char *) malloc((unsigned int) (taglen+1)); |
| 287 | if (str == (char *) NULL((void*)0)) |
| 288 | { |
| 289 | printf("Memory allocation failed"); |
| 290 | return 0; |
| 291 | } |
| 292 | for (tagindx=0; tagindx<taglen; tagindx++) |
| 293 | { |
| 294 | c = str[tagindx] = getc(ifile)_IO_getc (ifile); |
| 295 | if (c == EOF(-1)) |
| 296 | { |
| 297 | free(str); |
| 298 | return -1; |
| 299 | } |
| 300 | } |
| 301 | str[ taglen ] = 0; |
| 302 | |
| 303 | /* now finish up by formatting this binary data into ASCII equivalent */ |
| 304 | if (strlen(readable) > 0) |
| 305 | fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable); |
| 306 | else |
| 307 | fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum); |
| 308 | formatString( ofile, str, taglen ); |
| 309 | free(str); |
| 310 | |
| 311 | tagsfound++; |
| 312 | |
| 313 | c = getc(ifile)_IO_getc (ifile); |
| 314 | } |
| 315 | return tagsfound; |
| 316 | } |
| 317 | |
/* Forward declaration: main() calls tokenizer() ahead of its definition. */
int tokenizer(unsigned inflag, char *token, int tokmax, char *line,
              char *white, char *brkchar, char *quote, char eschar,
              char *brkused, int *next, char *quoted);
| 321 | |
/*
 * Read one line of any length from file into the heap buffer b, growing
 * the buffer with realloc as needed.  The newline is not stored.
 *
 *   b     buffer obtained from malloc/realloc (NULL is accepted)
 *   blen  in: usable size of b; out: length of the line plus one for
 *         the terminator, or 0 when NULL is returned
 *   file  stream to read from
 *
 * Returns the (possibly moved) buffer holding the NUL-terminated line.
 * An empty line yields an empty string.  Returns NULL once the stream
 * is exhausted with nothing read, or when the buffer cannot be grown;
 * in both cases the buffer has been freed, so the caller must not use
 * or free the pointer it passed in.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int c, len;
  long used;
  char *q;

  len = *blen;
  if (b == NULL || len < 0)
    len = 0;  /* nothing usable yet: force an allocation on the first store */

  for (q = b; ; q++)
    {
      c = fgetc(file);
      if (c == EOF || c == '\n')
        break;
      if ((q - b + 1) >= (long) len)
        {
          char *grown;

          used = (long) (q - b);
          /* Doubling 0 would never grow, so start from a small size. */
          len = (len < 1) ? 2 : len << 1;
          grown = (char *) realloc(b, (size_t) len + 2);
          if (grown == NULL)
            {
              /* realloc left the old block alive; release it here
                 because the caller only ever sees NULL. */
              free(b);
              *blen = 0;
              return NULL;
            }
          b = grown;
          q = b + used;
        }
      *q = (char) c;
    }

  used = (long) (q - b);
  if (used == 0 && (c == EOF || b == NULL))
    {
      /* End of input with nothing pending.  (b is NULL only when the
         caller passed no buffer and the line was empty.) */
      free(b);
      *blen = 0;
      return NULL;
    }
  b[used] = '\0';
  *blen = (int) used + 1;
  return b;
}
| 365 | |
#define BUFFER_SZ4096 4096

/*
 * iptcutil entry point: converts IPTC records between binary and text.
 *
 *   -b       binary to text (the default): run formatIPTC on the input
 *   -t       text to binary: parse "dataset#record[#name]=value" lines
 *            and emit 0x1c, dataset, record, length and payload
 *   -i file  read from file instead of stdin
 *   -o file  write to file instead of stdout
 *
 * Options may start with '-' or '/'.  The fopen mode used for -i and -o
 * depends on the conversion mode in effect when the option is parsed,
 * so -t or -b should come before them.
 *
 * Returns 1 on a usage error, a file that cannot be opened, or failure
 * to allocate the line buffer; 0 otherwise.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* iptc binary, or iptc text */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c,
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if (argc < 2)
    {
      puts(usage);
      return 1;
    }

  mode = 0;

  for (i = 1; i < argc; i++)
    {
      c = argv[i][0];
      if (c == '-' || c == '/')
        {
          c = argv[i][1];
          switch (c)
            {
            case 't':
              mode = 1;
#ifdef WIN32
              /* Set "stdout" to binary mode: */
              _setmode( _fileno( ofile ), _O_BINARY );
#endif
              break;
            case 'b':
              mode = 0;
#ifdef WIN32
              /* Set "stdin" to binary mode: */
              _setmode( _fileno( ifile ), _O_BINARY );
#endif
              break;
            case 'i':
              /* The file name is the next argument; without one there
                 is nothing valid to hand to fopen. */
              if (i + 1 >= argc)
                {
                  puts(usage);
                  return 1;
                }
              if (mode == 0)
                ifile = fopen(argv[++i], "rb");
              else
                ifile = fopen(argv[++i], "rt");
              if (ifile == NULL)
                {
                  printf("Unable to open: %s\n", argv[i]);
                  return 1;
                }
              break;
            case 'o':
              if (i + 1 >= argc)
                {
                  puts(usage);
                  return 1;
                }
              if (mode == 0)
                ofile = fopen(argv[++i], "wt");
              else
                ofile = fopen(argv[++i], "wb");
              if (ofile == NULL)
                {
                  printf("Unable to open: %s\n", argv[i]);
                  return 1;
                }
              break;
            default:
              printf("Unknown option: %s\n", argv[i]);
              return 1;
            }
        }
      else
        {
          puts(usage);
          return 1;
        }
    }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      char
        brkused,
        quoted,
        *line,
        *token,
        *newstr;

      int
        state,
        next;

      unsigned char
        recnum = 0,
        dataset = 0;

      int
        inputlen = BUFFER_SZ4096;

      line = (char *) malloc(inputlen);
      if (line == NULL)
        {
          printf("Memory allocation failed\n");
          return 1;
        }
      while ((line = super_fgets(line, &inputlen, ifile)) != NULL)
        {
          state = 0;
          next = 0;

          /* super_fgets stored the line length plus one in inputlen,
             so neither scratch buffer can be too small for a piece of
             the line. */
          token = (char *) malloc(inputlen);
          newstr = (char *) malloc(inputlen);
          if (token == NULL || newstr == NULL)
            {
              printf("Memory allocation failed\n");
              free(token);
              free(newstr);
              break;
            }

          /* Split the line at '=': piece 0 is the header, piece 1 the value. */
          while (tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
                           &brkused, &next, &quoted) == 0)
            {
              if (state == 0)
                {
                  /* Header "dataset#record[#name]": keep the two numbers. */
                  int
                    field = 0,
                    pos = 0;

                  char
                    hdrbrk,
                    hdrquoted;

                  while (tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                                   &hdrbrk, &pos, &hdrquoted) == 0)
                    {
                      if (field == 0)
                        dataset = (unsigned char) atoi(newstr);
                      else if (field == 1)
                        recnum = (unsigned char) atoi(newstr);
                      field++;
                    }
                }
              else if (state == 1)
                {
                  int
                    pos = 0;

                  unsigned long
                    len;

                  char
                    valbrk,
                    valquoted;

                  /* Decode each '&' escape in place; len follows the
                     shrinking text. */
                  len = strlen(token);
                  while (tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                                   &valbrk, &pos, &valquoted) == 0)
                    {
                      if (valbrk && pos > 0)
                        {
                          char
                            *s = &token[pos - 1];

                          len -= convertHTMLcodes(s, (int) strlen(s));
                        }
                    }

                  fputc(0x1c, ofile);
                  fputc(dataset, ofile);
                  fputc(recnum, ofile);
                  if (len < 0x10000)
                    {
                      fputc((len >> 8) & 255, ofile);
                      fputc(len & 255, ofile);
                    }
                  else
                    {
                      fputc(((len >> 24) & 255) | 0x80, ofile);
                      fputc((len >> 16) & 255, ofile);
                      fputc((len >> 8) & 255, ofile);
                      fputc(len & 255, ofile);
                    }
                  pos = 0;
                  while (len--)
                    fputc(token[pos++], ofile);
                }
              state++;
            }
          free(token);
          token = NULL;
          free(newstr);
          newstr = NULL;
        }
      free(line);
    }

  /* Close in both modes so buffered output is flushed to a named file. */
  fclose( ifile );
  fclose( ofile );

  return 0;
}
| 573 | |
| 574 | /* |
| 575 | This routine is a generalized, finite state token parser. It allows |
| 576 | you to extract tokens one at a time from a string of characters. The |
| 577 | characters used for white space, for break characters, and for quotes |
| 578 | can be specified. Also, characters in the string can be preceded by |
| 579 | a specifiable escape character which removes any special meaning the |
| 580 | character may have. |
| 581 | |
| 582 | There are a lot of formal parameters in this subroutine call, but |
| 583 | once you get familiar with them, this routine is fairly easy to use. |
| 584 | "#define" macros can be used to generate simpler looking calls for |
| 585 | commonly used applications of this routine. |
| 586 | |
| 587 | First, some terminology: |
| 588 | |
| 589 | token: used here, a single unit of information in |
| 590 | the form of a group of characters. |
| 591 | |
| 592 | white space: space that gets ignored (except within quotes |
| 593 | or when escaped), like blanks and tabs. in |
| 594 | addition, white space terminates a non-quoted |
| 595 | token. |
| 596 | |
| 597 | break character: a character that separates non-quoted tokens. |
| 598 | commas are a common break character. the |
| 599 | usage of break characters to signal the end |
| 600 | of a token is the same as that of white space, |
| 601 | except multiple break characters with nothing |
| 602 | or only white space between generate a null |
| 603 | token for each two break characters together. |
| 604 | |
| 605 | for example, if blank is set to be the white |
| 606 | space and comma is set to be the break |
| 607 | character, the line ... |
| 608 | |
| 609 | A, B, C , , DEF |
| 610 | |
| 611 | ... consists of 5 tokens: |
| 612 | |
| 613 | 1) "A" |
| 614 | 2) "B" |
| 615 | 3) "C" |
| 616 | 4) "" (the null string) |
| 617 | 5) "DEF" |
| 618 | |
| 619 | quote character: a character that, when surrounding a group |
| 620 | of other characters, causes the group of |
| 621 | characters to be treated as a single token, |
| 622 | no matter how many white spaces or break |
| 623 | characters exist in the group. also, a |
| 624 | token always terminates after the closing |
| 625 | quote. for example, if ' is the quote |
| 626 | character, blank is white space, and comma |
| 627 | is the break character, the following |
| 628 | string ... |
| 629 | |
| 630 | A, ' B, CD'EF GHI |
| 631 | |
| 632 | ... consists of 4 tokens: |
| 633 | |
| 634 | 1) "A" |
| 635 | 2) " B, CD" (note the blanks & comma) |
| 636 | 3) "EF" |
| 637 | 4) "GHI" |
| 638 | |
| 639 | the quote characters themselves do |
| 640 | not appear in the resultant tokens. the |
| 641 | double quotes are delimiters i use here for |
| 642 | documentation purposes only. |
| 643 | |
| 644 | escape character: a character which itself is ignored but |
| 645 | which causes the next character to be |
| 646 | used as is. ^ and \ are often used as |
| 647 | escape characters. an escape in the last |
| 648 | position of the string gets treated as a |
| 649 | "normal" (i.e., non-quote, non-white, |
| 650 | non-break, and non-escape) character. |
| 651 | for example, assume white space, break |
| 652 | character, and quote are the same as in the |
| 653 | above examples, and further, assume that |
| 654 | ^ is the escape character. then, in the |
| 655 | string ... |
| 656 | |
| 657 | ABC, ' DEF ^' GH' I ^ J K^ L ^ |
| 658 | |
| 659 | ... there are 7 tokens: |
| 660 | |
| 661 | 1) "ABC" |
| 662 | 2) " DEF ' GH" |
| 663 | 3) "I" |
| 664 | 4) " " (a lone blank) |
| 665 | 5) "J" |
| 666 | 6) "K L" |
| 667 | 7) "^" (passed as is at end of line) |
| 668 | |
| 669 | |
| 670 | OK, now that you have this background, here's how to call "tokenizer": |
| 671 | |
| 672 | result=tokenizer(flag,token,maxtok,string,white,break,quote,escape, |
| 673 | brkused,next,quoted) |
| 674 | |
| 675 | result: 0 if we haven't reached EOS (end of string), and |
| 676 | 1 if we have (this is an "int"). |
| 677 | |
| 678 | flag: right now, only the low order 2 bits are used. |
| 679 | 1 => convert non-quoted tokens to upper case |
| 680 | 2 => convert non-quoted tokens to lower case |
| 681 | 0 or 3 => do not convert non-quoted tokens |
| 682 | (this is an "unsigned"). |
| 683 | |
| 684 | token: a character string containing the returned next token |
| 685 | (this is a "char[]"). |
| 686 | |
| 687 | maxtok: the maximum size of "token". characters beyond |
| 688 | "maxtok" are truncated (this is an "int"). |
| 689 | |
| 690 | string: the string to be parsed (this is a "char[]"). |
| 691 | |
| 692 | white: a string of the valid white spaces. example: |
| 693 | |
| 694 | char whitesp[]={" \t"}; |
| 695 | |
| 696 | blank and tab will be valid white space (this is |
| 697 | a "char[]"). |
| 698 | |
| 699 | break: a string of the valid break characters. example: |
| 700 | |
| 701 | char breakch[]={";,"}; |
| 702 | |
| 703 | semicolon and comma will be valid break characters |
| 704 | (this is a "char[]"). |
| 705 | |
| 706 | IMPORTANT: do not use the name "break" as a C |
| 707 | variable, as this is a reserved word in C. |
| 708 | |
| 709 | quote: a string of the valid quote characters. an example |
| 710 | would be |
| 711 | |
| 712 | char quotech[]={"'\""}; |
| 713 | |
| 714 | (this causes single and double quotes to be valid) |
| 715 | note that a token starting with one of these characters |
| 716 | needs the same quote character to terminate it. |
| 717 | |
| 718 | for example, |
| 719 | |
| 720 | "ABC ' |
| 721 | |
| 722 | is unterminated, but |
| 723 | |
| 724 | "DEF" and 'GHI' |
| 725 | |
| 726 | are properly terminated. note that different quote |
| 727 | characters can appear on the same line; only for |
| 728 | a given token do the quote characters have to be |
| 729 | the same (this is a "char[]"). |
| 730 | |
| 731 | escape: the escape character (NOT a string ... only one |
| 732 | allowed). use zero if none is desired (this is |
| 733 | a "char"). |
| 734 | |
| 735 | brkused: the break character used to terminate the current |
| 736 | token. if the token was quoted, this will be the |
| 737 | quote used. if the token is the last one on the |
| 738 | line, this will be zero (this is a pointer to a |
| 739 | "char"). |
| 740 | |
| 741 | next: this variable points to the first character of the |
| 742 | next token. it gets reset by "tokenizer" as it steps |
| 743 | through the string. set it to 0 upon initialization, |
| 744 | and leave it alone after that. you can change it |
| 745 | if you want to jump around in the string or re-parse |
| 746 | from the beginning, but be careful (this is a |
| 747 | pointer to an "int"). |
| 748 | |
| 749 | quoted: set to 1 (true) if the token was quoted and 0 (false) |
| 750 | if not. you may need this information (for example: |
| 751 | in C, a string with quotes around it is a character |
| 752 | string, while one without is an identifier). |
| 753 | |
| 754 | (this is a pointer to a "char"). |
| 755 | */ |
| 756 | |
/* Parser states used by tokenizer() and chstore(). */
enum
{
  IN_WHITE0 = 0,  /* start of a call: nothing of the token seen yet */
  IN_TOKEN1 = 1,  /* collecting an unquoted token */
  IN_QUOTE2 = 2,  /* between an opening quote and its matching close */
  IN_OZONE3 = 3   /* token finished by white space or a closing quote */
};

/* Working state of the call in progress; tokenizer() resets all four. */
int      _p_state;     /* current state */
unsigned _p_flag;      /* option flag */
char     _p_curquote;  /* current quote char */
int      _p_tokpos;    /* current token pos */
| 768 | |
/*
 * Find ch in string ... used only by "tokenizer".
 * Returns the zero-based position of the first match, or -1 when ch
 * does not occur before the terminating NUL.
 */
int sindex(char ch,char *string)
{
  int pos;

  for (pos = 0; string[pos] != '\0'; pos++)
    {
      if (string[pos] == ch)
        return pos;   /* position of character */
    }
  return -1;          /* end of string ... no match found */
}
| 779 | |
| 780 | /* routine to store a character in a string ... used only by "tokenizer" */ |
| 781 | |
| 782 | void chstore(char *string,int max,char ch) |
| 783 | { |
| 784 | char c; |
| 785 | if(_p_tokpos>=0&&_p_tokpos<max-1) |
| 786 | { |
| 787 | if(_p_state==IN_QUOTE2) |
| 788 | c=ch; |
| 789 | else |
| 790 | switch(_p_flag&3) |
| 791 | { |
| 792 | case 1: /* convert to upper */ |
| 793 | c=toupper(ch)(__extension__ ({ int __res; if (sizeof (ch) > 1) { if (__builtin_constant_p (ch)) { int __c = (ch); __res = __c < -128 || __c > 255 ? __c : (*__ctype_toupper_loc ())[__c]; } else __res = toupper (ch); } else __res = (*__ctype_toupper_loc ())[(int) (ch)]; __res ; })); |
| 794 | break; |
| 795 | |
| 796 | case 2: /* convert to lower */ |
| 797 | c=tolower(ch)(__extension__ ({ int __res; if (sizeof (ch) > 1) { if (__builtin_constant_p (ch)) { int __c = (ch); __res = __c < -128 || __c > 255 ? __c : (*__ctype_tolower_loc ())[__c]; } else __res = tolower (ch); } else __res = (*__ctype_tolower_loc ())[(int) (ch)]; __res ; })); |
| 798 | break; |
| 799 | |
| 800 | default: /* use as is */ |
| 801 | c=ch; |
| 802 | break; |
| 803 | } |
| 804 | string[_p_tokpos++]=c; |
| 805 | } |
| 806 | return; |
| 807 | } |
| 808 | |
| 809 | int tokenizer(unsigned inflag,char *token,int tokmax,char *line, |
| 810 | char *white,char *brkchar,char *quote,char eschar,char *brkused, |
| 811 | int *next,char *quoted) |
| 812 | { |
| 813 | int qp; |
| 814 | char c,nc; |
| 815 | |
| 816 | *brkused=0; /* initialize to null */ |
| 817 | *quoted=0; /* assume not quoted */ |
| 818 | |
| 819 | if(!line[*next]) /* if we're at end of line, indicate such */ |
| 820 | return 1; |
| 821 | |
| 822 | _p_state=IN_WHITE0; /* initialize state */ |
| 823 | _p_curquote=0; /* initialize previous quote char */ |
| 824 | _p_flag=inflag; /* set option flag */ |
| 825 | |
| 826 | for(_p_tokpos=0;(c=line[*next]);++(*next)) /* main loop */ |
| 827 | { |
| 828 | if((qp=sindex(c,brkchar))>=0) /* break */ |
| 829 | { |
| 830 | switch(_p_state) |
| 831 | { |
| 832 | case IN_WHITE0: /* these are the same here ... */ |
| 833 | case IN_TOKEN1: /* ... just get out */ |
| 834 | case IN_OZONE3: /* ditto */ |
| 835 | ++(*next); |
| 836 | *brkused=brkchar[qp]; |
| 837 | goto byebye; |
| 838 | |
| 839 | case IN_QUOTE2: /* just keep going */ |
| 840 | chstore(token,tokmax,c); |
| 841 | break; |
| 842 | } |
| 843 | } |
| 844 | else if((qp=sindex(c,quote))>=0) /* quote */ |
| 845 | { |
| 846 | switch(_p_state) |
| 847 | { |
| 848 | case IN_WHITE0: /* these are identical, */ |
| 849 | _p_state=IN_QUOTE2; /* change states */ |
| 850 | _p_curquote=quote[qp]; /* save quote char */ |
| 851 | *quoted=1; /* set to true as long as something is in quotes */ |
| 852 | break; |
| 853 | |
| 854 | case IN_QUOTE2: |
| 855 | if(quote[qp]==_p_curquote) /* same as the beginning quote? */ |
| 856 | { |
| 857 | _p_state=IN_OZONE3; |
| 858 | _p_curquote=0; |
| 859 | } |
| 860 | else |
| 861 | chstore(token,tokmax,c); /* treat as regular char */ |
| 862 | break; |
| 863 | |
| 864 | case IN_TOKEN1: |
| 865 | case IN_OZONE3: |
| 866 | *brkused=c; /* uses quote as break char */ |
| 867 | goto byebye; |
| 868 | } |
| 869 | } |
| 870 | else if((qp=sindex(c,white))>=0) /* white */ |
| 871 | { |
| 872 | switch(_p_state) |
| 873 | { |
| 874 | case IN_WHITE0: |
| 875 | case IN_OZONE3: |
| 876 | break; /* keep going */ |
| 877 | |
| 878 | case IN_TOKEN1: |
| 879 | _p_state=IN_OZONE3; |
| 880 | break; |
| 881 | |
| 882 | case IN_QUOTE2: |
| 883 | chstore(token,tokmax,c); /* it's valid here */ |
| 884 | break; |
| 885 | } |
| 886 | } |
| 887 | else if(c==eschar) /* escape */ |
| 888 | { |
| 889 | nc=line[(*next)+1]; |
| 890 | if(nc==0) /* end of line */ |
| 891 | { |
| 892 | *brkused=0; |
| 893 | chstore(token,tokmax,c); |
| 894 | ++(*next); |
| 895 | goto byebye; |
| 896 | } |
| 897 | switch(_p_state) |
| 898 | { |
| 899 | case IN_WHITE0: |
| 900 | --(*next); |
| 901 | _p_state=IN_TOKEN1; |
| 902 | break; |
| 903 | |
| 904 | case IN_TOKEN1: |
| 905 | case IN_QUOTE2: |
| 906 | ++(*next); |
| 907 | chstore(token,tokmax,nc); |
| 908 | break; |
| 909 | |
| 910 | case IN_OZONE3: |
| 911 | goto byebye; |
| 912 | } |
| 913 | } |
| 914 | else /* anything else is just a real character */ |
| 915 | { |
| 916 | switch(_p_state) |
| 917 | { |
| 918 | case IN_WHITE0: |
| 919 | _p_state=IN_TOKEN1; /* switch states */ |
| 920 | |
| 921 | case IN_TOKEN1: /* these 2 are */ |
| 922 | case IN_QUOTE2: /* identical here */ |
| 923 | chstore(token,tokmax,c); |
| 924 | break; |
| 925 | |
| 926 | case IN_OZONE3: |
| 927 | goto byebye; |
| 928 | } |
| 929 | } |
| 930 | } /* end of main loop */ |
| 931 | |
| 932 | byebye: |
| 933 | token[_p_tokpos]=0; /* make sure token ends with EOS */ |
| 934 | |
| 935 | return 0; |
| 936 | } |
| 937 | /* |
| 938 | * Local Variables: |
| 939 | * mode: c |
| 940 | * c-basic-offset: 8 |
| 941 | * fill-column: 78 |
| 942 | * End: |
| 943 | */ |