1 /* 2 * Copyright 2002-2007, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Michael Wilber 7 * Axel Dörfler, axeld@pinc-software.de 8 */ 9 10 11 #include "STXTTranslator.h" 12 #include "STXTView.h" 13 14 #include <CharacterSet.h> 15 #include <CharacterSetRoster.h> 16 #include <MimeType.h> 17 #include <String.h> 18 #include <UTF8.h> 19 20 #include <new> 21 #include <string.h> 22 #include <stdio.h> 23 #include <stdint.h> 24 25 26 using namespace BPrivate; 27 28 29 #define READ_BUFFER_SIZE 32768 30 #define DATA_BUFFER_SIZE 256 31 32 // The input formats that this translator supports. 33 translation_format gInputFormats[] = { 34 { 35 B_TRANSLATOR_TEXT, 36 B_TRANSLATOR_TEXT, 37 TEXT_IN_QUALITY, 38 TEXT_IN_CAPABILITY, 39 "text/plain", 40 "Plain text file" 41 }, 42 { 43 B_STYLED_TEXT_FORMAT, 44 B_TRANSLATOR_TEXT, 45 STXT_IN_QUALITY, 46 STXT_IN_CAPABILITY, 47 "text/x-vnd.Be-stxt", 48 "Be styled text file" 49 } 50 }; 51 52 // The output formats that this translator supports. 53 translation_format gOutputFormats[] = { 54 { 55 B_TRANSLATOR_TEXT, 56 B_TRANSLATOR_TEXT, 57 TEXT_OUT_QUALITY, 58 TEXT_OUT_CAPABILITY, 59 "text/plain", 60 "Plain text file" 61 }, 62 { 63 B_STYLED_TEXT_FORMAT, 64 B_TRANSLATOR_TEXT, 65 STXT_OUT_QUALITY, 66 STXT_OUT_CAPABILITY, 67 "text/x-vnd.Be-stxt", 68 "Be styled text file" 69 } 70 }; 71 72 // Default settings for the Translator 73 TranSetting gDefaultSettings[] = { 74 {B_TRANSLATOR_EXT_HEADER_ONLY, TRAN_SETTING_BOOL, false}, 75 {B_TRANSLATOR_EXT_DATA_ONLY, TRAN_SETTING_BOOL, false} 76 }; 77 78 // --------------------------------------------------------------- 79 // make_nth_translator 80 // 81 // Creates a STXTTranslator object to be used by BTranslatorRoster 82 // 83 // Preconditions: 84 // 85 // Parameters: n, The translator to return. 
Since 86 // STXTTranslator only publishes one 87 // translator, it only returns a 88 // STXTTranslator if n == 0 89 // 90 // you, The image_id of the add-on that 91 // contains code (not used). 92 // 93 // flags, Has no meaning yet, should be 0. 94 // 95 // Postconditions: 96 // 97 // Returns: NULL if n is not zero, 98 // a new STXTTranslator if n is zero 99 // --------------------------------------------------------------- 100 BTranslator * 101 make_nth_translator(int32 n, image_id you, uint32 flags, ...) 102 { 103 if (!n) 104 return new (std::nothrow) STXTTranslator(); 105 106 return NULL; 107 } 108 109 110 // #pragma mark - ascmagic.c from the BSD file tool 111 /* 112 * The following code has been taken from version 4.17 of the BSD file tool, 113 * file ascmagic.c, modified for our purpose. 114 */ 115 116 /* 117 * Copyright (c) Ian F. Darwin 1986-1995. 118 * Software written by Ian F. Darwin and others; 119 * maintained 1995-present by Christos Zoulas and others. 120 * 121 * Redistribution and use in source and binary forms, with or without 122 * modification, are permitted provided that the following conditions 123 * are met: 124 * 1. Redistributions of source code must retain the above copyright 125 * notice immediately at the beginning of the file, without modification, 126 * this list of conditions, and the following disclaimer. 127 * 2. Redistributions in binary form must reproduce the above copyright 128 * notice, this list of conditions and the following disclaimer in the 129 * documentation and/or other materials provided with the distribution. 130 * 131 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 132 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 133 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 134 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 135 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 136 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 137 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 138 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 139 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 140 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 141 * SUCH DAMAGE. 142 */ 143 /* 144 * ASCII magic -- file types that we know based on keywords 145 * that can appear anywhere in the file. 146 * 147 * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000, 148 * to handle character codes other than ASCII on a unified basis. 149 * 150 * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit 151 * international characters, now subsumed into this file. 152 */ 153 154 #include <stdio.h> 155 #include <string.h> 156 #include <memory.h> 157 #include <ctype.h> 158 #include <stdlib.h> 159 #include <unistd.h> 160 #include "names.h" 161 162 typedef unsigned long my_unichar; 163 164 #define MAXLINELEN 300 /* longest sane line length */ 165 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ 166 || (x) == 0x85 || (x) == '\f') 167 168 static int looks_ascii(const unsigned char *, size_t, my_unichar *, size_t *); 169 static int looks_utf8(const unsigned char *, size_t, my_unichar *, size_t *); 170 static int looks_unicode(const unsigned char *, size_t, my_unichar *, size_t *); 171 static int looks_latin1(const unsigned char *, size_t, my_unichar *, size_t *); 172 static int looks_extended(const unsigned char *, size_t, my_unichar *, size_t *); 173 static void from_ebcdic(const unsigned char *, size_t, unsigned char *); 174 static int ascmatch(const unsigned char *, const my_unichar *, size_t); 175 176 177 static int 178 file_ascmagic(const unsigned char *buf, 
size_t nbytes, BMimeType* mimeType, 179 const char*& encoding) 180 { 181 size_t i; 182 unsigned char *nbuf = NULL; 183 my_unichar *ubuf = NULL; 184 size_t ulen; 185 struct names *p; 186 int rv = -1; 187 188 const char *code = NULL; 189 encoding = NULL; 190 const char *type = NULL; 191 const char *subtype = NULL; 192 const char *subtype_mime = NULL; 193 194 int has_escapes = 0; 195 int has_backspace = 0; 196 int seen_cr = 0; 197 198 int n_crlf = 0; 199 int n_lf = 0; 200 int n_cr = 0; 201 int n_nel = 0; 202 203 int last_line_end = -1; 204 int has_long_lines = 0; 205 206 if ((nbuf = (unsigned char*)malloc((nbytes + 1) * sizeof(nbuf[0]))) == NULL) 207 goto done; 208 if ((ubuf = (my_unichar*)malloc((nbytes + 1) * sizeof(ubuf[0]))) == NULL) 209 goto done; 210 211 /* 212 * Then try to determine whether it's any character code we can 213 * identify. Each of these tests, if it succeeds, will leave 214 * the text converted into one-my_unichar-per-character Unicode in 215 * ubuf, and the number of characters converted in ulen. 
216 */ 217 if (nbytes == 0) { 218 code = "UTF-8 Unicode"; 219 encoding = NULL; // "UTF-8"; 220 type = "text"; 221 rv = 1; 222 } else if (looks_ascii(buf, nbytes, ubuf, &ulen)) { 223 code = "ASCII"; 224 encoding = NULL; //"us-ascii"; 225 type = "text"; 226 if (nbytes == 1) { 227 // no further tests 228 rv = 1; 229 } 230 } else if (looks_utf8(buf, nbytes, ubuf, &ulen)) { 231 code = "UTF-8 Unicode"; 232 encoding = NULL; // "UTF-8"; 233 type = "text"; 234 } else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen)) != 0) { 235 if (i == 1) { 236 code = "Little-endian UTF-16 Unicode"; 237 encoding = "UTF-16"; 238 } else { 239 code = "Big-endian UTF-16 Unicode"; 240 encoding = "UTF-16"; 241 } 242 243 type = "character data"; 244 } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) { 245 code = "ISO-8859"; 246 type = "text"; 247 encoding = "iso-8859-1"; 248 } else if (looks_extended(buf, nbytes, ubuf, &ulen)) { 249 code = "Non-ISO extended-ASCII"; 250 type = "text"; 251 encoding = "unknown"; 252 } else { 253 from_ebcdic(buf, nbytes, nbuf); 254 255 if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) { 256 code = "EBCDIC"; 257 type = "character data"; 258 encoding = "ebcdic"; 259 } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) { 260 code = "International EBCDIC"; 261 type = "character data"; 262 encoding = "ebcdic"; 263 } else { 264 rv = 0; 265 goto done; /* doesn't look like text at all */ 266 } 267 } 268 269 if (nbytes <= 1) { 270 if (rv == -1) 271 rv = 0; 272 goto done; 273 } 274 275 /* 276 * for troff, look for . + letter + letter or .\"; 277 * this must be done to disambiguate tar archives' ./file 278 * and other trash from real troff input. 279 * 280 * I believe Plan 9 troff allows non-ASCII characters in the names 281 * of macros, so this test might possibly fail on such a file. 
282 */ 283 if (*ubuf == '.') { 284 my_unichar *tp = ubuf + 1; 285 286 while (ISSPC(*tp)) 287 ++tp; /* skip leading whitespace */ 288 if ((tp[0] == '\\' && tp[1] == '\"') || 289 (isascii((unsigned char)tp[0]) && 290 isalnum((unsigned char)tp[0]) && 291 isascii((unsigned char)tp[1]) && 292 isalnum((unsigned char)tp[1]) && 293 ISSPC(tp[2]))) { 294 subtype_mime = "text/troff"; 295 subtype = "troff or preprocessor input"; 296 goto subtype_identified; 297 } 298 } 299 300 if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { 301 subtype_mime = "text/fortran"; 302 subtype = "fortran program"; 303 goto subtype_identified; 304 } 305 306 /* look for tokens from names.h - this is expensive! */ 307 308 i = 0; 309 while (i < ulen) { 310 size_t end; 311 312 /* 313 * skip past any leading space 314 */ 315 while (i < ulen && ISSPC(ubuf[i])) 316 i++; 317 if (i >= ulen) 318 break; 319 320 /* 321 * find the next whitespace 322 */ 323 for (end = i + 1; end < nbytes; end++) 324 if (ISSPC(ubuf[end])) 325 break; 326 327 /* 328 * compare the word thus isolated against the token list 329 */ 330 for (p = names; p < names + NNAMES; p++) { 331 if (ascmatch((const unsigned char *)p->name, ubuf + i, 332 end - i)) { 333 subtype = types[p->type].human; 334 subtype_mime = types[p->type].mime; 335 goto subtype_identified; 336 } 337 } 338 339 i = end; 340 } 341 342 subtype_identified: 343 344 /* 345 * Now try to discover other details about the file. 346 */ 347 for (i = 0; i < ulen; i++) { 348 if (ubuf[i] == '\n') { 349 if (seen_cr) 350 n_crlf++; 351 else 352 n_lf++; 353 last_line_end = i; 354 } else if (seen_cr) 355 n_cr++; 356 357 seen_cr = (ubuf[i] == '\r'); 358 if (seen_cr) 359 last_line_end = i; 360 361 if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */ 362 n_nel++; 363 last_line_end = i; 364 } 365 366 /* If this line is _longer_ than MAXLINELEN, remember it. 
*/ 367 if ((int)i > last_line_end + MAXLINELEN) 368 has_long_lines = 1; 369 370 if (ubuf[i] == '\033') 371 has_escapes = 1; 372 if (ubuf[i] == '\b') 373 has_backspace = 1; 374 } 375 376 rv = 1; 377 done: 378 if (nbuf) 379 free(nbuf); 380 if (ubuf) 381 free(ubuf); 382 383 if (rv) { 384 // If we have identified the subtype, return it, otherwise just 385 // text/plain. 386 if (subtype_mime) 387 mimeType->SetTo(subtype_mime); 388 else 389 mimeType->SetTo("text/plain"); 390 } 391 392 return rv; 393 } 394 395 static int 396 ascmatch(const unsigned char *s, const my_unichar *us, size_t ulen) 397 { 398 size_t i; 399 400 for (i = 0; i < ulen; i++) { 401 if (s[i] != us[i]) 402 return 0; 403 } 404 405 if (s[i]) 406 return 0; 407 else 408 return 1; 409 } 410 411 /* 412 * This table reflects a particular philosophy about what constitutes 413 * "text," and there is room for disagreement about it. 414 * 415 * Version 3.31 of the file command considered a file to be ASCII if 416 * each of its characters was approved by either the isascii() or 417 * isalpha() function. On most systems, this would mean that any 418 * file consisting only of characters in the range 0x00 ... 0x7F 419 * would be called ASCII text, but many systems might reasonably 420 * consider some characters outside this range to be alphabetic, 421 * so the file command would call such characters ASCII. It might 422 * have been more accurate to call this "considered textual on the 423 * local system" than "ASCII." 424 * 425 * It considered a file to be "International language text" if each 426 * of its characters was either an ASCII printing character (according 427 * to the real ASCII standard, not the above test), a character in 428 * the range 0x80 ... 0xFF, or one of the following control characters: 429 * backspace, tab, line feed, vertical tab, form feed, carriage return, 430 * escape. No attempt was made to determine the language in which files 431 * of this type were written. 
 *
 *
 * The table below considers a file to be ASCII if all of its characters
 * are either ASCII printing characters (again, according to the X3.4
 * standard, not isascii()) or any of the following controls: bell,
 * backspace, tab, line feed, form feed, carriage return, esc, nextline.
 *
 * I include bell because some programs (particularly shell scripts)
 * use it literally, even though it is rare in normal text. I exclude
 * vertical tab because it never seems to be used in real text. I also
 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
 * character to. It might be more appropriate to include it in the 8859
 * set instead of the ASCII set, but it's got to be included in *something*
 * we recognize or EBCDIC files aren't going to be considered textual.
 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
 * and Latin characters, so these should possibly be allowed. But they
 * make a real mess on VT100-style displays if they're not paired properly,
 * so we are probably better off not calling them text.
 *
 * A file is considered to be ISO-8859 text if its characters are all
 * either ASCII, according to the above definition, or printing characters
 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
 *
 * Finally, a file is considered to be international text from some other
 * character code if its characters are all either ISO-8859 (according to
 * the above definition) or characters in the range 0x80 ... 0x9F, which
 * ISO-8859 considers to be control characters but the IBM PC and Macintosh
 * consider to be printing characters.
461 */ 462 463 #define F 0 /* character never appears in text */ 464 #define T 1 /* character appears in plain ASCII text */ 465 #define I 2 /* character appears in ISO-8859 text */ 466 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ 467 468 static char text_chars[256] = { 469 /* BEL BS HT LF FF CR */ 470 F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ 471 /* ESC */ 472 F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ 473 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ 474 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ 475 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ 476 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ 477 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ 478 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ 479 /* NEL */ 480 X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ 481 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ 482 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ 483 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ 484 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ 485 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ 486 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ 487 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ 488 }; 489 490 static int 491 looks_ascii(const unsigned char *buf, size_t nbytes, my_unichar *ubuf, 492 size_t *ulen) 493 { 494 int i; 495 496 *ulen = 0; 497 498 for (i = 0; i < (int)nbytes; i++) { 499 int t = text_chars[buf[i]]; 500 501 if (t != T) 502 return 0; 503 504 ubuf[(*ulen)++] = buf[i]; 505 } 506 507 return 1; 508 } 509 510 static int 511 looks_latin1(const unsigned char *buf, size_t nbytes, my_unichar *ubuf, size_t *ulen) 512 { 513 int i; 514 515 *ulen = 0; 516 517 for (i = 0; i < (int)nbytes; i++) { 518 int t = text_chars[buf[i]]; 519 520 if (t != T && t != I) 521 return 0; 522 523 ubuf[(*ulen)++] = buf[i]; 524 } 
525 526 return 1; 527 } 528 529 static int 530 looks_extended(const unsigned char *buf, size_t nbytes, my_unichar *ubuf, 531 size_t *ulen) 532 { 533 int i; 534 535 *ulen = 0; 536 537 for (i = 0; i < (int)nbytes; i++) { 538 int t = text_chars[buf[i]]; 539 540 if (t != T && t != I && t != X) 541 return 0; 542 543 ubuf[(*ulen)++] = buf[i]; 544 } 545 546 return 1; 547 } 548 549 static int 550 looks_utf8(const unsigned char *buf, size_t nbytes, my_unichar *ubuf, size_t *ulen) 551 { 552 int i, n; 553 my_unichar c; 554 int gotone = 0; 555 556 *ulen = 0; 557 558 for (i = 0; i < (int)nbytes; i++) { 559 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ 560 /* 561 * Even if the whole file is valid UTF-8 sequences, 562 * still reject it if it uses weird control characters. 563 */ 564 565 if (text_chars[buf[i]] != T) 566 return 0; 567 568 ubuf[(*ulen)++] = buf[i]; 569 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ 570 return 0; 571 } else { /* 11xxxxxx begins UTF-8 */ 572 int following; 573 574 if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ 575 c = buf[i] & 0x1f; 576 following = 1; 577 } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */ 578 c = buf[i] & 0x0f; 579 following = 2; 580 } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */ 581 c = buf[i] & 0x07; 582 following = 3; 583 } else if ((buf[i] & 0x04) == 0) { /* 111110xx */ 584 c = buf[i] & 0x03; 585 following = 4; 586 } else if ((buf[i] & 0x02) == 0) { /* 1111110x */ 587 c = buf[i] & 0x01; 588 following = 5; 589 } else 590 return 0; 591 592 for (n = 0; n < following; n++) { 593 i++; 594 if (i >= (int)nbytes) 595 goto done; 596 597 if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) 598 return 0; 599 600 c = (c << 6) + (buf[i] & 0x3f); 601 } 602 603 ubuf[(*ulen)++] = c; 604 gotone = 1; 605 } 606 } 607 done: 608 return gotone; /* don't claim it's UTF-8 if it's all 7-bit */ 609 } 610 611 static int 612 looks_unicode(const unsigned char *buf, size_t nbytes, my_unichar *ubuf, 613 size_t *ulen) 614 { 615 int bigend; 
616 int i; 617 618 if (nbytes < 2) 619 return 0; 620 621 if (buf[0] == 0xff && buf[1] == 0xfe) 622 bigend = 0; 623 else if (buf[0] == 0xfe && buf[1] == 0xff) 624 bigend = 1; 625 else 626 return 0; 627 628 *ulen = 0; 629 630 for (i = 2; i + 1 < (int)nbytes; i += 2) { 631 /* XXX fix to properly handle chars > 65536 */ 632 633 if (bigend) 634 ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i]; 635 else 636 ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1]; 637 638 if (ubuf[*ulen - 1] == 0xfffe) 639 return 0; 640 if (ubuf[*ulen - 1] < 128 && 641 text_chars[(size_t)ubuf[*ulen - 1]] != T) 642 return 0; 643 } 644 645 return 1 + bigend; 646 } 647 648 #undef F 649 #undef T 650 #undef I 651 #undef X 652 653 /* 654 * This table maps each EBCDIC character to an (8-bit extended) ASCII 655 * character, as specified in the rationale for the dd(1) command in 656 * draft 11.2 (September, 1991) of the POSIX P1003.2 standard. 657 * 658 * Unfortunately it does not seem to correspond exactly to any of the 659 * five variants of EBCDIC documented in IBM's _Enterprise Systems 660 * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh 661 * Edition, July, 1999, pp. I-1 - I-4. 662 * 663 * Fortunately, though, all versions of EBCDIC, including this one, agree 664 * on most of the printing characters that also appear in (7-bit) ASCII. 665 * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all. 666 * 667 * Fortunately too, there is general agreement that codes 0x00 through 668 * 0x3F represent control characters, 0x41 a nonbreaking space, and the 669 * remainder printing characters. 670 * 671 * This is sufficient to allow us to identify EBCDIC text and to distinguish 672 * between old-style and internationalized examples of text. 
 */

/* Indexed by the EBCDIC byte value; 256 entries, 16 per row. */
static unsigned char ebcdic_to_ascii[] = {
	  0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
	 16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
	128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
	144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
	' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
	'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
	'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
	186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
	195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
	202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
	209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
	216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
	'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
	'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
	'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
};

#ifdef notdef
/*
 * The following EBCDIC-to-ASCII table may relate more closely to reality,
 * or at least to modern reality. It comes from
 *
 *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
 *
 * and maps the characters of EBCDIC code page 1047 (the code used for
 * Unix-derived software on IBM's 390 systems) to the corresponding
 * characters from ISO 8859-1.
 *
 * If this table is used instead of the above one, some of the special
 * cases for the NEL character can be taken out of the code.
707 */ 708 709 static unsigned char ebcdic_1047_to_8859[] = { 710 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, 711 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, 712 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, 713 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, 714 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, 715 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, 716 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, 717 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, 718 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, 719 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, 720 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, 721 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, 722 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, 723 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, 724 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, 725 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F 726 }; 727 #endif 728 729 /* 730 * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII. 731 */ 732 static void 733 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) 734 { 735 int i; 736 737 for (i = 0; i < (int)nbytes; i++) { 738 out[i] = ebcdic_to_ascii[buf[i]]; 739 } 740 } 741 742 743 // #pragma mark - 744 745 746 /*! 747 Determines if the data in inSource is of the STXT format. 
748 749 \param header the STXT stream header read in by Identify() or Translate() 750 \param inSource the stream with the STXT data 751 \param outInfo information about the type of data from inSource is stored here 752 \param outType the desired output type for the data in inSource 753 \param ptxtheader if this is not NULL, the TEXT header from 754 inSource is copied to it 755 */ 756 status_t 757 identify_stxt_header(const TranslatorStyledTextStreamHeader &header, 758 BPositionIO *inSource, translator_info *outInfo, uint32 outType, 759 TranslatorStyledTextTextHeader *ptxtheader = NULL) 760 { 761 const ssize_t ktxtsize = sizeof(TranslatorStyledTextTextHeader); 762 const ssize_t kstylsize = sizeof(TranslatorStyledTextStyleHeader); 763 764 uint8 buffer[max(ktxtsize, kstylsize)]; 765 766 // Check the TEXT header 767 TranslatorStyledTextTextHeader txtheader; 768 if (inSource->Read(buffer, ktxtsize) != ktxtsize) 769 return B_NO_TRANSLATOR; 770 771 memcpy(&txtheader, buffer, ktxtsize); 772 if (swap_data(B_UINT32_TYPE, &txtheader, ktxtsize, 773 B_SWAP_BENDIAN_TO_HOST) != B_OK) 774 return B_ERROR; 775 776 if (txtheader.header.magic != 'TEXT' 777 || txtheader.header.header_size != sizeof(TranslatorStyledTextTextHeader) 778 || txtheader.charset != B_UNICODE_UTF8) 779 return B_NO_TRANSLATOR; 780 781 // skip the text data 782 off_t seekresult, pos; 783 pos = header.header.header_size + txtheader.header.header_size 784 + txtheader.header.data_size; 785 seekresult = inSource->Seek(txtheader.header.data_size, 786 SEEK_CUR); 787 if (seekresult < pos) 788 return B_NO_TRANSLATOR; 789 if (seekresult > pos) 790 return B_ERROR; 791 792 // check the STYL header (not all STXT files have this) 793 ssize_t read = 0; 794 TranslatorStyledTextStyleHeader stylheader; 795 read = inSource->Read(buffer, kstylsize); 796 if (read < 0) 797 return read; 798 if (read != kstylsize && read != 0) 799 return B_NO_TRANSLATOR; 800 801 // If there is a STYL header 802 if (read == kstylsize) { 803 
memcpy(&stylheader, buffer, kstylsize); 804 if (swap_data(B_UINT32_TYPE, &stylheader, kstylsize, 805 B_SWAP_BENDIAN_TO_HOST) != B_OK) 806 return B_ERROR; 807 808 if (stylheader.header.magic != 'STYL' 809 || stylheader.header.header_size != 810 sizeof(TranslatorStyledTextStyleHeader)) 811 return B_NO_TRANSLATOR; 812 } 813 814 // if output TEXT header is supplied, fill it with data 815 if (ptxtheader) { 816 ptxtheader->header.magic = txtheader.header.magic; 817 ptxtheader->header.header_size = txtheader.header.header_size; 818 ptxtheader->header.data_size = txtheader.header.data_size; 819 ptxtheader->charset = txtheader.charset; 820 } 821 822 // return information about the data in the stream 823 outInfo->type = B_STYLED_TEXT_FORMAT; 824 outInfo->group = B_TRANSLATOR_TEXT; 825 outInfo->quality = STXT_IN_QUALITY; 826 outInfo->capability = STXT_IN_CAPABILITY; 827 strcpy(outInfo->name, "Be styled text file"); 828 strcpy(outInfo->MIME, "text/x-vnd.Be-stxt"); 829 830 return B_OK; 831 } 832 833 834 /*! 835 Determines if the data in \a inSource is of the UTF8 plain 836 837 \param data buffer containing data already read (must be at 838 least DATA_BUFFER_SIZE bytes large) 839 \param nread number of bytes that have already been read from the stream 840 \param header the STXT stream header read in by Identify() or Translate() 841 \param inSource the stream with the STXT data 842 \param outInfo information about the type of data from inSource is stored here 843 \param outType the desired output type for the data in inSource 844 */ 845 status_t 846 identify_text(uint8* data, int32 bytesRead, BPositionIO* source, 847 translator_info* outInfo, uint32 outType, const char*& encoding) 848 { 849 ssize_t readLater = source->Read(data + bytesRead, DATA_BUFFER_SIZE - bytesRead); 850 if (readLater < B_OK) 851 return B_NO_TRANSLATOR; 852 853 bytesRead += readLater; 854 855 // TODO: identify encoding as possible! 
856 BMimeType type; 857 if (!file_ascmagic((const unsigned char*)data, bytesRead, &type, encoding)) 858 return B_NO_TRANSLATOR; 859 860 float capability = TEXT_IN_CAPABILITY; 861 if (bytesRead < 20) 862 capability = .1f; 863 864 // return information about the data in the stream 865 outInfo->type = B_TRANSLATOR_TEXT; 866 outInfo->group = B_TRANSLATOR_TEXT; 867 outInfo->quality = TEXT_IN_QUALITY; 868 outInfo->capability = capability; 869 870 char description[B_MIME_TYPE_LENGTH]; 871 if (type.GetLongDescription(description) == B_OK) 872 strlcpy(outInfo->name, description, sizeof(outInfo->name)); 873 else 874 strlcpy(outInfo->name, "Plain text file", sizeof(outInfo->name)); 875 876 //strlcpy(outInfo->MIME, type.Type(), sizeof(outInfo->MIME)); 877 strcpy(outInfo->MIME, "text/plain"); 878 return B_OK; 879 } 880 881 882 // --------------------------------------------------------------- 883 // translate_from_stxt 884 // 885 // Translates the data in inSource to the type outType and stores 886 // the translated data in outDestination. 
887 // 888 // Preconditions: 889 // 890 // Parameters: inSource, the data to be translated 891 // 892 // outDestination, where the translated data is 893 // put 894 // 895 // outType, the type to convert inSource to 896 // 897 // txtheader, the TEXT header from inSource 898 // 899 // 900 // Postconditions: 901 // 902 // Returns: B_BAD_VALUE, if outType is invalid 903 // 904 // B_NO_TRANSLATOR, if this translator doesn't understand the data 905 // 906 // B_ERROR, if there was an error allocating memory or converting 907 // data 908 // 909 // B_OK, if all went well 910 // --------------------------------------------------------------- 911 status_t 912 translate_from_stxt(BPositionIO *inSource, BPositionIO *outDestination, 913 uint32 outType, const TranslatorStyledTextTextHeader &txtheader) 914 { 915 if (inSource->Seek(0, SEEK_SET) != 0) 916 return B_ERROR; 917 918 const ssize_t kstxtsize = sizeof(TranslatorStyledTextStreamHeader); 919 const ssize_t ktxtsize = sizeof(TranslatorStyledTextTextHeader); 920 921 bool btoplain; 922 if (outType == B_TRANSLATOR_TEXT) 923 btoplain = true; 924 else if (outType == B_STYLED_TEXT_FORMAT) 925 btoplain = false; 926 else 927 return B_BAD_VALUE; 928 929 uint8 buffer[READ_BUFFER_SIZE]; 930 ssize_t nread = 0, nwritten = 0, nreed = 0, ntotalread = 0; 931 932 // skip to the actual text data when outputting a 933 // plain text file 934 if (btoplain) { 935 if (inSource->Seek(kstxtsize + ktxtsize, SEEK_CUR) != 936 kstxtsize + ktxtsize) 937 return B_ERROR; 938 } 939 940 // Read data from inSource 941 // When outputing B_TRANSLATOR_TEXT, the loop stops when all of 942 // the text data has been read and written. 943 // When outputting B_STYLED_TEXT_FORMAT, the loop stops when all 944 // of the data from inSource has been read and written. 
945 if (btoplain) 946 nreed = min(READ_BUFFER_SIZE, 947 txtheader.header.data_size - ntotalread); 948 else 949 nreed = READ_BUFFER_SIZE; 950 nread = inSource->Read(buffer, nreed); 951 while (nread > 0) { 952 nwritten = outDestination->Write(buffer, nread); 953 if (nwritten != nread) 954 return B_ERROR; 955 956 if (btoplain) { 957 ntotalread += nread; 958 nreed = min(READ_BUFFER_SIZE, 959 txtheader.header.data_size - ntotalread); 960 } else 961 nreed = READ_BUFFER_SIZE; 962 nread = inSource->Read(buffer, nreed); 963 } 964 965 if (btoplain && static_cast<ssize_t>(txtheader.header.data_size) != 966 ntotalread) 967 // If not all of the text data was able to be read... 968 return B_NO_TRANSLATOR; 969 else 970 return B_OK; 971 } 972 973 // --------------------------------------------------------------- 974 // output_headers 975 // 976 // Outputs the Stream and Text headers from the B_STYLED_TEXT_FORMAT 977 // to outDestination, setting the data_size member of the text header 978 // to text_data_size 979 // 980 // Preconditions: 981 // 982 // Parameters: outDestination, where the translated data is 983 // put 984 // 985 // text_data_size, number of bytes in data section 986 // of the TEXT header 987 // 988 // 989 // Postconditions: 990 // 991 // Returns: 992 // 993 // B_ERROR, if there was an error writing to outDestination or 994 // an error with converting the byte order 995 // 996 // B_OK, if all went well 997 // --------------------------------------------------------------- 998 status_t 999 output_headers(BPositionIO *outDestination, uint32 text_data_size) 1000 { 1001 const int32 kHeadersSize = sizeof(TranslatorStyledTextStreamHeader) + 1002 sizeof(TranslatorStyledTextTextHeader); 1003 status_t result; 1004 TranslatorStyledTextStreamHeader stxtheader; 1005 TranslatorStyledTextTextHeader txtheader; 1006 1007 uint8 buffer[kHeadersSize]; 1008 1009 stxtheader.header.magic = 'STXT'; 1010 stxtheader.header.header_size = sizeof(TranslatorStyledTextStreamHeader); 1011 
stxtheader.header.data_size = 0; 1012 stxtheader.version = 100; 1013 memcpy(buffer, &stxtheader, stxtheader.header.header_size); 1014 1015 txtheader.header.magic = 'TEXT'; 1016 txtheader.header.header_size = sizeof(TranslatorStyledTextTextHeader); 1017 txtheader.header.data_size = text_data_size; 1018 txtheader.charset = B_UNICODE_UTF8; 1019 memcpy(buffer + stxtheader.header.header_size, &txtheader, 1020 txtheader.header.header_size); 1021 1022 // write out headers in Big Endian byte order 1023 result = swap_data(B_UINT32_TYPE, buffer, kHeadersSize, 1024 B_SWAP_HOST_TO_BENDIAN); 1025 if (result == B_OK) { 1026 ssize_t nwritten = 0; 1027 nwritten = outDestination->Write(buffer, kHeadersSize); 1028 if (nwritten != kHeadersSize) 1029 return B_ERROR; 1030 else 1031 return B_OK; 1032 } 1033 1034 return result; 1035 } 1036 1037 // --------------------------------------------------------------- 1038 // output_styles 1039 // 1040 // Writes out the actual style information into outDestination 1041 // using the data from pflatRunArray 1042 // 1043 // Preconditions: 1044 // 1045 // Parameters: outDestination, where the translated data is 1046 // put 1047 // 1048 // text_size, size in bytes of the text in 1049 // outDestination 1050 // 1051 // data_size, size of pflatRunArray 1052 // 1053 // Postconditions: 1054 // 1055 // Returns: 1056 // 1057 // B_ERROR, if there was an error writing to outDestination or 1058 // an error with converting the byte order 1059 // 1060 // B_OK, if all went well 1061 // --------------------------------------------------------------- 1062 status_t 1063 output_styles(BPositionIO *outDestination, uint32 text_size, 1064 uint8 *pflatRunArray, ssize_t data_size) 1065 { 1066 const ssize_t kstylsize = sizeof(TranslatorStyledTextStyleHeader); 1067 1068 uint8 buffer[kstylsize]; 1069 1070 // output STYL header 1071 TranslatorStyledTextStyleHeader stylheader; 1072 stylheader.header.magic = 'STYL'; 1073 stylheader.header.header_size = 1074 
sizeof(TranslatorStyledTextStyleHeader); 1075 stylheader.header.data_size = data_size; 1076 stylheader.apply_offset = 0; 1077 stylheader.apply_length = text_size; 1078 1079 memcpy(buffer, &stylheader, kstylsize); 1080 if (swap_data(B_UINT32_TYPE, buffer, kstylsize, 1081 B_SWAP_HOST_TO_BENDIAN) != B_OK) 1082 return B_ERROR; 1083 if (outDestination->Write(buffer, kstylsize) != kstylsize) 1084 return B_ERROR; 1085 1086 // output actual style information 1087 if (outDestination->Write(pflatRunArray, 1088 data_size) != data_size) 1089 return B_ERROR; 1090 1091 return B_OK; 1092 } 1093 1094 1095 /*! 1096 Convert the plain text (UTF8) from inSource to plain or 1097 styled text in outDestination 1098 */ 1099 status_t 1100 translate_from_text(BPositionIO* source, const char* encoding, bool forceEncoding, 1101 BPositionIO* destination, uint32 outType) 1102 { 1103 if (outType != B_TRANSLATOR_TEXT && outType != B_STYLED_TEXT_FORMAT) 1104 return B_BAD_VALUE; 1105 1106 // find the length of the text 1107 off_t size = source->Seek(0, SEEK_END); 1108 if (size < 0) 1109 return (status_t)size; 1110 if (size > UINT32_MAX && outType == B_STYLED_TEXT_FORMAT) 1111 return B_NOT_SUPPORTED; 1112 1113 status_t status = source->Seek(0, SEEK_SET); 1114 if (status < B_OK) 1115 return status; 1116 1117 if (outType == B_STYLED_TEXT_FORMAT) { 1118 // output styled text headers 1119 status = output_headers(destination, (uint32)size); 1120 if (status != B_OK) 1121 return status; 1122 } 1123 1124 class MallocBuffer { 1125 public: 1126 MallocBuffer() : fBuffer(NULL), fSize(0) {} 1127 ~MallocBuffer() { free(fBuffer); } 1128 1129 void* Buffer() { return fBuffer; } 1130 size_t Size() const { return fSize; } 1131 1132 status_t 1133 Allocate(size_t size) 1134 { 1135 fBuffer = malloc(size); 1136 if (fBuffer != NULL) { 1137 fSize = size; 1138 return B_OK; 1139 } 1140 return B_NO_MEMORY; 1141 } 1142 1143 private: 1144 void* fBuffer; 1145 size_t fSize; 1146 } encodingBuffer; 1147 BMallocIO encodingIO; 1148 
uint32 encodingID = 0; 1149 // defaults to UTF-8 or no encoding 1150 1151 BNode* node = dynamic_cast<BNode*>(source); 1152 if (node != NULL) { 1153 // determine encoding, if available 1154 const BCharacterSet* characterSet = NULL; 1155 bool hasAttribute = false; 1156 if (encoding != NULL && !forceEncoding) { 1157 BString name; 1158 if (node->ReadAttrString("be:encoding", &name) == B_OK) { 1159 encoding = name.String(); 1160 hasAttribute = true; 1161 } else { 1162 int32 value; 1163 ssize_t bytesRead = node->ReadAttr("be:encoding", B_INT32_TYPE, 0, 1164 &value, sizeof(value)); 1165 if (bytesRead == (ssize_t)sizeof(value)) { 1166 hasAttribute = true; 1167 if (value != 65535) 1168 characterSet = BCharacterSetRoster::GetCharacterSetByConversionID(value); 1169 } 1170 } 1171 } else { 1172 hasAttribute = true; 1173 // we don't write the encoding in this case 1174 } 1175 if (characterSet == NULL && encoding != NULL) 1176 characterSet = BCharacterSetRoster::FindCharacterSetByName(encoding); 1177 1178 if (characterSet != NULL) { 1179 encodingID = characterSet->GetConversionID(); 1180 encodingBuffer.Allocate(READ_BUFFER_SIZE * 4); 1181 } 1182 1183 if (!hasAttribute && encoding != NULL) { 1184 // add encoding attribute, so that someone opening the file can 1185 // retrieve it for persistance 1186 node->WriteAttr("be:encoding", B_STRING_TYPE, 0, encoding, 1187 strlen(encoding)); 1188 } 1189 } 1190 1191 off_t outputSize = 0; 1192 ssize_t bytesRead; 1193 int32 state = 0; 1194 1195 // output the actual text part of the data 1196 do { 1197 uint8 buffer[READ_BUFFER_SIZE]; 1198 bytesRead = source->Read(buffer, READ_BUFFER_SIZE); 1199 if (bytesRead < B_OK) 1200 return bytesRead; 1201 if (bytesRead == 0) 1202 break; 1203 1204 if (encodingBuffer.Size() == 0) { 1205 // default, no encoding 1206 ssize_t bytesWritten = destination->Write(buffer, bytesRead); 1207 if (bytesWritten != bytesRead) { 1208 if (bytesWritten < B_OK) 1209 return bytesWritten; 1210 1211 return B_ERROR; 1212 } 1213 
1214 outputSize += bytesRead; 1215 } else { 1216 // decode text file to UTF-8 1217 char* pos = (char*)buffer; 1218 int32 encodingLength = encodingIO.BufferLength(); 1219 int32 bytesLeft = bytesRead; 1220 int32 bytes; 1221 do { 1222 encodingLength = READ_BUFFER_SIZE * 4; 1223 bytes = bytesLeft; 1224 1225 status = convert_to_utf8(encodingID, pos, &bytes, 1226 (char*)encodingBuffer.Buffer(), &encodingLength, &state); 1227 if (status < B_OK) 1228 return status; 1229 1230 ssize_t bytesWritten = destination->Write(encodingBuffer.Buffer(), 1231 encodingLength); 1232 if (bytesWritten < encodingLength) { 1233 if (bytesWritten < B_OK) 1234 return bytesWritten; 1235 1236 return B_ERROR; 1237 } 1238 1239 pos += bytes; 1240 bytesLeft -= bytes; 1241 outputSize += encodingLength; 1242 } while (encodingLength > 0 && bytesLeft > 0); 1243 } 1244 } while (bytesRead > 0); 1245 1246 if (outType != B_STYLED_TEXT_FORMAT) 1247 return B_OK; 1248 1249 if (encodingBuffer.Size() != 0 && size != outputSize) { 1250 if (outputSize > UINT32_MAX) 1251 return B_NOT_SUPPORTED; 1252 1253 // we need to update the header as the decoded text size has changed 1254 status = destination->Seek(0, SEEK_SET); 1255 if (status == B_OK) 1256 status = output_headers(destination, (uint32)outputSize); 1257 if (status == B_OK) 1258 status = destination->Seek(0, SEEK_END); 1259 1260 if (status < B_OK) 1261 return status; 1262 } 1263 1264 // Read file attributes if outputting styled data 1265 // and source is a BNode object 1266 1267 if (node == NULL) 1268 return B_OK; 1269 1270 // Try to read styles - we only propagate an error if the actual on-disk 1271 // data is likely to be okay 1272 1273 const char *kAttrName = "styles"; 1274 attr_info info; 1275 if (node->GetAttrInfo(kAttrName, &info) != B_OK) 1276 return B_OK; 1277 1278 if (info.type != B_RAW_TYPE || info.size < 160) { 1279 // styles seem to be broken, but since we got the text, 1280 // we don't propagate the error 1281 return B_OK; 1282 } 1283 1284 uint8* 
flatRunArray = new (std::nothrow) uint8[info.size]; 1285 if (flatRunArray == NULL) 1286 return B_NO_MEMORY; 1287 1288 bytesRead = node->ReadAttr(kAttrName, B_RAW_TYPE, 0, flatRunArray, info.size); 1289 if (bytesRead != info.size) 1290 return B_OK; 1291 1292 output_styles(destination, size, flatRunArray, info.size); 1293 1294 delete[] flatRunArray; 1295 return B_OK; 1296 } 1297 1298 1299 // #pragma mark - 1300 1301 1302 STXTTranslator::STXTTranslator() 1303 : BaseTranslator("StyledEdit Files", "StyledEdit files translator", 1304 STXT_TRANSLATOR_VERSION, 1305 gInputFormats, sizeof(gInputFormats) / sizeof(translation_format), 1306 gOutputFormats, sizeof(gOutputFormats) / sizeof(translation_format), 1307 "STXTTranslator_Settings", 1308 gDefaultSettings, sizeof(gDefaultSettings) / sizeof(TranSetting), 1309 B_TRANSLATOR_TEXT, B_STYLED_TEXT_FORMAT) 1310 { 1311 } 1312 1313 1314 STXTTranslator::~STXTTranslator() 1315 { 1316 } 1317 1318 1319 status_t 1320 STXTTranslator::Identify(BPositionIO *inSource, 1321 const translation_format *inFormat, BMessage *ioExtension, 1322 translator_info *outInfo, uint32 outType) 1323 { 1324 if (!outType) 1325 outType = B_TRANSLATOR_TEXT; 1326 if (outType != B_TRANSLATOR_TEXT && outType != B_STYLED_TEXT_FORMAT) 1327 return B_NO_TRANSLATOR; 1328 1329 const ssize_t kstxtsize = sizeof(TranslatorStyledTextStreamHeader); 1330 1331 uint8 buffer[DATA_BUFFER_SIZE]; 1332 status_t nread = 0; 1333 // Read in the header to determine 1334 // if the data is supported 1335 nread = inSource->Read(buffer, kstxtsize); 1336 if (nread < 0) 1337 return nread; 1338 1339 // read in enough data to fill the stream header 1340 if (nread == kstxtsize) { 1341 TranslatorStyledTextStreamHeader header; 1342 memcpy(&header, buffer, kstxtsize); 1343 if (swap_data(B_UINT32_TYPE, &header, kstxtsize, 1344 B_SWAP_BENDIAN_TO_HOST) != B_OK) 1345 return B_ERROR; 1346 1347 if (header.header.magic == B_STYLED_TEXT_FORMAT 1348 && header.header.header_size == (int32)kstxtsize 1349 && 
header.header.data_size == 0 1350 && header.version == 100) 1351 return identify_stxt_header(header, inSource, outInfo, outType); 1352 } 1353 1354 // if the data is not styled text, check if it is plain text 1355 const char* encoding; 1356 return identify_text(buffer, nread, inSource, outInfo, outType, encoding); 1357 } 1358 1359 1360 status_t 1361 STXTTranslator::Translate(BPositionIO* source, const translator_info* info, 1362 BMessage* ioExtension, uint32 outType, BPositionIO* outDestination) 1363 { 1364 if (!outType) 1365 outType = B_TRANSLATOR_TEXT; 1366 if (outType != B_TRANSLATOR_TEXT && outType != B_STYLED_TEXT_FORMAT) 1367 return B_NO_TRANSLATOR; 1368 1369 const ssize_t headerSize = sizeof(TranslatorStyledTextStreamHeader); 1370 uint8 buffer[DATA_BUFFER_SIZE]; 1371 status_t result; 1372 translator_info outInfo; 1373 // Read in the header to determine 1374 // if the data is supported 1375 ssize_t bytesRead = source->Read(buffer, headerSize); 1376 if (bytesRead < 0) 1377 return bytesRead; 1378 1379 // read in enough data to fill the stream header 1380 if (bytesRead == headerSize) { 1381 TranslatorStyledTextStreamHeader header; 1382 memcpy(&header, buffer, headerSize); 1383 if (swap_data(B_UINT32_TYPE, &header, headerSize, 1384 B_SWAP_BENDIAN_TO_HOST) != B_OK) 1385 return B_ERROR; 1386 1387 if (header.header.magic == B_STYLED_TEXT_FORMAT 1388 && header.header.header_size == sizeof(TranslatorStyledTextStreamHeader) 1389 && header.header.data_size == 0 1390 && header.version == 100) { 1391 TranslatorStyledTextTextHeader textHeader; 1392 result = identify_stxt_header(header, source, &outInfo, outType, 1393 &textHeader); 1394 if (result != B_OK) 1395 return result; 1396 1397 return translate_from_stxt(source, outDestination, outType, textHeader); 1398 } 1399 } 1400 1401 // if the data is not styled text, check if it is ASCII text 1402 bool forceEncoding = false; 1403 const char* encoding = NULL; 1404 result = identify_text(buffer, bytesRead, source, &outInfo, 
outType, encoding); 1405 if (result != B_OK) 1406 return result; 1407 1408 if (ioExtension != NULL) { 1409 const char* value; 1410 if (ioExtension->FindString("be:encoding", &value) == B_OK 1411 && value[0]) { 1412 // override encoding 1413 encoding = value; 1414 forceEncoding = true; 1415 } 1416 } 1417 1418 return translate_from_text(source, encoding, forceEncoding, outDestination, outType); 1419 } 1420 1421 1422 BView * 1423 STXTTranslator::NewConfigView(TranslatorSettings *settings) 1424 { 1425 return new STXTView(BRect(0, 0, 225, 175), "STXTTranslator Settings", 1426 B_FOLLOW_ALL, B_WILL_DRAW, settings); 1427 } 1428 1429