xref: /haiku/src/add-ons/kernel/file_systems/cdda/cdda.cpp (revision a3e794ae459fec76826407f8ba8c94cd3535f128)
1 /*
2  * Copyright 2007-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "cdda.h"
8 
9 #include <KernelExport.h>
10 #include <device/scsi.h>
11 
12 #include <algorithm>
13 #include <ctype.h>
14 #include <errno.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <strings.h>
18 
19 
/*!	Layout of a single CD-Text pack as walked by parse_pack_data().
	The structure is declared _PACKED, i.e. without padding between members.
*/
struct cdtext_pack_data {
	uint8	id;
		// pack type, compared against kTrackID/kArtistID/kMessageID
	uint8	track;
		// track the text belongs to; read_cdtext() treats 0 as the disc itself
	uint8	number;
		// sequence number; parse_pack_data() expects it to grow by one per pack
	uint8	character_position : 4;
		// parse_pack_data() uses this as the number of characters of the
		// current string that were already stored in the previous pack
	uint8	block_number : 3;
		// only referenced by the disabled debug output in this file
	uint8	double_byte : 1;
		// when set, parse_pack_data() assumes a two byte string terminator
	char	text[12];
		// text payload; a string may continue in the following pack
	uint8	crc[2];
		// presumably a checksum of the pack - never verified in this file
} _PACKED;
30 
// Pack type IDs (cdtext_pack_data::id) this file distinguishes.
enum {
	kTrackID	= 0x80,
		// title text: read_cdtext() stores it as album title for track 0,
		// and as track title otherwise
	kArtistID	= 0x81,
		// artist text: album artist for track 0, track artist otherwise
	kMessageID	= 0x85,
		// last ID of the range that is_string_id() treats as text
};
36 
// Size in bytes of the buffer read_cdtext() allocates for the raw CD-Text data
static const uint32 kBufferSize = 16384;
// Size in bytes of the sense data buffer allocated for raw device commands
static const uint32 kSenseSize = 1024;
39 
40 
41 //	#pragma mark - string functions
42 
43 
/*!	Duplicates \a string on the heap.
	Returns \c NULL if \a string is \c NULL or empty, or if strdup() fails.
	The caller owns the returned string and has to free() it.
*/
static char *
copy_string(const char *string)
{
	if (string != NULL && string[0] != '\0')
		return strdup(string);

	return NULL;
}
52 
53 
54 static char *
55 to_utf8(const char* string)
56 {
57 	char buffer[256];
58 	size_t out = 0;
59 
60 	// TODO: assume CP1252 or ISO-8859-1 character set for now
61 	while (uint32 c = (uint8)string[0]) {
62 
63 		if (c < 0x80) {
64 			if (out >= sizeof(buffer) - 1)
65 				break;
66 			// ASCII character: no change needed
67 			buffer[out++] = c;
68 		} else {
69 			if (c < 0xA0) {
70 				// Windows CP-1252 - Use a lookup table
71 				static const uint32 lookup[] = {
72 					0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
73 					0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0,
74 					0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
75 					0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178
76 				};
77 
78 				c = lookup[c - 0x80];
79 			}
80 
81 			// Convert to 2 or 3-byte representation
82 			if (c == 0) {
83 				// invalid character, ignore
84 			} else if (c < 0x800) {
85 				if (out >= sizeof(buffer) - 2)
86 					break;
87 				buffer[out++] = 0xc0 | (c >> 6);
88 				buffer[out++] = 0x80 | (c & 0x3f);
89 			} else {
90 				if (out >= sizeof(buffer) - 3)
91 					break;
92 				buffer[out++] = 0xe0 | (c >> 12);
93 				buffer[out++] = 0x80 | ((c >> 6) & 0x3f);
94 				buffer[out++] = 0x80 | (c & 0x3f);
95 			}
96 		}
97 
98 		string++;
99 	}
100 	buffer[out++] = '\0';
101 
102 	char *copy = (char *)malloc(out);
103 	if (copy == NULL)
104 		return NULL;
105 
106 	memcpy(copy, buffer, out);
107 	return copy;
108 }
109 
110 
/*!	Returns whether \a c is a character that sanitize_string() strips from
	the start of a string: white space, '-', '/', or '\\'.
*/
static bool
is_garbage(char c)
{
	// The <ctype.h> functions are only defined for values representable as
	// unsigned char (or EOF); the strings handled here are UTF-8, so \a c
	// may well be negative on platforms where char is signed.
	return isspace((unsigned char)c) || c == '-' || c == '/' || c == '\\';
}
116 
117 
118 static void
119 sanitize_string(char *&string)
120 {
121 	if (string == NULL)
122 		return;
123 
124 	// strip garbage at the start
125 
126 	uint32 length = strlen(string);
127 	uint32 garbage = 0;
128 	while (is_garbage(string[garbage])) {
129 		garbage++;
130 	}
131 
132 	length -= garbage;
133 	if (garbage)
134 		memmove(string, string + garbage, length + 1);
135 
136 	// strip garbage from the end
137 
138 	while (length > 1 && isspace(string[length - 1])) {
139 		string[--length] = '\0';
140 	}
141 
142 	if (!string[0]) {
143 		// free string if it's empty
144 		free(string);
145 		string = NULL;
146 	}
147 }
148 
149 
/*!	Finds the first occurrence of \a find in \a string, ignores case.
	Returns a pointer into \a string, or \c NULL if there is no match or if
	either argument is \c NULL. An empty \a find matches at the start.
*/
static char*
find_string(const char *string, const char *find)
{
	if (string == NULL || find == NULL)
		return NULL;

	if (find[0] == '\0')
		return (char *)string;

	// tolower() is only defined for unsigned char values (or EOF), so make
	// sure bytes >= 0x80 are not passed on as negative values
	const int first = tolower((unsigned char)find[0]);
	const char *rest = find + 1;
	const size_t restLength = strlen(rest);

	for (; string[0] != '\0'; string++) {
		if (tolower((unsigned char)string[0]) != first)
			continue;
		if (strncasecmp(string + 1, rest, restLength) == 0)
			return (char *)string;
	}

	return NULL;
}
173 
174 
175 static void
176 cut_string(char *string, const char *cut)
177 {
178 	if (string == NULL || cut == NULL)
179 		return;
180 
181 	char *found = find_string(string, cut);
182 	if (found != NULL) {
183 		uint32 foundLength = strlen(found);
184 		uint32 cutLength = strlen(cut);
185 		memmove(found, found + cutLength, foundLength + 1 - cutLength);
186 	}
187 }
188 
189 
190 static void
191 sanitize_album(cdtext &text)
192 {
193 	cut_string(text.album, text.artist);
194 	sanitize_string(text.album);
195 
196 	if (text.album != NULL && !strcasecmp(text.album, "My CD")) {
197 		// don't laugh, people really do that!
198 		free(text.album);
199 		text.album = NULL;
200 	}
201 
202 	if ((text.artist == NULL || text.artist[0] == '\0') && text.album != NULL) {
203 		// try to extract artist from album
204 		char *space = strstr(text.album, "  ");
205 		if (space != NULL) {
206 			space[0] = '\0';
207 			text.artist = text.album;
208 			text.album = copy_string(space + 2);
209 
210 			sanitize_string(text.artist);
211 			sanitize_string(text.album);
212 		}
213 	}
214 }
215 
216 
217 static void
218 sanitize_titles(cdtext &text)
219 {
220 	for (uint8 i = 0; i < text.track_count; i++) {
221 		cut_string(text.titles[i], "(Album Version)");
222 		sanitize_string(text.titles[i]);
223 		sanitize_string(text.artists[i]);
224 
225 		if (text.artists[i] != NULL && text.artist != NULL
226 			&& !strcasecmp(text.artists[i], text.artist)) {
227 			// if the title artist is the same as the main artist, remove it
228 			free(text.artists[i]);
229 			text.artists[i] = NULL;
230 		}
231 
232 		if (text.titles[i] != NULL && text.titles[i][0] == '\t' && i > 0)
233 			text.titles[i] = copy_string(text.titles[i - 1]);
234 	}
235 }
236 
237 
/*!	Checks whether all letters of \a string share the case seen so far.

	\a first tells whether no letter has been seen yet; the first letter
	found then determines \a upper, and clears \a first. Both are carried
	over between calls, so that several strings can be checked against each
	other. Characters that are not letters are ignored.

	Returns \c false as soon as a letter with a differing case is found,
	\c true otherwise (also for a \c NULL \a string).
*/
static bool
single_case(const char *string, bool &upper, bool &first)
{
	if (string == NULL)
		return true;

	// Skipping non-letters must never step over the terminating null, so
	// the end of the string is checked for every single character.
	for (; string[0] != '\0'; string++) {
		// <ctype.h> functions need an unsigned char value
		const unsigned char c = (unsigned char)string[0];
		if (!isalpha(c))
			continue;

		const bool isUpper = isupper(c) != 0;
		if (first) {
			upper = isUpper;
			first = false;
		} else if (isUpper != upper)
			return false;
	}

	return true;
}
260 
261 
/*!	Capitalizes every word of \a string in place: the first letter of a word
	is converted to upper case, all other letters to lower case.
	Any character that is neither a letter nor an apostrophe starts a new
	word. Bytes >= 0x80 (ie. parts of UTF-8 multi-byte characters) are left
	untouched, and count as part of the current word.
*/
static void
capitalize_string(char *string)
{
	if (string == NULL)
		return;

	// <ctype.h> functions are only defined for unsigned char values
	bool newWord = isalpha((unsigned char)string[0])
		|| isspace((unsigned char)string[0]);

	for (; string[0] != '\0'; string++) {
		const unsigned char c = (unsigned char)string[0];

		if (c >= 0x80) {
			// Part of a multi-byte character: we cannot change its case, but
			// it must not split the word either ("Motörhead" would become
			// "MotöRhead" otherwise).
			newWord = false;
			continue;
		}

		if (isalpha(c)) {
			string[0] = newWord ? toupper(c) : tolower(c);
			newWord = false;
		} else if (c != '\'')
			newWord = true;
	}
}
282 
283 
284 static void
285 correct_case(cdtext &text)
286 {
287 	// check if all titles share a single case
288 	bool first = true;
289 	bool upper;
290 	if (!single_case(text.album, upper, first)
291 		|| !single_case(text.artist, upper, first))
292 		return;
293 
294 	for (int32 i = 0; i < text.track_count; i++) {
295 		if (!single_case(text.titles[i], upper, first)
296 			|| !single_case(text.artists[i], upper, first))
297 			return;
298 	}
299 
300 	// If we get here, everything has a single case; we fix that
301 	// and capitalize each word
302 
303 	capitalize_string(text.album);
304 	capitalize_string(text.artist);
305 	for (int32 i = 0; i < text.track_count; i++) {
306 		capitalize_string(text.titles[i]);
307 		capitalize_string(text.artists[i]);
308 	}
309 }
310 
311 
312 //	#pragma mark - CD-Text
313 
314 
315 cdtext::cdtext()
316 	:
317 	artist(NULL),
318 	album(NULL),
319 	genre(NULL),
320 	track_count(0)
321 {
322 	memset(titles, 0, sizeof(titles));
323 	memset(artists, 0, sizeof(artists));
324 }
325 
326 
327 cdtext::~cdtext()
328 {
329 	free(album);
330 	free(artist);
331 	free(genre);
332 
333 	for (uint8 i = 0; i < track_count; i++) {
334 		free(titles[i]);
335 		free(artists[i]);
336 	}
337 }
338 
339 
340 static bool
341 is_string_id(uint8 id)
342 {
343 	return id >= kTrackID && id <= kMessageID;
344 }
345 
346 
/*!	Parses a \a pack data into the provided text buffer; the corresponding
	track number will be left in \a track, and the type of the data in \a id.
	The pack data is explained in SCSI MMC-3.

	\a id, \a track, and \a state must stay constant between calls to this
	function. \a state must be initialized to zero for the first call.

	\param pack The pack to start with; advanced past all consumed packs.
	\param packLeft Number of bytes left starting at \a pack; reduced by the
		size of every consumed pack.
	\param lastPack The pack consumed last; must be \c NULL for the first
		call, updated by this function.
	\param id Set to the ID of the packs that were parsed.
	\param track Set to the track number the parsed text belongs to.
	\param state Non-zero while text for skipped track numbers is still to be
		taken from \a lastPack; holds the offset into its text then.
	\param buffer Receives the text.
	\param length On input the size of \a buffer, on output the length of the
		text (not updated when the function is entered with \a state != 0).
	\return \c false if the pack data is exhausted or if the sequence numbers
		are not consecutive, \c true if \a buffer, \a id, and \a track have
		been filled in.
*/
static bool
parse_pack_data(cdtext_pack_data *&pack, uint32 &packLeft,
	cdtext_pack_data *&lastPack, uint8 &id, uint8 &track, uint8 &state,
	char *buffer, size_t &length)
{
	if (packLeft < sizeof(cdtext_pack_data))
		return false;

	uint8 number = pack->number;
	size_t size = length;
		// remember the buffer size, "length" is reused as text length below

	if (state != 0) {
		// we had a terminated string and a missing track
		track++;

		// Take the next string from the remainder of the previous pack.
		// NOTE(review): "state" can become 12 or 13 below (terminator at the
		// very end of the text), in which case "12 - state" is zero or wraps
		// around - confirm that this cannot happen with real pack data.
		// NOTE(review): neither is the buffer terminated here, nor is
		// "length" updated - presumably this relies on the remainder
		// containing a null byte.
		memcpy(buffer, lastPack->text + state, 12 - state);
		if (pack->track - track == 1)
			state = 0;
		else
			state += strnlen(buffer, 12 - state);
		return true;
	}

	id = pack->id;
	track = pack->track;

	buffer[0] = '\0';
	length = 0;

	// The first "position" characters of this string were still part of the
	// previous pack; fetch them from the end of its text.
	// NOTE(review): character_position is a 4 bit field, values above 12
	// would index before the start of "text" - confirm they are not used.
	size_t position = pack->character_position;
	if (position > 0 && lastPack != NULL) {
		memcpy(buffer, &lastPack->text[12 - position], position);
		length = position;
	}

	// collect the text of all consecutive packs with the same ID and track
	while (id == pack->id && track == pack->track) {
#if 0
		dprintf("%u.%u.%u, %u.%u.%u, ", pack->id, pack->track, pack->number,
			pack->double_byte, pack->block_number, pack->character_position);
		for (int32 i = 0; i < 12; i++) {
			if (isprint(pack->text[i]))
				dprintf("%c", pack->text[i]);
			else
				dprintf("-");
		}
		dprintf("\n");
#endif
		if (is_string_id(id)) {
			// TODO: support double byte characters
			// text that does not fit into the buffer anymore is dropped
			if (length + 12 < size) {
				memcpy(buffer + length, pack->text, 12);
				length += 12;
			}
		}

		packLeft -= sizeof(cdtext_pack_data);
		if (packLeft < sizeof(cdtext_pack_data))
			return false;

		lastPack = pack;
		number++;
		pack++;

		// the sequence numbers have to be consecutive
		if (pack->number != number)
			return false;
	}

	if (id == pack->id) {
		// The next pack continues the same type for another track; its
		// character position is subtracted from the text length before the
		// string is terminated. "length" is unsigned, so an underflow ends up
		// in the clamp below as well.
		length -= pack->character_position;
		if (length >= size)
			length = size - 1;
		buffer[length] = '\0';

		if (pack->track > lastPack->track + 1) {
			// there is a missing track
			// Its text is expected in the previous pack, after the first
			// terminator; remember that offset for the next call.
			for (int32 i = 0; i < 12; i++) {
				if (lastPack->text[i] == '\0') {
					state = i + (lastPack->double_byte ? 2 : 1);
					break;
				}
			}
		}
	}

	return true;
}
440 
441 
442 static void
443 dump_cdtext(cdtext &text)
444 {
445 	if (text.album)
446 		dprintf("Album:    \"%s\"\n", text.album);
447 	if (text.artist)
448 		dprintf("Artist:   \"%s\"\n", text.artist);
449 	for (uint8 i = 0; i < text.track_count; i++) {
450 		dprintf("Track %02u: \"%s\"%s%s%s\n", i + 1, text.titles[i],
451 			text.artists[i] ? " (" : "", text.artists[i] ? text.artists[i] : "",
452 			text.artists[i] ? ")" : "");
453 	}
454 }
455 
456 
457 static void
458 dump_toc(scsi_toc_toc *toc)
459 {
460 	int32 numTracks = toc->last_track + 1 - toc->first_track;
461 
462 	for (int32 i = 0; i < numTracks; i++) {
463 		scsi_toc_track& track = toc->tracks[i];
464 		scsi_cd_msf& next = toc->tracks[i + 1].start.time;
465 			// the last track is always lead-out
466 		scsi_cd_msf& start = toc->tracks[i].start.time;
467 		scsi_cd_msf length;
468 
469 		uint64 diff = next.minute * kFramesPerMinute
470 			+ next.second * kFramesPerSecond + next.frame
471 			- start.minute * kFramesPerMinute
472 			- start.second * kFramesPerSecond - start.frame;
473 		length.minute = diff / kFramesPerMinute;
474 		length.second = (diff % kFramesPerMinute) / kFramesPerSecond;
475 		length.frame = diff % kFramesPerSecond;
476 
477 		dprintf("%02u. %02u:%02u.%02u (length %02u:%02u.%02u)\n",
478 			track.track_number, start.minute, start.second, start.frame,
479 			length.minute, length.second, length.frame);
480 	}
481 }
482 
483 
484 static status_t
485 read_frames(int fd, off_t firstFrame, uint8 *buffer, size_t count)
486 {
487 	size_t framesLeft = count;
488 
489 	while (framesLeft > 0) {
490 		// If the initial count was >= 32, and not a multiple of 8, and the
491 		// ioctl fails, we switch to reading 8 frames at a time. However the
492 		// last read can read between 1 and 7 frames only, to not overflow
493 		// the buffer.
494 		count = std::min(count, framesLeft);
495 
496 		scsi_read_cd read;
497 		read.start_m = firstFrame / kFramesPerMinute;
498 		read.start_s = (firstFrame / kFramesPerSecond) % 60;
499 		read.start_f = firstFrame % kFramesPerSecond;
500 
501 		read.length_m = count / kFramesPerMinute;
502 		read.length_s = (count / kFramesPerSecond) % 60;
503 		read.length_f = count % kFramesPerSecond;
504 
505 		read.buffer_length = count * kFrameSize;
506 		read.buffer = (char *)buffer;
507 		read.play = false;
508 
509 		if (ioctl(fd, B_SCSI_READ_CD, &read) < 0) {
510 			// drive couldn't read data - try again to read with a smaller block size
511 			if (count == 1)
512 				return errno;
513 
514 			if (count >= 32)
515 				count = 8;
516 			else
517 				count = 1;
518 
519 			continue;
520 		}
521 
522 		buffer += count * kFrameSize;
523 		framesLeft -= count;
524 		firstFrame += count;
525 	}
526 
527 	return B_OK;
528 }
529 
530 
531 static status_t
532 read_table_of_contents(int fd, uint32 track, uint8 format, uint8 *buffer,
533 	size_t bufferSize)
534 {
535 	raw_device_command raw;
536 	uint8 *senseData = (uint8 *)malloc(kSenseSize);
537 	if (senseData == NULL)
538 		return B_NO_MEMORY;
539 
540 	memset(&raw, 0, sizeof(raw_device_command));
541 	memset(senseData, 0, kSenseSize);
542 	memset(buffer, 0, bufferSize);
543 
544 	scsi_cmd_read_toc &toc = *(scsi_cmd_read_toc*)&raw.command;
545 	toc.opcode = SCSI_OP_READ_TOC;
546 	toc.time = 1;
547 	toc.format = format;
548 	toc.track = track;
549 	toc.allocation_length = B_HOST_TO_BENDIAN_INT16(bufferSize);
550 
551 	raw.command_length = 10;
552 	raw.flags = B_RAW_DEVICE_DATA_IN | B_RAW_DEVICE_REPORT_RESIDUAL
553 		| B_RAW_DEVICE_SHORT_READ_VALID;
554 	raw.scsi_status = 0;
555 	raw.cam_status = 0;
556 	raw.data = buffer;
557 	raw.data_length = bufferSize;
558 	raw.timeout = 10000000LL;	// 10 secs
559 	raw.sense_data = senseData;
560 	raw.sense_data_length = sizeof(kSenseSize);
561 
562 	if (ioctl(fd, B_RAW_DEVICE_COMMAND, &raw) == 0
563 		&& raw.scsi_status == 0 && raw.cam_status == 1) {
564 		free(senseData);
565 		return B_OK;
566 	}
567 
568 	free(senseData);
569 	return B_ERROR;
570 }
571 
572 
573 //	#pragma mark - exported functions
574 
575 
576 status_t
577 read_cdtext(int fd, struct cdtext &cdtext)
578 {
579 	uint8 *buffer = (uint8 *)malloc(kBufferSize);
580 	if (buffer == NULL)
581 		return B_NO_MEMORY;
582 
583 	// do it twice, just in case...
584 	// (at least my CD-ROM sometimes returned broken data on first try)
585 	read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
586 		kBufferSize);
587 	if (read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
588 			kBufferSize) != B_OK) {
589 		free(buffer);
590 		return B_ERROR;
591 	}
592 
593 	scsi_toc_general *header = (scsi_toc_general *)buffer;
594 
595 	uint32 packLength = B_BENDIAN_TO_HOST_INT16(header->data_length) - 2;
596 	cdtext_pack_data *pack = (cdtext_pack_data *)(header + 1);
597 	cdtext_pack_data *lastPack = NULL;
598 	uint8 state = 0;
599 	uint8 track = 0;
600 	uint8 id = 0;
601 	char text[256];
602 
603 	// TODO: determine encoding!
604 
605 	while (true) {
606 		size_t length = sizeof(text);
607 
608 		if (!parse_pack_data(pack, packLength, lastPack, id, track,
609 				state, text, length))
610 			break;
611 
612 		switch (id) {
613 			case kTrackID:
614 				if (track == 0) {
615 					if (cdtext.album == NULL)
616 						cdtext.album = to_utf8(text);
617 				} else if (track <= kMaxTracks) {
618 					if (cdtext.titles[track - 1] == NULL)
619 						cdtext.titles[track - 1] = to_utf8(text);
620 					if (track > cdtext.track_count)
621 						cdtext.track_count = track;
622 				}
623 				break;
624 
625 			case kArtistID:
626 				if (track == 0) {
627 					if (cdtext.artist == NULL)
628 						cdtext.artist = to_utf8(text);
629 				} else if (track <= kMaxTracks) {
630 					if (cdtext.artists[track - 1] == NULL)
631 						cdtext.artists[track - 1] = to_utf8(text);
632 				}
633 				break;
634 
635 			default:
636 				if (is_string_id(id))
637 					dprintf("UNKNOWN %u: \"%s\"\n", id, text);
638 				break;
639 		}
640 	}
641 
642 	free(buffer);
643 
644 	if (cdtext.artist == NULL && cdtext.album == NULL)
645 		return B_ERROR;
646 
647 	for (int i = 0; i < cdtext.track_count; i++) {
648 		if (cdtext.titles[i] == NULL)
649 			return B_ERROR;
650 	}
651 
652 	sanitize_string(cdtext.artist);
653 	sanitize_album(cdtext);
654 	sanitize_titles(cdtext);
655 	correct_case(cdtext);
656 
657 	dump_cdtext(cdtext);
658 	return B_OK;
659 }
660 
661 
662 status_t
663 read_table_of_contents(int fd, scsi_toc_toc *toc, size_t length)
664 {
665 	status_t status = read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_TOC,
666 		(uint8*)toc, length);
667 	if (status < B_OK)
668 		return status;
669 
670 	// make sure the values in the TOC make sense
671 
672 	int32 lastTrack = toc->last_track + 1 - toc->first_track;
673 	size_t dataLength = B_BENDIAN_TO_HOST_INT16(toc->data_length) + 2;
674 	if (dataLength < sizeof(scsi_toc_toc) || lastTrack <= 0)
675 		return B_BAD_DATA;
676 
677 	if (length > dataLength)
678 		length = dataLength;
679 
680 	length -= sizeof(scsi_toc_general);
681 
682 	if (lastTrack * sizeof(scsi_toc_track) > length)
683 		toc->last_track = length / sizeof(scsi_toc_track) + toc->first_track;
684 
685 	dump_toc(toc);
686 	return B_OK;
687 }
688 
689 
/*!	Copies \a length bytes of audio data starting at byte \a offset to
	\a data, using \a buffer as intermediate storage for the frames read
	from the device.

	If the current contents of \a buffer overlap with the request, that part
	is served from the buffer directly; everything else is read from the
	device in chunks of up to \a bufferSize bytes.

	\param fd The device to read from.
	\param endFrame Frames at or beyond this one are not read.
	\param offset Byte offset of the first byte wanted.
	\param data Destination; written via user_memcpy().
	\param length Number of bytes wanted.
	\param bufferOffset Byte offset the current contents of \a buffer
		correspond to; a negative value disables the use of these contents.
	\param buffer Intermediate buffer; overwritten when data has to be read.
	\param bufferSize Size of \a buffer in bytes.
	\return B_OK on success, B_BAD_ADDRESS if copying to \a data failed, or
		the error returned by read_frames().
*/
status_t
read_cdda_data(int fd, off_t endFrame, off_t offset, void *data, size_t length,
	off_t bufferOffset, void *buffer, size_t bufferSize)
{
	// Does the buffer contain valid data that overlaps with the request?
	if (bufferOffset >= 0 && bufferOffset <= offset + (off_t)length
		&& bufferOffset + (off_t)bufferSize > offset) {
		if (offset >= bufferOffset) {
			// buffer reaches into the beginning of the request
			off_t dataOffset = offset - bufferOffset;
			size_t bytes = min_c(bufferSize - dataOffset, length);
			if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
				return B_BAD_ADDRESS;

			// the remainder of the request starts after the copied part
			data = (void *)((uint8 *)data + bytes);
			length -= bytes;
			offset += bytes;
		} else if (offset < bufferOffset
			&& offset + length < bufferOffset + bufferSize) {
			// buffer overlaps at the end of the request
			off_t dataOffset = bufferOffset - offset;
			size_t bytes = length - dataOffset;
			if (user_memcpy((uint8 *)data + dataOffset, buffer, bytes) < B_OK)
				return B_BAD_ADDRESS;

			// only the part in front of the buffer is left to be read
			length -= bytes;
		}
		// we don't handle the case where we would need to split the request
	}

	// read whatever could not be served from the buffer
	while (length > 0) {
		off_t frame = offset / kFrameSize;
		uint32 count = bufferSize / kFrameSize;
		if (frame + count > endFrame)
			count = endFrame - frame;

		status_t status = read_frames(fd, frame, (uint8 *)buffer, count);
		if (status < B_OK)
			return status;

		// The buffer starts at a frame boundary, the request does not have
		// to; skip the bytes in front of the requested offset.
		// NOTE(review): "bytes" is derived from bufferSize even if "count"
		// was cut down to endFrame above - confirm that callers never ask
		// for data beyond endFrame.
		off_t dataOffset = offset % kFrameSize;
		size_t bytes = bufferSize - dataOffset;
		if (bytes > length)
			bytes = length;

		if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
			return B_BAD_ADDRESS;

		data = (void *)((uint8 *)data + bytes);
		length -= bytes;
		offset += bytes;
	}

	return B_OK;
}
744