xref: /haiku/src/add-ons/kernel/file_systems/cdda/cdda.cpp (revision 5fae0bc1a2f74ccf56b7e3958149317d6af2cccc)
1 /*
2  * Copyright 2007-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "cdda.h"
8 
9 #include <KernelExport.h>
10 #include <device/scsi.h>
11 
12 #include <ctype.h>
13 #include <errno.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <strings.h>
17 
18 
/*!	Layout of a single CD-Text pack as found in the data returned for
	SCSI_TOC_FORMAT_CD_TEXT. parse_pack_data() walks an array of these.
*/
struct cdtext_pack_data {
	uint8	id;
		// pack type, compared against kTrackID, kArtistID, ...
	uint8	track;
		// track the text belongs to; read_cdtext() uses 0 for the disc itself
	uint8	number;
		// sequence number; parse_pack_data() expects it to grow by one per pack
	uint8	character_position : 4;
		// number of bytes of the current string that precede this pack;
		// parse_pack_data() fetches that many bytes from the previous pack
	uint8	block_number : 3;
		// only printed by the disabled debug output in this file
	uint8	double_byte : 1;
		// set when the text uses two bytes per character (not supported yet)
	char	text[12];
		// text payload; strings may span several packs
	uint8	crc[2];
		// presumably a checksum of the pack - it is not evaluated in this file
} _PACKED;
29 
// Pack types (cdtext_pack_data::id) this file knows about. is_string_id()
// treats the whole range kTrackID..kMessageID as packs that carry text.
enum {
	kTrackID	= 0x80,
		// title: album title for track 0, track title otherwise
	kArtistID	= 0x81,
		// performer: album artist for track 0, track artist otherwise
	kMessageID	= 0x85,
		// upper bound of the text pack range
};

// Size of the buffer read_cdtext() allocates for the CD-Text data
static const uint32 kBufferSize = 16384;
// Size of the sense data buffer allocated for the raw READ TOC command
static const uint32 kSenseSize = 1024;
38 
39 
40 //	#pragma mark - string functions
41 
42 
/*!	Duplicates \a string on the heap.
	Returns \c NULL when \a string is \c NULL or empty, or when strdup()
	fails. The caller owns the returned string and has to free() it.
*/
static char *
copy_string(const char *string)
{
	const bool hasContent = string != NULL && string[0] != '\0';
	return hasContent ? strdup(string) : NULL;
}
51 
52 
/*!	Converts \a string, which is assumed to be ISO-8859-1 encoded, into a
	newly allocated UTF-8 string.

	The result is limited to 255 bytes plus the terminating null byte; input
	that would not fit is cut off at a character boundary, so that a two byte
	sequence is never split.

	Returns \c NULL if \a string is \c NULL or if the allocation fails. The
	caller owns the returned string and has to free() it.
*/
static char *
to_utf8(const char* string)
{
	if (string == NULL)
		return NULL;

	char buffer[256];
	const size_t kMaxLength = sizeof(buffer) - 1;
		// leaves room for the terminating null byte
	size_t out = 0;

	// TODO: assume ISO-8859-1 character set for now
	for (; string[0] != '\0'; string++) {
		const unsigned char c = (unsigned char)string[0];
		const size_t needed = c < 0x80 ? 1 : 2;

		// Check the space for the complete sequence before writing anything;
		// testing for "out == limit" alone misses the case where a two byte
		// sequence jumps over the limit.
		if (out + needed > kMaxLength)
			break;

		if (needed == 1)
			buffer[out++] = (char)c;
		else {
			buffer[out++] = (char)(0xc0 | (c >> 6));
			buffer[out++] = (char)(0x80 | (c & 0x3f));
		}
	}
	buffer[out++] = '\0';

	char *copy = (char *)malloc(out);
	if (copy == NULL)
		return NULL;

	memcpy(copy, buffer, out);
	return copy;
}
82 
83 
/*!	Returns whether \a c is a character that should be stripped from the
	start of a string: white space, '-', '/', or '\\'.
*/
static bool
is_garbage(char c)
{
	// The <ctype.h> functions are only defined for values representable as
	// unsigned char (and EOF). The strings checked here are UTF-8, and their
	// bytes >= 0x80 are negative where char is signed - hence the cast.
	return isspace((unsigned char)c) || c == '-' || c == '/' || c == '\\';
}
89 
90 
91 static void
92 sanitize_string(char *&string)
93 {
94 	if (string == NULL)
95 		return;
96 
97 	// strip garbage at the start
98 
99 	uint32 length = strlen(string);
100 	uint32 garbage = 0;
101 	while (is_garbage(string[garbage])) {
102 		garbage++;
103 	}
104 
105 	length -= garbage;
106 	if (garbage)
107 		memmove(string, string + garbage, length + 1);
108 
109 	// strip garbage from the end
110 
111 	while (length > 1 && isspace(string[length - 1])) {
112 		string[--length] = '\0';
113 	}
114 
115 	if (!string[0]) {
116 		// free string if it's empty
117 		free(string);
118 		string = NULL;
119 	}
120 }
121 
122 
/*!	Finds the first occurrence of \a find in \a string, ignores case.
	Returns a pointer into \a string, or \c NULL if there is no match or if
	either argument is \c NULL. An empty \a find matches at the start.
*/
static char*
find_string(const char *string, const char *find)
{
	if (string == NULL || find == NULL)
		return NULL;

	if (find[0] == '\0')
		return (char *)string;

	// tolower() must be fed unsigned char values, and its result is kept as
	// an int: stored in a char, a byte >= 0x80 would turn negative and never
	// compare equal to the tolower() result of the same byte.
	const int first = tolower((unsigned char)find[0]);
	const char *rest = find + 1;
	const size_t restLength = strlen(rest);

	for (; string[0] != '\0'; string++) {
		if (tolower((unsigned char)string[0]) != first)
			continue;
		if (strncasecmp(string + 1, rest, restLength) == 0)
			return (char *)string;
	}

	return NULL;
}
146 
147 
148 static void
149 cut_string(char *string, const char *cut)
150 {
151 	if (string == NULL || cut == NULL)
152 		return;
153 
154 	char *found = find_string(string, cut);
155 	if (found != NULL) {
156 		uint32 foundLength = strlen(found);
157 		uint32 cutLength = strlen(cut);
158 		memmove(found, found + cutLength, foundLength + 1 - cutLength);
159 	}
160 }
161 
162 
163 static void
164 sanitize_album(cdtext &text)
165 {
166 	cut_string(text.album, text.artist);
167 	sanitize_string(text.album);
168 
169 	if (text.album != NULL && !strcasecmp(text.album, "My CD")) {
170 		// don't laugh, people really do that!
171 		free(text.album);
172 		text.album = NULL;
173 	}
174 
175 	if ((text.artist == NULL || text.artist[0] == '\0') && text.album != NULL) {
176 		// try to extract artist from album
177 		char *space = strstr(text.album, "  ");
178 		if (space != NULL) {
179 			space[0] = '\0';
180 			text.artist = text.album;
181 			text.album = copy_string(space + 2);
182 
183 			sanitize_string(text.artist);
184 			sanitize_string(text.album);
185 		}
186 	}
187 }
188 
189 
190 static void
191 sanitize_titles(cdtext &text)
192 {
193 	for (uint8 i = 0; i < text.track_count; i++) {
194 		cut_string(text.titles[i], "(Album Version)");
195 		sanitize_string(text.titles[i]);
196 		sanitize_string(text.artists[i]);
197 
198 		if (text.artists[i] != NULL && text.artist != NULL
199 			&& !strcasecmp(text.artists[i], text.artist)) {
200 			// if the title artist is the same as the main artist, remove it
201 			free(text.artists[i]);
202 			text.artists[i] = NULL;
203 		}
204 
205 		if (text.titles[i] != NULL && text.titles[i][0] == '\t' && i > 0)
206 			text.titles[i] = copy_string(text.titles[i - 1]);
207 	}
208 }
209 
210 
/*!	Checks whether all letters of \a string share the case recorded in
	\a upper. Characters that are no letters are ignored.

	While \a first is \c true, no letter has been seen yet: the first letter
	found determines \a upper, and \a first is cleared. Both are meant to be
	carried over from one string to the next.

	Returns \c false as soon as a letter of the other case is found, \c true
	otherwise (also for a \c NULL \a string).
*/
static bool
single_case(const char *string, bool &upper, bool &first)
{
	if (string == NULL)
		return true;

	// A single loop that stops at the null byte; skipping non-letters in an
	// inner loop would run past the end of strings that end with one.
	for (; string[0] != '\0'; string++) {
		// <ctype.h> functions need unsigned char values
		const unsigned char c = (unsigned char)string[0];
		if (!isalpha(c))
			continue;

		const bool isUpper = isupper(c) != 0;
		if (first) {
			upper = isUpper;
			first = false;
		} else if (isUpper != upper)
			return false;
	}

	return true;
}
233 
234 
/*!	Capitalizes \a string in place: the first letter of each word is turned
	into upper case, all other letters into lower case.
	A word starts after any character that is neither a letter nor an
	apostrophe. A \c NULL \a string is ignored.
*/
static void
capitalize_string(char *string)
{
	if (string == NULL)
		return;

	// All <ctype.h> calls get unsigned char values; the strings are UTF-8,
	// and bytes >= 0x80 would be negative as plain char.
	const unsigned char start = (unsigned char)string[0];
	bool newWord = isalpha(start) || isspace(start);

	for (; string[0] != '\0'; string++) {
		const unsigned char c = (unsigned char)string[0];

		if (isalpha(c)) {
			string[0] = (char)(newWord ? toupper(c) : tolower(c));
			newWord = false;
		} else if (c != '\'') {
			// an apostrophe does not start a new word ("It's")
			newWord = true;
		}
	}
}
255 
256 
257 static void
258 correct_case(cdtext &text)
259 {
260 	// check if all titles share a single case
261 	bool first = true;
262 	bool upper;
263 	if (!single_case(text.album, upper, first)
264 		|| !single_case(text.artist, upper, first))
265 		return;
266 
267 	for (int32 i = 0; i < text.track_count; i++) {
268 		if (!single_case(text.titles[i], upper, first)
269 			|| !single_case(text.artists[i], upper, first))
270 			return;
271 	}
272 
273 	// If we get here, everything has a single case; we fix that
274 	// and capitalize each word
275 
276 	capitalize_string(text.album);
277 	capitalize_string(text.artist);
278 	for (int32 i = 0; i < text.track_count; i++) {
279 		capitalize_string(text.titles[i]);
280 		capitalize_string(text.artists[i]);
281 	}
282 }
283 
284 
285 //	#pragma mark - CD-Text
286 
287 
288 cdtext::cdtext()
289 	:
290 	artist(NULL),
291 	album(NULL),
292 	genre(NULL),
293 	track_count(0)
294 {
295 	memset(titles, 0, sizeof(titles));
296 	memset(artists, 0, sizeof(artists));
297 }
298 
299 
300 cdtext::~cdtext()
301 {
302 	free(album);
303 	free(artist);
304 	free(genre);
305 
306 	for (uint8 i = 0; i < track_count; i++) {
307 		free(titles[i]);
308 		free(artists[i]);
309 	}
310 }
311 
312 
313 static bool
314 is_string_id(uint8 id)
315 {
316 	return id >= kTrackID && id <= kMessageID;
317 }
318 
319 
/*!	Parses a \a pack data into the provided text buffer; the corresponding
	track number will be left in \a track, and the type of the data in \a id.
	The pack data is explained in SCSI MMC-3.

	\a id, \a track, and \a state must stay constant between calls to this
	function. \a state must be initialized to zero for the first call.

	\param pack The pack to start with; advanced past all packs consumed.
	\param packLeft Number of bytes left from \a pack on; reduced by the size
		of every pack consumed.
	\param lastPack The last pack that has been consumed, \c NULL initially.
	\param state Zero, or the offset into the text of \a lastPack at which
		the text of a track without own packs starts.
	\param buffer Receives the text.
	\param length Size of \a buffer on input; on output the length of the
		text, at least when a complete string has been collected.
	\return \c false when the pack data is exhausted or the pack sequence
		numbers are not consecutive, \c true otherwise.
*/
static bool
parse_pack_data(cdtext_pack_data *&pack, uint32 &packLeft,
	cdtext_pack_data *&lastPack, uint8 &id, uint8 &track, uint8 &state,
	char *buffer, size_t &length)
{
	if (packLeft < sizeof(cdtext_pack_data))
		return false;

	uint8 number = pack->number;
	size_t size = length;
		// capacity of the buffer; length is reused for the text length

	if (state != 0) {
		// we had a terminated string and a missing track
		track++;

		// NOTE(review): state is set to i + 1 or i + 2 below with i up to 11,
		// so "12 - state" can apparently become 0 or negative here; a
		// negative value would be converted to a huge size - confirm.
		// Also, neither is length updated on this path, nor is buffer
		// terminated explicitly.
		memcpy(buffer, lastPack->text + state, 12 - state);
		if (pack->track - track == 1)
			state = 0;
		else
			state += strnlen(buffer, 12 - state);
		return true;
	}

	id = pack->id;
	track = pack->track;

	buffer[0] = '\0';
	length = 0;

	// The string may have started in the previous pack already; fetch its
	// beginning from the end of that pack's text.
	// NOTE(review): character_position is a 4 bit field, so position can be
	// up to 15, in which case "12 - position" is negative - confirm that
	// values above 12 cannot occur or are harmless.
	size_t position = pack->character_position;
	if (position > 0 && lastPack != NULL) {
		memcpy(buffer, &lastPack->text[12 - position], position);
		length = position;
	}

	// collect the text of all packs that belong to this id and track
	while (id == pack->id && track == pack->track) {
#if 0
		dprintf("%u.%u.%u, %u.%u.%u, ", pack->id, pack->track, pack->number,
			pack->double_byte, pack->block_number, pack->character_position);
		for (int32 i = 0; i < 12; i++) {
			if (isprint(pack->text[i]))
				dprintf("%c", pack->text[i]);
			else
				dprintf("-");
		}
		dprintf("\n");
#endif
		if (is_string_id(id)) {
			// TODO: support double byte characters
			// text that does not fit into the buffer anymore is dropped
			if (length + 12 < size) {
				memcpy(buffer + length, pack->text, 12);
				length += 12;
			}
		}

		packLeft -= sizeof(cdtext_pack_data);
		if (packLeft < sizeof(cdtext_pack_data))
			return false;

		lastPack = pack;
		number++;
		pack++;

		// the packs must be numbered consecutively
		if (pack->number != number)
			return false;
	}

	// NOTE(review): the buffer is only terminated explicitly when the next
	// pack has the same id; otherwise termination seems to rely on the pack
	// text containing a null byte - confirm.
	if (id == pack->id) {
		// The next pack continues with the next track of the same type; the
		// first character_position bytes of its string were part of the
		// last pack and are cut off again here.
		// (length is unsigned; if this wraps, the check below clamps it.)
		length -= pack->character_position;
		if (length >= size)
			length = size - 1;
		buffer[length] = '\0';

		if (pack->track > lastPack->track + 1) {
			// there is a missing track
			// remember where its text starts in the last pack: right after
			// the first terminator
			for (int32 i = 0; i < 12; i++) {
				if (lastPack->text[i] == '\0') {
					state = i + (lastPack->double_byte ? 2 : 1);
					break;
				}
			}
		}
	}

	return true;
}
413 
414 
415 static void
416 dump_cdtext(cdtext &text)
417 {
418 	if (text.album)
419 		dprintf("Album:    \"%s\"\n", text.album);
420 	if (text.artist)
421 		dprintf("Artist:   \"%s\"\n", text.artist);
422 	for (uint8 i = 0; i < text.track_count; i++) {
423 		dprintf("Track %02u: \"%s\"%s%s%s\n", i + 1, text.titles[i],
424 			text.artists[i] ? " (" : "", text.artists[i] ? text.artists[i] : "",
425 			text.artists[i] ? ")" : "");
426 	}
427 }
428 
429 
430 static void
431 dump_toc(scsi_toc_toc *toc)
432 {
433 	int32 numTracks = toc->last_track + 1 - toc->first_track;
434 
435 	for (int32 i = 0; i < numTracks; i++) {
436 		scsi_toc_track& track = toc->tracks[i];
437 		scsi_cd_msf& next = toc->tracks[i + 1].start.time;
438 			// the last track is always lead-out
439 		scsi_cd_msf& start = toc->tracks[i].start.time;
440 		scsi_cd_msf length;
441 
442 		uint64 diff = next.minute * kFramesPerMinute
443 			+ next.second * kFramesPerSecond + next.frame
444 			- start.minute * kFramesPerMinute
445 			- start.second * kFramesPerSecond - start.frame;
446 		length.minute = diff / kFramesPerMinute;
447 		length.second = (diff % kFramesPerMinute) / kFramesPerSecond;
448 		length.frame = diff % kFramesPerSecond;
449 
450 		dprintf("%02u. %02u:%02u.%02u (length %02u:%02u.%02u)\n",
451 			track.track_number, start.minute, start.second, start.frame,
452 			length.minute, length.second, length.frame);
453 	}
454 }
455 
456 
457 static status_t
458 read_frames(int fd, off_t firstFrame, uint8 *buffer, size_t count)
459 {
460 	size_t framesLeft = count;
461 
462 	while (framesLeft > 0) {
463 		scsi_read_cd read;
464 		read.start_m = firstFrame / kFramesPerMinute;
465 		read.start_s = (firstFrame / kFramesPerSecond) % 60;
466 		read.start_f = firstFrame % kFramesPerSecond;
467 
468 		read.length_m = count / kFramesPerMinute;
469 		read.length_s = (count / kFramesPerSecond) % 60;
470 		read.length_f = count % kFramesPerSecond;
471 
472 		read.buffer_length = count * kFrameSize;
473 		read.buffer = (char *)buffer;
474 		read.play = false;
475 
476 		if (ioctl(fd, B_SCSI_READ_CD, &read) < 0) {
477 			// drive couldn't read data - try again to read with a smaller block size
478 			if (count == 1)
479 				return errno;
480 
481 			if (count >= 32)
482 				count = 8;
483 			else
484 				count = 1;
485 			continue;
486 		}
487 
488 		buffer += count * kFrameSize;
489 		framesLeft -= count;
490 		firstFrame += count;
491 	}
492 
493 	return B_OK;
494 }
495 
496 
497 static status_t
498 read_table_of_contents(int fd, uint32 track, uint8 format, uint8 *buffer,
499 	size_t bufferSize)
500 {
501 	raw_device_command raw;
502 	uint8 *senseData = (uint8 *)malloc(kSenseSize);
503 	if (senseData == NULL)
504 		return B_NO_MEMORY;
505 
506 	memset(&raw, 0, sizeof(raw_device_command));
507 	memset(senseData, 0, kSenseSize);
508 	memset(buffer, 0, bufferSize);
509 
510 	scsi_cmd_read_toc &toc = *(scsi_cmd_read_toc*)&raw.command;
511 	toc.opcode = SCSI_OP_READ_TOC;
512 	toc.time = 1;
513 	toc.format = format;
514 	toc.track = track;
515 	toc.allocation_length = B_HOST_TO_BENDIAN_INT16(bufferSize);
516 
517 	raw.command_length = 10;
518 	raw.flags = B_RAW_DEVICE_DATA_IN | B_RAW_DEVICE_REPORT_RESIDUAL
519 		| B_RAW_DEVICE_SHORT_READ_VALID;
520 	raw.scsi_status = 0;
521 	raw.cam_status = 0;
522 	raw.data = buffer;
523 	raw.data_length = bufferSize;
524 	raw.timeout = 10000000LL;	// 10 secs
525 	raw.sense_data = senseData;
526 	raw.sense_data_length = sizeof(kSenseSize);
527 
528 	if (ioctl(fd, B_RAW_DEVICE_COMMAND, &raw) == 0
529 		&& raw.scsi_status == 0 && raw.cam_status == 1) {
530 		free(senseData);
531 		return B_OK;
532 	}
533 
534 	free(senseData);
535 	return B_ERROR;
536 }
537 
538 
539 //	#pragma mark - exported functions
540 
541 
542 status_t
543 read_cdtext(int fd, struct cdtext &cdtext)
544 {
545 	uint8 *buffer = (uint8 *)malloc(kBufferSize);
546 	if (buffer == NULL)
547 		return B_NO_MEMORY;
548 
549 	// do it twice, just in case...
550 	// (at least my CD-ROM sometimes returned broken data on first try)
551 	read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
552 		kBufferSize);
553 	if (read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
554 			kBufferSize) != B_OK) {
555 		free(buffer);
556 		return B_ERROR;
557 	}
558 
559 	scsi_toc_general *header = (scsi_toc_general *)buffer;
560 
561 	uint32 packLength = B_BENDIAN_TO_HOST_INT16(header->data_length) - 2;
562 	cdtext_pack_data *pack = (cdtext_pack_data *)(header + 1);
563 	cdtext_pack_data *lastPack = NULL;
564 	uint8 state = 0;
565 	uint8 track = 0;
566 	uint8 id = 0;
567 	char text[256];
568 
569 	// TODO: determine encoding!
570 
571 	while (true) {
572 		size_t length = sizeof(text);
573 
574 		if (!parse_pack_data(pack, packLength, lastPack, id, track,
575 				state, text, length))
576 			break;
577 
578 		switch (id) {
579 			case kTrackID:
580 				if (track == 0) {
581 					if (cdtext.album == NULL)
582 						cdtext.album = to_utf8(text);
583 				} else if (track <= kMaxTracks) {
584 					if (cdtext.titles[track - 1] == NULL)
585 						cdtext.titles[track - 1] = to_utf8(text);
586 					if (track > cdtext.track_count)
587 						cdtext.track_count = track;
588 				}
589 				break;
590 
591 			case kArtistID:
592 				if (track == 0) {
593 					if (cdtext.artist == NULL)
594 						cdtext.artist = to_utf8(text);
595 				} else if (track <= kMaxTracks) {
596 					if (cdtext.artists[track - 1] == NULL)
597 						cdtext.artists[track - 1] = to_utf8(text);
598 				}
599 				break;
600 
601 			default:
602 				if (is_string_id(id))
603 					dprintf("UNKNOWN %u: \"%s\"\n", id, text);
604 				break;
605 		}
606 	}
607 
608 	free(buffer);
609 
610 	if (cdtext.artist == NULL && cdtext.album == NULL)
611 		return B_ERROR;
612 
613 	for (int i = 0; i < cdtext.track_count; i++) {
614 		if (cdtext.titles[i] == NULL)
615 			return B_ERROR;
616 	}
617 
618 	sanitize_string(cdtext.artist);
619 	sanitize_album(cdtext);
620 	sanitize_titles(cdtext);
621 	correct_case(cdtext);
622 
623 	dump_cdtext(cdtext);
624 	return B_OK;
625 }
626 
627 
628 status_t
629 read_table_of_contents(int fd, scsi_toc_toc *toc, size_t length)
630 {
631 	status_t status = read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_TOC,
632 		(uint8*)toc, length);
633 	if (status < B_OK)
634 		return status;
635 
636 	// make sure the values in the TOC make sense
637 
638 	int32 lastTrack = toc->last_track + 1 - toc->first_track;
639 	size_t dataLength = B_BENDIAN_TO_HOST_INT16(toc->data_length) + 2;
640 	if (dataLength < sizeof(scsi_toc_toc) || lastTrack <= 0)
641 		return B_BAD_DATA;
642 
643 	if (length > dataLength)
644 		length = dataLength;
645 
646 	length -= sizeof(scsi_toc_general);
647 
648 	if (lastTrack * sizeof(scsi_toc_track) > length)
649 		toc->last_track = length / sizeof(scsi_toc_track) + toc->first_track;
650 
651 	dump_toc(toc);
652 	return B_OK;
653 }
654 
655 
/*!	Reads \a length bytes of audio data starting at byte position \a offset
	from the device \a fd, and copies them to \a data using user_memcpy().

	The device is read in whole frames into \a buffer (\a bufferSize bytes).
	On entry, \a buffer is taken to hold data from byte position
	\a bufferOffset on, which is used to satisfy the start or the end of the
	request without reading. A negative \a bufferOffset disables this.
	NOTE(review): this function does not report where the buffer contents
	come from on return - presumably the caller keeps track of that; confirm.

	\param endFrame The frame at which reading has to stop; reads are clipped
		so that they do not go beyond it.
	\return \c B_OK on success, \c B_BAD_ADDRESS if copying to \a data
		failed, or the error returned by read_frames().
*/
status_t
read_cdda_data(int fd, off_t endFrame, off_t offset, void *data, size_t length,
	off_t bufferOffset, void *buffer, size_t bufferSize)
{
	// see if the buffer contents overlap with the request
	if (bufferOffset >= 0 && bufferOffset <= offset + (off_t)length
		&& bufferOffset + (off_t)bufferSize > offset) {
		if (offset >= bufferOffset) {
			// buffer reaches into the beginning of the request
			off_t dataOffset = offset - bufferOffset;
			size_t bytes = min_c(bufferSize - dataOffset, length);
			if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
				return B_BAD_ADDRESS;

			// continue behind what has been copied
			data = (void *)((uint8 *)data + bytes);
			length -= bytes;
			offset += bytes;
		} else if (offset < bufferOffset
			&& offset + length < bufferOffset + bufferSize) {
			// buffer overlaps at the end of the request
			off_t dataOffset = bufferOffset - offset;
			size_t bytes = length - dataOffset;
			if (user_memcpy((uint8 *)data + dataOffset, buffer, bytes) < B_OK)
				return B_BAD_ADDRESS;

			// only the part in front of the buffer is left to be read
			length -= bytes;
		}
		// we don't handle the case where we would need to split the request
	}

	// read the rest from the device, one buffer at a time
	while (length > 0) {
		off_t frame = offset / kFrameSize;
		uint32 count = bufferSize / kFrameSize;
		// NOTE(review): if frame is beyond endFrame, the difference is
		// negative and ends up as a huge unsigned count - this seems to rely
		// on the caller never requesting data past endFrame; confirm.
		if (frame + count > endFrame)
			count = endFrame - frame;

		status_t status = read_frames(fd, frame, (uint8 *)buffer, count);
		if (status < B_OK)
			return status;

		// the request may start in the middle of the first frame
		off_t dataOffset = offset % kFrameSize;
		// NOTE(review): this assumes the whole buffer has been filled; with
		// a clipped count, or a bufferSize that is no multiple of
		// kFrameSize, fewer bytes are valid - apparently the caller is
		// expected to limit length accordingly; confirm.
		size_t bytes = bufferSize - dataOffset;
		if (bytes > length)
			bytes = length;

		if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
			return B_BAD_ADDRESS;

		data = (void *)((uint8 *)data + bytes);
		length -= bytes;
		offset += bytes;
	}

	return B_OK;
}
710