xref: /haiku/src/add-ons/kernel/file_systems/cdda/cdda.cpp (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /*
2  * Copyright 2007-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "cdda.h"
8 
9 #include <KernelExport.h>
10 #include <device/scsi.h>
11 
12 #include <ctype.h>
13 #include <errno.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 
/*!	A single CD-Text pack in the layout parse_pack_data() walks over.
	According to the comment on parse_pack_data(), the format is explained
	in SCSI MMC-3.
*/
struct cdtext_pack_data {
	uint8	id;
		// type of the pack's content, see kTrackID and friends
	uint8	track;
		// track the pack belongs to; read_cdtext() treats 0 as the album
	uint8	number;
		// sequence number; parse_pack_data() stops as soon as consecutive
		// packs do not count up by one
	uint8	character_position : 4;
		// number of bytes of this pack's first string that parse_pack_data()
		// takes from the end of the previous pack
	uint8	block_number : 3;
		// only referenced by the disabled debug output in this file
	uint8	double_byte : 1;
		// if set, a string terminator is skipped as two bytes instead of one
	char	text[12];
		// text payload; strings may span several packs
	uint8	crc[2];
		// not evaluated anywhere in this file
} _PACKED;
28 
// Values of cdtext_pack_data::id that this file deals with.
enum {
	kTrackID	= 0x80,
		// read_cdtext() stores these as album (track 0) or track title
	kArtistID	= 0x81,
		// read_cdtext() stores these as album (track 0) or track artist
	kMessageID	= 0x85,
		// last ID is_string_id() still regards as a text pack
};
34 
// Size of the buffer read_cdtext() allocates for the CD-Text data.
static const uint32 kBufferSize = 16384;
// Size of the sense data buffer allocated for a raw READ TOC command.
static const uint32 kSenseSize = 1024;
37 
38 
39 //	#pragma mark - string functions
40 
41 
/*!	Duplicates \a string using strdup().
	Returns \c NULL for a \c NULL or empty \a string (and whenever strdup()
	does); the caller has to free() the result.
*/
static char *
copy_string(const char *string)
{
	return (string != NULL && string[0] != '\0') ? strdup(string) : NULL;
}
50 
51 
/*!	Converts \a string to UTF-8 and returns the result as a malloc()ed,
	NUL-terminated string that the caller has to free().
	Every input byte is interpreted as one ISO-8859-1 character. The result
	is cut off so that it never exceeds 255 bytes plus terminator; a
	character is only emitted if all of its bytes still fit, so the output
	never ends within a two byte sequence.
	Returns \c NULL if the allocation fails. \a string must not be \c NULL.
*/
static char *
to_utf8(const char* string)
{
	char buffer[256];
	size_t out = 0;

	// TODO: assume ISO-8859-1 character set for now
	for (; string[0] != '\0'; string++) {
		const unsigned char c = (unsigned char)string[0];
		const size_t needed = c < 0x80 ? 1 : 2;

		// Always leave room for the terminator. Checking the complete
		// sequence is essential: testing for "out == 255" only would be
		// skipped when a two byte sequence moves "out" from 254 to 256.
		if (out + needed > sizeof(buffer) - 1)
			break;

		if (needed == 1)
			buffer[out++] = (char)c;
		else {
			buffer[out++] = (char)(0xc0 | (c >> 6));
			buffer[out++] = (char)(0x80 | (c & 0x3f));
		}
	}
	buffer[out++] = '\0';

	char *copy = (char *)malloc(out);
	if (copy == NULL)
		return NULL;

	memcpy(copy, buffer, out);
	return copy;
}
81 
82 
/*!	Returns whether \a c is a character sanitize_string() strips from the
	start of a string: white space, '-', '/', or '\\'.
*/
static bool
is_garbage(char c)
{
	// <ctype.h> functions must not be handed negative values, which a plain
	// char holds for every byte >= 0x80 where char is signed
	return isspace((unsigned char)c) || c == '-' || c == '/' || c == '\\';
}
88 
89 
90 static void
91 sanitize_string(char *&string)
92 {
93 	if (string == NULL)
94 		return;
95 
96 	// strip garbage at the start
97 
98 	uint32 length = strlen(string);
99 	uint32 garbage = 0;
100 	while (is_garbage(string[garbage])) {
101 		garbage++;
102 	}
103 
104 	length -= garbage;
105 	if (garbage)
106 		memmove(string, string + garbage, length + 1);
107 
108 	// strip garbage from the end
109 
110 	while (length > 1 && isspace(string[length - 1])) {
111 		string[--length] = '\0';
112 	}
113 
114 	if (!string[0]) {
115 		// free string if it's empty
116 		free(string);
117 		string = NULL;
118 	}
119 }
120 
121 
/*!	Finds the first occurrence of \a find in \a string, ignores case.
	Returns a pointer to the match within \a string, \a string itself if
	\a find is empty, and \c NULL if there is no match or either argument is
	\c NULL.
*/
static char*
find_string(const char *string, const char *find)
{
	if (string == NULL || find == NULL)
		return NULL;

	if (find[0] == '\0')
		return (char *)string;

	// Compare the first character as int, and only hand unsigned char
	// values to tolower(): a plain char is negative for bytes >= 0x80,
	// which tolower() must not be called with, and which would never
	// compare equal to its int result.
	const int first = tolower((unsigned char)find[0]);
	const char *rest = find + 1;
	const size_t restLength = strlen(rest);

	for (; string[0] != '\0'; string++) {
		if (tolower((unsigned char)string[0]) != first)
			continue;
		if (strncasecmp(string + 1, rest, restLength) == 0)
			return (char *)string;
	}

	return NULL;
}
145 
146 
147 static void
148 cut_string(char *string, const char *cut)
149 {
150 	if (string == NULL || cut == NULL)
151 		return;
152 
153 	char *found = find_string(string, cut);
154 	if (found != NULL) {
155 		uint32 foundLength = strlen(found);
156 		uint32 cutLength = strlen(cut);
157 		memmove(found, found + cutLength, foundLength + 1 - cutLength);
158 	}
159 }
160 
161 
162 static void
163 sanitize_album(cdtext &text)
164 {
165 	cut_string(text.album, text.artist);
166 	sanitize_string(text.album);
167 
168 	if (text.album != NULL && !strcasecmp(text.album, "My CD")) {
169 		// don't laugh, people really do that!
170 		free(text.album);
171 		text.album = NULL;
172 	}
173 
174 	if ((text.artist == NULL || text.artist[0] == '\0') && text.album != NULL) {
175 		// try to extract artist from album
176 		char *space = strstr(text.album, "  ");
177 		if (space != NULL) {
178 			space[0] = '\0';
179 			text.artist = text.album;
180 			text.album = copy_string(space + 2);
181 
182 			sanitize_string(text.artist);
183 			sanitize_string(text.album);
184 		}
185 	}
186 }
187 
188 
189 static void
190 sanitize_titles(cdtext &text)
191 {
192 	for (uint8 i = 0; i < text.track_count; i++) {
193 		cut_string(text.titles[i], "(Album Version)");
194 		sanitize_string(text.titles[i]);
195 		sanitize_string(text.artists[i]);
196 
197 		if (text.artists[i] != NULL && text.artist != NULL
198 			&& !strcasecmp(text.artists[i], text.artist)) {
199 			// if the title artist is the same as the main artist, remove it
200 			free(text.artists[i]);
201 			text.artists[i] = NULL;
202 		}
203 
204 		if (text.titles[i] != NULL && text.titles[i][0] == '\t' && i > 0)
205 			text.titles[i] = copy_string(text.titles[i - 1]);
206 	}
207 }
208 
209 
/*!	Checks whether all letters of \a string share the same case.
	\a upper and \a first carry the state over several strings: as long as
	\a first is \c true, no letter has been seen yet, and the first one found
	determines \a upper (and clears \a first). Every further letter has to
	match \a upper.
	Returns \c false as soon as a letter with a different case is found,
	\c true otherwise (also for a \c NULL \a string).
*/
static bool
single_case(const char *string, bool &upper, bool &first)
{
	if (string == NULL)
		return true;

	// Iterating over each character makes sure the terminating NUL is never
	// skipped, no matter how many non-letters the string ends with.
	for (; string[0] != '\0'; string++) {
		const unsigned char c = (unsigned char)string[0];
		if (!isalpha(c))
			continue;

		const bool isUpper = isupper(c) != 0;
		if (first) {
			upper = isUpper;
			first = false;
		} else if (isUpper != upper)
			return false;
	}

	return true;
}
232 
233 
/*!	Capitalizes each word of \a string in place: the first letter of a word
	is converted to upper case, all of its other letters to lower case.
	Any character that is not a letter starts a new word, except for the
	apostrophe (so that "DON'T" becomes "Don't"), and bytes >= 0x80.
	The latter are the bytes UTF-8 encodes non-ASCII characters with; they
	are left untouched, and regarded as part of the current word, so that
	the letter following an accented character is not capitalized.
*/
static void
capitalize_string(char *string)
{
	if (string == NULL)
		return;

	bool newWord = isalpha((unsigned char)string[0])
		|| isspace((unsigned char)string[0]);
	for (; string[0] != '\0'; string++) {
		// <ctype.h> functions must only see unsigned char values
		const unsigned char c = (unsigned char)string[0];

		if (c >= 0x80) {
			// part of a multi-byte character within a word
			newWord = false;
		} else if (isalpha(c)) {
			if (newWord) {
				string[0] = toupper(c);
				newWord = false;
			} else
				string[0] = tolower(c);
		} else if (c != '\'')
			newWord = true;
	}
}
254 
255 
256 static void
257 correct_case(cdtext &text)
258 {
259 	// check if all titles share a single case
260 	bool first = true;
261 	bool upper;
262 	if (!single_case(text.album, upper, first)
263 		|| !single_case(text.artist, upper, first))
264 		return;
265 
266 	for (int32 i = 0; i < text.track_count; i++) {
267 		if (!single_case(text.titles[i], upper, first)
268 			|| !single_case(text.artists[i], upper, first))
269 			return;
270 	}
271 
272 	// If we get here, everything has a single case; we fix that
273 	// and capitalize each word
274 
275 	capitalize_string(text.album);
276 	capitalize_string(text.artist);
277 	for (int32 i = 0; i < text.track_count; i++) {
278 		capitalize_string(text.titles[i]);
279 		capitalize_string(text.artists[i]);
280 	}
281 }
282 
283 
284 //	#pragma mark - CD-Text
285 
286 
287 cdtext::cdtext()
288 	:
289 	artist(NULL),
290 	album(NULL),
291 	genre(NULL),
292 	track_count(0)
293 {
294 	memset(titles, 0, sizeof(titles));
295 	memset(artists, 0, sizeof(artists));
296 }
297 
298 
299 cdtext::~cdtext()
300 {
301 	free(album);
302 	free(artist);
303 	free(genre);
304 
305 	for (uint8 i = 0; i < track_count; i++) {
306 		free(titles[i]);
307 		free(artists[i]);
308 	}
309 }
310 
311 
312 static bool
313 is_string_id(uint8 id)
314 {
315 	return id >= kTrackID && id <= kMessageID;
316 }
317 
318 
319 /*!	Parses a \a pack data into the provided text buffer; the corresponding
320 	track number will be left in \a track, and the type of the data in \a id.
321 	The pack data is explained in SCSI MMC-3.
322 
323 	\a id, \a track, and \a state must stay constant between calls to this
324 	function. \a state must be initialized to zero for the first call.
325 */
326 static bool
327 parse_pack_data(cdtext_pack_data *&pack, uint32 &packLeft,
328 	cdtext_pack_data *&lastPack, uint8 &id, uint8 &track, uint8 &state,
329 	char *buffer, size_t &length)
330 {
331 	if (packLeft < sizeof(cdtext_pack_data))
332 		return false;
333 
334 	uint8 number = pack->number;
335 	size_t size = length;
336 
337 	if (state != 0) {
338 		// we had a terminated string and a missing track
339 		track++;
340 
341 		memcpy(buffer, lastPack->text + state, 12 - state);
342 		if (pack->track - track == 1)
343 			state = 0;
344 		else
345 			state += strnlen(buffer, 12 - state);
346 		return true;
347 	}
348 
349 	id = pack->id;
350 	track = pack->track;
351 
352 	buffer[0] = '\0';
353 	length = 0;
354 
355 	size_t position = pack->character_position;
356 	if (position > 0 && lastPack != NULL) {
357 		memcpy(buffer, &lastPack->text[12 - position], position);
358 		length = position;
359 	}
360 
361 	while (id == pack->id && track == pack->track) {
362 #if 0
363 		dprintf("%u.%u.%u, %u.%u.%u, ", pack->id, pack->track, pack->number,
364 			pack->double_byte, pack->block_number, pack->character_position);
365 		for (int32 i = 0; i < 12; i++) {
366 			if (isprint(pack->text[i]))
367 				dprintf("%c", pack->text[i]);
368 			else
369 				dprintf("-");
370 		}
371 		dprintf("\n");
372 #endif
373 		if (is_string_id(id)) {
374 			// TODO: support double byte characters
375 			if (length + 12 < size) {
376 				memcpy(buffer + length, pack->text, 12);
377 				length += 12;
378 			}
379 		}
380 
381 		packLeft -= sizeof(cdtext_pack_data);
382 		if (packLeft < sizeof(cdtext_pack_data))
383 			return false;
384 
385 		lastPack = pack;
386 		number++;
387 		pack++;
388 
389 		if (pack->number != number)
390 			return false;
391 	}
392 
393 	if (id == pack->id) {
394 		length -= pack->character_position;
395 		if (length >= size)
396 			length = size - 1;
397 		buffer[length] = '\0';
398 
399 		if (pack->track > lastPack->track + 1) {
400 			// there is a missing track
401 			for (int32 i = 0; i < 12; i++) {
402 				if (lastPack->text[i] == '\0') {
403 					state = i + (lastPack->double_byte ? 2 : 1);
404 					break;
405 				}
406 			}
407 		}
408 	}
409 
410 	return true;
411 }
412 
413 
414 static void
415 dump_cdtext(cdtext &text)
416 {
417 	if (text.album)
418 		dprintf("Album:    \"%s\"\n", text.album);
419 	if (text.artist)
420 		dprintf("Artist:   \"%s\"\n", text.artist);
421 	for (uint8 i = 0; i < text.track_count; i++) {
422 		dprintf("Track %02u: \"%s\"%s%s%s\n", i + 1, text.titles[i],
423 			text.artists[i] ? " (" : "", text.artists[i] ? text.artists[i] : "",
424 			text.artists[i] ? ")" : "");
425 	}
426 }
427 
428 
429 static void
430 dump_toc(scsi_toc_toc *toc)
431 {
432 	int32 numTracks = toc->last_track + 1 - toc->first_track;
433 
434 	for (int32 i = 0; i < numTracks; i++) {
435 		scsi_toc_track& track = toc->tracks[i];
436 		scsi_cd_msf& next = toc->tracks[i + 1].start.time;
437 			// the last track is always lead-out
438 		scsi_cd_msf& start = toc->tracks[i].start.time;
439 		scsi_cd_msf length;
440 
441 		uint64 diff = next.minute * kFramesPerMinute
442 			+ next.second * kFramesPerSecond + next.frame
443 			- start.minute * kFramesPerMinute
444 			- start.second * kFramesPerSecond - start.frame;
445 		length.minute = diff / kFramesPerMinute;
446 		length.second = (diff % kFramesPerMinute) / kFramesPerSecond;
447 		length.frame = diff % kFramesPerSecond;
448 
449 		dprintf("%02u. %02u:%02u.%02u (length %02u:%02u.%02u)\n",
450 			track.track_number, start.minute, start.second, start.frame,
451 			length.minute, length.second, length.frame);
452 	}
453 }
454 
455 
456 static status_t
457 read_frames(int fd, off_t firstFrame, uint8 *buffer, size_t count)
458 {
459 	size_t framesLeft = count;
460 
461 	while (framesLeft > 0) {
462 		scsi_read_cd read;
463 		read.start_m = firstFrame / kFramesPerMinute;
464 		read.start_s = (firstFrame / kFramesPerSecond) % 60;
465 		read.start_f = firstFrame % kFramesPerSecond;
466 
467 		read.length_m = count / kFramesPerMinute;
468 		read.length_s = (count / kFramesPerSecond) % 60;
469 		read.length_f = count % kFramesPerSecond;
470 
471 		read.buffer_length = count * kFrameSize;
472 		read.buffer = (char *)buffer;
473 		read.play = false;
474 
475 		if (ioctl(fd, B_SCSI_READ_CD, &read) < 0) {
476 			// drive couldn't read data - try again to read with a smaller block size
477 			if (count == 1)
478 				return errno;
479 
480 			if (count >= 32)
481 				count = 8;
482 			else
483 				count = 1;
484 			continue;
485 		}
486 
487 		buffer += count * kFrameSize;
488 		framesLeft -= count;
489 		firstFrame += count;
490 	}
491 
492 	return B_OK;
493 }
494 
495 
496 static status_t
497 read_table_of_contents(int fd, uint32 track, uint8 format, uint8 *buffer,
498 	size_t bufferSize)
499 {
500 	raw_device_command raw;
501 	uint8 *senseData = (uint8 *)malloc(kSenseSize);
502 	if (senseData == NULL)
503 		return B_NO_MEMORY;
504 
505 	memset(&raw, 0, sizeof(raw_device_command));
506 	memset(senseData, 0, kSenseSize);
507 	memset(buffer, 0, bufferSize);
508 
509 	scsi_cmd_read_toc &toc = *(scsi_cmd_read_toc*)&raw.command;
510 	toc.opcode = SCSI_OP_READ_TOC;
511 	toc.time = 1;
512 	toc.format = format;
513 	toc.track = track;
514 	toc.allocation_length = B_HOST_TO_BENDIAN_INT16(bufferSize);
515 
516 	raw.command_length = 10;
517 	raw.flags = B_RAW_DEVICE_DATA_IN | B_RAW_DEVICE_REPORT_RESIDUAL
518 		| B_RAW_DEVICE_SHORT_READ_VALID;
519 	raw.scsi_status = 0;
520 	raw.cam_status = 0;
521 	raw.data = buffer;
522 	raw.data_length = bufferSize;
523 	raw.timeout = 10000000LL;	// 10 secs
524 	raw.sense_data = senseData;
525 	raw.sense_data_length = sizeof(kSenseSize);
526 
527 	if (ioctl(fd, B_RAW_DEVICE_COMMAND, &raw) == 0
528 		&& raw.scsi_status == 0 && raw.cam_status == 1) {
529 		free(senseData);
530 		return B_OK;
531 	}
532 
533 	free(senseData);
534 	return B_ERROR;
535 }
536 
537 
538 //	#pragma mark - exported functions
539 
540 
541 status_t
542 read_cdtext(int fd, struct cdtext &cdtext)
543 {
544 	uint8 *buffer = (uint8 *)malloc(kBufferSize);
545 	if (buffer == NULL)
546 		return B_NO_MEMORY;
547 
548 	// do it twice, just in case...
549 	// (at least my CD-ROM sometimes returned broken data on first try)
550 	read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
551 		kBufferSize);
552 	if (read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
553 			kBufferSize) != B_OK) {
554 		free(buffer);
555 		return B_ERROR;
556 	}
557 
558 	scsi_toc_general *header = (scsi_toc_general *)buffer;
559 
560 	size_t packLength = B_BENDIAN_TO_HOST_INT16(header->data_length) - 2;
561 	cdtext_pack_data *pack = (cdtext_pack_data *)(header + 1);
562 	cdtext_pack_data *lastPack = NULL;
563 	uint8 state = 0;
564 	uint8 track = 0;
565 	uint8 id = 0;
566 	char text[256];
567 
568 	// TODO: determine encoding!
569 
570 	while (true) {
571 		size_t length = sizeof(text);
572 
573 		if (!parse_pack_data(pack, packLength, lastPack, id, track,
574 				state, text, length))
575 			break;
576 
577 		switch (id) {
578 			case kTrackID:
579 				if (track == 0) {
580 					if (cdtext.album == NULL)
581 						cdtext.album = to_utf8(text);
582 				} else if (track <= kMaxTracks) {
583 					if (cdtext.titles[track - 1] == NULL)
584 						cdtext.titles[track - 1] = to_utf8(text);
585 					if (track > cdtext.track_count)
586 						cdtext.track_count = track;
587 				}
588 				break;
589 
590 			case kArtistID:
591 				if (track == 0) {
592 					if (cdtext.artist == NULL)
593 						cdtext.artist = to_utf8(text);
594 				} else if (track <= kMaxTracks) {
595 					if (cdtext.artists[track - 1] == NULL)
596 						cdtext.artists[track - 1] = to_utf8(text);
597 				}
598 				break;
599 
600 			default:
601 				if (is_string_id(id))
602 					dprintf("UNKNOWN %u: \"%s\"\n", id, text);
603 				break;
604 		}
605 	}
606 
607 	free(buffer);
608 
609 	if (cdtext.artist == NULL && cdtext.album == NULL)
610 		return B_ERROR;
611 
612 	for (int i = 0; i < cdtext.track_count; i++) {
613 		if (cdtext.titles[i] == NULL)
614 			return B_ERROR;
615 	}
616 
617 	sanitize_string(cdtext.artist);
618 	sanitize_album(cdtext);
619 	sanitize_titles(cdtext);
620 	correct_case(cdtext);
621 
622 	dump_cdtext(cdtext);
623 	return B_OK;
624 }
625 
626 
627 status_t
628 read_table_of_contents(int fd, scsi_toc_toc *toc, size_t length)
629 {
630 	status_t status = read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_TOC,
631 		(uint8*)toc, length);
632 	if (status < B_OK)
633 		return status;
634 
635 	// make sure the values in the TOC make sense
636 
637 	int32 lastTrack = toc->last_track + 1 - toc->first_track;
638 	size_t dataLength = B_BENDIAN_TO_HOST_INT16(toc->data_length) + 2;
639 	if (dataLength < sizeof(scsi_toc_toc) || lastTrack <= 0)
640 		return B_BAD_DATA;
641 
642 	if (length > dataLength)
643 		length = dataLength;
644 
645 	length -= sizeof(scsi_toc_general);
646 
647 	if (lastTrack * sizeof(scsi_toc_track) > length)
648 		toc->last_track = length / sizeof(scsi_toc_track) + toc->first_track;
649 
650 	dump_toc(toc);
651 	return B_OK;
652 }
653 
654 
655 status_t
656 read_cdda_data(int fd, off_t endFrame, off_t offset, void *data, size_t length,
657 	off_t bufferOffset, void *buffer, size_t bufferSize)
658 {
659 	if (bufferOffset >= 0 && bufferOffset <= offset + length
660 		&& bufferOffset + bufferSize > offset) {
661 		if (offset >= bufferOffset) {
662 			// buffer reaches into the beginning of the request
663 			off_t dataOffset = offset - bufferOffset;
664 			size_t bytes = min_c(bufferSize - dataOffset, length);
665 			if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
666 				return B_BAD_ADDRESS;
667 
668 			data = (void *)((uint8 *)data + bytes);
669 			length -= bytes;
670 			offset += bytes;
671 		} else if (offset < bufferOffset
672 			&& offset + length < bufferOffset + bufferSize) {
673 			// buffer overlaps at the end of the request
674 			off_t dataOffset = bufferOffset - offset;
675 			size_t bytes = length - dataOffset;
676 			if (user_memcpy((uint8 *)data + dataOffset, buffer, bytes) < B_OK)
677 				return B_BAD_ADDRESS;
678 
679 			length -= bytes;
680 		}
681 		// we don't handle the case where we would need to split the request
682 	}
683 
684 	while (length > 0) {
685 		off_t frame = offset / kFrameSize;
686 		uint32 count = bufferSize / kFrameSize;
687 		if (frame + count > endFrame)
688 			count = endFrame - frame;
689 
690 		status_t status = read_frames(fd, frame, (uint8 *)buffer, count);
691 		if (status < B_OK)
692 			return status;
693 
694 		off_t dataOffset = offset % kFrameSize;
695 		size_t bytes = bufferSize - dataOffset;
696 		if (bytes > length)
697 			bytes = length;
698 
699 		if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
700 			return B_BAD_ADDRESS;
701 
702 		data = (void *)((uint8 *)data + bytes);
703 		length -= bytes;
704 		offset += bytes;
705 	}
706 
707 	return B_OK;
708 }
709