xref: /haiku/src/add-ons/kernel/file_systems/iso9660/iso9660_identify.cpp (revision 2222d0559df303a9846a2fad53741f8b20b14d7c)
1 /*
2  * Copyright 2007-2009, Axel Dörfler, axeld@pinc-software.de.
3  * Copyright 2002, Tyler Dauwalder.
4  *
5  * This file may be used under the terms of the MIT License.
6  */
7 
8 /*!
9 	<h5>iso9660</h5>
10 	The standard to which this module is written is ECMA-119 second
11 	edition, a freely available iso9660 equivalent.
12 
13 	<h5>Joliet</h5>
14 	Joliet support comes courtesy of the following document:
15 
16 	http://www-plateau.cs.berkeley.edu/people/chaffee/jolspec.htm
17 
18 	As specified there, the existence of any of the following escape
19 	sequences in a supplementary volume descriptor's "escape sequences"
20 	field denotes a Joliet volume descriptor using unicode ucs-2
21 	character encoding (2-byte characters, big-endian):
22 
23 	- UCS-2 Level 1: 0x252F40 == "%/@"
24 	- UCS-2 Level 2: 0x252F43 == "%/C"
25 	- UCS-2 Level 3: 0x252F45 == "%/E"
26 
27 	The following UCS-2 characters are considered illegal (we allow them,
28 	printing out a warning if encountered):
29 
30 	- All values between 0x0000 and 0x001f inclusive == control chars
31 	- 0x002A == '*'
32 	- 0x002F == '/'
33 	- 0x003A == ':'
34 	- 0x003B == ';'
35 	- 0x003F == '?'
36 	- 0x005C == '\'
37 */
38 
39 #include "iso9660_identify.h"
40 
41 #ifndef FS_SHELL
42 #	include <errno.h>
43 #	include <stdlib.h>
44 #	include <string.h>
45 #	include <unistd.h>
46 #	include <stdio.h>
47 
48 #	include <ByteOrder.h>
49 #	include <fs_info.h>
50 #	include <KernelExport.h>
51 #endif
52 
53 #include "iso9660.h"
54 
55 //#define TRACE(x) ;
56 #define TRACE(x) dprintf x
57 
58 
59 static const char *kISO9660Signature = "CD001";
60 static const uint32 kVolumeDescriptorLength = 2048;
61 #define ISO9660_VOLUME_IDENTIFIER_LENGTH 32
62 #define ISO9660_ESCAPE_SEQUENCE_LENGTH 32
63 
//! Volume descriptor types, as found in a descriptor's type byte.
enum iso9660_descriptor_type {
	ISO9660VD_BOOT			= 0,
	ISO9660VD_PRIMARY		= 1,
	ISO9660VD_SUPPLEMENTARY	= 2,
	ISO9660VD_PARTITION		= 3,
	ISO9660VD_TERMINATOR	= 255
};
72 
/*! \brief Leading fields shared by every volume descriptor type.

	Only the first seven bytes are declared here; whatever follows depends
	on the descriptor type.
*/
struct iso9660_common_descriptor {
	uint8	type;
	char	standard_identifier[5];
		// should be 'CD001'; not NUL-terminated
	uint8	version;
} __attribute__((packed));

typedef struct iso9660_common_descriptor iso9660_common_volume_descriptor;
82 
83 typedef struct iso9660_volume_descriptor {
84 	iso9660_common_descriptor common;
85 	uint8	flags;
86 	char	system_identifier[32];
87 	char	identifier[ISO9660_VOLUME_IDENTIFIER_LENGTH];
88 	uint8	_reserved0[8];
89 	uint32	size;
90 	uint32	size_big_endian;
91 	char	escape_sequences[ISO9660_ESCAPE_SEQUENCE_LENGTH];
92 				// unused on primary descriptor
93 	uint16	set_size;
94 	uint16	set_size_big_endian;
95 	uint16	sequence_number;
96 	uint16	sequence_number_big_endian;
97 	uint16	logical_block_size;
98 	uint16	logical_block_size_big_endian;
99 	uint32	path_table_size;
100 	uint32	path_table_size_big_endian;
101 	uint32	_reserved1[4];
102 	uint8	root_directory_record[34];
103 	char	set_identifier[28];
104 	// Remaining bytes are disinteresting to us
105 } __attribute__((packed)) iso9660_volume_descriptor;
106 
/*! \brief The parts of a directory record that get dumped for debugging.

	Fields that are not looked at are folded into \c _reserved.
*/
struct iso9660_directory_record {
	uint8	length;
	uint8	extended_attribute_record_length;

	uint32	location;
	uint32	location_big_endian;

	uint32	data_length;
	uint8	_reserved[14];

	uint16	volume_space;
} __attribute__((packed));
116 
117 
118 static void dump_directory_record(iso9660_directory_record *record,
119 	const char *indent);
120 
121 
122 //	#pragma mark -
123 
124 
125 /*! \brief Creates a new iso9660_info struct with empty volume names.
126 
127 	\note Use the applicable set_XYZ_volume_name() functions rather than
128 	messing with the volume name data members directly.
129 */
130 iso9660_info::iso9660_info()
131 	:
132 	iso9660_name(NULL),
133 	joliet_name(NULL)
134 {
135 }
136 
137 
138 iso9660_info::~iso9660_info()
139 {
140 	free(iso9660_name);
141 	free(joliet_name);
142 }
143 
144 
145 /*! \brief Returns true if a valid volume name exists.
146 */
147 bool
148 iso9660_info::IsValid()
149 {
150 	return iso9660_name != NULL || joliet_name != NULL;
151 }
152 
153 
154 /*! \brief Sets the iso9660 volume name.
155 
156 	\param name UTF-8 string containing the name.
157 	\param length The length (in bytes) of the string.
158 */
159 void
160 iso9660_info::SetISO9660Name(const char *name, uint32 length)
161 {
162 	_SetString(&iso9660_name, name, length);
163 }
164 
165 
166 /*! \brief Sets the Joliet volume name.
167 
168 	\param name UTF-8 string containing the name.
169 	\param length The length (in bytes) of the string.
170 */
171 void
172 iso9660_info::SetJolietName(const char *name, uint32 length)
173 {
174 	_SetString(&joliet_name, name, length);
175 }
176 
177 
178 /*! \brief Returns the volume name of highest precedence.
179 
180 	Currently, the ordering is (decreasingly):
181 	- Joliet
182 	- iso9660
183 */
184 const char*
185 iso9660_info::PreferredName()
186 {
187 	if (joliet_name)
188 		return joliet_name;
189 
190 	return iso9660_name;
191 }
192 
193 
194 /*! \brief Copies the given string into the old string, managing memory
195 	deallocation and allocation as necessary.
196 */
197 void
198 iso9660_info::_SetString(char **string, const char *newString,
199 	uint32 newLength)
200 {
201 	if (string == NULL)
202 		return;
203 
204 	TRACE(("iso9660_info::set_string(%p ('%s'), '%s', %u)\n", string,
205 		*string, newString, (unsigned)newLength));
206 
207 	char *&oldString = *string;
208 	free(oldString);
209 
210 	if (newString) {
211 		oldString = (char*)malloc(newLength + 1);
212 		if (oldString != NULL) {
213 			memcpy(oldString, newString, newLength);
214 			oldString[newLength] = '\0';
215 		}
216 	} else
217 		oldString = NULL;
218 }
219 
220 
221 //	#pragma mark - C functions
222 
223 
/*! \brief Appends the UTF-8 encoding of the code point \a c at \c *out and
	advances \c *out past the bytes written.

	One to four bytes are written depending on the magnitude of \a c. No
	terminating NUL is added. Values above 0x10ffff produce no output and
	leave \c *out untouched.
*/
static void
unicode_to_utf8(uint32 c, char **out)
{
	char *cursor = *out;

	if (c <= 0x7f) {
		// plain ASCII
		cursor[0] = c;
		cursor += 1;
	} else if (c <= 0x7ff) {
		cursor[0] = 0xc0 | (c >> 6);
		cursor[1] = 0x80 | (c & 0x3f);
		cursor += 2;
	} else if (c <= 0xffff) {
		cursor[0] = 0xe0 | (c >> 12);
		cursor[1] = 0x80 | ((c >> 6) & 0x3f);
		cursor[2] = 0x80 | (c & 0x3f);
		cursor += 3;
	} else if (c < 0x110000) {
		cursor[0] = 0xf0 | (c >> 18);
		cursor[1] = 0x80 | ((c >> 12) & 0x3f);
		cursor[2] = 0x80 | ((c >> 6) & 0x3f);
		cursor[3] = 0x80 | (c & 0x3f);
		cursor += 4;
	}

	*out = cursor;
}
248 
249 
250 static const char*
251 descriptor_type_to_string(iso9660_descriptor_type type)
252 {
253 	switch (type) {
254 		case ISO9660VD_BOOT:
255 			return "boot";
256 		case ISO9660VD_PRIMARY:
257 			return "primary";
258 		case ISO9660VD_SUPPLEMENTARY:
259 			return "supplementary";
260 		case ISO9660VD_PARTITION:
261 			return "partiton";
262 		case ISO9660VD_TERMINATOR:
263 			return "terminator";
264 		default:
265 			return "invalid";
266 	}
267 }
268 
269 
270 static void
271 dump_common_descriptor(iso9660_common_descriptor *common,
272 	const char *indent, bool printHeader)
273 {
274 	if (printHeader)
275 		TRACE(("%siso9660_common_descriptor:\n", indent));
276 
277 	TRACE(("%s  volume descriptor type: %d (%s)\n", indent,
278 		common->type, descriptor_type_to_string(
279 			(iso9660_descriptor_type)common->type)));
280 	TRACE(("%s  standard identifier:    %.5s (%s)\n", indent,
281 		common->standard_identifier,
282 		strncmp(common->standard_identifier, kISO9660Signature, 5) == 0
283 			? "valid" : "INVALID"));
284 	TRACE(("%s  version:                %d\n", indent, common->version));
285 }
286 
287 
288 static void
289 dump_primary_descriptor(iso9660_volume_descriptor *primary,
290 	const char *indent, bool printHeader)
291 {
292 	if (printHeader)
293 		TRACE(("%siso9660_primary_descriptor:\n", indent));
294 
295 	dump_common_descriptor(&primary->common, indent, false);
296 	TRACE(("%s  identifier:             '%.32s'\n", indent,
297 		primary->identifier));
298 	TRACE(("%s  size:                   %d\n", indent,
299 		(int)B_LENDIAN_TO_HOST_INT32(primary->size)));
300 	TRACE(("%s  set size:               %d\n", indent,
301 		(int)B_LENDIAN_TO_HOST_INT32(primary->set_size)));
302 	TRACE(("%s  sequence number:        %d\n", indent,
303 		(int)B_LENDIAN_TO_HOST_INT32(primary->sequence_number)));
304 	TRACE(("%s  logical block size:     %d\n", indent,
305 		(int)B_LENDIAN_TO_HOST_INT32(primary->logical_block_size)));
306 	TRACE(("%s  path table size:        %d\n", indent,
307 		(int)B_LENDIAN_TO_HOST_INT32(primary->path_table_size)));
308 	TRACE(("%s  set identifier:         %.28s\n", indent,
309 		primary->set_identifier));
310 	dump_directory_record((iso9660_directory_record*)
311 		primary->root_directory_record, indent);
312 }
313 
314 
315 static void
316 dump_supplementary_descriptor(iso9660_volume_descriptor *supplementary,
317 	const char *indent, bool printHeader)
318 {
319 	if (printHeader)
320 		TRACE(("%siso9660_supplementary_descriptor:\n", indent));
321 
322 	dump_primary_descriptor(supplementary, indent, false);
323 	TRACE(("%s  escape sequences:      ", indent));
324 	for (int i = 0; i < ISO9660_ESCAPE_SEQUENCE_LENGTH; i++) {
325 		TRACE((" %2x", supplementary->escape_sequences[i]));
326 		if (i == ISO9660_ESCAPE_SEQUENCE_LENGTH / 2 - 1)
327 			TRACE(("\n                          "));
328 	}
329 	TRACE(("\n"));
330 }
331 
332 
333 static void
334 dump_directory_record(iso9660_directory_record *record, const char *indent)
335 {
336 	TRACE(("%s  root directory record:\n", indent));
337 	TRACE(("%s    length:               %d\n", indent, record->length));
338 	TRACE(("%s    location:             %d\n", indent,
339 		(int)B_LENDIAN_TO_HOST_INT32(record->location)));
340 	TRACE(("%s    data length:          %d\n", indent,
341 		(int)B_LENDIAN_TO_HOST_INT32(record->data_length)));
342 	TRACE(("%s    volume space:         %d\n", indent,
343 		B_LENDIAN_TO_HOST_INT16(record->volume_space)));
344 }
345 
346 
347 static status_t
348 check_common_descriptor(iso9660_common_descriptor *common)
349 {
350 	if (common == NULL)
351 		return B_BAD_VALUE;
352 
353 	return strncmp(common->standard_identifier, kISO9660Signature, 5) == 0
354 		? B_OK : B_BAD_DATA;
355 }
356 
357 
358 //	#pragma mark - Public functions
359 
360 
361 // iso9660_fs_identify
362 /*! \brief Returns true if the given partition is a valid iso9660 partition.
363 
364 	See fs_identify_hook() for more information.
365 
366 	\todo Fill in partitionInfo->mounted_at with something useful.
367 */
368 status_t
369 iso9660_fs_identify(int deviceFD, iso9660_info *info)
370 {
371 	char buffer[ISO_PVD_SIZE];
372 	bool exit = false;
373 	bool found = false;
374 	status_t error = B_OK;
375 
376 	TRACE(("identify(%d, %p)\n", deviceFD, info));
377 	off_t offset = 0x8000;
378 
379 	// Read through the volume descriptors looking for a primary descriptor.
380 	// If for some reason there are more than one primary descriptor, the
381 	// volume name from the last encountered descriptor will be used.
382 	while (!error && !exit) {// && count++ < 10) {
383 		iso9660_common_descriptor *common = NULL;
384 
385 		// Read the block containing the current descriptor
386 		error = read_pos(deviceFD, offset, (void *)&buffer, ISO_PVD_SIZE);
387 		offset += ISO_PVD_SIZE;
388 		if (error < ISO_PVD_SIZE)
389 			break;
390 
391 		common = (iso9660_common_descriptor*)buffer;
392 		error = check_common_descriptor(common);
393 		if (error < B_OK)
394 			break;
395 
396 //		dump_common_descriptor(common, "", true);
397 
398 		// Handle each type of descriptor appropriately
399 		TRACE(("found %s descriptor\n", descriptor_type_to_string(
400 			(iso9660_descriptor_type)common->type)));
401 		found = true;
402 
403 		switch (common->type) {
404 			case ISO9660VD_BOOT:
405 				break;
406 
407 			case ISO9660VD_PRIMARY:
408 			{
409 				iso9660_volume_descriptor *primary
410 					= (iso9660_volume_descriptor*)buffer;
411 				int i;
412 
413 				dump_primary_descriptor(primary, "  ", true);
414 
415 				// Cut off any trailing spaces from the volume id. Note
416 				// that this allows for spaces INSIDE the volume id, even
417 				// though that's not technically allowed by the standard;
418 				// this was necessary to support certain RedHat 6.2 CD-ROMs
419 				// from a certain Linux company who shall remain unnamed. ;-)
420 				for (i = ISO9660_VOLUME_IDENTIFIER_LENGTH - 1; i >= 0;
421 						i--) {
422 					if (primary->identifier[i] != 0x20)
423 						break;
424 				}
425 
426 				// Give a holler if the iso9660 name is already set
427 				if (info->iso9660_name) {
428 					char name[ISO9660_VOLUME_IDENTIFIER_LENGTH + 1];
429 					strlcpy(name, primary->identifier, i + 1);
430 					TRACE(("duplicate iso9660 volume name found, using "
431 						"latter (`%s') instead of former (`%s')\n", name,
432 						info->iso9660_name));
433 				}
434 
435 				info->SetISO9660Name(primary->identifier, i + 1);
436 				info->max_blocks = B_LENDIAN_TO_HOST_INT32(primary->set_size);
437 				break;
438 			}
439 
440 			case ISO9660VD_SUPPLEMENTARY:
441 			{
442 				iso9660_volume_descriptor *supplementary
443 					= (iso9660_volume_descriptor*)buffer;
444 				dump_supplementary_descriptor(supplementary, "  ", true);
445 
446 				// Copy and null terminate the escape sequences
447 				char escapes[ISO9660_ESCAPE_SEQUENCE_LENGTH + 1];
448 				strlcpy(escapes, supplementary->escape_sequences,
449 					ISO9660_ESCAPE_SEQUENCE_LENGTH + 1);
450 
451 				// Check for a Joliet VD
452 				if (strstr(escapes, "%/@") || strstr(escapes, "%/C")
453 					|| strstr(escapes, "%/E")) {
454 					char name[(ISO9660_VOLUME_IDENTIFIER_LENGTH * 3 / 2) + 1];
455 						// Since we're dealing with 16-bit Unicode, each
456 						// UTF-8 sequence will be at most 3 bytes long.
457 					char *pos = name;
458 					uint16 ch;
459 
460 					// Walk thru the unicode volume name, converting to utf8 as we go.
461 					for (int i = 0; (ch = B_BENDIAN_TO_HOST_INT16(
462 								((uint16*)supplementary->identifier)[i]))
463 							&& i < ISO9660_VOLUME_IDENTIFIER_LENGTH; i++) {
464 						// Give a warning if the character is technically
465 						// illegal
466 						if (ch <= 0x001F || ch == '*' || ch == '/'
467 						    || ch == ':' || ch == ';'
468 						    || ch == '?' || ch == '\\') {
469 							TRACE(("warning: illegal Joliet character "
470 								"found: 0%4x\n", ch));
471 						}
472 
473 						// Convert to utf-8
474 						unicode_to_utf8(ch, &pos);
475 					}
476 					pos[0] = '\0';
477 
478 					// Give a holler if the joliet name is already set
479 					if (info->joliet_name) {
480 						TRACE(("duplicate joliet volume name found, using "
481 							"latter (`%s') instead of former (`%s')\n",
482 							name, info->joliet_name));
483 					}
484 
485 					info->SetJolietName(name, pos - name);
486 				}
487 				break;
488 			}
489 
490 			case ISO9660VD_PARTITION:
491 				break;
492 
493 			case ISO9660VD_TERMINATOR:
494 				exit = true;
495 				break;
496 
497 			default:
498 				break;
499 		}
500 	}
501 
502 	return found ? B_OK : error;
503 }
504 
505