xref: /haiku/src/add-ons/kernel/file_systems/iso9660/iso9660_identify.cpp (revision 28143c31d16ed369569743bdb5e6cb4a7ae605d0)
1 /*
2  * Copyright 2007, Axel Dörfler, axeld@pinc-software.de.
3  * Copyright 2002, Tyler Dauwalder.
4  *
5  * This file may be used under the terms of the MIT License.
6  */
7 
8 /*!
9 	<h5>iso9660</h5>
10 	The standard to which this module is written is ECMA-119 second
11 	edition, a freely available iso9660 equivalent.
12 
13 	<h5>Joliet</h5>
14 	Joliet support comes courtesy of the following document:
15 
16 	http://www-plateau.cs.berkeley.edu/people/chaffee/jolspec.htm
17 
18 	As specified there, the existence of any of the following escape
19 	sequences in a supplementary volume descriptor's "escape sequences"
20 	field denotes a Joliet volume descriptor using unicode ucs-2
21 	character encoding (2-byte characters, big-endian):
22 
23 	- UCS-2 Level 1: 0x252F40 == "%/@"
24 	- UCS-2 Level 2: 0x252F43 == "%/C"
25 	- UCS-2 Level 3: 0x252F45 == "%/E"
26 
27 	The following UCS-2 characters are considered illegal (we allow them,
28 	printing out a warning if encountered):
29 
30 	- All values between 0x0000 and 0x001f inclusive == control chars
31 	- 0x002A == '*'
32 	- 0x002F == '/'
33 	- 0x003A == ':'
34 	- 0x003B == ';'
35 	- 0x003F == '?'
36 	- 0x005C == '\'
37 */
38 
39 #include "iso9660_identify.h"
40 
41 #include <errno.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <stdio.h>
46 
47 #include <ByteOrder.h>
48 #include <fs_info.h>
49 #include <KernelExport.h>
50 
51 #include "iso9660.h"
52 
// Tracing: TRACE((format, args...)) takes a parenthesized printf-style
// argument list. Swap the two definitions below to compile tracing out.
//#define TRACE(x) ;
#define TRACE(x) dprintf x

// misc constants
// Compared against the 5 byte standard_identifier field of a descriptor.
static const char *kISO9660Signature = "CD001";
// NOTE(review): not referenced in the visible part of this file (the read
// loop uses ISO_PVD_SIZE) -- confirm whether it is still needed.
static const uint32 kVolumeDescriptorLength = 2048;
// Size in bytes of iso9660_volume_descriptor::identifier.
#define ISO9660_VOLUME_IDENTIFIER_LENGTH 32
// Size in bytes of iso9660_volume_descriptor::escape_sequences.
#define ISO9660_ESCAPE_SEQUENCE_LENGTH 32
61 
//! Volume descriptor types (the value of iso9660_common_descriptor::type)
typedef enum {
	ISO9660VD_BOOT = 0,
	ISO9660VD_PRIMARY = 1,
	ISO9660VD_SUPPLEMENTARY = 2,
	ISO9660VD_PARTITION = 3,
	ISO9660VD_TERMINATOR = 255
} iso9660_descriptor_type;
70 
/*! \brief The portion of the volume descriptor common to all
    descriptor types.

	The struct is packed so that it can be overlaid directly on the raw
	descriptor bytes read from the device.

	\note The typedef name (iso9660_common_volume_descriptor) differs from
	the struct tag (iso9660_common_descriptor); the code in this file refers
	to the struct tag.
*/
typedef struct iso9660_common_descriptor {
	uchar	type;
		// compared against the iso9660_descriptor_type values
	char	standard_identifier[5];	// should be 'CD001'
	uchar	version;
	// Remaining bytes are unused
} __attribute__((packed)) iso9660_common_volume_descriptor;
80 
/*! \brief Leading part of a primary or supplementary volume descriptor.

	The struct is packed so that it can be overlaid directly on the raw
	descriptor block. Numeric fields come in pairs (\c x followed by
	\c x_big_endian); the code in this file only reads the first member of
	each pair, using the B_LENDIAN_TO_HOST_*() macros.
*/
typedef struct iso9660_volume_descriptor {
	iso9660_common_descriptor common;
	uchar	flags;
	char	system_identifier[32];
	char	identifier[ISO9660_VOLUME_IDENTIFIER_LENGTH];
		// volume name, not null-terminated; iso9660_fs_identify() trims
		// trailing 0x20 bytes for primary descriptors and decodes it as
		// big-endian 16-bit characters for Joliet descriptors
	uchar	_reserved0[8];
	uint32	size;
	uint32	size_big_endian;
	char	escape_sequences[ISO9660_ESCAPE_SEQUENCE_LENGTH];
				// unused on primary descriptor
	uint16	set_size;
	uint16	set_size_big_endian;
	uint16	sequence_number;
	uint16	sequence_number_big_endian;
	uint16	logical_block_size;
	uint16	logical_block_size_big_endian;
	uint32	path_table_size;
	uint32	path_table_size_big_endian;
	uint32	_reserved1[4];
	uchar	root_directory_record[34];
		// raw bytes; interpreted through iso9660_directory_record
	char	set_identifier[28];
	// The remaining bytes of the descriptor are of no interest to us
} __attribute__((packed)) iso9660_volume_descriptor;
104 
/*! \brief Leading part of a directory record, as overlaid on
	iso9660_volume_descriptor::root_directory_record by the dump code.

	Packed so that the members line up with the raw bytes. The members
	read in this file (location, data_length, volume_space) are converted
	with the B_LENDIAN_TO_HOST_*() macros.
*/
typedef struct iso9660_directory_record {
	uint8	length;
	uint8	extended_attribute_record_length;
	uint32	location;
	uint32	location_big_endian;
	uint32	data_length;
	// NOTE(review): presumably covers the big-endian copy of data_length
	// plus the fields up to volume_space -- confirm against ECMA-119.
	uchar	_reserved[14];
	uint16	volume_space;
} __attribute__((packed)) iso9660_directory_record;
114 
115 
116 static void dump_directory_record(iso9660_directory_record *record,
117 	const char *indent);
118 
119 
120 //	#pragma mark -
121 
122 
123 /*! \brief Creates a new iso9660_info struct with empty volume names.
124 
125 	\note Use the applicable set_XYZ_volume_name() functions rather than
126 	messing with the volume name data members directly.
127 */
128 iso9660_info::iso9660_info()
129 	:
130 	iso9660_name(NULL),
131 	joliet_name(NULL)
132 {
133 }
134 
135 
136 iso9660_info::~iso9660_info()
137 {
138 	free(iso9660_name);
139 	free(joliet_name);
140 }
141 
142 
143 /*! \brief Returns true if a valid volume name exists.
144 */
145 bool
146 iso9660_info::IsValid()
147 {
148 	return iso9660_name != NULL || joliet_name != NULL;
149 }
150 
151 
152 /*! \brief Sets the iso9660 volume name.
153 
154 	\param name UTF-8 string containing the name.
155 	\param length The length (in bytes) of the string.
156 */
157 void
158 iso9660_info::SetISO9660Name(const char *name, uint32 length)
159 {
160 	_SetString(&iso9660_name, name, length);
161 }
162 
163 
164 /*! \brief Sets the Joliet volume name.
165 
166 	\param name UTF-8 string containing the name.
167 	\param length The length (in bytes) of the string.
168 */
169 void
170 iso9660_info::SetJolietName(const char *name, uint32 length)
171 {
172 	_SetString(&joliet_name, name, length);
173 }
174 
175 
176 /*! \brief Returns the volume name of highest precedence.
177 
178 	Currently, the ordering is (decreasingly):
179 	- Joliet
180 	- iso9660
181 */
182 const char*
183 iso9660_info::PreferredName()
184 {
185 	if (joliet_name)
186 		return joliet_name;
187 
188 	return iso9660_name;
189 }
190 
191 
192 /*! \brief Copies the given string into the old string, managing memory
193 	deallocation and allocation as necessary.
194 */
195 void
196 iso9660_info::_SetString(char **string, const char *newString,
197 	uint32 newLength)
198 {
199 	TRACE(("iso9660_info::set_string(%p ('%s'), '%s', %ld)\n", string,
200 		*string, newString, newLength));
201 	if (string == NULL)
202 		return;
203 
204 	char *&oldString = *string;
205 	free(oldString);
206 
207 	if (newString) {
208 		oldString = (char*)malloc(newLength + 1);
209 		if (oldString != NULL) {
210 			memcpy(oldString, newString, newLength);
211 			oldString[newLength] = '\0';
212 		}
213 	} else
214 		oldString = NULL;
215 }
216 
217 
218 //	#pragma mark - C functions
219 
220 
221 /*! \brief Converts the given unicode character to utf8.
222 */
223 static void
224 unicode_to_utf8(uint32 c, char **out)
225 {
226 	char *s = *out;
227 
228 	if (c < 0x80)
229 		*(s++) = c;
230 	else if (c < 0x800) {
231 		*(s++) = 0xc0 | (c >> 6);
232 		*(s++) = 0x80 | (c & 0x3f);
233 	} else if (c < 0x10000) {
234 		*(s++) = 0xe0 | (c >> 12);
235 		*(s++) = 0x80 | ((c >> 6) & 0x3f);
236 		*(s++) = 0x80 | (c & 0x3f);
237 	} else if (c <= 0x10ffff) {
238 		*(s++) = 0xf0 | (c >> 18);
239 		*(s++) = 0x80 | ((c >> 12) & 0x3f);
240 		*(s++) = 0x80 | ((c >> 6) & 0x3f);
241 		*(s++) = 0x80 | (c & 0x3f);
242 	}
243 	*out = s;
244 }
245 
246 
247 static const char*
248 descriptor_type_to_string(iso9660_descriptor_type type)
249 {
250 	switch (type) {
251 		case ISO9660VD_BOOT:
252 			return "boot";
253 		case ISO9660VD_PRIMARY:
254 			return "primary";
255 		case ISO9660VD_SUPPLEMENTARY:
256 			return "supplementary";
257 		case ISO9660VD_PARTITION:
258 			return "partiton";
259 		case ISO9660VD_TERMINATOR:
260 			return "terminator";
261 		default:
262 			return "invalid";
263 	}
264 }
265 
266 
267 static void
268 dump_common_descriptor(iso9660_common_descriptor *common,
269 	const char *indent, bool printHeader)
270 {
271 	if (printHeader)
272 		TRACE(("%siso9660_common_descriptor:\n", indent));
273 
274 	TRACE(("%s  volume descriptor type: %d (%s)\n", indent,
275 		common->type, descriptor_type_to_string(
276 			(iso9660_descriptor_type)common->type)));
277 	TRACE(("%s  standard identifier:    %.5s (%s)\n", indent,
278 		common->standard_identifier,
279 		strncmp(common->standard_identifier, kISO9660Signature, 5) == 0
280 			? "valid" : "INVALID"));
281 	TRACE(("%s  version:                %d\n", indent, common->version));
282 }
283 
284 
285 static void
286 dump_primary_descriptor(iso9660_volume_descriptor *primary,
287 	const char *indent, bool printHeader)
288 {
289 	if (printHeader)
290 		TRACE(("%siso9660_primary_descriptor:\n", indent));
291 
292 	dump_common_descriptor(&primary->common, indent, false);
293 	TRACE(("%s  identifier:             '%.32s'\n", indent,
294 		primary->identifier));
295 	TRACE(("%s  size:                   %ld\n", indent,
296 		B_LENDIAN_TO_HOST_INT32(primary->size)));
297 	TRACE(("%s  set size:               %ld\n", indent,
298 		B_LENDIAN_TO_HOST_INT32(primary->set_size)));
299 	TRACE(("%s  sequence number:        %ld\n", indent,
300 		B_LENDIAN_TO_HOST_INT32(primary->sequence_number)));
301 	TRACE(("%s  logical block size:     %ld\n", indent,
302 		B_LENDIAN_TO_HOST_INT32(primary->logical_block_size)));
303 	TRACE(("%s  path table size:        %ld\n", indent,
304 		B_LENDIAN_TO_HOST_INT32(primary->path_table_size)));
305 	TRACE(("%s  set identifier:         %.28s\n", indent,
306 		primary->set_identifier));
307 	dump_directory_record((iso9660_directory_record*)
308 		primary->root_directory_record, indent);
309 }
310 
311 
312 static void
313 dump_supplementary_descriptor(iso9660_volume_descriptor *supplementary,
314 	const char *indent, bool printHeader)
315 {
316 	if (printHeader)
317 		TRACE(("%siso9660_supplementary_descriptor:\n", indent));
318 
319 	dump_primary_descriptor(supplementary, indent, false);
320 	TRACE(("%s  escape sequences:      ", indent));
321 	for (int i = 0; i < ISO9660_ESCAPE_SEQUENCE_LENGTH; i++) {
322 		TRACE((" %2x", supplementary->escape_sequences[i]));
323 		if (i == ISO9660_ESCAPE_SEQUENCE_LENGTH / 2 - 1)
324 			TRACE(("\n                          "));
325 	}
326 	TRACE(("\n"));
327 }
328 
329 
330 static void
331 dump_directory_record(iso9660_directory_record *record, const char *indent)
332 {
333 	TRACE(("%s  root directory record:\n", indent));
334 	TRACE(("%s    length:               %d\n", indent, record->length));
335 	TRACE(("%s    location:             %ld\n", indent,
336 		B_LENDIAN_TO_HOST_INT32(record->location)));
337 	TRACE(("%s    data length:          %ld\n", indent,
338 		B_LENDIAN_TO_HOST_INT32(record->data_length)));
339 	TRACE(("%s    volume space:         %d\n", indent,
340 		B_LENDIAN_TO_HOST_INT16(record->volume_space)));
341 }
342 
343 
344 static status_t
345 check_common_descriptor(iso9660_common_descriptor *common)
346 {
347 	if (common == NULL)
348 		return B_BAD_VALUE;
349 
350 	return strncmp(common->standard_identifier, kISO9660Signature, 5) == 0
351 		? B_OK : B_BAD_DATA;
352 }
353 
354 
355 //	#pragma mark - Public functions
356 
357 
358 // iso9660_fs_identify
359 /*! \brief Returns true if the given partition is a valid iso9660 partition.
360 
361 	See fs_identify_hook() for more information.
362 
363 	\todo Fill in partitionInfo->mounted_at with something useful.
364 */
365 status_t
366 iso9660_fs_identify(int deviceFD, iso9660_info *info)
367 {
368 	char buffer[ISO_PVD_SIZE];
369 	bool exit = false;
370 	bool found = false;
371 	status_t error = B_OK;
372 
373 	TRACE(("identify(%d, %p)\n", deviceFD, info));
374 	off_t offset = 0x8000;
375 
376 	// Read through the volume descriptors looking for a primary descriptor.
377 	// If for some reason there are more than one primary descriptor, the
378 	// volume name from the last encountered descriptor will be used.
379 	while (!error && !exit) {// && count++ < 10) {
380 		iso9660_common_descriptor *common = NULL;
381 
382 		// Read the block containing the current descriptor
383 		error = read_pos(deviceFD, offset, (void *)&buffer, ISO_PVD_SIZE);
384 		offset += ISO_PVD_SIZE;
385 		if (error < ISO_PVD_SIZE)
386 			break;
387 
388 		common = (iso9660_common_descriptor*)buffer;
389 		error = check_common_descriptor(common);
390 		if (error < B_OK)
391 			break;
392 
393 //		dump_common_descriptor(common, "", true);
394 
395 		// Handle each type of descriptor appropriately
396 		TRACE(("found %s descriptor\n", descriptor_type_to_string(
397 			(iso9660_descriptor_type)common->type)));
398 		found = true;
399 
400 		switch (common->type) {
401 			case ISO9660VD_BOOT:
402 				break;
403 
404 			case ISO9660VD_PRIMARY:
405 			{
406 				iso9660_volume_descriptor *primary
407 					= (iso9660_volume_descriptor*)buffer;
408 				int i;
409 
410 				dump_primary_descriptor(primary, "  ", true);
411 
412 				// Cut off any trailing spaces from the volume id. Note
413 				// that this allows for spaces INSIDE the volume id, even
414 				// though that's not technically allowed by the standard;
415 				// this was necessary to support certain RedHat 6.2 CD-ROMs
416 				// from a certain Linux company who shall remain unnamed. ;-)
417 				for (i = ISO9660_VOLUME_IDENTIFIER_LENGTH - 1; i >= 0;
418 						i--) {
419 					if (primary->identifier[i] != 0x20)
420 						break;
421 				}
422 
423 				// Give a holler if the iso9660 name is already set
424 				if (info->iso9660_name) {
425 					char name[ISO9660_VOLUME_IDENTIFIER_LENGTH + 1];
426 					strlcpy(name, primary->identifier, i + 1);
427 					TRACE(("duplicate iso9660 volume name found, using "
428 						"latter (`%s') instead of former (`%s')\n", name,
429 						info->iso9660_name));
430 				}
431 
432 				info->SetISO9660Name(primary->identifier, i + 1);
433 				info->max_blocks = B_LENDIAN_TO_HOST_INT32(primary->set_size);
434 				break;
435 			}
436 
437 			case ISO9660VD_SUPPLEMENTARY:
438 			{
439 				iso9660_volume_descriptor *supplementary
440 					= (iso9660_volume_descriptor*)buffer;
441 				dump_supplementary_descriptor(supplementary, "  ", true);
442 
443 				// Copy and null terminate the escape sequences
444 				char escapes[ISO9660_ESCAPE_SEQUENCE_LENGTH + 1];
445 				strlcpy(escapes, supplementary->escape_sequences,
446 					ISO9660_ESCAPE_SEQUENCE_LENGTH + 1);
447 
448 				// Check for a Joliet VD
449 				if (strstr(escapes, "%/@") || strstr(escapes, "%/C")
450 					|| strstr(escapes, "%/E")) {
451 					char name[(ISO9660_VOLUME_IDENTIFIER_LENGTH * 3 / 2) + 1];
452 						// Since we're dealing with 16-bit Unicode, each
453 						// UTF-8 sequence will be at most 3 bytes long.
454 					char *pos = name;
455 					uint16 ch;
456 
457 					// Walk thru the unicode volume name, converting to utf8 as we go.
458 					for (int i = 0; (ch = B_BENDIAN_TO_HOST_INT16(
459 								((uint16*)supplementary->identifier)[i]))
460 							&& i < ISO9660_VOLUME_IDENTIFIER_LENGTH; i++) {
461 						// Give a warning if the character is technically
462 						// illegal
463 						if (ch <= 0x001F || ch == '*' || ch == '/'
464 						    || ch == ':' || ch == ';'
465 						    || ch == '?' || ch == '\\') {
466 							TRACE(("warning: illegal Joliet character "
467 								"found: 0%4x\n", ch));
468 						}
469 
470 						// Convert to utf-8
471 						unicode_to_utf8(ch, &pos);
472 					}
473 					pos[0] = '\0';
474 
475 					// Give a holler if the joliet name is already set
476 					if (info->joliet_name) {
477 						TRACE(("duplicate joliet volume name found, using "
478 							"latter (`%s') instead of former (`%s')\n",
479 							name, info->joliet_name));
480 					}
481 
482 					info->SetJolietName(name, pos - name);
483 				}
484 				break;
485 			}
486 
487 			case ISO9660VD_PARTITION:
488 				break;
489 
490 			case ISO9660VD_TERMINATOR:
491 				exit = true;
492 				break;
493 
494 			default:
495 				break;
496 		}
497 	}
498 
499 	return found ? B_OK : error;
500 }
501 
502