xref: /haiku/src/add-ons/kernel/generic/scsi_periph/error_handling.cpp (revision 1294543de9ac0eff000eaea1b18368c36435d08e)
1 /*
2  * Copyright 2002/03, Thomas Kurschel. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 /*
7 	Part of Open SCSI Peripheral Driver
8 
9 	Error handling
10 */
11 
12 
13 #include "scsi_periph_int.h"
14 
15 
16 /** decode sense data and generate error code */
17 
18 static
19 err_res check_sense(scsi_periph_device_info *device, scsi_ccb *request)
20 {
21 	scsi_sense *sense = (scsi_sense *)request->sense;
22 
23 	if ((request->subsys_status & SCSI_AUTOSNS_VALID) == 0) {
24 		SHOW_ERROR0(2, "No auto-sense (but there should be)");
25 
26 		// shouldn't happen (cam_status should be CAM_AUTOSENSE_FAIL
27 		// as we asked for autosense)
28 		return MK_ERROR(err_act_fail, B_ERROR);
29 	}
30 
31 	if (SCSI_MAX_SENSE_SIZE - request->sense_resid
32 			< (int)offsetof(scsi_sense, add_sense_length) + 1) {
33 		SHOW_ERROR(2, "sense too short (%d bytes)", SCSI_MAX_SENSE_SIZE - request->sense_resid);
34 
35 		// that's a bit too short
36 		return MK_ERROR(err_act_fail, B_ERROR);
37 	}
38 
39 	switch (sense->error_code) {
40 		case SCSIS_DEFERRED_ERROR:
41 			// we are doomed - some previous request turned out to have failed
42 			// we neither know which one nor can we resubmit it
43 			SHOW_ERROR0(2, "encountered DEFERRED ERROR - bye, bye");
44 			return MK_ERROR(err_act_ok, B_OK);
45 
46 		case SCSIS_CURR_ERROR:
47 			// we start with very specific and finish very general error infos
48 			switch ((sense->asc << 8) | sense->ascq) {
49 				case SCSIS_ASC_AUDIO_PLAYING:
50 					SHOW_INFO0(2, "busy because playing audio");
51 
52 					// we need something like "busy"
53 					return MK_ERROR(err_act_fail, B_DEV_NOT_READY);
54 
55 				case SCSIS_ASC_LUN_NEED_INIT:
56 					SHOW_INFO0(2, "LUN needs init");
57 
58 					// reported by some devices that are idle and spun down
59 					// sending START UNIT should awake them
60 					return MK_ERROR(err_act_start, B_NO_INIT);
61 
62 				case SCSIS_ASC_LUN_NEED_MANUAL_HELP:
63 					SHOW_ERROR0(2, "LUN needs manual help");
64 
65 					return MK_ERROR(err_act_fail, B_DEV_NOT_READY);
66 
67 				case SCSIS_ASC_LUN_FORMATTING:
68 					SHOW_INFO0(2, "LUN is formatting");
69 
70 					// we could wait, but as formatting normally takes quite long,
71 					// we give up without any further retries
72 					return MK_ERROR(err_act_fail, B_DEV_NOT_READY);
73 
74 				case SCSIS_ASC_MEDIUM_CHANGED:
75 					SHOW_FLOW0(3, "Medium changed");
76 					periph_media_changed(device, request);
77 					return MK_ERROR(err_act_fail, B_DEV_MEDIA_CHANGED);
78 
79 				case SCSIS_ASC_WRITE_ERR_AUTOREALLOC:
80 					SHOW_ERROR0(2, "Recovered write error - block got reallocated automatically");
81 					return MK_ERROR(err_act_ok, B_OK);
82 
83 				case SCSIS_ASC_ID_RECOV:
84 					SHOW_ERROR0(2, "Recovered ID with ECC");
85 					return MK_ERROR(err_act_ok, B_OK);
86 
87 				case SCSIS_ASC_REMOVAL_REQUESTED:
88 					SHOW_INFO0(2, "Removal requested");
89 					ACQUIRE_BEN(&device->mutex);
90 					device->removal_requested = true;
91 					RELEASE_BEN(&device->mutex);
92 
93 					return MK_ERROR(err_act_retry, B_DEV_MEDIA_CHANGE_REQUESTED);
94 
95 				case SCSIS_ASC_LUN_BECOMING_READY:
96 					SHOW_INFO0(2, "Becoming ready");
97 					// wait a bit - the device needs some time
98 					snooze(100000);
99 					return MK_ERROR(err_act_many_retries, B_DEV_NOT_READY);
100 
101 				case SCSIS_ASC_WAS_RESET:
102 					SHOW_INFO0(2, "Unit was reset");
103 					// TBD: need a better error code here
104 					// as some earlier command led to the reset, we are innocent
105 					return MK_ERROR(err_act_retry, B_DEV_NOT_READY);
106 			}
107 
108 			switch (sense->asc) {
109 				case SCSIS_ASC_DATA_RECOV_NO_ERR_CORR >> 8:
110 				case SCSIS_ASC_DATA_RECOV_WITH_CORR >> 8:
111 					// these are the groups of recovered data with or without correction
112 					// we should print at least a warning here
113 					SHOW_ERROR(0, "Recovered data, asc=0x%2x, ascq=0x%2x",
114 						sense->asc, sense->ascq);
115 					return MK_ERROR(err_act_ok, B_OK);
116 
117 				case SCSIS_ASC_WRITE_PROTECTED >> 8:
118 					SHOW_ERROR0( 2, "Write protected" );
119 
120 					// isn't there any proper "write protected" error code?
121 					return MK_ERROR(err_act_fail, B_READ_ONLY_DEVICE);
122 
123 				case SCSIS_ASC_NO_MEDIUM >> 8:
124 					SHOW_FLOW0(2, "No medium");
125 					return MK_ERROR(err_act_fail, B_DEV_NO_MEDIA);
126 			}
127 
128 			// we issue this info very late, so we don't clutter syslog with
129 			// messages about changed or missing media
130 			SHOW_ERROR(3, "0x%04x", (sense->asc << 8) | sense->ascq);
131 
132 			switch (sense->sense_key) {
133 				case SCSIS_KEY_NO_SENSE:
134 					SHOW_ERROR0(2, "No sense");
135 
136 					// we thought there was an error, huh?
137 					return MK_ERROR(err_act_ok, B_OK);
138 
139 				case SCSIS_KEY_RECOVERED_ERROR:
140 					SHOW_ERROR0(2, "Recovered error");
141 
142 					// we should probably tell about that; perhaps tomorrow
143 					return MK_ERROR(err_act_ok, B_OK);
144 
145 				case SCSIS_KEY_NOT_READY:
146 					return MK_ERROR(err_act_retry, B_DEV_NOT_READY);
147 
148 				case SCSIS_KEY_MEDIUM_ERROR:
149 					SHOW_ERROR0(2, "Medium error");
150 					return MK_ERROR( err_act_retry, B_DEV_RECALIBRATE_ERROR);
151 
152 				case SCSIS_KEY_HARDWARE_ERROR:
153 					SHOW_ERROR0(2, "Hardware error");
154 					return MK_ERROR(err_act_retry, B_DEV_SEEK_ERROR);
155 
156 				case SCSIS_KEY_ILLEGAL_REQUEST:
157 					SHOW_ERROR0(2, "Illegal request");
158 					return MK_ERROR(err_act_invalid_req, B_ERROR);
159 
160 				case SCSIS_KEY_UNIT_ATTENTION:
161 					SHOW_ERROR0(2, "Unit attention");
162 					return MK_ERROR( err_act_retry, B_DEV_NOT_READY);
163 
164 				case SCSIS_KEY_DATA_PROTECT:
165 					SHOW_ERROR0(2, "Data protect");
166 
167 					// we could set "permission denied", but that's probably
168 					// irritating to the user
169 					return MK_ERROR(err_act_fail, B_NOT_ALLOWED);
170 
171 				case SCSIS_KEY_BLANK_CHECK:
172 					SHOW_ERROR0(2, "Is blank");
173 
174 					return MK_ERROR(err_act_fail, B_DEV_UNREADABLE);
175 
176 				case SCSIS_KEY_VENDOR_SPECIFIC:
177 					return MK_ERROR(err_act_fail, B_ERROR);
178 
179 				case SCSIS_KEY_COPY_ABORTED:
180 					// we don't use copy, so this is really wrong
181 					return MK_ERROR(err_act_fail, B_ERROR);
182 
183 				case SCSIS_KEY_ABORTED_COMMAND:
184 					// proper error code?
185 					return MK_ERROR(err_act_retry, B_ERROR);
186 
187 				case SCSIS_KEY_EQUAL:
188 				case SCSIS_KEY_MISCOMPARE:
189 					// we don't search, so this is really wrong
190 					return MK_ERROR(err_act_fail, B_ERROR);
191 
192 				case SCSIS_KEY_VOLUME_OVERFLOW:
193 					// not the best return code, but this error doesn't apply
194 					// to devices we currently support
195 					return MK_ERROR(err_act_fail, B_DEV_SEEK_ERROR);
196 
197 				case SCSIS_KEY_RESERVED:
198 				default:
199 					return MK_ERROR(err_act_fail, B_ERROR);
200 			}
201 
202 		default:
203 			// shouldn't happen - there are only 2 error codes defined
204 			SHOW_ERROR(2, "Invalid sense type (0x%x)", sense->error_code);
205 			return MK_ERROR(err_act_fail, B_ERROR);
206 	}
207 }
208 
209 
210 /** check scsi status, using sense if available */
211 
212 static err_res
213 check_scsi_status(scsi_periph_device_info *device, scsi_ccb *request)
214 {
215 	SHOW_FLOW(3, "%d", request->device_status & SCSI_STATUS_MASK);
216 
217 	switch (request->device_status & SCSI_STATUS_MASK) {
218 		case SCSI_STATUS_GOOD:
219 			// shouldn't happen (cam_status should be CAM_REQ_CMP)
220 			return MK_ERROR(err_act_ok, B_OK);
221 
222 		case SCSI_STATUS_CHECK_CONDITION:
223 			return check_sense(device, request);
224 
225 		case SCSI_STATUS_QUEUE_FULL:
226 			// SIM should have automatically requeued request, fall through
227 		case SCSI_STATUS_BUSY:
228 			// take deep breath and try again
229 			snooze(1000000);
230 			return MK_ERROR(err_act_retry, B_DEV_TIMEOUT);
231 
232 		case SCSI_STATUS_COMMAND_TERMINATED:
233 			return MK_ERROR(err_act_retry, B_INTERRUPTED);
234 
235 		default:
236 			return MK_ERROR(err_act_retry, B_ERROR);
237 	}
238 }
239 
240 
241 /**	check result of request
242  *	1. check SCSI subsystem problems
243  *	2. if request hit device, check SCSI status
244  *	3. if request got executed, check sense
245  */
246 
247 err_res
248 periph_check_error(scsi_periph_device_info *device, scsi_ccb *request)
249 {
250 	SHOW_FLOW(4, "%d", request->subsys_status & SCSI_SUBSYS_STATUS_MASK);
251 
252 	switch (request->subsys_status & SCSI_SUBSYS_STATUS_MASK) {
253 		// everything is ok
254 		case SCSI_REQ_CMP:
255 			return MK_ERROR(err_act_ok, B_OK);
256 
257 		// no device
258 		case SCSI_LUN_INVALID:
259 		case SCSI_TID_INVALID:
260 		case SCSI_PATH_INVALID:
261 		case SCSI_DEV_NOT_THERE:
262 		case SCSI_NO_HBA:
263 			SHOW_ERROR0(2, "No device");
264 			return MK_ERROR(err_act_fail, B_DEV_BAD_DRIVE_NUM);
265 
266 		// device temporary unavailable
267 		case SCSI_SEL_TIMEOUT:
268 		case SCSI_BUSY:
269 		case SCSI_SCSI_BUSY:
270 		case SCSI_HBA_ERR:
271 		case SCSI_MSG_REJECT_REC:
272 		case SCSI_NO_NEXUS:
273 		case SCSI_FUNC_NOTAVAIL:
274 		case SCSI_RESRC_UNAVAIL:
275 			// take a deep breath and hope device becomes ready
276 			snooze(1000000);
277 			return MK_ERROR(err_act_retry, B_DEV_TIMEOUT);
278 
279 		// data transmission went wrong
280 		case SCSI_DATA_RUN_ERR:
281 		case SCSI_UNCOR_PARITY:
282 			SHOW_ERROR0(2, "Data transmission failed");
283 			// retry immediately
284 			return MK_ERROR(err_act_retry, B_DEV_READ_ERROR);
285 
286 		// request broken
287 		case SCSI_REQ_INVALID:
288 			SHOW_ERROR0(2, "Invalid request");
289 			return MK_ERROR(err_act_fail, B_ERROR);
290 
291 		// request aborted
292 		case SCSI_REQ_ABORTED:
293 		case SCSI_SCSI_BUS_RESET:
294 		case SCSI_REQ_TERMIO:
295 		case SCSI_UNEXP_BUSFREE:
296 		case SCSI_BDR_SENT:
297 		case SCSI_CMD_TIMEOUT:
298 		case SCSI_IID_INVALID:
299 		case SCSI_UNACKED_EVENT:
300 		case SCSI_IDE:
301 		case SCSI_SEQUENCE_FAIL:
302 			// take a small breath and retry
303 			snooze(100000);
304 			return MK_ERROR(err_act_retry, B_DEV_TIMEOUT);
305 
306 		// device error
307 		case SCSI_REQ_CMP_ERR:
308 			return check_scsi_status(device, request);
309 
310 		// device error, but we don't know what happened
311 		case SCSI_AUTOSENSE_FAIL:
312 			SHOW_ERROR0(2, "Auto-sense failed, don't know what really happened");
313 			return MK_ERROR(err_act_fail, B_ERROR);
314 
315 		// should not happen, give up
316 		case SCSI_BUS_RESET_DENIED:
317 		case SCSI_PROVIDE_FAIL:
318 		case SCSI_UA_TERMIO:
319 		case SCSI_CDB_RECVD:
320 		case SCSI_LUN_ALLREADY_ENAB:
321 			// supposed to fall through
322 		default:
323 			return MK_ERROR(err_act_fail, B_ERROR);
324 	}
325 }
326