xref: /haiku/src/add-ons/kernel/generic/scsi_periph/error_handling.cpp (revision 220d04022750f40f8bac8f01fa551211e28d04f2)
1 /*
 * Copyright 2011, Haiku, Inc. All Rights Reserved.
3  * Copyright 2002-03, Thomas Kurschel. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //!	Error handling
9 
10 
11 #include "scsi_periph_int.h"
12 
13 
14 /*! Decode sense data and generate error code. */
15 static
16 err_res check_sense(scsi_periph_device_info *device, scsi_ccb *request)
17 {
18 	scsi_sense *sense = (scsi_sense *)request->sense;
19 
20 	if ((request->subsys_status & SCSI_AUTOSNS_VALID) == 0) {
21 		SHOW_ERROR0(2, "No auto-sense (but there should be)");
22 
23 		// shouldn't happen (cam_status should be CAM_AUTOSENSE_FAIL
24 		// as we asked for autosense)
25 		return MK_ERROR(err_act_fail, B_ERROR);
26 	}
27 
28 	if (SCSI_MAX_SENSE_SIZE - request->sense_resid
29 			< (int)offsetof(scsi_sense, add_sense_length) + 1) {
30 		SHOW_ERROR(2, "sense too short (%d bytes)", SCSI_MAX_SENSE_SIZE - request->sense_resid);
31 
32 		// that's a bit too short
33 		return MK_ERROR(err_act_fail, B_ERROR);
34 	}
35 
36 	switch (sense->error_code) {
37 		case SCSIS_DEFERRED_ERROR:
38 			// we are doomed - some previous request turned out to have failed
39 			// we neither know which one nor can we resubmit it
40 			SHOW_ERROR0(2, "encountered DEFERRED ERROR - bye, bye");
41 			return MK_ERROR(err_act_ok, B_OK);
42 
43 		case SCSIS_CURR_ERROR:
44 			// we start with very specific and finish very general error infos
45 			switch ((sense->asc << 8) | sense->ascq) {
46 				case SCSIS_ASC_AUDIO_PLAYING:
47 					SHOW_INFO0(2, "busy because playing audio");
48 
49 					// we need something like "busy"
50 					return MK_ERROR(err_act_fail, B_DEV_NOT_READY);
51 
52 				case SCSIS_ASC_LUN_NEED_INIT:
53 					SHOW_INFO0(2, "LUN needs init");
54 
55 					// reported by some devices that are idle and spun down
56 					// sending START UNIT should awake them
57 					return MK_ERROR(err_act_start, B_NO_INIT);
58 
59 				case SCSIS_ASC_LUN_NEED_MANUAL_HELP:
60 					SHOW_ERROR0(2, "LUN needs manual help");
61 
62 					return MK_ERROR(err_act_fail, B_DEV_NOT_READY);
63 
64 				case SCSIS_ASC_LUN_FORMATTING:
65 					SHOW_INFO0(2, "LUN is formatting");
66 
67 					// we could wait, but as formatting normally takes quite long,
68 					// we give up without any further retries
69 					return MK_ERROR(err_act_fail, B_DEV_NOT_READY);
70 
71 				case SCSIS_ASC_MEDIUM_CHANGED:
72 					SHOW_FLOW0(3, "Medium changed");
73 					periph_media_changed(device, request);
74 					return MK_ERROR(err_act_fail, B_DEV_MEDIA_CHANGED);
75 
76 				case SCSIS_ASC_WRITE_ERR_AUTOREALLOC:
77 					SHOW_ERROR0(2, "Recovered write error - block got reallocated automatically");
78 					return MK_ERROR(err_act_ok, B_OK);
79 
80 				case SCSIS_ASC_ID_RECOV:
81 					SHOW_ERROR0(2, "Recovered ID with ECC");
82 					return MK_ERROR(err_act_ok, B_OK);
83 
84 				case SCSIS_ASC_REMOVAL_REQUESTED:
85 					SHOW_INFO0(2, "Removal requested");
86 					mutex_lock(&device->mutex);
87 					device->removal_requested = true;
88 					mutex_unlock(&device->mutex);
89 
90 					return MK_ERROR(err_act_retry, B_DEV_MEDIA_CHANGE_REQUESTED);
91 
92 				case SCSIS_ASC_LUN_BECOMING_READY:
93 					SHOW_INFO0(2, "Becoming ready");
94 					// wait a bit - the device needs some time
95 					snooze(100000);
96 					return MK_ERROR(err_act_many_retries, B_DEV_NOT_READY);
97 
98 				case SCSIS_ASC_WAS_RESET:
99 					SHOW_INFO0(2, "Unit was reset");
100 					// TBD: need a better error code here
101 					// as some earlier command led to the reset, we are innocent
102 					return MK_ERROR(err_act_retry, B_DEV_NOT_READY);
103 			}
104 
105 			switch (sense->asc) {
106 				case SCSIS_ASC_DATA_RECOV_NO_ERR_CORR >> 8:
107 				case SCSIS_ASC_DATA_RECOV_WITH_CORR >> 8:
108 					// these are the groups of recovered data with or without correction
109 					// we should print at least a warning here
110 					SHOW_ERROR(0, "Recovered data, asc=0x%2x, ascq=0x%2x",
111 						sense->asc, sense->ascq);
112 					return MK_ERROR(err_act_ok, B_OK);
113 
114 				case SCSIS_ASC_WRITE_PROTECTED >> 8:
115 					SHOW_ERROR0( 2, "Write protected" );
116 
117 					// isn't there any proper "write protected" error code?
118 					return MK_ERROR(err_act_fail, B_READ_ONLY_DEVICE);
119 
120 				case SCSIS_ASC_NO_MEDIUM >> 8:
121 					SHOW_FLOW0(2, "No medium");
122 					return MK_ERROR(err_act_fail, B_DEV_NO_MEDIA);
123 			}
124 
125 			// we issue this info very late, so we don't clutter syslog with
126 			// messages about changed or missing media
127 			SHOW_ERROR(3, "0x%04x", (sense->asc << 8) | sense->ascq);
128 
129 			switch (sense->sense_key) {
130 				case SCSIS_KEY_NO_SENSE:
131 					SHOW_ERROR0(2, "No sense");
132 
133 					// we thought there was an error, huh?
134 					return MK_ERROR(err_act_ok, B_OK);
135 
136 				case SCSIS_KEY_RECOVERED_ERROR:
137 					SHOW_ERROR0(2, "Recovered error");
138 
139 					// we should probably tell about that; perhaps tomorrow
140 					return MK_ERROR(err_act_ok, B_OK);
141 
142 				case SCSIS_KEY_NOT_READY:
143 					return MK_ERROR(err_act_retry, B_DEV_NOT_READY);
144 
145 				case SCSIS_KEY_MEDIUM_ERROR:
146 					SHOW_ERROR0(2, "Medium error");
147 					return MK_ERROR( err_act_retry, B_DEV_RECALIBRATE_ERROR);
148 
149 				case SCSIS_KEY_HARDWARE_ERROR:
150 					SHOW_ERROR0(2, "Hardware error");
151 					return MK_ERROR(err_act_retry, B_DEV_SEEK_ERROR);
152 
153 				case SCSIS_KEY_ILLEGAL_REQUEST:
154 					SHOW_ERROR0(2, "Illegal request");
155 					return MK_ERROR(err_act_invalid_req, B_ERROR);
156 
157 				case SCSIS_KEY_UNIT_ATTENTION:
158 					SHOW_ERROR0(2, "Unit attention");
159 					return MK_ERROR( err_act_retry, B_DEV_NOT_READY);
160 
161 				case SCSIS_KEY_DATA_PROTECT:
162 					SHOW_ERROR0(2, "Data protect");
163 
164 					// we could set "permission denied", but that's probably
165 					// irritating to the user
166 					return MK_ERROR(err_act_fail, B_NOT_ALLOWED);
167 
168 				case SCSIS_KEY_BLANK_CHECK:
169 					SHOW_ERROR0(2, "Is blank");
170 
171 					return MK_ERROR(err_act_fail, B_DEV_UNREADABLE);
172 
173 				case SCSIS_KEY_VENDOR_SPECIFIC:
174 					return MK_ERROR(err_act_fail, B_ERROR);
175 
176 				case SCSIS_KEY_COPY_ABORTED:
177 					// we don't use copy, so this is really wrong
178 					return MK_ERROR(err_act_fail, B_ERROR);
179 
180 				case SCSIS_KEY_ABORTED_COMMAND:
181 					// proper error code?
182 					return MK_ERROR(err_act_retry, B_ERROR);
183 
184 				case SCSIS_KEY_EQUAL:
185 				case SCSIS_KEY_MISCOMPARE:
186 					// we don't search, so this is really wrong
187 					return MK_ERROR(err_act_fail, B_ERROR);
188 
189 				case SCSIS_KEY_VOLUME_OVERFLOW:
190 					// not the best return code, but this error doesn't apply
191 					// to devices we currently support
192 					return MK_ERROR(err_act_fail, B_DEV_SEEK_ERROR);
193 
194 				case SCSIS_KEY_RESERVED:
195 				default:
196 					return MK_ERROR(err_act_fail, B_ERROR);
197 			}
198 
199 		default:
200 			// shouldn't happen - there are only 2 error codes defined
201 			SHOW_ERROR(2, "Invalid sense type (0x%x)", sense->error_code);
202 			return MK_ERROR(err_act_fail, B_ERROR);
203 	}
204 }
205 
206 
207 /*!	Check scsi status, using sense if available. */
208 static err_res
209 check_scsi_status(scsi_periph_device_info *device, scsi_ccb *request)
210 {
211 	SHOW_FLOW(3, "%d", request->device_status & SCSI_STATUS_MASK);
212 
213 	switch (request->device_status & SCSI_STATUS_MASK) {
214 		case SCSI_STATUS_GOOD:
215 			// shouldn't happen (cam_status should be CAM_REQ_CMP)
216 			return MK_ERROR(err_act_ok, B_OK);
217 
218 		case SCSI_STATUS_CHECK_CONDITION:
219 			return check_sense(device, request);
220 
221 		case SCSI_STATUS_QUEUE_FULL:
222 			// SIM should have automatically requeued request, fall through
223 		case SCSI_STATUS_BUSY:
224 			// take deep breath and try again
225 			snooze(1000000);
226 			return MK_ERROR(err_act_retry, B_DEV_TIMEOUT);
227 
228 		case SCSI_STATUS_COMMAND_TERMINATED:
229 			return MK_ERROR(err_act_retry, B_INTERRUPTED);
230 
231 		default:
232 			return MK_ERROR(err_act_retry, B_ERROR);
233 	}
234 }
235 
236 
237 /*!	Check result of request
238  *	1. check SCSI subsystem problems
239  *	2. if request hit device, check SCSI status
240  *	3. if request got executed, check sense
241  */
242 err_res
243 periph_check_error(scsi_periph_device_info *device, scsi_ccb *request)
244 {
245 	SHOW_FLOW(4, "%d", request->subsys_status & SCSI_SUBSYS_STATUS_MASK);
246 
247 	switch (request->subsys_status & SCSI_SUBSYS_STATUS_MASK) {
248 		// everything is ok
249 		case SCSI_REQ_CMP:
250 			return MK_ERROR(err_act_ok, B_OK);
251 
252 		// no device
253 		case SCSI_LUN_INVALID:
254 		case SCSI_TID_INVALID:
255 		case SCSI_PATH_INVALID:
256 		case SCSI_DEV_NOT_THERE:
257 		case SCSI_NO_HBA:
258 			SHOW_ERROR0(2, "No device");
259 			return MK_ERROR(err_act_fail, B_DEV_BAD_DRIVE_NUM);
260 
261 		// device temporary unavailable
262 		case SCSI_SEL_TIMEOUT:
263 		case SCSI_BUSY:
264 		case SCSI_SCSI_BUSY:
265 		case SCSI_HBA_ERR:
266 		case SCSI_MSG_REJECT_REC:
267 		case SCSI_NO_NEXUS:
268 		case SCSI_FUNC_NOTAVAIL:
269 		case SCSI_RESRC_UNAVAIL:
270 			// take a deep breath and hope device becomes ready
271 			snooze(1000000);
272 			return MK_ERROR(err_act_retry, B_DEV_TIMEOUT);
273 
274 		// data transmission went wrong
275 		case SCSI_DATA_RUN_ERR:
276 		case SCSI_UNCOR_PARITY:
277 			SHOW_ERROR0(2, "Data transmission failed");
278 			// retry immediately
279 			return MK_ERROR(err_act_retry, B_DEV_READ_ERROR);
280 
281 		// request broken
282 		case SCSI_REQ_INVALID:
283 			SHOW_ERROR0(2, "Invalid request");
284 			return MK_ERROR(err_act_fail, B_ERROR);
285 
286 		// request aborted
287 		case SCSI_REQ_ABORTED:
288 		case SCSI_SCSI_BUS_RESET:
289 		case SCSI_REQ_TERMIO:
290 		case SCSI_UNEXP_BUSFREE:
291 		case SCSI_BDR_SENT:
292 		case SCSI_CMD_TIMEOUT:
293 		case SCSI_IID_INVALID:
294 		case SCSI_UNACKED_EVENT:
295 		case SCSI_IDE:
296 		case SCSI_SEQUENCE_FAIL:
297 			// take a small breath and retry
298 			snooze(100000);
299 			return MK_ERROR(err_act_retry, B_DEV_TIMEOUT);
300 
301 		// device error
302 		case SCSI_REQ_CMP_ERR:
303 			return check_scsi_status(device, request);
304 
305 		// device error, but we don't know what happened
306 		case SCSI_AUTOSENSE_FAIL:
307 			SHOW_ERROR0(2, "Auto-sense failed, don't know what really happened");
308 			return MK_ERROR(err_act_fail, B_ERROR);
309 
310 		// should not happen, give up
311 		case SCSI_BUS_RESET_DENIED:
312 		case SCSI_PROVIDE_FAIL:
313 		case SCSI_UA_TERMIO:
314 		case SCSI_CDB_RECVD:
315 		case SCSI_LUN_ALLREADY_ENAB:
316 			// supposed to fall through
317 		default:
318 			return MK_ERROR(err_act_fail, B_ERROR);
319 	}
320 }
321