xref: /haiku/src/build/libshared/NaturalCompare.cpp (revision 3ec770177938efc5a7fee54d03b45fa593a57988)
1*3ec77017SAdrien Destugues /*
2*3ec77017SAdrien Destugues  * Copyright 2009, Dana Burkart
3*3ec77017SAdrien Destugues  * Copyright 2009, Stephan Aßmus <superstippi@gmx.de>
4*3ec77017SAdrien Destugues  * Copyright 2010, Axel Dörfler, axeld@pinc-software.de
5*3ec77017SAdrien Destugues  * Copyright 2010, Rene Gollent (anevilyak@gmail.com)
6*3ec77017SAdrien Destugues  * Distributed under the terms of the MIT License.
7*3ec77017SAdrien Destugues  */
8*3ec77017SAdrien Destugues 
9*3ec77017SAdrien Destugues 
10*3ec77017SAdrien Destugues #include <NaturalCompare.h>
11*3ec77017SAdrien Destugues 
12*3ec77017SAdrien Destugues #include <ctype.h>
13*3ec77017SAdrien Destugues #include <string.h>
14*3ec77017SAdrien Destugues #include <strings.h>
15*3ec77017SAdrien Destugues 
16*3ec77017SAdrien Destugues #include <StorageDefs.h>
17*3ec77017SAdrien Destugues #include <SupportDefs.h>
18*3ec77017SAdrien Destugues 
19*3ec77017SAdrien Destugues 
20*3ec77017SAdrien Destugues namespace BPrivate {
21*3ec77017SAdrien Destugues 
22*3ec77017SAdrien Destugues 
23*3ec77017SAdrien Destugues // #pragma mark - Natural sorting
24*3ec77017SAdrien Destugues 
25*3ec77017SAdrien Destugues 
26*3ec77017SAdrien Destugues struct natural_chunk {
27*3ec77017SAdrien Destugues 	enum chunk_type {
28*3ec77017SAdrien Destugues 		NUMBER,
29*3ec77017SAdrien Destugues 		ASCII,
30*3ec77017SAdrien Destugues 		END
31*3ec77017SAdrien Destugues 	};
32*3ec77017SAdrien Destugues 	chunk_type	type;
33*3ec77017SAdrien Destugues 	char		buffer[B_FILE_NAME_LENGTH];
34*3ec77017SAdrien Destugues 	int32		length;
35*3ec77017SAdrien Destugues };
36*3ec77017SAdrien Destugues 
37*3ec77017SAdrien Destugues 
38*3ec77017SAdrien Destugues inline int32
FetchNaturalChunk(natural_chunk & chunk,const char * source)39*3ec77017SAdrien Destugues FetchNaturalChunk(natural_chunk& chunk, const char* source)
40*3ec77017SAdrien Destugues {
41*3ec77017SAdrien Destugues 	if (chunk.type == natural_chunk::ASCII) {
42*3ec77017SAdrien Destugues 		// string chunk
43*3ec77017SAdrien Destugues 		int32 pos = 0;
44*3ec77017SAdrien Destugues 		while (!isdigit(source[pos]) && !isspace(source[pos])
45*3ec77017SAdrien Destugues 			&& source[pos] != '\0') {
46*3ec77017SAdrien Destugues 			pos++;
47*3ec77017SAdrien Destugues 		}
48*3ec77017SAdrien Destugues 		strlcpy(chunk.buffer, source, pos + 1);
49*3ec77017SAdrien Destugues 		chunk.length = pos;
50*3ec77017SAdrien Destugues 		return pos;
51*3ec77017SAdrien Destugues 	}
52*3ec77017SAdrien Destugues 
53*3ec77017SAdrien Destugues 	// Skip leading zeros and whitespace characters
54*3ec77017SAdrien Destugues 	int32 skip = 0;
55*3ec77017SAdrien Destugues 	while (source[0] == '0' || isspace(source[0])) {
56*3ec77017SAdrien Destugues 		source++;
57*3ec77017SAdrien Destugues 		skip++;
58*3ec77017SAdrien Destugues 	}
59*3ec77017SAdrien Destugues 
60*3ec77017SAdrien Destugues 	// Number chunk (stop at next white space)
61*3ec77017SAdrien Destugues 	int32 pos = 0;
62*3ec77017SAdrien Destugues 	while (isdigit(source[pos])) {
63*3ec77017SAdrien Destugues 		pos++;
64*3ec77017SAdrien Destugues 	}
65*3ec77017SAdrien Destugues 
66*3ec77017SAdrien Destugues 	strlcpy(chunk.buffer, source, pos + 1);
67*3ec77017SAdrien Destugues 	chunk.length = pos;
68*3ec77017SAdrien Destugues 
69*3ec77017SAdrien Destugues 	// Skip trailing whitespace as well
70*3ec77017SAdrien Destugues 	while (isspace(source[pos])) {
71*3ec77017SAdrien Destugues 		source++;
72*3ec77017SAdrien Destugues 		skip++;
73*3ec77017SAdrien Destugues 	}
74*3ec77017SAdrien Destugues 
75*3ec77017SAdrien Destugues 	return pos + skip;
76*3ec77017SAdrien Destugues }
77*3ec77017SAdrien Destugues 
78*3ec77017SAdrien Destugues 
79*3ec77017SAdrien Destugues //! Compares two strings naturally, as opposed to lexicographically
80*3ec77017SAdrien Destugues int
NaturalCompare(const char * stringA,const char * stringB)81*3ec77017SAdrien Destugues NaturalCompare(const char* stringA, const char* stringB)
82*3ec77017SAdrien Destugues {
83*3ec77017SAdrien Destugues 	if (stringA == NULL)
84*3ec77017SAdrien Destugues 		return stringB == NULL ? 0 : -1;
85*3ec77017SAdrien Destugues 	if (stringB == NULL)
86*3ec77017SAdrien Destugues 		return 1;
87*3ec77017SAdrien Destugues 
88*3ec77017SAdrien Destugues 	natural_chunk a;
89*3ec77017SAdrien Destugues 	natural_chunk b;
90*3ec77017SAdrien Destugues 
91*3ec77017SAdrien Destugues 	uint32 indexA = 0;
92*3ec77017SAdrien Destugues 	uint32 indexB = 0;
93*3ec77017SAdrien Destugues 
94*3ec77017SAdrien Destugues 	while (true) {
95*3ec77017SAdrien Destugues 		// Determine type of next chunks in each string based on first char
96*3ec77017SAdrien Destugues 		if (stringA[indexA] == '\0')
97*3ec77017SAdrien Destugues 			a.type = natural_chunk::END;
98*3ec77017SAdrien Destugues 		else if (isdigit(stringA[indexA]) || isspace(stringA[indexA]))
99*3ec77017SAdrien Destugues 			a.type = natural_chunk::NUMBER;
100*3ec77017SAdrien Destugues 		else
101*3ec77017SAdrien Destugues 			a.type = natural_chunk::ASCII;
102*3ec77017SAdrien Destugues 
103*3ec77017SAdrien Destugues 		if (stringB[indexB] == '\0')
104*3ec77017SAdrien Destugues 			b.type = natural_chunk::END;
105*3ec77017SAdrien Destugues 		else if (isdigit(stringB[indexB]) || isspace(stringB[indexB]))
106*3ec77017SAdrien Destugues 			b.type = natural_chunk::NUMBER;
107*3ec77017SAdrien Destugues 		else
108*3ec77017SAdrien Destugues 			b.type = natural_chunk::ASCII;
109*3ec77017SAdrien Destugues 
110*3ec77017SAdrien Destugues 		// Check if we reached the end of either string
111*3ec77017SAdrien Destugues 		if (a.type == natural_chunk::END)
112*3ec77017SAdrien Destugues 			return b.type == natural_chunk::END ? 0 : -1;
113*3ec77017SAdrien Destugues 		if (b.type == natural_chunk::END)
114*3ec77017SAdrien Destugues 			return 1;
115*3ec77017SAdrien Destugues 
116*3ec77017SAdrien Destugues 		if (a.type != b.type) {
117*3ec77017SAdrien Destugues 			// Different chunk types, just compare the remaining strings
118*3ec77017SAdrien Destugues 			return strcasecmp(&stringA[indexA], &stringB[indexB]);
119*3ec77017SAdrien Destugues 		}
120*3ec77017SAdrien Destugues 
121*3ec77017SAdrien Destugues 		// Fetch the next chunks
122*3ec77017SAdrien Destugues 		indexA += FetchNaturalChunk(a, &stringA[indexA]);
123*3ec77017SAdrien Destugues 		indexB += FetchNaturalChunk(b, &stringB[indexB]);
124*3ec77017SAdrien Destugues 
125*3ec77017SAdrien Destugues 		// Compare the two chunks based on their type
126*3ec77017SAdrien Destugues 		if (a.type == natural_chunk::ASCII) {
127*3ec77017SAdrien Destugues 			// String chunks
128*3ec77017SAdrien Destugues 			int result = strcasecmp(a.buffer, b.buffer);
129*3ec77017SAdrien Destugues 			if (result != 0)
130*3ec77017SAdrien Destugues 				return result;
131*3ec77017SAdrien Destugues 		} else {
132*3ec77017SAdrien Destugues 			// Number chunks - they are compared as strings to allow an
133*3ec77017SAdrien Destugues 			// almost arbitrary number of digits.
134*3ec77017SAdrien Destugues 			if (a.length > b.length)
135*3ec77017SAdrien Destugues 				return 1;
136*3ec77017SAdrien Destugues 			if (a.length < b.length)
137*3ec77017SAdrien Destugues 				return -1;
138*3ec77017SAdrien Destugues 
139*3ec77017SAdrien Destugues 			int result = strcmp(a.buffer, b.buffer);
140*3ec77017SAdrien Destugues 			if (result != 0)
141*3ec77017SAdrien Destugues 				return result;
142*3ec77017SAdrien Destugues 		}
143*3ec77017SAdrien Destugues 
144*3ec77017SAdrien Destugues 		// The chunks were equal, proceed with the next chunk
145*3ec77017SAdrien Destugues 	}
146*3ec77017SAdrien Destugues 
147*3ec77017SAdrien Destugues 	return 0;
148*3ec77017SAdrien Destugues }
149*3ec77017SAdrien Destugues 
150*3ec77017SAdrien Destugues 
151*3ec77017SAdrien Destugues } // namespace BPrivate
152