xref: /haiku/src/add-ons/kernel/file_systems/websearchfs/duckduckgo_request.cpp (revision ed24eb5ff12640d052171c6a7feba37fab8a75d1)
1 /*
2  * Copyright 2004-2008, François Revol, <revol@free.fr>.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 #include <OS.h>
7 #include <KernelExport.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <netinet/in.h>
12 #include <arpa/inet.h>
13 #include <malloc.h>
14 #include <sys/socket.h>
15 #include "duckduckgo_request.h"
16 
17 #include "websearchfs.h"
18 #include "lists2.h"
19 #include "settings.h"
20 #include "string_utils.h"
21 
22 #include <UrlProtocolRoster.h>
23 #include <UrlRequest.h>
24 
25 using namespace BPrivate::Network;
26 
27 #define DO_PUBLISH
28 //#define FAKE_INPUT "/boot/home/devel/drivers/websearchfs/log2.html"
29 
30 #define TESTURL "http://www.duckduckgo.com/search?hl=en&ie=UTF-8&num=50&q=beos"
31 #define BASEURL "https://html.duckduckgo.com/html/?kd=-1"
32 	// kd=-1 disables redirection of all URLs through duckduckgo servers
33 #define FMT_NUM "&num=%u"
34 	// TODO remove this, duckduckgo does not have this option
35 #define FMT_Q "&q=%s"
36 
37 /* parse_duckduckgo_html.c */
38 extern int duckduckgo_parse_results(const char *html, size_t htmlsize, long *nextid, struct duckduckgo_result **results);
39 
40 
41 status_t duckduckgo_request_process(struct duckduckgo_request *req)
42 {
43 	struct BUrlRequest *cnx = NULL;
44 	struct duckduckgo_result *res;
45 	status_t err;
46 	int count;
47 	char *p = NULL;
48 	char *url = NULL;
49 	BMallocIO output;
50 	thread_id t;
51 
52 	err = ENOMEM;
53 	req->cnx = cnx;
54 #ifndef FAKE_INPUT
55 	p = urlify_string(req->query_string);
56 	if (!p)
57 		goto err_con;
58 
59 	err = ENOMEM;
60 	url = (char*)malloc(strlen(BASEURL)+strlen(FMT_NUM)+10+strlen(FMT_Q)+strlen(p)+2);
61 	if (!url)
62 		goto err_url;
63 	strcpy(url, BASEURL);
64 	sprintf(url+strlen(url), FMT_NUM, (unsigned int)max_results);
65 	sprintf(url+strlen(url), FMT_Q, p);
66 
67 	fprintf(stderr, "duckduckgo_request: final URL: %s\n", url);
68 
69 	cnx = BUrlProtocolRoster::MakeRequest(url, &output, NULL);
70 	if (cnx == NULL)
71 		return ENOMEM;
72 
73 	t = cnx->Run();
74 	wait_for_thread(t, &err);
75 
76 	fprintf(stderr, "duckduckgo_request: buffer @ %p, len %ld\n", output.Buffer(), output.BufferLength());
77 	{
78 		int fd;
79 		// debug output
80 		fd = open("/tmp/duckduckgo.html", O_CREAT|O_TRUNC|O_RDWR, 0644);
81 		write(fd, output.Buffer(), output.BufferLength());
82 		close(fd);
83 	}
84 #else
85 	{
86 		int fd;
87 		struct stat st;
88 		// debug output
89 		fd = open(FAKE_INPUT, O_RDONLY, 0644);
90 		if (fd < 0)
91 			return -1;
92 		if (fstat(fd, &st) < 0) {
93 			close(fd);
94 			return -1;
95 		}
96 		cnx->datalen = st.st_size;
97 		cnx->data = malloc(cnx->datalen);
98 		if (!cnx->data)
99 			return ENOMEM;
100 		if (read(fd, cnx->data, cnx->datalen) < cnx->datalen)
101 			return -1;
102 		close(fd);
103 	}
104 #endif /* FAKE_INPUT */
105 	err = count = duckduckgo_parse_results((const char*)output.Buffer(), output.BufferLength(),
106 		&req->nextid, &req->results);
107 	if (err < 0)
108 		goto err_get;
109 #ifdef DO_PUBLISH
110 	while ((res = SLL_DEQUEUE(req->results, next))) {
111 		res->next = NULL;
112 		websearchfs_push_result_to_query(req, res);
113 	}
114 #endif
115 	free(url);
116 	free(p);
117 	// request is kept and deleted in duckduckgo_request_close
118 	return B_OK;
119 
120 
121 err_get:
122 	free(url);
123 err_url:
124 	free(p);
125 err_con:
126 	delete cnx;
127 	req->cnx = NULL;
128 	return err;
129 }
130 
131 status_t duckduckgo_request_process_async(struct duckduckgo_request *req)
132 {
133 	return ENOSYS;
134 }
135 
136 status_t duckduckgo_request_close(struct duckduckgo_request *req)
137 {
138 	if (!req)
139 		return EINVAL;
140 	if (!req->cnx)
141 		return B_OK;
142 	delete(req->cnx);
143 	req->cnx = NULL;
144 	return B_OK;
145 }
146 
147 status_t duckduckgo_request_open(const char *query_string, struct fs_volume *volume, struct fs_node *query_node, struct duckduckgo_request **req)
148 {
149 	struct duckduckgo_request *r;
150 	if (!req)
151 		return EINVAL;
152 	r = (duckduckgo_request*)malloc(sizeof(struct duckduckgo_request));
153 	if (!r)
154 		return ENOMEM;
155 	memset(r, 0, sizeof(struct duckduckgo_request));
156 	r->query_string = strdup(query_string);
157 	r->volume = volume;
158 	r->query_node = query_node;
159 	*req = r;
160 	return B_OK;
161 }
162 
163 status_t duckduckgo_request_free(struct duckduckgo_request *req)
164 {
165 	if (!req)
166 		return EINVAL;
167 	free(req->query_string);
168 	free(req);
169 	return B_OK;
170 }
171