xref: /haiku/src/add-ons/kernel/file_systems/websearchfs/duckduckgo_request.cpp (revision 96e28a400a120f6f1289929c8c92f68bb2dbaf93)
/*
 * Copyright 2004-2008, François Revol, <revol@free.fr>.
 * Distributed under the terms of the MIT License.
 */
5*96e28a40SPulkoMandy 
6*96e28a40SPulkoMandy #include <OS.h>
7*96e28a40SPulkoMandy #include <KernelExport.h>
8*96e28a40SPulkoMandy #include <stdio.h>
9*96e28a40SPulkoMandy #include <stdlib.h>
10*96e28a40SPulkoMandy #include <string.h>
11*96e28a40SPulkoMandy #include <netinet/in.h>
12*96e28a40SPulkoMandy #include <arpa/inet.h>
13*96e28a40SPulkoMandy #include <malloc.h>
14*96e28a40SPulkoMandy #include <sys/socket.h>
15*96e28a40SPulkoMandy #include "duckduckgo_request.h"
16*96e28a40SPulkoMandy 
17*96e28a40SPulkoMandy #include "websearchfs.h"
18*96e28a40SPulkoMandy #include "lists2.h"
19*96e28a40SPulkoMandy #include "settings.h"
20*96e28a40SPulkoMandy #include "string_utils.h"
21*96e28a40SPulkoMandy 
22*96e28a40SPulkoMandy #include <UrlProtocolRoster.h>
23*96e28a40SPulkoMandy #include <UrlRequest.h>
24*96e28a40SPulkoMandy 
25*96e28a40SPulkoMandy using namespace BPrivate::Network;
26*96e28a40SPulkoMandy 
27*96e28a40SPulkoMandy #define DO_PUBLISH
28*96e28a40SPulkoMandy //#define FAKE_INPUT "/boot/home/devel/drivers/websearchfs/log2.html"
29*96e28a40SPulkoMandy 
30*96e28a40SPulkoMandy #define TESTURL "http://www.duckduckgo.com/search?hl=en&ie=UTF-8&num=50&q=beos"
31*96e28a40SPulkoMandy #define BASEURL "https://html.duckduckgo.com/html/?kd=-1"
32*96e28a40SPulkoMandy 	// kd=-1 disables redirection of all URLs through duckduckgo servers
33*96e28a40SPulkoMandy #define FMT_NUM "&num=%u"
34*96e28a40SPulkoMandy 	// TODO remove this, duckduckgo does not have this option
35*96e28a40SPulkoMandy #define FMT_Q "&q=%s"
36*96e28a40SPulkoMandy 
37*96e28a40SPulkoMandy /* parse_duckduckgo_html.c */
38*96e28a40SPulkoMandy extern int duckduckgo_parse_results(const char *html, size_t htmlsize, long *nextid, struct duckduckgo_result **results);
39*96e28a40SPulkoMandy 
40*96e28a40SPulkoMandy 
duckduckgo_request_process(struct duckduckgo_request * req)41*96e28a40SPulkoMandy status_t duckduckgo_request_process(struct duckduckgo_request *req)
42*96e28a40SPulkoMandy {
43*96e28a40SPulkoMandy 	struct BUrlRequest *cnx = NULL;
44*96e28a40SPulkoMandy 	struct duckduckgo_result *res;
45*96e28a40SPulkoMandy 	status_t err;
46*96e28a40SPulkoMandy 	int count;
47*96e28a40SPulkoMandy 	char *p = NULL;
48*96e28a40SPulkoMandy 	char *url = NULL;
49*96e28a40SPulkoMandy 	BMallocIO output;
50*96e28a40SPulkoMandy 	thread_id t;
51*96e28a40SPulkoMandy 
52*96e28a40SPulkoMandy 	err = ENOMEM;
53*96e28a40SPulkoMandy 	req->cnx = cnx;
54*96e28a40SPulkoMandy #ifndef FAKE_INPUT
55*96e28a40SPulkoMandy 	p = urlify_string(req->query_string);
56*96e28a40SPulkoMandy 	if (!p)
57*96e28a40SPulkoMandy 		goto err_con;
58*96e28a40SPulkoMandy 
59*96e28a40SPulkoMandy 	err = ENOMEM;
60*96e28a40SPulkoMandy 	url = (char*)malloc(strlen(BASEURL)+strlen(FMT_NUM)+10+strlen(FMT_Q)+strlen(p)+2);
61*96e28a40SPulkoMandy 	if (!url)
62*96e28a40SPulkoMandy 		goto err_url;
63*96e28a40SPulkoMandy 	strcpy(url, BASEURL);
64*96e28a40SPulkoMandy 	sprintf(url+strlen(url), FMT_NUM, (unsigned int)max_results);
65*96e28a40SPulkoMandy 	sprintf(url+strlen(url), FMT_Q, p);
66*96e28a40SPulkoMandy 
67*96e28a40SPulkoMandy 	fprintf(stderr, "duckduckgo_request: final URL: %s\n", url);
68*96e28a40SPulkoMandy 
69*96e28a40SPulkoMandy 	cnx = BUrlProtocolRoster::MakeRequest(url, &output, NULL);
70*96e28a40SPulkoMandy 	if (cnx == NULL)
71*96e28a40SPulkoMandy 		return ENOMEM;
72*96e28a40SPulkoMandy 
73*96e28a40SPulkoMandy 	t = cnx->Run();
74*96e28a40SPulkoMandy 	wait_for_thread(t, &err);
75*96e28a40SPulkoMandy 
76*96e28a40SPulkoMandy 	fprintf(stderr, "duckduckgo_request: buffer @ %p, len %ld\n", output.Buffer(), output.BufferLength());
77*96e28a40SPulkoMandy 	{
78*96e28a40SPulkoMandy 		int fd;
79*96e28a40SPulkoMandy 		// debug output
80*96e28a40SPulkoMandy 		fd = open("/tmp/duckduckgo.html", O_CREAT|O_TRUNC|O_RDWR, 0644);
81*96e28a40SPulkoMandy 		write(fd, output.Buffer(), output.BufferLength());
82*96e28a40SPulkoMandy 		close(fd);
83*96e28a40SPulkoMandy 	}
84*96e28a40SPulkoMandy #else
85*96e28a40SPulkoMandy 	{
86*96e28a40SPulkoMandy 		int fd;
87*96e28a40SPulkoMandy 		struct stat st;
88*96e28a40SPulkoMandy 		// debug output
89*96e28a40SPulkoMandy 		fd = open(FAKE_INPUT, O_RDONLY, 0644);
90*96e28a40SPulkoMandy 		if (fd < 0)
91*96e28a40SPulkoMandy 			return -1;
92*96e28a40SPulkoMandy 		if (fstat(fd, &st) < 0) {
93*96e28a40SPulkoMandy 			close(fd);
94*96e28a40SPulkoMandy 			return -1;
95*96e28a40SPulkoMandy 		}
96*96e28a40SPulkoMandy 		cnx->datalen = st.st_size;
97*96e28a40SPulkoMandy 		cnx->data = malloc(cnx->datalen);
98*96e28a40SPulkoMandy 		if (!cnx->data)
99*96e28a40SPulkoMandy 			return ENOMEM;
100*96e28a40SPulkoMandy 		if (read(fd, cnx->data, cnx->datalen) < cnx->datalen)
101*96e28a40SPulkoMandy 			return -1;
102*96e28a40SPulkoMandy 		close(fd);
103*96e28a40SPulkoMandy 	}
104*96e28a40SPulkoMandy #endif /* FAKE_INPUT */
105*96e28a40SPulkoMandy 	err = count = duckduckgo_parse_results((const char*)output.Buffer(), output.BufferLength(),
106*96e28a40SPulkoMandy 		&req->nextid, &req->results);
107*96e28a40SPulkoMandy 	if (err < 0)
108*96e28a40SPulkoMandy 		goto err_get;
109*96e28a40SPulkoMandy #ifdef DO_PUBLISH
110*96e28a40SPulkoMandy 	while ((res = SLL_DEQUEUE(req->results, next))) {
111*96e28a40SPulkoMandy 		res->next = NULL;
112*96e28a40SPulkoMandy 		websearchfs_push_result_to_query(req, res);
113*96e28a40SPulkoMandy 	}
114*96e28a40SPulkoMandy #endif
115*96e28a40SPulkoMandy 	free(url);
116*96e28a40SPulkoMandy 	free(p);
117*96e28a40SPulkoMandy 	// request is kept and deleted in duckduckgo_request_close
118*96e28a40SPulkoMandy 	return B_OK;
119*96e28a40SPulkoMandy 
120*96e28a40SPulkoMandy 
121*96e28a40SPulkoMandy err_get:
122*96e28a40SPulkoMandy 	free(url);
123*96e28a40SPulkoMandy err_url:
124*96e28a40SPulkoMandy 	free(p);
125*96e28a40SPulkoMandy err_con:
126*96e28a40SPulkoMandy 	delete cnx;
127*96e28a40SPulkoMandy 	req->cnx = NULL;
128*96e28a40SPulkoMandy 	return err;
129*96e28a40SPulkoMandy }
130*96e28a40SPulkoMandy 
duckduckgo_request_process_async(struct duckduckgo_request * req)131*96e28a40SPulkoMandy status_t duckduckgo_request_process_async(struct duckduckgo_request *req)
132*96e28a40SPulkoMandy {
133*96e28a40SPulkoMandy 	return ENOSYS;
134*96e28a40SPulkoMandy }
135*96e28a40SPulkoMandy 
duckduckgo_request_close(struct duckduckgo_request * req)136*96e28a40SPulkoMandy status_t duckduckgo_request_close(struct duckduckgo_request *req)
137*96e28a40SPulkoMandy {
138*96e28a40SPulkoMandy 	if (!req)
139*96e28a40SPulkoMandy 		return EINVAL;
140*96e28a40SPulkoMandy 	if (!req->cnx)
141*96e28a40SPulkoMandy 		return B_OK;
142*96e28a40SPulkoMandy 	delete(req->cnx);
143*96e28a40SPulkoMandy 	req->cnx = NULL;
144*96e28a40SPulkoMandy 	return B_OK;
145*96e28a40SPulkoMandy }
146*96e28a40SPulkoMandy 
duckduckgo_request_open(const char * query_string,struct fs_volume * volume,struct fs_node * query_node,struct duckduckgo_request ** req)147*96e28a40SPulkoMandy status_t duckduckgo_request_open(const char *query_string, struct fs_volume *volume, struct fs_node *query_node, struct duckduckgo_request **req)
148*96e28a40SPulkoMandy {
149*96e28a40SPulkoMandy 	struct duckduckgo_request *r;
150*96e28a40SPulkoMandy 	if (!req)
151*96e28a40SPulkoMandy 		return EINVAL;
152*96e28a40SPulkoMandy 	r = (duckduckgo_request*)malloc(sizeof(struct duckduckgo_request));
153*96e28a40SPulkoMandy 	if (!r)
154*96e28a40SPulkoMandy 		return ENOMEM;
155*96e28a40SPulkoMandy 	memset(r, 0, sizeof(struct duckduckgo_request));
156*96e28a40SPulkoMandy 	r->query_string = strdup(query_string);
157*96e28a40SPulkoMandy 	r->volume = volume;
158*96e28a40SPulkoMandy 	r->query_node = query_node;
159*96e28a40SPulkoMandy 	*req = r;
160*96e28a40SPulkoMandy 	return B_OK;
161*96e28a40SPulkoMandy }
162*96e28a40SPulkoMandy 
duckduckgo_request_free(struct duckduckgo_request * req)163*96e28a40SPulkoMandy status_t duckduckgo_request_free(struct duckduckgo_request *req)
164*96e28a40SPulkoMandy {
165*96e28a40SPulkoMandy 	if (!req)
166*96e28a40SPulkoMandy 		return EINVAL;
167*96e28a40SPulkoMandy 	free(req->query_string);
168*96e28a40SPulkoMandy 	free(req);
169*96e28a40SPulkoMandy 	return B_OK;
170*96e28a40SPulkoMandy }
171