1*96e28a40SPulkoMandy /*
2*96e28a40SPulkoMandy * Copyright 2004-2008, François Revol, <revol@free.fr>.
3*96e28a40SPulkoMandy * Distributed under the terms of the MIT License.
4*96e28a40SPulkoMandy */
5*96e28a40SPulkoMandy
6*96e28a40SPulkoMandy #include <OS.h>
7*96e28a40SPulkoMandy #include <KernelExport.h>
8*96e28a40SPulkoMandy #include <stdio.h>
9*96e28a40SPulkoMandy #include <stdlib.h>
10*96e28a40SPulkoMandy #include <string.h>
11*96e28a40SPulkoMandy #include <netinet/in.h>
12*96e28a40SPulkoMandy #include <arpa/inet.h>
13*96e28a40SPulkoMandy #include <malloc.h>
14*96e28a40SPulkoMandy #include <sys/socket.h>
15*96e28a40SPulkoMandy #include "duckduckgo_request.h"
16*96e28a40SPulkoMandy
17*96e28a40SPulkoMandy #include "websearchfs.h"
18*96e28a40SPulkoMandy #include "lists2.h"
19*96e28a40SPulkoMandy #include "settings.h"
20*96e28a40SPulkoMandy #include "string_utils.h"
21*96e28a40SPulkoMandy
22*96e28a40SPulkoMandy #include <UrlProtocolRoster.h>
23*96e28a40SPulkoMandy #include <UrlRequest.h>
24*96e28a40SPulkoMandy
25*96e28a40SPulkoMandy using namespace BPrivate::Network;
26*96e28a40SPulkoMandy
27*96e28a40SPulkoMandy #define DO_PUBLISH
28*96e28a40SPulkoMandy //#define FAKE_INPUT "/boot/home/devel/drivers/websearchfs/log2.html"
29*96e28a40SPulkoMandy
30*96e28a40SPulkoMandy #define TESTURL "http://www.duckduckgo.com/search?hl=en&ie=UTF-8&num=50&q=beos"
31*96e28a40SPulkoMandy #define BASEURL "https://html.duckduckgo.com/html/?kd=-1"
32*96e28a40SPulkoMandy // kd=-1 disables redirection of all URLs through duckduckgo servers
33*96e28a40SPulkoMandy #define FMT_NUM "&num=%u"
34*96e28a40SPulkoMandy // TODO remove this, duckduckgo does not have this option
35*96e28a40SPulkoMandy #define FMT_Q "&q=%s"
36*96e28a40SPulkoMandy
37*96e28a40SPulkoMandy /* parse_duckduckgo_html.c */
38*96e28a40SPulkoMandy extern int duckduckgo_parse_results(const char *html, size_t htmlsize, long *nextid, struct duckduckgo_result **results);
39*96e28a40SPulkoMandy
40*96e28a40SPulkoMandy
duckduckgo_request_process(struct duckduckgo_request * req)41*96e28a40SPulkoMandy status_t duckduckgo_request_process(struct duckduckgo_request *req)
42*96e28a40SPulkoMandy {
43*96e28a40SPulkoMandy struct BUrlRequest *cnx = NULL;
44*96e28a40SPulkoMandy struct duckduckgo_result *res;
45*96e28a40SPulkoMandy status_t err;
46*96e28a40SPulkoMandy int count;
47*96e28a40SPulkoMandy char *p = NULL;
48*96e28a40SPulkoMandy char *url = NULL;
49*96e28a40SPulkoMandy BMallocIO output;
50*96e28a40SPulkoMandy thread_id t;
51*96e28a40SPulkoMandy
52*96e28a40SPulkoMandy err = ENOMEM;
53*96e28a40SPulkoMandy req->cnx = cnx;
54*96e28a40SPulkoMandy #ifndef FAKE_INPUT
55*96e28a40SPulkoMandy p = urlify_string(req->query_string);
56*96e28a40SPulkoMandy if (!p)
57*96e28a40SPulkoMandy goto err_con;
58*96e28a40SPulkoMandy
59*96e28a40SPulkoMandy err = ENOMEM;
60*96e28a40SPulkoMandy url = (char*)malloc(strlen(BASEURL)+strlen(FMT_NUM)+10+strlen(FMT_Q)+strlen(p)+2);
61*96e28a40SPulkoMandy if (!url)
62*96e28a40SPulkoMandy goto err_url;
63*96e28a40SPulkoMandy strcpy(url, BASEURL);
64*96e28a40SPulkoMandy sprintf(url+strlen(url), FMT_NUM, (unsigned int)max_results);
65*96e28a40SPulkoMandy sprintf(url+strlen(url), FMT_Q, p);
66*96e28a40SPulkoMandy
67*96e28a40SPulkoMandy fprintf(stderr, "duckduckgo_request: final URL: %s\n", url);
68*96e28a40SPulkoMandy
69*96e28a40SPulkoMandy cnx = BUrlProtocolRoster::MakeRequest(url, &output, NULL);
70*96e28a40SPulkoMandy if (cnx == NULL)
71*96e28a40SPulkoMandy return ENOMEM;
72*96e28a40SPulkoMandy
73*96e28a40SPulkoMandy t = cnx->Run();
74*96e28a40SPulkoMandy wait_for_thread(t, &err);
75*96e28a40SPulkoMandy
76*96e28a40SPulkoMandy fprintf(stderr, "duckduckgo_request: buffer @ %p, len %ld\n", output.Buffer(), output.BufferLength());
77*96e28a40SPulkoMandy {
78*96e28a40SPulkoMandy int fd;
79*96e28a40SPulkoMandy // debug output
80*96e28a40SPulkoMandy fd = open("/tmp/duckduckgo.html", O_CREAT|O_TRUNC|O_RDWR, 0644);
81*96e28a40SPulkoMandy write(fd, output.Buffer(), output.BufferLength());
82*96e28a40SPulkoMandy close(fd);
83*96e28a40SPulkoMandy }
84*96e28a40SPulkoMandy #else
85*96e28a40SPulkoMandy {
86*96e28a40SPulkoMandy int fd;
87*96e28a40SPulkoMandy struct stat st;
88*96e28a40SPulkoMandy // debug output
89*96e28a40SPulkoMandy fd = open(FAKE_INPUT, O_RDONLY, 0644);
90*96e28a40SPulkoMandy if (fd < 0)
91*96e28a40SPulkoMandy return -1;
92*96e28a40SPulkoMandy if (fstat(fd, &st) < 0) {
93*96e28a40SPulkoMandy close(fd);
94*96e28a40SPulkoMandy return -1;
95*96e28a40SPulkoMandy }
96*96e28a40SPulkoMandy cnx->datalen = st.st_size;
97*96e28a40SPulkoMandy cnx->data = malloc(cnx->datalen);
98*96e28a40SPulkoMandy if (!cnx->data)
99*96e28a40SPulkoMandy return ENOMEM;
100*96e28a40SPulkoMandy if (read(fd, cnx->data, cnx->datalen) < cnx->datalen)
101*96e28a40SPulkoMandy return -1;
102*96e28a40SPulkoMandy close(fd);
103*96e28a40SPulkoMandy }
104*96e28a40SPulkoMandy #endif /* FAKE_INPUT */
105*96e28a40SPulkoMandy err = count = duckduckgo_parse_results((const char*)output.Buffer(), output.BufferLength(),
106*96e28a40SPulkoMandy &req->nextid, &req->results);
107*96e28a40SPulkoMandy if (err < 0)
108*96e28a40SPulkoMandy goto err_get;
109*96e28a40SPulkoMandy #ifdef DO_PUBLISH
110*96e28a40SPulkoMandy while ((res = SLL_DEQUEUE(req->results, next))) {
111*96e28a40SPulkoMandy res->next = NULL;
112*96e28a40SPulkoMandy websearchfs_push_result_to_query(req, res);
113*96e28a40SPulkoMandy }
114*96e28a40SPulkoMandy #endif
115*96e28a40SPulkoMandy free(url);
116*96e28a40SPulkoMandy free(p);
117*96e28a40SPulkoMandy // request is kept and deleted in duckduckgo_request_close
118*96e28a40SPulkoMandy return B_OK;
119*96e28a40SPulkoMandy
120*96e28a40SPulkoMandy
121*96e28a40SPulkoMandy err_get:
122*96e28a40SPulkoMandy free(url);
123*96e28a40SPulkoMandy err_url:
124*96e28a40SPulkoMandy free(p);
125*96e28a40SPulkoMandy err_con:
126*96e28a40SPulkoMandy delete cnx;
127*96e28a40SPulkoMandy req->cnx = NULL;
128*96e28a40SPulkoMandy return err;
129*96e28a40SPulkoMandy }
130*96e28a40SPulkoMandy
duckduckgo_request_process_async(struct duckduckgo_request * req)131*96e28a40SPulkoMandy status_t duckduckgo_request_process_async(struct duckduckgo_request *req)
132*96e28a40SPulkoMandy {
133*96e28a40SPulkoMandy return ENOSYS;
134*96e28a40SPulkoMandy }
135*96e28a40SPulkoMandy
duckduckgo_request_close(struct duckduckgo_request * req)136*96e28a40SPulkoMandy status_t duckduckgo_request_close(struct duckduckgo_request *req)
137*96e28a40SPulkoMandy {
138*96e28a40SPulkoMandy if (!req)
139*96e28a40SPulkoMandy return EINVAL;
140*96e28a40SPulkoMandy if (!req->cnx)
141*96e28a40SPulkoMandy return B_OK;
142*96e28a40SPulkoMandy delete(req->cnx);
143*96e28a40SPulkoMandy req->cnx = NULL;
144*96e28a40SPulkoMandy return B_OK;
145*96e28a40SPulkoMandy }
146*96e28a40SPulkoMandy
duckduckgo_request_open(const char * query_string,struct fs_volume * volume,struct fs_node * query_node,struct duckduckgo_request ** req)147*96e28a40SPulkoMandy status_t duckduckgo_request_open(const char *query_string, struct fs_volume *volume, struct fs_node *query_node, struct duckduckgo_request **req)
148*96e28a40SPulkoMandy {
149*96e28a40SPulkoMandy struct duckduckgo_request *r;
150*96e28a40SPulkoMandy if (!req)
151*96e28a40SPulkoMandy return EINVAL;
152*96e28a40SPulkoMandy r = (duckduckgo_request*)malloc(sizeof(struct duckduckgo_request));
153*96e28a40SPulkoMandy if (!r)
154*96e28a40SPulkoMandy return ENOMEM;
155*96e28a40SPulkoMandy memset(r, 0, sizeof(struct duckduckgo_request));
156*96e28a40SPulkoMandy r->query_string = strdup(query_string);
157*96e28a40SPulkoMandy r->volume = volume;
158*96e28a40SPulkoMandy r->query_node = query_node;
159*96e28a40SPulkoMandy *req = r;
160*96e28a40SPulkoMandy return B_OK;
161*96e28a40SPulkoMandy }
162*96e28a40SPulkoMandy
duckduckgo_request_free(struct duckduckgo_request * req)163*96e28a40SPulkoMandy status_t duckduckgo_request_free(struct duckduckgo_request *req)
164*96e28a40SPulkoMandy {
165*96e28a40SPulkoMandy if (!req)
166*96e28a40SPulkoMandy return EINVAL;
167*96e28a40SPulkoMandy free(req->query_string);
168*96e28a40SPulkoMandy free(req);
169*96e28a40SPulkoMandy return B_OK;
170*96e28a40SPulkoMandy }
171