HTTPInputStream.cpp
Go to the documentation of this file.00001 #include <sstream>
00002 #include <iostream>
00003 #include <cctype>
00004 #include <cstdlib>
00005 #include <cstring>
00006 #include <unistd.h>
00007 #include <sys/types.h>
00008 #include <sys/socket.h>
00009 #include <netdb.h>
00010 #include <netinet/in.h>
00011 #include "CS240Exception.h"
00012 #include "StringUtil.h"
00013
00014 #include "HTTPInputStream.h"
00015
00016 using namespace std;
00017
00018
00019 namespace
00020 {
// Returns true when statusCode is one of the HTTP status codes this file
// treats as a successful response: 200 (OK) or 203 (Non-Authoritative
// Information). Every other code yields false.
bool IsSuccessfulResponse(int statusCode)
{
    switch (statusCode)
    {
        case 200: // OK
        case 203: // Non-Authoritative Information
            return true;

        default:
            return false;
    }
}
00029
// Returns true when statusCode is one of the HTTP redirect codes this file
// recognises: 301 (Moved Permanently), 302 (Found), 303 (See Other) or
// 307 (Temporary Redirect). Every other code yields false.
bool IsRedirectResponse(int statusCode)
{
    switch (statusCode)
    {
        case 301: // Moved Permanently
        case 302: // Found
        case 303: // See Other
        case 307: // Temporary Redirect
            return true;

        default:
            return false;
    }
}
00042
00043 tHTTPResponse ConvertHTTPStatus (int statuscode)
00044 {
00045 if (IsSuccessfulResponse(statuscode))
00046 return kHTTPSuccess;
00047 else if (IsRedirectResponse(statuscode))
00048 return kHTTPRedirect;
00049 else
00050 return kHTTPUnknown;
00051 }
00052 }
00053
00054 HTTPInputStream::HTTPInputStream(const std::string & url)
00055 {
00056
00057
00058
00059
00060
00061
00062 const int MAX_REDIRECTS = 5;
00063
00064
00065
00066
00067 string currentURL(url);
00068 int redirects = 0;
00069
00070 bool stillRedirecting = true;
00071 while (stillRedirecting)
00072 {
00073 string reasonPhrase;
00074
00075 Init();
00076
00077 ParseURL(currentURL);
00078
00079
00080 OpenConnection();
00081
00082
00083 SendRequest();
00084
00085 tHTTPResponse response = ParseHTTPStatusLine(reasonPhrase);
00086
00087
00088 ParseHTTPHeaders();
00089
00090 switch (response)
00091 {
00092 case kHTTPSuccess:
00093 stillRedirecting = false;
00094 break;
00095
00096 case kHTTPRedirect:
00097 if (location.empty())
00098 throw NetworkException("no Location for HTTP redirect: " + reasonPhrase);
00099 if (++redirects <= MAX_REDIRECTS)
00100 {
00101 currentURL = location;
00102 Close();
00103 }
00104 else
00105 throw NetworkException("HTTP redirect limit exceeded");
00106 break;
00107
00108 case kHTTPUnknown:
00109 throw NetworkException(string("HTTP request failed: ") + reasonPhrase);
00110 break;
00111 }
00112 }
00113 location = currentURL;
00114
00115 ReadByte();
00116 }
00117
00118 void HTTPInputStream::Init() {
00119 host = "";
00120 port = 80;
00121 path = "/";
00122 sockfd = -1;
00123 contentLength = -1;
00124 location = "";
00125 numRead = 0;
00126 done = false;
00127 nextByte = 0;
00128 }
00129
00130 HTTPInputStream::~HTTPInputStream()
00131 {
00132 Close();
00133 }
00134
00135 bool HTTPInputStream::IsOpen() const
00136 {
00137 return (0 <= sockfd);
00138 }
00139
00140 bool HTTPInputStream::IsDone() const
00141 {
00142 return done;
00143 }
00144
00145 void HTTPInputStream::ReadByte()
00146 {
00147 char c;
00148 int nread = read(sockfd, &c, 1);
00149 if (nread == 1)
00150 {
00151 ++numRead;
00152 nextByte = c;
00153 }
00154 else if (nread == 0)
00155 {
00156
00157 done = true;
00158 nextByte = 0;
00159 if (0 <= contentLength && numRead != contentLength)
00160 {
00161 throw NetworkException("number of bytes read differs from content length");
00162 }
00163 }
00164 else
00165 {
00166 throw NetworkException("error occurred reading HTTP response");
00167 }
00168 }
00169
00170 char HTTPInputStream::Peek()
00171 {
00172 if (!IsOpen())
00173 throw IllegalStateException("stream is not open");
00174 else if (IsDone())
00175 throw IllegalStateException("stream is done");
00176 else
00177 {
00178 return nextByte;
00179
00180 }
00181 }
00182
00183 char HTTPInputStream::Read()
00184 {
00185 if (!IsOpen())
00186 throw IllegalStateException("stream is not open");
00187 else if (IsDone())
00188 throw IllegalStateException("stream is done");
00189 else
00190 {
00191 char c = nextByte;
00192 ReadByte();
00193 return c;
00194 }
00195 }
00196
00197
00198 void HTTPInputStream::Close()
00199 {
00200 if (IsOpen())
00201 {
00202 close(sockfd);
00203 sockfd = -1;
00204 }
00205 }
00206
00207 void HTTPInputStream::ParseURL(const std::string & url)
00208 {
00209 const string prefix = "http://";
00210
00211 string::const_iterator p = url.begin() + prefix.length();
00212
00213 for (; p != url.end() && *p != ':' && *p != '/'; ++p)
00214 host.push_back(*p);
00215
00216 if (host.empty())
00217 throw InvalidURLException(url);
00218 else if (p == url.end())
00219 return;
00220
00221 if (*p == ':')
00222 {
00223 ++p;
00224 string portStr = "";
00225 for (; p != url.end() && isdigit(*p); ++p)
00226 portStr.push_back(*p);
00227 port = atoi(portStr.c_str());
00228 }
00229
00230 if (p == url.end())
00231 return;
00232 else if (*p != '/')
00233 throw InvalidURLException(url);
00234
00235 int idx = p - url.begin();
00236 path = url.substr(idx);
00237 }
00238
00239 void HTTPInputStream::OpenConnection()
00240 {
00241 struct hostent *hostData = gethostbyname(host.c_str());
00242 if (hostData == NULL || hostData->h_addr == NULL)
00243 {
00244 throw NetworkException(string("could not resolve host name ") + host);
00245 }
00246
00247 struct sockaddr_in hostAddr;
00248 bzero(&hostAddr, sizeof(hostAddr));
00249 hostAddr.sin_family = AF_INET;
00250 hostAddr.sin_port = htons(port);
00251 memcpy(&hostAddr.sin_addr, hostData->h_addr, hostData->h_length);
00252
00253 int s = socket(AF_INET, SOCK_STREAM, 0);
00254 if (s < 0)
00255 {
00256 throw NetworkException("could not create socket");
00257 }
00258
00259 if (connect(s, (struct sockaddr *)&hostAddr, sizeof(hostAddr)) < 0)
00260 {
00261 close(s);
00262 throw NetworkException(string("could not connect to host ") + host);
00263 }
00264
00265 sockfd = s;
00266 }
00267
00268 void HTTPInputStream::SendRequest()
00269 {
00270
00271 ostringstream request;
00272 request << "GET " << path;
00273 request << " HTTP/1.0\r\n";
00274 request << "Host: " << host << ":" << port << "\r\n\r\n";
00275
00276 string completeRequest = request.str();
00277
00278 if (write(sockfd, completeRequest.c_str(), completeRequest.length()) != (int)completeRequest.length())
00279 throw NetworkException("could not send HTTP request");
00280 }
00281
00282 tHTTPResponse HTTPInputStream::ParseHTTPStatusLine(std::string & reasonPhrase)
00283 {
00284 string line;
00285 ReadHeaderLine(line);
00286
00287 unsigned int firstSpacePos = line.find(' ', 0);
00288 if (firstSpacePos == string::npos)
00289 throw NetworkException(string("invalid HTTP status line: ") + line);
00290
00291 unsigned int secondSpacePos = line.find(' ', firstSpacePos + 1);
00292 if (secondSpacePos == string::npos)
00293 throw NetworkException(string("invalid HTTP status line: ") + line);
00294
00295 unsigned int statusCodePos = firstSpacePos + 1;
00296 string statusCodeStr = line.substr(statusCodePos, (secondSpacePos - statusCodePos));
00297 int statusCode = atoi(statusCodeStr.c_str());
00298 if (statusCode < 100)
00299 throw NetworkException(string("invalid HTTP status line: ") + line);
00300
00301 unsigned int reasonPhrasePos = secondSpacePos + 1;
00302 reasonPhrase = line.substr(reasonPhrasePos);
00303
00304 return ConvertHTTPStatus (statusCode);
00305 }
00306
00307 void HTTPInputStream::ParseHTTPHeaders()
00308 {
00309 while (true)
00310 {
00311 string line;
00312 ReadHeaderLine(line);
00313
00314 if (line.empty())
00315 break;
00316 else if (line.find("Content-Length:") == 0)
00317 ParseContentLength(line);
00318 else if (line.find("Location:") == 0)
00319 ParseLocation(line);
00320 }
00321 }
00322
00323 void HTTPInputStream::ParseContentLength(const std::string & line)
00324 {
00325 contentLength = -1;
00326
00327 const string prefix = "Content-Length:";
00328
00329 string::const_iterator p = line.begin() + prefix.length();
00330 for (; p != line.end() && isspace(*p); ++p)
00331 ;
00332 if (p == line.end())
00333 throw NetworkException(string("invalid HTTP content length header: ") + line);
00334
00335 string length;
00336 for (; p != line.end() && isdigit(*p); ++p)
00337 length.push_back(*p);
00338 if (p != line.end())
00339 throw NetworkException(string("invalid HTTP content length header: ") + line);
00340
00341 contentLength = atoi(length.c_str());
00342 }
00343
00344 void HTTPInputStream::ParseLocation(const std::string & line)
00345 {
00346 const string prefix = "Location:";
00347
00348 string::const_iterator p = line.begin() + prefix.length();
00349 for (; p != line.end() && isspace(*p); ++p)
00350 ;
00351 if (p == line.end())
00352 throw NetworkException(string("invalid HTTP location header: ") + line);
00353
00354 location = "";
00355 std::copy(p, line.end(), std::back_inserter(location));
00356
00357 if (!StringUtil::IsPrefix(location, "http:"))
00358 throw NetworkException(string("Unsupported redirect location: ") + location);
00359 }
00360
00361 void HTTPInputStream::ReadHeaderLine(std::string & line)
00362 {
00363
00364
00365
00366 while (true)
00367 {
00368 char c;
00369 int nread = read(sockfd, &c, 1);
00370 if (nread == 1)
00371 {
00372 switch (c)
00373 {
00374 case '\r':
00375
00376 break;
00377
00378 case '\n':
00379
00380 return;
00381
00382 default:
00383 line.push_back(c);
00384 break;
00385 }
00386 }
00387 else
00388 throw NetworkException("invalid HTTP header");
00389 }
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422 }
00423
00424 void HTTPInputStream::DumpResponse()
00425 {
00426 while (!IsDone()) {
00427 char c = Read();
00428 cout << (char)c;
00429 }
00430 }
00431