HTTPInputStream.cpp

Go to the documentation of this file.
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>

#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>

#include "CS240Exception.h"
#include "StringUtil.h"

#include "HTTPInputStream.h"
00015 
00016 using namespace std;
00017 
00018 
00019 namespace
00020 {
00021     bool IsSuccessfulResponse(int statusCode)
00022     {
00023         static const int SC_OK = 200;
00024         static const int SC_NON_AUTHORITATIVE_INFORMATION = 203;
00025     
00026         return (statusCode == SC_OK ||
00027             statusCode == SC_NON_AUTHORITATIVE_INFORMATION);
00028     }
00029 
00030     bool IsRedirectResponse(int statusCode)
00031     {
00032         static const int SC_MOVED_PERMANENTLY = 301;
00033         static const int SC_FOUND = 302;
00034         static const int SC_SEE_OTHER = 303;
00035         static const int SC_TEMPORARY_REDIRECT = 307;
00036     
00037         return (statusCode == SC_MOVED_PERMANENTLY ||
00038                 statusCode == SC_FOUND ||
00039                 statusCode == SC_SEE_OTHER ||
00040                 statusCode == SC_TEMPORARY_REDIRECT);
00041     }
00042 
00043     tHTTPResponse ConvertHTTPStatus (int statuscode)
00044     {
00045         if (IsSuccessfulResponse(statuscode))
00046             return kHTTPSuccess;
00047         else if (IsRedirectResponse(statuscode))
00048             return kHTTPRedirect;
00049         else
00050             return kHTTPUnknown;
00051     }
00052 }
00053 
00054 HTTPInputStream::HTTPInputStream(const std::string & url)
00055 {
00056     
00057     // NOTE: Good test cases for redirect handling:
00058     //          http://www.visio.com
00059     //          http://www.byu.edu
00060     //          http://www.utahjazz.com
00061 
00062     const int MAX_REDIRECTS = 5;
00063 
00064     // following HTTP redirects requires us to connect more than once,
00065     // thus the need for the loop below
00066 
00067     string currentURL(url);
00068     int redirects = 0;
00069 
00070     bool stillRedirecting = true;
00071     while (stillRedirecting)
00072     {
00073         string reasonPhrase;
00074 
00075         Init();
00076 
00077         ParseURL(currentURL);
00078         //***cout << "[host=" << host << ", port=" << port << ", path=" << path << "]" << endl;
00079     
00080         OpenConnection();
00081         //***cout << "[connection established]" << endl;
00082     
00083         SendRequest();
00084         
00085         tHTTPResponse response = ParseHTTPStatusLine(reasonPhrase);
00086         //cout << "[status-code=" << statusCode << ", reason-phrase=" << reasonPhrase << "]" << endl;
00087         
00088         ParseHTTPHeaders();
00089 
00090         switch (response)
00091         {
00092             case kHTTPSuccess:
00093                 stillRedirecting = false;
00094                 break;
00095 
00096             case kHTTPRedirect:
00097                 if (location.empty())
00098                     throw NetworkException("no Location for HTTP redirect: " + reasonPhrase);
00099                 if (++redirects <= MAX_REDIRECTS)
00100                 {
00101                     currentURL = location;
00102                     Close();
00103                 }
00104                 else
00105                     throw NetworkException("HTTP redirect limit exceeded");
00106                 break;
00107 
00108             case kHTTPUnknown:
00109                 throw NetworkException(string("HTTP request failed: ") + reasonPhrase);
00110                 break;
00111         }
00112     }
00113     location = currentURL;
00114 
00115     ReadByte(); 
00116 }
00117 
00118 void HTTPInputStream::Init() {
00119     host = "";
00120     port = 80;
00121     path = "/";
00122     sockfd = -1;
00123     contentLength = -1;
00124     location = "";
00125     numRead = 0;
00126     done = false;
00127     nextByte = 0;   
00128 }
00129 
00130 HTTPInputStream::~HTTPInputStream()
00131 {
00132     Close();
00133 }
00134 
00135 bool HTTPInputStream::IsOpen() const
00136 {
00137     return (0 <= sockfd);
00138 }
00139 
00140 bool HTTPInputStream::IsDone() const
00141 {
00142     return done;
00143 }
00144 
00145 void HTTPInputStream::ReadByte()
00146 {
00147     char c;
00148     int nread = read(sockfd, &c, 1);
00149     if (nread == 1)
00150     {
00151         ++numRead;
00152         nextByte = c;
00153     }
00154     else if (nread == 0)
00155     {
00156         //***cout << "[end of stream - contentLength: " << contentLength << ", numRead: " << numRead << "]" << endl;
00157         done = true;
00158         nextByte = 0;
00159         if (0 <= contentLength && numRead != contentLength)
00160         {
00161             throw NetworkException("number of bytes read differs from content length");
00162         }
00163     }
00164     else
00165     {
00166         throw NetworkException("error occurred reading HTTP response");
00167     }
00168 }
00169 
00170 char HTTPInputStream::Peek()
00171 {
00172     if (!IsOpen())
00173         throw IllegalStateException("stream is not open");
00174     else if (IsDone())
00175         throw IllegalStateException("stream is done");
00176     else
00177     {
00178         return nextByte;
00179              
00180     }
00181 }
00182 
00183 char HTTPInputStream::Read()
00184 {
00185     if (!IsOpen())
00186         throw IllegalStateException("stream is not open");
00187     else if (IsDone())
00188         throw IllegalStateException("stream is done");
00189     else
00190     {
00191         char c = nextByte;
00192         ReadByte();
00193         return c;    
00194     }
00195 }
00196 
00197 
00198 void HTTPInputStream::Close()
00199 {
00200     if (IsOpen())
00201     {
00202         close(sockfd);
00203         sockfd = -1;
00204     }
00205 }
00206 
00207 void HTTPInputStream::ParseURL(const std::string & url)
00208 {
00209     const string prefix = "http://";
00210 
00211     string::const_iterator p = url.begin() + prefix.length();
00212 
00213     for (; p != url.end() && *p != ':' && *p != '/'; ++p)
00214         host.push_back(*p);
00215 
00216     if (host.empty())
00217         throw InvalidURLException(url);
00218     else if (p == url.end())
00219         return;
00220 
00221     if (*p == ':')
00222     {
00223         ++p;
00224         string portStr = "";
00225         for (; p != url.end() && isdigit(*p); ++p)
00226             portStr.push_back(*p);
00227         port = atoi(portStr.c_str());
00228     }
00229 
00230     if (p == url.end())
00231         return;
00232     else if (*p != '/')
00233         throw InvalidURLException(url);
00234 
00235     int idx = p - url.begin();
00236     path = url.substr(idx); 
00237 }
00238 
00239 void HTTPInputStream::OpenConnection()
00240 {
00241     struct hostent *hostData = gethostbyname(host.c_str());
00242     if (hostData == NULL || hostData->h_addr == NULL)
00243     {
00244         throw NetworkException(string("could not resolve host name ") + host);
00245     }
00246     
00247     struct sockaddr_in hostAddr;
00248     bzero(&hostAddr, sizeof(hostAddr));
00249     hostAddr.sin_family = AF_INET;
00250     hostAddr.sin_port = htons(port);
00251     memcpy(&hostAddr.sin_addr, hostData->h_addr, hostData->h_length);
00252 
00253     int s = socket(AF_INET, SOCK_STREAM, 0);
00254     if (s < 0)
00255     {
00256         throw NetworkException("could not create socket");
00257     }
00258 
00259     if (connect(s, (struct sockaddr *)&hostAddr, sizeof(hostAddr)) < 0)
00260     {
00261         close(s);
00262         throw NetworkException(string("could not connect to host ") + host);
00263     }
00264 
00265     sockfd = s;
00266 }
00267 
00268 void HTTPInputStream::SendRequest()
00269 {
00270     
00271     ostringstream request;
00272     request << "GET " << path;
00273     request << " HTTP/1.0\r\n";
00274     request << "Host: " << host << ":" << port << "\r\n\r\n";
00275     
00276     string completeRequest = request.str();
00277 
00278     if (write(sockfd, completeRequest.c_str(), completeRequest.length()) != (int)completeRequest.length())
00279         throw NetworkException("could not send HTTP request");
00280 }
00281 
00282 tHTTPResponse HTTPInputStream::ParseHTTPStatusLine(std::string & reasonPhrase)
00283 {
00284     string line;
00285     ReadHeaderLine(line);
00286 
00287     unsigned int firstSpacePos = line.find(' ', 0);
00288     if (firstSpacePos == string::npos)
00289         throw NetworkException(string("invalid HTTP status line: ") + line);
00290 
00291     unsigned int secondSpacePos = line.find(' ', firstSpacePos + 1);
00292     if (secondSpacePos == string::npos)
00293         throw NetworkException(string("invalid HTTP status line: ") + line);
00294 
00295     unsigned int statusCodePos = firstSpacePos + 1;
00296     string statusCodeStr = line.substr(statusCodePos, (secondSpacePos - statusCodePos)); 
00297     int statusCode = atoi(statusCodeStr.c_str());
00298     if (statusCode < 100)
00299         throw NetworkException(string("invalid HTTP status line: ") + line);
00300 
00301     unsigned int reasonPhrasePos = secondSpacePos + 1;
00302     reasonPhrase = line.substr(reasonPhrasePos);
00303 
00304     return ConvertHTTPStatus (statusCode);
00305 }
00306 
00307 void HTTPInputStream::ParseHTTPHeaders()
00308 {
00309     while (true)
00310     {
00311         string line;
00312         ReadHeaderLine(line);
00313 
00314         if (line.empty())
00315             break;
00316         else if (line.find("Content-Length:") == 0)
00317             ParseContentLength(line);
00318         else if (line.find("Location:") == 0)
00319             ParseLocation(line);
00320     }
00321 }
00322 
00323 void HTTPInputStream::ParseContentLength(const std::string & line)
00324 {
00325     contentLength = -1;
00326 
00327     const string prefix = "Content-Length:";
00328 
00329     string::const_iterator p = line.begin() + prefix.length();
00330     for (; p != line.end() && isspace(*p); ++p)
00331         ;
00332     if (p == line.end())
00333         throw NetworkException(string("invalid HTTP content length header: ") + line);
00334 
00335     string length;
00336     for (; p != line.end() && isdigit(*p); ++p)
00337         length.push_back(*p);
00338     if (p != line.end())
00339         throw NetworkException(string("invalid HTTP content length header: ") + line);
00340 
00341     contentLength = atoi(length.c_str());
00342 }
00343 
00344 void HTTPInputStream::ParseLocation(const std::string & line)
00345 {
00346     const string prefix = "Location:";
00347 
00348     string::const_iterator p = line.begin() + prefix.length();
00349     for (; p != line.end() && isspace(*p); ++p)
00350         ;
00351     if (p == line.end())
00352         throw NetworkException(string("invalid HTTP location header: ") + line);
00353 
00354     location = "";
00355     std::copy(p, line.end(), std::back_inserter(location));
00356 
00357     if (!StringUtil::IsPrefix(location, "http:"))
00358         throw NetworkException(string("Unsupported redirect location: ") + location);
00359 }
00360 
00361 void HTTPInputStream::ReadHeaderLine(std::string & line)
00362 {
00363 
00364     /*** temporary proxy server bug work-around ***/
00365 
00366     while (true)
00367     {
00368         char c;
00369         int nread = read(sockfd, &c, 1);
00370         if (nread == 1)
00371         {
00372             switch (c)
00373             {
00374                 case '\r':
00375                     // skip carriage returns
00376                     break;
00377 
00378                 case '\n':
00379                     // line feed indicates end of header
00380                     return;
00381 
00382                 default:
00383                     line.push_back(c);
00384                     break;
00385             }
00386         }
00387         else
00388             throw NetworkException("invalid HTTP header");
00389     }
00390 
00391     /*** this is the real code ***/
00392 
00393     /***
00394     bool gotCR = false;
00395     while (true) {
00396         char c;
00397         int nread = read(sockfd, &c, 1);
00398         if (nread == 1) {
00399             if (gotCR) {
00400     if (c == '\n') {
00401         return;
00402     }
00403     else {
00404         throw NetworkException("invalid HTTP header");
00405     }
00406             }
00407             else if (c == '\r') {
00408     gotCR = true;
00409             }
00410             else if (c == '\n') {
00411     throw NetworkException("invalid HTTP header");  
00412             }
00413             else {
00414     line.push_back(c);
00415             }
00416         }
00417         else {
00418             throw NetworkException("invalid HTTP header");
00419         }
00420     }
00421     ***/
00422 }
00423 
00424 void HTTPInputStream::DumpResponse()
00425 {
00426     while (!IsDone()) {
00427         char c = Read();
00428         cout << (char)c;
00429     }
00430 }
00431 

Generated on Wed Jul 7 16:30:27 2010 for CS240Utils by  doxygen 1.5.8