Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ FetchContent_Declare(
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
FetchContent_MakeAvailable(googletest)
# Zlib
# Download the zlib v1.2.13 source archive and add it to this build.
FetchContent_Declare(
zlib
URL https://github.com/madler/zlib/archive/refs/tags/v1.2.13.zip
)
FetchContent_MakeAvailable(zlib)

# Steer find_package(ZLIB) toward the fetched source tree.
# NOTE(review): at configure time the fetched zlib has not been compiled yet,
# so the library found here may come from a system installation rather than
# from the fetched sources -- confirm which one ZLIB::ZLIB resolves to.
set(ZLIB_ROOT ${zlib_SOURCE_DIR})
find_package(ZLIB REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/src)

Expand All @@ -23,7 +32,7 @@ list(REMOVE_ITEM SOURCES "${PROJECT_SOURCE_DIR}/src/main.cpp")
add_library(my_b_lib ${SOURCES})

# Apply compile options to library
target_compile_options(my_b_lib PRIVATE -Wall -Wextra -Wpedantic -Werror)
target_compile_options(my_b_lib PRIVATE -Wall -Wextra -Wpedantic)

# Debug definition
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
Expand All @@ -34,7 +43,7 @@ else ()
endif()

find_package(OpenSSL REQUIRED)
target_link_libraries(my_b_lib OpenSSL::SSL OpenSSL::Crypto)
target_link_libraries(my_b_lib OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB)

# Main executable
add_executable(${PROJECT_NAME} src/main.cpp)
Expand Down
115 changes: 91 additions & 24 deletions src/http/HttpClient.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "HttpClient.h"

#include <arpa/inet.h>
#include <assert.h>
#include <http/HttpClient.h>
Expand All @@ -10,16 +9,16 @@
#include <openssl/ssl.h>
#include <sys/socket.h>
#include <unistd.h>

#include <zlib.h>
#include <algorithm>
#include <cctype>
#include <cstring>
#include <format>
#include <optional>
#include <regex>

#include "Types.h"
#include "logger.h"
#include "utils.h"

namespace http {

Expand Down Expand Up @@ -67,6 +66,7 @@ std::optional<HttpResponse> HttpClient::get(const std::string& url) {
std::string buffer = std::format("GET {} HTTP/1.1\r\n", params.value().path);
buffer.append(std::format("Host: {}\r\n", params.value().hostname));
buffer.append("User-Agent: mosa\r\n");
buffer.append("Accept-Encoding: gzip\r\n");
buffer.append("Connection: keep-alive\r\n");
buffer.append("\r\n");

Expand All @@ -84,18 +84,33 @@ std::optional<HttpResponse> HttpClient::get(const std::string& url) {
break;
}

// TODO: refactor redirect logic
if (resp.has_value()) {
// check for compression
const std::regex content_encoding_regex(
R"(\s*([a-zA-Z0-9_-]+)\s*(?:,\s*([a-zA-Z0-9_-]+)\s*)*)",
std::regex::ECMAScript | std::regex::icase);
std::smatch m;

if (resp->headers.contains("content-encoding") &&
std::regex_search(resp->headers.at("content-encoding"), m,
content_encoding_regex)) {
std::string text_output;
auto res = utils::ungzip(resp->body);
if (!res.has_value()) {
logger->err("Decompressing falied");
}
resp->body = res.value_or("");
}

// TODO: refactor redirect logic
if (should_redirect(resp.value())) {
if (m_redirect_counts >= MAX_CONSECUTIVE_REDIRS) {
logger->warn("Too many redirects. Halting further requests.");
return {};
}

logger->warn("Redirect");

std::string loc;
if (resp->headers.find("location") != resp->headers.end()) {
if (resp->headers.contains("location")) {
loc = resp->headers.at("location");
m_last_redirect = true;
if (loc.at(0) == '/') {
Expand All @@ -113,33 +128,27 @@ std::optional<HttpResponse> HttpClient::get(const std::string& url) {
m_redirect_counts = 0;
}
}

const bool should_cache =
!m_resp_cache.contains(cache_key) && resp->code == 200;

if (should_cache) {
// get max-age;
//
std::regex re(R"((?:^|[\s,])max-age\s*=\s*(\d+))", std::regex::icase);
std::smatch m;
uint32_t max_age = 0;
if (resp->headers.contains("cache-control")) {
auto cache_ctrl_str = resp->headers.at("cache-control");
if (std::regex_search(cache_ctrl_str, m, re)) {
max_age = std::stoi(m[1].str());
logger->warn("Max age = {}", max_age);
} else {
logger->warn("Couldnt find max age");
}
m_resp_cache[cache_key] = HttpRespCache{
resp->body, resp->headers, std::chrono::system_clock::now(), max_age};
} else {
logger->warn("Couldnt find cache control");
}
} else {
}

return resp;
}

// Reports whether the response's status code lies in the 3xx range
// (300 through 399, both inclusive).
bool HttpClient::should_redirect(const HttpResponse& r) const {
  const auto& status = r.code;
  if (status < 300) {
    return false;
  }
  return status <= 399;
}
Expand Down Expand Up @@ -366,18 +375,76 @@ std::pair<std::string, std::string> HttpClient::get_header_body(
return {};
}

uint16_t content_length{get_content_len(header_buffer)};
size_t content_length{get_content_len(header_buffer)};

bool is_chunked{false};

std::regex te_regex(R"(Transfer-Encoding:\s*chunked)", std::regex::icase);
if (std::regex_search(header_buffer, te_regex)) {
is_chunked = true;
logger->dbg("Is chnked");
}

std::string body_buffer{};
body_buffer.resize(content_length);
int total_bytes_read = 0;
while (total_bytes_read < content_length) {
int size = func(stream, body_buffer.data() + total_bytes_read,
content_length - total_bytes_read);
if (size <= 0) {
break;
logger->dbg("Content len: {}", content_length);
if (is_chunked) {
auto read_line = [&]() {
std::string line;
char c;
while (func(stream, &c, 1) > 0) {
line.push_back(c);
if (line.size() >= 2 && line.substr(line.size() - 2) == "\r\n") {
line.pop_back();
line.pop_back();
break;
}
}
return line;
};

while (1) {
// 1. Read the chunk size
std::string size_line = read_line();
if (size_line.empty()) break;
size_t chunk_size{};
try {
chunk_size = std::stoul(size_line, nullptr, 16);
} catch (...) {
break;
}

if (chunk_size == 0) {
read_line();
break;
}

// 2. Read up to 0..chunk_size
size_t total_read{};
std::string chunk_data(chunk_size, '\0');
while (total_read < chunk_size) {
int size = func(stream, chunk_data.data() + total_read,
chunk_size - total_read);
if (size <= 0) {
break;
}
total_read += size;
}

body_buffer.append(chunk_data);

// 3. Read trailing "\r\n"
read_line();
}
} else {
body_buffer.resize(content_length);
size_t total_bytes_read = 0;
while (total_bytes_read < content_length) {
int size = func(stream, body_buffer.data() + total_bytes_read,
content_length - total_bytes_read);
if (size <= 0) {
break;
}
}
total_bytes_read += size;
}

return {header_buffer, body_buffer};
Expand Down
1 change: 1 addition & 0 deletions src/http/HttpClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class HttpClient : public IHttpClient {
const std::string& url) const;
std::string get_cache_key(const HttpReqParams& params) const;
bool should_redirect(const HttpResponse& r) const;

Logger* logger;

std::unordered_map<std::string, std::pair<int, addrinfo*>> m_http_sockets;
Expand Down
5 changes: 0 additions & 5 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,6 @@ int main(int argc, char* argv[]) {
if (response && print_output) {
url.show(response->body);
}
std::this_thread::sleep_for(std::chrono::seconds(2));
response = url.request();
if (response && print_output) {
url.show(response->body);
}

return 0;
}
Expand Down
1 change: 1 addition & 0 deletions src/url/Url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void URL::show(std::string& body) {
std::cout << c;
}
}
std::cout << '\n';
}

bool URL::is_scheme_in(Scheme s) const { return m_data.scheme == s; }
Expand Down
49 changes: 32 additions & 17 deletions src/utils.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#include "utils.h"
#include <algorithm> // For std::find_if
#include <cctype> // For std::isspace
#include <functional> // For std::not1, std::ptr_fun (C++03, or use lambda in C++11+)
#include <zlib.h>
#include <algorithm>
#include <cctype>
#include <optional>
#include <sstream>
#include <string>
#include <vector>
Expand Down Expand Up @@ -41,22 +42,36 @@ void trim(std::string& s) {
rtrim(s);
}

// Trim from start (copying)
std::string ltrim_copy(std::string s) {
ltrim(s);
return s;
}
// Inflates a gzip-wrapped buffer with zlib and returns the decompressed bytes.
// Yields an empty optional when `compressed` is empty, when inflateInit2 does
// not return Z_OK, or when inflate returns anything other than Z_OK or
// Z_STREAM_END.
// NOTE(review): the rtrim_copy / trim_copy lines interleaved below look like
// removed-side lines of the diff view rather than part of this function --
// confirm against the actual file.
std::optional<std::string> ungzip(const std::string& compressed) {
if (compressed.empty()) return {};

// Trim from end (copying)
std::string rtrim_copy(std::string s) {
rtrim(s);
return s;
}
z_stream strm{};
// const is cast away only to fit the pointer into z_stream::next_in.
strm.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(compressed.data()));
// NOTE(review): the size is narrowed to uInt here; an input larger than uInt
// can represent would be truncated -- confirm callers never pass one.
strm.avail_in = static_cast<uInt>(compressed.size());

// Per the zlib manual, adding 16 to windowBits selects gzip-only decoding.
if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) {
return {};
}

std::string out;
const size_t chunkSize = 16 * 1024;
int ret;

// Each pass grows `out` by one chunk and lets inflate write into that new
// tail; the loop ends once inflate reports Z_STREAM_END.
do {
out.resize(out.size() + chunkSize);
strm.next_out = reinterpret_cast<Bytef*>(&out[out.size() - chunkSize]);
strm.avail_out = chunkSize;

ret = inflate(&strm, Z_NO_FLUSH);
// Any other return code aborts: release the stream state and report failure.
if (ret != Z_OK && ret != Z_STREAM_END) {
inflateEnd(&strm);
return {};
}
} while (ret != Z_STREAM_END);

// Trim from both ends (copying)
std::string trim_copy(std::string s) {
trim(s);
return s;
inflateEnd(&strm);
// Shrink to the byte count zlib reports, dropping the unused part of the
// last chunk.
out.resize(strm.total_out);
return out;
}

} // namespace utils
4 changes: 4 additions & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once
#include <optional>
#include <string>
#include <string_view>
#include <vector>
namespace utils {
std::vector<std::string> split_string(const std::string& s, char delim);
Expand All @@ -12,4 +14,6 @@ void rtrim(std::string& s);
// Trim from both ends (in place)
void trim(std::string& s);

// Decompress a gzip-encoded buffer; yields an empty optional on empty input
// or when zlib reports an error.
std::optional<std::string> ungzip(const std::string& compressed);

} // namespace utils
2 changes: 2 additions & 0 deletions tests/test_utilites.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include <gtest/gtest.h>
#include <zlib.h>
#include <string>
#include "utils.h"

TEST(Utils, SplitString) {
Expand Down