diff --git a/.github/workflows/test-wasm.yml b/.github/workflows/test-wasm.yml index 6065ccd..5d3052f 100644 --- a/.github/workflows/test-wasm.yml +++ b/.github/workflows/test-wasm.yml @@ -48,5 +48,7 @@ jobs: - name: Run WebAssembly tests shell: bash -l {0} working-directory: wasm + env: + GIT2CPP_TEST_PRIVATE_TOKEN: ${{ secrets.GIT2CPP_TEST_PRIVATE_TOKEN }} run: | make test diff --git a/CMakeLists.txt b/CMakeLists.txt index 086c956..940e418 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,18 @@ set(GIT2CPP_SRC ${GIT2CPP_SOURCE_DIR}/utils/progress.hpp ${GIT2CPP_SOURCE_DIR}/utils/terminal_pager.cpp ${GIT2CPP_SOURCE_DIR}/utils/terminal_pager.hpp + ${GIT2CPP_SOURCE_DIR}/wasm/libgit2_internals.cpp + ${GIT2CPP_SOURCE_DIR}/wasm/libgit2_internals.hpp + ${GIT2CPP_SOURCE_DIR}/wasm/response.cpp + ${GIT2CPP_SOURCE_DIR}/wasm/response.hpp + ${GIT2CPP_SOURCE_DIR}/wasm/scope.cpp + ${GIT2CPP_SOURCE_DIR}/wasm/scope.hpp + ${GIT2CPP_SOURCE_DIR}/wasm/stream.cpp + ${GIT2CPP_SOURCE_DIR}/wasm/stream.hpp + ${GIT2CPP_SOURCE_DIR}/wasm/subtransport.cpp + ${GIT2CPP_SOURCE_DIR}/wasm/subtransport.hpp + ${GIT2CPP_SOURCE_DIR}/wasm/transport.cpp + ${GIT2CPP_SOURCE_DIR}/wasm/transport.hpp ${GIT2CPP_SOURCE_DIR}/wrapper/annotated_commit_wrapper.cpp ${GIT2CPP_SOURCE_DIR}/wrapper/annotated_commit_wrapper.hpp ${GIT2CPP_SOURCE_DIR}/wrapper/branch_wrapper.cpp diff --git a/README.md b/README.md index 4ac0793..9341b79 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,10 @@ used on any POSIX-compliant system. See `overview.md` for further details. +## Development workflow + +### Build + Developer's workflow using `micromamba` to manage the dependencies: ```bash @@ -23,19 +27,31 @@ make -j8 The `git2cpp` executable can then be run, e.g. `./git2cpp -v`. +### Test + The CLI is tested using `python`. 
From the top-level directory: ```bash pytest -v ``` +Some tests access the private repository at https://github.com/QuantStack/git2cpp-test-private using +a fine-grained GitHub Personal Access Token (PAT). These tests are skipped by default. To run them +you will need to obtain the PAT from one of the maintainers, and run the tests as follows: + +```bash +GIT2CPP_TEST_PRIVATE_TOKEN=<token> pytest -v +``` + +### pre-commit + `pre-commit` runs automatically on `git commit`. To run it manually use: ```bash pre-commit run --all-files ``` -# WebAssembly build and deployment +## WebAssembly build and deployment The `wasm` directory contains everything needed to build the local `git2cpp` source code as an WebAssembly [Emscripten-forge](https://emscripten-forge.org/) package, create local @@ -48,7 +64,7 @@ See the `README.md` in the `wasm` directory for further details. The latest `cockle` and JupyterLite `terminal` deployments using `git2cpp` are available at [https://quantstack.net/git2cpp](https://quantstack.net/git2cpp) -# Documentation +## Documentation The project documentation is generated from the `git2cpp` help pages. To build the documentation locally first build `git2cpp` as usual as described above, then install the documentation diff --git a/src/subcommand/clone_subcommand.cpp b/src/subcommand/clone_subcommand.cpp index 4d8ad08..af945ce 100644 --- a/src/subcommand/clone_subcommand.cpp +++ b/src/subcommand/clone_subcommand.cpp @@ -5,6 +5,7 @@ #include "../utils/credentials.hpp" #include "../utils/input_output.hpp" #include "../utils/progress.hpp" +#include "../wasm/scope.hpp" #include "../wrapper/repository_wrapper.hpp" clone_subcommand::clone_subcommand(const libgit2_object&, CLI::App& app) @@ -29,6 +30,8 @@ clone_subcommand::clone_subcommand(const libgit2_object&, CLI::App& app) void clone_subcommand::run() { + wasm_http_transport_scope transport; // Enables wasm http(s) transport. 
+ // m_depth = 0 means no shallow clone in libgit2, while // it is forbidden with git. Therefore we use another // sentinel value to detect full clone. diff --git a/src/subcommand/fetch_subcommand.cpp b/src/subcommand/fetch_subcommand.cpp index 8cbcf4d..e853f82 100644 --- a/src/subcommand/fetch_subcommand.cpp +++ b/src/subcommand/fetch_subcommand.cpp @@ -7,6 +7,7 @@ #include "../utils/credentials.hpp" #include "../utils/input_output.hpp" #include "../utils/progress.hpp" +#include "../wasm/scope.hpp" #include "../wrapper/repository_wrapper.hpp" fetch_subcommand::fetch_subcommand(const libgit2_object&, CLI::App& app) @@ -32,6 +33,8 @@ fetch_subcommand::fetch_subcommand(const libgit2_object&, CLI::App& app) void fetch_subcommand::run() { + wasm_http_transport_scope transport; // Enables wasm http(s) transport. + auto directory = get_current_git_path(); auto repo = repository_wrapper::open(directory); diff --git a/src/subcommand/push_subcommand.cpp b/src/subcommand/push_subcommand.cpp index 9e2af17..e5ad44f 100644 --- a/src/subcommand/push_subcommand.cpp +++ b/src/subcommand/push_subcommand.cpp @@ -6,6 +6,7 @@ #include "../utils/credentials.hpp" #include "../utils/progress.hpp" +#include "../wasm/scope.hpp" #include "../wrapper/repository_wrapper.hpp" push_subcommand::push_subcommand(const libgit2_object&, CLI::App& app) @@ -26,6 +27,8 @@ push_subcommand::push_subcommand(const libgit2_object&, CLI::App& app) void push_subcommand::run() { + wasm_http_transport_scope transport; // Enables wasm http(s) transport. 
+ auto directory = get_current_git_path(); auto repo = repository_wrapper::open(directory); diff --git a/src/utils/common.cpp b/src/utils/common.cpp index 3467751..9bf7f84 100644 --- a/src/utils/common.cpp +++ b/src/utils/common.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -135,3 +136,9 @@ std::vector split_input_at_newlines(std::string_view str) ); return std::vector{split.begin(), split.end()}; } + +std::string trim(const std::string& str) +{ + auto s = std::regex_replace(str, std::regex("^\\s+"), ""); + return std::regex_replace(s, std::regex("\\s+$"), ""); +} diff --git a/src/utils/common.hpp b/src/utils/common.hpp index 6c6e52f..d9059f2 100644 --- a/src/utils/common.hpp +++ b/src/utils/common.hpp @@ -76,3 +76,6 @@ class git_strarray_wrapper std::string read_file(const std::string& path); std::vector split_input_at_newlines(std::string_view str); + +// Remove whitespace from start and end of a string. +std::string trim(const std::string& str); diff --git a/src/utils/credentials.cpp b/src/utils/credentials.cpp index c8d43e3..4ed8307 100644 --- a/src/utils/credentials.cpp +++ b/src/utils/credentials.cpp @@ -25,7 +25,7 @@ int user_credentials( std::string username = username_from_url ? username_from_url : ""; if (username.empty()) { - username = prompt_input("Username: "); + username = trim(prompt_input("Username: ")); } if (username.empty()) { @@ -33,7 +33,7 @@ int user_credentials( return GIT_EAUTH; } - std::string password = prompt_input("Password: ", false); + std::string password = trim(prompt_input("Password: ", false)); if (password.empty()) { giterr_set_str(GIT_ERROR_HTTP, "No password specified"); diff --git a/src/wasm/libgit2_internals.cpp b/src/wasm/libgit2_internals.cpp new file mode 100644 index 0000000..5953424 --- /dev/null +++ b/src/wasm/libgit2_internals.cpp @@ -0,0 +1,66 @@ +#ifdef EMSCRIPTEN + +# include "libgit2_internals.hpp" + +// http method and service. 
+ +std::string name_for_method(git_http_method method) +{ + switch (method) + { + case GIT_HTTP_METHOD_GET: + return "GET"; + case GIT_HTTP_METHOD_POST: + return "POST"; + case GIT_HTTP_METHOD_CONNECT: + return "CONNECT"; + } + return ""; +} + +std::optional select_service(git_smart_service_t action) +{ + switch (action) + { + case GIT_SERVICE_UPLOADPACK_LS: + return http_service{ + GIT_HTTP_METHOD_GET, + "/info/refs?service=git-upload-pack", + nullptr, + "application/x-git-upload-pack-advertisement", + 1, + 0 + }; + case GIT_SERVICE_UPLOADPACK: + return http_service{ + GIT_HTTP_METHOD_POST, + "/git-upload-pack", + "application/x-git-upload-pack-request", + "application/x-git-upload-pack-result", + 0, + 0 + }; + case GIT_SERVICE_RECEIVEPACK_LS: + return http_service{ + GIT_HTTP_METHOD_GET, + "/info/refs?service=git-receive-pack", + nullptr, + "application/x-git-receive-pack-advertisement", + 1, + 0 + }; + case GIT_SERVICE_RECEIVEPACK: + return http_service{ + GIT_HTTP_METHOD_POST, + "/git-receive-pack", + "application/x-git-receive-pack-request", + "application/x-git-receive-pack-result", + 0, + 1 + }; + } + + return std::nullopt; +} + +#endif // EMSCRIPTEN diff --git a/src/wasm/libgit2_internals.hpp b/src/wasm/libgit2_internals.hpp new file mode 100644 index 0000000..25a90ba --- /dev/null +++ b/src/wasm/libgit2_internals.hpp @@ -0,0 +1,66 @@ +#pragma once + +#ifdef EMSCRIPTEN + +# include +# include + +# include +# include + +// Libgit2 internals that we want to use so they are reproduced here in some form. 
+ +// asserts + +# define GIT_ASSERT(expr) GIT_ASSERT_WITH_RETVAL(expr, -1) + +# define GIT_ASSERT_ARG(expr) GIT_ASSERT_ARG_WITH_RETVAL(expr, -1) + +# define GIT_ASSERT_WITH_RETVAL(expr, fail) \ + GIT_ASSERT__WITH_RETVAL(expr, 0, "unrecoverable internal error", fail) + +# define GIT_ASSERT_ARG_WITH_RETVAL(expr, fail) GIT_ASSERT__WITH_RETVAL(expr, 0, "invalid argument", fail) + +# define GIT_ASSERT__WITH_RETVAL(expr, code, msg, fail) \ + do \ + { \ + if (!(expr)) \ + { \ + git_error_set(code, "%s: '%s'", msg, #expr); \ + return fail; \ + } \ + } while (0) + +// http status code, method and service. + +# define GIT_HTTP_STATUS_CONTINUE 100 +# define GIT_HTTP_STATUS_OK 200 +# define GIT_HTTP_MOVED_PERMANENTLY 301 +# define GIT_HTTP_FOUND 302 +# define GIT_HTTP_SEE_OTHER 303 +# define GIT_HTTP_TEMPORARY_REDIRECT 307 +# define GIT_HTTP_PERMANENT_REDIRECT 308 +# define GIT_HTTP_STATUS_UNAUTHORIZED 401 +# define GIT_HTTP_STATUS_PROXY_AUTHENTICATION_REQUIRED 407 + +typedef enum +{ + GIT_HTTP_METHOD_GET, + GIT_HTTP_METHOD_POST, + GIT_HTTP_METHOD_CONNECT +} git_http_method; + +typedef struct +{ + git_http_method m_method; + std::string m_url; + std::string m_request_type; + std::string m_response_type; + unsigned int m_initial : 1, m_chunked : 1; +} http_service; + +std::string name_for_method(git_http_method method); + +std::optional select_service(git_smart_service_t action); + +#endif // EMSCRIPTEN diff --git a/src/wasm/response.cpp b/src/wasm/response.cpp new file mode 100644 index 0000000..35a9433 --- /dev/null +++ b/src/wasm/response.cpp @@ -0,0 +1,74 @@ +#ifdef EMSCRIPTEN + +# include "response.hpp" + +# include "../utils/common.hpp" +# include "libgit2_internals.hpp" + +wasm_http_response::wasm_http_response(char* buffer, size_t buffer_size, size_t* bytes_read) + : m_buffer(buffer) + , m_buffer_size(buffer_size) + , m_bytes_read(bytes_read) + , m_status(0) +{ + *m_bytes_read = 0; +} + +void wasm_http_response::add_header(const std::string& key, const std::string& 
value) +{ + m_response_headers.emplace(key, trim(value)); +} + +void wasm_http_response::clear() +{ + *m_bytes_read = 0; + m_status = 0; + m_status_text.clear(); + m_response_headers.clear(); +} + +std::optional wasm_http_response::get_header(const std::string& key) const +{ + // Return the first header with the specified key. + // If we ever have to handle multiple headers with the same key, will need to do something more + // complicated here. + auto header = m_response_headers.find(key); + if (header != m_response_headers.end()) + { + return header->second; + } + return std::nullopt; +} + +bool wasm_http_response::has_header(const std::string& key) const +{ + return m_response_headers.find(key) != m_response_headers.end(); +} + +bool wasm_http_response::has_header_matches(const std::string& key, std::string_view match) const +{ + auto range = m_response_headers.equal_range(key); + for (auto i = range.first; i != range.second; ++i) + { + if (i->second == match) + { + return true; + } + } + return false; +} + +bool wasm_http_response::has_header_starts_with(const std::string& key, std::string_view start) const +{ + auto range = m_response_headers.equal_range(key); + for (auto i = range.first; i != range.second; ++i) + { + if (i->second.starts_with(start)) + { + return true; + } + } + return false; +} + +#endif // EMSCRIPTEN diff --git a/src/wasm/response.hpp b/src/wasm/response.hpp new file mode 100644 index 0000000..cb3c735 --- /dev/null +++ b/src/wasm/response.hpp @@ -0,0 +1,42 @@ +#pragma once + +#ifdef EMSCRIPTEN + +# include +# include +# include + +// Response from a http(s) request. +// The lifetimes of the buffer, buffer_size and bytes_read are managed by libgit2, we just fill them +// with the data received from the request. 
+class wasm_http_response +{ +public: + + wasm_http_response(char* buffer, size_t buffer_size, size_t* bytes_read); + + void add_header(const std::string& key, const std::string& value); + + void clear(); + + std::optional get_header(const std::string& key) const; + + bool has_header(const std::string& key) const; + + bool has_header_matches(const std::string& key, std::string_view match) const; + + bool has_header_starts_with(const std::string& key, std::string_view start) const; + + char* m_buffer; // Not owned. + size_t m_buffer_size; + size_t* m_bytes_read; // Not owned. + int32_t m_status; // Specific type corresponding to i32 in emscripten setValue call. + std::string m_status_text; + +private: + + // Support multiple headers with the same key. + std::multimap m_response_headers; +}; + +#endif // EMSCRIPTEN diff --git a/src/wasm/scope.cpp b/src/wasm/scope.cpp new file mode 100644 index 0000000..b6dd4ea --- /dev/null +++ b/src/wasm/scope.cpp @@ -0,0 +1,21 @@ +#include "scope.hpp" + +#ifdef EMSCRIPTEN +# include "transport.hpp" +#endif + +wasm_http_transport_scope::wasm_http_transport_scope() +{ +#ifdef EMSCRIPTEN + git_transport_register("http", create_wasm_http_transport, nullptr); + git_transport_register("https", create_wasm_http_transport, nullptr); +#endif +} + +wasm_http_transport_scope::~wasm_http_transport_scope() +{ +#ifdef EMSCRIPTEN + git_transport_unregister("http"); + git_transport_unregister("https"); +#endif +} diff --git a/src/wasm/scope.hpp b/src/wasm/scope.hpp new file mode 100644 index 0000000..8eb1c8c --- /dev/null +++ b/src/wasm/scope.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "../utils/common.hpp" + +// Scope object to enable/disable browser-based wasm http transport scheme for libgit2. +// This is a no-op unless EMSCRIPTEN is defined. 
+class wasm_http_transport_scope : noncopyable_nonmovable +{ +public: + + wasm_http_transport_scope(); + + ~wasm_http_transport_scope(); +}; diff --git a/src/wasm/stream.cpp b/src/wasm/stream.cpp new file mode 100644 index 0000000..db69766 --- /dev/null +++ b/src/wasm/stream.cpp @@ -0,0 +1,581 @@ +#ifdef EMSCRIPTEN + +# include "stream.hpp" + +# include +# include + +# include + +# include "../utils/common.hpp" +# include "response.hpp" + +// Buffer size used in transport_smart, hardcoded in libgit2. +# define EMFORGE_BUFSIZE 65536 + +// JavasScript functions. + +EM_JS(const char*, js_base64_encode, (const char* input), { + const input_js = UTF8ToString(input); + const encoded = btoa(input_js); + return stringToNewUTF8(encoded); +}); + +EM_JS(void, js_delete_request, (int request_index), { + const cache = Module["git2cpp_js_cache"]; + if (Object.hasOwn(cache, request_index)) + { + delete cache[request_index]; + } +}); + +// Return the latest error string set in JS. Caller must delete the returned const char*. +EM_JS(const char*, js_get_error, (void), { + // clang-format off + const err = Module["git2cpp_js_error"] ?? ""; + // clang-format on + return stringToNewUTF8(err); +}); + +EM_JS( + int, + js_request, + (const char* url, + const char* method, + const char* content_type_header, + const char* authorization_header, + size_t buffer_size), + { + const url_js = UTF8ToString(url); + const method_js = UTF8ToString(method); + const content_type_header_js = UTF8ToString(content_type_header); + const authorization_header_js = UTF8ToString(authorization_header); + + try + { + const xhr = new XMLHttpRequest(); + xhr.open(method_js, url_js, false); + xhr.responseType = "arraybuffer"; + if (content_type_header_js.length > 0) + { + xhr.setRequestHeader("Content-Type", content_type_header_js); + } + if (authorization_header_js.length > 0) + { + // Should this only be set if using https? What about CORS via http? 
+ xhr.setRequestHeader("Authorization", authorization_header_js); + } + + // Cache request info on JavaScript side so that it is available in subsequent calls + // without having to pass it back and forth to/from C++. + let request_index = 0; + if (!Module["git2cpp_js_cache"]) + { + Module["git2cpp_js_cache"] = {"next_index": request_index}; + } + else + { + request_index = Module["git2cpp_js_cache"]["next_index"]++; + } + + Module["git2cpp_js_cache"][request_index] = {xhr, result_buffer_pointer: 0, buffer_size}; + + if (method_js == "GET") + { + xhr.send(); + } + + return request_index; + } + catch (err) + { + // Store error for later retrieval + Module["git2cpp_js_error"] = String(err); + console.error(err); + return -1; + } + } +); + +EM_JS(const char*, js_maybe_convert_url, (const char* url_str), { + // Convert URL to use CORS proxy based on env vars GIT_CORS_PROXY and GIT_CORS_PROXY_TYPE. + // If no conversion occurs, return the original unconverted URL as a new string. + const url_js = UTF8ToString(url_str); + const url = new URL(url_js); + // clang-format off + const env = Module["ENV"] ?? {}; + // clang-format on + const GIT_CORS_PROXY = env["GIT_CORS_PROXY"]; + let ret = url_js; // Default to returning original unconverted URL as new string. + if (GIT_CORS_PROXY) + { + // clang-format off + const GIT_CORS_PROXY_TYPE = env["GIT_CORS_PROXY_TYPE"] ?? 
"prefix"; + // clang-format on + if (GIT_CORS_PROXY_TYPE == "prefix") + { + ret = GIT_CORS_PROXY; + if (ret.at(-1) != '/') + { + ret += '/'; + } + ret += url_js; + } + else if (GIT_CORS_PROXY_TYPE == "insert") + { + ret = url.protocol + "/" + GIT_CORS_PROXY; + if (ret.at(-1) != '/') + { + ret += '/'; + } + ret += url.host + url.pathname + url.search; + } + else + { + // clang-format off + console.warn(`Invalid GIT_CORS_PROXY_TYPE of '${GIT_CORS_PROXY_TYPE}'`); + // clang-format on + } + } + return stringToNewUTF8(ret); +}); + +EM_JS( + size_t, + js_read, + (int request_index, + char* buffer, + size_t buffer_size, + int32_t* status, + const char** status_text, + const char** response_headers), + { + try + { + const cache = Module["git2cpp_js_cache"]; + const request = cache[request_index]; + const {xhr} = request; + + if (request.content) + { + xhr.send(request.content.buffer); + request.content = null; + } + + let bytes_read = 0; + if (xhr.response && xhr.response.byteLength) + { + bytes_read = xhr.response.byteLength - request.result_buffer_pointer; + if (bytes_read > buffer_size) + { + bytes_read = buffer_size; + } + } + + // Caller must delete the returned status_text and response_headers. + // clang-format off + setValue(status, xhr.status, 'i32*'); + setValue(status_text, stringToNewUTF8(xhr.statusText ?? ""), 'i8**'); + setValue(response_headers, stringToNewUTF8(xhr.getAllResponseHeaders() ?? 
""), 'i8**'); + // clang-format on + + if (bytes_read > 0) + { + const responseChunk = new Uint8Array(xhr.response, request.result_buffer_pointer, bytes_read); + writeArrayToMemory(responseChunk, buffer); + request.result_buffer_pointer += bytes_read; + } + return bytes_read + } + catch (err) + { + // Store error for later retrieval + Module["git2cpp_js_error"] = String(err); + console.error(err); + return -1; + } + } +); + +EM_JS(void, js_warning, (const char* msg), { + const msg_js = UTF8ToString(msg); + console.warning(msg_js); +}); + +EM_JS(size_t, js_write, (int request_index, const char* buffer, size_t buffer_size), { + try + { + const cache = Module["git2cpp_js_cache"]; + const request = cache[request_index]; + // Note the slice(0) is important. + const buffer_js = new Uint8Array(HEAPU8.buffer, buffer, buffer_size).slice(0); + if (!request.content) + { + request.content = buffer_js; + } + else + { + const content = new Uint8Array(request.content.length + buffer_js.length); + content.set(request.content); + content.set(buffer_js, request.content.length); + request.content = content; + } + return 0; + } + catch (err) + { + // Store error for later retrieval + Module["git2cpp_js_error"] = String(err); + console.error(err); + return -1; + } +}); + +// C wrapper functions that call JavaScript functions. + +static std::string base64_encode(std::string_view str) +{ + // Use browser's base64 encoding. + const char* encoded = js_base64_encode(str.data()); + std::string ret(encoded); + delete encoded; // Delete const char* allocated in JavaScript. + return ret; +} + +static void convert_js_to_git_error(void) +{ + // Convert error on JS side to git error. + const char* error_str = js_get_error(); + git_error_set(GIT_ERROR_HTTP, "%s", error_str); + delete error_str; // Delete const char* allocated in JavaScript. 
+} + +static int create_request(wasm_http_stream* stream, std::string_view content_header) +{ + stream->m_request_index = js_request( + stream->get_full_url().c_str(), + name_for_method(stream->m_service.m_method).c_str(), + content_header.data(), + stream->m_subtransport->m_authorization_header.c_str(), + EMFORGE_BUFSIZE + ); + return stream->m_request_index; +} + +static void delete_request(wasm_http_stream* stream) +{ + if (stream->m_request_index != -1) + { + js_delete_request(stream->m_request_index); + stream->m_request_index = -1; + } +} + +static int read(wasm_http_stream* stream, wasm_http_response& response, bool is_read_response) +{ + if (is_read_response) + { + // Response from a write. + if (stream->m_request_index == -1) + { + git_error_set(GIT_ERROR_HTTP, "read_response called without pending request"); + return -1; + } + } + else + { + if (stream->m_request_index != -1) + { + git_error_set(GIT_ERROR_HTTP, "read called with pending request"); + return -1; + } + + if (create_request(stream, stream->m_service.m_response_type.c_str()) < 0) + { + convert_js_to_git_error(); + return -1; + } + } + + const char* status_text = nullptr; + const char* response_headers = nullptr; + + // Actual read. + size_t bytes_read = js_read( + stream->m_request_index, + response.m_buffer, + response.m_buffer_size, + &response.m_status, + &status_text, + &response_headers + ); + if (bytes_read == static_cast<size_t>(-1)) // js_read returns -1 on failure, which wraps to the maximum value in the unsigned size_t. + { + convert_js_to_git_error(); + // Delete const char* allocated in JavaScript. + delete status_text; + delete response_headers; + return -1; + } + + response.m_status_text = status_text; + delete status_text; // Delete const char* allocated in JavaScript. + + // Split single string with response headers separated by \r\n into individual headers. 
+ auto lines = split_input_at_newlines(response_headers); + for (const auto& line : lines) + { + auto pos = line.find(":"); + if (pos == std::string::npos) + { + // Skip invalid lines. Should this be an error condition? 
+ continue; + } + response.add_header(line.substr(0, pos), line.substr(pos + 1)); + } + delete response_headers; // Delete const char* allocated in JavaScript. + + // If successful, check expected response content-type is correct. + if (response.m_status == GIT_HTTP_STATUS_OK) + { + auto expected_response_type = stream->m_service.m_response_type; + if (!expected_response_type.empty() + && !response.has_header_matches("content-type", expected_response_type)) + { + // Not sure this should be checked at all, as CORS proxy may be doing something + // with it. + git_error_set( + GIT_ERROR_HTTP, + "expected response content-type header '%s'", + expected_response_type.c_str() + ); + return -1; + } + } + + *response.m_bytes_read = bytes_read; + return 0; +} + +static int write(wasm_http_stream* stream, const char* buffer, size_t buffer_size) +{ + if (stream->m_request_index == -1) + { + // If there is not already a request opened, do so now. + if (create_request(stream, stream->m_service.m_request_type.c_str()) < 0) + { + convert_js_to_git_error(); + return -1; + } + } + + int error = js_write(stream->m_request_index, buffer, buffer_size); + if (error < 0) + { + convert_js_to_git_error(); + return -1; + } + + return 0; +} + +// C credential functions. + +static int create_credential(wasm_http_stream* stream, const wasm_http_response& response) +{ + wasm_http_subtransport* subtransport = stream->m_subtransport; + + // Delete old credential and authorization header. + if (subtransport->m_credential != nullptr) + { + subtransport->m_credential->free(subtransport->m_credential); + subtransport->m_credential = nullptr; + } + subtransport->m_authorization_header = ""; + + // Check that response headers show support for 'www-authenticate: Basic'. 
+ if (!response.has_header_starts_with("www-authenticate", "Basic")) + { + git_error_set(GIT_ERROR_HTTP, "remote host does not support Basic authentication"); + return -1; + } + + // Get credentials from user via libgit2 registered callback. + if (git_transport_smart_credentials( + &subtransport->m_credential, + subtransport->m_owner, + nullptr, + GIT_CREDENTIAL_USERPASS_PLAINTEXT + ) + < 0) + { + // credentials_callback will have set git error. + return -1; + } + + if (subtransport->m_credential->credtype != GIT_CREDENTIAL_USERPASS_PLAINTEXT) + { + git_error_set(GIT_ERROR_HTTP, "Unexpected credential type"); + return -1; + } + + // Create authorization header from username and password. + // Cast is OK as checked above that credential is a GIT_CREDENTIAL_USERPASS_PLAINTEXT. + auto userpass = reinterpret_cast(subtransport->m_credential); + std::ostringstream buffer; + buffer << userpass->username << ':' << userpass->password; + subtransport->m_authorization_header = "Basic " + base64_encode(buffer.str()); + + return 0; +} + +// C wasm_http_stream functions. + +wasm_http_stream::wasm_http_stream(wasm_http_subtransport* subtransport, http_service service) + : m_subtransport(subtransport) + , m_service(service) + , m_request_index(-1) +{ +} + +bool wasm_http_stream::ensure_final_url(const std::string final_url) +{ + // Must be using a CORS proxy that has redirected, so store updated base URL to reuse. + if (final_url.ends_with(m_service.m_url)) + { + // Remove service URL from end of final URL to give new base URL. 
+ auto base_url = final_url.substr(0, final_url.size() - m_service.m_url.size()); + if (m_subtransport->m_base_url != base_url) + { + m_subtransport->m_base_url = base_url; + return true; + } + } + else + { + std::string msg = "Unexpected x-final-url: " + final_url; + js_warning(msg.c_str()); + } + return false; +} + +bool wasm_http_stream::ensure_https() +{ + const std::string http = "http:"; + if (m_subtransport->m_base_url.starts_with(http)) + { + m_subtransport->m_base_url.replace(0, http.size(), "https:"); + return true; + } + return false; +} + +std::string wasm_http_stream::get_full_url() +{ + // Base URL never ends with a slash, service URL always begins with a slash. + m_unconverted_url = m_subtransport->m_base_url + m_service.m_url; + + const char* converted_url = js_maybe_convert_url(m_unconverted_url.c_str()); + std::string ret = converted_url; + delete converted_url; // Delete const char* allocated in JavaScript. + return ret; +} + +void wasm_http_stream_free(git_smart_subtransport_stream* s) +{ + wasm_http_stream* stream = reinterpret_cast(s); + delete_request(stream); + delete stream; +} + +int wasm_http_stream_read(git_smart_subtransport_stream* s, char* buffer, size_t buffer_size, size_t* bytes_read) +{ + wasm_http_stream* stream = reinterpret_cast(s); + wasm_http_response response(buffer, buffer_size, bytes_read); + + bool send = true; + while (send) + { + if (read(stream, response, false) < 0) + { + return -1; // git error already set. + } + send = false; + + auto final_url_header = response.get_header("x-final-url"); + if (final_url_header.has_value() && stream->ensure_final_url(final_url_header.value()) + && response.m_status != GIT_HTTP_STATUS_OK) + { + // Resend only if status not OK, if OK next request will use updated URL. 
+ send = true; + } + + if (response.has_header("strict-transport-security") && stream->ensure_https() + && response.m_status != GIT_HTTP_STATUS_OK) + { + // Resend only if status not OK, if OK next request will use https not http. + send = true; + } + + if (response.m_status == GIT_HTTP_STATUS_UNAUTHORIZED) + { + // Request and create new credentials. + if (create_credential(stream, response) < 0) + { + return -1; // git error already set. + } + send = true; // Resend will use updated credentials. + } + + if (send) + { + delete_request(stream); + response.clear(); + } + } + + if (response.m_status != GIT_HTTP_STATUS_OK) + { + git_error_set( + GIT_ERROR_HTTP, + "unexpected HTTP response: %d %s", + response.m_status, + response.m_status_text.c_str() + ); + return -1; + } + + return 0; +} + +int wasm_http_stream_read_response(git_smart_subtransport_stream* s, char* buffer, size_t buffer_size, size_t* bytes_read) +{ + wasm_http_stream* stream = reinterpret_cast(s); + + wasm_http_response response(buffer, buffer_size, bytes_read); + int error = read(stream, response, true); + + // May need similar handling of response status and headers as occurs in read() above, but so + // far this has not been necessary. 
+
+    if (error == 0 && response.m_status != GIT_HTTP_STATUS_OK)
+    {
+        git_error_set(
+            GIT_ERROR_HTTP,
+            "unexpected HTTP response: %d %s",
+            response.m_status,
+            response.m_status_text.c_str()
+        );
+        error = -1;
+    }
+
+    return error;
+}
+
+int wasm_http_stream_write(git_smart_subtransport_stream* s, const char* buffer, size_t buffer_size)
+{
+    wasm_http_stream* stream = reinterpret_cast<wasm_http_stream*>(s);
+    return write(stream, buffer, buffer_size);
+}
+
+#endif // EMSCRIPTEN
diff --git a/src/wasm/stream.hpp b/src/wasm/stream.hpp
new file mode 100644
index 0000000..a17c6f0
--- /dev/null
+++ b/src/wasm/stream.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#ifdef EMSCRIPTEN
+
+# include
+
+# include "libgit2_internals.hpp"
+# include "subtransport.hpp"
+
+// A stream represents a single http/https request.
+struct wasm_http_stream
+{
+    wasm_http_stream(wasm_http_subtransport* subtransport, http_service service);
+
+    // Return true if URL is changed.
+    bool ensure_final_url(const std::string final_url);
+
+    // Return true if URL is changed from http to https.
+    bool ensure_https();
+
+    // Return full URL of request, which may have been modified to use CORS proxy.
+    std::string get_full_url();
+
+    git_smart_subtransport_stream m_parent;
+    wasm_http_subtransport* m_subtransport; // Not owned, needed for credentials, etc.
+    http_service m_service;
+    std::string m_unconverted_url;
+    int m_request_index;
+};
+
+void wasm_http_stream_free(git_smart_subtransport_stream* s);
+
+int wasm_http_stream_read(git_smart_subtransport_stream* s, char* buffer, size_t buffer_size, size_t* bytes_read);
+
+int wasm_http_stream_read_response(
+    git_smart_subtransport_stream* s,
+    char* buffer,
+    size_t buffer_size,
+    size_t* bytes_read
+);
+
+int wasm_http_stream_write(git_smart_subtransport_stream* s, const char* buffer, size_t buffer_size);
+
+#endif // EMSCRIPTEN
diff --git a/src/wasm/subtransport.cpp b/src/wasm/subtransport.cpp
new file mode 100644
index 0000000..bdf0202
--- /dev/null
+++ b/src/wasm/subtransport.cpp
@@ -0,0 +1,118 @@
+#ifdef EMSCRIPTEN
+
+# include "subtransport.hpp"
+
+# include
+# include
+
+# include
+# include
+# include
+
+# include "libgit2_internals.hpp"
+# include "stream.hpp"
+
+// C functions.
+
+// Return a copy of url with all trailing '/' and ASCII whitespace characters removed.
+static std::string strip_trailing_slashes(const char* url)
+{
+    std::string stripped(url);
+    // find_last_not_of returns npos when every character is stripped; npos + 1 wraps to 0, so
+    // the whole string is erased in that case.
+    stripped.erase(stripped.find_last_not_of("/ \t\n\r\f\v") + 1);
+    return stripped;
+}
+
+// git_smart_subtransport action callback.
+// Creates the wasm_http_stream for the requested action and returns it via out. Returns 0 on
+// success, or -1 with the libgit2 error set if the action does not map to a service.
+static int wasm_http_action(
+    git_smart_subtransport_stream** out,
+    git_smart_subtransport* s,
+    const char* url,
+    git_smart_service_t action
+)
+{
+    // An action is a single http/https request that is handled by a single wasm_http_stream.
+
+    GIT_ASSERT_ARG(out);
+    GIT_ASSERT_ARG(s);
+    GIT_ASSERT_ARG(url);
+
+    wasm_http_subtransport* subtransport = reinterpret_cast<wasm_http_subtransport*>(s);
+    *out = nullptr;
+
+    auto service = select_service(action);
+    if (!service.has_value())
+    {
+        git_error_set(GIT_ERROR_HTTP, "invalid http/https action");
+        return -1;
+    }
+
+    if (subtransport->m_base_url.empty())
+    {
+        // Store base URL without trailing slashes.
+        subtransport->m_base_url = strip_trailing_slashes(url);
+    }
+
+    wasm_http_stream* stream = new wasm_http_stream(subtransport, service.value());
+
+    stream->m_parent.subtransport = &subtransport->m_parent;
+    if (stream->m_service.m_method == GIT_HTTP_METHOD_GET)
+    {
+        stream->m_parent.read = wasm_http_stream_read;
+    }
+    else
+    {
+        stream->m_parent.write = wasm_http_stream_write;
+        stream->m_parent.read = wasm_http_stream_read_response;
+    }
+    stream->m_parent.free = wasm_http_stream_free;
+    *out = &stream->m_parent;
+    return 0;
+}
+
+// git_smart_subtransport close callback. Does no work and always returns 0.
+static int wasm_http_close(git_smart_subtransport* s)
+{
+    return 0;
+}
+
+// git_smart_subtransport free callback. Calls the close callback, frees the stored credential
+// if there is one, then deletes the subtransport itself.
+static void wasm_http_free(git_smart_subtransport* s)
+{
+    wasm_http_subtransport* subtransport = reinterpret_cast<wasm_http_subtransport*>(s);
+    wasm_http_close(s);
+
+    if (subtransport->m_credential != nullptr)
+    {
+        subtransport->m_credential->free(subtransport->m_credential);
+    }
+
+    delete subtransport;
+}
+
+// git_smart_subtransport_cb. Allocates a wasm_http_subtransport, sets its action/close/free
+// callbacks and initial state, and returns it via out. Returns 0 on success.
+int create_wasm_http_subtransport(git_smart_subtransport** out, git_transport* owner, void* param)
+{
+    GIT_ASSERT_ARG(out);
+    GIT_ASSERT_ARG(owner);
+
+    wasm_http_subtransport* subtransport = new wasm_http_subtransport();
+    GIT_ASSERT_WITH_RETVAL(subtransport, -1);
+
+    subtransport->m_parent.action = wasm_http_action;
+    subtransport->m_parent.close = wasm_http_close;
+    subtransport->m_parent.free = wasm_http_free;
+    subtransport->m_owner = owner;
+    subtransport->m_base_url = "";
+    subtransport->m_credential = nullptr;
+
+    *out = &subtransport->m_parent;
+    return 0;
+}
+
+#endif // EMSCRIPTEN
diff --git a/src/wasm/subtransport.hpp b/src/wasm/subtransport.hpp
new file mode 100644
index 0000000..1bb3bb6
--- /dev/null
+++ b/src/wasm/subtransport.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#ifdef EMSCRIPTEN
+
+# include
+
+# include
+
+// A single wasm_http_subtransport manages all http(s) requests of a single git2cpp command call.
+// Each request has its own wasm_http_stream, here we store extra information that needs to be
+// reused by subsequent requests.
+struct wasm_http_subtransport
+{
+    git_smart_subtransport m_parent;
+    git_transport* m_owner = nullptr; // Not owned.
+
+    // Data stored for reuse on other streams of this transport:
+    std::string m_base_url;
+    std::string m_authorization_header;
+    git_credential* m_credential = nullptr; // libgit2 creates this, we are responsible for deleting it.
+};
+
+// git_smart_subtransport_cb
+int create_wasm_http_subtransport(git_smart_subtransport** out, git_transport* owner, void* param);
+
+#endif // EMSCRIPTEN
diff --git a/src/wasm/transport.cpp b/src/wasm/transport.cpp
new file mode 100644
index 0000000..48f5697
--- /dev/null
+++ b/src/wasm/transport.cpp
@@ -0,0 +1,17 @@
+#ifdef EMSCRIPTEN
+
+# include "transport.hpp"
+
+# include "subtransport.hpp"
+
+// git_transport_cb
+int create_wasm_http_transport(git_transport** out, git_remote* owner, void* param)
+{
+    git_smart_subtransport_definition definition;
+    definition.callback = create_wasm_http_subtransport;
+    definition.rpc = true;
+    definition.param = param;
+    return git_transport_smart(out, owner, &definition);
+}
+
+#endif // EMSCRIPTEN
diff --git a/src/wasm/transport.hpp b/src/wasm/transport.hpp
new file mode 100644
index 0000000..93b3fea
--- /dev/null
+++ b/src/wasm/transport.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+#ifdef EMSCRIPTEN
+
+# include
+
+// Callback of type git_transport_cb that is registered with libgit2 and is called to handle
+// http(s) transport.
+int create_wasm_http_transport(git_transport** out, git_remote* owner, void* param);
+
+#endif // EMSCRIPTEN
diff --git a/test/conftest.py b/test/conftest.py
index fc62340..5749674 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -74,10 +74,6 @@ def private_test_repo():
     # Fixture containing everything needed to access private github repo.
     # GIT2CPP_TEST_PRIVATE_TOKEN is the fine-grained Personal Access Token for private test repo.
     # If this is not available as an environment variable, tests that use this fixture are skipped.
- - if GIT2CPP_TEST_WASM: - pytest.skip("Use of credentials in wasm not yet implemented") - token = os.getenv("GIT2CPP_TEST_PRIVATE_TOKEN") if token is None or len(token) == 0: pytest.skip("No token for private test repo GIT2CPP_TEST_PRIVATE_TOKEN") diff --git a/test/test_branch.py b/test/test_branch.py index 967cf91..bdede1b 100644 --- a/test/test_branch.py +++ b/test/test_branch.py @@ -154,7 +154,6 @@ def test_branch_show_current(repo_init_with_commit, git2cpp_path, tmp_path): cmd = [git2cpp_path, "branch", "--show-current"] p = subprocess.run(cmd, capture_output=True, cwd=tmp_path, text=True) assert p.returncode == 0 - print(p.stdout) # Default branch after init is "main" or "master" depending on git config assert p.stdout.strip() == "main" diff --git a/test/test_clone.py b/test/test_clone.py index eaa4422..8a3211b 100644 --- a/test/test_clone.py +++ b/test/test_clone.py @@ -51,7 +51,7 @@ def test_clone_private_repo(git2cpp_path, tmp_path, run_in_tmp_path, private_tes # Note that http succeeds by redirecting to https. username = "abc" # Can be any non-empty string. password = private_test_repo["token"] - input = f"{username}\n{password}" + input = f"{username}\n{password}\n" repo_path = tmp_path / private_test_repo["repo_name"] url = private_test_repo["https_url" if protocol == "https" else "http_url"] @@ -76,7 +76,7 @@ def test_clone_private_repo_fails_then_succeeds( # Fails with wrong credentials, then succeeds with correct ones. username = "xyz" # Can be any non-empty string. 
password = private_test_repo["token"] - input = "\n".join(["wrong1", "wrong2", username, password]) + input = "\n".join(["wrong1", "wrong2", username, password]) + "\n" repo_path = tmp_path / private_test_repo["repo_name"] clone_cmd = [git2cpp_path, "clone", private_test_repo["https_url"]] @@ -97,7 +97,7 @@ def test_clone_private_repo_fails_then_succeeds( def test_clone_private_repo_fails_on_no_username( git2cpp_path, tmp_path, run_in_tmp_path, private_test_repo ): - input = "" + input = "\n" repo_path = tmp_path / private_test_repo["repo_name"] clone_cmd = [git2cpp_path, "clone", private_test_repo["https_url"]] @@ -113,7 +113,8 @@ def test_clone_private_repo_fails_on_no_username( def test_clone_private_repo_fails_on_no_password( git2cpp_path, tmp_path, run_in_tmp_path, private_test_repo ): - input = "username\n" # Note no password after the \n + input = "username\n\n" # Note no password between the \n + repo_path = tmp_path / private_test_repo["repo_name"] clone_cmd = [git2cpp_path, "clone", private_test_repo["https_url"]] @@ -124,3 +125,22 @@ def test_clone_private_repo_fails_on_no_password( assert not repo_path.exists() assert p_clone.stdout.count("Username:") == 1 assert p_clone.stdout.count("Password:") == 1 + + +@pytest.mark.parametrize("protocol", ["http", "https"]) +def test_clone_gitlab(git2cpp_path, tmp_path, run_in_tmp_path, protocol): + repo_url = f"{protocol}://gitlab.quantstack.net/ianthomas23_group/cockle-playground" + + clone_cmd = [git2cpp_path, "clone", repo_url] + p_clone = subprocess.run(clone_cmd, capture_output=True, cwd=tmp_path, text=True) + assert p_clone.returncode == 0 + + repo_path = tmp_path / "cockle-playground" + assert repo_path.is_dir() + assert (repo_path / "src").is_dir() + + status_cmd = [git2cpp_path, "status"] + p_status = subprocess.run(status_cmd, capture_output=True, cwd=repo_path, text=True) + assert p_status.returncode == 0 + assert "On branch main" in p_status.stdout + assert "Your branch is up to date with 'origin/main'" 
in p_status.stdout diff --git a/test/test_fetch.py b/test/test_fetch.py index 4e30a14..4510875 100644 --- a/test/test_fetch.py +++ b/test/test_fetch.py @@ -18,7 +18,7 @@ def test_fetch_private_repo(git2cpp_path, tmp_path, run_in_tmp_path, private_tes # First fetch with wrong password which fails, then correct password which succeeds. username = "abc" # Can be any non-empty string. password = private_test_repo["token"] - input = f"{username}\nwrong_password\n{username}\n{password}" + input = f"{username}\nwrong_password\n{username}\n{password}\n" fetch_cmd = [git2cpp_path, "fetch", "origin"] p_fetch = subprocess.run(fetch_cmd, capture_output=True, text=True, input=input) assert p_fetch.returncode == 0 diff --git a/test/test_push.py b/test/test_push.py index 313f201..03f4fb7 100644 --- a/test/test_push.py +++ b/test/test_push.py @@ -12,7 +12,7 @@ def test_push_private_repo( # that to the remote. username = "abc" # Can be any non-empty string. password = private_test_repo["token"] - input = f"{username}\n{password}" + input = f"{username}\n{password}\n" repo_path = tmp_path / private_test_repo["repo_name"] url = private_test_repo["https_url"] @@ -55,7 +55,7 @@ def test_push_private_repo( assert p_log.stdout.count("This is my commit message") == 1 # push with incorrect credentials to check it fails, then with correct to check it works. 
- input = f"${username}\ndef\n{username}\n{password}" + input = f"${username}\ndef\n{username}\n{password}\n" push_cmd = [git2cpp_path, "push", "origin"] p_push = subprocess.run(push_cmd, cwd=repo_path, capture_output=True, text=True, input=input) assert p_push.returncode == 0 diff --git a/test/test_revparse.py b/test/test_revparse.py index 2e1bf24..263c24b 100644 --- a/test/test_revparse.py +++ b/test/test_revparse.py @@ -68,7 +68,6 @@ def test_revparse_multiple_revs(repo_init_with_commit, git2cpp_path, tmp_path): assert p.returncode == 0 lines = p.stdout.splitlines() - print() assert len(lines) == 2 assert all(len(x) == 40 for x in lines) assert lines[0] != lines[1] diff --git a/wasm/README.md b/wasm/README.md index b8f94a8..c24f48d 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -91,7 +91,7 @@ To test the WebAssembly build use from the `wasm` directory: make test ``` -This runs (some of) the tests in the top-level `test` directory with various monkey patching so that +This runs the tests in the top-level `test` directory with various monkey patching so that `git2cpp` commands are executed in the browser. The tests that are run are defined in the function `pytest_ignore_collect` in `conftest_wasm.py`. 
+ If there are problems running the tests then ensure you have the latest `playwright` browser installed: @@ -100,6 +100,12 @@ If there are problems running the tests then ensure you have the latest `playwri playwright install chromium ``` +To run all of the tests including those that require the `GIT2CPP_TEST_PRIVATE_TOKEN`: + +```bash +GIT2CPP_TEST_PRIVATE_TOKEN=<token> make test +``` + You can run a specific test from the top-level `test` directory (not the `wasm/test` directory) using: diff --git a/wasm/recipe/CMakeLists.txt b/wasm/recipe/CMakeLists.txt index 43445b9..74b54ce 100644 --- a/wasm/recipe/CMakeLists.txt +++ b/wasm/recipe/CMakeLists.txt @@ -33,7 +33,7 @@ add_custom_target(modify-recipe add_custom_command( OUTPUT ${EM_FORGE_RECIPES_DIR}/${BUILT_PACKAGE_SUBDIR} DEPENDS modify-recipe - COMMAND bash -c "rattler-build build ${RATTLER_ARGS} --recipe ${GIT2CPP_RECIPE_DIR}" + COMMAND bash -c "rattler-build build ${RATTLER_ARGS} --recipe ${GIT2CPP_RECIPE_DIR} --keep-build" WORKING_DIRECTORY ${EM_FORGE_RECIPES_DIR} )