diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..c581405 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,31 @@ +# LLM Agent Instructions + +* To compile `ModRuby`, run these commands: + * `rm -f /usr/src/mod_ruby/CMakeCache.txt` + * `rm -Rf /usr/src/mod_ruby/CMakeFiles` + * `cmake3 /usr/src/mod_ruby` + * `make -j4` + * `make install` + * If any compilation commands fail, the patch failed. +* To test `ModRuby`: + * Reload Apache by running `killall httpd` -- it will automatically restart and load + the new `mod_ruby.so` shared object that was previously compiled. + * Use curl to test `ModRuby` with this: `curl -m1 localhost` + * It should output the string `Ruby check_access()\nHi there from ruby` + * If curl times out and Apache segfaults, the patch failed + * To inspect the backtrace, look at the file `/var/log/httpd/gdb_backtrace.txt` + * After each crash, Apache is restarted + * Each new crash will delete the previous contents of `/var/log/httpd/gdb_backtrace.txt` + * If Apache completely fails to start up due to a symbol linking problem or other error, + the output from the server will be in `/var/log/httpd/error_log` and it will + continue to try and restart itself every 10 seconds. The log file might be large, + so something like `tail -20 /var/log/httpd/error_log` is a safer method to inspect + this file. +* codex is running inside an Oracle Linux 8 Docker container, running as the root user. + If you need tools or packages installed, you may use `dnf install` for distro packages + or use other methods. +* NodeJS 22 is installed and you may install npm packages +* Python 3.12 is installed and you may construct Python utility scripts and execute them. 
+* If you want to inspect the Ruby source code, it is installed in `/usr/local/rvm/src/ruby-3.2.3/` + * If you want to inspect the Ruby C++ headers, they are in `/usr/local/rvm/src/ruby-3.2.3/include/` +* If you want to inspect the Ruby binary installation, it is installed in `/usr/local/rvm/rubies/ruby-3.2.3/` diff --git a/Dockerfile b/Dockerfile index 6fa22d8..8797737 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,9 +55,9 @@ RUN rvm requirements # Pick your ruby version here # Segfaults in rb_protect() -#RUN rvm install ruby-3.2.3 +RUN rvm install ruby-3.2.3 # Works -RUN rvm install ruby-2.7.5 +#RUN rvm install ruby-2.7.5 # Same segfault as 3.2.3 #RUN rvm install ruby-3.0.3 diff --git a/Dockerfile.codex b/Dockerfile.codex new file mode 100644 index 0000000..d15509a --- /dev/null +++ b/Dockerfile.codex @@ -0,0 +1,107 @@ +# This builds a docker image suitable for running OpenAI's codex CLI +FROM oraclelinux:8 + +# Every dnf step passes -y: `docker build` has no TTY, so an unanswered +# confirmation prompt would abort the build. +RUN dnf install -y oraclelinux-release-el8 oracle-epel-release-el8 \ + && dnf --enablerepo=ol8_codeready_builder install -y \ + apr-devel \ + apr-util \ + apr-util-devel \ + autoconf \ + automake \ + bison \ + bzip2 \ + cmake3 \ + curl \ + cyrus-sasl \ + cyrus-sasl-devel \ + flex \ + gcc \ + gcc-c++ \ + gdb \ + git \ + gpg \ + httpd \ + httpd-devel \ + libffi-devel \ + libtool \ + libyaml \ + libyaml-devel \ + openssl-devel \ + patch \ + readline-devel \ + ruby \ + sqlite-devel \ + make \ + redhat-lsb \ + unzip \ + zlib-devel \ + && dnf module enable -y nodejs:22 \ + && dnf install -y nodejs nodejs-devel npm \ + && npm install -g @openai/codex + +# Import GPG key for RVM +RUN gpg \ + --keyserver keyserver.ubuntu.com \ + --recv-keys \ + 409B6B1796C275462A1703113804BB82D39DC0E3 \ + 7D2BAF1CF37B13E2069D6956105BD0E739499BDB + +# Install RVM system wide +RUN curl -sSL https://get.rvm.io | bash -s stable + +# Set our shell to a bash full login environment to pull in +# RVM's profile in all RUN instructions below. 
+# (Requires Docker 1.12) +SHELL ["/bin/bash", "-l", "-c"] + +# In case we missed any package requirements, this installs them +RUN rvm requirements + +# Pick your ruby version here +# Segfaults in rb_protect() +RUN rvm install ruby-3.2.3 --disable-binary +# Works +#RUN rvm install ruby-2.7.5 +# Same segfault as 3.2.3 +#RUN rvm install ruby-3.0.3 + +# Setup our libruby.so dir in ld.so.conf +RUN rvm config-get libdir > /etc/ld.so.conf.d/ruby.conf && ldconfig + +WORKDIR /usr/src/mod_ruby + +COPY . /usr/src/mod_ruby + +# Pulls in the RVM environment and installed ruby +RUN cmake3 . && make -j4 && make install + +# Remove some junk that the httpd package installs +RUN rm -f /etc/httpd/conf.d/welcome.conf /etc/httpd/conf.modules.d/00-systemd.conf + +# Manually copy some files I couldn't figure out with the CMake system +RUN cp -a config/mod_ruby.conf /etc/httpd/conf.modules.d/ + +# librhtml.so +RUN cp -a lib/* $(rvm config-get libdir) && ldconfig + +COPY docker/index.html /var/www/html/index.html +COPY docker/*.rb /var/www/html/ +COPY docker/*.cgi /var/www/cgi-bin/ +COPY docker/httpd.conf /etc/httpd/conf/httpd.conf +COPY docker/gdb.input /gdb.input +COPY docker/httpd-gdb /httpd-gdb +COPY docker/httpd-gdb-loop /httpd-gdb-loop + +# Force apache logs to docker console logs +#RUN ln -sf /dev/console /var/log/httpd/access_log \ +# && ln -sf /dev/console /var/log/httpd/error_log + +# Graceful shutdown signal for apache +# (Requires Docker > 1.11) +# Note: gdb is set to trap SIGWINCH, so this is for +# alternate uses with less debugging +#STOPSIGNAL SIGWINCH + +# If you want a simpler image without gdb... +#CMD ["/usr/sbin/httpd", "-D", "FOREGROUND"] +CMD ["/httpd-gdb"] diff --git a/README.md b/README.md index a48868b..9c0dd7f 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ The build will create these files in the local repo: To build from source, you need to the following packages: - * Ruby 1.9 or 2.x. (Ruby header files also needed). + * Ruby 2.x or 3.x. 
(Ruby header files also needed). * Apache, APR and APR Util headers * CMake @@ -69,7 +69,7 @@ using the Docker image as a handy developer's environment: bash $ ./script/docker_run ... - 0x00007f30bb13733f in accept4 () from /lib64/libc.so.6 + 0x00007f7a7adfc7b4 in read () from /lib64/libpthread.so.0 A single Apache worker child is running in a gdb shell. Smoke test mod_ruby in a separate terminal window: @@ -77,7 +77,7 @@ separate terminal window: bash $ curl localhost:8080 Hi there from ruby Hello World from HTML! - + If mod_ruby crashes, gdb will print a full stack trace. You may also do `Ctrl + C` to break out to a gdb prompt to inspect the running Apache child. @@ -87,6 +87,60 @@ cases. Submit pull requests using feature branches, and have fun! +### Developing with OpenAI Codex + +Working with the Ruby C API can be frustrating. Documentation is sparse +and a lot of the experience is trial & error. + +This project integrates OpenAI's Codex for an agentic approach. There +is a separate `Dockerfile.codex` which loads in the full Ruby C headers +and source code and installs the Codex CLI. The project root contains +an AGENTS.md system prompt to tell the LLM how to compile, install +and test ModRuby. It can run web searches, install packages as root, +search the Ruby source and behave like an entitled twerp. + +Apache is started up with the container and will run in a reload loop +if it crashes. The agent is given instructions to test changes and +inspect the stack trace if Apache segfaults. It should keep iterating +until the goal is met, which could be a very long time. + +**Use with caution. Don't leave it running unattended.** + +First, install [Codex CLI](https://developers.openai.com/codex/cli/) +on your workstation and get the credentials set up by logging into ChatGPT. +The `ModRuby` container bind mounts your workstation's `$HOME/.codex` +into the container and runs the agent inside the container. 
The `ModRuby` +project root is bind mounted into `/usr/src/mod_ruby` so changes made by the +agent will be reflected immediately on your host OS. + +Start up the container with: + + bash $ ./script/docker_codex + . . . + To get started, describe a task or try one of these commands: + > + +Go wild and have fun. + +``` +> Search for a potential security vulnerability in ModRuby. Identify the + attack vector and write a test case to confirm the vulnerability. Write + a patch to fix the vulnerability and supply code comments. +``` + +``` +Updated Plan + High-level roadmap before diving into the source and modifications. + Survey key ModRuby modules for input handling to spot potential security issues. + Create a regression test that demonstrates the vulnerability. + Implement a fix with comments, rebuild, and rerun the new test. + +I'm focusing on confirming an overflow issue in url_encode by running a +death test with MALLOC_CHECK set to catch heap errors. I'm thinking about +creating a dedicated test compiled with AddressSanitizer to catch the integer +overflow vulnerability in url_encode by triggering a heap-buffer-overflow if present. +``` + ## License Redistribution and use in source and binary forms, with or without modification, diff --git a/docker/httpd-gdb-loop b/docker/httpd-gdb-loop new file mode 100755 index 0000000..c188008 --- /dev/null +++ b/docker/httpd-gdb-loop @@ -0,0 +1,88 @@ +#!/bin/bash +# This is similar to httpd-gdb, except it's more hands-off and +# better suited for automated testing. If apache crashes the +# stack trace will be written to /var/log/httpd/gdb_backtrace.txt +# and then apache is restarted. Each new crash will delete the +# backtrace log and dump the fresh backtrace into it. 
+#set -euo pipefail + +# Config +TRACE_FILE="${TRACE_FILE:-/var/log/httpd/gdb_backtrace.txt}" + +mkdir -p "$(dirname "$TRACE_FILE")" + +# Clean previous trace each run +rm -f "$TRACE_FILE" + +# Ensure child gdb/httpd dies if this script is killed +trap 'pkill -P $$ || true' INT TERM + +while :; do + ts="$(date -Is)" + tmp="${TRACE_FILE}.tmp" + + echo "[$ts] starting httpd" + + rm -f /var/run/httpd/httpd.pid + + /usr/sbin/httpd 1>>"$tmp" 2>>"$tmp" + + ret=$? + if [[ "$ret" != "0" ]]; then + { + echo "Failed to start up Apache:" + echo + } >>"$tmp" + mv -f "$tmp" "$TRACE_FILE" + # longer sleep time on big time fails + sleep 10 + continue + fi + + sleep 2 + pid=$(ps ax |grep /usr/sbin/httpd |grep -v grep | grep -v defunct | awk '{print $1}' |tail -1) + echo "httpd fork pid: $pid" + + # httpd reported success but no process is left to attach to; + # retry instead of handing gdb an empty pid. + if [[ -z "$pid" ]]; then + echo "no httpd process found. restarting." + sleep 2 + continue + fi + + # If apache started up successfully and the previous trace file + # contains a fail message, delete the trace file. We want to keep + # backtraces intact for observability though. + # (-s: the trace file usually does not exist yet; stay quiet then.) + if grep -qs "Failed to start up Apache" "$TRACE_FILE"; then + echo "Deleting trace file" + rm -f "$TRACE_FILE" + fi + + #-ex "set detach-on-fork on" \ + #-ex "handle SIGPIPE nostop noprint pass" \ + #-ex "handle SIGSEGV stop print nopass" \ + #-ex "set follow-fork-mode child" \ + gdb -p "$pid" -q --batch \ + -ex "set pagination off" \ + -ex "set confirm off" \ + -ex "handle SIGPIPE nostop noprint pass" \ + -ex "handle SIGSEGV stop print nopass" \ + -ex "continue" \ + -ex "echo \n===== THREAD BACKTRACE =====\n" \ + -ex "thread apply all bt full" \ + -ex "echo \n===== REGISTERS =====\n" \ + -ex "info registers" \ + 2>&1 | tee "$tmp" + + # If it crashed, keep the trace; else discard. + if grep -q "received signal SIGSEGV" "$tmp"; then + { + echo "===== TIMESTAMP =====" + echo "$ts" + } >>"$tmp" + mv -f "$tmp" "$TRACE_FILE" + echo "segfault detected. backtrace saved to: $TRACE_FILE" + else + cat "$tmp" + rm -f "$tmp" + echo "no segfault detected. restarting." 
+ fi + + # -r: do not run kill at all when no httpd process is left to signal + ps ax |grep /usr/sbin/httpd |grep -v grep | grep -v defunct | awk '{print $1}' | xargs -r kill -9 + + sleep 2 +done + diff --git a/docker/httpd.conf b/docker/httpd.conf index dc6083f..e22eeba 100644 --- a/docker/httpd.conf +++ b/docker/httpd.conf @@ -15,7 +15,16 @@ DocumentRoot "/var/www/html" # Allow open access: Require all granted +# Make apache chill out on the event MPM settings so that +# we don't have such a huge mess of threads in gdb to deal with MaxClients 1 +ThreadsPerChild 1 +MaxRequestWorkers 1 +MinSpareThreads 1 +MaxSpareThreads 1 +MaxConnectionsPerChild 0 +ServerLimit 1 + CoreDumpDirectory /tmp RubyHandlerDeclare TEST RubyHandlerModule TEST "/var/www/html/test.rb" diff --git a/script/docker_codex b/script/docker_codex new file mode 100755 index 0000000..625b041 --- /dev/null +++ b/script/docker_codex @@ -0,0 +1,26 @@ +#!/bin/bash +# Paths are quoted so a checkout or $HOME containing spaces still works. +root="$( readlink -f "$( dirname "$( readlink -f "$0" )" )/.." )" +# Stop if the image fails to build rather than starting a stale one. +docker build -f "$root/Dockerfile.codex" -t mod_ruby_codex "$root" || exit 1 + +docker rm -f mod_ruby_codex_run_container +# Change 8080 here if you have a local conflict with it +docker run \ + --name=mod_ruby_codex_run_container \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + -v "$root":/usr/src/mod_ruby \ + -v "$HOME/.codex":/root/.codex \ + -d \ + -p 8080:80 \ + mod_ruby_codex \ + /bin/sleep infinity + +# Start up Apache in the background. This script will loop +# restarting Apache when it crashes. Stack traces are logged +# for the Codex agent to inspect. 
+docker exec -d mod_ruby_codex_run_container /httpd-gdb-loop +#docker exec -ti mod_ruby_codex_run_container /httpd-gdb-loop + +# Run the Codex agent with full access and web search +docker exec -ti mod_ruby_codex_run_container codex --sandbox danger-full-access --search --cd /usr/src/mod_ruby + diff --git a/src/module.cpp b/src/module.cpp index e27f86f..176e864 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -2,9 +2,17 @@ #include #include +#include +#include +#include #include -#include +#include +#include #include +#include +#include +#include +#include #include "ruby.hpp" #include "request.h" @@ -23,6 +31,9 @@ #include "config.h" #include "module.hpp" +#include + +extern "C" int ruby_thread_has_gvl_p(void); #if AP_SERVER_MINORVERSION_NUMBER >= 4 APLOG_USE_MODULE(); @@ -32,6 +43,199 @@ typedef VALUE (*fn)(...); using namespace std; +namespace +{ +// RubyDispatcher owns the embedded Ruby VM and runs all Ruby-facing work on +// a single dedicated OS thread. Apache worker threads hand off callable jobs +// so that Ruby APIs are always invoked from a Ruby-managed thread with the +// GVL held. +class RubyDispatcher +{ +public: + static RubyDispatcher& instance() + { + static RubyDispatcher dispatcher; + return dispatcher; + } + + // Launches the dispatcher thread and blocks until the Ruby VM is ready. + // If the worker fails while starting the VM, the thread is joined, the + // dispatcher is reset to its "not started" state and the worker's + // exception is rethrown to the caller. + void start() + { + std::unique_lock<std::mutex> lock(mutex_); + + if (thread_started_) + { + ready_cv_.wait(lock, [this]() { return ready_; }); + return; + } + + shutting_down_ = false; + ready_ = false; + thread_exception_ = nullptr; + worker_thread_ = std::thread(&RubyDispatcher::run, this); + thread_started_ = true; + + ready_cv_.wait(lock, [this]() { return ready_; }); + + if (thread_exception_) + { + // run() records the exception as its last locked action, so the + // worker no longer needs mutex_ and joining here cannot deadlock. + // Reset state so later execute() calls fail fast instead of + // blocking forever on a queue that nobody drains. + std::exception_ptr failure = thread_exception_; + + if (worker_thread_.joinable()) + { + worker_thread_.join(); + } + + jobs_.clear(); + thread_started_ = false; + ready_ = false; + thread_exception_ = nullptr; + + std::rethrow_exception(failure); + } + } + + // Indicates whether the dispatcher thread is currently running. 
+ bool is_started() const + { + std::lock_guard<std::mutex> lock(mutex_); + return thread_started_; + } + + // Signals the dispatcher thread to drain pending work and shut down the VM. + void stop() + { + std::unique_lock<std::mutex> lock(mutex_); + + if (!thread_started_) + { + return; + } + + shutting_down_ = true; + cv_.notify_all(); + lock.unlock(); + + if (worker_thread_.joinable()) + { + worker_thread_.join(); + } + + lock.lock(); + jobs_.clear(); + thread_started_ = false; + ready_ = false; + thread_exception_ = nullptr; + } + + // Synchronously executes a job on the Ruby dispatcher thread. + // Returns the job's int result; an exception thrown by the job is + // rethrown here. Throws std::runtime_error if the dispatcher thread + // has not been started. + int execute(std::function<int()> task) + { + if (ruby_thread_has_gvl_p()) + { + return task(); + } + + std::unique_ptr<Job> job(new Job()); + auto future = job->promise.get_future(); + job->task = std::move(task); + + { + std::lock_guard<std::mutex> lock(mutex_); + + if (!thread_started_) + { + throw std::runtime_error("Ruby dispatcher thread not started"); + } + + jobs_.push_back(std::move(job)); + } + + cv_.notify_one(); + + return future.get(); + } + +private: + // One queued unit of work plus the promise used to hand its result (or + // exception) back to the thread blocked in execute(). + struct Job + { + std::function<int()> task; + std::promise<int> promise; + }; + + RubyDispatcher() = default; + + // Dispatcher main loop: owns the Ruby VM and processes queued jobs. + void run() + { + try + { + ruby::startup("ModRuby Ruby VM"); + + { + std::lock_guard<std::mutex> lock(mutex_); + ready_ = true; + ready_cv_.notify_all(); + } + + while (true) + { + std::unique_ptr<Job> job; + + { + std::unique_lock<std::mutex> lock(mutex_); + cv_.wait(lock, [this]() { + return shutting_down_ || !jobs_.empty(); + }); + + if (shutting_down_ && jobs_.empty()) + { + break; + } + + job = std::move(jobs_.front()); + jobs_.pop_front(); + } + + try + { + int result = job->task(); + job->promise.set_value(result); + } + catch (...) + { + job->promise.set_exception(std::current_exception()); + } + } + + ruby::shutdown(); + } + catch (...) 
+ { + std::lock_guard<std::mutex> lock(mutex_); + thread_exception_ = std::current_exception(); + ready_ = true; + ready_cv_.notify_all(); + } + } + + mutable std::mutex mutex_; + std::condition_variable cv_; + std::condition_variable ready_cv_; + std::deque<std::unique_ptr<Job>> jobs_; + std::thread worker_thread_; + bool thread_started_ = false; + bool ready_ = false; + bool shutting_down_ = false; + std::exception_ptr thread_exception_; +}; + +// Runs the provided Apache handler while guaranteeing we are executing on the +// Ruby dispatcher thread (and therefore with the GVL acquired). Callers may be +// running on arbitrary Apache worker threads. +int call_request_with_gvl(int (*func)(request_rec*), request_rec* r) +{ + if (ruby_thread_has_gvl_p()) + { + return func(r); + } + + auto job = [func, r]() -> int { + return func(r); + }; + + return RubyDispatcher::instance().execute(job); +} +} // namespace + // Generic Ruby object to hold the the handler instance. A handler is a Ruby // class which works as the persistent Ruby server environment that handles all // requests. All Apache requests are sent in to it, and it can set up the @@ -78,173 +282,221 @@ int ruby_init_module(apr_pool_t* p, server_rec* server) ap_log_perror( APLOG_MARK, APLOG_NOTICE, 0, p, "mod_ruby[%i]: startup_module", getpid() ); - int x = getpid(); - - // Initialize the Ruby VM and load C extensions try { - // Start up VM - ruby::startup("ModRuby Ruby VM"); - - // Set default encoding to UTF-8. - // - // Ruby docs say not to do this within Ruby. So we do it here - // immediately on startup. 
Could do it this way: - // - // rb_eval_string("Encoding.default_external='UTF-8'"); - // - // But this is more 3733t - VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding")); - rb_funcall( encoding, rb_intern("default_external="), - 1, rb_str_new_cstr("UTF-8") ); - - // Apache log constants for log() in ruby_request.cpp - rb_define_global_const("APLOG_EMERG", INT2NUM(APLOG_EMERG)); - rb_define_global_const("APLOG_ALERT", INT2NUM(APLOG_ALERT)); - rb_define_global_const("APLOG_CRIT", INT2NUM(APLOG_CRIT)); - rb_define_global_const("APLOG_ERR", INT2NUM(APLOG_ERR)); - rb_define_global_const("APLOG_WARNING", INT2NUM(APLOG_WARNING)); - rb_define_global_const("APLOG_NOTICE", INT2NUM(APLOG_NOTICE)); - rb_define_global_const("APLOG_INFO", INT2NUM(APLOG_INFO)); - rb_define_global_const("APLOG_DEBUG", INT2NUM(APLOG_DEBUG)); - rb_define_global_const("APLOG_LEVELMASK", INT2NUM(APLOG_LEVELMASK)); - rb_define_global_const("APLOG_NOERRNO", INT2NUM(APLOG_NOERRNO)); - rb_define_global_const("APLOG_TOCLIENT", INT2NUM(APLOG_TOCLIENT)); - rb_define_global_const("APLOG_STARTUP", INT2NUM(APLOG_STARTUP)); - - // Apache method constants - rb_define_global_const("M_GET", INT2NUM(M_GET)); - rb_define_global_const("M_PUT", INT2NUM(M_PUT)); - rb_define_global_const("M_POST", INT2NUM(M_POST)); - rb_define_global_const("M_DELETE", INT2NUM(M_DELETE)); - rb_define_global_const("M_CONNECT", INT2NUM(M_CONNECT)); - rb_define_global_const("M_OPTIONS", INT2NUM(M_OPTIONS)); - rb_define_global_const("M_TRACE", INT2NUM(M_TRACE)); - rb_define_global_const("M_PATCH", INT2NUM(M_PATCH)); - rb_define_global_const("M_PROPFIND", INT2NUM(M_PROPFIND)); - rb_define_global_const("M_PROPPATCH", INT2NUM(M_PROPPATCH)); - rb_define_global_const("M_MKCOL", INT2NUM(M_MKCOL)); - rb_define_global_const("M_COPY", INT2NUM(M_COPY)); - rb_define_global_const("M_MOVE", INT2NUM(M_MOVE)); - rb_define_global_const("M_LOCK", INT2NUM(M_LOCK)); - rb_define_global_const("M_UNLOCK", INT2NUM(M_UNLOCK)); - 
rb_define_global_const("M_VERSION_CONTROL", INT2NUM(M_VERSION_CONTROL)); - rb_define_global_const("M_CHECKOUT", INT2NUM(M_CHECKOUT)); - rb_define_global_const("M_UNCHECKOUT", INT2NUM(M_UNCHECKOUT)); - rb_define_global_const("M_CHECKIN", INT2NUM(M_CHECKIN)); - rb_define_global_const("M_UPDATE", INT2NUM(M_UPDATE)); - rb_define_global_const("M_LABEL", INT2NUM(M_LABEL)); - rb_define_global_const("M_REPORT", INT2NUM(M_REPORT)); - rb_define_global_const("M_MKWORKSPACE", INT2NUM(M_MKWORKSPACE)); - rb_define_global_const("M_MKACTIVITY", INT2NUM(M_MKACTIVITY)); - rb_define_global_const("M_BASELINE_CONTROL", INT2NUM(M_BASELINE_CONTROL)); - rb_define_global_const("M_MERGE", INT2NUM(M_MERGE)); - rb_define_global_const("M_INVALID", INT2NUM(M_INVALID)); - rb_define_global_const("M_METHODS", INT2NUM(64)); - - // Submodules are registered under the Apache namespace - VALUE apache = rb_define_module("Apache"); - - // ModRuby module functions under ModRuby namespace - VALUE ruby = rb_define_module("ModRuby"); - - rb_define_module_function(ruby, "version", (fn)ruby_version, 0); - rb_define_module_function(ruby, "release_date", (fn)ruby_release_date, 0); - - // RHTML scanner - - init_rhtml(); - - // Initialize Ruby extensions/modules - - init_request(apache); // Apache request Ruby wrapper - init_server(apache); // Apache server Ruby wrapper - init_connection(apache); // Apache connection Ruby wrapper - init_process(apache); // Apache process Ruby wrapper - - VALUE apr = rb_define_module("APR"); - - init_apr(apr); // Apache Portable Runtime (APR) - init_apr_array(apr); // APR Arrays - init_apr_file_info(apr); // APR file functions - init_apr_pool(apr); // APR pools - init_apr_table(apr); // APR tables - - // Load the Ruby handler module, which is a pure Ruby class - ruby::require("modruby/handler"); - - // Create the global Ruby handler instance. 
- ruby_handler = new ruby::Object("ModRuby::Handler"); - } - catch (const ruby::Exception& e) - { - fprintf(stderr, "Ruby Exception: %s", e.what()); - - stringstream strm; - strm << "FATAL ERROR: " << e.what(); - log_error(p, strm.str().c_str()); - - return 1; + RubyDispatcher::instance().start(); } catch (const std::exception& e) { - fprintf(stderr, "C++ Exception: %s\n", e.what()); - - stringstream strm; - strm << "FATAL ERROR: " << e.what(); - log_error(p, strm.str().c_str()); - + ap_log_perror( APLOG_MARK, APLOG_CRIT, 0, p, + "mod_ruby[%i]: failed to start Ruby dispatcher: %s", + getpid(), e.what() ); return 1; } - return 0; -} - -int ruby_shutdown_module() -{ - // Call shutdown on all handlers so they can clean up - map::iterator i; - for(i = handlers.begin(); i != handlers.end(); i++) + auto init_job = [p, server]() -> int { - ap_log_error( APLOG_MARK, APLOG_NOTICE, 0, NULL, - "mod_ruby[%i]: ruby_shutdown_module() starting", - getpid() ); - try { - i->second->method("shutdown", 0); + // Set default encoding to UTF-8. + // + // Ruby docs say not to do this within Ruby. So we do it here + // immediately on startup. 
Could do it this way: + // + // rb_eval_string("Encoding.default_external='UTF-8'"); + // + // But this is more 3733t + VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding")); + rb_funcall( encoding, rb_intern("default_external="), + 1, rb_str_new_cstr("UTF-8") ); + + // Apache log constants for log() in ruby_request.cpp + rb_define_global_const("APLOG_EMERG", INT2NUM(APLOG_EMERG)); + rb_define_global_const("APLOG_ALERT", INT2NUM(APLOG_ALERT)); + rb_define_global_const("APLOG_CRIT", INT2NUM(APLOG_CRIT)); + rb_define_global_const("APLOG_ERR", INT2NUM(APLOG_ERR)); + rb_define_global_const("APLOG_WARNING", INT2NUM(APLOG_WARNING)); + rb_define_global_const("APLOG_NOTICE", INT2NUM(APLOG_NOTICE)); + rb_define_global_const("APLOG_INFO", INT2NUM(APLOG_INFO)); + rb_define_global_const("APLOG_DEBUG", INT2NUM(APLOG_DEBUG)); + rb_define_global_const("APLOG_LEVELMASK", INT2NUM(APLOG_LEVELMASK)); + rb_define_global_const("APLOG_NOERRNO", INT2NUM(APLOG_NOERRNO)); + rb_define_global_const("APLOG_TOCLIENT", INT2NUM(APLOG_TOCLIENT)); + rb_define_global_const("APLOG_STARTUP", INT2NUM(APLOG_STARTUP)); + + // Apache method constants + rb_define_global_const("M_GET", INT2NUM(M_GET)); + rb_define_global_const("M_PUT", INT2NUM(M_PUT)); + rb_define_global_const("M_POST", INT2NUM(M_POST)); + rb_define_global_const("M_DELETE", INT2NUM(M_DELETE)); + rb_define_global_const("M_CONNECT", INT2NUM(M_CONNECT)); + rb_define_global_const("M_OPTIONS", INT2NUM(M_OPTIONS)); + rb_define_global_const("M_TRACE", INT2NUM(M_TRACE)); + rb_define_global_const("M_PATCH", INT2NUM(M_PATCH)); + rb_define_global_const("M_PROPFIND", INT2NUM(M_PROPFIND)); + rb_define_global_const("M_PROPPATCH", INT2NUM(M_PROPPATCH)); + rb_define_global_const("M_MKCOL", INT2NUM(M_MKCOL)); + rb_define_global_const("M_COPY", INT2NUM(M_COPY)); + rb_define_global_const("M_MOVE", INT2NUM(M_MOVE)); + rb_define_global_const("M_LOCK", INT2NUM(M_LOCK)); + rb_define_global_const("M_UNLOCK", INT2NUM(M_UNLOCK)); + 
rb_define_global_const("M_VERSION_CONTROL", INT2NUM(M_VERSION_CONTROL)); + rb_define_global_const("M_CHECKOUT", INT2NUM(M_CHECKOUT)); + rb_define_global_const("M_UNCHECKOUT", INT2NUM(M_UNCHECKOUT)); + rb_define_global_const("M_CHECKIN", INT2NUM(M_CHECKIN)); + rb_define_global_const("M_UPDATE", INT2NUM(M_UPDATE)); + rb_define_global_const("M_LABEL", INT2NUM(M_LABEL)); + rb_define_global_const("M_REPORT", INT2NUM(M_REPORT)); + rb_define_global_const("M_MKWORKSPACE", INT2NUM(M_MKWORKSPACE)); + rb_define_global_const("M_MKACTIVITY", INT2NUM(M_MKACTIVITY)); + rb_define_global_const("M_BASELINE_CONTROL", INT2NUM(M_BASELINE_CONTROL)); + rb_define_global_const("M_MERGE", INT2NUM(M_MERGE)); + rb_define_global_const("M_INVALID", INT2NUM(M_INVALID)); + rb_define_global_const("M_METHODS", INT2NUM(64)); + + // Submodules are registered under the Apache namespace + VALUE apache = rb_define_module("Apache"); + + // ModRuby module functions under ModRuby namespace + VALUE ruby = rb_define_module("ModRuby"); + + rb_define_module_function(ruby, "version", (fn)ruby_version, 0); + rb_define_module_function(ruby, "release_date", (fn)ruby_release_date, 0); + + // RHTML scanner + + init_rhtml(); + + // Initialize Ruby extensions/modules + + init_request(apache); // Apache request Ruby wrapper + init_server(apache); // Apache server Ruby wrapper + init_connection(apache); // Apache connection Ruby wrapper + init_process(apache); // Apache process Ruby wrapper + + VALUE apr = rb_define_module("APR"); + + init_apr(apr); // Apache Portable Runtime (APR) + init_apr_array(apr); // APR Arrays + init_apr_file_info(apr); // APR file functions + init_apr_pool(apr); // APR pools + init_apr_table(apr); // APR tables + + // Load the Ruby handler module, which is a pure Ruby class + ruby::require("modruby/handler"); + + // Create the global Ruby handler instance. 
+ ruby_handler = new ruby::Object("ModRuby::Handler"); } catch (const ruby::Exception& e) { - // Create the error message + fprintf(stderr, "Ruby Exception: %s", e.what()); + stringstream strm; - strm << "ruby_shutdown_module(): Ruby Exception: " << e.what() << "\n" - << e.stackdump(); + strm << "FATAL ERROR: " << e.what(); + log_error(p, strm.str().c_str()); - // Log error (critical) - ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, - "mod_ruby[%i] : %s", - getpid(), - strm.str().c_str() ); + return 1; } catch (const std::exception& e) { - fprintf(stderr, "ruby_shutdown_module(): C++ Exception\n"); + fprintf(stderr, "C++ Exception: %s\n", e.what()); + + stringstream strm; + strm << "FATAL ERROR: " << e.what(); + log_error(p, strm.str().c_str()); + + return 1; } + + return 0; + }; + + try + { + return RubyDispatcher::instance().execute(init_job); + } + catch (const std::exception& e) + { + ap_log_perror( APLOG_MARK, APLOG_CRIT, 0, p, + "mod_ruby[%i]: initialization failure: %s", + getpid(), e.what() ); + return 1; } +} - if (ruby_handler != NULL) +int ruby_shutdown_module() +{ + if (!RubyDispatcher::instance().is_started()) { - ruby_handler->method("shutdown", 0); + return 0; + } + + auto shutdown_job = []() -> int + { + // Call shutdown on all handlers so they can clean up + map::iterator i; + for(i = handlers.begin(); i != handlers.end(); i++) + { + ap_log_error( APLOG_MARK, APLOG_NOTICE, 0, NULL, + "mod_ruby[%i]: ruby_shutdown_module() starting", + getpid() ); - delete ruby_handler; - ruby_handler = NULL; + try + { + i->second->method("shutdown", 0); + } + catch (const ruby::Exception& e) + { + // Create the error message + stringstream strm; + strm << "ruby_shutdown_module(): Ruby Exception: " << e.what() << "\n" + << e.stackdump(); + + // Log error (critical) + ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, + "mod_ruby[%i] : %s", + getpid(), + strm.str().c_str() ); + } + catch (const std::exception&) + { + fprintf(stderr, "ruby_shutdown_module(): C++ 
Exception\n"); + } + + delete i->second; + } + + if (ruby_handler != NULL) + { + // A failing Ruby-side shutdown is logged, like the per-handler + // loop above, so the cleanup below always runs. + try + { + ruby_handler->method("shutdown", 0); + } + catch (const ruby::Exception& e) + { + ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, + "mod_ruby[%i] : ruby_shutdown_module(): Ruby Exception: %s", + getpid(), e.what() ); + } + catch (const std::exception& e) + { + ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, + "mod_ruby[%i] : ruby_shutdown_module(): C++ Exception: %s", + getpid(), e.what() ); + } + + delete ruby_handler; + ruby_handler = NULL; + } + + handlers.clear(); + + return 0; + }; + + int result = 0; + + try + { + result = RubyDispatcher::instance().execute(shutdown_job); + } + catch (const std::exception& e) + { + ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, + "mod_ruby[%i]: shutdown failure: %s", + getpid(), e.what() ); + result = 1; } - // Shutdown Ruby environment - ruby::shutdown(); + RubyDispatcher::instance().stop(); - return 0; + return result; } int ruby_log_error(request_rec* r, int level, const char* msg) @@ -607,7 +859,9 @@ modruby::Handler ruby_request_get_access_handler(request_rec* r) return modruby::Handler(); } -int ruby_request_handler(request_rec* r) +// Core HTTP handler implementation that performs the request lifecycle using +// Ruby objects. This must execute on the Ruby dispatcher thread. +static int ruby_request_handler_impl(request_rec* r) { apache::Request req(r); @@ -733,6 +987,12 @@ int ruby_request_handler(request_rec* r) } } +// Public Apache handler entry point. It delegates to the Ruby dispatcher so +// the implementation runs with the GVL. +int ruby_request_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_handler_impl, r); +} // Common code used for RHTML and Ruby script handlers int ruby_generic_handler( request_rec* r, const char* handler_name, @@ -846,17 +1106,32 @@ int ruby_generic_handler( request_rec* r, } } -int ruby_request_rhtml_handler(request_rec* r) +// Implementation for the RHTML content handler that expects the GVL to be held. +static int ruby_request_rhtml_handler_impl(request_rec* r) { return ruby_generic_handler(r, "ruby-rhtml-handler", "rhtml"); } -int ruby_request_script_handler(request_rec* r) +// Apache hook wrapper for the RHTML handler that ensures execution on the Ruby thread. 
+int ruby_request_rhtml_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_rhtml_handler_impl, r); +} + +// Implementation for the Ruby script handler that requires Ruby VM access. +static int ruby_request_script_handler_impl(request_rec* r) { return ruby_generic_handler(r, "ruby-script-handler", "script"); } -int ruby_request_access_handler(request_rec* r) +// Apache hook wrapper for the script handler that marshals work through the dispatcher. +int ruby_request_script_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_script_handler_impl, r); +} + +// Implementation for the access handler hook. Assumes it runs on the Ruby thread. +static int ruby_request_access_handler_impl(request_rec* r) { apache::Request req(r); @@ -964,3 +1239,9 @@ int ruby_request_access_handler(request_rec* r) return OK; } } + +// Apache hook wrapper for the access handler that schedules execution with the GVL. +int ruby_request_access_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_access_handler_impl, r); +} diff --git a/src/ruby.cpp b/src/ruby.cpp index 70726f4..c38457f 100644 --- a/src/ruby.cpp +++ b/src/ruby.cpp @@ -1,11 +1,90 @@ #include #include +#include #include #include "ruby.hpp" using std::string; +extern "C" int ruby_thread_has_gvl_p(void); + +namespace +{ +// Holds the parameters/result for invoking rb_protect under the dispatcher. +struct ProtectCallContext +{ + VALUE (*func)(VALUE); + VALUE arg; + int error; + VALUE result; +}; + +// Trampoline that re-enters rb_protect while the GVL is held. +static void* protect_with_gvl(void* data) +{ + ProtectCallContext* ctx = static_cast(data); + ctx->error = 0; + ctx->result = rb_protect(ctx->func, ctx->arg, &ctx->error); + + return NULL; +} + +// Calls the supplied Ruby C function inside rb_protect, reacquiring the GVL +// if the current native thread does not already own it. 
+static VALUE call_with_protect(VALUE (*func)(VALUE), VALUE arg, int* error) +{ + ProtectCallContext ctx { func, arg, 0, Qnil }; + + if (ruby_thread_has_gvl_p()) + { + ctx.error = 0; + ctx.result = rb_protect(func, arg, &ctx.error); + } + else + { + rb_thread_call_with_gvl(protect_with_gvl, &ctx); + } + + if (error != NULL) + { + *error = ctx.error; + } + + return ctx.result; +} + +// Context structure for safely calling rb_load_protect from non-Ruby threads. +struct LoadCallContext +{ + const char* filename; + int anonymous; + int error; +}; + +// Executes rb_load_protect while the GVL is held. +static void* load_with_gvl(void* data) +{ + LoadCallContext* ctx = static_cast(data); + ctx->error = 0; + rb_load_protect(rb_str_new_cstr(ctx->filename), ctx->anonymous, &ctx->error); + return NULL; +} + +// Runs the given function while ensuring the GVL is owned by this thread. +static void perform_with_gvl(void* (*func)(void*), void* data) +{ + if (ruby_thread_has_gvl_p()) + { + func(data); + } + else + { + rb_thread_call_with_gvl(func, data); + } +} +} // namespace + namespace ruby { @@ -60,9 +139,9 @@ VALUE Object::method(const char* name, int n, ...) arg.argv = argv; int error = 0; - VALUE result = rb_protect( ruby::method_wrap, - reinterpret_cast(&arg), - &error ); + VALUE result = call_with_protect( ruby::method_wrap, + reinterpret_cast(&arg), + &error ); if (error) { @@ -435,7 +514,7 @@ VALUE method(VALUE recv, ID id, int n, ...) arg.argv = argv; int error = 0; - VALUE result = rb_protect(method_wrap, reinterpret_cast(&arg), &error); + VALUE result = call_with_protect(method_wrap, reinterpret_cast(&arg), &error); if (error) { @@ -469,7 +548,7 @@ VALUE vm_method(VALUE recv, ID id, int n, va_list ar) arg.argv = argv; int error = 0; - VALUE result = rb_protect(method_wrap, reinterpret_cast(&arg), &error); + VALUE result = call_with_protect(method_wrap, reinterpret_cast(&arg), &error); if (error) { @@ -546,7 +625,7 @@ bool call_function(const char* method, int n, ...) 
void require(const char* filename) { int error = 0; - rb_protect(require_protect, reinterpret_cast(filename), &error); + call_with_protect(require_protect, reinterpret_cast(filename), &error); if (error) { @@ -562,10 +641,10 @@ void require(const char* filename) void load(const char* filename, int anonymous) { - int error = 0; - rb_load_protect(rb_str_new2(filename), anonymous, &error); + LoadCallContext ctx { filename, anonymous, 0 }; + perform_with_gvl(load_with_gvl, &ctx); - if (error) + if (ctx.error) { Exception e; e.backtrace(); @@ -618,7 +697,7 @@ VALUE create_object(const char* class_name, int n, va_list ar) arg.argv = argv; int error = 0; - VALUE self = rb_protect(create_object_protect, reinterpret_cast(&arg), &error); + VALUE self = call_with_protect(create_object_protect, reinterpret_cast(&arg), &error); if (error) {