Review notes. Everything ahead of the first "diff --git" header is commentary;
git-apply and patch(1) skip it.

Fixes folded into the added ('+') lines:
  * leadingZeroBits(x) ignored its argument and expanded to the caller's `v`,
    so the `v | 1` guard in writeNumber() was lost and writeNumber(0) reached
    __builtin_clz(0), whose result is undefined. The macro now uses `x`.
  * randomAsciiChar() reduces the high half of the LCG state. With a 2^64
    modulus the low-order bits cycle with very short periods (bit 0 simply
    alternates), and `% 26` exposed that as alternating letter parity.
  * The Windows branch of chunkedWriteToStdout() returns when WriteFile()
    fails. Previously nwritten stayed 0 and the loop never terminated.
  * stdoutMode is value-initialised, so the restore at the end of main() never
    passes an indeterminate value when GetConsoleMode() fails (for example
    when stdout is redirected to a file).
  * Comment typo in writeNumber(): "only half the divisions are necessary".

Caveats:
  * This patch reached review with its line breaks and indentation collapsed
    and with every <...> token removed. Line layout, indentation and blank
    context lines were rebuilt from the hunk counts and must be checked
    against the repository before applying.
  * Template arguments were restored on '+' lines only, from the visible code
    or the documented Win32 signatures. `unsigned short` in getTerminalSize()
    is inferred from the POSIX branch, which brace-initialises the same fields
    from ws_col/ws_row. '-' and context lines, and all #include targets, are
    left exactly as found.
  * Hunk counts in tb/main.cpp are updated for the one extra line; the
    "index" hashes are not and no longer describe the post-image.

diff --git a/libtermbench/termbench.cpp b/libtermbench/termbench.cpp
index 814d4cc..cacb443 100644
--- a/libtermbench/termbench.cpp
+++ b/libtermbench/termbench.cpp
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "termbench.h"
+#include
 
 #include
 #include
@@ -26,6 +26,12 @@ using namespace std::string_view_literals;
 
 namespace termbench
 {
+#if defined(_MSC_VER)
+    #define leadingZeroBits(x) __lzcnt(x)
+#else
+    #define leadingZeroBits(x) __builtin_clz(x)
+#endif
+
 namespace
 {
     std::string sizeStr(double _value)
@@ -69,7 +75,6 @@ void Benchmark::updateWindowTitle(std::string_view _title)
     std::cout.flush();
 }
 
-
 void Benchmark::writeOutput(Buffer const& testBuffer)
 {
     auto const output = testBuffer.output();
@@ -94,15 +99,8 @@ void Benchmark::runAll()
         test->setup(terminalSize_);
 
         while (buffer->good())
-        {
             test->fill(*buffer);
 
-            updateWindowTitle(std::format("{}: filling buffer {:.3}%",
-                                          test->name,
-                                          static_cast(buffer->size())
-                                              / static_cast(1024 * 1024 * testSizeMB_)));
-        }
-
         auto const beginTime = steady_clock::now();
         writeOutput(*buffer);
         buffer->clear();
@@ -161,11 +159,17 @@ namespace termbench::tests
 
 namespace
 {
-    static char randomAsciiChar()
+    static char randomAsciiChar() noexcept
     {
         auto constexpr Min = 'a'; // 0x20;
         auto constexpr Max = 'z'; // 0x7E;
-        return static_cast(Min + rand() % (Max - Min + 1));
+
+        // Knuth's MMIX LCG. Only the high half is reduced: the low-order bits have very short periods.
+        static uint64_t state = 1442695040888963407;
+        const auto v = state * 6364136223846793005 + 1442695040888963407;
+        state = v;
+
+        return static_cast<char>(Min + (v >> 32) % (Max - Min + 1));
     }
 
     void writeChar(Buffer& _sink, char ch)
@@ -173,17 +177,44 @@ namespace
         _sink.write(std::string_view { &ch, 1 });
     }
 
-    void writeNumber(Buffer& _sink, unsigned _value)
+    void writeNumber(Buffer& _sink, unsigned v)
     {
-        unsigned remains = _value;
-        for (unsigned divisor = 1000000000; divisor != 0; divisor /= 10)
+        // This implements https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 but with lzcnt
+        // for log2.
+        static constexpr uint32_t powers_of_10[] {
+            0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
+        };
+        const auto t = (32 - leadingZeroBits(v | 1)) * 1233 >> 12;
+        const auto log10 = t - (v < powers_of_10[t]);
+
+        // Mapping 2 digits at a time speeds things up a lot because only half the divisions are necessary.
+        // I got this idea from https://github.com/fmtlib/fmt which in turn got it
+        // from the talk "Three Optimization Tips for C++" by Andrei Alexandrescu.
+        static constexpr auto lut = "0001020304050607080910111213141516171819"
+                                    "2021222324252627282930313233343536373839"
+                                    "4041424344454647484950515253545556575859"
+                                    "6061626364656667686970717273747576777879"
+                                    "8081828384858687888990919293949596979899";
+
+        char buffer[16];
+        const auto digits = log10 + 1;
+        auto p = &buffer[digits];
+        auto r = digits;
+
+        while (r > 1)
         {
-            auto const digit = remains / divisor;
-            remains -= digit * divisor;
-
-            if (digit || (_value != remains) || (divisor == 1))
-                writeChar(_sink, static_cast('0' + digit));
+            const auto s = &lut[(v % 100) * 2];
+            *--p = s[1];
+            *--p = s[0];
+            v /= 100;
+            r -= 2;
         }
+        if (r & 1)
+        {
+            *--p = static_cast<char>('0' + v);
+        }
+
+        _sink.write({ &buffer[0], static_cast<size_t>(digits) });
     }
 
     void moveCursor(Buffer& _sink, unsigned x, unsigned y)
diff --git a/tb/main.cpp b/tb/main.cpp
index 7449130..a5b8d43 100644
--- a/tb/main.cpp
+++ b/tb/main.cpp
@@ -38,6 +38,10 @@ using namespace std::placeholders;
     #include
 #endif
 
+#ifndef STDOUT_FILENO
+    #define STDOUT_FILENO 1
+#endif
+
 #define STDOUT_FASTPATH_FD 3
 
 using termbench::TerminalSize;
@@ -55,8 +59,15 @@ TerminalSize getTerminalSize() noexcept
         return DefaultSize;
     return { ws.ws_col, ws.ws_row };
 #else
-    // TODO: Windows
-    return DefaultSize;
+    CONSOLE_SCREEN_BUFFER_INFOEX info {
+        .cbSize = sizeof(info),
+    };
+    if (!GetConsoleScreenBufferInfoEx(GetStdHandle(STD_OUTPUT_HANDLE), &info))
+        return DefaultSize;
+    return {
+        static_cast<unsigned short>(info.srWindow.Right - info.srWindow.Left + 1),
+        static_cast<unsigned short>(info.srWindow.Bottom - info.srWindow.Top + 1),
+    };
 #endif
 }
 
@@ -69,31 +80,33 @@ void chunkedWriteToStdout(char const* _data, size_t _size)
 {
     auto constexpr PageSize = 4096; // 8192;
 
-#if defined(_WIN32)
-    HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE);
-    DWORD nwritten {};
-#endif
-
+#if !defined(_WIN32)
     while (_size >= PageSize)
     {
-#if !defined(_WIN32)
         auto const n = write(StdoutFileNo, _data, PageSize);
         if (n < 0)
             perror("write");
-        _data += n;
-        _size -= static_cast(n);
-#else
-        WriteConsoleA(stdoutHandle, _data, static_cast(_size), &nwritten, nullptr);
-#endif
+        else
+        {
+            _data += n;
+            _size -= static_cast<size_t>(n);
+        }
     }
-
-#if !defined(_WIN32)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-result"
     write(StdoutFileNo, _data, _size);
 #pragma GCC diagnostic pop
 #else
-    WriteConsoleA(stdoutHandle, _data, static_cast(_size), &nwritten, nullptr);
+    HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE);
+    DWORD nwritten {};
+    while (_size >= PageSize)
+    {
+        if (!WriteFile(stdoutHandle, _data, static_cast<DWORD>(_size), &nwritten, nullptr))
+            return; // nwritten is 0 on failure; looping again would never make progress
+        _data += nwritten;
+        _size -= static_cast<size_t>(nwritten);
+    }
+    WriteFile(stdoutHandle, _data, static_cast<DWORD>(_size), &nwritten, nullptr);
 #endif
 }
 
@@ -204,7 +217,7 @@ bool addTestsToBenchmark(termbench::Benchmark& tb, BenchSettings const& settings
             cerr << std::format("Failed to load file '{}'.\n", test.string());
             return false;
         }
-        tb.add(termbench::tests::crafted(test.filename(), "", std::move(content)));
+        tb.add(termbench::tests::crafted(test.filename().string(), "", std::move(content)));
     }
 
     if (settings.columnByColumn)
@@ -253,11 +266,15 @@ struct WithScopedTerminalSize
 int main(int argc, char const* argv[])
 {
 #if defined(_WIN32)
-    {
-        HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE);
-        SetConsoleMode(stdoutHandle, ENABLE_VIRTUAL_TERMINAL_PROCESSING);
-    }
+    const auto stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE);
+    const auto stdoutCP = GetConsoleOutputCP();
+    DWORD stdoutMode {};
+    GetConsoleMode(stdoutHandle, &stdoutMode);
+    SetConsoleMode(stdoutHandle,
+                   ENABLE_PROCESSED_OUTPUT | ENABLE_WRAP_AT_EOL_OUTPUT | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
+    SetConsoleOutputCP(CP_UTF8);
 #endif
+
     auto const initialTerminalSize = getTerminalSize();
     auto const settings = parseArguments(argc, argv, initialTerminalSize);
 
@@ -293,5 +310,9 @@ int main(int argc, char const* argv[])
         tb.summarize(writerToFile);
     }
 
+#if defined(_WIN32)
+    SetConsoleMode(stdoutHandle, stdoutMode);
+    SetConsoleOutputCP(stdoutCP);
+#endif
     return EXIT_SUCCESS;
 }