Skip to content

[clang][bytecode] Use bytecode interpreter in EvaluateCharRangeAsString #138461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

tbaederr
Copy link
Contributor

@tbaederr tbaederr commented May 4, 2025

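`EvaluateCharRangeAsString` always used the AST walker in ExprConstant.cpp, even when the bytecode interpreter was enabled. With this change it goes through the bytecode interpreter when `-fexperimental-new-constant-interpreter` is in effect.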

@llvmbot llvmbot added the labels clang (Clang issues not falling into any other category), clang:frontend (Language frontend issues, e.g. anything involving "Sema"), and clang:bytecode (Issues for the clang bytecode constexpr interpreter) on May 4, 2025
@llvmbot
Copy link
Member

llvmbot commented May 4, 2025

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

Changes

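`EvaluateCharRangeAsString` always used the AST walker in ExprConstant.cpp, even when the bytecode interpreter was enabled. With this change it goes through the bytecode interpreter when `-fexperimental-new-constant-interpreter` is in effect.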


Full diff: https://github.com/llvm/llvm-project/pull/138461.diff

9 Files Affected:

  • (modified) clang/lib/AST/ByteCode/Context.cpp (+79)
  • (modified) clang/lib/AST/ByteCode/Context.h (+9)
  • (modified) clang/lib/AST/ByteCode/EvalEmitter.cpp (+23)
  • (modified) clang/lib/AST/ByteCode/EvalEmitter.h (+6)
  • (modified) clang/lib/AST/ByteCode/EvaluationResult.h (+1-5)
  • (modified) clang/lib/AST/ByteCode/Pointer.h (+7)
  • (modified) clang/lib/AST/ExprConstant.cpp (+6-1)
  • (modified) clang/test/SemaCXX/gnu-asm-constexpr.cpp (+1)
  • (modified) clang/test/SemaCXX/static-assert-cxx26.cpp (+1)
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index b35b30cc20d81..db9df7abf7a29 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -134,6 +134,85 @@ bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD,
   return true;
 }
 
+/// Evaluates SizeExpr to an integer and PtrExpr to a pointer, then copies up
+/// to that many one-byte elements from the pointee array into Result (an
+/// APValue array or a std::string). Returns false if any step fails.
+template <typename ResultT>
+bool Context::evaluateStringRepr(State &Parent, const Expr *SizeExpr,
+                                 const Expr *PtrExpr, ResultT &Result) {
+  assert(Stk.empty());
+  Compiler<EvalEmitter> C(*this, *P, Parent, Stk);
+
+  // The size expression must evaluate to an integer.
+  APValue SizeValue;
+  if (!evaluateAsRValue(Parent, SizeExpr, SizeValue) || !SizeValue.isInt())
+    return false;
+  uint64_t Size = SizeValue.getInt().getZExtValue();
+
+  auto PtrRes = C.interpretAsPointer(PtrExpr, [&](const Pointer &Ptr) {
+    if (Size == 0) {
+      if constexpr (std::is_same_v<ResultT, APValue>)
+        Result = APValue(APValue::UninitArray{}, 0, 0);
+      return true;
+    }
+
+    if (!Ptr.isLive() || !Ptr.getFieldDesc()->isPrimitiveArray())
+      return false;
+
+    // Only one-byte (char-sized) elements are supported.
+    if (Ptr.getFieldDesc()->getElemSize() != 1 /*bytes*/)
+      return false;
+
+    // NOTE(review): confirm getNumElems() counts from Ptr's index; otherwise a
+    // pointer into the middle of an array can still be read past its end.
+    if (Size > Ptr.getNumElems()) {
+      Parent.FFDiag(SizeExpr, diag::note_constexpr_access_past_end) << AK_Read;
+      Size = Ptr.getNumElems();
+    }
+
+    if constexpr (std::is_same_v<ResultT, APValue>) {
+      QualType CharTy = PtrExpr->getType()->getPointeeType();
+      Result = APValue(APValue::UninitArray{}, Size, Size);
+      for (uint64_t I = 0; I != Size; ++I) {
+        if (std::optional<APValue> ElemVal =
+                Ptr.atIndex(I).toRValue(*this, CharTy))
+          Result.getArrayInitializedElt(I) = *ElemVal;
+        else
+          return false;
+      }
+    } else {
+      static_assert(std::is_same_v<ResultT, std::string>);
+      Result.assign(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size);
+    }
+
+    return true;
+  });
+
+  if (PtrRes.isInvalid()) {
+    C.cleanup();
+    Stk.clear();
+    return false;
+  }
+
+  return true;
+}
+
+bool Context::evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                                const Expr *PtrExpr, APValue &Result) {
+  assert(SizeExpr);
+  assert(PtrExpr);
+
+  return evaluateStringRepr(Parent, SizeExpr, PtrExpr, Result);
+}
+
+bool Context::evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                                const Expr *PtrExpr, std::string &Result) {
+  assert(SizeExpr);
+  assert(PtrExpr);
+
+  return evaluateStringRepr(Parent, SizeExpr, PtrExpr, Result);
+}
+
 const LangOptions &Context::getLangOpts() const { return Ctx.getLangOpts(); }
 
 std::optional<PrimType> Context::classify(QualType T) const {
diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h
index 5a39f40ef3f11..33bc9fad883f8 100644
--- a/clang/lib/AST/ByteCode/Context.h
+++ b/clang/lib/AST/ByteCode/Context.h
@@ -59,6 +59,11 @@ class Context final {
   /// Evaluates a toplevel initializer.
   bool evaluateAsInitializer(State &Parent, const VarDecl *VD, APValue &Result);
 
+  bool evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                         const Expr *PtrExpr, APValue &Result);
+  bool evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                         const Expr *PtrExpr, std::string &Result);
+
   /// Returns the AST context.
   ASTContext &getASTContext() const { return Ctx; }
   /// Returns the language options.
@@ -122,6 +127,10 @@ class Context final {
   /// Runs a function.
   bool Run(State &Parent, const Function *Func);
 
+  template <typename ResultT>
+  bool evaluateStringRepr(State &Parent, const Expr *SizeExpr,
+                          const Expr *PtrExpr, ResultT &Result);
+
   /// Current compilation context.
   ASTContext &Ctx;
   /// Interpreter stack, shared across invocations.
diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp
index 71d688498ffa5..90aca568c9394 100644
--- a/clang/lib/AST/ByteCode/EvalEmitter.cpp
+++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp
@@ -72,6 +72,25 @@ EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD,
   return std::move(this->EvalResult);
 }
 
+EvaluationResult EvalEmitter::interpretAsPointer(const Expr *E,
+                                                 PtrCallback PtrCB) {
+  this->ReturnPointer = true; // Makes emitRet<PT_Ptr> call PtrCB.
+  this->PtrCB = PtrCB;
+
+  S.setEvalLocation(E->getExprLoc());
+  this->ConvertResultToRValue = false;
+  this->CheckFullyInitialized = false;
+  EvalResult.setSource(E);
+
+  if (!this->visitExpr(E, true)) {
+    // EvalResult may already have a result set, but something failed
+    // after that (e.g. evaluating destructors).
+    EvalResult.setInvalid();
+  }
+
+  return std::move(this->EvalResult);
+}
+
 void EvalEmitter::emitLabel(LabelTy Label) { CurrentLabel = Label; }
 
 EvalEmitter::LabelTy EvalEmitter::getLabel() { return NextLabel++; }
@@ -170,6 +189,10 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) {
     return true;
   }
 
+  // If we're returning a raw pointer, call our callback.
+  if (this->ReturnPointer)
+    return this->PtrCB(Ptr);
+
   if (!EvalResult.checkReturnValue(S, Ctx, Ptr, Info))
     return false;
   if (CheckFullyInitialized && !EvalResult.checkFullyInitialized(S, Ptr))
diff --git a/clang/lib/AST/ByteCode/EvalEmitter.h b/clang/lib/AST/ByteCode/EvalEmitter.h
index f53f86c31ec1e..4f4a78f39ef32 100644
--- a/clang/lib/AST/ByteCode/EvalEmitter.h
+++ b/clang/lib/AST/ByteCode/EvalEmitter.h
@@ -32,11 +32,14 @@ class EvalEmitter : public SourceMapper {
   using LabelTy = uint32_t;
   using AddrTy = uintptr_t;
   using Local = Scope::Local;
+  using PtrCallback = llvm::function_ref<bool(const Pointer &)>;
 
   EvaluationResult interpretExpr(const Expr *E,
                                  bool ConvertResultToRValue = false,
                                  bool DestroyToplevelScope = false);
   EvaluationResult interpretDecl(const VarDecl *VD, bool CheckFullyInitialized);
+  /// Interprets E and, if it yields a pointer, passes that pointer to PtrCB.
+  EvaluationResult interpretAsPointer(const Expr *E, PtrCallback PtrCB);
 
   /// Clean up all resources.
   void cleanup();
@@ -101,6 +104,9 @@ class EvalEmitter : public SourceMapper {
   /// Whether we should check if the result has been fully
   /// initialized.
   bool CheckFullyInitialized = false;
+  bool ReturnPointer = false;
+
+  PtrCallback PtrCB;
 
   /// Temporaries which require storage.
   llvm::DenseMap<unsigned, std::unique_ptr<char[]>> Locals;
diff --git a/clang/lib/AST/ByteCode/EvaluationResult.h b/clang/lib/AST/ByteCode/EvaluationResult.h
index ef662e3779bc3..3b6c65eff1ef8 100644
--- a/clang/lib/AST/ByteCode/EvaluationResult.h
+++ b/clang/lib/AST/ByteCode/EvaluationResult.h
@@ -61,11 +61,6 @@ class EvaluationResult final {
     Value = std::move(V);
     Kind = RValue;
   }
-  void setPointer(const Pointer P) {
-    assert(empty());
-    Value = P;
-    Kind = LValue;
-  }
   void setFunctionPointer(const FunctionPointer &P) {
     assert(empty());
     Value = P;
@@ -88,6 +83,7 @@ class EvaluationResult final {
   bool isInvalid() const { return Kind == Invalid; }
   bool isLValue() const { return Kind == LValue; }
   bool isRValue() const { return Kind == RValue; }
+  bool isPointer() const { return std::holds_alternative<Pointer>(Value); }
 
   /// Returns an APValue for the evaluation result. The returned
   /// APValue might be an LValue or RValue.
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 5e7c5d69f20da..19770aa3b97bc 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -613,6 +613,13 @@ class Pointer {
 
   const Block *block() const { return asBlockPointer().Pointee; }
 
+  /// Returns the address of this pointer's data: the block's raw data plus
+  /// Offset. Must only be called on block pointers (asserted).
+  const std::byte *getRawAddress() const {
+    assert(isBlockPointer());
+    return asBlockPointer().Pointee->rawData() + Offset;
+  }
+
   /// Returns the index into an array.
   int64_t getIndex() const {
     if (!isBlockPointer())
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f2e49b9ea669e..441e1f955874a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -18017,10 +18017,14 @@ static bool EvaluateCharRangeAsStringImpl(const Expr *, T &Result,
                                           const Expr *PtrExpression,
                                           ASTContext &Ctx,
                                           Expr::EvalResult &Status) {
-  LValue String;
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression);
   Info.InConstantContext = true;
 
+  if (Info.EnableNewConstInterp)
+    return Info.Ctx.getInterpContext().evaluateCharRange(Info, SizeExpression,
+                                                         PtrExpression, Result);
+
+  LValue String;
   FullExpressionRAII Scope(Info);
   APSInt SizeValue;
   if (!::EvaluateInteger(SizeExpression, SizeValue, Info))
@@ -18075,6 +18079,7 @@ bool Expr::EvaluateCharRangeAsString(APValue &Result,
                                      const Expr *SizeExpression,
                                      const Expr *PtrExpression, ASTContext &Ctx,
                                      EvalResult &Status) const {
+
   return EvaluateCharRangeAsStringImpl(this, Result, SizeExpression,
                                        PtrExpression, Ctx, Status);
 }
diff --git a/clang/test/SemaCXX/gnu-asm-constexpr.cpp b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
index 8813b873fab0c..77466df12bdc1 100644
--- a/clang/test/SemaCXX/gnu-asm-constexpr.cpp
+++ b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++26 -triple x86_64-gnu-linux
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++26 -triple x86_64-gnu-linux -fexperimental-new-constant-interpreter
 
 template <bool Leak>
 struct RAIIBase {
diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp
index 7d896d8b365b7..b53c67ee67932 100644
--- a/clang/test/SemaCXX/static-assert-cxx26.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify
+// RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify -fexperimental-new-constant-interpreter
 
 static_assert(true, "");
 static_assert(true, 0); // expected-error {{the message in a static assertion must be a string literal or an object with 'data()' and 'size()' member functions}}

@tbaederr tbaederr force-pushed the string-literals branch from 8d76bc9 to 6cc6ff2 Compare May 4, 2025 17:15
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:bytecode Issues for the clang bytecode constexpr interpreter clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants