Files
libreoffice/compilerplugins/clang/bufferadd.cxx

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

393 lines
14 KiB
C++
Raw Normal View History

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef LO_CLANG_SHARED_PLUGINS
#include <cassert>
#include <string>
#include <iostream>
#include <unordered_set>
#include "plugin.hxx"
#include "check.hxx"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/StmtVisitor.h"
/**
Look for *StringBuffer append sequences which can be converted to *String + sequences.
*/
namespace
{
class BufferAdd : public loplugin::FilteringPlugin<BufferAdd>
{
public:
explicit BufferAdd(loplugin::InstantiationData const& data)
: FilteringPlugin(data)
{
}
bool preRun() override
{
std::string fn(handler.getMainFileName());
loplugin::normalizeDotDotInFilePath(fn);
if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustring/"))
return false;
if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustringbuffer/"))
return false;
if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/strings/"))
return false;
if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/OStringBuffer/"))
return false;
// some false +
if (loplugin::isSamePathname(fn, SRCDIR "/unoidl/source/sourcetreeprovider.cxx"))
return false;
if (loplugin::isSamePathname(fn, SRCDIR "/writerfilter/source/dmapper/StyleSheetTable.cxx"))
return false;
if (loplugin::isSamePathname(fn, SRCDIR "/writerfilter/source/dmapper/GraphicImport.cxx"))
return false;
if (loplugin::isSamePathname(fn, SRCDIR "/sdext/source/pdfimport/pdfparse/pdfparse.cxx"))
return false;
return true;
}
void postRun() override
{
for (auto const& pair : goodMap)
if (!isa<ParmVarDecl>(pair.first) &&
// reference types have slightly weird behaviour
!pair.first->getType()->isReferenceType()
&& badMap.find(pair.first) == badMap.end())
report(DiagnosticsEngine::Warning,
"convert this append sequence into a *String + sequence",
compat::getBeginLoc(pair.first))
<< pair.first->getSourceRange();
}
virtual void run() override
{
if (!preRun())
return;
TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
postRun();
}
bool VisitStmt(Stmt const*);
bool VisitCallExpr(CallExpr const*);
bool VisitCXXConstructExpr(CXXConstructExpr const*);
bool VisitUnaryOperator(UnaryOperator const*);
private:
void findBufferAssignOrAdd(const Stmt* parentStmt, Stmt const*);
Expr const* ignore(Expr const*);
bool isSideEffectFree(Expr const*);
bool isMethodOkToMerge(CXXMemberCallExpr const*);
void addToGoodMap(const VarDecl* varDecl, const Stmt* parentStmt);
std::unordered_map<const VarDecl*, const Stmt*> goodMap;
std::unordered_set<const VarDecl*> badMap;
};
bool BufferAdd::VisitStmt(Stmt const* stmt)
{
if (ignoreLocation(stmt))
return true;
if (!isa<CompoundStmt>(stmt) && !isa<CXXCatchStmt>(stmt) && !isa<CXXForRangeStmt>(stmt)
&& !isa<CXXTryStmt>(stmt) && !isa<DoStmt>(stmt) && !isa<ForStmt>(stmt) && !isa<IfStmt>(stmt)
&& !isa<SwitchStmt>(stmt) && !isa<WhileStmt>(stmt))
return true;
for (auto it = stmt->child_begin(); it != stmt->child_end(); ++it)
if (*it)
findBufferAssignOrAdd(stmt, *it);
return true;
}
bool BufferAdd::VisitCallExpr(CallExpr const* callExpr)
{
if (ignoreLocation(callExpr))
return true;
for (unsigned i = 0; i != callExpr->getNumArgs(); ++i)
{
auto a = ignore(callExpr->getArg(i));
if (auto declRefExpr = dyn_cast<DeclRefExpr>(a))
if (auto varDecl = dyn_cast<VarDecl>(declRefExpr->getDecl()))
badMap.insert(varDecl);
}
return true;
}
bool BufferAdd::VisitCXXConstructExpr(CXXConstructExpr const* callExpr)
{
if (ignoreLocation(callExpr))
return true;
for (unsigned i = 0; i != callExpr->getNumArgs(); ++i)
{
auto a = ignore(callExpr->getArg(i));
if (auto declRefExpr = dyn_cast<DeclRefExpr>(a))
if (auto varDecl = dyn_cast<VarDecl>(declRefExpr->getDecl()))
badMap.insert(varDecl);
}
return true;
}
bool BufferAdd::VisitUnaryOperator(const UnaryOperator* unaryOp)
{
if (ignoreLocation(unaryOp))
return true;
if (unaryOp->getOpcode() != UO_AddrOf)
return true;
auto a = ignore(unaryOp->getSubExpr());
if (auto declRefExpr = dyn_cast<DeclRefExpr>(a))
if (auto varDecl = dyn_cast<VarDecl>(declRefExpr->getDecl()))
badMap.insert(varDecl);
return true;
}
void BufferAdd::findBufferAssignOrAdd(const Stmt* parentStmt, Stmt const* stmt)
{
if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt))
stmt = exprCleanup->getSubExpr();
if (auto switchCase = dyn_cast<SwitchCase>(stmt))
stmt = switchCase->getSubStmt();
if (auto declStmt = dyn_cast<DeclStmt>(stmt))
{
if (declStmt->isSingleDecl())
if (auto varDeclLHS = dyn_cast_or_null<VarDecl>(declStmt->getSingleDecl()))
{
auto tc = loplugin::TypeCheck(varDeclLHS->getType());
if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
&& !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
return;
if (varDeclLHS->getStorageDuration() == SD_Static)
return;
if (!varDeclLHS->hasInit())
return;
auto cxxConstructExpr = dyn_cast<CXXConstructExpr>(ignore(varDeclLHS->getInit()));
if (cxxConstructExpr)
{
addToGoodMap(varDeclLHS, parentStmt);
return;
}
if (!isSideEffectFree(varDeclLHS->getInit()))
badMap.insert(varDeclLHS);
else
addToGoodMap(varDeclLHS, parentStmt);
}
return;
}
// check for single calls to buffer method
if (auto memberCallExpr = dyn_cast<CXXMemberCallExpr>(stmt))
{
if (auto declRefExprLHS
= dyn_cast<DeclRefExpr>(ignore(memberCallExpr->getImplicitObjectArgument())))
{
auto methodDecl = memberCallExpr->getMethodDecl();
if (methodDecl && methodDecl->getIdentifier())
if (auto varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl()))
{
auto tc = loplugin::TypeCheck(varDeclLHS->getType());
if (tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
|| tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
{
if (isMethodOkToMerge(memberCallExpr))
addToGoodMap(varDeclLHS, parentStmt);
else
badMap.insert(varDeclLHS);
}
}
return;
}
}
// now check for chained append calls
auto expr = dyn_cast<Expr>(stmt);
if (!expr)
return;
auto tc = loplugin::TypeCheck(expr->getType());
if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
&& !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
return;
// unwrap the chain (which runs from right to left)
const VarDecl* varDeclLHS = nullptr;
bool good = true;
while (true)
{
auto memberCallExpr = dyn_cast<CXXMemberCallExpr>(expr);
if (!memberCallExpr)
break;
good &= isMethodOkToMerge(memberCallExpr);
if (auto declRefExprLHS
= dyn_cast<DeclRefExpr>(ignore(memberCallExpr->getImplicitObjectArgument())))
{
varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl());
break;
}
expr = memberCallExpr->getImplicitObjectArgument();
}
if (varDeclLHS)
{
if (good)
addToGoodMap(varDeclLHS, parentStmt);
else
badMap.insert(varDeclLHS);
}
}
void BufferAdd::addToGoodMap(const VarDecl* varDecl, const Stmt* parentStmt)
{
// check that vars are all inside the same compoundstmt, if they are not, we cannot combine them
auto it = goodMap.find(varDecl);
if (it != goodMap.end())
{
if (it->second == parentStmt)
return;
// don't treat these as parents, otherwise we eliminate .append.append sequences
if (isa<MemberExpr>(parentStmt))
return;
if (isa<CXXMemberCallExpr>(parentStmt))
return;
badMap.insert(varDecl);
}
else
goodMap.emplace(varDecl, parentStmt);
}
bool BufferAdd::isMethodOkToMerge(CXXMemberCallExpr const* memberCall)
{
auto methodDecl = memberCall->getMethodDecl();
if (methodDecl->getNumParams() == 0)
return true;
if (auto const id = methodDecl->getIdentifier())
{
auto name = id->getName();
if (name == "appendUninitialized" || name == "setLength" || name == "remove"
|| name == "insert" || name == "appendAscii" || name == "appendUtf32")
return false;
}
auto rhs = memberCall->getArg(0);
if (!isSideEffectFree(rhs))
return false;
return true;
}
Expr const* BufferAdd::ignore(Expr const* expr)
{
return compat::IgnoreImplicit(compat::IgnoreImplicit(expr)->IgnoreParens());
}
bool BufferAdd::isSideEffectFree(Expr const* expr)
{
expr = ignore(expr);
// I don't think the OUStringAppend functionality can handle this efficiently
if (isa<ConditionalOperator>(expr))
return false;
// Multiple statements have a well defined evaluation order (sequence points between them)
// but a single expression may be evaluated in arbitrary order;
// if there are side effects in one of the sub-expressions that have an effect on another subexpression,
// the result may be incorrect, and you don't necessarily notice in tests because the order is compiler-dependent.
// for example see commit afd743141f7a7dd05914d0872c9afe079f16fe0c where such a refactoring introduced such a bug.
// So only consider simple RHS expressions.
if (!expr->HasSideEffects(compiler.getASTContext()))
return true;
// check for chained adds which are side-effect free
if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(expr))
{
auto op = operatorCall->getOperator();
if (op == OO_PlusEqual || op == OO_Plus)
if (isSideEffectFree(operatorCall->getArg(0))
&& isSideEffectFree(operatorCall->getArg(1)))
return true;
}
if (auto callExpr = dyn_cast<CallExpr>(expr))
{
// check for calls through OUString::number/OUString::unacquired
if (auto calleeMethodDecl = dyn_cast_or_null<CXXMethodDecl>(callExpr->getCalleeDecl()))
if (calleeMethodDecl && calleeMethodDecl->getIdentifier())
{
if (callExpr->getNumArgs() > 0)
{
auto tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
if (tc.Class("OUString") || tc.Class("OString"))
{
if (isSideEffectFree(callExpr->getArg(0)))
return true;
}
}
}
if (auto calleeFunctionDecl = dyn_cast_or_null<FunctionDecl>(callExpr->getCalleeDecl()))
if (calleeFunctionDecl && calleeFunctionDecl->getIdentifier())
{
auto name = calleeFunctionDecl->getName();
// check for calls through OUStringToOString
if (name == "OUStringToOString" || name == "OStringToOUString")
if (isSideEffectFree(callExpr->getArg(0)))
return true;
// allowlist some known-safe methods
if (name.endswith("ResId") || name == "GetXMLToken")
if (isSideEffectFree(callExpr->getArg(0)))
return true;
}
// O[U]String::operator std::[u16]string_view:
if (auto const d = dyn_cast_or_null<CXXConversionDecl>(callExpr->getCalleeDecl()))
{
auto tc = loplugin::TypeCheck(d->getParent());
if (tc.Class("OString") || tc.Class("OUString"))
{
return true;
}
}
}
// sometimes we have a constructor call on the RHS
if (auto constructExpr = dyn_cast<CXXConstructExpr>(expr))
{
auto dc = loplugin::DeclCheck(constructExpr->getConstructor());
if (dc.MemberFunction().Class("OUString") || dc.MemberFunction().Class("OString")
|| dc.MemberFunction().Class("OUStringBuffer")
|| dc.MemberFunction().Class("OStringBuffer"))
if (constructExpr->getNumArgs() == 0 || isSideEffectFree(constructExpr->getArg(0)))
return true;
// Expr::HasSideEffects does not like stuff that passes through OUStringLiteral
auto dc2 = loplugin::DeclCheck(constructExpr->getConstructor()->getParent());
Turn OUStringLiteral into a consteval'ed, static-refcound rtl_uString ...from which an OUString can cheaply be instantiated. This is the OUString equivalent of 4b9e440c51be3e40326bc90c33ae69885bfb51e4 "Turn OStringLiteral into a consteval'ed, static-refcound rtl_String". Most remarks about that commit apply here too (this commit is just substantially bigger and a bit more complicated because there were so much more uses of OUStringLiteral than of OStringLiteral): The one downside is that OUStringLiteral now needs to be a template abstracting over the string length. But any uses for which that is a problem (e.g., as the element type of a container that would no longer be homogeneous, or in the signature of a function that shall not be turned into a template for one reason or another) can be replaced with std::u16string_view, without loss of efficiency compared to the original OUStringLiteral, and without loss of expressivity. The new OUStringLiteral ctor code would probably not be very efficient if it were ever executed at runtime, but it is intended to be only executed at compile time. Where available, C++20 "consteval" is used to statically ensure that. The intended use of the new OUStringLiteral is in all cases where an object that shall itself not be an OUString (e.g., because it shall be a global static variable for which the OUString ctor/dtor would be detrimental at library load/unload) must be converted to an OUString instance in at least one place. Other string literal abstractions could use std::u16string_view (or just plain char16_t const[N]), but interestingly OUStringLiteral might be more efficient than constexpr std::u16string_view even for such cases, as it should not need any relocations at library load time. For now, no existing uses of OUStringLiteral have been changed to some other abstraction (unless technically necessary as discussed above), and no additional places that would benefit from OUStringLiteral have been changed to use it. Global constexpr OUStringLiteral variables defined in an included file would be somewhat suboptimal, as each translation unit that uses them would create its own, unshared instance. The envisioned solution is to turn them into static data members of some class (and there may be a loplugin coming to find and fix affected places). Another approach that has been taken here in a few cases where such variables were only used in one .cxx anyway is to move their definitions from the .hxx into that one .cxx (in turn causing some files to become empty and get removed completely)---which also silenced some GCC -Werror=unused-variable if a variable from a .hxx was not used in some .cxx including it. To keep individual commits reasonably manageable, some consumers of OUStringLiteral in rtl/ustrbuf.hxx and rtl/ustring.hxx are left in a somewhat odd state for now, where they don't take advantage of OUStringLiteral's equivalence to rtl_uString, but just keep extracting its contents and copy it elsewhere. In follow-up commits, those consumers should be changed appropriately, making them treat OUStringLiteral like an rtl_uString or dropping the OUStringLiteral overload in favor of an existing (and cheap to use now) OUString overload, etc. In a similar vein, comparison operators between OUString and std::u16string_view have been added to the existing plethora of comparison operator overloads. It would be nice to eventually consolidate them, esp. with the overloads taking OUStringLiteral and/or char16_t const[N] string literals, but that appears tricky to get right without introducing new ambiguities. Also, a handful of places across the code base use comparisons between OUString and OUStringNumber, which are now ambiguous (converting the OUStringNumber to either OUString or std::u16string_view). For simplicity, those few places have manually been fixed for now by adding explicit conversion to std::u16string_view. Also some compilerplugins code needed to be adapted, and some of the compilerplugins/test cases have become irrelevant (and have been removed), as the tested code would no longer compile in the first place. sal/qa/rtl/strings/test_oustring_concat.cxx documents a workaround for GCC bug <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96878> "Failed class template argument deduction in unevaluated, parenthesized context". That place, as well as uses of OUStringLiteral in extensions/source/abpilot/fieldmappingimpl.cxx and i18npool/source/localedata/localedata.cxx, which have been replaced with OUString::Concat (and which is arguably a better choice, anyway), also caused failures with at least Clang 5.0.2 (but would not have caused failures with at least recent Clang 12 trunk, so appear to be bugs in Clang that have meanwhile been fixed). Change-Id: I34174462a28f2000cfeb2d219ffd533a767920b8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/102222 Tested-by: Jenkins Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
2020-09-08 09:48:17 +02:00
if (dc2.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace())
return true;
}
// when adding literals, we sometimes get this
if (auto functionalCastExpr = dyn_cast<CXXFunctionalCastExpr>(expr))
{
auto tc = loplugin::TypeCheck(functionalCastExpr->getType());
Turn OUStringLiteral into a consteval'ed, static-refcound rtl_uString ...from which an OUString can cheaply be instantiated. This is the OUString equivalent of 4b9e440c51be3e40326bc90c33ae69885bfb51e4 "Turn OStringLiteral into a consteval'ed, static-refcound rtl_String". Most remarks about that commit apply here too (this commit is just substantially bigger and a bit more complicated because there were so much more uses of OUStringLiteral than of OStringLiteral): The one downside is that OUStringLiteral now needs to be a template abstracting over the string length. But any uses for which that is a problem (e.g., as the element type of a container that would no longer be homogeneous, or in the signature of a function that shall not be turned into a template for one reason or another) can be replaced with std::u16string_view, without loss of efficiency compared to the original OUStringLiteral, and without loss of expressivity. The new OUStringLiteral ctor code would probably not be very efficient if it were ever executed at runtime, but it is intended to be only executed at compile time. Where available, C++20 "consteval" is used to statically ensure that. The intended use of the new OUStringLiteral is in all cases where an object that shall itself not be an OUString (e.g., because it shall be a global static variable for which the OUString ctor/dtor would be detrimental at library load/unload) must be converted to an OUString instance in at least one place. Other string literal abstractions could use std::u16string_view (or just plain char16_t const[N]), but interestingly OUStringLiteral might be more efficient than constexpr std::u16string_view even for such cases, as it should not need any relocations at library load time. For now, no existing uses of OUStringLiteral have been changed to some other abstraction (unless technically necessary as discussed above), and no additional places that would benefit from OUStringLiteral have been changed to use it. Global constexpr OUStringLiteral variables defined in an included file would be somewhat suboptimal, as each translation unit that uses them would create its own, unshared instance. The envisioned solution is to turn them into static data members of some class (and there may be a loplugin coming to find and fix affected places). Another approach that has been taken here in a few cases where such variables were only used in one .cxx anyway is to move their definitions from the .hxx into that one .cxx (in turn causing some files to become empty and get removed completely)---which also silenced some GCC -Werror=unused-variable if a variable from a .hxx was not used in some .cxx including it. To keep individual commits reasonably manageable, some consumers of OUStringLiteral in rtl/ustrbuf.hxx and rtl/ustring.hxx are left in a somewhat odd state for now, where they don't take advantage of OUStringLiteral's equivalence to rtl_uString, but just keep extracting its contents and copy it elsewhere. In follow-up commits, those consumers should be changed appropriately, making them treat OUStringLiteral like an rtl_uString or dropping the OUStringLiteral overload in favor of an existing (and cheap to use now) OUString overload, etc. In a similar vein, comparison operators between OUString and std::u16string_view have been added to the existing plethora of comparison operator overloads. It would be nice to eventually consolidate them, esp. with the overloads taking OUStringLiteral and/or char16_t const[N] string literals, but that appears tricky to get right without introducing new ambiguities. Also, a handful of places across the code base use comparisons between OUString and OUStringNumber, which are now ambiguous (converting the OUStringNumber to either OUString or std::u16string_view). For simplicity, those few places have manually been fixed for now by adding explicit conversion to std::u16string_view. Also some compilerplugins code needed to be adapted, and some of the compilerplugins/test cases have become irrelevant (and have been removed), as the tested code would no longer compile in the first place. sal/qa/rtl/strings/test_oustring_concat.cxx documents a workaround for GCC bug <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96878> "Failed class template argument deduction in unevaluated, parenthesized context". That place, as well as uses of OUStringLiteral in extensions/source/abpilot/fieldmappingimpl.cxx and i18npool/source/localedata/localedata.cxx, which have been replaced with OUString::Concat (and which is arguably a better choice, anyway), also caused failures with at least Clang 5.0.2 (but would not have caused failures with at least recent Clang 12 trunk, so appear to be bugs in Clang that have meanwhile been fixed). Change-Id: I34174462a28f2000cfeb2d219ffd533a767920b8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/102222 Tested-by: Jenkins Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
2020-09-08 09:48:17 +02:00
if (tc.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace())
return isSideEffectFree(functionalCastExpr->getSubExpr());
}
return false;
}
loplugin::Plugin::Registration<BufferAdd> bufferadd("bufferadd");
}
#endif // LO_CLANG_SHARED_PLUGINS
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */