new loplugin: singlevalfields

look for fields that only have a single constant value assigned to them

Change-Id: Iafcd37fdb8a8119bbc00f92981a1a01badf9c5a2
This commit is contained in:
Noel Grandin
2016-06-23 13:12:53 +02:00
parent 1f1f26bf0f
commit 8d861bd702
3 changed files with 481 additions and 2 deletions

View File

@@ -16,8 +16,7 @@
#include "compat.hxx"
/*
Find methods with default params, where the callers never specify the default param i.e.
might as well remove it.
Find params on methods where the param is only ever passed as a single constant value.
The process goes something like this:
$ make check

View File

@@ -0,0 +1,411 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <cassert>
#include <string>
#include <iostream>
#include <fstream>
#include <set>
#include "plugin.hxx"
#include "compat.hxx"
/**
Look for fields that are only ever assigned a single constant value.
We dmp a list of values assigned to fields, and a list of field definitions.
Then we will post-process the 2 lists and find the set of interesting fields.
Be warned that it produces around 5G of log file.
The process goes something like this:
$ make check
$ make FORCE_COMPILE_ALL=1 COMPILER_PLUGIN_TOOL='singlevalfields' check
$ ./compilerplugins/clang/singlevalfields.py
Note that the actual process may involve a fair amount of undoing, hand editing, and general messing around
to get it to work :-)
@TODO we don't spot fields that have been zero-initialised via calloc or rtl_allocateZeroMemory
*/
namespace {
struct MyFieldInfo
{
std::string parentClass;
std::string fieldName;
std::string sourceLocation;
};
bool operator < (const MyFieldInfo &lhs, const MyFieldInfo &rhs)
{
return std::tie(lhs.parentClass, lhs.fieldName)
< std::tie(rhs.parentClass, rhs.fieldName);
}
struct MyFieldAssignmentInfo : public MyFieldInfo
{
std::string value;
};
bool operator < (const MyFieldAssignmentInfo &lhs, const MyFieldAssignmentInfo &rhs)
{
return std::tie(lhs.parentClass, lhs.fieldName, lhs.value)
< std::tie(rhs.parentClass, rhs.fieldName, rhs.value);
}
// try to limit the voluminous output a little
static std::set<MyFieldAssignmentInfo> assignedSet;
static std::set<MyFieldInfo> definitionSet;
class SingleValFields:
public RecursiveASTVisitor<SingleValFields>, public loplugin::Plugin
{
public:
explicit SingleValFields(InstantiationData const & data): Plugin(data) {}
virtual void run() override
{
TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
// dump all our output in one write call - this is to try and limit IO "crosstalk" between multiple processes
// writing to the same logfile
std::string output;
for (const MyFieldAssignmentInfo & s : assignedSet)
output += "asgn:\t" + s.parentClass + "\t" + s.fieldName + "\t" + s.value + "\n";
for (const MyFieldInfo & s : definitionSet)
output += "defn:\t" + s.parentClass + "\t" + s.fieldName + "\t" + s.sourceLocation + "\n";
ofstream myfile;
myfile.open( SRCDIR "/singlevalfields.log", ios::app | ios::out);
myfile << output;
myfile.close();
}
bool shouldVisitTemplateInstantiations () const { return true; }
bool VisitFieldDecl( const FieldDecl* );
bool VisitMemberExpr( const MemberExpr* );
bool VisitCXXConstructorDecl( const CXXConstructorDecl* );
private:
void niceName(const FieldDecl*, MyFieldInfo&);
std::string fullyQualifiedName(const FunctionDecl*);
std::string getExprValue(const Expr*);
bool isInterestingType(const QualType&);
const FunctionDecl* get_top_FunctionDecl_from_Stmt(const Stmt&);
};
void SingleValFields::niceName(const FieldDecl* fieldDecl, MyFieldInfo& aInfo)
{
aInfo.parentClass = fieldDecl->getParent()->getQualifiedNameAsString();
aInfo.fieldName = fieldDecl->getNameAsString();
SourceLocation expansionLoc = compiler.getSourceManager().getExpansionLoc( fieldDecl->getLocation() );
StringRef name = compiler.getSourceManager().getFilename(expansionLoc);
aInfo.sourceLocation = std::string(name.substr(strlen(SRCDIR)+1)) + ":" + std::to_string(compiler.getSourceManager().getSpellingLineNumber(expansionLoc));
}
std::string SingleValFields::fullyQualifiedName(const FunctionDecl* functionDecl)
{
std::string ret = compat::getReturnType(*functionDecl).getCanonicalType().getAsString();
ret += " ";
if (isa<CXXMethodDecl>(functionDecl)) {
const CXXRecordDecl* recordDecl = dyn_cast<CXXMethodDecl>(functionDecl)->getParent();
ret += recordDecl->getQualifiedNameAsString();
ret += "::";
}
ret += functionDecl->getNameAsString() + "(";
bool bFirst = true;
for (const ParmVarDecl *pParmVarDecl : functionDecl->params()) {
if (bFirst)
bFirst = false;
else
ret += ",";
ret += pParmVarDecl->getType().getCanonicalType().getAsString();
}
ret += ")";
if (isa<CXXMethodDecl>(functionDecl) && dyn_cast<CXXMethodDecl>(functionDecl)->isConst()) {
ret += " const";
}
return ret;
}
bool SingleValFields::VisitFieldDecl( const FieldDecl* fieldDecl )
{
fieldDecl = fieldDecl->getCanonicalDecl();
const FieldDecl* canonicalDecl = fieldDecl;
if( ignoreLocation( fieldDecl ) || !isInterestingType(fieldDecl->getType()) )
return true;
MyFieldInfo aInfo;
niceName(canonicalDecl, aInfo);
definitionSet.insert(aInfo);
return true;
}
bool SingleValFields::VisitCXXConstructorDecl( const CXXConstructorDecl* decl )
{
if( ignoreLocation( decl ) )
return true;
// doesn't count as a write to fields because it's self->self
if (decl->isCopyOrMoveConstructor())
return true;
for(auto it = decl->init_begin(); it != decl->init_end(); ++it)
{
const CXXCtorInitializer* init = *it;
const FieldDecl* fieldDecl = init->getMember();
if( !fieldDecl || !isInterestingType(fieldDecl->getType()) )
continue;
MyFieldAssignmentInfo aInfo;
niceName(fieldDecl, aInfo);
aInfo.value = getExprValue(init->getInit());
assignedSet.insert(aInfo);
}
return true;
}
const Decl* get_DeclContext_from_Stmt(ASTContext& context, const Stmt& stmt)
{
auto it = context.getParents(stmt).begin();
if (it == context.getParents(stmt).end())
return nullptr;
const Decl *aDecl = it->get<Decl>();
if (aDecl)
return aDecl;
const Stmt *aStmt = it->get<Stmt>();
if (aStmt)
return get_DeclContext_from_Stmt(context, *aStmt);
return nullptr;
}
const FunctionDecl* SingleValFields::get_top_FunctionDecl_from_Stmt(const Stmt& stmt)
{
const Decl *decl = get_DeclContext_from_Stmt(compiler.getASTContext(), stmt);
if (decl)
return static_cast<const FunctionDecl*>(decl->getNonClosureContext());
return nullptr;
}
bool SingleValFields::VisitMemberExpr( const MemberExpr* memberExpr )
{
const ValueDecl* decl = memberExpr->getMemberDecl();
const FieldDecl* fieldDecl = dyn_cast<FieldDecl>(decl);
if (!fieldDecl) {
return true;
}
if (ignoreLocation(memberExpr) || !isInterestingType(fieldDecl->getType()))
return true;
const CXXMethodDecl* methodDecl = dyn_cast_or_null<CXXMethodDecl>(get_top_FunctionDecl_from_Stmt(*memberExpr));
if (methodDecl && (methodDecl->isCopyAssignmentOperator() || methodDecl->isMoveAssignmentOperator()
|| dyn_cast<CXXDestructorDecl>(methodDecl)))
return true;
// walk up the tree until we find something interesting
const Stmt* child = memberExpr;
const Stmt* parent = parentStmt(memberExpr);
bool bPotentiallyAssignedTo = false;
bool bDump = false;
std::string assignValue;
do {
// check for field being accessed by a reference variable e.g. Foo& f = m.foo;
auto parentsList = compiler.getASTContext().getParents(*child);
auto it = parentsList.begin();
if (it != parentsList.end()) {
const VarDecl *varDecl = it->get<VarDecl>();
if (varDecl) {
QualType qt = varDecl->getType().getDesugaredType(compiler.getASTContext());
if (!qt.isConstQualified() && qt->isReferenceType()) {
assignValue = "?";
bPotentiallyAssignedTo = true;
break;
}
}
}
if (!parent) {
return true;
}
if (isa<CastExpr>(parent) || isa<MemberExpr>(parent) || isa<ParenExpr>(parent) || isa<ParenListExpr>(parent)
|| isa<ExprWithCleanups>(parent))
{
child = parent;
parent = parentStmt(parent);
}
else if (isa<UnaryOperator>(parent))
{
const UnaryOperator* unaryOperator = dyn_cast<UnaryOperator>(parent);
int x = unaryOperator->getOpcode();
if (x == UO_AddrOf || x == UO_PostInc || x == UO_PostDec || x == UO_PreInc || x == UO_PreDec) {
assignValue = "?";
bPotentiallyAssignedTo = true;
break;
}
child = parent;
parent = parentStmt(parent);
}
else if (isa<CallExpr>(parent))
{
const CallExpr* callExpr = dyn_cast<CallExpr>(parent);
if (callExpr->getCallee() == child) {
break;
}
const FunctionDecl* functionDecl;
if (isa<CXXMemberCallExpr>(callExpr)) {
functionDecl = dyn_cast<CXXMemberCallExpr>(callExpr)->getMethodDecl();
}
else {
functionDecl = callExpr->getDirectCallee();
}
if (!functionDecl) {
break;
}
bool bFound = false;
for (unsigned i = 0; i < callExpr->getNumArgs(); ++i) {
if (i >= functionDecl->getNumParams()) // can happen in template code
break;
if (callExpr->getArg(i) == child) {
const ParmVarDecl* parmVarDecl = functionDecl->getParamDecl(i);
QualType qt = parmVarDecl->getType().getDesugaredType(compiler.getASTContext());
if (!qt.isConstQualified() && qt->isReferenceType()) {
assignValue = "?";
bPotentiallyAssignedTo = true;
}
bFound = true;
break;
}
}
break;
}
else if (isa<CXXConstructExpr>(parent))
{
const CXXConstructExpr* consExpr = dyn_cast<CXXConstructExpr>(parent);
const CXXConstructorDecl* consDecl = consExpr->getConstructor();
bool bFound = false;
for (unsigned i = 0; i < consExpr->getNumArgs(); ++i) {
if (i >= consDecl->getNumParams()) // can happen in template code
break;
if (consExpr->getArg(i) == child) {
const ParmVarDecl* parmVarDecl = consDecl->getParamDecl(i);
QualType qt = parmVarDecl->getType().getDesugaredType(compiler.getASTContext());
if (!qt.isConstQualified() && qt->isReferenceType()) {
assignValue = "?";
bPotentiallyAssignedTo = true;
}
bFound = true;
break;
}
}
break;
}
else if (isa<BinaryOperator>(parent))
{
const BinaryOperator* binaryOp = dyn_cast<BinaryOperator>(parent);
if ( binaryOp->getLHS() != child ) {
// do nothing
}
else if ( binaryOp->getOpcode() == BO_Assign ) {
assignValue = getExprValue(binaryOp->getRHS());
bPotentiallyAssignedTo = true;
} else {
assignValue = "?";
bPotentiallyAssignedTo = true;
}
break;
}
else if ( isa<CompoundStmt>(parent)
|| isa<SwitchStmt>(parent) || isa<CaseStmt>(parent) || isa<DefaultStmt>(parent)
|| isa<DoStmt>(parent) || isa<WhileStmt>(parent)
|| isa<IfStmt>(parent)
|| isa<ForStmt>(parent)
|| isa<ReturnStmt>(parent)
|| isa<CXXNewExpr>(parent)
|| isa<CXXDeleteExpr>(parent)
|| isa<ConditionalOperator>(parent)
|| isa<CXXTypeidExpr>(parent)
|| isa<ArraySubscriptExpr>(parent)
|| isa<CXXDependentScopeMemberExpr>(parent)
|| isa<DeclStmt>(parent)
|| isa<UnaryExprOrTypeTraitExpr>(parent)
|| isa<UnresolvedMemberExpr>(parent)
|| isa<MaterializeTemporaryExpr>(parent) //???
|| isa<InitListExpr>(parent)
|| isa<CXXUnresolvedConstructExpr>(parent)
)
{
break;
}
else {
bPotentiallyAssignedTo = true;
bDump = true;
break;
}
} while (true);
if (bDump)
{
report(
DiagnosticsEngine::Warning,
"oh dear, what can the matter be?",
memberExpr->getLocStart())
<< memberExpr->getSourceRange();
parent->dump();
}
if (bPotentiallyAssignedTo)
{
MyFieldAssignmentInfo aInfo;
niceName(fieldDecl, aInfo);
aInfo.value = assignValue;
assignedSet.insert(aInfo);
}
return true;
}
bool SingleValFields::isInterestingType(const QualType& qt) {
return qt.isCXX11PODType(compiler.getASTContext());
}
std::string SingleValFields::getExprValue(const Expr* arg)
{
if (!arg)
return "?";
arg = arg->IgnoreParenCasts();
// arg->dump();
// workaround bug in clang
if (isa<ParenListExpr>(arg))
return "?";
// ignore this, it seems to trigger an infinite recursion
if (isa<UnaryExprOrTypeTraitExpr>(arg)) {
return "?";
}
APSInt x1;
if (arg->EvaluateAsInt(x1, compiler.getASTContext()))
{
return x1.toString(10);
}
return "?";
}
loplugin::Plugin::Registration< SingleValFields > X("singlevalfields", false);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

View File

@@ -0,0 +1,69 @@
#!/usr/bin/python
import sys
import re
import io
definitionToSourceLocationMap = dict() # dict of tuple(parentClass, fieldName) to sourceLocation
fieldAssignDict = dict() # dict of tuple(parentClass, fieldName) to (set of values)
# clang does not always use exactly the same numbers in the type-parameter vars it generates
# so I need to substitute them to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
return normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
# reading as binary (since we known it is pure ascii) is much faster than reading as unicode
with io.open("singlevalfields.log", "rb", buffering=1024*1024) as txt:
for line in txt:
if line.startswith("defn:\t"):
idx1 = line.find("\t")
idx2 = line.find("\t",idx1+1)
idx3 = line.find("\t",idx2+1)
parentClass = normalizeTypeParams(line[idx1+1:idx2])
fieldName = normalizeTypeParams(line[idx2+1:idx3])
sourceLocation = line[idx3+1:].strip()
fieldInfo = (parentClass, fieldName)
definitionToSourceLocationMap[fieldInfo] = sourceLocation
elif line.startswith("asgn:\t"):
idx1 = line.find("\t")
idx2 = line.find("\t",idx1+1)
idx3 = line.find("\t",idx2+1)
parentClass = normalizeTypeParams(line[idx1+1:idx2])
fieldName = normalizeTypeParams(line[idx2+1:idx3])
assignValue = line[idx3+1:].strip()
fieldInfo = (parentClass, fieldName)
if not fieldInfo in fieldAssignDict:
fieldAssignDict[fieldInfo] = set()
fieldAssignDict[fieldInfo].add(assignValue)
tmp1list = list()
for fieldInfo, assignValues in fieldAssignDict.iteritems():
if len(assignValues) != 1:
continue
if "?" in assignValues:
continue
# if it contains anything other than this set, ignore it
if len(assignValues - set(["0", "1", "-1", "nullptr"])) > 0:
continue
v0 = fieldInfo[0] + " " + fieldInfo[1]
v1 = (",".join(assignValues))
v2 = ""
if fieldInfo in definitionToSourceLocationMap:
v2 = definitionToSourceLocationMap[fieldInfo]
tmp1list.append((v0,v1,v2))
# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
return [int(text) if text.isdigit() else text.lower()
for text in re.split(_nsre, s)]
tmp1list.sort(key=lambda v: natural_sort_key(v[2]))
# print out the results
with open("loplugin.singlevalfields", "wt") as f:
for v in tmp1list:
f.write(v[2] + "\n")
f.write(" " + v[0] + "\n")
f.write(" " + v[1] + "\n")