new loplugin: singlevalfields

look for fields that only have a single constant value assigned to them

Change-Id: Iafcd37fdb8a8119bbc00f92981a1a01badf9c5a2
This commit is contained in:
Noel Grandin
2016-06-23 13:12:53 +02:00
parent 1f1f26bf0f
commit 8d861bd702
3 changed files with 481 additions and 2 deletions

View File

@@ -16,8 +16,7 @@
#include "compat.hxx" #include "compat.hxx"
/* /*
Find methods with default params, where the callers never specify the default param i.e. Find params on methods where the param is only ever passed as a single constant value.
might as well remove it.
The process goes something like this: The process goes something like this:
$ make check $ make check

View File

@@ -0,0 +1,411 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <cassert>
#include <string>
#include <iostream>
#include <fstream>
#include <set>
#include "plugin.hxx"
#include "compat.hxx"
/**
Look for fields that are only ever assigned a single constant value.
We dump a list of values assigned to fields, and a list of field definitions.
Then we will post-process the 2 lists and find the set of interesting fields.
Be warned that it produces around 5G of log file.
The process goes something like this:
$ make check
$ make FORCE_COMPILE_ALL=1 COMPILER_PLUGIN_TOOL='singlevalfields' check
$ ./compilerplugins/clang/singlevalfields.py
Note that the actual process may involve a fair amount of undoing, hand editing, and general messing around
to get it to work :-)
@TODO we don't spot fields that have been zero-initialised via calloc or rtl_allocateZeroMemory
*/
namespace {
// Identifies a field by its enclosing class and name; sourceLocation is carried
// along for reporting only and takes no part in ordering.
struct MyFieldInfo
{
    std::string parentClass;
    std::string fieldName;
    std::string sourceLocation;
};

// Orders fields by (parentClass, fieldName).
bool operator < (const MyFieldInfo &lhs, const MyFieldInfo &rhs)
{
    if (lhs.parentClass != rhs.parentClass)
        return lhs.parentClass < rhs.parentClass;
    return lhs.fieldName < rhs.fieldName;
}

// A field together with one value that was seen being assigned to it.
struct MyFieldAssignmentInfo : public MyFieldInfo
{
    std::string value;
};

// Orders assignments by (parentClass, fieldName, value).
bool operator < (const MyFieldAssignmentInfo &lhs, const MyFieldAssignmentInfo &rhs)
{
    const MyFieldInfo &lhsField = lhs;
    const MyFieldInfo &rhsField = rhs;
    if (lhsField < rhsField)
        return true;
    if (rhsField < lhsField)
        return false;
    return lhs.value < rhs.value;
}
// Records are collected in sets so that each distinct definition/assignment is
// written only once, which limits the voluminous output a little.
// (Already file-local: these live inside the anonymous namespace.)
std::set<MyFieldAssignmentInfo> assignedSet;
std::set<MyFieldInfo> definitionSet;
// AST visitor that records field definitions and the values assigned to fields,
// then appends everything it collected to SRCDIR/singlevalfields.log.
class SingleValFields:
    public RecursiveASTVisitor<SingleValFields>, public loplugin::Plugin
{
public:
    explicit SingleValFields(InstantiationData const & data): Plugin(data) {}

    // Walk the whole translation unit, then flush the collected records.
    virtual void run() override
    {
        TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());

        // dump all our output in one write call - this is to try and limit IO "crosstalk" between multiple processes
        // writing to the same logfile
        std::string logText;
        for (const MyFieldAssignmentInfo & assignment : assignedSet)
        {
            logText += "asgn:\t";
            logText += assignment.parentClass;
            logText += "\t";
            logText += assignment.fieldName;
            logText += "\t";
            logText += assignment.value;
            logText += "\n";
        }
        for (const MyFieldInfo & definition : definitionSet)
        {
            logText += "defn:\t";
            logText += definition.parentClass;
            logText += "\t";
            logText += definition.fieldName;
            logText += "\t";
            logText += definition.sourceLocation;
            logText += "\n";
        }
        // the stream is closed when it goes out of scope
        std::ofstream logFile( SRCDIR "/singlevalfields.log", std::ios::app | std::ios::out);
        logFile << logText;
    }

    bool shouldVisitTemplateInstantiations () const { return true; }

    bool VisitFieldDecl( const FieldDecl* );
    bool VisitMemberExpr( const MemberExpr* );
    bool VisitCXXConstructorDecl( const CXXConstructorDecl* );
private:
    void niceName(const FieldDecl*, MyFieldInfo&);
    std::string fullyQualifiedName(const FunctionDecl*);
    std::string getExprValue(const Expr*);
    bool isInterestingType(const QualType&);
    const FunctionDecl* get_top_FunctionDecl_from_Stmt(const Stmt&);
};
void SingleValFields::niceName(const FieldDecl* fieldDecl, MyFieldInfo& aInfo)
{
aInfo.parentClass = fieldDecl->getParent()->getQualifiedNameAsString();
aInfo.fieldName = fieldDecl->getNameAsString();
SourceLocation expansionLoc = compiler.getSourceManager().getExpansionLoc( fieldDecl->getLocation() );
StringRef name = compiler.getSourceManager().getFilename(expansionLoc);
aInfo.sourceLocation = std::string(name.substr(strlen(SRCDIR)+1)) + ":" + std::to_string(compiler.getSourceManager().getSpellingLineNumber(expansionLoc));
}
std::string SingleValFields::fullyQualifiedName(const FunctionDecl* functionDecl)
{
std::string ret = compat::getReturnType(*functionDecl).getCanonicalType().getAsString();
ret += " ";
if (isa<CXXMethodDecl>(functionDecl)) {
const CXXRecordDecl* recordDecl = dyn_cast<CXXMethodDecl>(functionDecl)->getParent();
ret += recordDecl->getQualifiedNameAsString();
ret += "::";
}
ret += functionDecl->getNameAsString() + "(";
bool bFirst = true;
for (const ParmVarDecl *pParmVarDecl : functionDecl->params()) {
if (bFirst)
bFirst = false;
else
ret += ",";
ret += pParmVarDecl->getType().getCanonicalType().getAsString();
}
ret += ")";
if (isa<CXXMethodDecl>(functionDecl) && dyn_cast<CXXMethodDecl>(functionDecl)->isConst()) {
ret += " const";
}
return ret;
}
// Record the definition of every field that is in a location we care about and
// has an interesting type. Always returns true so the traversal continues.
bool SingleValFields::VisitFieldDecl( const FieldDecl* fieldDecl )
{
    const FieldDecl* canonicalDecl = fieldDecl->getCanonicalDecl();
    const bool bRecord = !ignoreLocation( canonicalDecl ) && isInterestingType(canonicalDecl->getType());
    if( bRecord )
    {
        MyFieldInfo aInfo;
        niceName(canonicalDecl, aInfo);
        definitionSet.insert(aInfo);
    }
    return true;
}
bool SingleValFields::VisitCXXConstructorDecl( const CXXConstructorDecl* decl )
{
if( ignoreLocation( decl ) )
return true;
// doesn't count as a write to fields because it's self->self
if (decl->isCopyOrMoveConstructor())
return true;
for(auto it = decl->init_begin(); it != decl->init_end(); ++it)
{
const CXXCtorInitializer* init = *it;
const FieldDecl* fieldDecl = init->getMember();
if( !fieldDecl || !isInterestingType(fieldDecl->getType()) )
continue;
MyFieldAssignmentInfo aInfo;
niceName(fieldDecl, aInfo);
aInfo.value = getExprValue(init->getInit());
assignedSet.insert(aInfo);
}
return true;
}
// Walk up the parent chain of stmt (following the first parent at each level)
// and return the first Decl encountered, or nullptr if the chain ends or reaches
// a node that is neither a Decl nor a Stmt.
const Decl* get_DeclContext_from_Stmt(ASTContext& context, const Stmt& stmt)
{
    const Stmt* current = &stmt;
    while (current)
    {
        // getParents() returns its list by value: keep it alive in a local so that
        // begin() and end() refer to the same object and the iterator stays valid
        // while it is dereferenced.
        auto parentsList = context.getParents(*current);
        auto it = parentsList.begin();
        if (it == parentsList.end())
            return nullptr;
        if (const Decl* aDecl = it->get<Decl>())
            return aDecl;
        // null when the parent is not a statement, which ends the walk
        current = it->get<Stmt>();
    }
    return nullptr;
}
// Return the function that (outside of any closures) encloses stmt, or nullptr
// when there is no enclosing declaration or it is not a FunctionDecl.
const FunctionDecl* SingleValFields::get_top_FunctionDecl_from_Stmt(const Stmt& stmt)
{
    const Decl *decl = get_DeclContext_from_Stmt(compiler.getASTContext(), stmt);
    if (!decl)
        return nullptr;
    // getNonClosureContext() returns a plain Decl which may be null or something other
    // than a FunctionDecl, so use a checked cast rather than an unchecked downcast.
    return dyn_cast_or_null<FunctionDecl>(decl->getNonClosureContext());
}
bool SingleValFields::VisitMemberExpr( const MemberExpr* memberExpr )
{
const ValueDecl* decl = memberExpr->getMemberDecl();
const FieldDecl* fieldDecl = dyn_cast<FieldDecl>(decl);
if (!fieldDecl) {
return true;
}
if (ignoreLocation(memberExpr) || !isInterestingType(fieldDecl->getType()))
return true;
const CXXMethodDecl* methodDecl = dyn_cast_or_null<CXXMethodDecl>(get_top_FunctionDecl_from_Stmt(*memberExpr));
if (methodDecl && (methodDecl->isCopyAssignmentOperator() || methodDecl->isMoveAssignmentOperator()
|| dyn_cast<CXXDestructorDecl>(methodDecl)))
return true;
// walk up the tree until we find something interesting
const Stmt* child = memberExpr;
const Stmt* parent = parentStmt(memberExpr);
bool bPotentiallyAssignedTo = false;
bool bDump = false;
std::string assignValue;
do {
// check for field being accessed by a reference variable e.g. Foo& f = m.foo;
auto parentsList = compiler.getASTContext().getParents(*child);
auto it = parentsList.begin();
if (it != parentsList.end()) {
const VarDecl *varDecl = it->get<VarDecl>();
if (varDecl) {
QualType qt = varDecl->getType().getDesugaredType(compiler.getASTContext());
if (!qt.isConstQualified() && qt->isReferenceType()) {
assignValue = "?";
bPotentiallyAssignedTo = true;
break;
}
}
}
if (!parent) {
return true;
}
if (isa<CastExpr>(parent) || isa<MemberExpr>(parent) || isa<ParenExpr>(parent) || isa<ParenListExpr>(parent)
|| isa<ExprWithCleanups>(parent))
{
child = parent;
parent = parentStmt(parent);
}
else if (isa<UnaryOperator>(parent))
{
const UnaryOperator* unaryOperator = dyn_cast<UnaryOperator>(parent);
int x = unaryOperator->getOpcode();
if (x == UO_AddrOf || x == UO_PostInc || x == UO_PostDec || x == UO_PreInc || x == UO_PreDec) {
assignValue = "?";
bPotentiallyAssignedTo = true;
break;
}
child = parent;
parent = parentStmt(parent);
}
else if (isa<CallExpr>(parent))
{
const CallExpr* callExpr = dyn_cast<CallExpr>(parent);
if (callExpr->getCallee() == child) {
break;
}
const FunctionDecl* functionDecl;
if (isa<CXXMemberCallExpr>(callExpr)) {
functionDecl = dyn_cast<CXXMemberCallExpr>(callExpr)->getMethodDecl();
}
else {
functionDecl = callExpr->getDirectCallee();
}
if (!functionDecl) {
break;
}
bool bFound = false;
for (unsigned i = 0; i < callExpr->getNumArgs(); ++i) {
if (i >= functionDecl->getNumParams()) // can happen in template code
break;
if (callExpr->getArg(i) == child) {
const ParmVarDecl* parmVarDecl = functionDecl->getParamDecl(i);
QualType qt = parmVarDecl->getType().getDesugaredType(compiler.getASTContext());
if (!qt.isConstQualified() && qt->isReferenceType()) {
assignValue = "?";
bPotentiallyAssignedTo = true;
}
bFound = true;
break;
}
}
break;
}
else if (isa<CXXConstructExpr>(parent))
{
const CXXConstructExpr* consExpr = dyn_cast<CXXConstructExpr>(parent);
const CXXConstructorDecl* consDecl = consExpr->getConstructor();
bool bFound = false;
for (unsigned i = 0; i < consExpr->getNumArgs(); ++i) {
if (i >= consDecl->getNumParams()) // can happen in template code
break;
if (consExpr->getArg(i) == child) {
const ParmVarDecl* parmVarDecl = consDecl->getParamDecl(i);
QualType qt = parmVarDecl->getType().getDesugaredType(compiler.getASTContext());
if (!qt.isConstQualified() && qt->isReferenceType()) {
assignValue = "?";
bPotentiallyAssignedTo = true;
}
bFound = true;
break;
}
}
break;
}
else if (isa<BinaryOperator>(parent))
{
const BinaryOperator* binaryOp = dyn_cast<BinaryOperator>(parent);
if ( binaryOp->getLHS() != child ) {
// do nothing
}
else if ( binaryOp->getOpcode() == BO_Assign ) {
assignValue = getExprValue(binaryOp->getRHS());
bPotentiallyAssignedTo = true;
} else {
assignValue = "?";
bPotentiallyAssignedTo = true;
}
break;
}
else if ( isa<CompoundStmt>(parent)
|| isa<SwitchStmt>(parent) || isa<CaseStmt>(parent) || isa<DefaultStmt>(parent)
|| isa<DoStmt>(parent) || isa<WhileStmt>(parent)
|| isa<IfStmt>(parent)
|| isa<ForStmt>(parent)
|| isa<ReturnStmt>(parent)
|| isa<CXXNewExpr>(parent)
|| isa<CXXDeleteExpr>(parent)
|| isa<ConditionalOperator>(parent)
|| isa<CXXTypeidExpr>(parent)
|| isa<ArraySubscriptExpr>(parent)
|| isa<CXXDependentScopeMemberExpr>(parent)
|| isa<DeclStmt>(parent)
|| isa<UnaryExprOrTypeTraitExpr>(parent)
|| isa<UnresolvedMemberExpr>(parent)
|| isa<MaterializeTemporaryExpr>(parent) //???
|| isa<InitListExpr>(parent)
|| isa<CXXUnresolvedConstructExpr>(parent)
)
{
break;
}
else {
bPotentiallyAssignedTo = true;
bDump = true;
break;
}
} while (true);
if (bDump)
{
report(
DiagnosticsEngine::Warning,
"oh dear, what can the matter be?",
memberExpr->getLocStart())
<< memberExpr->getSourceRange();
parent->dump();
}
if (bPotentiallyAssignedTo)
{
MyFieldAssignmentInfo aInfo;
niceName(fieldDecl, aInfo);
aInfo.value = assignValue;
assignedSet.insert(aInfo);
}
return true;
}
// Only fields whose type is a C++11 POD type are tracked.
bool SingleValFields::isInterestingType(const QualType& qt)
{
    const bool bIsPod = qt.isCXX11PODType(compiler.getASTContext());
    return bIsPod;
}
std::string SingleValFields::getExprValue(const Expr* arg)
{
if (!arg)
return "?";
arg = arg->IgnoreParenCasts();
// arg->dump();
// workaround bug in clang
if (isa<ParenListExpr>(arg))
return "?";
// ignore this, it seems to trigger an infinite recursion
if (isa<UnaryExprOrTypeTraitExpr>(arg)) {
return "?";
}
APSInt x1;
if (arg->EvaluateAsInt(x1, compiler.getASTContext()))
{
return x1.toString(10);
}
return "?";
}
// Register the plugin under the name "singlevalfields" (the name passed via
// COMPILER_PLUGIN_TOOL in the usage notes at the top of this file).
// NOTE(review): the second argument presumably means "not enabled by default" - confirm
// against loplugin::Plugin::Registration.
loplugin::Plugin::Registration< SingleValFields > X("singlevalfields", false);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

View File

@@ -0,0 +1,69 @@
#!/usr/bin/python
import sys
import re
import io
# (parentClass, fieldName) -> sourceLocation
definitionToSourceLocationMap = {}
# (parentClass, fieldName) -> set of values seen being assigned to that field
fieldAssignDict = {}
# Clang is not consistent about the numbers it puts into the names of the
# type-parameter variables it generates, so they are replaced by a fixed
# placeholder to make names from different records comparable.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")

def normalizeTypeParams( line ):
    """Return line with every "type-parameter-N-M" replaced by "type-parameter-?-?"."""
    normalized = re.sub(normalizeTypeParamsRegex, "type-parameter-?-?", line)
    return normalized
# Reading as binary (since we know the log is pure ascii) is much faster than
# reading as unicode. Each record has the form
#   defn:<TAB>parentClass<TAB>fieldName<TAB>sourceLocation
#   asgn:<TAB>parentClass<TAB>fieldName<TAB>value
def _asText(raw):
    """Return raw as the native str type: unchanged on Python 2, ASCII-decoded on Python 3."""
    if isinstance(raw, str):
        return raw
    return raw.decode("ascii", "replace")

# Records that carry a known prefix but fewer than 3 tab separators, e.g. lines
# damaged by several compiler processes appending to the log at the same time.
malformedRecordCount = 0
with io.open("singlevalfields.log", "rb", buffering=1024*1024) as txt:
    for line in txt:
        # at most 3 splits, so any further tab stays inside the last field
        fields = line.split(b"\t", 3)
        recordKind = fields[0]
        if recordKind != b"defn:" and recordKind != b"asgn:":
            continue
        if len(fields) != 4:
            # cannot tell which field or value this belongs to: skip it
            # rather than storing a garbage key
            malformedRecordCount += 1
            continue
        parentClass = normalizeTypeParams(_asText(fields[1]))
        fieldName = normalizeTypeParams(_asText(fields[2]))
        payload = _asText(fields[3]).strip()
        fieldInfo = (parentClass, fieldName)
        if recordKind == b"defn:":
            definitionToSourceLocationMap[fieldInfo] = payload
        else:
            fieldAssignDict.setdefault(fieldInfo, set()).add(payload)
if malformedRecordCount:
    sys.stderr.write("singlevalfields: skipped %d malformed log record(s)\n" % malformedRecordCount)
# Collect the fields that were only ever assigned one single value, and only
# when that value is one of the "boring" constants below; a field with several
# values, an unknown ("?") value or any other constant is not reported.
# Each entry is ("parentClass fieldName", value, sourceLocation).
reportableValues = set(["0", "1", "-1", "nullptr"])
tmp1list = list()
# items() rather than iteritems() so that this runs on Python 2 and Python 3
for fieldInfo, assignValues in fieldAssignDict.items():
    if len(assignValues) != 1 or not assignValues.issubset(reportableValues):
        continue
    v0 = fieldInfo[0] + " " + fieldInfo[1]
    v1 = ",".join(assignValues)
    # fields without a recorded definition get an empty location
    v2 = definitionToSourceLocationMap.get(fieldInfo, "")
    tmp1list.append((v0,v1,v2))
# Key function used to sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Split s into digit and non-digit runs so that digit runs compare numerically.

    Digit runs become ints, everything else is lower-cased text.
    """
    key = []
    for chunk in _nsre.split(s):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk.lower())
    return key
# Order the results by their source location (third tuple element), comparing
# embedded numbers numerically.
def _locationKey(entry):
    return natural_sort_key(entry[2])

tmp1list.sort(key=_locationKey)

# print out the results: location, then field, then value, one per line
with open("loplugin.singlevalfields", "wt") as f:
    for fieldText, valueText, locationText in tmp1list:
        f.write(locationText + "\n")
        f.write(" " + fieldText + "\n")
        f.write(" " + valueText + "\n")