new loplugin: find write-only fields

Change-Id: I0f83939babacf92485420ee63f290a297d7cb717 Reviewed-on: https://gerrit.libreoffice.org/22498 Reviewed-by: Noel Grandin <noelgrandin@gmail.com> Tested-by: Noel Grandin <noelgrandin@gmail.com>
2016-02-19 10:52:20 +02:00
parent 541c4c4509
commit 778e9a65bf
61 changed files with 181 additions and 257 deletions
--- a/compilerplugins/clang/plugin.cxx
+++ b/compilerplugins/clang/plugin.cxx
@@ -104,6 +104,7 @@ class ParentBuilder
        bool VisitFunctionDecl( const FunctionDecl* function );
        bool VisitObjCMethodDecl( const ObjCMethodDecl* method );
        void walk( const Stmt* stmt );
+        bool shouldVisitTemplateInstantiations () const { return true; }
        unordered_map< const Stmt*, const Stmt* >* parents;
    };

--- a/compilerplugins/clang/unusedfields.cxx
+++ b/compilerplugins/clang/unusedfields.cxx
@@ -16,7 +16,11 @@
 #include "compat.hxx"

 /**
-Dump a list of calls to methods, and a list of field definitions.
+This performs two analyses:
+ (1) look for unused fields
+ (2) look for fields that are write-only
+
+We dmp a list of calls to methods, and a list of field definitions.
 Then we will post-process the 2 lists and find the set of unused methods.

 Be warned that it produces around 5G of log file.
@@ -24,7 +28,7 @@ Be warned that it produces around 5G of log file.
 The process goes something like this:
  $ make check
  $ make FORCE_COMPILE_ALL=1 COMPILER_PLUGIN_TOOL='unusedfields' check
-  $ ./compilerplugins/clang/unusedfields.py unusedfields.log > result.txt
+  $ ./compilerplugins/clang/unusedfields.py unusedfields.log

 and then
  $ for dir in *; do make FORCE_COMPILE_ALL=1 UPDATE_FILES=$dir COMPILER_PLUGIN_TOOL='unusedfieldsremove' $dir; done
@@ -58,6 +62,7 @@ struct MyFieldInfo

 // try to limit the voluminous output a little
 static std::set<MyFieldInfo> touchedSet;
+static std::set<MyFieldInfo> readFromSet;
 static std::set<MyFieldInfo> definitionSet;


@@ -76,6 +81,8 @@ public:
        std::string output;
        for (const MyFieldInfo & s : touchedSet)
            output += "touch:\t" + s.parentClass + "\t" + s.fieldName + "\n";
+        for (const MyFieldInfo & s : readFromSet)
+            output += "read:\t" + s.parentClass + "\t" + s.fieldName + "\n";
        for (const MyFieldInfo & s : definitionSet)
        {
            output += "definition:\t" + s.parentClass + "\t" + s.fieldName + "\t" + s.fieldType + "\t" + s.sourceLocation + "\n";
@@ -204,10 +211,84 @@ bool UnusedFields::VisitFieldDecl( const FieldDecl* fieldDecl )
 bool UnusedFields::VisitMemberExpr( const MemberExpr* memberExpr )
 {
    const ValueDecl* decl = memberExpr->getMemberDecl();
-    if (!isa<FieldDecl>(decl)) {
+    const FieldDecl* fieldDecl = dyn_cast<FieldDecl>(decl);
+    if (!fieldDecl) {
        return true;
    }
-    touchedSet.insert(niceName(dyn_cast<FieldDecl>(decl)));
+    MyFieldInfo fieldInfo = niceName(fieldDecl);
+    touchedSet.insert(fieldInfo);
+
+  // for the write-only analysis
+
+    if (ignoreLocation(memberExpr))
+        return true;
+
+    const Stmt* child = memberExpr;
+    const Stmt* parent = parentStmt(memberExpr);
+    // walk up the tree until we find something interesting
+    bool bPotentiallyReadFrom = false;
+    bool bDump = false;
+    do {
+        if (!parent) {
+            return true;
+        }
+        if (isa<CastExpr>(parent) || isa<MemberExpr>(parent) || isa<ParenExpr>(parent) || isa<ParenListExpr>(parent)
+             || isa<ExprWithCleanups>(parent) || isa<UnaryOperator>(parent))
+        {
+            child = parent;
+            parent = parentStmt(parent);
+        }
+        else if (isa<CaseStmt>(parent))
+        {
+            bPotentiallyReadFrom = dyn_cast<CaseStmt>(parent)->getLHS() == child
+                                  || dyn_cast<CaseStmt>(parent)->getRHS() == child;
+            break;
+        }
+        else if (isa<IfStmt>(parent))
+        {
+            bPotentiallyReadFrom = dyn_cast<IfStmt>(parent)->getCond() == child;
+            break;
+        }
+        else if (isa<DoStmt>(parent))
+        {
+            bPotentiallyReadFrom = dyn_cast<DoStmt>(parent)->getCond() == child;
+            break;
+        }
+        else if (isa<ReturnStmt>(parent) || isa<CXXConstructExpr>(parent) || isa<CallExpr>(parent)
+                 || isa<ConditionalOperator>(parent) || isa<SwitchStmt>(parent) || isa<ArraySubscriptExpr>(parent)
+                 || isa<DeclStmt>(parent) || isa<WhileStmt>(parent) || isa<CXXNewExpr>(parent)
+                 || isa<ForStmt>(parent) || isa<InitListExpr>(parent)
+                 || isa<BinaryOperator>(parent) || isa<CXXDependentScopeMemberExpr>(parent)
+                 || isa<UnresolvedMemberExpr>(parent)
+                 || isa<MaterializeTemporaryExpr>(parent))  //???
+        {
+            bPotentiallyReadFrom = true;
+            break;
+        }
+        else if (isa<CXXDeleteExpr>(parent)
+                  || isa<UnaryExprOrTypeTraitExpr>(parent)
+                 || isa<CXXUnresolvedConstructExpr>(parent) || isa<CompoundStmt>(parent)
+                 || isa<CXXTypeidExpr>(parent) || isa<DefaultStmt>(parent))
+        {
+            break;
+        }
+        else {
+            bPotentiallyReadFrom = true;
+            bDump = true;
+            break;
+        }
+    } while (true);
+    if (bDump)
+    {
+        report(
+             DiagnosticsEngine::Warning,
+             "oh dear, what can the matter be?",
+              memberExpr->getLocStart())
+              << memberExpr->getSourceRange();
+        parent->dump();
+    }
+    if (bPotentiallyReadFrom)
+        readFromSet.insert(fieldInfo);
    return true;
 }

--- a/compilerplugins/clang/unusedfields.py
+++ b/compilerplugins/clang/unusedfields.py
@@ -8,6 +8,7 @@ definitionSet = set()
 definitionToSourceLocationMap = dict()
 definitionToTypeMap = dict()
 callSet = set()
+readFromSet = set()
 sourceLocationSet = set()
 # things we need to exclude for reasons like :
 # - it's a weird template thingy that confuses the plugin
@@ -36,6 +37,10 @@ with io.open(sys.argv[1], "rb", buffering=1024*1024) as txt:
            idx1 = line.find("\t",7)
            callInfo = (normalizeTypeParams(line[7:idx1]), line[idx1+1:].strip())
            callSet.add(callInfo)
+        elif line.startswith("read:\t"):
+            idx1 = line.find("\t",6)
+            readInfo = (normalizeTypeParams(line[6:idx1]), line[idx1+1:].strip())
+            readFromSet.add(readInfo)

 # Invert the definitionToSourceLocationMap
 # If we see more than one method at the same sourceLocation, it's being autogenerated as part of a template
@@ -49,7 +54,7 @@ for k, definitions in sourceLocationToDefinitionMap.iteritems():
        for d in definitions:
            definitionSet.remove(d)
    
-tmp1set = set()
+untouchedSet = set()
 for d in definitionSet:
    clazz = d[0] + " " + d[1]
    if clazz in exclusionSet:
@@ -86,8 +91,45 @@ for d in definitionSet:
        or srcLoc.startswith("lotuswordpro/source/filter/lwpsdwdrawheader.hxx")
        or srcLoc.startswith("svtools/source/dialogs/insdlg.cxx")):
        continue
+    untouchedSet.add((clazz + " " + definitionToTypeMap[d], srcLoc))

-    tmp1set.add((clazz + " " + definitionToTypeMap[d], srcLoc))
+writeonlySet = set()
+for d in definitionSet:
+    clazz = d[0] + " " + d[1]
+    if d in readFromSet:
+        continue
+    srcLoc = definitionToSourceLocationMap[d];
+    # ignore external source code
+    if (srcLoc.startswith("external/")):
+        continue
+    # ignore build folder
+    if (srcLoc.startswith("workdir/")):
+        continue
+    # ignore our stable/URE/UNO api
+    if (srcLoc.startswith("include/com/")
+        or srcLoc.startswith("include/cppu/")
+        or srcLoc.startswith("include/cppuhelper/")
+        or srcLoc.startswith("include/osl/")
+        or srcLoc.startswith("include/rtl/")
+        or srcLoc.startswith("include/sal/")
+        or srcLoc.startswith("include/salhelper/")
+        or srcLoc.startswith("include/systools/")
+        or srcLoc.startswith("include/typelib/")
+        or srcLoc.startswith("include/uno/")):
+        continue
+    # this is all representations of on-disk data structures
+    if (srcLoc.startswith("sc/source/filter/inc/scflt.hxx")
+        or srcLoc.startswith("sw/source/filter/ww8/")
+        or srcLoc.startswith("vcl/source/filter/sgvmain.hxx")
+        or srcLoc.startswith("vcl/source/filter/sgfbram.hxx")
+        or srcLoc.startswith("vcl/inc/unx/XIM.h")
+        or srcLoc.startswith("vcl/inc/unx/gtk/gloactiongroup.h")
+        or srcLoc.startswith("include/svl/svdde.hxx")
+        or srcLoc.startswith("lotuswordpro/source/filter/lwpsdwdrawheader.hxx")
+        or srcLoc.startswith("svtools/source/dialogs/insdlg.cxx")):
+        continue
+
+    writeonlySet.add((clazz + " " + definitionToTypeMap[d], srcLoc))

 # sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
 def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
@@ -95,12 +137,18 @@ def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
            for text in re.split(_nsre, s)]

 # sort results by name and line number
-tmp1list = sorted(tmp1set, key=lambda v: natural_sort_key(v[1]))
+tmp1list = sorted(untouchedSet, key=lambda v: natural_sort_key(v[1]))
+tmp2list = sorted(writeonlySet, key=lambda v: natural_sort_key(v[1]))

 # print out the results
-for t in tmp1list:
-    print t[1]
-    print "    ", t[0]
+with open("unusedfields.untouched", "wt") as f:
+    for t in tmp1list:
+        f.write( t[1] + "\n" )
+        f.write( "    " + t[0] + "\n" )
+with open("unusedfields.writeonly", "wt") as f:
+    for t in tmp2list:
+        f.write( t[1] + "\n" )
+        f.write( "    " + t[0] + "\n" )