Create Documentation using Doxygen and non-Doxygen commented source code - C++

We have some roughly 10-year-old C and C++ code, without documents or a manual. The source is documented quite well in the header files, but it is a lot of work to go through all the files looking for a piece of information. It looks like this:
// Description description ....
//
// #param parameter 1 name:
// description of parameter 1
//
// #param parameter 2 name:
// description of parameter 2
//
Returntype Functionname(parameter1, parameter2);
Using the Doxygen wizard, documentation can be created, but all the comments are lost because they are not formatted in a way the parser understands.
So is that a format I don't know? Can I teach the parser what to do? Or is it a special format used by some other software?

I wrote a Python script to convert the comments to a format the parser understands. It's not pretty, it's not safe, but it works for us.
import re
import time
import os
import shutil

def convertHeaderDocumentation(file):
    with open(file) as f:
        lines = f.readlines()
    lines = [line.rstrip('\n') for line in lines]
    scanning = False
    commentLines = []
    convertedDocument = ""
    declaration = ""
    for line in lines:
        if line == "" or \
           line.strip().startswith("#"):
            if len(commentLines) > 0:
                convertedDocument += ''.join(el + "\n" for el in commentLines)
                commentLines.clear()
            convertedDocument += line + "\n"
            continue
        if line.strip().startswith('//'):
            if not scanning:
                commentLines.clear()
                scanning = True
            commentLines.append(line)
        else:
            # note: non-comment lines that do not follow a comment block are dropped
            if scanning:
                if line.strip() != "":
                    declaration = line.strip()
                    match = re.search(r'\s*\w*\s*(\w+)\s+(\w+).*\((.*)[^)].*;', declaration)
                    if match is not None:
                        # check for a function description
                        description = ""
                        for commentLine in commentLines:
                            if commentLine[2:].strip().startswith("#") or \
                               commentLine[2:].strip() == "":
                                break
                            else:
                                description += commentLine[2:].strip()
                        # scan for parameter descriptions
                        parameters = []
                        parameter = ""
                        scanning = False
                        for commentLine in commentLines:
                            # start scanning if the line starts with #
                            if commentLine[2:].strip().startswith("#") and \
                               not scanning:
                                # if a parameter is in the buffer, add it to the list
                                if parameter != "":
                                    parameters.append(parameter)
                                scanning = True
                                parameter = commentLine[2:].strip() + " "
                                continue
                            # stop scanning if an empty line is read
                            if commentLine[2:].strip() == "":
                                scanning = False
                                # save the parameter if one is in the buffer
                                if parameter != "":
                                    parameters.append(parameter)
                                    parameter = ""
                            if scanning and commentLine[2:].strip() != "":
                                parameter += commentLine[2:].strip()
                        convertedDocument += "/**\n"
                        convertedDocument += " * #fn " + declaration[:-1] + "\n"
                        convertedDocument += " *\n"
                        convertedDocument += " * #brief "
                        restLine = 80 - len(" * #brief ")
                        for index in range(0, len(description), restLine):
                            convertedDocument += description[index:index + restLine] + "\n * "
                        convertedDocument += "\n"
                        for parameter in parameters:
                            convertedDocument += " * " + parameter + "\n *\n"
                        convertedDocument += " * #return " + match.group(1) + "\n"
                        convertedDocument += " *\n"
                        convertedDocument += " * #date " + time.strftime("%d.%m.%Y") + "<br> parsed using python\n"
                        convertedDocument += " */\n"
                        convertedDocument += declaration + "\n\n"
                        commentLines.clear()
                    else:
                        convertedDocument += ''.join(el + "\n" for el in commentLines)
                        commentLines.clear()
    return convertedDocument

projectDir = "path to source files goes here"
projectDir = os.path.abspath(projectDir)
parentProjectDir, projectDirName = os.path.split(projectDir)
convertedDir = os.path.join(parentProjectDir, "converted")
print(convertedDir)
for root, dirs, files in os.walk(projectDir):
    # create the directory structure if not present
    tmpConvertedDir = os.path.join(convertedDir, root[len(projectDir) + 1:])
    if not os.path.exists(tmpConvertedDir):
        os.makedirs(tmpConvertedDir)
    for file in files:
        filename, fileextension = os.path.splitext(file)
        # only copy/convert C/C++ source files
        if fileextension in {'.h', '.c', '.cpp'}:
            newPath = os.path.join(tmpConvertedDir, file)
            print(newPath)
            # convert header files
            if fileextension in {'.h'}:
                #print("convert ", os.path.join(root, file), " to ", newPath)
                converted = convertHeaderDocumentation(os.path.join(root, file))
                with open(newPath, 'w') as f:
                    f.write(converted)
            # copy source files
            else:
                print("copy ", os.path.join(root, file), " to ", newPath)
                shutil.copyfile(os.path.join(root, file), newPath)
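To illustrate, for a header comment like the example at the top, the script emits a Javadoc-style block along these lines (sketched by hand from the string building above; the wrapped #brief text and the #date stamp will differ, and the captured return type depends on the regex variant, see the note below):
/**
 * #fn Returntype Functionname(parameter1, parameter2)
 *
 * #brief Description description ....
 *
 * #param parameter 1 name: description of parameter 1
 *
 * #param parameter 2 name: description of parameter 2
 *
 * #return Returntype
 *
 * #date dd.mm.yyyy<br> parsed using python
 */
Returntype Functionname(parameter1, parameter2);
The /** ... */ block is what makes Doxygen pick the comment up and attach it to the following declaration, so the legacy text at least shows up in the generated pages.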
The function declaration was a bit tricky to catch with a regex. For my case \s*\w*\s*(\w+)\s+(\w+).*\((.*)[^)].*; works just fine, but if there are no extra keywords before the return type, a lazy quantifier is more accurate: \s*\w*?\s*(\w+)\s+(\w+).*\((.*)[^)].*;
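A quick way to see the difference (a small test sketch; the two prototypes are made up):
import re

greedy = r'\s*\w*\s*(\w+)\s+(\w+).*\((.*)[^)].*;'
lazy = r'\s*\w*?\s*(\w+)\s+(\w+).*\((.*)[^)].*;'

# with a keyword before the return type, the greedy variant captures correctly
print(re.search(greedy, "extern int Add(int a, int b);").groups()[:2])  # ('int', 'Add')
# without a keyword, the lazy variant is the one that captures correctly
print(re.search(lazy, "int Add(int a, int b);").groups()[:2])           # ('int', 'Add')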
All C and C++ files in the given projectDir directory and its subfolders are converted if they are header files, or simply copied if they are source files. For this, a directory ..\converted is created which contains the copied/converted files.
With the resulting files the Doxygen wizard created sufficient documentation. Maybe this is going to help someone :-)
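As an aside, if you would rather not keep converted copies on disk, Doxygen can run a converter on the fly: the INPUT_FILTER setting in the Doxyfile names a command that Doxygen invokes for each input file, reading the filtered source from its standard output. A single-file variant of the script above (the script name here is hypothetical) could be hooked in like this:
INPUT_FILTER = "python convert_comments.py"
The original files then stay untouched and the conversion happens on every Doxygen run.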

Related

Simple way to refactor this Python code to reduce repetition

I'd like help refactoring this code to reduce redundant lines/concepts. The code for this def is basically repeated 3 times.
Restrictions:
- I'm new, so a really fancy list comprehension or turning things into objects with dunders and method overrides is way too advanced for me.
- Built-in modules only. This is Python 2.7 code, and it only imports os and re.
What the overall script does:
Finds files with a fixed prefix. The files are pipe-delimited text files. The first row is a header. It has a footer which can be 1 or more rows. Based on the prefix, the script throws away "columns" from the text file that aren't needed in another step. It saves the data, comma-separated, in a new file with a .csv extension.
The bulk of the work is done in processRawFiles(). This is what I'd like refactored, since it's wildly repetitive.
def separateTranslationTypes(translationFileList):
    '''Takes in the list of all files to process and finds which are roomtypes,
    ratecodes or sourcecodes. The type of file determines how it will be processed.'''
    rates = []
    rooms = []
    sources = []
    for afile in translationFileList:
        rates.append( [m.group() for m in re.finditer('cf_ratecodeheader+(.*)', afile)] )
        rooms.append( [m.group() for m in re.finditer('cf_roomtypes+(.*)', afile)] )
        sources.append( [m.group() for m in re.finditer('cf_sourcecodes+(.*)', afile)] )
    # an empty list evaluates to False, so x is kept only when the list is not empty
    rates = [x[0] for x in rates if x]
    rooms = [x[0] for x in rooms if x]
    sources = [x[0] for x in sources if x]
    print '... rateCode files :: ', rates, '\n'
    print '... roomType files :: ', rooms, '\n'
    print '... sourceCode files :: ', sources, '\n'
    return {'rateCodeFiles': rates,
            'roomTypeFiles': rooms,
            'sourceCodeFiles': sources}
groupedFilestoProcess = separateTranslationTypes(allFilestoProcess)
def processRawFiles(groupedFileDict):
    for key in groupedFileDict:
        # Process the rateCodes files
        if key == 'rateCodeFiles':
            for fname_Value in groupedFileDict[key]:  # fname_Value is the filename
                if os.path.exists(fname_Value):
                    workingfile = open(fname_Value, 'rb')
                    filedatastring = workingfile.read()  # entire file contents as a single string
                    workingfile.close()
                    outname = 'forUpload_' + fname_Value[:-4] + '.csv'  # removes .txt or any other 3-char extension
                    outputfile = open(outname, 'wb')
                    filedatalines = filedatastring.split('\n')  # a list containing each line of the file
                    rawheaders = filedatalines[0]  # the first row of the file, with the headers
                    parsedheaders = rawheaders.split('|')  # turn the header string into a list; | was the delimiter
                    print '\n'
                    print 'outname: ', outname, '\n'
                    # print 'rawheaders: ', rawheaders, '\n'
                    # print 'parsedheaders: ', parsedheaders, '\n'
                    # print filedatalines[0:2]
                    print '\n'
                    ratecodeindex = parsedheaders.index('RATE_CODE')
                    ratecodemeaning = parsedheaders.index('DESCRIPTION')
                    for dataline in filedatalines:
                        if dataline[:4] == 'LOGO':
                            firstuselessline = filedatalines.index(dataline)
                            # print firstuselessline
                    # ignore the first line, which was the headers
                    # stop before the line that starts with LOGO - the first useless line
                    for dataline in filedatalines[1:firstuselessline-1]:
                        # print dataline.split('|')
                        theratecode = dataline.split('|')[ratecodeindex]
                        theratemeaning = dataline.split('|')[ratecodemeaning]
                        # print theratecode, '\t', theratemeaning, '\n'
                        linetowrite = theratecode + ',' + theratemeaning + '\n'
                        outputfile.write(linetowrite)
                    outputfile.close()
        # Process the roomTypes files
        if key == 'roomTypeFiles':
            for fname_Value in groupedFileDict[key]:  # fname_Value is the filename
                if os.path.exists(fname_Value):
                    workingfile = open(fname_Value, 'rb')
                    filedatastring = workingfile.read()  # entire file contents as a single string
                    workingfile.close()
                    outname = 'forUpload_' + fname_Value[:-4] + '.csv'  # removes .txt or any other 3-char extension
                    outputfile = open(outname, 'wb')
                    filedatalines = filedatastring.split('\n')  # a list containing each line of the file
                    rawheaders = filedatalines[0]  # the first row of the file, with the headers
                    parsedheaders = rawheaders.split('|')  # turn the header string into a list; | was the delimiter
                    print '\n'
                    print 'outname: ', outname, '\n'
                    # print 'rawheaders: ', rawheaders, '\n'
                    # print 'parsedheaders: ', parsedheaders, '\n'
                    # print filedatalines[0:2]
                    print '\n'
                    ratecodeindex = parsedheaders.index('LABEL')
                    ratecodemeaning = parsedheaders.index('SHORT_DESCRIPTION')
                    for dataline in filedatalines:
                        if dataline[:4] == 'LOGO':
                            firstuselessline = filedatalines.index(dataline)
                            # print firstuselessline
                    # ignore the first line, which was the headers
                    # stop before the line that starts with LOGO - the first useless line
                    for dataline in filedatalines[1:firstuselessline-1]:
                        # print dataline.split('|')
                        theratecode = dataline.split('|')[ratecodeindex]
                        theratemeaning = dataline.split('|')[ratecodemeaning]
                        # print theratecode, '\t', theratemeaning, '\n'
                        linetowrite = theratecode + ',' + theratemeaning + '\n'
                        outputfile.write(linetowrite)
                    outputfile.close()
        # Process the sourceCodes files
        if key == 'sourceCodeFiles':
            for fname_Value in groupedFileDict[key]:  # fname_Value is the filename
                if os.path.exists(fname_Value):
                    workingfile = open(fname_Value, 'rb')
                    filedatastring = workingfile.read()  # entire file contents as a single string
                    workingfile.close()
                    outname = 'forUpload_' + fname_Value[:-4] + '.csv'  # removes .txt or any other 3-char extension
                    outputfile = open(outname, 'wb')
                    filedatalines = filedatastring.split('\n')  # a list containing each line of the file
                    rawheaders = filedatalines[0]  # the first row of the file, with the headers
                    parsedheaders = rawheaders.split('|')  # turn the header string into a list; | was the delimiter
                    print '\n'
                    print 'outname: ', outname, '\n'
                    # print 'rawheaders: ', rawheaders, '\n'
                    # print 'parsedheaders: ', parsedheaders, '\n'
                    # print filedatalines[0:2]
                    print '\n'
                    ratecodeindex = parsedheaders.index('SOURCE_CODE')
                    ratecodemeaning = parsedheaders.index('DESCRIPTION')
                    for dataline in filedatalines:
                        if dataline[:4] == 'LOGO':
                            firstuselessline = filedatalines.index(dataline)
                            # print firstuselessline
                    # ignore the first line, which was the headers
                    # stop before the line that starts with LOGO - the first useless line
                    for dataline in filedatalines[1:firstuselessline-1]:
                        # print dataline.split('|')
                        theratecode = dataline.split('|')[ratecodeindex]
                        theratemeaning = dataline.split('|')[ratecodemeaning]
                        # print theratecode, '\t', theratemeaning, '\n'
                        linetowrite = theratecode + ',' + theratemeaning + '\n'
                        outputfile.write(linetowrite)
                    outputfile.close()
processRawFiles(groupedFilestoProcess)
I had to redo my code because of a new incident where the files in question had neither the header row nor the footer row. However, since the columns I want still occur in the same order, I can keep just those. Also, any row that has fewer columns than the larger of the two indices used is skipped.
As for reducing repetition, processRawFiles now contains two nested defs that remove the need to repeat a lot of that parsing code from before.
def separateTranslationTypes(translationFileList):
    '''Takes in the list of all files to process and finds which are roomtypes,
    ratecodes or sourcecodes. The type of file determines how it will be processed.'''
    rates = []
    rooms = []
    sources = []
    for afile in translationFileList:
        rates.append( [m.group() for m in re.finditer('cf_ratecode+(.*)', afile)] )
        rooms.append( [m.group() for m in re.finditer('cf_roomtypes+(.*)', afile)] )
        sources.append( [m.group() for m in re.finditer('cf_sourcecodes+(.*)', afile)] )
    # an empty list evaluates to False, so x is kept only when the list is not empty
    rates = [x[0] for x in rates if x]
    rooms = [x[0] for x in rooms if x]
    sources = [x[0] for x in sources if x]
    print '... rateCode files :: ', rates, '\n'
    print '... roomType files :: ', rooms, '\n'
    print '... sourceCode files :: ', sources, '\n'
    return {'rateCodeFiles': rates,
            'roomTypeFiles': rooms,
            'sourceCodeFiles': sources}

groupedFilestoProcess = separateTranslationTypes(allFilestoProcess)

def processRawFiles(groupedFileDict):
    def someFixedProcess(bFileList, codeIndex, codeDescriptionIndex):
        for fname_Value in bFileList:  # fname_Value is the filename
            if os.path.exists(fname_Value):
                workingfile = open(fname_Value, 'rb')
                filedatastring = workingfile.read()  # entire file contents as a single string
                workingfile.close()
                outname = 'forUpload_' + fname_Value[:-4] + '.csv'  # removes .txt or any other 3-char extension
                outputfile = open(outname, 'wb')
                filedatalines = filedatastring.split('\n')  # a list containing each line of the file
                # print '\n', 'outname: ', outname, '\n\n'
                # HEADERS ARE NOT IGNORED! The file might not have headers.
                print outname
                for dataline in filedatalines:
                    # print filedatalines.index(dataline), dataline.split('|')
                    # e.g. index 13 requires len 14, so len > index is needed
                    if len(dataline.split('|')) > codeDescriptionIndex:
                        thecode_text = dataline.split('|')[codeIndex]
                        thedescription_text = dataline.split('|')[codeDescriptionIndex]
                        linetowrite = thecode_text + ',' + thedescription_text + '\n'
                        outputfile.write(linetowrite)
                outputfile.close()
    def processByType(aFileList, itsType):
        typeDict = {'rateCodeFiles':   {'CODE_INDEX': 4, 'DESC_INDEX': 7},
                    'roomTypeFiles':   {'CODE_INDEX': 1, 'DESC_INDEX': 13},
                    'sourceCodeFiles': {'CODE_INDEX': 2, 'DESC_INDEX': 3}}
        # print 'someFixedProcess(', aFileList, typeDict[itsType]['CODE_INDEX'], typeDict[itsType]['DESC_INDEX'], ')'
        someFixedProcess(aFileList,
                         typeDict[itsType]['CODE_INDEX'],
                         typeDict[itsType]['DESC_INDEX'])
    for key in groupedFileDict:
        processByType(groupedFileDict[key], key)

processRawFiles(groupedFilestoProcess)
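One more possible squeeze, in the same spirit (just a sketch, not tested against the real file names): separateTranslationTypes still repeats itself three times, so its prefix checks can be table-driven too:
def separateTranslationTypes(translationFileList):
    # one table instead of three parallel lists
    prefixPatterns = {'rateCodeFiles':   'cf_ratecode+(.*)',
                      'roomTypeFiles':   'cf_roomtypes+(.*)',
                      'sourceCodeFiles': 'cf_sourcecodes+(.*)'}
    grouped = {}
    for key, pattern in prefixPatterns.items():
        grouped[key] = [afile for afile in translationFileList
                        if re.search(pattern, afile)]
    return grouped
The keys match the ones processRawFiles already expects, so nothing else has to change.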

Error reading MIME types using Python

I'm writing a Python script that will read file extensions, MIME types and file signatures so I can determine whether any of those are missing or corrupt, and establish the types of the files in a given directory.
So far I've got:
import magic, os

def get_ext(dirPath):
    foldercount = 0
    filecount = 0
    while True:
        if os.path.exists(dirPath):
            break
        else:
            print "Directory doesn't exist!"
            continue
    includePath = raw_input("Do you want to include the complete path to the files in the output?: Y/N\n")
    if includePath.upper() == "Y":
        for rootfolder, subfolders, files in os.walk(dirPath):
            foldercount += len(subfolders)
            filecount += len(files)
            for f in files:
                name = f
                path = os.path.join(rootfolder, f)
                ext = os.path.splitext(f)[1]
                if ext != "":
                    print "Filename: " + str(path) + "\t\tExtension: " + str(ext) + "\tMIME: "
                else:
                    print "Filename: " + str(path) + "\t\tExtension: no extension found"
        print "Found {0} files in {1} folders".format(filecount, foldercount)
    elif includePath.upper() == "N":
        for rootfolder, subfolders, files in os.walk(dirPath):
            foldercount += len(subfolders)
            for f in files:
                name = f
                path = os.path.join(rootfolder, f)
                ext = os.path.splitext(f)[1]
                if ext != "":
                    print "Filename: " + str(name) + "\t\tExtension: " + str(ext)
                else:
                    print "Filename: " + str(name) + "\t\tExtension: no extension found"
        print "Found in {0} folders".format(foldercount)
    else:
        print "Wrong input, try again"

def getMagic(dirPath):
    while True:
        if os.path.exists(dirPath):
            break
        else:
            print "Directory doesn't exist!"
            continue
    for rootfolder, subfolders, files in os.walk(dirPath):
        for f in files:
            bestand = f
            mymagic = magic.Magic(mime=True)
            mytype = mymagic.from_file(bestand)
            print mytype
            print ("The MIME type of the file %s is %s" % (bestand, mytype))

dirPath = raw_input("Directory to check files in: ")
get_ext(dirPath)
getMagic(dirPath)
get_ext() works as it should, giving me a file's name and extension.
However, when I try to fetch the MIME type it somehow throws the following error:
Traceback (most recent call last):
File "/home/nick/workspace/Proto/asdfasdf.py", line 80, in <module>
getMagic(dirPath)
File "/home/nick/workspace/Proto/asdfasdf.py", line 74, in getMagic
mytype = mymagic.from_file(bestand)
File "/usr/local/lib/python2.7/dist-packages/magic.py", line 75, in from_file
raise IOError("File does not exist: " + filename)
IOError: File does not exist: 2
I know for a fact that the file '2' does exist; it is a plain text document.
It does give me the MIME type if I hardcode the path to a file in the script, but I want the script to traverse a directory and give me the MIME types of all the files in it.
Can somebody explain why it throws this error and how to fix this issue?
I am using the python-magic module installed using pip install python-magic
Thanks
From the documentation for os.walk we can see that
filenames is a list of the names of the non-directory files in dirpath. Note that the names in the lists contain no path components. To get a full path (which begins with top) to a file or directory in dirpath, do os.path.join(dirpath, name).
You need to get the complete path as
bestand = os.path.join(rootfolder, f)
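With that change, getMagic would look like this (a sketch of the corrected function; creating the Magic object once outside the loop is a small extra cleanup):
def getMagic(dirPath):
    mymagic = magic.Magic(mime=True)  # create the detector once, not per file
    for rootfolder, subfolders, files in os.walk(dirPath):
        for f in files:
            bestand = os.path.join(rootfolder, f)  # full path, not just the bare name
            mytype = mymagic.from_file(bestand)
            print ("The MIME type of the file %s is %s" % (bestand, mytype))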

Accessing data required out of for loop in python and store the data at specific location

I am using a for loop to get data from the user at the command prompt, using Python 2.7, and then storing the data in a text file in a certain format. I am looking for a way to collect the data from the user, store it in a list, and use it where required.
for Input_Number in range(Number_Of_Inputs):
    Input_Number = Input_Number + 1
    GUI_Parameter = str(raw_input("Please enter input parameter " + str(Input_Number) + " :"))
    GUI_Parameter_Name = str(raw_input("Enter the GUI name for the parameter " + str(Input_Number) + " :"))
    Store_GUI_Parameter(Opened_File, GUI_Parameter, GUI_Parameter_Name)
I would like to store this data at a specific location in a text file, according to the required syntax. The above code stores the data in the text file, but the problem is that it doesn't store it at the required place.
def Store_GUI_Parameter(Opened_File, GUI_Parameter, GUI_Parameter_Name):
    GUI_Description = "| " + '"' + GUI_Parameter_Name + '"' + " |$" + GUI_Parameter.title() + " |"
    Write_Data(Opened_File, GUI_Description)
    print "GUI parameters written to NDF file"
    return
The data storage is done using the above function...
I tried this, but unfortunately it is not working either:
GUI_Parameter = []
GUI_Parameter_Name = []
for Input_Number in range(Number_Of_Inputs):
    Input_Number = Input_Number + 1
    GUI_Parameter[Input_Number] = str(raw_input("Please enter input parameter " + str(Input_Number) + " :"))
    GUI_Parameter_Name[Input_Number] = str(raw_input("Enter the GUI name for the parameter " + str(Input_Number) + " :"))
Using it outside the loop in the same function...
GUI_Description(Opened_File, GUI_Parameter_Name[Input_Number], GUI_Parameter[Input_Number])
The function implementation:
def GUI_Description(Opened_File, GUI_Parameter_Name[Input_Number], GUI_Parameter[Input_Number]):
    Iteration = 0
    while Iteration < Input_Number:
        Iteration += 1
        GUI_Description = "| " + '"' + GUI_Parameter_Name[Input_Number] + '"' + " |$" + GUI_Parameter[Input_Number].title() + " |"
        Write_Data(Opened_File, GUI_Description)
    print "GUI parameters written to NDF file"
    return
But it shows a syntax error at the def GUI_Description line:
C:\Users\padmanab\Desktop>python CtoN.py
  File "CtoN.py", line 173
    def GUI_Description(Opened_File, GUI_Parameter_Name[Input_Number], GUI_Parameter[Input_Number]):
                                                       ^
SyntaxError: invalid syntax
The syntax error in the function GUI_Description is caused by your input arguments: GUI_Parameter_Name[Input_Number] is not a valid input argument. Since your function requires both GUI_Parameter_Name and Input_Number, they should be separate input arguments. The snippet below solves this syntax error:
def GUI_Description(Opened_File, Input_Number, GUI_Parameter_Name, GUI_Parameter):
...
The code below will give an 'index out of range' error since the lists 'GUI_Parameter' and 'GUI_Parameter_Name' have zero length.
GUI_Parameter = []
GUI_Parameter_Name = []
Number_Of_Inputs = 1
for Input_Number in range(Number_Of_Inputs):
    Input_Number = Input_Number + 1
    GUI_Parameter[Input_Number] = str(raw_input("Please enter input parameter " + str(Input_Number) + " :"))
    GUI_Parameter_Name[Input_Number] = str(raw_input("Enter the GUI name for the parameter " + str(Input_Number) + " :"))
If you want to add items to the arrays you should append them:
GUI_Parameter.append(raw_input())
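Putting the two fixes together, the collection loop and the writer could look like this (a sketch; Write_Data, Opened_File and Number_Of_Inputs come from the asker's code):
GUI_Parameter = []
GUI_Parameter_Name = []
for Input_Number in range(1, Number_Of_Inputs + 1):
    GUI_Parameter.append(raw_input("Please enter input parameter " + str(Input_Number) + " :"))
    GUI_Parameter_Name.append(raw_input("Enter the GUI name for the parameter " + str(Input_Number) + " :"))

def GUI_Description(Opened_File, GUI_Parameter_Name, GUI_Parameter):
    # walk the two lists in parallel and write one line per parameter
    for name, parameter in zip(GUI_Parameter_Name, GUI_Parameter):
        description = "| " + '"' + name + '"' + " |$" + parameter.title() + " |"
        Write_Data(Opened_File, description)
    print "GUI parameters written to NDF file"

GUI_Description(Opened_File, GUI_Parameter_Name, GUI_Parameter)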

Python CSV export writing characters to new lines

I have been using multiple code snippets to create a solution that will allow me to write a list of players in a football team to a csv file.
import csv

data = []
string = input("Team Name: ")
fName = string.replace(' ', '') + ".csv"
print("When you have entered all the players, press enter.")
# while loop that will continue allowing entering of players
done = False
while not done:
    a = input("Name of player: ")
    if a == "":
        done = True
    else:
        string += a + ','
        string += input("Age: ") + ','
        string += input("Position: ")
        print(string)

file = open(fName, 'w')
output = csv.writer(file)
for row in string:
    tempRow = row
    output.writerow(tempRow)
file.close()
print("Team written to file.")
I would like the exported csv file to look like this:
player1,25,striker
player2,27,midfielder
and so on. However, when I check the exported csv file it looks more like this:
p
l
a
y
e
r
,
2
5
and so on.
Does anyone have an idea of where I'm going wrong?
Many thanks
Karl
Your string is a single string, not a list of strings. You are expecting it to be a list of strings when you do this:
for row in string:
When you iterate over a string, you iterate over its characters, which is why you are seeing one character per line.
Declare a list of strings and append each player's string to it, like this:
done = False
strings_list = []
while not done:
    string = ""
    a = input("Name of player: ")
    if a == "":
        done = True
    else:
        string += a + ','
        string += input("Age: ") + ','
        string += input("Position: ") + '\n'
        strings_list.append(string)
Now iterate over this strings_list and print to the output file. Since you are putting the delimiter (comma) yourself in the string, you do not need a csv writer.
a_file = open(fName, 'w')
for row in strings_list:
    print(row)
    a_file.write(row)
a_file.close()
Note:
string is the name of a standard module in Python, so it is wise not to use it as a variable name in your program. The same goes for your variable file.
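If you would rather keep the csv module (it handles quoting and delimiters for you), collect each player as a list of fields and hand those to writerow; a minimal sketch, reusing fName from the question:
import csv

players = []
while True:
    name = input("Name of player: ")
    if name == "":
        break
    age = input("Age: ")
    position = input("Position: ")
    players.append([name, age, position])  # one list of fields per player

with open(fName, 'w', newline='') as f:  # newline='' avoids blank lines on Windows
    writer = csv.writer(f)
    writer.writerows(players)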

Find and later delete zombie informatica objects

Is there an easy way to identify and clean up unused Informatica artifacts?
Context: in one of the projects there are a lot of zombie sessions, mappings etc., and their creators are long gone.
I want to do the following:
1. List/Delete all sessions that are not associated with a workflow.
2. List/Delete all mappings that are not used in any session/workflow.
3. List/Delete all sources/targets that are not used in any mapping.
4. List/Delete all workflows that were not run in the past one year.
Someone mentioned using Designer > Tools > Queries. I can't express the above 1/2/3/4 with the options given; can anyone shed some light?
Note:
I'm not looking for clicking one by one and finding dependencies.
I'm not looking for downloading the whole repository as XML and searching dependencies one by one.
As this is not easily achievable with PowerCenter itself, I tried to come up with some simple tool to solve it. For the full description and download link please go to this page.
Below you'll find the code, published as requested in the comments.
Feel free to use, share and improve :) Any code review will also be much appreciated.
import subprocess
import os
from subprocess import *
import platform
import sys
import getpass
import configparser  # on Python 2 this is the 'configparser' backport from PyPI

# global variable declarations
currentDir = ''
pmrepPath = ''
domainFile = ''

def connect_to_repo(Repository, Domain, User, Host, Port, UserSecurityDomain):
    #password = raw_input("Enter password for Repository: " + Repository + ", User: " + User)
    password = getpass.getpass()
    print "\nConnecting..."
    if Domain != '':
        RepoCommand = "pmrep connect -r " + Repository + " -d " + Domain + " -n " + User + " -x " + password  #+ " -X DOMAIN_PWD"
    else:
        RepoCommand = "pmrep connect -r " + Repository + " -n " + User + " -x " + password + " -h " + Host + " -o " + Port
    if UserSecurityDomain != '':
        RepoCommand += " -s " + UserSecurityDomain
    RepoCommand = RepoCommand.rstrip()
    p = subprocess.Popen(RepoCommand, stderr=subprocess.PIPE, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    out, err = p.communicate()
    if p.returncode or err:
        print "Connection Failed"
        print err.strip()
        print out.strip()
    else:
        print "Connection Successful"
    sys.stdout.flush()
    sys.stdin.flush()
    return p.returncode

def execute_pmrep_command(command, output_file_name, start_line, end_line, line_prefix):
    if len(line_prefix) > 0:
        line_prefix += ' '
    out = open(output_file_name, 'a')
    process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    output, error = process.communicate()
    for line in output.split('\r\n')[start_line:end_line]:
        out.writelines(line_prefix + line + '\n')
    out.close()
    return

def check_platform():
    global domainFile
    global currentDir
    global pmrepPath
    global platForm
    platForm = platform.system()
    print "Platform recognized : " + platForm
##    if not os.getenv('INFA_HOME', 'C:\\Informatica\\9.5.1'):
##        print "INFA_HOME env_variable not set in your " + platForm + " platform."
##        print "Please set INFA_HOME and continue."
##        raw_input()
##        sys.exit(0)
    if not os.getenv('INFA_DOMAINS_FILE'):
        print "INFA_DOMAINS_FILE env_variable not set in your " + platForm + " platform."
        print "Please set INFA_DOMAINS_FILE and continue."
        raw_input()
        sys.exit(0)
##    elif not os.getenv('DOMAIN_PWD', 'vic'):
##        print "DOMAIN_PWD env variable not set in your " + platForm + " platform."
##        print "Please set DOMAIN_PWD and continue."
##        raw_input()
##        sys.exit(0)
##    else:
##        if platForm == 'Windows':
##            pmrepPath = os.getenv('INFA_HOME').strip() + "\\clients\\PowerCenterClient\\client\\bin"
##        elif platForm == 'Linux':
##            pmrepPath = os.getenv('INFA_HOME').strip() + "/server/bin"
##        currentDir = os.getcwd()
##        domainFile = os.getenv('INFA_DOMAINS_FILE', 'C:\\Informatica\\9.5.1\\domains.infa').strip()

config = configparser.RawConfigParser()
config.optionxform = lambda option: option  # keep option names case-sensitive
config.read('InfaRepo_ListUnusedObjects.cfg')
infaDir = config.get('Common', 'infaDir').strip()
Repository = config.get('Common', 'Repository').strip()
Domain = config.get('Common', 'Domain').strip()
Host = config.get('Common', 'Host').strip()
Port = config.get('Common', 'Port').strip()
Folder = config.get('Common', 'Folder').strip()
User = config.get('Common', 'User').strip()
UserSecurityDomain = config.get('Common', 'UserSecurityDomain').strip()
objectTypeList = config.get('Common', 'objectTypeList').split(',')
if Domain != '':
    print 'Domain provided, will be used to connect.'
else:
    print 'Domain not provided, Host and Port will be used to connect.'
for i in range(len(objectTypeList)):
    objectTypeList[i] = objectTypeList[i].strip()
currentDir = os.getcwd()
outputDir = currentDir + '\\UnusedObjectsReport'
###objectTypeList = ['mapplet', 'mapping', 'session', 'source', 'target', 'worklet']
##objectTypeList = ['target']
pmrepPath = infaDir.strip() + "\\clients\\PowerCenterClient\\client\\bin"
os.chdir(pmrepPath)
outFile = outputDir + "\\ListOfUnusedObjects.txt"
if not os.path.exists(os.path.dirname(outFile)):
    os.makedirs(os.path.dirname(outFile))
print 'Output file: ' + outFile
open(outFile, 'w').writelines("Domain : " + Domain + "\nRepository : " + Repository + "\nUserName : " + User + "\n")
open(outFile, 'a').writelines("***************************" + "\n")
open(outFile, 'a').writelines("LIST OF UNUSED OBJECTS:\n")
outBatchFile = outputDir + "\\DeleteUnusedObjects.bat"
tempDir = outputDir + "\\temp"
if not os.path.exists(outputDir):
    os.makedirs(outputDir)
if not os.path.exists(tempDir):
    os.makedirs(tempDir)
for tempFile in os.listdir(tempDir):
    os.remove(os.path.join(tempDir, tempFile))
print 'Output batch file: ' + outBatchFile
if Domain != '':
    RepoCommand = "pmrep connect -r " + Repository + " -d " + Domain + " -n " + User
else:
    RepoCommand = "pmrep connect -r " + Repository + " -n " + User + " -h " + Host + " -o " + Port
if UserSecurityDomain != '':
    RepoCommand += " -s " + UserSecurityDomain
open(outBatchFile, 'w').writelines(pmrepPath + "\\" + RepoCommand + "\n")
objectTypeCounter = 0
return_code = connect_to_repo(Repository, Domain, User, Host, Port, UserSecurityDomain)
objDepDict = {}
error = False
errorList = []
# check whether the repository connection was successful
if return_code == 0:
    for objectType in objectTypeList:
        objectTypeCounter += 1
        print "Step {0} of {1}: {2}".format(objectTypeCounter, len(objectTypeList), objectType)
        objectFile = tempDir + "\\" + objectType + ".txt"
        open(objectFile, 'w').writelines("")
        objectDepFile = tempDir + "\\" + objectType + "_dep.txt"
        open(objectDepFile, 'w').writelines("")
        execute_pmrep_command("pmrep listobjects -f " + Folder + " -o " + objectType, objectFile, 8, -4, '')
        objectList = open(objectFile).readlines()
        objectCounter = 0
        if len(objectList) == 0:
            print '\tNo {0}s found'.format(objectType)
        elif objectList[0][:3] == ' [[':
            error = True
            for line in objectList:
                errorList += [line.replace('\n', '')]
            break
        for line in objectList:
            objectCounter += 1
            fields = line.split(' ')
            if len(fields) == 2:
                objectType = fields[0]
                objectName = fields[1][:-1]
            else:
                objectType = fields[0]
                objectName = fields[2][:-1]
            # if the object is non-reusable, it obviously is part of some workflow, so skip it
            if fields[1] == 'non-reusable':
                print "\t{0} {1} of {2}: {3} is not a reusable {4} - skipping".format(objectType, objectCounter, len(objectList), objectName, objectType)
                continue
            command = "pmrep listobjectdependencies -f " + Folder + " -n " + objectName + " -o " + objectType + " -p parents"
            #print "Getting object dependencies for " + objectType + " " + objectName
            print "\t{0} {1} of {2}: {3}".format(objectType, objectCounter, len(objectList), objectName)
            execute_pmrep_command(command, objectDepFile, 8, -6, objectName)
        # find unused objects
        for fileLine in open(objectDepFile, 'r').readlines():
            line = fileLine.split(' ')
            if len(line) == 3:
                Name = line[0]
                ParentType = line[1]
                ParentName = line[2]
            else:
                Name = line[0]
                ParentType = line[1]
                ParentName = line[3]
            try:
                objDepDict[objectType + ': ' + Name] += [ParentType + ' ' + ParentName]
            except KeyError:
                objDepDict[objectType + ': ' + Name] = [ParentType + ' ' + ParentName]
    found = False
    for objectKey in objDepDict.iterkeys():
        objectType, objName = objectKey.replace(' ', '').split(':')
        if len(objDepDict[objectKey]) <= 1:
            if not found:
                print '\n'
                print 'Following unused objects have been found:'
                found = True
            print '\t{0}: {1}'.format(objectType, objName)
            open(outFile, 'a').writelines('{0}: {1}\n'.format(objectType, objName))
            open(outBatchFile, 'a').writelines("rem {0}\\pmrep deleteobject -f {1} -o {2} -n {3}\n".format(pmrepPath, Folder, objectType, objName))
    execute_pmrep_command('pmrep cleanup', 'log.txt', 0, 0, '')
    open(outBatchFile, 'a').writelines(pmrepPath + '\\pmrep cleanup' + '\n')
    if not error:
        if not found:
            print 'No unused objects found.'
        print '\nDone.'
        print 'Output file: ' + outFile
        print 'Output batch file: ' + outBatchFile
    else:
        print 'Following errors occurred:'
        for e in errorList:
            print '\t', e
# wait for a key press
print '\nHit Enter to quit...'
raw_input()
# End of program
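For reference, the script reads its settings from an InfaRepo_ListUnusedObjects.cfg file in the working directory. A sketch of that file, with the keys taken from the config.get() calls above (all values here are placeholders, not real defaults):
[Common]
infaDir = C:\Informatica\9.5.1
Repository = MyRepository
Domain = MyDomain
Host =
Port =
Folder = MyFolder
User = repo_user
UserSecurityDomain =
objectTypeList = mapplet, mapping, session, source, target, worklet
Either Domain or the Host/Port pair has to be filled in, matching the connection logic at the top of the script.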