if __name__ == '__main__':
website_path = GetValidDirectory("Enter the path to the website : ")
websitelink_list = [] #List of tuples containing the source link information
print(websitelink_list)
htmlfiles = [] #List of the html files we analyze on the site
for dirname, subdir_list, file_list in os.walk(website_path):
for file in file_list:
#Check the file extension. Only analyze files that are .html
filename, fileext = os.path.splitext(file)
if fileext == ".html":
relative_file = MakeFullPath(dirname, file)
full_file = os.path.realpath(relative_file)
htmlfiles.append( (full_file, relative_file) )
filelinks = FileLinkTuples(dirname, file)
print(filelinks)
websitelink_list.extend(filelinks)
#Files that do not have a link to them.
functions:
def GetValidDirectory(prompt : str) -> str:
""" Returns a valid directory entered from the user.
Arguments :
prompt : str - The text displayed to the user asking them to enter a directory.
Returns : String of a valid directory that the user entered.
Errors : Displays a warning if the user does not enter a valid directory.
"""
while True:
try:
str_prompt = input(prompt)
prompt_dir = os.path.realpath(str_prompt)
ch_dir = os.chdir(str_prompt)
except IOError:
print("Please enter a valid directory")
continue
return prompt_dir
def MakeFullPath(path : str, filename : str) -> str:
""" combines path and filename for a full filename. If path does not end in \\ then it appends it
Arguments :
path : str, the path to the file
filename : str, the name of the file.
Returns :
path concatenated with filename.
If path does not end with a backslash, then append one if path has data in it.
"""
no_data = []
if os.listdir(path) != no_data:
relative_file = os.path.join(path,filename)
return relative_file
def FileLinkTuples(path : str, filename : str) -> list:
"""Returns a tuple of links from the file
Arguments :
path : str - The path to the filename. Might be relative path.
filename : str - The name of the html file to open and analyze.
Returns : list of tuples.
Each tuple has the 4 values.
HTMLFile - HTML file and path to the source file the link was found in.
fulllinkpath - Full path to the link on the system. Not a relative path.
linkpath - Path and name to the link specified. Relative address.
file exists - Boolean indicating if the file at the full link path exists.
Example
[ (('sample\\index.html', 'C:\\Website Analysis\\downloads.html', 'downloads.html', True)
]
"""
filelink_list = []
a = path.split('\\')
b = os.path.join(a[-1],filename)
c = os.path.realpath(filename)
if os.path.isfile(filename) == "True":
filelink_list.append((b,c,filename,"True"))
return filelink_list
Can someone please analyze my codes and tell me why the typles of (full_file,relative_file) and filelinks are not appending to the empty lists in the main program?
For this assignment, i was given a Unittest. The functions passed the test, but i don't know why the lists in the main program are not updating?
When i print the websitelink_list and htmlfiles, they are both empty.
What am i doing wrong with the def FileLinkTuples? particularly the
if os.path.isfile(filename) == "True":
filelink_list.append((b,c,filename,"True"))
Thanks
As far as I can tell, your code does produce nonempty results for htmlfiles at least. The main problem that I see is this:
if os.path.isfile(filename) == "True":
The function returns a Boolean, not a string, so you should check against the Boolean value True, or even omit it altogether:
if os.path.isfile(filename):
After fixing that, you should expect something in websitelink_list as well, provided your directory does in fact contain *.html files.
Now, if you are not seeing anything in htmlfiles at all (even if the directory does contain *.html files) then it must be something else that you're not showing here, since it appears to work correctly on my computer.
Related
I need to do list of assets that are inside a list of directories. So, the step-by-step is this:
The user selects a Parent Directory which contains all the other
directories with assets and then makes a list with these directories;
Then, I have to access the first directory of the list and make
another list with the assets inside the directory.
The problem is that when I run the code for the first time without the loop, it made the list of directories perfectly. But when I try to run with the loop the Scripting Listener says:
"Error occurred in anonymous codeblock; filename:
E:\MUVA\MaxScript\teste.ms; position: 371; line: 9
-- No "map" function for "\*""
And then I have to restart the 3ds Max and delete the loop for the first step works again. Maybe there's something wrong with my logical that I'm not seeing.
dirPath = getSavePath caption: "SELECT THE PARENT DIRECTORY"
dirList = getDirectories dirPath + "\*"
print ("Directories: " + dirList)
for dir in dirList do(
assetsPath = dir + "\*.max"
assetsList = getFiles assetsPath
print ("List of assets: " + assetsList)
)
And yes, my logical was wrong. I update the code and get ot the result I want.
directoryParent = getSavePath caption: "SELECT THE PARENT DIRECTORY"
dirPath = directoryParent + "\*"
dirList = getDirectories dirPath
print dirList
for dir in dirList do(
assetsPath = dir + "\*.max"
assetsList = getFiles assetsPath
print assetsList
)
I am a newbie in PowerBi and currently working on a POC where I need to load data from a folder or directory. Before this load, I need to check if
1) the respective folder exists
2) the file under the folder is with.csv extension.
Ex. Let suppose we have a file '/MyDoc2004/myAction.csv'.
Here first we need to check if MyDoc2004 exists and then if myAction file is with.csv extension.
Is there any way we can do this using Power Query?
1. Check if the folder exists
You can apply Folder.Contents function with the absolute path of the folder, and handle the error returned when the folder does not exist with try ... otherwise ... syntax.
let
absoluteFolderPath = "C:/folder/that/may/not/exist",
folderContentsOrError = Folder.Contents(absoluteFolderPath),
alternativeResult = """" & absoluteFolderPath & """ is not a valid folder path",
result = try folderContentsOrError otherwise alternativeResult
in
result
2. Check if the file is with .csv extension
I'm not sure what output you are expecting.
Here is a way to get the content of the file by full path including ".csv", or return an alternative result if not found.
let
absoluteFilePath = "C:/the/path/myAction.csv",
fileContentsOrError = File.Contents(absoluteFilePath),
alternativeResult = """" & absoluteFilePath & """ is not a valid file path",
result = try fileContentsOrError otherwise alternativeResult
in
result
If this is not what you are looking for, please update the question with the expected output.
Hope it helps.
I'm trying to troncate several element.text on a xml file. I succeed to get two list, the first one regroup the formers too long element.text as str (long_name) and the second regroup the same after a troncation (short_name).
Now i want to replace the element.text on my xml, i tried some script but i surrended to work with the function readlines(), i want to find a similar solution with lxml as this code :
txt = open('IF_Generic.arxml','r')
Lines = txt.readlines()
txt.close()
txt = open('IF_Genericnew.arxml','w')
for e in range(len(long_name)) :
for i in range(len(Lines)) :
if (long_name[e] in Lines[i]) == True :
Lines[i] = Lines[i].replace(long_name[e],short_name[e])
for i in Lines :
txt.write(i)
txt.close()
I tried this, but it doesn't work :
f = open('IF_Generic.arxml')
arxml = f.read()
f.close()
tree = etree.parse(StringIO(arxml))
for e,b in enumerate(long_name) :
context = etree.iterparse(StringIO(arxml))
for a,i in context:
if not i.text:
pass
else:
if (b in i.text) == True :
i.text = short_name[e]
obj_arxml = etree.tostring(tree,pretty_print=True)
f = open('IF_Genericnew.arxml','w')
f.write(obj_arxml)
f.close()
Let's say the first element of the list long_name is RoutineServices_EngMGslLim_NVMID03
<BALISE_A>
<BALISE_B>
<SHORT-NAME>RoutineServices_EngMGslLim_NVMID03</SHORT-NAME>
</BALISE_B>
</BALISE_A>
<BALISE_C>
<POSSIBLE-ERROR-REF DEST="APPLICATION-ERROR">/Interfaces/RoutineServices_EngMGslLim_NVMID03/E_NOT_OK</POSSIBLE-ERROR-REF>
<SHORT-NAME>Blah_Bleh_Bluh</SHORT-NAME>
</BALISE_C>
The first element of the list short_name is RoutineServices_EngMGslLim_NV
<BALISE_A>
<BALISE_B>
<SHORT-NAME>RoutineServices_EngMGslLim_NV</SHORT-NAME>
</BALISE_B>
</BALISE_A>
<BALISE_C>
<POSSIBLE-ERROR-REF DEST="APPLICATION-ERROR">/Interfaces/RoutineServices_EngMGslLim_NV/E_NOT_OK</POSSIBLE-ERROR-REF>
<SHORT-NAME>Blah_Bleh_Bluh</SHORT-NAME>
</BALISE_C>
I want this
P.S: I use python 2.7.9
Thanks in advance everyone !
Don't open XML files like text files. I have explained in this answer why this is a bad idea.
Simply let etree read and write the file. It's also less code to write.
from lxml import etree
# read the file and load it into a DOM tree
tree = etree.parse('IF_Generic.arxml')
for elem in tree.iterfind("//*"):
# find elements that contain only text
if len(elem) == 0 and elem.text and elem.text.strip() > '':
# do your replacements ...
elem.text = "new text"
# serialize the DOM tree and write it to file
tree.write('IF_Genericnew.arxml', pretty_print=True)
Instead of going over all elements, which is what "//*" does, you can use more specific XPath to narrow down the elements you want to work on.
For example, something like "//SHORT-NAME | //POSSIBLE-ERROR-REF" would help to reduce the overall work load.
I have a text file with these lines.
1.inputlist
D:\Dolby_Harmanious_kit\DRY_run_kits\Dolby_Digital_Plus_Decoder_Imp\Test_Materials\Test_Signals\ITAF_Tests\seamless_switch\acmod21_I0D0CRC.ec3 -#tD:\Dolby_Harmanious_kit\DRY_run_kits\Dolby\m1_m28_switch.cfg
2.inputlist
D:\Dolby_Harmanious_kit\DRY_run_kits\Dolby_Digital_Plus_Decoder_Imp\Test_Materials\Test_Signals\ITAF_Tests\seamless_switch\acmod_2_252_ddp.ec3 -#tD:\Dolby_Harmanious_kit\DRY_run_kits\Digital\m1_m7_switch_every_3frames.cfg
Here i need to remove the path names like
"D:\Dolby_Harmanious_kit\DRY_run_kits\Dolby_Digital_Plus_Decoder_Imp\Test_Materials\Test_Signals\ITAF_Tests\seamless_switch\"
and "D:\Dolby_Harmanious_kit\DRY_run_kits\Digital\ "
.Note that all lines have a different path names.I have a example code to remove a path name.
Code:
import re
b = open ('Filter_Lines.txt','w')
with open('Lines.txt') as f:
for trim in f:
repl = (re.sub('D:.*\\\\','',trim).rstrip('\n'))
b.write(repl + '\n')
b.close()
But here this removes a whole text from "
D:\Dolby_Harmanious_kit\DRY_run_kits\Dolby_Digital_Plus_Decoder_Imp\Test_Materials\Test_Signals\ITAF_Tests\seamless_switch\acmod21_I0D0CRC.ec3 -#tD:\Dolby_Harmanious_kit\DRY_run_kits\Dolby\"
.I need to remove only path names not including "acmod21_I0D0CRC.ec3" in that line.
Can you please guide me for this.
I did upto what i understand your question,
here you specified path's are not similar i.e what i understood is,
your path might be
a) D://a/b/c/file_name.cfg -#tD://a/b/c/d/e/file_name.cfg
is it correct what i understood?
here 2 path present in single line, but common thing is its contains -#t,
so simply use split method to split that.
here what i did based i understand from your post,
import re
li = []
b = open ('file_sample.txt','w')
with open ('file_sam.txt') as f:
for i in open ('file_sam.txt','r'):
a = [re.sub('.*\\\\','',i).rstrip('\n') for i in i.split('D:')]
b.write(''.join(a) + '\n')
b.close()
here my inputs are,
'D:\Dolby_Harmanious_kit\DRY_run_kits\Dolby_Digital_Plus_Decoder_Imp\Test_Materials\Test_Signals\ITAF_Tests\seamless_switch\acmod21_I0D0CRC.ec3 -#tD:\Dolby_Harmanious_kit\DRY_run_kits\Dolby\m1_m28_switch.cfg'
'D:\Dolby_Harmanious_kit\DRY_run_kits\Dolby_Digital_Plus_Decoder_Imp\Test_Materials\Test_Signals\ITAF_Tests\seamless_switch\acmod_2_252_ddp.ec3 -#tD:\Dolby_Harmanious_kit\DRY_run_kits\Digital\m1_m7_switch_every_3frames.cfg'
it gives me,
'acmod21_I0D0CRC.ec3 -#tm1_m28_switch.cfg'
'acmod_2_252_ddp.ec3 -#tm1_m7_switch_every_3frames.cfg'
is this you want?
I've written some code in Python2.7.3 and at some point I want a certain variable produced inside a function to be extracted and used later in my program. I have assigned the global definition on the variable but still no luck. Here is the code:
def target_fileopen():
dirname = ' '
dlg = wx.FileDialog(None, "Select file", dirname, "", "*.txt", wx.OPEN)
if dlg.ShowModal() == wx.ID_OK:
filename = dlg.GetFilename()
dirname = dlg.GetDirectory()
coordfile = open(os.path.join(dirname, filename), 'r')
dlg.Destroy()
global path
path = str(dirname + "\\" + filename)
return target_calc(coordfile)
return(path)
print(path)
The error I get is that global name 'path' is not defined
This is just a sample, I don't really want to print the path in reality, just want it to place it as a static text in the GUI part of the application.
Any help would be really appreciated.
It's OK on my computer.
I can access the global variable which defined in the function.
Do you invoke the method before you access the variable?