Copying selected lines from files in different dir

2019-06-11 12:10发布

I have a directory with many subdirectories, each containing files. I want to open the files whose names end with "root.vrpj" or "root.vprj" inside the "App_integrations" folders and copy the lines containing the word "table" to another file.

So far I've managed to visit each file with this code:

# Walk the whole tree below `movedir` and pick out the files of interest.
for root, dirs, files in os.walk(movedir):
    for filename in files:
        # str.endswith accepts a tuple, so both spellings are checked at once.
        if filename.endswith(("root.vrpj", "root.vprj")):
            # `filename` is only the bare name here; it still has to be
            # joined with `root` before the file can be opened.
            pass

The problem is that all I have at this point are the names of the files I want to visit, and I'm stuck here.

4条回答
成全新的幸福
2楼-- · 2019-06-11 12:35

I finally solved it

    import os

# Placeholder: replace with the path of the directory tree to search.
rootdir = "my root folder"

# Step 1: concatenate the content of every file whose name ends with
# "root.vrpj" or "root.vprj" and whose full path contains "App_Integration"
# into one intermediate file.
# The file is opened with "w" rather than "a" so that running the script
# again does not pile the same content on top of the previous run.
with open('final_file.txt', 'w') as f:
    for root, dirs, files in os.walk(rootdir):
        for filename in files:
            full_name = os.path.join(root, filename)
            if (filename.endswith(("root.vrpj", "root.vprj"))
                    and "App_Integration" in full_name):
                # Close each input file as soon as it has been read.
                with open(full_name) as source:
                    f.write(source.read() + "\n")

# Step 2: copy every distinct line containing "Table" to the result file.
# `lines_seen` remembers what was already written so duplicates are skipped.
lines_seen = set()
with open('final_file.txt') as f, open('testread1.txt', 'w') as f1:
    for line in f:
        if "Table" in line and line not in lines_seen:
            f1.write(line)
            lines_seen.add(line)
查看更多
神经病院院长
3楼-- · 2019-06-11 12:39

Find the files

from pathlib import Path
import itertools

# Placeholder: replace "<source_dir>" with the root directory to search.
source_dir = Path("<source_dir>")

# Recursive glob patterns, one per file-name ending of interest.
patterns = ['**/*root.vrpj', '**/*root.vprj']

# Chain the matches of every pattern into a single lazy iterator of paths.
# NOTE: this is a one-shot iterator; it is exhausted after a single pass.
files = itertools.chain.from_iterable(source_dir.glob(pat) for pat in patterns)

Filter the files:

def filter_lines(files):
    """Yield the lines containing 'table' from the given path objects.

    Only paths that have a component named exactly 'App_Integration' are
    opened; every other path is ignored.
    """
    for path in files:
        if 'App_Integration' in path.parts:
            with path.open('r') as handle:
                yield from (text for text in handle if 'table' in text)

Write the output

def save_lines(lines, output_file=None):
    """Write every item of *lines* to *output_file*.

    Args:
        lines: Iterable of strings; each one is written as-is, so the
            strings must carry their own line endings.
        output_file: Object with a ``write`` method. When omitted, the
            current ``sys.stdout`` is used.
    """
    if output_file is None:
        # Resolved at call time (not at definition time) so that a
        # redirected stdout is honoured.
        import sys
        output_file = sys.stdout
    for line in lines:
        output_file.write(line)

# Placeholder: replace "<output_file>" with the real destination path.
# The filtered lines are streamed straight into the opened file.
with Path("<output_file>").open('w') as output_file:
    save_lines(filter_lines(files), output_file=output_file)
查看更多
▲ chillily
4楼-- · 2019-06-11 12:52

You can try this:

# Copy every line containing "table" from the matching files below
# `movedir` into final_file.txt.
with open('final_file.txt', 'w') as f:
    for root, dirs, files in os.walk(movedir):
        for filename in files:
            if filename.endswith(("root.vrpj", "root.vprj")):
                # os.walk yields bare file names; join with `root` so the
                # file is found regardless of the current directory.
                with open(os.path.join(root, filename)) as data:
                    for line in data:
                        # Test the line itself, not the file object.
                        if "table" in line:
                            # Lines normally keep their own newline; add one
                            # only when the last line of a file lacks it.
                            f.write(line if line.endswith("\n") else line + "\n")
查看更多
倾城 Initia
5楼-- · 2019-06-11 12:52

This is a version of Ajax' code that closes the files you open in the loop (and fixes a couple of other minor issues):

# Copy every line containing 'table' from the matching files below
# `movedir` into final_file.txt; all files are closed by their `with` blocks.
with open('final_file.txt', 'w') as f:
    for root, dirs, files in os.walk(movedir):
        for filename in files:
            # The suffixes must be passed as ONE tuple: a second positional
            # argument to str.endswith is the start index, not a suffix.
            if filename.endswith(("root.vrpj", "root.vprj")):
                with open(os.path.join(root, filename)) as finput:
                    for line in finput:
                        if 'table' in line:
                            f.write(line)

However, when you see 8 levels of indentation, you need to refactor, e.g.:

def find_files(startdir, *extensions):
    """Yield the path of every file below *startdir* with a matching ending.

    Args:
        startdir: Directory whose tree is walked with ``os.walk``.
        *extensions: File-name endings to accept, e.g. ``'root.vprj'``.
            When none are given, nothing is yielded.

    Yields:
        The file's directory joined with its name, for each match.
    """
    # Walk the directory that was passed in, not a global variable.
    for root, dirs, files in os.walk(startdir):
        for filename in files:
            # `extensions` is already a tuple, which str.endswith accepts.
            if filename.endswith(extensions):
                yield os.path.join(root, filename)

def find_lines(fname, text):
    """Return a list of the lines in the file *fname* that contain *text*."""
    matches = []
    with open(fname) as source:
        for row in source:
            if text in row:
                matches.append(row)
    return matches

# Gather the 'table' lines of every matching file into final_file.txt.
with open('final_file.txt', 'w') as f:
    for path in find_files(movedir, 'root.vrpj', 'root.vprj'):
        matching = find_lines(path, 'table')
        f.writelines(matching)
查看更多
登录 后发表回答