Edit: As per your new request, I have added the "total_words" column. The code has been updated.
Below is a code that works. Just change the "folderpath" variable to the path of the folder with the text files, and change the "target_file" variable to where you want the output csv file to be created.
Sample csv output:
Code:
from collections import Counter
import glob
import os
import re
header = ['annual', 'investment', 'statement', 'range' , 'deposit' , 'supercalifragilisticexpialidocious']
folderpath = r'C:UsersUSERname4Desktopmyfolder'
target_file = r'C:UsersUSERname4Desktopmycsv.csv'
queueWAP = []
def writeAndPrint(fileObject,toBeWAP,opCode=0):
global queueWAP
if (opCode == 0):
fileObject.write(toBeWAP)
print(toBeWAP)
if (opCode == 1):
queueWAP.append(toBeWAP)
if (opCode == 2):
for temp4 in range(len(queueWAP)):
fileObject.write(queueWAP[temp4])
print(queueWAP[temp4])
queueWAP = []
mycsvfile = open(target_file, 'w')
writeAndPrint(mycsvfile,"file_name,total_words")
for temp1 in header:
writeAndPrint(mycsvfile,","+temp1)
writeAndPrint(mycsvfile,"
")
filepaths = glob.glob(folderpath + r"*.txt")
for file in filepaths:
with open(file) as f:
writeAndPrint(mycsvfile,file.split("\")[-1])
counter = Counter()
words = re.findall(r'w+', f.read().lower())
counter = counter + Counter(words)
for temp2 in header:
temp3 = False
temp5 = 0
for myword in counter.items():
temp5 = temp5 + 1
if myword[0] == temp2:
writeAndPrint(mycsvfile,","+str(myword[1]),1)
temp3 = True
if temp3 == False:
writeAndPrint(mycsvfile,","+"0",1)
writeAndPrint(mycsvfile,","+str(temp5))
writeAndPrint(mycsvfile,"",2)
writeAndPrint(mycsvfile,"
")
mycsvfile.close()
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…