#!/usr/bin/env python3

# Generates a list of article paths sorted by the date stored in each article.
#
# Every folder inside the root folder must contain a markdown file named the
# same as the folder but with an `.md` extension.
#
# For example:
# root_folder
# |- first_folder
# |  `-- first_folder.md
# `-- second_folder
#    `-- second_folder.md
#
# Each of those files must carry a metadata block delimited by lines that
# start with `%%`, holding a date line in day-month-year form:
#
# %%
# % date: "23-Oct-2022"
# %%
#
# The deprecated form `date: "23-Oct-2022"` (at the start of the line, without
# the leading `% `) is still accepted inside the block.
#
# The program executed with the root_folder path as argument prints to stdout
# the paths `root_folder/<folder>/<folder>.md`, newest article first.
#
# usage:
# ./sort_blog_index.py <root_folder>
#
# if no root_folder path is provided then the current path is assumed
# author: github.com/mjkloeckner

import datetime
import os
import re
import sys
import time

date_delimiter = "-"

# strptime() layout of the date stored in the metadata, e.g. 23-Oct-2022
DATE_FORMAT = "%d-%b-%Y"

# a line starting with `%%` opens or closes the metadata block
r_metadata_tag = re.compile(r'^%%')
r1 = re.compile(r'(?<=% date: ")(.*?)(?=")')

# compatible with deprecated metadata format
r2 = re.compile(r'(?<=^date: ")(.*?)(?=")')

suffix = '.md'


def _to_timestamp(date_text):
    """Convert a date such as ``23-Oct-2022`` into a local-time timestamp.

    Raises:
        ValueError: if *date_text* does not follow ``DATE_FORMAT``.
    """
    parsed = datetime.datetime.strptime(date_text, DATE_FORMAT)
    return time.mktime(parsed.timetuple())


def _find_date(line):
    """Return the quoted date text found on *line*, or None if it has none.

    The current format (``% date: "..."``) is tried first, then the
    deprecated one (``date: "..."`` at the start of the line).
    """
    match = r1.search(line)
    if match is None:
        match = r2.search(line)
    return None if match is None else match.group(0)


def get_content_old(file_name):
    """Return the timestamp of the first date line found anywhere in a file.

    Legacy variant kept for backward compatibility: unlike ``get_content`` it
    ignores the ``%%`` metadata delimiters and accepts a date line at any
    position in the file.

    Args:
        file_name: path of the article to read.

    Returns:
        The date as seconds since the epoch (``time.mktime``, local time).

    Raises:
        SystemExit: with an error message when no date line exists.
        ValueError: when the date text does not follow ``DATE_FORMAT``.
    """
    with open(file_name) as f:
        for line in f:
            date_text = _find_date(line)
            if date_text is not None:
                return _to_timestamp(date_text)

    sys.exit('error: no metadata found on file {}'.format(file_name))


def get_content(file_name):
    """Return the timestamp of the date stored in a file's metadata block.

    The metadata block starts at the first line beginning with ``%%`` and
    ends at the next such line; only lines in between are searched for the
    date.

    Args:
        file_name: path of the article to read.

    Returns:
        The date as seconds since the epoch (``time.mktime``, local time),
        suitable as a sort key.

    Raises:
        SystemExit: with an error message when the file has no metadata
            block, or the block is unterminated or holds no date.
        ValueError: when the date text does not follow ``DATE_FORMAT``.
    """
    in_metadata = False
    with open(file_name) as f:
        for line in f:
            if in_metadata:
                date_text = _find_date(line)
                if date_text is not None:
                    return _to_timestamp(date_text)

                # Closing tag reached before any date line was seen.
                if r_metadata_tag.search(line) is not None:
                    sys.exit(
                        'error: metadata corrupted or incorrect format '
                        'on file {}'.format(file_name))
            elif r_metadata_tag.search(line) is not None:
                in_metadata = True

    if in_metadata:
        # The block was opened but the file ended before it was closed.
        sys.exit('error: metadata corrupted or incorrect format on file {}'
                 .format(file_name))

    # Never return None: it cannot be ordered against the other sort keys.
    sys.exit('error: no metadata found on file {}'.format(file_name))


def main(argv=None):
    """Print the article paths under the root folder, newest first.

    Args:
        argv: argument vector, program name first; defaults to ``sys.argv``.
            When it holds no root folder the current directory is used.
    """
    args = sys.argv if argv is None else argv

    if len(args) == 1:
        # NOTE(review): these two lines share stdout with the sorted list.
        print("==> No PATH provided, assuming current folder")
        root_folder = os.path.abspath(os.getcwd())
        print(root_folder)
    else:
        root_folder = args[1]

    paths = []
    for folder in os.listdir(root_folder):
        # Only folders hold articles; stray regular files are skipped.
        if not os.path.isdir(os.path.join(root_folder, folder)):
            continue
        paths.append(os.path.join(root_folder, folder + "/" + folder + suffix))

    for path in sorted(paths, key=get_content, reverse=True):
        print(path)


if __name__ == "__main__":
    main()