kloeckner.com.ar

a backup of my entire webpage
Index Commits Files Refs README LICENSE
scripts/sort_blog_index.py (3444B)
   1 #!/usr/bin/env python3
   2 
# Generates a list of file paths sorted by a date contained in each file.
# Every folder in the root folder must hold a file named the same as the
# folder but with an md extension (e.g. my_post/my_post.md). That file must
# have a metadata section delimited by lines starting with "%%" which
# contains a line like the following:
#     % date: "23-Oct-2022"
# (the deprecated form `date: "23-Oct-2022"` at line start is also accepted).
   8 
# For example:
# root_folder
# |- first_folder
# |     `-- first_folder.md
# `-- second_folder
#       `-- second_folder.md

# The program executed with the root_folder path as argument should print to
# stdout the paths of first_folder.md and second_folder.md, newest first,
# sorted by the dates found inside those two files.
  19 
  20 # usage:
  21 #   ./sort_blog_index.py <root_folder>
  22 
  23 # if no root_folder path is provided then the current path is assumed
  24 # author: github.com/mjkloeckner
  25 
  26 import os
  27 import re
  28 import time
  29 import datetime
  30 import sys
  31 
  32 date_delimiter = "-"
  33 # regex = re.compile('(?<=<meta name="article-date" content=")(.*?)(?=")')
  34 
  35 # r_metadata_begin = re.compile('(?<=%%)(.*)(?=%%)')
  36 r_metadata_tag = re.compile('^%%')
  37 r1 = re.compile('(?<=% date: \")(.*?)(?=\")')
  38 
  39 # compatible with deprecated metadata format
  40 r2 = re.compile('(?<=^date: \")(.*?)(?=\")')
  41 
  42 suffix = '.md'
  43 paths = []
  44 
  45 if len(sys.argv) == 1:
  46     print("==> No PATH provided, assuming current folder")
  47     root_folder = os.path.abspath(os.getcwd())
  48     print(root_folder)
  49 else:
  50     root_folder = sys.argv[1]
  51 
  52 def get_content_old(file_name):
  53     with open(file_name) as f:
  54         for line in f:
  55             metadata = r_metadata.search(line)
  56             print(metadata)
  57             result = r1.search(metadata)
  58             if result is not None:
  59                 return time.mktime(datetime.datetime.strptime(result.group(0), "%d-%b-%Y").timetuple())
  60             else:
  61                 result = r2.search(metadata)
  62                 if result is not None:
  63                     return time.mktime(datetime.datetime.strptime(result.group(0), "%d-%b-%Y").timetuple())
  64                 else:
  65                     result = r2.search(line)
  66                     if result is not None:
  67                         return time.mktime(datetime.datetime.strptime(result.group(0), "%d-%b-%Y").timetuple())
  68                     else:
  69                         print('error: no metadata found on file {}'.format(file_name))
  70                         quit()
  71 
  72 
  73 def get_content(file_name):
  74     get = False
  75     with open(file_name) as f:
  76         for line in f:
  77             if get == True:
  78                 result = r1.search(line)
  79                 if result is not None:
  80                     return time.mktime(datetime.datetime.strptime(result.group(0), "%d-%b-%Y").timetuple())
  81                 else:
  82                     result = r2.search(line)
  83                     if result is not None:
  84                         return time.mktime(datetime.datetime.strptime(result.group(0), "%d-%b-%Y").timetuple())
  85 
  86                 if r_metadata_tag.search(line) is not None:
  87                     get = False
  88                     quit()
  89 
  90             if r_metadata_tag.search(line) is not None:
  91                 get = True
  92 
  93         if get == True:
  94             print('error: metadata corrupted or incorrect format on file {}'.format(file_name))
  95             quit()
  96 
  97 
  98 folders = os.listdir(root_folder)
  99 for folder in folders:
 100     paths.append(os.path.join(root_folder, folder + "/" + folder + suffix))
 101 
 102 paths = sorted(paths, key=get_content, reverse=True)
 103 
 104 for path in paths:
 105     print(path)