Use shared regex configs

This commit is contained in:
dbr
2008-11-08 18:21:56 +10:30
parent 9eea05a709
commit c167891821
3 changed files with 29 additions and 125 deletions

View File

@@ -29,17 +29,9 @@ config['target_path'] = "/Volumes/ionDrive/video/tv/%(file_showname)s/season %(s
##############################################
# Regex configs
# Character class for valid episode/show names.
# Example: [a-zA-Z0-9\-'\ ]
regex_config['valid_in_names'] = "[\w\(\).,\[\]'\ \-?!]"
# Import shared filename pattern config
from filename_config import tv_regex
config['name_parse'] = [
re.compile("^(%(valid_in_names)s+) - \[(\d{2})x(\d{2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})x(\d{2}-\d{2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})x(Special\d{1,2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})xExtra(\d{1,2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})] - (%(valid_in_names)s+)$" % (regex_config)),
]
# end configs
##############################################
@@ -176,7 +168,7 @@ def findFiles(args):
def processNames(names, verbose=False):
"""
Takes list of names, runs them though the config['name_parse'] regexs
Takes list of names, runs them through the tv_regex['with_ep_name'] regexes
"""
allEps = []
for f in names:
@@ -186,7 +178,7 @@ def processNames(names, verbose=False):
# Remove leading . from extension
ext = ext.replace(".", "", 1)
for r in config['name_parse']:
for r in tv_regex['with_ep_name']:
match = r.match(filename)
if match:
showname, seasno, epno, epname = match.groups()

View File

@@ -2,6 +2,20 @@
#encoding:utf-8
import os,re,sys
###################################
# Configs
###################################
# Import shared filename pattern config
from filename_config import film_regex
# Location to process
loc = "." # Runs from the current path
###################################
# Helper functions
###################################
def colour(text,colour="red"):
nocolour=False
if nocolour: # Colour not supported, return plain text
@@ -29,45 +43,6 @@ def getError(invalid,errorno):
return ret
#end getError
###################################
# Name regexs
###################################
# Valid filenames, with year
# Should return 2 groups:
# Film name.
# Year.
#
# Ignore filetype extension
#
# The Film [2004]
r_with_year = [
re.compile("([-\w\d()\[\] ]+) \[(\d{4})\]$"),
]
###################################
# Valid filenames, but missing year
#
# The Film
r_missing_year = [
re.compile("([-\w\d ]+)$"),
]
# Valid path names
r_valid_path = [
re.compile("/.$"),
]
###################################
# Regex to match valid, but not-to-be-processed files (dot-files, folder.jpg artwork)
###################################
decrappify = [
re.compile("(?=^[.]{1}.*)"),
re.compile("folder.jpg"),
]
# Location to process
loc = "." # Runs from the current path
###################################
# Find all valid files
###################################
@@ -83,7 +58,7 @@ files = [x for x in allfiles if os.path.isfile(x)] # only get files, not folders
# Strip out dotfiles/folder.jpg
for current_file in allfiles:
current_file_path,current_file_name = os.path.split(current_file)
for cur_decrap in decrappify:
for cur_decrap in film_regex['decrappify']:
if cur_decrap.match(current_file_name):
files.remove(current_file)
#end for file
@@ -112,7 +87,7 @@ for cur in files:
cpath,cfile = os.path.split(cur)
cfile,cext = os.path.splitext(cfile)
for cur_checker in r_valid_path:
for cur_checker in film_regex['valid_path']:
# Check if path is valid
check = cur_checker.findall(cpath)
if check:
@@ -122,7 +97,7 @@ for cur in files:
'cext':cext})
#end for cur_checker
for cur_checker in r_with_year:
for cur_checker in film_regex['with_year']:
# Check if filename is valid (with year)
check = cur_checker.findall(cfile)
if check:
@@ -132,7 +107,7 @@ for cur in files:
break # Found valid episode, skip to the next one
#end if
else:
for cur_checker in r_missing_year:
for cur_checker in film_regex['missing_year']:
# Check for valid name with missing year
check = cur_checker.findall(cfile)
if check:

View File

@@ -34,6 +34,9 @@ def getError(invalid,errorno):
# Configs
###################################
# Import shared filename pattern config
from filename_config import tv_regex
# Error-code to error-description mapping
errors = {
1:'malformed name',
@@ -41,75 +44,9 @@ errors = {
3:'path is incorrect'
}
# Regex configs
regex_config={}
# Character class for valid episode/show names.
# Example: [a-zA-Z0-9\-'\ ]
regex_config['valid_in_names'] = "[\w\(\).,\[\]'\ \-?!#]"
# Location to process
loc = "." # Runs from the current path
###################################
# Name regexs
###################################
# Valid filenames, with episode name
# Should return 4 groups:
# Series name.
# Season number.
# Episode number.
# Episode name.
# Ignore filetype extension.
#
# If there are 3 groups, they are treated as:
# Series name, episode number, episode name. Season number is defaulted to "1"
#
# Show name - [01x01-02] - The Episode Name (Part 1)
# Show name - [01x23] - The Episode Name (Part 1)
# Show name - [01x23] - The Episode Name
# Show name - [01xExtra01] - DVD Extra Feature 1
# Show name - [01xSpecial01] - Special Episode 1
# Show name - [01] - First episode
r_with_ep_name = [
re.compile("^(%(valid_in_names)s+) - \[(\d{2})x(\d{2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})x(\d{2}-\d{2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})x(Special\d{1,2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})xExtra(\d{1,2})\] - (%(valid_in_names)s+)$" % (regex_config)),
re.compile("^(%(valid_in_names)s+) - \[(\d{2})] - (%(valid_in_names)s+)$" % (regex_config)),
]
###################################
# Valid filenames, but missing episode name
#
# Show name - [04x01]
# Show name - [04x01-02]
# Show name - [04xSpecial01]
# Show name - [04xExtra01]
r_missing_ep_name = [
re.compile("(%(valid_in_names)s+) - \[(\d{2})x(\d{2})\]" % (regex_config)),
re.compile("(%(valid_in_names)s+) - \[(\d{2})x(\d{2}-\d{2})\]"% (regex_config)),
re.compile("(%(valid_in_names)s+) - \[(\d{2})x(Special\d{1,2})\]" % (regex_config)),
re.compile("(%(valid_in_names)s+) - \[(\d{2})x(Extra\d{1,2})\]" % (regex_config)),
re.compile("(%(valid_in_names)s+) - \[(\d{2})x(Extra\d{1,2})\]" % (regex_config))
]
# Valid path names
r_valid_path = [
re.compile("/./(.+?)/season (\d{1,2})$"),
re.compile("/./(.+?)/season (\d{1,2})/extras$"),
re.compile(".+?/Misc")
]
###################################
# Regex to match valid, but not-to-be-processed files (dot-files, folder.jpg artwork)
###################################
decrappify = [
re.compile("^Icon.{1}$"),
re.compile("(?=^[.]{1}.*)"),
re.compile("^folder.jpg$"),
]
###################################
# Output-helper to convert array of
@@ -285,7 +222,7 @@ files = [x for x in allfiles if os.path.isfile(x)] # only get files, not folders
# Strip out dotfiles/folder.jpg
for current_file in allfiles:
current_file_path,current_file_name = os.path.split(current_file)
for cur_decrap in decrappify:
for cur_decrap in tv_regex['decrappify']:
if cur_decrap.match(current_file_name):
files.remove(current_file)
#end for current_file
@@ -308,7 +245,7 @@ for cur in files:
cpath,cfile = os.path.split(cur)
cfile,cext = os.path.splitext(cfile)
for cur_checker in r_valid_path:
for cur_checker in tv_regex['valid_path']:
# Check if path is valid
check = cur_checker.findall(cpath)
if check:
@@ -319,7 +256,7 @@ for cur in files:
'cext':cext})
#end for cur_checker
for cur_checker in r_with_ep_name:
for cur_checker in tv_regex['with_ep_name']:
# Check if filename is valid (with ep name)
check = cur_checker.findall(cfile)
if check:
@@ -329,7 +266,7 @@ for cur in files:
break # Found valid episode, skip to the next one
#end if
else:
for cur_checker in r_missing_ep_name:
for cur_checker in tv_regex['missing_ep_name']:
# Check for valid name with missing episode name
check = cur_checker.findall(cfile)
if check: