clean-filenames.sh

Printer-friendly version
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/bash
 
#
# FileName cleaner - v0.1
# Replaces all special chars from filenames, allowing only use of:
# letters, numbers, underscore, dash, dot.
# Also converts accented UTF-8 characters to their closest ASCII equivalent.
# 
# (c)2009 Samuele ~redShadow~ Santi - <redshadow@hackzine.org> - Under GPL
#
# TODO find a way to remove in-name extra dots.
#       We can't simply keep only the last dot, since there are files
#       such as .tar.gz and .tar.bz2 with a "double extension".
#
# TODO make it work recursively on dirs (..a bit dangerous..)
# 
# ATTENTION:  Do NOT use on complex paths (eg. containing the '/' symbol) since
#             behaviour could be different from expected (in a future version,
#             it will be supported, though)
#
 
#
# Leave only allowed chars, removing UTF-8 chars etc.
#
function allowedchars(){
  #sed "s/[^A-Za-z0-9_\\.\\-]\{1,\}/_/g"
  # WE NEED ALL LETTERS LISTING TO AVOID PROBLEMS WITH ACCENTED LETTERS
  # (included in A-Za-z set when using utf-8)
 
  UC_ST="ÀÁÂÃÄÅĂĄÇĆČĎĐàáâãäåăąçćčďđÈÉÊËĘĚĞÌÍÎÏİĹĽŁèéêëęěğìíîïıĺľłÑŃŇÒÓÔÕÖØŐŔŘŚŞŠñńňòóôõöøőŕřśşšŢŤÙÚÛŲÜŮŰÝŹŻŽţťùúûųüůűýÿźżž"
  UC_RP="AAAAAAAACCCDDaaaaaaaacccddEEEEEEGIIIIILLLeeeeeegiiiiilllNNNOOOOOOORRSSSnnnooooooorrssTTUUUUUUUUYZZZttuuuuuuuyyzzz"
 
  sed "y/àèìòùÀÈÌÒÙ/aeiouAEIOU/" |  # Replace accented
  sed "y/$UC_ST/$UC_RP/" |          # Replace UTF-8
  biletters |
  sed "s/[^ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\\._0-9\\-]\\+/_/g" # replace strange chars
}
 
#
# Convert biletter chars to their transliterations
#
function biletters(){
  sed "s/Æ/AE/" |
  sed "s/Œ/CE/" |
  sed "s/æ/ae/" |
  sed "s/œ/ce/" |
  sed "s/ß/ss/"
}
 
#
# Remove double-dots from filename
#
function nodoubledots(){
  # no double dot           # no trailing dot
  sed "s/[\\.]\{2,\}/./g" | sed "s/\\.*\$//"
  #cat
}
 
#
# Convert to lower-case the file extension
# TODO write this
#
function lowerext(){
  #FN="`cat`"
  #FBN="$( echo $FN | sed "s/\(\.*\)\.\([^\.]*\)$/\1/" )"
  #FEX="$( echo $FN | sed "s/\.*\.\([^\.]*\)\$/\1/" | tr "[:upper:]" "[:lower:]" )"
  #echo -e "NAME=$FBN\nEXT=$FEX"
  cat
}
 
function die_subdir(){
  echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
  echo -e " \033[1;31mATTENTION\033[0m:  Do NOT use on complex paths (eg. containing the '/' symbol) since"
  echo "             behaviour could be different from expected one."
  echo "             It will probably supported in a future version, though"
  echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
  exit 1
}
 
#
# Main script loop..
#
if [ "$1" == "" ]; then
  echo "Usage: $0 <files...>"
  echo "Clean filenames of specified files/directories"
else
  for OLDNAME in "$@"; do
    echo "$OLDNAME" | grep -v / &>/dev/null || die_subdir
    echo -en " * \e[1;33m\"$OLDNAME\"\e[0m ... "
    NEWNAME="` echo "$OLDNAME" | allowedchars | nodoubledots | lowerext `"
    if [ "$NEWNAME" != "$OLDNAME" ]; then
      echo -e "New name: \e[1;31m\"$NEWNAME\"\e[0m"
      if [ ! -e "$NEWNAME" ]; then
        mv "$OLDNAME" "$NEWNAME"
      else
        echo -e "   >> \e[1;31mATTENTION!\e[0m A file with the same name already exists (not overwriting)"
      fi
    else
      echo -e "\e[1;32mFilename is OK\e[0m"
    fi
  done
fi

1 comment

 
Anonymous wrote 13 weeks 2 days ago

Re

Thanks for the perfect idea just about this good post! If people want to buy essay
or essay writing opt for very good essay writing service. This is the very good way to a success!

Who Am I?

~redShadow~ A.K.A. Samuele Santi is an Italian Open Source developer, currently working as a freelance developer, mainly in the web applications sector. Favourite programming languages: PHP and, of course, Python!

caos (1) 3d (3) contact manager (1) 2v (1) archive (1) Drupal Forms (1) cars (1) arduino (1) apt (1) como lake rovers (1) aoe (1) address book (2) code (3) camera mia (1) dmcrypt (1) doku (1) apache (1) circuits (1) documentation (2) cartoons (1) aircrack (1) algorythms (1) cocktails (1) debug (1) bash (11) blender (3) awstats (3) database (3) alcool (1) e-mail (2) backup (3) C++ (2) development (11) Drupal (21) citroen (1) debian (1) blogroll (7) curl (1) audio (1) cryptography (1)