The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

81 lignes
1.9 KiB

  1. #!/bin/bash
  2. # Takes a wiki page in new-style viewer format on stdin.
  3. # Everything that looks like a social media link (including YouTube) is run through social-media-extract-profile-link.
  4. # Everything else is run through website-extract-social-media.
  5. # This is done recursively until no new links are discovered anymore.
  # Command-line handling: the only recognised flags are --verbose / -v.
  # `verbose` stays empty by default and is set to the literal '--verbose'
  # when the flag is given, so it can be forwarded as-is to other commands.
  # Any other argument is reported on stderr and ends the script with status 1.
  verbose=
  while (( $# > 0 )); do
    case "$1" in
      --verbose | -v)
        verbose='--verbose'
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    shift
  done
  # Print the given words as one line, but only when verbose mode is enabled.
  # Globals:   verbose (read) - any non-empty value enables the output
  # Arguments: the words to print; they are joined with single spaces
  # Outputs:   one line on stdout when verbose is non-empty, nothing otherwise
  #            (callers redirect to stderr themselves where wanted)
  function verbose_echo {
    if [[ -n "${verbose}" ]]; then
      # "$*" joins on the first character of IFS; pin it to a space.
      local IFS=' '
      # printf instead of echo so that words such as -n or -e are printed
      # literally rather than being interpreted as echo options.
      printf '%s\n' "$*"
    fi
  }
  # Run a command and prefix every line it writes to stderr with
  # "[<name>] ", where <name> is $1 with everything up to the last "/" removed.
  # Arguments: $1 - command to run (a path is allowed); remaining arguments are
  #                 passed to the command unchanged
  # Outputs:   the command's stdout is untouched; its stderr lines are
  #            re-emitted on stderr with the prefix
  # Returns:   the exit status of the command
  function stderr_annotate {
    # local: do not clobber a caller's variable of the same name.
    local name="${1##*/}"
    local line
    # The annotating loop runs in a process substitution attached to the
    # command's stderr.
    "$@" 2> >(
      # IFS= keeps leading/trailing whitespace of each line; the extra
      # [[ -n ]] test also emits a final line that lacks a trailing newline.
      while IFS= read -r line || [[ -n "${line}" ]]; do
        printf '[%s] %s\n' "${name}" "${line}"
      done >&2
    )
  }
  # Physical directory containing this script; both extractor helpers are
  # looked up there. Abort if it cannot be determined, since every helper
  # call would otherwise use a wrong path.
  scriptpath="$(cd -- "$(dirname -- "$0")" > /dev/null && pwd -P)" || exit 1

  # URLs whose host is one of these sites go to
  # social-media-extract-profile-link; everything else goes to
  # website-extract-social-media. ERE, matched anywhere in the URL.
  socialMediaRegex='//([^/]+\.)?(facebook\.com|flickr\.com|instagram\.com|twitter\.com|vk\.com|youtube\.com|youtu\.be)/'

  # URLs already seen in the current section (key: URL, value: 1).
  declare -A sectionUrls=()

  # Main filter loop: copy stdin to stdout and, after each "* http(s)://..."
  # bullet, append bullets for every not-yet-seen URL discovered from it.
  # The `|| [[ -n ]]` part also handles a last line without trailing newline.
  while IFS= read -r rawLine || [[ -n "${rawLine}" ]]; do
    # Pass the input line through exactly as read (whitespace preserved,
    # and no echo option parsing for lines such as "-n").
    printf '%s\n' "${rawLine}"

    # All matching below works on a copy with leading/trailing blanks
    # removed (default-IFS read semantics).
    IFS=$' \t\n' read -r line <<< "${rawLine}"

    if [[ "${line}" == '=='* ]]; then
      verbose_echo "${line}" >&2
      # A heading starts a new section: forget the URLs seen so far.
      unset sectionUrls
      declare -A sectionUrls=()
    fi

    # Only bullet lines that start with an http(s) URL are processed further.
    if [[ "${line}" != '* http://'* && "${line}" != '* https://'* ]]; then
      continue
    fi

    # Drop the "* " bullet and, if present, everything from the first " | ".
    url="${line:2}"
    url="${url%% | *}"

    sectionUrls["${url}"]=1
    # Queue of URLs still to be examined; newly discovered URLs are appended,
    # so discovery continues until nothing new turns up.
    toProcess=("${url}")
    while (( ${#toProcess[@]} > 0 )); do
      # Dequeue the first element.
      curUrl="${toProcess[0]}"
      toProcess=("${toProcess[@]:1}")

      if [[ "${curUrl}" =~ ${socialMediaRegex} ]]; then
        extractor='social-media-extract-profile-link'
      else
        extractor='website-extract-social-media'
      fi

      # `< <(:)` gives the helper an empty stdin so it cannot consume the
      # wiki page this loop is reading. ${verbose:+...} passes the flag only
      # when it is set, without relying on word splitting.
      mapfile -t outUrls < <(stderr_annotate "${scriptpath}/${extractor}" ${verbose:+"${verbose}"} "${curUrl}" < <(:))

      for outUrl in "${outUrls[@]}"; do
        # A blank helper output line is not a URL; skipping it also avoids an
        # empty associative-array subscript, which bash rejects.
        [[ -n "${outUrl}" ]] || continue

        # The discovered URL was processed already, skip it entirely.
        if [[ -n "${sectionUrls[${outUrl}]}" ]]; then
          continue
        fi

        # Not-yet-known URL: queue it, mark it as seen, and print it.
        toProcess+=("${outUrl}")
        sectionUrls["${outUrl}"]=1
        printf '* %s\n' "${outUrl}"
      done
    done
  done