1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-10 14:20:09 +01:00

Merge branch 'yt-dlp:master' into master

This commit is contained in:
Spencer Baer 2024-02-24 12:12:12 -06:00 committed by GitHub
commit 1545be288e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
58 changed files with 1486 additions and 698 deletions

View File

@ -107,10 +107,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"
- uses: conda-incubator/setup-miniconda@v2 - uses: conda-incubator/setup-miniconda@v3
with: with:
miniforge-variant: Mambaforge miniforge-variant: Mambaforge
use-mamba: true use-mamba: true
@ -121,16 +121,14 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
sudo apt -y install zip pandoc man sed sudo apt -y install zip pandoc man sed
reqs=$(mktemp) cat > ./requirements.txt << EOF
cat > "$reqs" << EOF
python=3.10.* python=3.10.*
pyinstaller
cffi
brotli-python brotli-python
secretstorage
EOF EOF
sed -E '/^(brotli|secretstorage).*/d' requirements.txt >> "$reqs" python devscripts/install_deps.py --print \
mamba create -n build --file "$reqs" --exclude brotli --exclude brotlicffi \
--include secretstorage --include pyinstaller >> ./requirements.txt
mamba create -n build --file ./requirements.txt
- name: Prepare - name: Prepare
run: | run: |
@ -144,9 +142,9 @@ jobs:
run: | run: |
unset LD_LIBRARY_PATH # Harmful; set by setup-python unset LD_LIBRARY_PATH # Harmful; set by setup-python
conda activate build conda activate build
python pyinst.py --onedir python -m bundle.pyinstaller --onedir
(cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
python pyinst.py python -m bundle.pyinstaller
mv ./dist/yt-dlp_linux ./yt-dlp_linux mv ./dist/yt-dlp_linux ./yt-dlp_linux
mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
@ -164,13 +162,15 @@ jobs:
done done
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-bin-${{ github.job }}
path: | path: |
yt-dlp yt-dlp
yt-dlp.tar.gz yt-dlp.tar.gz
yt-dlp_linux yt-dlp_linux
yt-dlp_linux.zip yt-dlp_linux.zip
compression-level: 0
linux_arm: linux_arm:
needs: process needs: process
@ -201,17 +201,18 @@ jobs:
dockerRunArgs: --volume "${PWD}/repo:/repo" dockerRunArgs: --volume "${PWD}/repo:/repo"
install: | # Installing Python 3.10 from the Deadsnakes repo raises errors install: | # Installing Python 3.10 from the Deadsnakes repo raises errors
apt update apt update
apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip
python3.8 -m pip install -U pip setuptools wheel python3.8 -m pip install -U pip setuptools wheel
# Cannot access requirements.txt from the repo directory at this stage # Cannot access any files from the repo directory at this stage
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi
run: | run: |
cd repo cd repo
python3.8 -m pip install -U Pyinstaller secretstorage -r requirements.txt # Cached version may be out of date python3.8 devscripts/install_deps.py -o --include build
python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date
python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python3.8 devscripts/make_lazy_extractors.py python3.8 devscripts/make_lazy_extractors.py
python3.8 pyinst.py python3.8 -m bundle.pyinstaller
if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
@ -224,10 +225,12 @@ jobs:
fi fi
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-bin-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7 path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0
macos: macos:
needs: process needs: process
@ -240,9 +243,10 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
python3 -m pip install -U --user pip setuptools wheel python3 devscripts/install_deps.py --user -o --include build
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
# We need to ignore wheels otherwise we break universal2 builds # We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --user --no-binary :all: Pyinstaller -r requirements.txt python3 -m pip install -U --user --no-binary :all: -r requirements.txt
- name: Prepare - name: Prepare
run: | run: |
@ -250,9 +254,9 @@ jobs:
python3 devscripts/make_lazy_extractors.py python3 devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python3 pyinst.py --target-architecture universal2 --onedir python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
(cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
python3 pyinst.py --target-architecture universal2 python3 -m bundle.pyinstaller --target-architecture universal2
- name: Verify --update-to - name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION if: vars.UPDATE_TO_VERIFICATION
@ -265,11 +269,13 @@ jobs:
[[ "$version" != "$downgraded_version" ]] [[ "$version" != "$downgraded_version" ]]
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos dist/yt-dlp_macos
dist/yt-dlp_macos.zip dist/yt-dlp_macos.zip
compression-level: 0
macos_legacy: macos_legacy:
needs: process needs: process
@ -293,8 +299,8 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
python3 -m pip install -U --user pip setuptools wheel python3 devscripts/install_deps.py --user -o --include build
python3 -m pip install -U --user Pyinstaller -r requirements.txt python3 devscripts/install_deps.py --user --include pyinstaller
- name: Prepare - name: Prepare
run: | run: |
@ -302,7 +308,7 @@ jobs:
python3 devscripts/make_lazy_extractors.py python3 devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python3 pyinst.py python3 -m bundle.pyinstaller
mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy
- name: Verify --update-to - name: Verify --update-to
@ -316,10 +322,12 @@ jobs:
[[ "$version" != "$downgraded_version" ]] [[ "$version" != "$downgraded_version" ]]
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos_legacy dist/yt-dlp_macos_legacy
compression-level: 0
windows: windows:
needs: process needs: process
@ -328,13 +336,14 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: # 3.8 is used for Win7 support with: # 3.8 is used for Win7 support
python-version: "3.8" python-version: "3.8"
- name: Install Requirements - name: Install Requirements
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python -m pip install -U pip setuptools wheel py2exe python devscripts/install_deps.py -o --include build
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt python devscripts/install_deps.py --include py2exe
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -342,10 +351,10 @@ jobs:
python devscripts/make_lazy_extractors.py python devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python setup.py py2exe python -m bundle.py2exe
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
python pyinst.py python -m bundle.pyinstaller
python pyinst.py --onedir python -m bundle.pyinstaller --onedir
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
- name: Verify --update-to - name: Verify --update-to
@ -362,12 +371,14 @@ jobs:
} }
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp.exe dist/yt-dlp.exe
dist/yt-dlp_min.exe dist/yt-dlp_min.exe
dist/yt-dlp_win.zip dist/yt-dlp_win.zip
compression-level: 0
windows32: windows32:
needs: process needs: process
@ -376,14 +387,15 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: with:
python-version: "3.8" python-version: "3.8"
architecture: "x86" architecture: "x86"
- name: Install Requirements - name: Install Requirements
run: | run: |
python -m pip install -U pip setuptools wheel python devscripts/install_deps.py -o --include build
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -391,7 +403,7 @@ jobs:
python devscripts/make_lazy_extractors.py python devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python pyinst.py python -m bundle.pyinstaller
- name: Verify --update-to - name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION if: vars.UPDATE_TO_VERIFICATION
@ -407,10 +419,12 @@ jobs:
} }
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_x86.exe dist/yt-dlp_x86.exe
compression-level: 0
meta_files: meta_files:
if: inputs.meta_files && always() && !cancelled() if: inputs.meta_files && always() && !cancelled()
@ -424,7 +438,11 @@ jobs:
- windows32 - windows32
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/download-artifact@v3 - uses: actions/download-artifact@v4
with:
path: artifact
pattern: build-bin-*
merge-multiple: true
- name: Make SHA2-SUMS files - name: Make SHA2-SUMS files
run: | run: |
@ -459,8 +477,11 @@ jobs:
done done
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
SHA*SUMS*
_update_spec _update_spec
SHA*SUMS*
compression-level: 0
overwrite: true

View File

@ -49,11 +49,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
continue-on-error: False continue-on-error: False
run: | run: |

View File

@ -11,11 +11,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: 3.9 python-version: 3.9
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
continue-on-error: true continue-on-error: true
run: python3 ./devscripts/run_tests.py download run: python3 ./devscripts/run_tests.py download
@ -38,11 +38,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
continue-on-error: true continue-on-error: true
run: python3 ./devscripts/run_tests.py download run: python3 ./devscripts/run_tests.py download

View File

@ -11,11 +11,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python 3.8 - name: Set up Python 3.8
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: '3.8' python-version: '3.8'
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
run: | run: |
python3 -m yt_dlp -v || true python3 -m yt_dlp -v || true
@ -26,10 +26,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
- name: Install flake8 - name: Install flake8
run: pip install flake8 run: python3 ./devscripts/install_deps.py -o --include dev
- name: Make lazy extractors - name: Make lazy extractors
run: python devscripts/make_lazy_extractors.py run: python3 ./devscripts/make_lazy_extractors.py
- name: Run flake8 - name: Run flake8
run: flake8 . run: flake8 .

View File

@ -6,8 +6,10 @@ on:
paths: paths:
- "yt_dlp/**.py" - "yt_dlp/**.py"
- "!yt_dlp/version.py" - "!yt_dlp/version.py"
- "setup.py" - "bundle/*.py"
- "pyinst.py" - "pyproject.toml"
- "Makefile"
- ".github/workflows/build.yml"
concurrency: concurrency:
group: release-master group: release-master
permissions: permissions:

View File

@ -18,7 +18,14 @@ jobs:
- name: Check for new commits - name: Check for new commits
id: check_for_new_commits id: check_for_new_commits
run: | run: |
relevant_files=("yt_dlp/*.py" ':!yt_dlp/version.py' "setup.py" "pyinst.py") relevant_files=(
"yt_dlp/*.py"
':!yt_dlp/version.py'
"bundle/*.py"
"pyproject.toml"
"Makefile"
".github/workflows/build.yml"
)
echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT" echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
release: release:

View File

@ -71,7 +71,7 @@ jobs:
with: with:
fetch-depth: 0 fetch-depth: 0
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"
@ -246,15 +246,16 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 with:
fetch-depth: 0
- uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"
- name: Install Requirements - name: Install Requirements
run: | run: |
sudo apt -y install pandoc man sudo apt -y install pandoc man
python -m pip install -U pip setuptools wheel twine python devscripts/install_deps.py -o --include build
python -m pip install -U -r requirements.txt
- name: Prepare - name: Prepare
env: env:
@ -266,14 +267,19 @@ jobs:
run: | run: |
python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}" python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
python devscripts/make_lazy_extractors.py python devscripts/make_lazy_extractors.py
sed -i -E "s/(name=')[^']+(', # package name)/\1${{ env.pypi_project }}\2/" setup.py sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml
- name: Build - name: Build
run: | run: |
rm -rf dist/* rm -rf dist/*
make pypi-files make pypi-files
printf '%s\n\n' \
'Official repository: <https://github.com/yt-dlp/yt-dlp>' \
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new
cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel make clean-cache
python -m build --no-isolation .
- name: Publish to PyPI - name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
@ -290,8 +296,12 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- uses: actions/download-artifact@v3 - uses: actions/download-artifact@v4
- uses: actions/setup-python@v4 with:
path: artifact
pattern: build-*
merge-multiple: true
- uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"

View File

@ -1,10 +0,0 @@
include AUTHORS
include Changelog.md
include LICENSE
include README.md
include completions/*/*
include supportedsites.md
include yt-dlp.1
include requirements.txt
recursive-include devscripts *
recursive-include test *

View File

@ -6,11 +6,11 @@ doc: README.md CONTRIBUTING.md issuetemplates supportedsites
ot: offlinetest ot: offlinetest
tar: yt-dlp.tar.gz tar: yt-dlp.tar.gz
# Keep this list in sync with MANIFEST.in # Keep this list in sync with pyproject.toml includes/artifacts
# intended use: when building a source distribution, # intended use: when building a source distribution,
# make pypi-files && python setup.py sdist # make pypi-files && python3 -m build -sn .
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
completions yt-dlp.1 requirements.txt setup.cfg devscripts/* test/* completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites .PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
@ -21,7 +21,7 @@ clean-test:
*.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist: clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
clean-cache: clean-cache:
find . \( \ find . \( \
-type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
@ -37,12 +37,15 @@ BINDIR ?= $(PREFIX)/bin
MANDIR ?= $(PREFIX)/man MANDIR ?= $(PREFIX)/man
SHAREDIR ?= $(PREFIX)/share SHAREDIR ?= $(PREFIX)/share
PYTHON ?= /usr/bin/env python3 PYTHON ?= /usr/bin/env python3
GNUTAR ?= tar
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local # set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
PANDOC_VERSION != $(PANDOC_VERSION_CMD)
# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
MARKDOWN != $(MARKDOWN_CMD)
MARKDOWN ?= $(shell $(MARKDOWN_CMD))
install: lazy-extractors yt-dlp yt-dlp.1 completions install: lazy-extractors yt-dlp yt-dlp.1 completions
mkdir -p $(DESTDIR)$(BINDIR) mkdir -p $(DESTDIR)$(BINDIR)
@ -73,24 +76,28 @@ test:
offlinetest: codetest offlinetest: codetest
$(PYTHON) -m pytest -k "not download" $(PYTHON) -m pytest -k "not download"
# XXX: This is hard to maintain CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking CODE_FOLDERS != $(CODE_FOLDERS_CMD)
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
CODE_FILES != $(CODE_FILES_CMD)
CODE_FILES ?= $(shell $(CODE_FILES_CMD))
yt-dlp: $(CODE_FILES)
mkdir -p zip mkdir -p zip
for d in $(CODE_FOLDERS) ; do \ for d in $(CODE_FOLDERS) ; do \
mkdir -p zip/$$d ;\ mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\ cp -pPR $$d/*.py zip/$$d/ ;\
done done
touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py (cd zip && touch -t 200001010101 $(CODE_FILES))
mv zip/yt_dlp/__main__.py zip/ mv zip/yt_dlp/__main__.py zip/
cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
rm -rf zip rm -rf zip
echo '#!$(PYTHON)' > yt-dlp echo '#!$(PYTHON)' > yt-dlp
cat yt-dlp.zip >> yt-dlp cat yt-dlp.zip >> yt-dlp
rm yt-dlp.zip rm yt-dlp.zip
chmod a+x yt-dlp chmod a+x yt-dlp
README.md: yt_dlp/*.py yt_dlp/*/*.py devscripts/make_readme.py README.md: $(CODE_FILES) devscripts/make_readme.py
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
CONTRIBUTING.md: README.md devscripts/make_contributing.py CONTRIBUTING.md: README.md devscripts/make_contributing.py
@ -115,24 +122,26 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1 pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
rm -f yt-dlp.1.temp.md rm -f yt-dlp.1.temp.md
completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
mkdir -p completions/bash mkdir -p completions/bash
$(PYTHON) devscripts/bash-completion.py $(PYTHON) devscripts/bash-completion.py
completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
mkdir -p completions/zsh mkdir -p completions/zsh
$(PYTHON) devscripts/zsh-completion.py $(PYTHON) devscripts/zsh-completion.py
completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
mkdir -p completions/fish mkdir -p completions/fish
$(PYTHON) devscripts/fish-completion.py $(PYTHON) devscripts/fish-completion.py
_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py') _EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))
yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
$(PYTHON) devscripts/make_lazy_extractors.py $@ $(PYTHON) devscripts/make_lazy_extractors.py $@
yt-dlp.tar.gz: all yt-dlp.tar.gz: all
@tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \ @$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \ --exclude '*.DS_Store' \
--exclude '*.kate-swp' \ --exclude '*.kate-swp' \
--exclude '*.pyc' \ --exclude '*.pyc' \
@ -144,12 +153,8 @@ yt-dlp.tar.gz: all
-- \ -- \
README.md supportedsites.md Changelog.md LICENSE \ README.md supportedsites.md Changelog.md LICENSE \
CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \ CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
Makefile MANIFEST.in yt-dlp.1 README.txt completions \ Makefile yt-dlp.1 README.txt completions .gitignore \
setup.py setup.cfg yt-dlp yt_dlp requirements.txt \ setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test
devscripts test
AUTHORS: .mailmap AUTHORS:
git shortlog -s -n | cut -f2 | sort > AUTHORS git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS
.mailmap:
git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap

View File

@ -167,8 +167,8 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options * `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
# INSTALLATION # INSTALLATION
@ -321,19 +321,21 @@ If you do not have the necessary dependencies for a task you are attempting, yt-
## COMPILE ## COMPILE
### Standalone PyInstaller Builds ### Standalone PyInstaller Builds
To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. You can run the following commands:
python3 -m pip install -U pyinstaller -r requirements.txt ```
python3 devscripts/make_lazy_extractors.py python3 devscripts/install_deps.py --include pyinstaller
python3 pyinst.py python3 devscripts/make_lazy_extractors.py
python3 -m bundle.pyinstaller
```
On some systems, you may need to use `py` or `python` instead of `python3`. On some systems, you may need to use `py` or `python` instead of `python3`.
`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). `bundle/pyinstaller.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. **Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. **Important**: Running `pyinstaller` directly **without** using `bundle/pyinstaller.py` is **not** officially supported. This may or may not work correctly.
### Platform-independent Binary (UNIX) ### Platform-independent Binary (UNIX)
You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
@ -346,14 +348,17 @@ You can also run `make yt-dlp` instead to compile only the binary without updati
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run. While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run.
If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe` If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
py -m pip install -U py2exe -r requirements.txt ```
py devscripts/make_lazy_extractors.py py devscripts/install_deps.py --include py2exe
py setup.py py2exe py devscripts/make_lazy_extractors.py
py -m bundle.py2exe
```
### Related scripts ### Related scripts
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
* **`devscripts/update-version.py`** - Update the version number based on current date. * **`devscripts/update-version.py`** - Update the version number based on current date.
* **`devscripts/set-variant.py`** - Set the build variant of the executable. * **`devscripts/set-variant.py`** - Set the build variant of the executable.
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
@ -1306,7 +1311,8 @@ The available fields are:
- `display_id` (string): An alternative identifier for the video - `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader - `uploader` (string): Full name of the video uploader
- `license` (string): License name the video is licensed under - `license` (string): License name the video is licensed under
- `creator` (string): The creator of the video - `creators` (list): The creators of the video
- `creator` (string): The creators of the video; comma-separated
- `timestamp` (numeric): UNIX timestamp of the moment the video became available - `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD) - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@ -1380,11 +1386,16 @@ Available for the media that is a track or a part of a music album:
- `track` (string): Title of the track - `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc - `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track - `track_id` (string): Id of the track
- `artist` (string): Artist(s) of the track - `artists` (list): Artist(s) of the track
- `genre` (string): Genre(s) of the track - `artist` (string): Artist(s) of the track; comma-separated
- `genres` (list): Genre(s) of the track
- `genre` (string): Genre(s) of the track; comma-separated
- `composers` (list): Composer(s) of the piece
- `composer` (string): Composer(s) of the piece; comma-separated
- `album` (string): Title of the album the track belongs to - `album` (string): Title of the album the track belongs to
- `album_type` (string): Type of the album - `album_type` (string): Type of the album
- `album_artist` (string): List of all artists appeared on the album - `album_artists` (list): All artists that appeared on the album
- `album_artist` (string): All artists that appeared on the album; comma-separated
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters: Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@ -1762,10 +1773,11 @@ Metadata fields | From
`description`, `synopsis` | `description` `description`, `synopsis` | `description`
`purl`, `comment` | `webpage_url` `purl`, `comment` | `webpage_url`
`track` | `track_number` `track` | `track_number`
`artist` | `artist`, `creator`, `uploader` or `uploader_id` `artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`genre` | `genre` `composer` | `composer` or `composers`
`genre` | `genre` or `genres`
`album` | `album` `album` | `album`
`album_artist` | `album_artist` `album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number` `disc` | `disc_number`
`show` | `series` `show` | `series`
`season_number` | `season_number` `season_number` | `season_number`

1
bundle/__init__.py Normal file
View File

@ -0,0 +1 @@
# Empty file

59
bundle/py2exe.py Executable file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import warnings
from py2exe import freeze
from devscripts.utils import read_version
VERSION = read_version()
def main():
    """
    Freeze yt-dlp into a Windows executable using py2exe

    Emits a warning recommending the PyInstaller build instead, then hands
    the build description to `py2exe.freeze` and returns its result.
    The script and icon paths are relative, so they are resolved against
    the current working directory.
    """
    # The PyInstaller builder lives in the `bundle` package and is run as a module
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "python -m bundle.pyinstaller" to build using pyinstaller instead')

    return freeze(
        console=[{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        version_info={
            'version': VERSION,
            'description': 'A youtube-dl fork with additional features and patches',
            'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
            'product_name': 'yt-dlp',
            'product_version': VERSION,
        },
        options={
            'bundle_files': 0,
            'compressed': 1,
            'optimize': 2,
            'dist_dir': './dist',
            'excludes': [
                # py2exe cannot import Crypto
                'Crypto',
                'Cryptodome',
                # py2exe appears to confuse this with our socks library.
                # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
                'urllib3.contrib.socks'
            ],
            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
            # Modules that are only imported dynamically must be added here
            'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                         'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
        },
        zipfile=None,
    )
if __name__ == '__main__':
main()

2
pyinst.py → bundle/pyinstaller.py Normal file → Executable file
View File

@ -4,7 +4,7 @@
import os import os
import sys import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import platform import platform

66
devscripts/install_deps.py Executable file
View File

@ -0,0 +1,66 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import argparse
import re
import subprocess
from devscripts.tomlparse import parse_toml
from devscripts.utils import read_file
def parse_args():
    """
    Parse the command line of the dependency installer

    @returns    An `argparse.Namespace` with `input`, `exclude`, `include`,
                `only_optional`, `print` and `user`.
                `exclude`/`include` are lists, or None when never passed.
    """
    parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
    parser.add_argument(
        'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml',
        help='Input file (default: %(default)s)')

    # Options that may be repeated; every occurrence is collected into a list
    repeatable_options = (
        ('-e', '--exclude', 'REQUIREMENT', 'Exclude a required dependency'),
        ('-i', '--include', 'GROUP', 'Include an optional dependency group'),
    )
    for short_flag, long_flag, metavar, description in repeatable_options:
        parser.add_argument(short_flag, long_flag, metavar=metavar, action='append', help=description)

    # Plain on/off switches
    switches = (
        ('-o', '--only-optional', 'Only install optional dependencies'),
        ('-p', '--print', 'Only print a requirements.txt to stdout'),
        ('-u', '--user', 'Install with pip as --user'),
    )
    for short_flag, long_flag, description in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', help=description)

    return parser.parse_args()
def main():
    """
    Resolve the requested dependencies from the TOML file and print or install them

    Starts from `project.dependencies` (unless `--only-optional` is given),
    drops every `--exclude`d requirement and appends the members of each
    `--include`d group from `project.optional-dependencies`.

    @returns    None when only printing (`--print`),
                otherwise the exit status of the `pip install` subprocess
    """
    args = parse_args()

    project = parse_toml(read_file(args.input))['project']
    deps = project['dependencies']
    targets = deps.copy() if not args.only_optional else []

    for exclude in args.exclude or []:
        for dep in deps:
            # Leading name of the requirement, without version specifiers or markers
            simplified_dep = re.match(r'[\w-]+', dep)[0]
            if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep):
                targets.remove(dep)

    # The table is optional; only complain when a group is actually requested from it
    optional_deps = project.get('optional-dependencies') or {}
    for include in args.include or []:
        group = optional_deps.get(include)
        if group is None:
            # Written to stderr so that the `--print` output on stdout stays a valid requirements list
            print(f'WARNING: Ignoring unknown optional dependency group {include!r}', file=sys.stderr)
            continue
        targets.extend(group)

    if args.print:
        for target in targets:
            print(target)
        return

    pip_args = [sys.executable, '-m', 'pip', 'install', '-U']
    if args.user:
        pip_args.append('--user')
    pip_args.extend(targets)

    return subprocess.call(pip_args)
if __name__ == '__main__':
sys.exit(main())

189
devscripts/tomlparse.py Executable file
View File

@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Simple parser for spec compliant toml files
A simple toml parser for files that comply with the spec.
Should only be used to parse `pyproject.toml` for `install_deps.py`.
IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
"""
from __future__ import annotations
import datetime
import json
import re
# Optional run of spaces/tabs; never matches across a newline
WS = r'(?:[\ \t]*)'
# A single-line string: double-quoted (backslash escapes allowed) or single-quoted (taken literally)
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key segment: either a quoted string or a bare key
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
# A complete key: one or more segments joined by `.`, with optional whitespace around each segment
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
# The `=` of a key/value pair together with any whitespace following it
EQUALS_RE = re.compile(rf'={WS}')
# Compiled form of `WS`
WS_RE = re.compile(WS)
# A table header at the start of a line; the `is_list` group matches the second `[` of a `[[path]]` header
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# Start of the next expression: either a table header or the `key =` part of a key/value pair
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)
# Whitespace that may span several lines and skips over `#` comments
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# Everything up to the next `,`, `}`, `]`, tab, newline or `#`; used for unquoted values
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
def parse_key(value: str):
    """
    Yield the decoded segments of a (possibly dotted) key

    Double-quoted segments are decoded with `json.loads`, single-quoted
    segments only lose their quotes and bare segments are yielded as is.
    """
    for found in SINGLE_KEY_RE.finditer(value):
        segment = found.group(0)
        quote = segment[0]
        if quote == '"':
            segment = json.loads(segment)
        elif quote == '\'':
            segment = segment[1:-1]
        yield segment
def get_target(root: dict, paths: list[str], is_list=False):
    """
    Walk `paths` down from `root`, creating missing containers, and return the final dict

    @param root     The dict to start from; modified in place
    @param paths    Key segments to descend through
    @param is_list  When true, the last segment names a list and a new
                    dict is appended to it and returned
    @returns        The dict found (or created) at the end of the path.
                    When an intermediate segment holds a list, the walk
                    continues in the last element of that list.
    """
    final_position = len(paths) - 1
    node = root

    for position, key in enumerate(paths):
        wants_list = is_list and position == final_position

        entry = node.get(key)
        if entry is None:
            entry = node[key] = [] if wants_list else {}

        if isinstance(entry, dict):
            node = entry
            continue

        if wants_list:
            # Each visit adds a fresh element to the list
            node = {}
            entry.append(node)
        else:
            node = entry[-1]

    assert isinstance(node, dict)
    return node
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """
    Generator driving the parsing of a bracketed, comma separated sequence

    `index` must point at the opening bracket. For every item the generator
    yields `(True, item_start)` and expects the index just past that item
    to be passed back with `.send()`. Once the closing `end` character is
    reached it yields `(False, index_after_end)`.

    @param ws_re    Pattern for the whitespace to skip around items and commas
    """
    def skip_whitespace(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    index = skip_whitespace(index + 1)

    while data[index] != end:
        # The caller parses the item and reports where it stopped
        index = yield True, index

        index = skip_whitespace(index)
        if data[index] == ',':
            index += 1
        index = skip_whitespace(index)

    assert data[index] == end
    yield False, index + 1
def parse_value(data: str, index: int):
    """
    Parse the value starting at `data[index]`

    Handles arrays, inline tables, single-line strings and unquoted
    literals (integers incl. `0x`/`0o`/`0b` prefixed ones, floats, times,
    dates, datetimes and booleans). An unquoted literal that matches none
    of these is returned as its stripped text.

    @returns    A tuple `(index_after_value, value)`
    """
    if data[index] == '[':
        result = []

        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            # parse_kv_pair stores the pair in `result` and returns the index after it
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        # Double-quoted strings share their escape syntax with JSON; single-quoted ones are literal
        return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()
    # Try each converter in turn; the first one that does not raise wins
    for func in [
        int,
        # Prefixed integers (0x.., 0o.., 0b..) are rejected by plain `int`
        lambda text: int(text, 0),
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        # Must raise (not return None) for unknown literals so that they are kept as text
        {'true': True, 'false': False}.__getitem__,
    ]:
        try:
            value = func(value)
            break
        except Exception:
            pass

    return match.end(), value
def parse_kv_pair(data: str, index: int, target: dict):
    """
    Parse one `key = value` pair at `data[index]` and store it in `target`

    Dotted keys create (or reuse) the intermediate tables inside `target`.

    @returns    The index just past the value, or None if no key starts at `index`
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    *parent_keys, leaf_key = parse_key(key_match.group(0))

    equals_match = EQUALS_RE.match(data, key_match.end())
    assert equals_match

    value_end, value = parse_value(data, equals_match.end())
    get_target(target, parent_keys)[leaf_key] = value
    return value_end
def parse_toml(data: str):
    """
    Parse the text of a TOML document into nested dicts and lists

    Scans for table headers and key/value pairs; a header switches the
    table that subsequent pairs are stored in.

    @returns    The root table as a dict
    """
    document = {}
    current_table = document
    position = 0

    while (expression := EXPRESSION_RE.search(data, position)) is not None:
        if expression.group('subtable'):
            position = expression.end()
            header_path = list(parse_key(expression.group('path')))
            current_table = get_target(document, header_path, bool(expression.group('is_list')))
        else:
            position = parse_kv_pair(data, expression.start(), current_table)
            assert position is not None

    return document
def main():
    """
    Command line entry point: parse the given TOML file and print it as JSON

    Dates, times and datetimes are written in their ISO 8601 form.
    """
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    args = parser.parse_args()

    with args.infile.open('r', encoding='utf-8') as file:
        data = file.read()

    def default(obj):
        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return obj.isoformat()
        # Falling through would return None and serialize the object as `null`
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    print(json.dumps(parse_toml(data), default=default))
if __name__ == '__main__':
main()

View File

@ -1,5 +1,118 @@
[build-system] [build-system]
build-backend = 'setuptools.build_meta' requires = ["hatchling"]
# https://github.com/yt-dlp/yt-dlp/issues/5941 build-backend = "hatchling.build"
# https://github.com/pypa/distutils/issues/17
requires = ['setuptools > 50'] [project]
name = "yt-dlp"
maintainers = [
{name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
{name = "Grub4K", email = "contact@grub4k.xyz"},
{name = "bashonly", email = "bashonly@protonmail.com"},
]
description = "A youtube-dl fork with additional features and patches"
readme = "README.md"
requires-python = ">=3.8"
keywords = [
"youtube-dl",
"video-downloader",
"youtube-downloader",
"sponsorblock",
"youtube-dlc",
"yt-dlp",
]
license = {file = "LICENSE"}
classifiers = [
"Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"Programming Language :: Python",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"License :: OSI Approved :: The Unlicense (Unlicense)",
"Operating System :: OS Independent",
]
dynamic = ["version"]
dependencies = [
"brotli; implementation_name=='cpython'",
"brotlicffi; implementation_name!='cpython'",
"certifi",
"mutagen",
"pycryptodomex",
"requests>=2.31.0,<3",
"urllib3>=1.26.17,<3",
"websockets>=12.0",
]
[project.optional-dependencies]
secretstorage = [
"cffi",
"secretstorage",
]
build = [
"build",
"hatchling",
"pip",
"wheel",
]
dev = [
"flake8",
"isort",
"pytest",
]
pyinstaller = ["pyinstaller>=6.3"]
py2exe = ["py2exe>=0.12"]
[project.urls]
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
Repository = "https://github.com/yt-dlp/yt-dlp"
Tracker = "https://github.com/yt-dlp/yt-dlp/issues"
Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators"
[project.scripts]
yt-dlp = "yt_dlp:main"
[project.entry-points.pyinstaller40]
hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs"
[tool.hatch.build.targets.sdist]
include = [
"/yt_dlp",
"/devscripts",
"/test",
"/.gitignore", # included by default, needed for auto-excludes
"/Changelog.md",
"/LICENSE", # included as license
"/pyproject.toml", # included by default
"/README.md", # included as readme
"/setup.cfg",
"/supportedsites.md",
]
artifacts = [
"/yt_dlp/extractor/lazy_extractors.py",
"/completions",
"/AUTHORS", # included by default
"/README.txt",
"/yt-dlp.1",
]
[tool.hatch.build.targets.wheel]
packages = ["yt_dlp"]
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
[tool.hatch.build.targets.wheel.shared-data]
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp"
"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish"
"README.txt" = "share/doc/yt_dlp/README.txt"
"yt-dlp.1" = "share/man/man1/yt-dlp.1"
[tool.hatch.version]
path = "yt_dlp/version.py"
pattern = "_pkg_version = '(?P<version>[^']+)'"

View File

@ -1,8 +0,0 @@
mutagen
pycryptodomex
brotli; implementation_name=='cpython'
brotlicffi; implementation_name!='cpython'
certifi
requests>=2.31.0,<3
urllib3>=1.26.17,<3
websockets>=12.0

View File

@ -1,7 +1,3 @@
[wheel]
universal = true
[flake8] [flake8]
exclude = build,venv,.tox,.git,.pytest_cache exclude = build,venv,.tox,.git,.pytest_cache
ignore = E402,E501,E731,E741,W503 ignore = E402,E501,E731,E741,W503

183
setup.py
View File

@ -1,183 +0,0 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import subprocess
import warnings
try:
from setuptools import Command, find_packages, setup
setuptools_available = True
except ImportError:
from distutils.core import Command, setup
setuptools_available = False
from devscripts.utils import read_file, read_version
VERSION = read_version(varname='_pkg_version')
DESCRIPTION = 'A youtube-dl fork with additional features and patches'
LONG_DESCRIPTION = '\n\n'.join((
'Official repository: <https://github.com/yt-dlp/yt-dlp>',
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
read_file('README.md')))
REQUIREMENTS = read_file('requirements.txt').splitlines()
def packages():
    """
    Return the list of packages to distribute

    Uses `find_packages` when setuptools was imported,
    otherwise falls back to a fixed list.
    """
    if not setuptools_available:
        return [
            'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
        ]

    excluded = ('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts')
    return find_packages(exclude=excluded)
def py2exe_params():
    """
    Build the keyword arguments describing the py2exe build

    Emits a warning recommending PyInstaller, then returns a dict with the
    keys `console`, `version_info`, `options` and `zipfile`.
    """
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

    console_target = {
        'script': './yt_dlp/__main__.py',
        'dest_base': 'yt-dlp',
        'icon_resources': [(1, 'devscripts/logo.ico')],
    }

    version_info = {
        'version': VERSION,
        'description': DESCRIPTION,
        # First line of the long description
        'comments': LONG_DESCRIPTION.split('\n')[0],
        'product_name': 'yt-dlp',
        'product_version': VERSION,
    }

    excluded_modules = [
        # py2exe cannot import Crypto
        'Crypto',
        'Cryptodome',
        # py2exe appears to confuse this with our socks library.
        # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
        'urllib3.contrib.socks'
    ]
    # Modules that are only imported dynamically must be added here
    included_modules = [
        'yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
        'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated',
    ]
    options = {
        'bundle_files': 0,
        'compressed': 1,
        'optimize': 2,
        'dist_dir': './dist',
        'excludes': excluded_modules,
        'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
        'includes': included_modules,
    }

    return {
        'console': [console_target],
        'version_info': version_info,
        'options': options,
        'zipfile': None,
    }
def build_params():
    """
    Build the `setup()` keyword arguments for a regular (non-py2exe) build

    Collects the data files that exist on disk, warning about each missing
    one, and adds either entry points (with setuptools) or a plain script.
    """
    files_spec = [
        ('share/bash-completion/completions', ['completions/bash/yt-dlp']),
        ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']),
        ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']),
        ('share/doc/yt_dlp', ['README.txt']),
        ('share/man/man1', ['yt-dlp.1'])
    ]

    data_files = []
    for dirname, files in files_spec:
        present = []
        for path in files:
            if os.path.exists(path):
                present.append(path)
                continue
            warnings.warn(f'Skipping file {path} since it is not present. Try running " make pypi-files " first')
        # The directory is listed even when none of its files exist
        data_files.append((dirname, present))

    params = {'data_files': data_files}

    if not setuptools_available:
        params['scripts'] = ['yt-dlp']
        return params

    params['entry_points'] = {
        'console_scripts': ['yt-dlp = yt_dlp:main'],
        'pyinstaller40': ['hook-dirs = yt_dlp.__pyinstaller:get_hook_dirs'],
    }
    return params
class build_lazy_extractors(Command):
    """Setup command that runs `devscripts/make_lazy_extractors.py` with the current interpreter"""

    description = 'Build the extractor lazy loading module'
    user_options = []

    def initialize_options(self):
        """The command takes no options"""

    def finalize_options(self):
        """The command takes no options"""

    def run(self):
        """Run the generator script unless in dry run mode; its exit status is not checked"""
        if not self.dry_run:
            subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
            return

        print('Skipping build of lazy extractors in dry run mode')
def main():
    """
    Entry point of the setup script

    With `py2exe` as the first argument, builds through `py2exe.freeze`
    when it can be imported; if only the `py2exe` module itself imports,
    the parameters are reshaped and the build goes through `setup()`.
    Any other invocation is a regular `setup()` call.
    """
    if sys.argv[1:2] != ['py2exe']:
        params = build_params()
    else:
        params = py2exe_params()
        try:
            from py2exe import freeze
        except ImportError:
            import py2exe  # noqa: F401
            warnings.warn('You are using an outdated version of py2exe. Support for this version will be removed in the future')
            # Presumably the layout older py2exe expects from setup(): version info merged
            # into the console target and the options nested under a 'py2exe' key
            params['console'][0].update(params.pop('version_info'))
            params['options'] = {'py2exe': params.pop('options')}
        else:
            return freeze(**params)

    project_urls = {
        'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
        'Source': 'https://github.com/yt-dlp/yt-dlp',
        'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
        'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
    }
    classifiers = [
        'Topic :: Multimedia :: Video',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'License :: Public Domain',
        'Operating System :: OS Independent',
    ]

    setup(
        name='yt-dlp',  # package name (do not change/remove comment)
        version=VERSION,
        maintainer='pukkandan',
        maintainer_email='pukkandan.ytdlp@gmail.com',
        description=DESCRIPTION,
        long_description=LONG_DESCRIPTION,
        long_description_content_type='text/markdown',
        url='https://github.com/yt-dlp/yt-dlp',
        packages=packages(),
        install_requires=REQUIREMENTS,
        python_requires='>=3.8',
        project_urls=project_urls,
        classifiers=classifiers,
        cmdclass={'build_lazy_extractors': build_lazy_extractors},
        **params
    )
main()

View File

@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'): if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id') test_info_dict.pop('display_id')
# Remove deprecated fields
for old in YoutubeDL._deprecated_multivalue_fields.keys():
test_info_dict.pop(old, None)
# release_year may be generated from release_date # release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])): if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year') test_info_dict.pop('release_year')

View File

@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
def get_videos(filter_=None): def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True}) ydl = YDL({'match_filter': filter_, 'simulate': True})
for v in videos: for v in videos:
ydl.process_ie_result(v, download=True) ydl.process_ie_result(v.copy(), download=True)
return [v['id'] for v in ydl.downloaded_info_dicts] return [v['id'] for v in ydl.downloaded_info_dicts]
res = get_videos() res = get_videos()

View File

@ -13,6 +13,7 @@ import http.client
import http.cookiejar import http.cookiejar
import http.server import http.server
import io import io
import logging
import pathlib import pathlib
import random import random
import ssl import ssl
@ -752,6 +753,25 @@ class TestClientCertificate:
}) })
class TestRequestHandlerMisc:
    """Misc generic tests for request handlers, not related to request or validation testing"""

    @pytest.mark.parametrize('handler,logger_name', [
        ('Requests', 'urllib3'),
        ('Websockets', 'websockets.client'),
        ('Websockets', 'websockets.server')
    ], indirect=['handler'])
    def test_remove_logging_handler(self, handler, logger_name):
        """Creating a request handler adds one logging handler; closing it removes that handler again"""
        # Logging handlers may hold a reference to a YoutubeDL instance,
        # so they must not outlive the request handler
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logger = logging.getLogger(logger_name)
        baseline = len(logger.handlers)

        rh = handler()
        assert len(logger.handlers) == baseline + 1

        rh.close()
        assert len(logger.handlers) == baseline
class TestUrllibRequestHandler(TestRequestHandlerBase): class TestUrllibRequestHandler(TestRequestHandlerBase):
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True) @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_file_urls(self, handler): def test_file_urls(self, handler):
@ -827,6 +847,7 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
assert not isinstance(exc_info.value, TransportError) assert not isinstance(exc_info.value, TransportError)
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
class TestRequestsRequestHandler(TestRequestHandlerBase): class TestRequestsRequestHandler(TestRequestHandlerBase):
@pytest.mark.parametrize('raised,expected', [ @pytest.mark.parametrize('raised,expected', [
(lambda: requests.exceptions.ConnectTimeout(), TransportError), (lambda: requests.exceptions.ConnectTimeout(), TransportError),
@ -843,7 +864,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
(lambda: requests.exceptions.RequestException(), RequestError) (lambda: requests.exceptions.RequestException(), RequestError)
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
]) ])
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
def test_request_error_mapping(self, handler, monkeypatch, raised, expected): def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
with handler() as rh: with handler() as rh:
def mock_get_instance(*args, **kwargs): def mock_get_instance(*args, **kwargs):
@ -877,7 +897,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
'3 bytes read, 5 more expected' '3 bytes read, 5 more expected'
), ),
]) ])
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match): def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
from requests.models import Response as RequestsResponse from requests.models import Response as RequestsResponse
from urllib3.response import HTTPResponse as Urllib3Response from urllib3.response import HTTPResponse as Urllib3Response
@ -896,6 +915,21 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
assert exc_info.type is expected assert exc_info.type is expected
def test_close(self, handler, monkeypatch):
    """Closing the request handler must close the session it created"""
    rh = handler()
    session = rh._get_instance(cookiejar=rh.cookiejar)
    real_close = session.close
    close_calls = []

    def recording_close(*args, **kwargs):
        # Record the call, then delegate so the session is still closed for real
        close_calls.append((args, kwargs))
        return real_close(*args, **kwargs)

    monkeypatch.setattr(session, 'close', recording_close)
    rh.close()
    assert close_calls
def run_validation(handler, error, req, **handler_kwargs): def run_validation(handler, error, req, **handler_kwargs):
with handler(**handler_kwargs) as rh: with handler(**handler_kwargs) as rh:
@ -1205,6 +1239,19 @@ class TestRequestDirector:
assert director.send(Request('http://')).read() == b'' assert director.send(Request('http://')).read() == b''
assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported' assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
def test_close(self, monkeypatch):
    """Closing the director must call `close` on its registered handler"""
    director = RequestDirector(logger=FakeLogger())
    director.add_handler(FakeRH(logger=FakeLogger()))
    close_calls = []

    def recording_close(*args, **kwargs):
        close_calls.append((args, kwargs))

    monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', recording_close)
    director.close()
    assert close_calls
# XXX: do we want to move this to test_YoutubeDL.py? # XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking: class TestYoutubeDLNetworking:

View File

@ -580,6 +580,13 @@ class YoutubeDL:
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
} }
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
'artist': 'artists',
'composer': 'composers',
'creator': 'creators',
'genre': 'genres',
}
_format_selection_exts = { _format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio), 'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@ -2640,6 +2647,14 @@ class YoutubeDL:
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
for old_key, new_key in self._deprecated_multivalue_fields.items():
if new_key in info_dict and old_key in info_dict:
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
elif old_value := info_dict.get(old_key):
info_dict[new_key] = old_value.split(', ')
elif new_value := info_dict.get(new_key):
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
def _raise_pending_errors(self, info): def _raise_pending_errors(self, info):
err = info.pop('__pending_error', None) err = info.pop('__pending_error', None)
if err: if err:
@ -3483,7 +3498,8 @@ class YoutubeDL:
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
'Possible MPEG-TS in MP4 container or malformed AAC timestamps', 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP) FFmpegFixupM3u8PP)
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments', ffmpeg_fixup(downloader == 'dashsegments'
and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)

View File

@ -14,7 +14,7 @@ import os
import re import re
import traceback import traceback
from .compat import compat_shlex_quote from .compat import compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -984,7 +984,28 @@ def _real_main(argv=None):
if pre_process: if pre_process:
return ydl._download_retcode return ydl._download_retcode
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) args = sys.argv[1:] if argv is None else argv
ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes
import msvcrt
kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
# If we only have a single process attached, then the executable was double clicked
# When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or is_onefile and attached_processes == 2:
print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: '
'https://github.com/yt-dlp/yt-dlp#readme'))
msvcrt.getch()
_exit(2)
parser.error( parser.error(
'You must provide at least one URL.\n' 'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.') 'Type yt-dlp --help to see a list of all options.')

View File

@ -31,4 +31,4 @@ def get_hidden_imports():
hiddenimports = list(get_hidden_imports()) hiddenimports = list(get_hidden_imports())
print(f'Adding imports: {hiddenimports}') print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts'] excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']

View File

@ -379,7 +379,6 @@ from .clubic import ClubicIE
from .clyp import ClypIE from .clyp import ClypIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnbc import ( from .cnbc import (
CNBCIE,
CNBCVideoIE, CNBCVideoIE,
) )
from .cnn import ( from .cnn import (
@ -618,6 +617,7 @@ from .filmon import (
from .filmweb import FilmwebIE from .filmweb import FilmwebIE
from .firsttv import FirstTVIE from .firsttv import FirstTVIE
from .fivetv import FiveTVIE from .fivetv import FiveTVIE
from .flextv import FlexTVIE
from .flickr import FlickrIE from .flickr import FlickrIE
from .floatplane import ( from .floatplane import (
FloatplaneIE, FloatplaneIE,

View File

@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', 'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
'md5': '294f18331bb516539d72d85a82887dcc', 'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
'info_dict': { 'info_dict': {
'id': '_xvg/m_cmbatw=', 'id': '_xvg/m_cmbatw=',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', 'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
'timestamp': 1603092840, 'timestamp': 1666166520,
'upload_date': '20201019', 'upload_date': '20221019',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg', 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
}, },
}, { }, {
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', 'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE):
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_API_PATH = '/news/templates/data/jsonPlayer' _API_PATH = '/templates/data/jsonPlayer'
_TESTS = [{ _TESTS = [{
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', 'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',

View File

@ -1,68 +1,97 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class CNBCIE(InfoExtractor):
_VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://video.cnbc.com/gallery/?video=3000503714',
'info_dict': {
'id': '3000503714',
'ext': 'mp4',
'title': 'Fighting zombies is big business',
'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
'timestamp': 1459332000,
'upload_date': '20160330',
'uploader': 'NBCU-CNBC',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Dead link',
}
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(
'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
{'force_smil_url': True}),
'id': video_id,
}
class CNBCVideoIE(InfoExtractor): class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)' _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', _TESTS = [{
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
'info_dict': { 'info_dict': {
'id': '7000031301',
'ext': 'mp4', 'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates", 'id': '107344774',
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
'timestamp': 1531958400, 'modified_timestamp': 1702053483,
'upload_date': '20180719', 'timestamp': 1701977810,
'uploader': 'NBCU-CNBC', 'channel': 'News Videos',
'upload_date': '20231207',
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
'release_timestamp': 1701977375,
'modified_date': '20231208',
'release_date': '20231207',
'duration': 65,
'author': 'Sean Conlon',
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
}, },
'params': { 'expected_warnings': ['Unable to download f4m manifest'],
'skip_download': True, }, {
'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
'info_dict': {
'author': 'Jim Cramer',
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 299.0,
'ext': 'mp4',
'id': '107345451',
'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
'timestamp': 1702080139,
'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
'release_date': '20231208',
'upload_date': '20231209',
'modified_timestamp': 1702080139,
'modified_date': '20231209',
'release_timestamp': 1702073551,
}, },
'skip': 'Dead link', 'expected_warnings': ['Unable to download f4m manifest'],
} }, {
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
'info_dict': {
'author': 'Jim Cramer',
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 113.0,
'ext': 'mp4',
'id': '107345474',
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
'timestamp': 1702080535,
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
'release_timestamp': 1702077347,
'modified_timestamp': 1702080535,
'release_date': '20231208',
'upload_date': '20231209',
'modified_date': '20231209',
},
'expected_warnings': ['Unable to download f4m manifest'],
}]
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = self._match_valid_url(url).groups() display_id = self._match_id(url)
video_id = self._download_json( webpage = self._download_webpage(url, display_id)
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
'query': '''{
page(path: "%s") { player_data = traverse_obj(data, (
vcpsId 'page', 'page', 'layout', ..., 'columns', ..., 'modules',
} lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
}''' % path,
})['data']['page']['vcpsId'] return {
return self.url_result( 'id': display_id,
'http://video.cnbc.com/gallery/?video=%d' % video_id, 'display_id': display_id,
CNBCIE.ie_key()) 'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
**self._search_json_ld(webpage, display_id, fatal=False),
**traverse_obj(player_data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'author': ('author', ..., 'name', {str}),
'timestamp': ('datePublished', {parse_iso8601}),
'release_timestamp': ('uploadDate', {parse_iso8601}),
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
'thumbnail': ('thumbnail', {url_or_none}),
'duration': ('duration', {int_or_none}),
'channel': ('section', 'title', {str}),
}, get_all=False),
}

View File

@ -247,6 +247,8 @@ class InfoExtractor:
(For internal use only) (For internal use only)
* http_chunk_size Chunk size for HTTP downloads * http_chunk_size Chunk size for HTTP downloads
* ffmpeg_args Extra arguments for ffmpeg downloader * ffmpeg_args Extra arguments for ffmpeg downloader
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url, RTMP formats can also have the additional fields: page_url,
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
rtmp_protocol, rtmp_real_time rtmp_protocol, rtmp_real_time
@ -278,7 +280,7 @@ class InfoExtractor:
description: Full video description. description: Full video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.
license: License name the video is licensed under. license: License name the video is licensed under.
creator: The creator of the video. creators: List of creators of the video.
timestamp: UNIX timestamp of the moment the video was uploaded timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date in UTC (YYYYMMDD). upload_date: Video upload date in UTC (YYYYMMDD).
If not explicitly set, calculated from timestamp If not explicitly set, calculated from timestamp
@ -422,16 +424,16 @@ class InfoExtractor:
track_number: Number of the track within an album or a disc, as an integer. track_number: Number of the track within an album or a disc, as an integer.
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
as a unicode string. as a unicode string.
artist: Artist(s) of the track. artists: List of artists of the track.
genre: Genre(s) of the track. composers: List of composers of the piece.
genres: List of genres of the track.
album: Title of the album the track belongs to. album: Title of the album the track belongs to.
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
album_artist: List of all artists appeared on the album (e.g. album_artists: List of all artists appeared on the album.
"Ash Borer / Fell Voices" or "Various Artists", useful for splits E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
and compilations). Useful for splits and compilations.
disc_number: Number of the disc or other physical medium the track belongs to, disc_number: Number of the disc or other physical medium the track belongs to,
as an integer. as an integer.
composer: Composer of the piece
The following fields should only be set for clips that should be cut from the original video: The following fields should only be set for clips that should be cut from the original video:
@ -442,6 +444,18 @@ class InfoExtractor:
rows: Number of rows in each storyboard fragment, as an integer rows: Number of rows in each storyboard fragment, as an integer
columns: Number of columns in each storyboard fragment, as an integer columns: Number of columns in each storyboard fragment, as an integer
The following fields are deprecated and should not be set by new code:
composer: Use "composers" instead.
Composer(s) of the piece, comma-separated.
artist: Use "artists" instead.
Artist(s) of the track, comma-separated.
genre: Use "genres" instead.
Genre(s) of the track, comma-separated.
album_artist: Use "album_artists" instead.
All artists appeared on the album, comma-separated.
creator: Use "creators" instead.
The creator of the video.
Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information. Unless mentioned otherwise, None is equivalent to absence of information.
@ -2530,7 +2544,11 @@ class InfoExtractor:
self._report_ignoring_subs('DASH') self._report_ignoring_subs('DASH')
return fmts return fmts
def _extract_mpd_formats_and_subtitles( def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
periods = self._extract_mpd_periods(*args, **kwargs)
return self._merge_mpd_periods(periods)
def _extract_mpd_periods(
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
fatal=True, data=None, headers={}, query={}): fatal=True, data=None, headers={}, query={}):
@ -2543,17 +2561,16 @@ class InfoExtractor:
errnote='Failed to download MPD manifest' if errnote is None else errnote, errnote='Failed to download MPD manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [], {} return []
mpd_doc, urlh = res mpd_doc, urlh = res
if mpd_doc is None: if mpd_doc is None:
return [], {} return []
# We could have been redirected to a new url when we retrieved our mpd file. # We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.url mpd_url = urlh.url
mpd_base_url = base_url(mpd_url) mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles( return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
mpd_doc, mpd_id, mpd_base_url, mpd_url)
def _parse_mpd_formats(self, *args, **kwargs): def _parse_mpd_formats(self, *args, **kwargs):
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs) fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@ -2561,8 +2578,39 @@ class InfoExtractor:
self._report_ignoring_subs('DASH') self._report_ignoring_subs('DASH')
return fmts return fmts
def _parse_mpd_formats_and_subtitles( def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): periods = self._parse_mpd_periods(*args, **kwargs)
return self._merge_mpd_periods(periods)
def _merge_mpd_periods(self, periods):
"""
Combine all formats and subtitles from an MPD manifest into a single list,
by concatenate streams with similar formats.
"""
formats, subtitles = {}, {}
for period in periods:
for f in period['formats']:
assert 'is_dash_periods' not in f, 'format already processed'
f['is_dash_periods'] = True
format_key = tuple(v for k, v in f.items() if k not in (
('format_id', 'fragments', 'manifest_stream_number')))
if format_key not in formats:
formats[format_key] = f
elif 'fragments' in f:
formats[format_key].setdefault('fragments', []).extend(f['fragments'])
if subtitles and period['subtitles']:
self.report_warning(bug_reports_message(
'Found subtitles in multiple periods in the DASH manifest; '
'if part of the subtitles are missing,'
), only_once=True)
for sub_lang, sub_info in period['subtitles'].items():
subtitles.setdefault(sub_lang, []).extend(sub_info)
return list(formats.values()), subtitles
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
""" """
Parse formats from MPD manifest. Parse formats from MPD manifest.
References: References:
@ -2641,9 +2689,13 @@ class InfoExtractor:
return ms_info return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats, subtitles = [], {}
stream_numbers = collections.defaultdict(int) stream_numbers = collections.defaultdict(int)
for period in mpd_doc.findall(_add_ns('Period')): for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
period_entry = {
'id': period.get('id', f'period-{period_idx}'),
'formats': [],
'subtitles': collections.defaultdict(list),
}
period_duration = parse_duration(period.get('duration')) or mpd_duration period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, { period_ms_info = extract_multisegment_info(period, {
'start_number': 1, 'start_number': 1,
@ -2893,11 +2945,10 @@ class InfoExtractor:
if content_type in ('video', 'audio', 'image/jpeg'): if content_type in ('video', 'audio', 'image/jpeg'):
f['manifest_stream_number'] = stream_numbers[f['url']] f['manifest_stream_number'] = stream_numbers[f['url']]
stream_numbers[f['url']] += 1 stream_numbers[f['url']] += 1
formats.append(f) period_entry['formats'].append(f)
elif content_type == 'text': elif content_type == 'text':
subtitles.setdefault(lang or 'und', []).append(f) period_entry['subtitles'][lang or 'und'].append(f)
yield period_entry
return formats, subtitles
def _extract_ism_formats(self, *args, **kwargs): def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)

View File

@ -33,10 +33,7 @@ class CrooksAndLiarsIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
manifest = self._parse_json( manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
self._search_regex(
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
video_id)
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))

View File

@ -9,7 +9,7 @@ from ..utils.traversal import traverse_obj
class ERRJupiterIE(InfoExtractor): class ERRJupiterIE(InfoExtractor):
_VALID_URL = r'https?://jupiter(?:pluss)?\.err\.ee/(?P<id>\d+)' _VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'note': 'Jupiter: Movie: siin-me-oleme', 'note': 'Jupiter: Movie: siin-me-oleme',
'url': 'https://jupiter.err.ee/1211107/siin-me-oleme', 'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
@ -145,6 +145,31 @@ class ERRJupiterIE(InfoExtractor):
'season_number': 0, 'season_number': 0,
'series': 'Лесные истории | Аисты', 'series': 'Лесные истории | Аисты',
'series_id': '1037497', 'series_id': '1037497',
}
}, {
'note': 'Lasteekraan: Pätu',
'url': 'https://lasteekraan.err.ee/1092243/patu',
'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
'info_dict': {
'id': '1092243',
'ext': 'mp4',
'title': 'Pätu',
'alt_title': '',
'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
'release_date': '20230614',
'upload_date': '20200520',
'modified_date': '20200520',
'release_timestamp': 1686745800,
'timestamp': 1589975640,
'modified_timestamp': 1589975640,
'release_year': 1990,
'episode': 'Episode 1',
'episode_id': '1092243',
'episode_number': 1,
'season': 'Season 1',
'season_number': 1,
'series': 'Pätu',
'series_id': '1092236',
}, },
}] }]

View File

@ -500,6 +500,7 @@ class FacebookIE(InfoExtractor):
webpage, 'description', default=None) webpage, 'description', default=None)
uploader_data = ( uploader_data = (
get_first(media, ('owner', {dict})) get_first(media, ('owner', {dict}))
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict})) or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {}) or get_first(post, ('event', 'event_creator', {dict})) or {})
@ -583,8 +584,8 @@ class FacebookIE(InfoExtractor):
def extract_relay_prefetched_data(_filter): def extract_relay_prefetched_data(_filter):
return traverse_obj(extract_relay_data(_filter), ( return traverse_obj(extract_relay_data(_filter), (
'require', (None, (..., ..., ..., '__bbox', 'require')), 'require', (None, (..., ..., ..., '__bbox', 'require')),
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ..., lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
'__bbox', 'result', 'data', {dict}), get_all=False) or {} ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
if not video_data: if not video_data:
server_js_data = self._parse_json(self._search_regex([ server_js_data = self._parse_json(self._search_regex([

View File

@ -0,0 +1,62 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
UserNotLive,
parse_iso8601,
str_or_none,
traverse_obj,
url_or_none,
)
class FlexTVIE(InfoExtractor):
    """Extractor for live channel pages on flextv.co.kr."""

    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
    _TESTS = [{
        'url': 'https://www.flextv.co.kr/channels/231638/live',
        'info_dict': {
            'id': '231638',
            'ext': 'mp4',
            'title': r're:^214하나만\.\.\. ',
            'thumbnail': r're:^https?://.+\.jpg',
            'upload_date': r're:\d{8}',
            'timestamp': int,
            'live_status': 'is_live',
            'channel': 'Hi별',
            'channel_id': '244396',
        },
        'skip': 'The channel is offline',
    }, {
        'url': 'https://www.flextv.co.kr/channels/746/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Fetch the stream description for the channel in `url` and return an
        info dict with its HLS formats and basic channel metadata.

        Raises UserNotLive when the stream endpoint answers with HTTP 400;
        any other ExtractorError is re-raised unchanged.
        """
        channel_id = self._match_id(url)
        api_url = f'https://api.flextv.co.kr/api/channels/{channel_id}/stream'

        try:
            stream_data = self._download_json(api_url, channel_id, query={'option': 'all'})
        except ExtractorError as error:
            # Only an HTTP 400 is translated into "user not live"
            if not (isinstance(error.cause, HTTPError) and error.cause.status == 400):
                raise
            raise UserNotLive(video_id=channel_id)

        # Only the first advertised source is used
        hls_url = stream_data['sources'][0]['url']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            hls_url, channel_id, 'mp4')

        info = {
            'id': channel_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
        info.update(traverse_obj(stream_data, {
            'title': ('stream', 'title', {str}),
            'timestamp': ('stream', 'createdAt', {parse_iso8601}),
            'thumbnail': ('thumbUrl', {url_or_none}),
            'channel': ('owner', 'name', {str}),
            'channel_id': ('owner', 'id', {str_or_none}),
        }))
        return info

View File

@ -1,25 +1,29 @@
from .common import InfoExtractor from .common import InfoExtractor
from .nexx import NexxIE from .nexx import NexxIE
from ..utils import (
int_or_none,
str_or_none,
)
class FunkIE(InfoExtractor): class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'md5': '8610449476156f338761a75391b0017d',
'info_dict': { 'info_dict': {
'id': '1155821', 'id': '1155821',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2', 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
'description': 'md5:a691d0413ef4835588c5b03ded670c1f', 'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
'timestamp': 1514507395, 'timestamp': 1514507395,
'upload_date': '20171229', 'upload_date': '20171229',
'duration': 426.0,
'cast': ['United Creators PMB GmbH'],
'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
'season_number': 0,
'season': 'Season 0',
'episode_number': 0,
'episode': 'Episode 0',
}, },
}, { }, {
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True, 'only_matching': True,
@ -27,18 +31,10 @@ class FunkIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id, nexx_id = self._match_valid_url(url).groups() display_id, nexx_id = self._match_valid_url(url).groups()
video = self._download_json(
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'nexx:741:' + nexx_id, 'url': f'nexx:741:{nexx_id}',
'ie_key': NexxIE.ie_key(), 'ie_key': NexxIE.ie_key(),
'id': nexx_id, 'id': nexx_id,
'title': video.get('title'),
'description': video.get('description'),
'duration': int_or_none(video.get('duration')),
'channel_id': str_or_none(video.get('channelId')),
'display_id': display_id, 'display_id': display_id,
'tags': video.get('tags'),
'thumbnail': video.get('imageUrlLandscape'),
} }

View File

@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
'title': 'A Family for the Holidays', 'title': 'A Family for the Holidays',
}, },
'skip': 'This video is only available for registered users' 'skip': 'This video is only available for registered users'
}, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
'ext': 'mp4',
'title': 'S11 - Aflevering 1',
'episode': 'Episode 1',
'series': 'De Mol',
'season_number': 11,
'episode_number': 1,
'season': 'Season 11'
},
'params': {
'skip_download': True
},
'skip': 'This video is only available for registered users'
}] }]
_id_token = None _id_token = None
@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
video_id, headers={'Authorization': 'Bearer %s' % self._id_token}) video_id, headers={
'Authorization': 'Bearer %s' % self._id_token,
**self.geo_verification_headers(),
})
formats, subs = self._extract_m3u8_formats_and_subtitles( if 'manifestUrls' in api:
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS') formats, subtitles = self._extract_m3u8_formats_and_subtitles(
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
else:
if 'ssai' not in api:
raise ExtractorError('expecting Google SSAI stream')
ssai_content_source_id = api['ssai']['contentSourceID']
ssai_video_id = api['ssai']['videoID']
dai = self._download_json(
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
video_id, data=b'{"api-key":"null"}',
headers={'content-type': 'application/json'})
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
# skip pre-roll and mid-roll ads
periods = [p for p in periods if '-ad-' not in p['id']]
formats, subtitles = self._merge_mpd_periods(periods)
info_dict.update({ info_dict.update({
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
}) })
return info_dict return info_dict

View File

@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/', 'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
'md5': 'e94de44cd80818084352fcf8de1ce82c', 'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
'info_dict': { 'info_dict': {
'id': 'g9j7Eovo', 'id': 'g9j7Eovo',
'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées', 'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
}, },
}, { }, {
'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/', 'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
'md5': '0b3f10332b812034b3a3eda1ef877c5f', 'md5': '319c662943dd777bab835cae1e2d73a5',
'info_dict': { 'info_dict': {
'id': 'LeAgybyc', 'id': 'LeAgybyc',
'title': 'Intelligence artificielle : faut-il sen méfier ?', 'title': 'Intelligence artificielle : faut-il sen méfier ?',
@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/', 'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
'md5': '3972ddf2d5f8b98699f191687258e2f9', 'md5': '6289f9489efb969e38245f31721596fe',
'info_dict': { 'info_dict': {
'id': 'QChnbPYA', 'id': 'QChnbPYA',
'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International', 'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
}, },
}, { }, {
'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/', 'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
'md5': '3ac0a0769546ee6be41ab52caea5d9a9', 'md5': 'f6df814cae53e85937621599d2967520',
'info_dict': { 'info_dict': {
'id': 'QJzqoNbf', 'id': 'QJzqoNbf',
'title': 'La philosophe Nathalie Sarthou-Lajus est linvitée du Figaro Live', 'title': 'La philosophe Nathalie Sarthou-Lajus est linvitée du Figaro Live',
@ -73,7 +73,8 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData'] player_data = self._search_nextjs_data(
webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
return self.url_result( return self.url_result(
f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'), f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),

View File

@ -3,16 +3,15 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
extract_attributes,
ExtractorError, ExtractorError,
extract_attributes,
float_or_none, float_or_none,
get_element_by_class,
int_or_none, int_or_none,
srt_subtitles_timecode, srt_subtitles_timecode,
strip_or_none,
mimetype2ext, mimetype2ext,
traverse_obj,
try_get, try_get,
url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
@ -83,15 +82,29 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
class LinkedInIE(LinkedInBaseIE): class LinkedInIE(LinkedInBaseIE):
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
'info_dict': { 'info_dict': {
'id': '6850898786781339649', 'id': '6850898786781339649',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing', 'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
'description': 'md5:be125430bab1c574f16aeb186a4d5b19', 'description': 'md5:2998a31f6f479376dd62831f53a80f71',
'creator': 'Mishal K.' 'uploader': 'Mishal K.',
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
'like_count': int
},
}, {
'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
'info_dict': {
'id': '7151241570371948544',
'ext': 'mp4',
'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
'uploader': 'MathWorks',
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
'like_count': int,
'subtitles': 'mincount:1'
}, },
}] }]
@ -99,26 +112,30 @@ class LinkedInIE(LinkedInBaseIE):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_extract_title(webpage) video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) sources = self._parse_json(video_attrs['data-sources'], video_id)
like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
formats = [{ formats = [{
'url': source['src'], 'url': source['src'],
'ext': mimetype2ext(source.get('type')), 'ext': mimetype2ext(source.get('type')),
'tbr': float_or_none(source.get('data-bitrate'), scale=1000), 'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
} for source in sources] } for source in sources]
subtitles = {'en': [{
'url': video_attrs['data-captions-url'],
'ext': 'vtt',
}]} if url_or_none(video_attrs.get('data-captions-url')) else {}
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': title, 'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
'like_count': like_count, 'like_count': int_or_none(self._search_regex(
'creator': creator, r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
'uploader': traverse_obj(
self._yield_json_ld(webpage, video_id),
(lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'description': description, 'description': self._og_search_description(webpage, default=None),
'subtitles': subtitles,
} }

View File

@ -28,12 +28,24 @@ class MagellanTVIE(InfoExtractor):
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'], 'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.magellantv.com/watch/celebration-nation',
'info_dict': {
'id': 'celebration-nation',
'ext': 'mp4',
'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
'duration': 2640.0,
'title': 'Ancestors',
},
'params': {'skip_download': 'm3u8'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail'] data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', 'reactContext',
(('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id) formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
return { return {

View File

@ -1,6 +1,7 @@
import itertools import itertools
import json import json
from .art19 import Art19IE
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
class NebulaIE(NebulaBaseIE): class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)' IE_NAME = 'nebula:video'
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'info_dict': { 'info_dict': {
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
class NebulaClassIE(NebulaBaseIE): class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class' IE_NAME = 'nebula:media'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)' _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14', 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': { 'info_dict': {
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
'title': 'Photos, Sculpture, and Video', 'title': 'Photos, Sculpture, and Video',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
'info_dict': {
'ext': 'mp3',
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
'series_id': '335e8159-d663-491a-888f-1732285706ac',
'modified_timestamp': 1599091504,
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'series': 'Extremities',
'modified_date': '20200903',
'upload_date': '20200902',
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
'release_timestamp': 1571237958,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'duration': 1546.05714,
'timestamp': 1599085608,
'release_date': '20191016',
},
}, {
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
'info_dict': {
'ext': 'mp3',
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'episode_number': 1,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20230304',
'modified_date': '20230403',
'series': 'The Layover',
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'modified_timestamp': 1680554566,
'duration': 3130.46401,
'release_timestamp': 1677943800,
'title': 'The Layover — Episode 1',
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
'upload_date': '20230303',
'episode': 'Episode 1',
'timestamp': 1677883672,
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
metadata = self._call_api( metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons', f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata') slug, note='Fetching class/podcast metadata')
return { content_type = metadata.get('type')
**self._extract_video_metadata(metadata), if content_type == 'lesson':
**self._extract_formats(metadata['id'], slug), return {
} **self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
elif content_type == 'podcast_episode':
episode_url = metadata['episode_url']
if not episode_url and metadata.get('premium'):
self.raise_login_required()
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {
'id': ('id', {str}),
'url': ('episode_url', {url_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': ('channel_id', {str}),
'chnanel': ('channel_title', {str}),
'thumbnail': ('assets', 'regular', {url_or_none}),
})
raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE): class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions' IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)' _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/myshows', 'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1, 'playlist_mincount': 1,
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE): class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel' IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])' _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money', 'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': { 'info_dict': {
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'md5:6690248223eed044a9f11cd5a24f9742', 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
}, },
'playlist_count': 23, 'playlist_count': 23,
}, {
'url': 'https://nebula.tv/trussissuespodcast',
'info_dict': {
'id': 'trussissuespodcast',
'title': 'The TLDR News Podcast',
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
},
'playlist_mincount': 80,
}] }]
def _generate_playlist_entries(self, collection_id, collection_slug): def _generate_playlist_entries(self, collection_id, collection_slug):
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}', lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata) {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
for page_num in itertools.count(1):
episodes = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')
for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
yield self.url_result(episode['share_url'], NebulaClassIE)
next_url = episodes.get('next')
if not next_url:
break
def _real_extract(self, url): def _real_extract(self, url):
collection_slug = self._match_id(url) collection_slug = self._match_id(url)
channel = self._call_api( channel = self._call_api(
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
if channel.get('type') == 'class': if channel.get('type') == 'class':
entries = self._generate_class_entries(channel) entries = self._generate_class_entries(channel)
elif channel.get('type') == 'podcast_channel':
entries = self._generate_podcast_entries(channel['id'], collection_slug)
else: else:
entries = self._generate_playlist_entries(channel['id'], collection_slug) entries = self._generate_playlist_entries(channel['id'], collection_slug)

View File

@ -1,33 +1,38 @@
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor): class NerdCubedFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json' _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json', 'url': 'http://www.nerdcubed.co.uk/',
'info_dict': { 'info_dict': {
'id': 'nerdcubed-feed', 'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed', 'title': 'nerdcubed.co.uk feed',
}, },
'playlist_mincount': 1300, 'playlist_mincount': 5500,
} }
def _extract_video(self, feed_entry):
return self.url_result(
f'https://www.youtube.com/watch?v={feed_entry["id"]}', YoutubeIE,
**traverse_obj(feed_entry, {
'id': ('id', {str}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('publishedAt', {parse_iso8601}),
'channel': ('source', 'name', {str}),
'channel_id': ('source', 'id', {str}),
'channel_url': ('source', 'url', {str}),
'thumbnail': ('thumbnail', 'source', {url_or_none}),
}), url_transparent=True)
def _real_extract(self, url): def _real_extract(self, url):
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed') video_id = 'nerdcubed-feed'
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
entries = [{ return self.playlist_result(
'_type': 'url', map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
'title': feed_entry['title'], video_id, 'nerdcubed.co.uk feed')
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
} for feed_entry in feed]
return {
'_type': 'playlist',
'title': 'nerdcubed.co.uk feed',
'id': 'nerdcubed-feed',
'entries': entries,
}

View File

@ -9,6 +9,7 @@ from ..utils import (
join_nonempty, join_nonempty,
parse_duration, parse_duration,
traverse_obj, traverse_obj,
try_call,
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
IE_DESC = 'NHK らじる (Radiru/Rajiru)' IE_DESC = 'NHK らじる (Radiru/Rajiru)'
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?' _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
'skip': 'Episode expired on 2023-04-16', 'skip': 'Episode expired on 2024-02-24',
'info_dict': { 'info_dict': {
'channel': 'NHK-FM', 'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
'uploader': 'NHK-FM', 'id': '0449_01_3926210',
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
'ext': 'm4a', 'ext': 'm4a',
'id': '0449_01_3853544',
'series': 'ジャズ・トゥナイト', 'series': 'ジャズ・トゥナイト',
'uploader': 'NHK-FM',
'channel': 'NHK-FM',
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg', 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
'timestamp': 1680969600, 'release_date': '20240217',
'title': 'ジャズ・トゥナイト NEWジャズ特集', 'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
'upload_date': '20230408', 'timestamp': 1708185600,
'release_timestamp': 1680962400, 'release_timestamp': 1708178400,
'release_date': '20230408', 'upload_date': '20240217',
'was_live': True,
}, },
}, { }, {
# playlist, airs every weekday so it should _hopefully_ be okay forever # playlist, airs every weekday so it should _hopefully_ be okay forever
@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
'series': 'らじる文庫 by ラジオ深夜便 ', 'series': 'らじる文庫 by ラジオ深夜便 ',
'release_timestamp': 1481126700, 'release_timestamp': 1481126700,
'upload_date': '20211101', 'upload_date': '20211101',
} },
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
}, { }, {
# news # news
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
}, },
}] }]
_API_URL_TMPL = None
def _extract_extended_description(self, episode_id, episode):
service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
detail_url = try_call(
lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
if not detail_url:
return
full_meta = traverse_obj(
self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
('list', service, 0, {dict})) or {}
return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
def _extract_episode_info(self, headline, programme_id, series_meta): def _extract_episode_info(self, headline, programme_id, series_meta):
episode_id = f'{programme_id}_{headline["headline_id"]}' episode_id = f'{programme_id}_{headline["headline_id"]}'
episode = traverse_obj(headline, ('file_list', 0, {dict})) episode = traverse_obj(headline, ('file_list', 0, {dict}))
description = self._extract_extended_description(episode_id, episode)
if not description:
self.report_warning('Failed to get extended description, falling back to summary')
description = traverse_obj(episode, ('file_title_sub', {str}))
return { return {
**series_meta, **series_meta,
@ -551,14 +571,21 @@ class NhkRadiruIE(InfoExtractor):
'was_live': True, 'was_live': True,
'series': series_meta.get('title'), 'series': series_meta.get('title'),
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'), 'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
'description': description,
**traverse_obj(episode, { **traverse_obj(episode, {
'title': 'file_title', 'title': 'file_title',
'description': 'file_title_sub',
'timestamp': ('open_time', {unified_timestamp}), 'timestamp': ('open_time', {unified_timestamp}),
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}), 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
}), }),
} }
def _real_initialize(self):
if self._API_URL_TMPL:
return
api_config = self._download_xml(
'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)
NhkRadiruIE._API_URL_TMPL = try_call(lambda: f'https:{api_config.find(".//url_program_detail").text}')
def _real_extract(self, url): def _real_extract(self, url):
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline') site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
programme_id = f'{site_id}_{corner_id}' programme_id = f'{site_id}_{corner_id}'

View File

@ -172,9 +172,6 @@ class NiconicoIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)' _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
_COMMENT_API_ENDPOINTS = (
'https://nvcomment.nicovideo.jp/legacy/api.json',
'https://nmsg.nicovideo.jp/api.json',)
_API_HEADERS = { _API_HEADERS = {
'X-Frontend-ID': '6', 'X-Frontend-ID': '6',
'X-Frontend-Version': '0', 'X-Frontend-Version': '0',
@ -470,93 +467,16 @@ class NiconicoIE(InfoExtractor):
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None)) parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
or get_video_info('duration')), or get_video_info('duration')),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}', 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
'subtitles': self.extract_subtitles(video_id, api_data, session_api_data), 'subtitles': self.extract_subtitles(video_id, api_data),
} }
def _get_subtitles(self, video_id, api_data, session_api_data): def _get_subtitles(self, video_id, api_data):
comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey')) comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
user_id_str = session_api_data.get('serviceUserId') danmaku = traverse_obj(self._download_json(
f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
new_danmaku = self._extract_new_comments(
new_comments.get('server'), video_id,
new_comments.get('params'), new_comments.get('threadKey'))
if not legacy_danmaku and not new_danmaku:
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
return
return {
'comments': [{
'ext': 'json',
'data': json.dumps(legacy_danmaku + new_danmaku),
}],
}
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
auth_data = {
'user_id': user_id,
'userkey': user_key,
} if user_id and user_key else {'user_id': ''}
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
# Request Start
post_data = [{'ping': {'content': 'rs:0'}}]
for i, thread in enumerate(threads):
thread_id = thread['id']
thread_fork = thread['fork']
# Post Start (2N)
post_data.append({'ping': {'content': f'ps:{i * 2}'}})
post_data.append({'thread': {
'fork': thread_fork,
'language': 0,
'nicoru': 3,
'scores': 1,
'thread': thread_id,
'version': '20090904',
'with_global': 1,
**auth_data,
}})
# Post Final (2N)
post_data.append({'ping': {'content': f'pf:{i * 2}'}})
# Post Start (2N+1)
post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
post_data.append({'thread_leaves': {
# format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments'
# unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
'content': '0-999999:999999,999999,nicoru:999999',
'fork': thread_fork,
'language': 0,
'nicoru': 3,
'scores': 1,
'thread': thread_id,
**auth_data,
}})
# Post Final (2N+1)
post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
# Request Final
post_data.append({'ping': {'content': 'rf:0'}})
return self._download_json(
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
headers={
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
'Origin': 'https://www.nicovideo.jp',
'Content-Type': 'text/plain;charset=UTF-8',
},
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
comments = self._download_json(
f'{endpoint}/v1/threads', video_id, data=json.dumps({
'additionals': {}, 'additionals': {},
'params': params, 'params': comments_info.get('params'),
'threadKey': thread_key, 'threadKey': comments_info.get('threadKey'),
}).encode(), fatal=False, }).encode(), fatal=False,
headers={ headers={
'Referer': 'https://www.nicovideo.jp/', 'Referer': 'https://www.nicovideo.jp/',
@ -566,8 +486,19 @@ class NiconicoIE(InfoExtractor):
'x-frontend-id': '6', 'x-frontend-id': '6',
'x-frontend-version': '0', 'x-frontend-version': '0',
}, },
note='Downloading comments (new)', errnote='Failed to download comments (new)') note='Downloading comments', errnote='Failed to download comments'),
return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...)) ('data', 'threads', ..., 'comments', ...))
if not danmaku:
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
return
return {
'comments': [{
'ext': 'json',
'data': json.dumps(danmaku),
}],
}
class NiconicoPlaylistBaseIE(InfoExtractor): class NiconicoPlaylistBaseIE(InfoExtractor):

View File

@ -135,14 +135,15 @@ class NovaIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
'md5': '249baab7d0104e186e78b0899c7d5f28', 'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
'info_dict': { 'info_dict': {
'id': '1757139', 'id': '8OvQqEvV3MW',
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', 'display_id': '8OvQqEvV3MW',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Podzemní nemocnice v pražské Krči', 'title': 'Podzemní nemocnice v pražské Krči',
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
'thumbnail': r're:^https?://.*\.(?:jpg)', 'thumbnail': r're:^https?://.*\.(?:jpg)',
'duration': 151,
} }
}, { }, {
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
@ -210,7 +211,7 @@ class NovaIE(InfoExtractor):
# novaplus # novaplus
embed_id = self._search_regex( embed_id = self._search_regex(
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
webpage, 'embed url', default=None) webpage, 'embed url', default=None)
if embed_id: if embed_id:
return { return {

View File

@ -1,4 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import make_archive_id
class OneFootballIE(InfoExtractor): class OneFootballIE(InfoExtractor):
@ -7,41 +9,43 @@ class OneFootballIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
'info_dict': { 'info_dict': {
'id': '34012334', 'id': 'Y2VtcWAT',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Highlights: FC Zürich 3-3 FC Basel', 'title': 'Highlights: FC Zürich 3-3 FC Basel',
'description': 'md5:33d9855cb790702c4fe42a513700aba8', 'description': 'md5:33d9855cb790702c4fe42a513700aba8',
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334', 'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
'timestamp': 1635874604, 'timestamp': 1635874895,
'upload_date': '20211102' 'upload_date': '20211102',
'duration': 375.0,
'tags': ['Football', 'Soccer', 'OneFootball'],
'_old_archive_ids': ['onefootball 34012334'],
}, },
'params': {'skip_download': True} 'params': {'skip_download': True},
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020', 'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
'info_dict': { 'info_dict': {
'id': '34041020', 'id': 'leVJrMho',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Klopp fumes at VAR decisions in West Ham defeat', 'title': 'Klopp fumes at VAR decisions in West Ham defeat',
'description': 'md5:9c50371095a01ad3f63311c73d8f51a5', 'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020', 'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
'timestamp': 1636314103, 'timestamp': 1636315232,
'upload_date': '20211107' 'upload_date': '20211107',
'duration': 93.0,
'tags': ['Football', 'Soccer', 'OneFootball'],
'_old_archive_ids': ['onefootball 34041020'],
}, },
'params': {'skip_download': True} 'params': {'skip_download': True}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, video_id)
data_json = self._search_json_ld(webpage, id) data_json = self._search_json_ld(webpage, video_id, fatal=False)
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url') data_json.pop('url', None)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
return {
'id': id, return self.url_result(
'title': data_json.get('title'), m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
'description': data_json.get('description'), **data_json, url_transparent=True)
'thumbnail': data_json.get('thumbnail'),
'timestamp': data_json.get('timestamp'),
'formats': formats,
'subtitles': subtitles,
}

View File

@ -12,6 +12,8 @@ from ..compat import compat_str
class OpenRecBaseIE(InfoExtractor): class OpenRecBaseIE(InfoExtractor):
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
def _extract_pagestore(self, webpage, video_id): def _extract_pagestore(self, webpage, video_id):
return self._parse_json( return self._parse_json(
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id) self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
@ -21,7 +23,7 @@ class OpenRecBaseIE(InfoExtractor):
if not m3u8_url: if not m3u8_url:
continue continue
yield from self._extract_m3u8_formats( yield from self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id=name) m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
def _extract_movie(self, webpage, video_id, name, is_live): def _extract_movie(self, webpage, video_id, name, is_live):
window_stores = self._extract_pagestore(webpage, video_id) window_stores = self._extract_pagestore(webpage, video_id)
@ -60,6 +62,7 @@ class OpenRecBaseIE(InfoExtractor):
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')), 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')), 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
'is_live': is_live, 'is_live': is_live,
'http_headers': self._M3U8_HEADERS,
} }
@ -110,7 +113,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
raise ExtractorError('Cannot extract title') raise ExtractorError('Cannot extract title')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4') capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
return { return {
'id': video_id, 'id': video_id,
@ -121,6 +124,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str), 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str), 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
'upload_date': unified_strdate(capture_data.get('createdAt')), 'upload_date': unified_strdate(capture_data.get('createdAt')),
'http_headers': self._M3U8_HEADERS,
} }

View File

@ -87,8 +87,8 @@ class PornHubBaseIE(InfoExtractor):
def is_logged(webpage): def is_logged(webpage):
return any(re.search(p, webpage) for p in ( return any(re.search(p, webpage) for p in (
r'class=["\']signOut', r'id="profileMenuDropdown"',
r'>Sign\s+[Oo]ut\s*<')) r'class="ph-icon-logout"'))
if is_logged(login_page): if is_logged(login_page):
self._logged_in = True self._logged_in = True

View File

@ -1,6 +1,7 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
@ -91,7 +92,7 @@ class RaiBaseIE(InfoExtractor):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
if not audio_only and not is_live: if not audio_only and not is_live:
formats.extend(self._create_http_urls(media_url, relinker_url, formats)) formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))
return filter_dict({ return filter_dict({
'is_live': is_live, 'is_live': is_live,
@ -99,7 +100,7 @@ class RaiBaseIE(InfoExtractor):
'formats': formats, 'formats': formats,
}) })
def _create_http_urls(self, manifest_url, relinker_url, fmts): def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
_MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8' _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s' _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
_QUALITY = { _QUALITY = {
@ -166,6 +167,14 @@ class RaiBaseIE(InfoExtractor):
'fps': 25, 'fps': 25,
} }
# Check if MP4 download is available
try:
self._request_webpage(
HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
except ExtractorError as e:
self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
return []
# filter out single-stream formats # filter out single-stream formats
fmts = [f for f in fmts fmts = [f for f in fmts
if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none'] if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']

View File

@ -9,7 +9,6 @@ from ..utils import (
get_element_html_by_class, get_element_html_by_class,
get_elements_by_class, get_elements_by_class,
int_or_none, int_or_none,
join_nonempty,
parse_count, parse_count,
parse_duration, parse_duration,
unescapeHTML, unescapeHTML,
@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'timestamp': 1640131200, 'timestamp': 1640131200,
'description': '', 'description': '',
'creator': 'WildeerStudio', 'creators': ['WildeerStudio'],
'upload_date': '20211222', 'upload_date': '20211222',
'uploader': 'CerZule', 'uploader': 'CerZule',
'uploader_url': 'https://rule34video.com/members/36281/', 'uploader_url': 'https://rule34video.com/members/36281/',
@ -81,13 +80,13 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality, 'quality': quality,
}) })
categories, creator, uploader, uploader_url = [None] * 4 categories, creators, uploader, uploader_url = [None] * 4
for col in get_elements_by_class('col', webpage): for col in get_elements_by_class('col', webpage):
label = clean_html(get_element_by_class('label', col)) label = clean_html(get_element_by_class('label', col))
if label == 'Categories:': if label == 'Categories:':
categories = list(map(clean_html, get_elements_by_class('item', col))) categories = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Artist:': elif label == 'Artist:':
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ') creators = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Uploaded By:': elif label == 'Uploaded By:':
uploader = clean_html(get_element_by_class('name', col)) uploader = clean_html(get_element_by_class('name', col))
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href') uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
@ -115,7 +114,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int_or_none(self._search_regex( 'comment_count': int_or_none(self._search_regex(
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)), r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
'age_limit': 18, 'age_limit': 18,
'creator': creator, 'creators': creators,
'uploader': uploader, 'uploader': uploader,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
'categories': categories, 'categories': categories,

View File

@ -5,7 +5,10 @@ from ..utils import traverse_obj, update_url_query
class ScreencastifyIE(InfoExtractor): class ScreencastifyIE(InfoExtractor):
_VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)' _VALID_URL = [
r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8', 'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
'info_dict': { 'info_dict': {
@ -19,6 +22,21 @@ class ScreencastifyIE(InfoExtractor):
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, {
'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t',
'info_dict': {
'id': 'J5N7H11wofDN1jZUCr3t',
'ext': 'mp4',
'uploader': 'Scott Piesen',
'description': '',
'title': 'Lesson Recording 1-17 Burrr...',
},
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -7,8 +7,6 @@ from ..utils import (
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
str_or_none,
strip_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -388,15 +386,55 @@ class SVTSeriesIE(SVTPlayBaseIE):
dict_get(series, ('longDescription', 'shortDescription'))) dict_get(series, ('longDescription', 'shortDescription')))
class SVTPageIE(InfoExtractor): class SVTPageIE(SVTBaseIE):
_VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))' _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
'info_dict': {
'title': 'Viktor, 18, förlorade armar och ben i sepsis vill återuppta karaten och bli svetsare',
'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
},
'playlist_count': 2,
}, {
'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
'info_dict': {
'id': 'jXvk42E',
'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
'ext': 'mp4',
"duration": 80,
'age_limit': 0,
'timestamp': 1704370009,
'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
'series': 'Lokala Nyheter Skåne',
'upload_date': '20240104'
},
'params': {
'skip_download': True,
}
}, {
'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
'info_dict': {
'title': '2023 tungt år för svensk media',
'id': 'ewqAZv4',
'ext': 'mp4',
"duration": 3074,
'age_limit': 0,
'series': '',
'timestamp': 1702980479,
'upload_date': '20231219',
'episode': 'Mediestudier'
},
'params': {
'skip_download': True,
}
}, {
'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa', 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
'info_dict': { 'info_dict': {
'id': '25298267', 'id': '25298267',
'title': 'Bakom masken Lehners kamp mot mental ohälsa', 'title': 'Bakom masken Lehners kamp mot mental ohälsa',
}, },
'playlist_count': 4, 'playlist_count': 4,
'skip': 'Video is gone'
}, { }, {
'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien', 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
'info_dict': { 'info_dict': {
@ -404,6 +442,7 @@ class SVTPageIE(InfoExtractor):
'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien', 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
}, },
'playlist_count': 2, 'playlist_count': 2,
'skip': 'Video is gone'
}, { }, {
# only programTitle # only programTitle
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun', 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
@ -414,6 +453,7 @@ class SVTPageIE(InfoExtractor):
'duration': 27, 'duration': 27,
'age_limit': 0, 'age_limit': 0,
}, },
'skip': 'Video is gone'
}, { }, {
'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1', 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
'only_matching': True, 'only_matching': True,
@ -427,26 +467,23 @@ class SVTPageIE(InfoExtractor):
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url) return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = self._match_valid_url(url).groups() display_id = self._match_id(url)
article = self._download_json( webpage = self._download_webpage(url, display_id)
'https://api.svt.se/nss-api/page/' + path, display_id, title = self._og_search_title(webpage)
query={'q': 'articles'})['articles']['content'][0]
entries = [] urql_state = self._search_json(
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
def _process_content(content): data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
video_id = compat_str(content['image']['svtId'])
entries.append(self.url_result(
'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
for media in article.get('media', []): def entries():
_process_content(media) for video_id in set(traverse_obj(data, (
'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}
))):
info = self._extract_video(
self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id)
info['title'] = title
yield info
for obj in article.get('structuredBody', []): return self.playlist_result(entries(), display_id, title)
_process_content(obj.get('content') or {})
return self.playlist_result(
entries, str_or_none(article.get('id')),
strip_or_none(article.get('title')))

View File

@ -100,9 +100,13 @@ class TwitterBaseIE(InfoExtractor):
if not variant_url: if not variant_url:
return [], {} return [], {}
elif '.m3u8' in variant_url: elif '.m3u8' in variant_url:
return self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
variant_url, video_id, 'mp4', 'm3u8_native', variant_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
return fmts, subs
else: else:
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = { f = {
@ -471,6 +475,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
'channel_id': '549749560',
'uploader': 'FREE THE NIPPLE', 'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple', 'uploader_id': 'freethenipple',
'duration': 12.922, 'duration': 12.922,
@ -484,6 +489,7 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 18, 'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'], '_old_archive_ids': ['twitter 643211948184596480'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@ -506,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': r're:Star Wars.*A new beginning is coming December 18.*', 'title': r're:Star Wars.*A new beginning is coming December 18.*',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'channel_id': '20106852',
'uploader_id': 'starwars', 'uploader_id': 'starwars',
'uploader': r're:Star Wars.*', 'uploader': r're:Star Wars.*',
'timestamp': 1447395772, 'timestamp': 1447395772,
@ -551,6 +558,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'channel_id': '1383165541',
'uploader': 'jaydin donte geer', 'uploader': 'jaydin donte geer',
'uploader_id': 'jaydingeer', 'uploader_id': 'jaydingeer',
'duration': 30.0, 'duration': 30.0,
@ -591,6 +599,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
'channel_id': '701615052',
'uploader_id': 'CaptainAmerica', 'uploader_id': 'CaptainAmerica',
'uploader': 'Captain America', 'uploader': 'Captain America',
'duration': 3.17, 'duration': 3.17,
@ -627,6 +636,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
'channel_id': '2526757026',
'uploader': 'عالم الأخبار', 'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm', 'uploader_id': 'news_al3alm',
'duration': 277.4, 'duration': 277.4,
@ -651,6 +661,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
'channel_id': '2319432498',
'uploader': 'Préfet de Guadeloupe', 'uploader': 'Préfet de Guadeloupe',
'uploader_id': 'Prefet971', 'uploader_id': 'Prefet971',
'duration': 47.48, 'duration': 47.48,
@ -677,6 +688,7 @@ class TwitterIE(TwitterBaseIE):
'title': 're:.*?Shep is on a roll today.*?', 'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
'channel_id': '255036353',
'uploader': 'Lis Power', 'uploader': 'Lis Power',
'uploader_id': 'LisPower1', 'uploader_id': 'LisPower1',
'duration': 111.278, 'duration': 111.278,
@ -741,6 +753,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba', 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
'channel_id': '18552281',
'uploader': 'Brooklyn Nets', 'uploader': 'Brooklyn Nets',
'uploader_id': 'BrooklynNets', 'uploader_id': 'BrooklynNets',
'duration': 324.484, 'duration': 324.484,
@ -763,10 +776,11 @@ class TwitterIE(TwitterBaseIE):
'id': '1577855447914409984', 'id': '1577855447914409984',
'display_id': '1577855540407197696', 'display_id': '1577855540407197696',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:9d198efb93557b8f8d5b78c480407214', 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
'description': 'md5:b9c3699335447391d11753ab21c70a74', 'description': 'md5:b9c3699335447391d11753ab21c70a74',
'upload_date': '20221006', 'upload_date': '20221006',
'uploader': 'oshtru', 'channel_id': '143077138',
'uploader': 'Oshtru',
'uploader_id': 'oshtru', 'uploader_id': 'oshtru',
'uploader_url': 'https://twitter.com/oshtru', 'uploader_url': 'https://twitter.com/oshtru',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
@ -784,9 +798,10 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': { 'info_dict': {
'id': '1577719286659006464', 'id': '1577719286659006464',
'title': 'Ultima - Test', 'title': 'Ultima Reload - Test',
'description': 'Test https://t.co/Y3KEZD7Dad', 'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima', 'channel_id': '168922496',
'uploader': 'Ultima Reload',
'uploader_id': 'UltimaShadowX', 'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005', 'upload_date': '20221005',
@ -808,6 +823,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:95aea692fda36a12081b9629b02daa92', 'description': 'md5:95aea692fda36a12081b9629b02daa92',
'channel_id': '1094109584',
'uploader': 'Max Olson', 'uploader': 'Max Olson',
'uploader_id': 'MesoMax919', 'uploader_id': 'MesoMax919',
'uploader_url': 'https://twitter.com/MesoMax919', 'uploader_url': 'https://twitter.com/MesoMax919',
@ -830,6 +846,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'description': str, 'description': str,
'channel_id': '1217167793541480450',
'uploader': str, 'uploader': str,
'uploader_id': 'Rizdraws', 'uploader_id': 'Rizdraws',
'uploader_url': 'https://twitter.com/Rizdraws', 'uploader_url': 'https://twitter.com/Rizdraws',
@ -840,7 +857,8 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 18, 'age_limit': 18,
'tags': [] 'tags': [],
'_old_archive_ids': ['twitter 1575199173472927762'],
}, },
'params': {'skip_download': 'The media could not be played'}, 'params': {'skip_download': 'The media could not be played'},
'skip': 'Requires authentication', 'skip': 'Requires authentication',
@ -852,6 +870,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1395079556562706435', 'id': '1395079556562706435',
'title': str, 'title': str,
'tags': [], 'tags': [],
'channel_id': '21539378',
'uploader': str, 'uploader': str,
'like_count': int, 'like_count': int,
'upload_date': '20210519', 'upload_date': '20210519',
@ -869,6 +888,7 @@ class TwitterIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '1578353380363501568', 'id': '1578353380363501568',
'title': str, 'title': str,
'channel_id': '2195866214',
'uploader_id': 'DavidToons_', 'uploader_id': 'DavidToons_',
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
@ -888,6 +908,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1578401165338976258', 'id': '1578401165338976258',
'title': str, 'title': str,
'description': 'md5:659a6b517a034b4cee5d795381a2dc41', 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
'channel_id': '19338359',
'uploader': str, 'uploader': str,
'uploader_id': 'primevideouk', 'uploader_id': 'primevideouk',
'timestamp': 1665155137, 'timestamp': 1665155137,
@ -929,6 +950,7 @@ class TwitterIE(TwitterBaseIE):
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'comment_count': int, 'comment_count': int,
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'channel_id': '80082014',
'repost_count': int, 'repost_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'upload_date': '20221208', 'upload_date': '20221208',
@ -946,6 +968,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1', 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1670459604.0, 'timestamp': 1670459604.0,
'channel_id': '80082014',
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'uploader': 'Jocelyn Laidlaw', 'uploader': 'Jocelyn Laidlaw',
'repost_count': int, 'repost_count': int,
@ -972,6 +995,7 @@ class TwitterIE(TwitterBaseIE):
'title': '뽀 - 아 최우제 이동속도 봐', 'title': '뽀 - 아 최우제 이동속도 봐',
'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB', 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
'duration': 24.598, 'duration': 24.598,
'channel_id': '1281839411068432384',
'uploader': '', 'uploader': '',
'uploader_id': 's2FAKER', 'uploader_id': 's2FAKER',
'uploader_url': 'https://twitter.com/s2FAKER', 'uploader_url': 'https://twitter.com/s2FAKER',
@ -985,6 +1009,7 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'], '_old_archive_ids': ['twitter 1621117700482416640'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2', 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
'info_dict': { 'info_dict': {
@ -992,6 +1017,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1599108751385972737', 'display_id': '1599108751385972737',
'ext': 'mp4', 'ext': 'mp4',
'title': '\u06ea - \U0001F48B', 'title': '\u06ea - \U0001F48B',
'channel_id': '1347791436809441283',
'uploader_url': 'https://twitter.com/hlo_again', 'uploader_url': 'https://twitter.com/hlo_again',
'like_count': int, 'like_count': int,
'uploader_id': 'hlo_again', 'uploader_id': 'hlo_again',
@ -1014,6 +1040,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1600009362759733248', 'id': '1600009362759733248',
'display_id': '1600009574919962625', 'display_id': '1600009574919962625',
'ext': 'mp4', 'ext': 'mp4',
'channel_id': '211814412',
'uploader_url': 'https://twitter.com/MunTheShinobi', 'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml', 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
@ -1061,6 +1088,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1084,6 +1112,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1117,7 +1146,7 @@ class TwitterIE(TwitterBaseIE):
}, },
'add_ie': ['TwitterBroadcast'], 'add_ie': ['TwitterBroadcast'],
}, { }, {
# Animated gif and quote tweet video, with syndication API # Animated gif and quote tweet video
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950', 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
'playlist_mincount': 2, 'playlist_mincount': 2,
'info_dict': { 'info_dict': {
@ -1125,6 +1154,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'BAKOON - https://t.co/zom968d0a0', 'title': 'BAKOON - https://t.co/zom968d0a0',
'description': 'https://t.co/zom968d0a0', 'description': 'https://t.co/zom968d0a0',
'tags': [], 'tags': [],
'channel_id': '1263540390',
'uploader': 'BAKOON', 'uploader': 'BAKOON',
'uploader_id': 'BAKKOOONN', 'uploader_id': 'BAKKOOONN',
'uploader_url': 'https://twitter.com/BAKKOOONN', 'uploader_url': 'https://twitter.com/BAKKOOONN',
@ -1132,19 +1162,21 @@ class TwitterIE(TwitterBaseIE):
'timestamp': 1693254077.0, 'timestamp': 1693254077.0,
'upload_date': '20230828', 'upload_date': '20230828',
'like_count': int, 'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}}, 'skip': 'Requires authentication',
'expected_warnings': ['Not all metadata'],
}, { }, {
# "stale tweet" with typename "TweetWithVisibilityResults" # "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154', 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536', 'md5': '511377ff8dfa7545307084dca4dce319',
'info_dict': { 'info_dict': {
'id': '1724883339285544960', 'id': '1724883339285544960',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c', 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164', 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154', 'display_id': '1724884212803834154',
'channel_id': '337808606',
'uploader': 'Robert F. Kennedy Jr', 'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr', 'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr', 'uploader_url': 'https://twitter.com/RobertKennedyJr',
@ -1386,6 +1418,7 @@ class TwitterIE(TwitterBaseIE):
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')), 'timestamp': unified_timestamp(status.get('created_at')),
'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'), 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')), 'like_count': int_or_none(status.get('favorite_count')),

View File

@ -10,7 +10,8 @@ from ..utils import (
class UtreonIE(InfoExtractor): class UtreonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?utreon\.com/v/(?P<id>[\w-]+)' IE_NAME = 'playeur'
_VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://utreon.com/v/z_I7ikQbuDw', 'url': 'https://utreon.com/v/z_I7ikQbuDw',
'info_dict': { 'info_dict': {
@ -19,8 +20,9 @@ class UtreonIE(InfoExtractor):
'title': 'Freedom Friday meditation - Rising in the wind', 'title': 'Freedom Friday meditation - Rising in the wind',
'description': 'md5:a9bf15a42434a062fe313b938343ad1b', 'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
'uploader': 'Heather Dawn Elemental Health', 'uploader': 'Heather Dawn Elemental Health',
'thumbnail': 'https://data-1.utreon.com/v/MG/M2/NT/z_I7ikQbuDw/z_I7ikQbuDw_preview.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210723', 'release_date': '20210723',
'duration': 586,
} }
}, { }, {
'url': 'https://utreon.com/v/jerJw5EOOVU', 'url': 'https://utreon.com/v/jerJw5EOOVU',
@ -28,10 +30,11 @@ class UtreonIE(InfoExtractor):
'id': 'jerJw5EOOVU', 'id': 'jerJw5EOOVU',
'ext': 'mp4', 'ext': 'mp4',
'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]', 'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
'description': 'md5:61ee6c2da98be51b04b969ca80273aaa', 'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
'uploader': 'Frases e Poemas Quotes and Poems', 'uploader': 'Frases e Poemas Quotes and Poems',
'thumbnail': 'https://data-1.utreon.com/v/Mz/Zh/ND/jerJw5EOOVU/jerJw5EOOVU_89af85470a4b16eededde7f8674c96d9_cover.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210723', 'release_date': '20210723',
'duration': 60,
} }
}, { }, {
'url': 'https://utreon.com/v/C4ZxXhYBBmE', 'url': 'https://utreon.com/v/C4ZxXhYBBmE',
@ -39,10 +42,11 @@ class UtreonIE(InfoExtractor):
'id': 'C4ZxXhYBBmE', 'id': 'C4ZxXhYBBmE',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bidens Capital Gains Tax Rate to Test Worlds Highest', 'title': 'Bidens Capital Gains Tax Rate to Test Worlds Highest',
'description': 'md5:fb5a6c2e506f013cc76f133f673bc5c8', 'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
'uploader': 'Nomad Capitalist', 'uploader': 'Nomad Capitalist',
'thumbnail': 'https://data-1.utreon.com/v/ZD/k1/Mj/C4ZxXhYBBmE/C4ZxXhYBBmE_628342076198c9c06dd6b2c665978584_cover.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210723', 'release_date': '20210723',
'duration': 884,
} }
}, { }, {
'url': 'https://utreon.com/v/Y-stEH-FBm8', 'url': 'https://utreon.com/v/Y-stEH-FBm8',
@ -52,15 +56,28 @@ class UtreonIE(InfoExtractor):
'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]', 'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f', 'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
'uploader': 'Merryweather Comics', 'uploader': 'Merryweather Comics',
'thumbnail': 'https://data-1.utreon.com/v/MT/E4/Zj/Y-stEH-FBm8/Y-stEH-FBm8_5290676a41a4a1096db133b09f54f77b_cover.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210718', 'release_date': '20210718',
}}, 'duration': 151,
] }
}, {
'url': 'https://playeur.com/v/Wzqp-UrxSeu',
'info_dict': {
'id': 'Wzqp-UrxSeu',
'ext': 'mp4',
'title': 'Update: Clockwork Basilisk Books on the Way!',
'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
'uploader': 'Forgotten Weapons',
'release_date': '20240208',
'thumbnail': r're:^https?://.+\.jpg',
'duration': 262,
}
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
json_data = self._download_json( json_data = self._download_json(
'https://api.utreon.com/v1/videos/' + video_id, 'https://api.playeur.com/v1/videos/' + video_id,
video_id) video_id)
videos_json = json_data['videos'] videos_json = json_data['videos']
formats = [{ formats = [{

View File

@ -2068,7 +2068,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Voyeur Girl', 'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312', 'upload_date': '20190312',
'artist': 'Stephen', 'artists': ['Stephen'],
'creators': ['Stephen'],
'track': 'Voyeur Girl', 'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear', 'album': 'it\'s too much love to know my dear',
'release_date': '20190313', 'release_date': '20190313',
@ -2081,7 +2082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel': 'Stephen', # TODO: should be "Stephen - Topic" 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
'uploader': 'Stephen', 'uploader': 'Stephen',
'availability': 'public', 'availability': 'public',
'creator': 'Stephen',
'duration': 169, 'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0, 'age_limit': 0,
@ -4386,7 +4386,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
release_year = release_date[:4] release_year = release_date[:4]
info.update({ info.update({
'album': mobj.group('album'.strip()), 'album': mobj.group('album'.strip()),
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 'artists': ([a] if (a := mobj.group('clean_artist'))
else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(), 'track': mobj.group('track').strip(),
'release_date': release_date, 'release_date': release_date,
'release_year': int_or_none(release_year), 'release_year': int_or_none(release_year),
@ -4532,7 +4533,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mrr_title == 'Album': if mrr_title == 'Album':
info['album'] = mrr_contents_text info['album'] = mrr_contents_text
elif mrr_title == 'Artist': elif mrr_title == 'Artist':
info['artist'] = mrr_contents_text info['artists'] = [mrr_contents_text] if mrr_contents_text else None
elif mrr_title == 'Song': elif mrr_title == 'Song':
info['track'] = mrr_contents_text info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges'))) owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
@ -4566,7 +4567,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if fmt.get('protocol') == 'm3u8_native': if fmt.get('protocol') == 'm3u8_native':
fmt['__needs_testing'] = True fmt['__needs_testing'] = True
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k) v = info.get(s_k)
if v: if v:
info[d_k] = v info[d_k] = v

View File

@ -258,10 +258,10 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
# Forward urllib3 debug messages to our logger # Forward urllib3 debug messages to our logger
logger = logging.getLogger('urllib3') logger = logging.getLogger('urllib3')
handler = Urllib3LoggingHandler(logger=self._logger) self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
handler.setFormatter(logging.Formatter('requests: %(message)s')) self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
handler.addFilter(Urllib3LoggingFilter()) self.__logging_handler.addFilter(Urllib3LoggingFilter())
logger.addHandler(handler) logger.addHandler(self.__logging_handler)
# TODO: Use a logger filter to suppress pool reuse warning instead # TODO: Use a logger filter to suppress pool reuse warning instead
logger.setLevel(logging.ERROR) logger.setLevel(logging.ERROR)
@ -276,6 +276,9 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
def close(self): def close(self):
self._clear_instances() self._clear_instances()
# Remove the logging handler that contains a reference to our logger
# See: https://github.com/yt-dlp/yt-dlp/issues/8922
logging.getLogger('urllib3').removeHandler(self.__logging_handler)
def _check_extensions(self, extensions): def _check_extensions(self, extensions):
super()._check_extensions(extensions) super()._check_extensions(extensions)

View File

@ -90,10 +90,12 @@ class WebsocketsRH(WebSocketRequestHandler):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.__logging_handlers = {}
for name in ('websockets.client', 'websockets.server'): for name in ('websockets.client', 'websockets.server'):
logger = logging.getLogger(name) logger = logging.getLogger(name)
handler = logging.StreamHandler(stream=sys.stdout) handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s')) handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
self.__logging_handlers[name] = handler
logger.addHandler(handler) logger.addHandler(handler)
if self.verbose: if self.verbose:
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
@ -103,6 +105,12 @@ class WebsocketsRH(WebSocketRequestHandler):
extensions.pop('timeout', None) extensions.pop('timeout', None)
extensions.pop('cookiejar', None) extensions.pop('cookiejar', None)
def close(self):
# Remove the logging handler that contains a reference to our logger
# See: https://github.com/yt-dlp/yt-dlp/issues/8922
for name, handler in self.__logging_handlers.items():
logging.getLogger(name).removeHandler(handler)
def _send(self, request): def _send(self, request):
timeout = float(request.extensions.get('timeout') or self.timeout) timeout = float(request.extensions.get('timeout') or self.timeout)
headers = self._merge_headers(request.headers) headers = self._merge_headers(request.headers)

View File

@ -196,9 +196,12 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
raise raise
return self.check_values(self.values, self.largs) return self.check_values(self.values, self.largs)
def error(self, msg): def _generate_error_message(self, msg):
msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n' msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg) return f'{self.get_usage()}\n{msg}' if self.usage else msg
def error(self, msg):
raise optparse.OptParseError(self._generate_error_message(msg))
def _get_args(self, args): def _get_args(self, args):
return sys.argv[1:] if args is None else list(args) return sys.argv[1:] if args is None else list(args)
@ -476,8 +479,8 @@ def create_parser():
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': [],
} }
}, help=( }, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc ' 'Options that can help keep compatibility with youtube-dl or youtube-dlc '

View File

@ -86,11 +86,14 @@ class PluginFinder(importlib.abc.MetaPathFinder):
parts = Path(*fullname.split('.')) parts = Path(*fullname.split('.'))
for path in orderedSet(candidate_locations, lazy=True): for path in orderedSet(candidate_locations, lazy=True):
candidate = path / parts candidate = path / parts
if candidate.is_dir(): try:
yield candidate if candidate.is_dir():
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate yield candidate
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate
except PermissionError as e:
write_string(f'Permission error while accessing modules in "{e.filename}"\n')
def find_spec(self, fullname, path=None, target=None): def find_spec(self, fullname, path=None, target=None):
if fullname not in self.packages: if fullname not in self.packages:

View File

@ -738,9 +738,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
def add(meta_list, info_list=None): def add(meta_list, info_list=None):
value = next(( value = next((
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None) if info.get(key) is not None), None)
if value not in ('', None): if value not in ('', None):
value = ', '.join(map(str, variadic(value)))
value = value.replace('\0', '') # nul character cannot be passed in command line value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
@ -754,10 +755,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
add(('description', 'synopsis'), 'description') add(('description', 'synopsis'), 'description')
add(('purl', 'comment'), 'webpage_url') add(('purl', 'comment'), 'webpage_url')
add('track', 'track_number') add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id')) add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id'))
add('genre') add('composer', ('composer', 'composers'))
add('genre', ('genre', 'genres'))
add('album') add('album')
add('album_artist') add('album_artist', ('album_artist', 'album_artists'))
add('disc', 'disc_number') add('disc', 'disc_number')
add('show', 'series') add('show', 'series')
add('season_number') add('season_number')