FredAnderson
 
Hi, I cleaned up the script a bit (well, a lot); it now returns all the links correctly.
To run it you need to install the dependencies with pip (note that the script also uses requests, and tomllib requires Python 3.11 or newer):
pip install -U beautifulsoup4 requests
and you need to create a file called config.toml in the current directory, like this:
frm_login="mylogin"
frm_password="mypassword"
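For reference, tomllib (in the standard library since Python 3.11) parses that file into a plain dict whose keys match the login form's field names, so the script can POST it as-is. A quick sketch of just that step:

import tomllib

with open("config.toml", "rb") as f:  # tomllib requires binary mode
    config = tomllib.load(f)
print(config)  # -> {'frm_login': 'mylogin', 'frm_password': 'mypassword'}

Here's the full script: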
#!/usr/bin/env python3
from urllib.parse import urljoin
import logging
import os
import requests
import sys
import tomllib
from bs4 import BeautifulSoup

# configure logging
logger = logging.getLogger(os.path.basename(__file__))
logging.basicConfig(level=logging.INFO)

# global variables
CONFIG_FILE = os.path.join(os.getcwd(), "config.toml")
DOMAIN_URL = "https://www.karaoke-version.com"
LOGIN_URL = f"{DOMAIN_URL}/my/login.html"
TARGET_PREFIX = "/custombackingtrack/"  # not used yet; see the filter sketch at the end of this post

def get_download_page_url(base_url, page):
    download_url = f"{base_url}/my/download.html?page={page:d}"
    return download_url

def get_number_of_pages(session, domain_url):
    # Trick: requesting an absurdly high page number makes the site serve the
    # last page, whose pagination links reveal the total page count
    logger.debug("Opening download page %d", 999)
    url = get_download_page_url(domain_url, 999)
    response = session.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    # Get all elements with the class 'mr-1'
    pagination_elements = soup.select("a.mr-1")
    last_page_number = 1
    # Check if there are pagination elements
    if pagination_elements:
        # Get the last element's href attribute
        last_page_link = pagination_elements[-1]["href"]
        # Extract the page number from the href
        last_page_number = int(last_page_link.split("page=")[-1])
    return last_page_number

def get_song_urls(session, domain_url, total_pages):
    all_song_urls = set() # Using a set to automatically handle duplicates
    for page_number in range(1, total_pages + 1):
        url = get_download_page_url(domain_url, page_number)
        response = session.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        # Find all <a> elements within <td class="my-downloaded-files__song">
        song_elements = soup.select("td.my-downloaded-files__song a")
        # Extract the href attributes from the <a> elements and construct full URLs
        song_urls = [urljoin(domain_url, element["href"]) for element in song_elements]
        all_song_urls.update(song_urls) # Update the set with new URLs
    # Sort the collected URLs
    sorted_song_urls = sorted(all_song_urls)
    return sorted_song_urls

def run_main():
    logger.info("Load config from %s", CONFIG_FILE)
    with open(CONFIG_FILE, "rb") as f:
        my_config = tomllib.load(f)
    with requests.Session() as session:
        # Log in
        logger.info("Opening login page %s", LOGIN_URL)
        post = session.post(LOGIN_URL, data=my_config)
        # Raise on HTTP errors; note that a 200 alone only means the request
        # went through, not necessarily that the credentials were accepted
        post.raise_for_status()
        logger.info("Login successful. Getting number of pages")
        total_pages = get_number_of_pages(session, DOMAIN_URL)
        logger.info("Total number of page is %d", total_pages)
        sorted_urls = get_song_urls(session, DOMAIN_URL, total_pages)
        logger.info("Sorted Song URLs:")
        for url in sorted_urls:
            print(url)

if __name__ == "__main__":
    logger.debug("run main")
    sys.exit(run_main())
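To run it, save the script and call it with Python 3.11 or newer (tomllib isn't available before that); the filename here is just an example:

python3 get_song_links.py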
Now it returns an ordered list of all the correct links.
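One more note: TARGET_PREFIX is defined in the script but never used. If you only want the custom-backing-track links, a small filter like this sketch should work (assuming the song URLs keep the /custombackingtrack/ path segment):

from urllib.parse import urlparse

TARGET_PREFIX = "/custombackingtrack/"  # same constant as in the script

def filter_song_urls(urls):
    # Keep only URLs whose path starts with the target prefix
    return [u for u in urls if urlparse(u).path.startswith(TARGET_PREFIX)]

You could run it on sorted_urls before the print loop in run_main().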