#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""音乐缓存扫描器 扫描cache/music目录中的音乐文件，提取元数据，生成本地歌单.

依赖安装: pip install mutagen
"""

import hashlib
import json
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

try:
    from mutagen import File as MutagenFile
    from mutagen.id3 import ID3NoHeaderError
except ImportError:
    print("错误: 需要安装 mutagen 库")
    print("请运行: pip install mutagen")
    sys.exit(1)

# Project root directory: the parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).parent.parent


class MusicMetadata:
    """Metadata describing a single music file.

    File-system facts (name, size, timestamps and a partial content hash) are
    collected by ``__init__``.  Tag and stream information (title, artist,
    duration, ...) stays ``None`` until :meth:`extract_metadata` fills it in.
    """

    # Only this many leading bytes are hashed, so large files stay cheap.
    _HASH_PREFIX_BYTES = 1024 * 1024
    # First run of four digits inside a date-like tag value.
    _YEAR_PATTERN = re.compile(r"\d{4}")

    def __init__(self, file_path: Path):
        """Collect the file-system level facts for ``file_path``.

        Args:
            file_path: Path of the music file.  It must exist, because
                ``stat()`` is called on it.

        Raises:
            OSError: If the file cannot be stat'ed.
        """
        self.file_path = file_path
        self.filename = file_path.name
        self.file_id = file_path.stem  # file name without extension, i.e. the song ID

        # One stat() call serves all three fields, so they describe the same
        # snapshot of the file.
        file_stat = file_path.stat()
        self.file_size = file_stat.st_size
        # NOTE: st_ctime is the creation time on Windows, but the time of the
        # last metadata change on most Unix systems.
        self.creation_time = datetime.fromtimestamp(file_stat.st_ctime)
        self.modification_time = datetime.fromtimestamp(file_stat.st_mtime)

        # Metadata extracted from the file itself.
        self.title = None
        self.artist = None
        self.album = None
        self.genre = None
        self.year = None
        self.duration = None  # seconds
        self.bitrate = None
        self.sample_rate = None

        # Partial content hash (used for de-duplication).
        self.file_hash = self._calculate_hash()

    def _calculate_hash(self) -> str:
        """Return the first 16 hex digits of the MD5 of the file's first 1 MB.

        Only a prefix of the file is read so that large files are not slow to
        hash.  Returns the placeholder ``"unknown"`` when the file cannot be
        read or the digest cannot be created.
        """
        try:
            digest = hashlib.md5()
            with open(self.file_path, "rb") as stream:
                digest.update(stream.read(self._HASH_PREFIX_BYTES))
        except (OSError, ValueError):
            # OSError: the file is unreadable.  ValueError: presumably raised
            # by hashlib builds that refuse MD5 - kept so hashing stays
            # best-effort instead of aborting the scan.
            return "unknown"
        return digest.hexdigest()[:16]

    def extract_metadata(self) -> bool:
        """Read stream information and tags from the file via mutagen.

        Returns:
            ``True`` when the file was parsed (even if it carries no tags),
            ``False`` when mutagen does not recognise the file or an error
            occurred while reading it.
        """
        try:
            audio_file = MutagenFile(self.file_path)
            if audio_file is None:
                return False

            # Stream information.
            if hasattr(audio_file, "info"):
                info = audio_file.info
                self.duration = getattr(info, "length", None)
                self.bitrate = getattr(info, "bitrate", None)
                self.sample_rate = getattr(info, "sample_rate", None)

            tags = audio_file.tags if audio_file.tags else {}

            # Each field lists the candidate keys that are tried in order.
            self.title = self._get_tag_value(tags, ["TIT2", "TITLE", "\xa9nam"])
            self.artist = self._get_tag_value(tags, ["TPE1", "ARTIST", "\xa9ART"])
            self.album = self._get_tag_value(tags, ["TALB", "ALBUM", "\xa9alb"])
            self.genre = self._get_tag_value(tags, ["TCON", "GENRE", "\xa9gen"])

            year_raw = self._get_tag_value(tags, ["TDRC", "DATE", "YEAR", "\xa9day"])
            parsed_year = self._parse_year(year_raw)
            if parsed_year is not None:
                self.year = parsed_year

            return True

        except ID3NoHeaderError:
            # A file without an ID3 header is not an error.
            return True
        except Exception as e:
            print(f"提取元数据失败 {self.filename}: {e}")
            return False

    @classmethod
    def _parse_year(cls, raw_value) -> Optional[int]:
        """Extract a year from a tag value such as ``"2020"`` or ``"2019-05-01"``.

        The first run of four digits wins, so ``"20200101"`` yields 2020 rather
        than the whole number.  A shorter all-decimal value (e.g. ``"99"``) is
        returned as-is.  Returns ``None`` when no year can be found.
        """
        if not raw_value:
            return None
        text = str(raw_value).strip()
        match = cls._YEAR_PATTERN.search(text)
        if match:
            return int(match.group())
        if text.isdecimal():
            return int(text)
        return None

    def _get_tag_value(self, tags: dict, tag_names: List[str]) -> Optional[str]:
        """Return the first non-empty value found under any of ``tag_names``.

        Args:
            tags: Mapping of tag key to value.  A value may be a list of
                values, an object exposing a ``text`` list, or anything that
                can be converted with ``str()``.
            tag_names: Candidate keys, tried in order.

        Returns:
            The value as a string, or ``None`` when no candidate key holds a
            non-empty value.
        """
        for tag_name in tag_names:
            if tag_name not in tags:
                continue
            value = tags[tag_name]

            # mutagen's ID3 text frames keep their values in a ``text`` list,
            # and str(frame) joins multiple values with NUL characters; take
            # the list so only the first value is used.
            frame_values = getattr(value, "text", None)
            if isinstance(frame_values, list):
                value = frame_values

            if isinstance(value, list):
                if not value:
                    continue
                text = str(value[0])
            elif value:
                text = str(value)
            else:
                continue

            # A blank value falls through to the next candidate key.
            if text:
                return text
        return None

    def format_duration(self) -> str:
        """Return the duration as ``MM:SS``, or ``"未知"`` when it is not known."""
        if self.duration is None:
            return "未知"

        minutes, seconds = divmod(int(self.duration), 60)
        return f"{minutes:02d}:{seconds:02d}"

    def format_file_size(self) -> str:
        """Return the file size with one decimal and a B/KB/MB/GB/TB unit."""
        size = self.file_size
        for unit in ("B", "KB", "MB", "GB"):
            if size < 1024.0:
                return f"{size:.1f} {unit}"
            size /= 1024.0
        return f"{size:.1f} TB"

    def to_dict(self) -> Dict:
        """Return the metadata as a dictionary of JSON-serialisable values."""
        return {
            "file_id": self.file_id,
            "filename": self.filename,
            "title": self.title,
            "artist": self.artist,
            "album": self.album,
            "genre": self.genre,
            "year": self.year,
            "duration": self.duration,
            "duration_formatted": self.format_duration(),
            "bitrate": self.bitrate,
            "sample_rate": self.sample_rate,
            "file_size": self.file_size,
            "file_size_formatted": self.format_file_size(),
            "file_hash": self.file_hash,
            "creation_time": self.creation_time.isoformat(),
            "modification_time": self.modification_time.isoformat(),
        }


class MusicCacheScanner:
    """Scan a music cache directory and manage the resulting playlist.

    ``scan_cache`` fills ``playlist`` with one ``MusicMetadata`` per readable
    music file and records counters in ``scan_stats``.  The remaining methods
    de-duplicate, sort, print, search, group and export that playlist.
    """

    # Lower-case file suffixes that are treated as music files.
    SUPPORTED_EXTENSIONS = (".mp3", ".m4a", ".flac", ".wav", ".ogg")
    # Placeholder hash carried by files whose content could not be hashed.
    _UNKNOWN_HASH = "unknown"
    # Formats accepted by export_playlist.
    _EXPORT_FORMATS = ("json", "m3u")

    def __init__(self, cache_dir: Path = None):
        """Create a scanner.

        Args:
            cache_dir: Directory to scan.  Defaults to ``cache/music`` below
                ``PROJECT_ROOT``.
        """
        self.cache_dir = (
            Path(cache_dir) if cache_dir else PROJECT_ROOT / "cache" / "music"
        )
        self.playlist: List[MusicMetadata] = []
        self.scan_stats = self._new_stats()

    @staticmethod
    def _new_stats() -> Dict:
        """Return a fresh, zeroed statistics dictionary."""
        return {
            "total_files": 0,
            "success_count": 0,
            "error_count": 0,
            "total_duration": 0,
            "total_size": 0,
        }

    def _find_music_files(self) -> List[Path]:
        """Return the music files directly inside ``cache_dir``, sorted by path.

        Suffixes are compared case-insensitively (``song.MP3`` is found too)
        and directories are skipped.
        """
        return sorted(
            path
            for path in self.cache_dir.glob("*")
            if path.suffix.lower() in self.SUPPORTED_EXTENSIONS and path.is_file()
        )

    def scan_cache(self) -> bool:
        """Scan the cache directory and rebuild ``playlist`` and ``scan_stats``.

        Returns:
            ``False`` when the directory does not exist or holds no music
            files (the previous state is left untouched), ``True`` otherwise.
        """
        print(f"🎵 开始扫描音乐缓存目录: {self.cache_dir}")

        if not self.cache_dir.exists():
            print(f"❌ 缓存目录不存在: {self.cache_dir}")
            return False

        music_files = self._find_music_files()
        if not music_files:
            print("📁 缓存目录中没有找到音乐文件")
            return False

        # Start from a clean state so that scanning twice replaces the
        # results instead of accumulating them.
        self.playlist = []
        self.scan_stats = self._new_stats()

        total = len(music_files)
        self.scan_stats["total_files"] = total
        print(f"📊 找到 {total} 个音乐文件")

        for index, file_path in enumerate(music_files, 1):
            print(f"🔍 [{index}/{total}] 扫描: {file_path.name}")

            # Per-file boundary: one broken file must not abort the scan.
            try:
                metadata = MusicMetadata(file_path)
                extracted = metadata.extract_metadata()
            except Exception as e:
                self.scan_stats["error_count"] += 1
                print(f"   ❌ 处理失败: {e}")
                continue

            if not extracted:
                self.scan_stats["error_count"] += 1
                print("   ❌ 元数据提取失败")
                continue

            self._record_success(metadata)

        return True

    def _record_success(self, metadata) -> None:
        """Add ``metadata`` to the playlist, update the totals and report it."""
        self.playlist.append(metadata)
        self.scan_stats["success_count"] += 1

        if metadata.duration:
            self.scan_stats["total_duration"] += metadata.duration
        self.scan_stats["total_size"] += metadata.file_size

        display_title = metadata.title or "未知标题"
        display_artist = metadata.artist or "未知艺术家"
        print(
            f"   ✅ {display_title} - {display_artist} ({metadata.format_duration()})"
        )

    def remove_duplicates(self):
        """Drop playlist entries whose content hash was already seen.

        The first entry with a given hash is kept.  Entries carrying the
        ``"unknown"`` placeholder hash are never treated as duplicates of each
        other, because the placeholder says nothing about their content.
        """
        seen_hashes = set()
        unique_playlist = []
        duplicates = []

        for metadata in self.playlist:
            file_hash = metadata.file_hash
            if file_hash != self._UNKNOWN_HASH and file_hash in seen_hashes:
                duplicates.append(metadata)
                continue
            seen_hashes.add(file_hash)
            unique_playlist.append(metadata)

        if duplicates:
            print(f"🔄 发现 {len(duplicates)} 个重复文件:")
            for dup in duplicates:
                print(f"   - {dup.filename}")

        self.playlist = unique_playlist

    def sort_playlist(self, sort_by: str = "artist"):
        """Sort the playlist in place.

        Args:
            sort_by: One of ``artist``, ``title``, ``album``, ``duration``,
                ``file_size`` or ``creation_time``.  Any other value leaves
                the playlist unchanged and prints a message.
        """
        sort_functions = {
            "artist": lambda x: (
                x.artist or "Unknown",
                x.album or "Unknown",
                x.title or "Unknown",
            ),
            "title": lambda x: x.title or "Unknown",
            "album": lambda x: (x.album or "Unknown", x.artist or "Unknown"),
            "duration": lambda x: x.duration or 0,
            "file_size": lambda x: x.file_size,
            "creation_time": lambda x: x.creation_time,
        }

        sort_key = sort_functions.get(sort_by)
        if sort_key is None:
            print(f"❌ 不支持的排序方式: {sort_by}")
            return

        self.playlist.sort(key=sort_key)
        print(f"📋 歌单已按 {sort_by} 排序")

    def print_statistics(self):
        """Print the counters and totals gathered by the last scan."""
        stats = self.scan_stats
        total_files = stats["total_files"]
        success_count = stats["success_count"]

        # Guard against division by zero when nothing has been scanned yet.
        success_rate = success_count / total_files * 100 if total_files else 0.0

        print("\n📊 扫描统计:")
        print(f"   总文件数: {total_files}")
        print(f"   成功处理: {success_count}")
        print(f"   处理失败: {stats['error_count']}")
        print(f"   成功率: {success_rate:.1f}%")

        # The accumulated duration is a float; truncate it once so hours and
        # minutes are printed as whole numbers.
        total_hours, remainder = divmod(int(stats["total_duration"]), 3600)
        total_minutes = remainder // 60
        print(f"   总播放时长: {total_hours}小时{total_minutes}分钟")

        total_size_mb = stats["total_size"] / (1024 * 1024)
        print(f"   总文件大小: {total_size_mb:.1f} MB")

        if success_count > 0:
            avg_duration = stats["total_duration"] / success_count
            avg_size = stats["total_size"] / success_count
            print(f"   平均时长: {int(avg_duration//60)}:{int(avg_duration%60):02d}")
            print(f"   平均大小: {avg_size/(1024*1024):.1f} MB")

    def print_playlist(self, limit: int = None):
        """Print the playlist.

        Args:
            limit: Maximum number of songs to print.  ``None`` prints all of
                them; ``0`` prints none.
        """
        print(f"\n🎵 本地音乐歌单 (共 {len(self.playlist)} 首)")
        print("=" * 80)

        if limit is None:
            shown = self.playlist
        else:
            shown = self.playlist[: max(limit, 0)]

        for i, metadata in enumerate(shown, 1):
            title = metadata.title or "未知标题"
            artist = metadata.artist or "未知艺术家"
            album = metadata.album or "未知专辑"
            duration = metadata.format_duration()

            print(f"{i:3d}. {title}")
            print(f"     艺术家: {artist}")
            print(f"     专辑: {album}")
            print(f"     时长: {duration} | 文件ID: {metadata.file_id}")
            print()

        remaining = len(self.playlist) - len(shown)
        if remaining > 0:
            print(f"... 还有 {remaining} 首歌曲")

    def export_playlist(self, output_file: Path = None, format: str = "json"):
        """Write the playlist to a file.

        Args:
            output_file: Destination path.  Defaults to a timestamped
                ``local_playlist_*`` file below ``PROJECT_ROOT``.
            format: ``"json"`` or ``"m3u"`` (case-insensitive).

        Returns:
            The path that was written, or ``None`` when the format is not
            supported or writing failed.
        """
        export_format = format.lower()
        if export_format not in self._EXPORT_FORMATS:
            # Previously an unknown format reported success without writing.
            print(f"❌ 导出失败: 不支持的格式 {format}")
            return None

        if not output_file:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = PROJECT_ROOT / f"local_playlist_{timestamp}.{export_format}"

        try:
            if export_format == "json":
                self._write_json(output_file)
            else:
                self._write_m3u(output_file)
        except (OSError, TypeError, ValueError) as e:
            # OSError: the file cannot be written; TypeError/ValueError: a
            # value cannot be serialised or converted.
            print(f"❌ 导出失败: {e}")
            return None

        print(f"📄 歌单已导出到: {output_file}")
        return output_file

    def _write_json(self, output_file) -> None:
        """Write the playlist and the scan statistics as a JSON document."""
        playlist_data = {
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "cache_directory": str(self.cache_dir),
                "total_songs": len(self.playlist),
                "statistics": self.scan_stats,
            },
            "playlist": [metadata.to_dict() for metadata in self.playlist],
        }
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(playlist_data, f, ensure_ascii=False, indent=2)

    def _write_m3u(self, output_file) -> None:
        """Write the playlist as an extended M3U file."""
        with open(output_file, "w", encoding="utf-8") as f:
            f.write("#EXTM3U\n")
            for metadata in self.playlist:
                title = metadata.title or metadata.filename
                artist = metadata.artist or "Unknown Artist"
                # -1 is written when the duration is not known.
                duration = int(metadata.duration) if metadata.duration else -1

                f.write(f"#EXTINF:{duration},{artist} - {title}\n")
                f.write(f"{metadata.file_path}\n")

    def search_songs(self, query: str) -> List[MusicMetadata]:
        """Return the songs whose title, artist, album or file name contains ``query``.

        The comparison is case-insensitive.  The fields are joined with
        spaces before matching.
        """
        needle = query.lower()

        def haystack(metadata) -> str:
            fields = (
                metadata.title,
                metadata.artist,
                metadata.album,
                metadata.filename,
            )
            return " ".join(filter(None, fields)).lower()

        return [
            metadata for metadata in self.playlist if needle in haystack(metadata)
        ]

    def get_artists(self) -> Dict[str, List[MusicMetadata]]:
        """Group the playlist by artist, in playlist order."""
        artists = {}
        for metadata in self.playlist:
            artists.setdefault(metadata.artist or "未知艺术家", []).append(metadata)
        return artists

    def get_albums(self) -> Dict[str, List[MusicMetadata]]:
        """Group the playlist by ``"<album> - <artist>"``, in playlist order."""
        albums = {}
        for metadata in self.playlist:
            album_key = (
                f"{metadata.album or '未知专辑'} - {metadata.artist or '未知艺术家'}"
            )
            albums.setdefault(album_key, []).append(metadata)
        return albums


def _read_line(prompt: str) -> Optional[str]:
    """Prompt on stdin and return the stripped reply, or ``None`` at end of input."""
    try:
        return input(prompt).strip()
    except EOFError:
        return None


def _print_song_groups(groups: Dict[str, list], icon: str) -> None:
    """Print every group as a heading followed by one line per song."""
    for name, songs in groups.items():
        print(f"\n{icon} {name} ({len(songs)} 首)")
        for song in songs:
            title = song.title or song.filename
            print(f"   - {title} ({song.format_duration()})")


def main():
    """Scan the music cache, print a summary and run the interactive menu.

    The scan result is de-duplicated and sorted by artist before the
    statistics and the first 20 songs are printed.  The menu then loops until
    the user chooses ``0`` or standard input is closed.
    """
    print("🎵 音乐缓存扫描器")
    print("=" * 50)

    scanner = MusicCacheScanner()

    # Nothing to show when the cache is missing or empty.
    if not scanner.scan_cache():
        return

    scanner.remove_duplicates()
    scanner.sort_playlist("artist")
    scanner.print_statistics()

    # Show only the first 20 songs up front.
    scanner.print_playlist(limit=20)

    sort_map = {
        "1": "artist",
        "2": "title",
        "3": "album",
        "4": "duration",
        "5": "file_size",
        "6": "creation_time",
    }

    # Interactive menu.
    while True:
        print("\n" + "=" * 50)
        print("选择操作:")
        print("1. 显示完整歌单")
        print("2. 按艺术家分组显示")
        print("3. 按专辑分组显示")
        print("4. 搜索歌曲")
        print("5. 导出歌单 (JSON)")
        print("6. 导出歌单 (M3U)")
        print("7. 重新排序")
        print("0. 退出")

        choice = _read_line("\n请选择 (0-7): ")

        # A closed stdin is treated like choosing "exit" instead of crashing.
        if choice is None or choice == "0":
            break
        elif choice == "1":
            scanner.print_playlist()
        elif choice == "2":
            _print_song_groups(scanner.get_artists(), "🎤")
        elif choice == "3":
            _print_song_groups(scanner.get_albums(), "💿")
        elif choice == "4":
            query = _read_line("请输入搜索关键词: ")
            if query:
                results = scanner.search_songs(query)
                if results:
                    print(f"\n🔍 找到 {len(results)} 首歌曲:")
                    for i, song in enumerate(results, 1):
                        title = song.title or song.filename
                        artist = song.artist or "未知艺术家"
                        print(f"   {i}. {title} - {artist} ({song.format_duration()})")
                else:
                    print("🔍 没有找到匹配的歌曲")
        elif choice == "5":
            scanner.export_playlist(format="json")
        elif choice == "6":
            scanner.export_playlist(format="m3u")
        elif choice == "7":
            print("排序选项:")
            print("1. 按艺术家")
            print("2. 按标题")
            print("3. 按专辑")
            print("4. 按时长")
            print("5. 按文件大小")
            print("6. 按创建时间")

            sort_choice = _read_line("请选择排序方式 (1-6): ")

            if sort_choice in sort_map:
                scanner.sort_playlist(sort_map[sort_choice])
                print("✅ 排序完成")
            else:
                # Tell the user instead of silently returning to the menu.
                print("❌ 无效选择")
        else:
            print("❌ 无效选择")

    print("\n👋 再见!")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the interactive menu.
        print("\n\n👋 用户中断，退出程序")
    except Exception as e:
        # Top-level boundary: report the failure with its traceback, then
        # exit with a non-zero status so callers can detect the error.
        print(f"\n❌ 程序异常: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)