Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 58 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ chfft = "0.3.4" # Fast Fourier transform
byteorder = "1.3.4" # Used for reading and writing binary structures
crc32fast = "1.2.0" # Used for the CRC-32 checksum in the binary signature
base64 = "0.12.3"
reqwest = { version = "0.10.7", features = ["blocking", "json"] }
reqwest = { version = "0.10.7", features = ["blocking", "json", "gzip"] }
html-escape = "0.2.13"
unicode-normalization = "0.1.23"
rodio = "0.13.1" # For reading WAV/MP3/FLAC/OGG files, resampling and playing audio.
clap = "2.33.2" # For argument parsing
cpal = "=0.13.3" # For recording audio
Expand Down
12 changes: 12 additions & 0 deletions src/core/http_thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::core::thread_messages::*;

use crate::fingerprinting::signature_format::DecodedSignature;
use crate::fingerprinting::communication::{recognize_song_from_signature, obtain_raw_cover_image};
use crate::fingerprinting::lyrics::fetch_genius_lyrics;

fn try_recognize_song(signature: DecodedSignature) -> Result<SongRecognizedMessage, Box<dyn Error>> {
let json_object = recognize_song_from_signature(&signature)?;
Expand Down Expand Up @@ -99,6 +100,17 @@ pub fn http_thread(http_rx: mpsc::Receiver<HTTPMessage>, gui_tx: glib::Sender<GU

microphone_tx.send(MicrophoneMessage::ProcessingDone).unwrap();
}
HTTPMessage::FetchLyrics(info) => {
match fetch_genius_lyrics(&info) {
Ok(lyrics) => {
gui_tx.send(GUIMessage::LyricsRecognized(lyrics)).unwrap();
}
Err(_) => {
// Clear lyrics if not found or on error.
gui_tx.send(GUIMessage::LyricsRecognized(String::new())).unwrap();
}
}
}
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/core/thread_messages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::fingerprinting::signature_format::DecodedSignature;
use crate::gui::preferences::Preferences;
#[cfg(feature = "gui")]
use crate::utils::csv_song_history::SongHistoryRecord;
use crate::fingerprinting::lyrics::LyricSearchInfo;

use std::thread;

Expand Down Expand Up @@ -59,6 +60,7 @@ pub enum GUIMessage {
MicrophoneRecording,
MicrophoneVolumePercent(f32),
SongRecognized(Box<SongRecognizedMessage>),
LyricsRecognized(String),
}

pub enum MicrophoneMessage {
Expand All @@ -74,4 +76,5 @@ pub enum ProcessingMessage {

/// Messages received by the HTTP thread (`http_thread`) over its `mpsc` channel.
pub enum HTTPMessage {
    /// Carries a boxed `DecodedSignature`; presumably a request to recognize the
    /// song it was computed from (the handling code is not shown here).
    RecognizeSignature(Box<DecodedSignature>),
    /// Requests a lyrics lookup for the given artist/song; the HTTP thread answers
    /// with `GUIMessage::LyricsRecognized`.
    FetchLyrics(LyricSearchInfo),
}
7 changes: 3 additions & 4 deletions src/fingerprinting/communication.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ use reqwest::header::HeaderMap;
use std::time::SystemTime;
use std::error::Error;
use std::time::Duration;
use rand::seq::SliceRandom;
use uuid::Uuid;

use crate::fingerprinting::signature_format::DecodedSignature;
use crate::fingerprinting::user_agent::USER_AGENTS;
use crate::fingerprinting::user_agent;

pub fn recognize_song_from_signature(signature: &DecodedSignature) -> Result<Value, Box<dyn Error>> {

Expand Down Expand Up @@ -35,7 +34,7 @@ pub fn recognize_song_from_signature(signature: &DecodedSignature) -> Result<Val

let mut headers = HeaderMap::new();

headers.insert("User-Agent", USER_AGENTS.choose(&mut rand::thread_rng()).unwrap().parse()?);
headers.insert("User-Agent", user_agent::random().parse()?);
headers.insert("Content-Language", "en_US".parse()?);

let client = reqwest::blocking::Client::new();
Expand All @@ -62,7 +61,7 @@ pub fn obtain_raw_cover_image(url: &str) -> Result<Vec<u8>, Box<dyn Error>> {

let mut headers = HeaderMap::new();

headers.insert("User-Agent", USER_AGENTS.choose(&mut rand::thread_rng()).unwrap().parse()?);
headers.insert("User-Agent", user_agent::random().parse()?);
headers.insert("Content-Language", "en_US".parse()?);

let client = reqwest::blocking::Client::new();
Expand Down
140 changes: 140 additions & 0 deletions src/fingerprinting/lyrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
use crate::fingerprinting::user_agent;
use regex::Regex;
use reqwest::header::HeaderMap;
use reqwest::StatusCode;
use std::error::Error;
use std::sync::OnceLock;
use std::time::Duration;
use unicode_normalization::UnicodeNormalization;

/// Artist and song title used to look up the lyrics of a song.
///
/// Both fields are free-form text; `fetch_genius_lyrics` takes care of turning
/// them into a URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LyricSearchInfo {
    /// Name of the artist, possibly listing several artists separated by `,` or `&`.
    pub artist_name: String,
    /// Title of the song, possibly with a "(feat. ...)" style suffix.
    pub song_name: String,
}

/// Fetches the lyrics of a song from genius.com and returns them as plain text.
///
/// The page URL is derived from the artist name and the song title (with any
/// "(feat. ...)" / "[ft. ...]" group removed). If that page does not exist, one
/// fallback URL is tried:
///
/// * if the artist name contains `,` or `&`, everything from that character on
///   is dropped, as it is probably a featured artist that Genius leaves out of
///   the URL;
/// * otherwise, if the song title contains parentheses, all parenthesised
///   groups are removed from it.
///
/// # Errors
///
/// Returns an error if an HTTP request fails, if no lyrics page is found
/// (`"lyrics not found"`), or if the page does not contain the expected
/// `lyrics-root` / `LyricsFooter` markers.
pub fn fetch_genius_lyrics(info: &LyricSearchInfo) -> Result<String, Box<dyn Error>> {
    static RE_PAREN: OnceLock<Regex> = OnceLock::new();
    static RE_FEAT_PAREN: OnceLock<Regex> = OnceLock::new();
    static RE_FEAT_BRACK: OnceLock<Regex> = OnceLock::new();

    let re_paren = RE_PAREN.get_or_init(|| Regex::new(r#"\(.*?\)"#).unwrap());
    let re_feat_paren =
        RE_FEAT_PAREN.get_or_init(|| Regex::new(r#"\(.*?(?:feat\.|ft\.).*?\)"#).unwrap());
    let re_feat_brack =
        RE_FEAT_BRACK.get_or_init(|| Regex::new(r#"\[.*?(?:feat\.|ft\.).*?\]"#).unwrap());

    // Remove parens/brackets with feat. or ft. in them e.g. Song Title (feat. XXX).
    let song = re_feat_paren.replace_all(&info.song_name, "");
    let song = re_feat_brack.replace_all(&song, "");

    let url = make_url(&format!("{}-{}", info.artist_name, song));

    let html = match fetch_lyrics_html(&url)? {
        Some(html) => html,
        None => {
            // Try one more time with a simplified artist name or song title.
            let retry_url = if let Some(index) = info.artist_name.find([',', '&']) {
                // If the artist name contains a comma or a & what comes after is probably
                // another artist name, so we remove that as Genius doesn't put featuring
                // artists in the URL. Both separators are ASCII, so `index` is a valid
                // char boundary.
                let artist_name = &info.artist_name[..index];
                Some(make_url(&format!("{}-{}", artist_name, song)))
            } else if song.contains('(') {
                // Removing all parentheses from the song title sometimes works.
                let song = re_paren.replace_all(&song, "");
                Some(make_url(&format!("{}-{}", info.artist_name, song)))
            } else {
                None
            };

            let retried = match retry_url {
                Some(url) => fetch_lyrics_html(&url)?,
                None => None,
            };
            retried.ok_or("lyrics not found")?
        }
    };

    extract_lyrics(&html)
}

/// Extracts the plain-text lyrics from the HTML of a Genius song page.
fn extract_lyrics(html: &str) -> Result<String, Box<dyn Error>> {
    static RE_TAG: OnceLock<Regex> = OnceLock::new();
    let re_tag = RE_TAG.get_or_init(|| Regex::new(r#"<.+?>"#).unwrap());

    // Reduce the amount of text we need to look at to find the lyrics. Lyrics are in
    // between the <div id="lyrics-root"> and <div class="LyricsFooter"> tags.
    let root_start = html
        .find("id=\"lyrics-root\"")
        .ok_or("lyrics-root not found")?;
    let after_root = &html[root_start..];
    // Search for the footer only *after* the root marker: an earlier occurrence would
    // otherwise yield an inverted range and panic when slicing.
    let root_end = after_root
        .find("class=\"LyricsFooter")
        .ok_or("LyricsFooter not found")?;
    let root = &after_root[..root_end];

    let mut lyrics = String::new();

    for container in root.split("data-lyrics-container=\"true\"").skip(1) {
        let container = container.trim().replace("<br/>", "\n");

        for line in container.lines() {
            // Remove all HTML tags. This also covers self-closing tags (`<.../>`), so no
            // second pass is needed.
            let stripped = re_tag.replace_all(line, "");
            // Clean up some remaining garbage: a tag cut in half by the container split
            // leaves a dangling `<div` and/or an attribute tail ending in `">`.
            let stripped = stripped.replace("<div", "");
            let text = stripped.rsplit("\">").next().unwrap_or("");

            // Exclude annotation lines such as "[Chorus]".
            if !text.starts_with('[') {
                lyrics.push_str(&html_escape::decode_html_entities(text));
                lyrics.push('\n');
            }
        }
    }
    Ok(lyrics.trim().to_string())
}

/// Downloads the HTML page at `url`.
///
/// Returns `Ok(None)` when the server answers 404 Not Found, so that the caller
/// can try another URL, and `Ok(Some(html))` on success.
///
/// # Errors
///
/// Returns an error if the request cannot be sent or times out (20 seconds), if
/// the server answers with any other 4xx/5xx status, or if the body cannot be
/// read as text.
fn fetch_lyrics_html(url: &str) -> Result<Option<String>, Box<dyn Error>> {
    let mut headers = HeaderMap::new();
    headers.insert("User-Agent", user_agent::random().parse()?);
    // NOTE(review): `Accept-Language` may have been intended here; kept as-is to match
    // the other requests in this project.
    headers.insert("Content-Language", "en_US".parse()?);

    let response = reqwest::blocking::Client::new()
        .get(url)
        .timeout(Duration::from_secs(20))
        .headers(headers)
        .send()?;

    if response.status() == StatusCode::NOT_FOUND {
        return Ok(None);
    }

    // Any other client/server error (403, 429, 5xx, ...) is a real failure: report it
    // instead of handing an error page to the lyrics parser.
    Ok(Some(response.error_for_status()?.text()?))
}

/// Builds the genius.com lyrics URL for a query of the form `"<artist>-<song>"`.
///
/// The query is reduced to a slug: accents are stripped, `&` becomes `and`,
/// the text is lower-cased, runs of whitespace, `-` and `_` collapse into a single
/// `-`, every other non-alphanumeric character is dropped, and the first
/// character is upper-cased. For example `"Daft Punk-Get Lucky"` becomes
/// `https://genius.com/Daft-punk-get-lucky-lyrics`.
///
/// Leading separators and punctuation are dropped as well, so the slug always
/// starts with an alphanumeric character. If nothing usable remains, the slug is
/// empty and the result is `https://genius.com/-lyrics`.
fn make_url(query: &str) -> String {
    // Convert accents and umlauts etc. to plain ascii as otherwise the lyric lookup fails:
    // NFD splits them into a base letter plus combining marks, and the marks are not ASCII.
    let ascii: String = query.nfd().filter(char::is_ascii).collect();

    // Other replacements.
    let lower = ascii.replace('&', "and").replace('_', "-").to_lowercase();

    let mut slug = String::with_capacity(lower.len());
    // Set when a separator was seen since the last emitted character; the `-` is only
    // written once the next alphanumeric character arrives, which rules out leading,
    // trailing and doubled dashes.
    let mut pending_dash = false;
    for c in lower.chars() {
        if c.is_ascii_alphanumeric() {
            if slug.is_empty() {
                slug.push(c.to_ascii_uppercase());
            } else {
                if pending_dash {
                    slug.push('-');
                }
                slug.push(c);
            }
            pending_dash = false;
        } else if c.is_whitespace() || c == '-' {
            pending_dash = true;
        }
    }

    format!("https://genius.com/{slug}-lyrics")
}
6 changes: 6 additions & 0 deletions src/fingerprinting/user_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

// From https://github.com/SaswatPadhi/FlashProfileDemo/blob/c1e3f05d09f6443568a606dc0a439d6ebb057ae1/tests/hetero/user_agents.json

use rand::prelude::SliceRandom;

pub const USER_AGENTS: [&'static str; 100] = [
"Dalvik/2.1.0 (Linux; U; Android 5.0.2; VS980 4G Build/LRX22G)",
"Dalvik/1.6.0 (Linux; U; Android 4.4.2; SM-T210 Build/KOT49H)",
Expand Down Expand Up @@ -104,3 +106,7 @@ pub const USER_AGENTS: [&'static str; 100] = [
"Dalvik/1.6.0 (Linux; U; Android 4.2.2; SM-T217S Build/JDQ39)",
"Dalvik/1.6.0 (Linux; U; Android 4.4.4; SAMSUNG-SM-N900A Build/KTU84P)"
];

/// Returns a user agent string picked at random from [`USER_AGENTS`].
pub fn random() -> &'static str {
    let mut rng = rand::thread_rng();
    // `choose` only returns `None` for an empty slice, and `USER_AGENTS` is a
    // non-empty constant array.
    USER_AGENTS.choose(&mut rng).copied().unwrap()
}
Loading