diff --git a/src/main.rs b/src/main.rs
index ad74708..c2b69d2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -9,6 +9,7 @@ extern crate env_logger;
 use stc::searchers::{Searcher, AggregateSearcher};
 use stc::searchers::mtg::{MtgCard, MtgSearcher};
 use stc::searchers::yugioh::{YugiohCard, YugiohSearcher};
+use stc::searchers::mediawiki::{WikiPage, MediawikiSearcher};
 
 fn main () {
     env_logger::init().unwrap();
@@ -17,6 +18,7 @@ fn main () {
     let mut searchers = AggregateSearcher::new();
     searchers.add_searcher("mtg", Box::new(MtgSearcher::new()));
     searchers.add_searcher("ygo", Box::new(YugiohSearcher::new()));
+    searchers.add_searcher("wp", Box::new(MediawikiSearcher::new(String::from("https://en.wikipedia.org/wiki/"))));
     match searchers.exact_search(&term) {
         Some(item) => {
             if let Some(card) = item.as_any().downcast_ref::<MtgCard>() {
diff --git a/src/searchers/mediawiki.rs b/src/searchers/mediawiki.rs
new file mode 100644
index 0000000..be950d8
--- /dev/null
+++ b/src/searchers/mediawiki.rs
@@ -0,0 +1,151 @@
+//! Searcher that resolves a term to a page on a MediaWiki-based site.
+//!
+//! The page is fetched from the base URL with the term appended, and its
+//! title and canonical URL are read out of the returned HTML.
+
+use Link;
+use searchers::Searcher;
+
+use hyper;
+use hyper::Client;
+use hyper::status::StatusCode;
+
+use select::document::Document;
+use select::predicate::{Name, Class, Attr};
+
+use retry;
+use retry::retry;
+use retry::delay::Fixed;
+
+use std;
+use std::io::Read;
+
+use std::any::Any;
+
+/// Number of retry delays handed to `retry` for a single request.
+const NUM_RETRIES: usize = 10;
+/// Fixed pause between attempts, in milliseconds.
+const RETRY_WAIT_MILLIS: u64 = 500;
+
+/// A page found on a MediaWiki site.
+#[derive(Debug)]
+pub struct WikiPage {
+    /// Text of the first `h1` element of the page.
+    name: String,
+    /// `href` of the page's `rel="canonical"` element.
+    url: String
+}
+
+impl Link for WikiPage {
+    fn label (&self) -> &str {
+        &self.name
+    }
+
+    fn url (&self) -> &str {
+        &self.url
+    }
+
+    fn as_any (&self) -> &Any {
+        self
+    }
+}
+
+/// Looks pages up by appending the search term to a base URL.
+pub struct MediawikiSearcher {
+    client: Client,
+    baseurl: String
+}
+
+impl MediawikiSearcher {
+    /// Creates a searcher for the wiki whose page URLs start with `url`,
+    /// e.g. `https://en.wikipedia.org/wiki/`.
+    pub fn new (url: String) -> MediawikiSearcher {
+        MediawikiSearcher {
+            client: Client::new(),
+            baseurl: url
+        }
+    }
+
+    /// Fetches `baseurl` + `name` and returns the response body.
+    ///
+    /// Sending is retried with a fixed delay; once a response arrives, any
+    /// status other than 200 OK is reported as `Error::Other`.
+    fn do_search (&self, name: &str) -> Result<String, Error> {
+        // NOTE(review): `name` is appended verbatim, without URL-encoding.
+        let api_url = format!("{}{}", self.baseurl, name);
+        let delays = Fixed::from_millis(RETRY_WAIT_MILLIS).take(NUM_RETRIES);
+        let mut response = retry(delays, || self.client.get(&api_url).send())?;
+
+        match response.status {
+            StatusCode::Ok => {
+                let mut contents = String::new();
+                response.read_to_string(&mut contents)?;
+                Ok(contents)
+            },
+            _ => Err(Error::Other(String::from("Not Found")))
+        }
+    }
+
+    /// Extracts the title and canonical URL from a page's HTML.
+    ///
+    /// Returns `Error::Other` instead of panicking when the document has no
+    /// `h1` element or no `rel="canonical"` element carrying an `href`.
+    fn parse_entry (&self, page: String) -> Result<WikiPage, Error> {
+        let document = Document::from(&page[..]);
+
+        let page_name = document.find(Name("h1")).iter().next()
+            .map(|heading| heading.text())
+            .ok_or_else(|| Error::Other(String::from("expected h1")))?;
+
+        let page_url = document.find(Attr("rel", "canonical")).iter().next()
+            .and_then(|link| link.attr("href"))
+            .map(String::from)
+            .ok_or_else(|| Error::Other(String::from("expected link rel='canonical' with href")))?;
+
+        Ok(WikiPage {
+            name: page_name,
+            url: page_url
+        })
+    }
+}
+
+impl Searcher for MediawikiSearcher {
+    /// Fetches and parses the page for `name`; any failure yields `None`.
+    fn exact_search (&self, name: &str) -> Option<WikiPage> {
+        self.do_search(name)
+            .and_then(|content| self.parse_entry(content))
+            .ok()
+    }
+}
+
+/// Failures that can occur while fetching or parsing a page.
+#[derive(Debug)]
+enum Error {
+    /// The HTTP request failed.
+    Http(hyper::error::Error),
+    /// Reading the response body failed.
+    Io(std::io::Error),
+    /// Any other failure, described by a message.
+    Other(String)
+}
+
+impl From<hyper::error::Error> for Error {
+    fn from (error: hyper::error::Error) -> Error {
+        Error::Http(error)
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from (error: std::io::Error) -> Error {
+        Error::Io(error)
+    }
+}
+
+impl From<retry::Error<hyper::error::Error>> for Error {
+    fn from (err: retry::Error<hyper::error::Error>) -> Error {
+        match err {
+            // Keep only the underlying HTTP error; delay and try counts are dropped.
+            retry::Error::Operation { error, .. } => Error::Http(error)
+        }
+    }
+}
diff --git a/src/searchers/mod.rs b/src/searchers/mod.rs
index 37c16cb..b2cd5aa 100644
--- a/src/searchers/mod.rs
+++ b/src/searchers/mod.rs
@@ -1,5 +1,6 @@
 pub mod mtg;
 pub mod yugioh;
+pub mod mediawiki;
 
 use Link;
 use std::collections::BTreeMap;