Initial implementation of mediawiki searcher

This commit is contained in:
Adrian Malacoda 2017-03-12 16:58:09 -05:00
parent 04195ca697
commit 542c76b175
3 changed files with 123 additions and 0 deletions

View File

@ -9,6 +9,7 @@ extern crate env_logger;
use stc::searchers::{Searcher, AggregateSearcher};
use stc::searchers::mtg::{MtgCard, MtgSearcher};
use stc::searchers::yugioh::{YugiohCard, YugiohSearcher};
use stc::searchers::mediawiki::{WikiPage, MediawikiSearcher};
fn main () {
env_logger::init().unwrap();
@ -17,6 +18,7 @@ fn main () {
let mut searchers = AggregateSearcher::new();
searchers.add_searcher("mtg", Box::new(MtgSearcher::new()));
searchers.add_searcher("ygo", Box::new(YugiohSearcher::new()));
searchers.add_searcher("wp", Box::new(MediawikiSearcher::new(String::from("https://en.wikipedia.org/wiki/"))));
match searchers.exact_search(&term) {
Some(item) => {
if let Some(card) = item.as_any().downcast_ref::<MtgCard>() {

120
src/searchers/mediawiki.rs Normal file
View File

@ -0,0 +1,120 @@
use Link;
use searchers::Searcher;
use hyper;
use hyper::Client;
use hyper::status::StatusCode;
use select::document::Document;
use select::predicate::{Name, Class, Attr};
use retry;
use retry::retry;
use retry::delay::Fixed;
use std;
use std::io::Read;
use std::any::Any;
// Upper bound on the number of retry delays handed to `retry` when fetching a page.
const NUM_RETRIES: usize = 10;
// Fixed pause, in milliseconds, between consecutive fetch attempts.
const RETRY_WAIT_MILLIS: u64 = 500;
/// A single wiki page found by `MediawikiSearcher`.
#[derive(Debug)]
pub struct WikiPage {
    /// Page title, taken from the page's first `<h1>` element.
    name: String,
    /// Address of the page, taken from its `rel="canonical"` link.
    url: String,
}
/// Lets a `WikiPage` be handled through the crate-wide `Link` interface.
impl Link for WikiPage {
    /// Text shown for the link: the page's name.
    fn label(&self) -> &str {
        self.name.as_str()
    }

    /// Address the link points to.
    fn url(&self) -> &str {
        self.url.as_str()
    }

    /// Hands the page back as `Any` so that callers holding a generic link
    /// can downcast it to the concrete `WikiPage` type.
    fn as_any(&self) -> &Any {
        self as &Any
    }
}
/// Searcher that resolves a page name against a single wiki over HTTP.
pub struct MediawikiSearcher {
    /// HTTP client used for every page request.
    client: Client,
    /// Prefix that the requested page name is appended to when building the URL.
    baseurl: String,
}
impl MediawikiSearcher {
pub fn new (url: String) -> MediawikiSearcher {
MediawikiSearcher {
client: Client::new(),
baseurl: url
}
}
fn do_search (&self, name: &str) -> Result<String, Error> {
let mut contents = String::new();
let api_url = &format!("{}{}", self.baseurl, name);
let mut response = retry(Fixed::from_millis(RETRY_WAIT_MILLIS).take(NUM_RETRIES), || {
self.client.get(api_url).send()
})?;
match response.status {
StatusCode::Ok => {
response.read_to_string(&mut contents)?;
Result::Ok(contents)
},
_ => {
Result::Err(Error::Other(String::from("Not Found")))
}
}
}
fn parse_entry (&self, page: String) -> Result<WikiPage, Error> {
let document = Document::from(&page[..]);
let page_name = String::from(document.find(Name("h1")).iter().next().expect("expected h1").text());
let page_url = String::from(document.find(Attr("rel", "canonical")).iter().next().expect("expected link rel='canonical'")
.attr("href").expect("expected href attribute"));
Result::Ok(WikiPage {
name: page_name,
url: page_url
})
}
}
impl Searcher<WikiPage> for MediawikiSearcher {
    /// Looks up the page called exactly `name`.
    ///
    /// Returns `None` whenever the fetch or the parse step reports an error;
    /// the error itself is dropped.
    fn exact_search(&self, name: &str) -> Option<WikiPage> {
        let body = match self.do_search(name) {
            Ok(html) => html,
            Err(_) => return None,
        };
        self.parse_entry(body).ok()
    }
}
/// Failures that can occur while fetching or interpreting a wiki page.
#[derive(Debug)]
enum Error {
    /// The HTTP request itself failed.
    Http(hyper::error::Error),
    /// Reading the response body failed.
    Io(std::io::Error),
    /// Any other failure, described by a free-form message.
    Other(String),
}
impl From<hyper::error::Error> for Error {
fn from (error: hyper::error::Error) -> Error {
Error::Http(error)
}
}
impl From<std::io::Error> for Error {
fn from (error: std::io::Error) -> Error {
Error::Io(error)
}
}
impl From<retry::Error<hyper::Error>> for Error {
fn from (err: retry::Error<hyper::Error>) -> Error {
match err {
retry::Error::Operation { error, total_delay, tries } => {
Error::Http(error)
}
}
}
}

View File

@ -1,5 +1,6 @@
pub mod mtg;
pub mod yugioh;
pub mod mediawiki;
use Link;
use std::collections::BTreeMap;