Initial implementation of mediawiki searcher

This commit is contained in:
parent 04195ca697
commit 542c76b175
src/main.rs

@@ -9,6 +9,7 @@ extern crate env_logger;
 use stc::searchers::{Searcher, AggregateSearcher};
 use stc::searchers::mtg::{MtgCard, MtgSearcher};
 use stc::searchers::yugioh::{YugiohCard, YugiohSearcher};
+use stc::searchers::mediawiki::{WikiPage, MediawikiSearcher};
 
 fn main () {
     env_logger::init().unwrap();
@@ -17,6 +18,7 @@ fn main () {
     let mut searchers = AggregateSearcher::new();
     searchers.add_searcher("mtg", Box::new(MtgSearcher::new()));
     searchers.add_searcher("ygo", Box::new(YugiohSearcher::new()));
+    searchers.add_searcher("wp", Box::new(MediawikiSearcher::new(String::from("https://en.wikipedia.org/wiki/"))));
     match searchers.exact_search(&term) {
         Some(item) => {
             if let Some(card) = item.as_any().downcast_ref::<MtgCard>() {
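The MtgCard branch above is pre-existing context; the commit itself does not show how a WikiPage result would be rendered. A minimal sketch of a companion branch, assuming it sits alongside the MtgCard downcast in main and that printing the page title and URL is all that is wanted (the println! format is hypothetical, not part of this commit):

    // Hypothetical companion branch, mirroring the Any-downcast pattern
    // already used for MtgCard above.
    if let Some(page) = item.as_any().downcast_ref::<WikiPage>() {
        // label() and url() come from the Link trait implemented in mediawiki.rs.
        println!("{}: {}", page.label(), page.url());
    }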
src/searchers/mediawiki.rs (new file, 120 lines)

@@ -0,0 +1,120 @@
+use Link;
+use searchers::Searcher;
+
+use hyper;
+use hyper::Client;
+use hyper::status::StatusCode;
+
+use select::document::Document;
+use select::predicate::{Name, Class, Attr};
+
+use retry;
+use retry::retry;
+use retry::delay::Fixed;
+
+use std;
+use std::io::Read;
+
+use std::any::Any;
+
+const NUM_RETRIES: usize = 10;
+const RETRY_WAIT_MILLIS: u64 = 500;
+
+#[derive(Debug)]
+pub struct WikiPage {
+    name: String,
+    url: String
+}
+
+impl Link for WikiPage {
+    fn label (&self) -> &str {
+        &self.name
+    }
+
+    fn url (&self) -> &str {
+        &self.url
+    }
+
+    fn as_any (&self) -> &Any {
+        self
+    }
+}
+
+pub struct MediawikiSearcher {
+    client: Client,
+    baseurl: String
+}
+
+impl MediawikiSearcher {
+    pub fn new (url: String) -> MediawikiSearcher {
+        MediawikiSearcher {
+            client: Client::new(),
+            baseurl: url
+        }
+    }
+
+    fn do_search (&self, name: &str) -> Result<String, Error> {
+        let mut contents = String::new();
+        let api_url = &format!("{}{}", self.baseurl, name);
+        let mut response = retry(Fixed::from_millis(RETRY_WAIT_MILLIS).take(NUM_RETRIES), || {
+            self.client.get(api_url).send()
+        })?;
+
+        match response.status {
+            StatusCode::Ok => {
+                response.read_to_string(&mut contents)?;
+                Result::Ok(contents)
+            },
+            _ => {
+                Result::Err(Error::Other(String::from("Not Found")))
+            }
+        }
+    }
+
+    fn parse_entry (&self, page: String) -> Result<WikiPage, Error> {
+        let document = Document::from(&page[..]);
+        let page_name = String::from(document.find(Name("h1")).iter().next().expect("expected h1").text());
+        let page_url = String::from(document.find(Attr("rel", "canonical")).iter().next().expect("expected link rel='canonical'")
+            .attr("href").expect("expected href attribute"));
+
+        Result::Ok(WikiPage {
+            name: page_name,
+            url: page_url
+        })
+    }
+}
+
+impl Searcher<WikiPage> for MediawikiSearcher {
+    fn exact_search (&self, name: &str) -> Option<WikiPage> {
+        self.do_search(name).and_then(|content| self.parse_entry(content)).ok()
+    }
+}
+
+#[derive(Debug)]
+enum Error {
+    Http(hyper::error::Error),
+    Io(std::io::Error),
+    Other(String)
+}
+
+impl From<hyper::error::Error> for Error {
+    fn from (error: hyper::error::Error) -> Error {
+        Error::Http(error)
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from (error: std::io::Error) -> Error {
+        Error::Io(error)
+    }
+}
+
+impl From<retry::Error<hyper::Error>> for Error {
+    fn from (err: retry::Error<hyper::Error>) -> Error {
+        match err {
+            retry::Error::Operation { error, total_delay, tries } => {
+                Error::Http(error)
+            }
+        }
+    }
+}
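For reference, do_search builds the request URL by appending the term to baseurl and retries the GET up to NUM_RETRIES (10) times with a fixed RETRY_WAIT_MILLIS (500 ms) delay, while parse_entry scrapes the first h1 and the rel="canonical" href out of the returned HTML. A minimal usage sketch, assuming the crate is named `stc` and re-exports `Link` at its root (as the `use stc::...` and `use Link;` paths in this commit suggest); the search term is a hypothetical example:

    extern crate stc;

    use stc::Link;
    use stc::searchers::Searcher;
    use stc::searchers::mediawiki::MediawikiSearcher;

    fn main () {
        // Same base URL as the one registered in main.rs in this commit.
        let searcher = MediawikiSearcher::new(String::from("https://en.wikipedia.org/wiki/"));

        // exact_search returns None when the request keeps failing or the page
        // doesn't come back 200 OK; note that parse_entry panics (via expect)
        // if the h1 or the canonical <link> is missing from the page.
        if let Some(page) = searcher.exact_search("Rust_(programming_language)") {
            println!("{} -> {}", page.label(), page.url());
        }
    }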
src/searchers/mod.rs

@@ -1,5 +1,6 @@
 pub mod mtg;
 pub mod yugioh;
+pub mod mediawiki;
 
 use Link;
 use std::collections::BTreeMap;