diff --git a/src/db.rs b/src/db.rs index 3aa4959..b9ff2b2 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,4 +1,5 @@ use crate::errors; +use crate::mystem; use crate::utils; use rusqlite::{named_params, params, Connection, Error, Result}; use std::time::SystemTime; @@ -334,7 +335,10 @@ async fn add_relation(word_id: i64, msg_id: i64, message: &Message) -> Result Result<(), errors::Error> { +pub(crate) async fn add_sentence( + message: &telegram_bot::Message, + mystem: &mut mystem::MyStem, +) -> Result<(), errors::Error> { let text = message.text().unwrap(); let conn = open()?; @@ -349,7 +353,7 @@ pub(crate) async fn add_sentence(message: &telegram_bot::Message) -> Result<(), }; // Save stemmed words - let words = utils::stemming(message).await?; + let words = mystem.stemming(text).await?; for word in words { match add_word(&word).await { Ok(id) => { diff --git a/src/errors.rs b/src/errors.rs index cff1ad0..3210b97 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -2,10 +2,11 @@ use reqwest::Error as reqwest_error; use rusqlite::Error as sqlite_error; use serde_json::Error as serde_error; use std::{fmt, io::Error as io_error}; +use subprocess::PopenError as popen_error; use telegram_bot::Error as tg_error; #[derive(Debug)] -pub(crate) enum Error { +pub enum Error { UserNotFound, SQLITE3Error(sqlite_error), TelegramError(tg_error), @@ -16,6 +17,7 @@ pub(crate) enum Error { IOError(io_error), FileNotFound, JsonParseError(serde_error), + PopenError(popen_error), } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -52,3 +54,9 @@ impl From for Error { return Error::JsonParseError(e); } } + +impl From for Error { + fn from(e: popen_error) -> Error { + return Error::PopenError(e); + } +} diff --git a/src/main.rs b/src/main.rs index bfc7d96..6d72bce 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,16 @@ use env_logger::Env; mod commands; mod db; mod errors; +mod mystem; mod utils; +use mystem::MyStem; -async fn handler(api: Api, message: Message, token: String) -> Result<(), errors::Error> { +async fn handler( + api: Api, + message: Message, + token: String, + mystem: &mut MyStem, +) -> Result<(), errors::Error> { match message.kind { MessageKind::Text { ref data, .. } => { let title = utils::get_title(&message); @@ -23,7 +30,7 @@ async fn handler(api: Api, message: Message, token: String) -> Result<(), errors &message.from.first_name, data ); - db::add_sentence(&message).await?; + db::add_sentence(&message, mystem).await?; match data.as_str() { "/here" => commands::here(api, message).await?, "/top" => commands::top(api, message).await?, @@ -113,6 +120,7 @@ async fn handler(api: Api, message: Message, token: String) -> Result<(), errors #[tokio::main] async fn main() -> Result<(), errors::Error> { env_logger::from_env(Env::default().default_filter_or("info")).init(); + let mut mystem = MyStem::new()?; match db::update_scheme() { Ok(_) => {} Err(e) => panic!("Database error: {:?}", e), @@ -133,7 +141,7 @@ async fn main() -> Result<(), errors::Error> { if let UpdateKind::Message(message) = update.kind { db::add_user(message.clone()).await?; db::add_conf(message.clone()).await?; - handler(api.clone(), message, token.clone()).await?; + handler(api.clone(), message, token.clone(), &mut mystem).await?; } } Ok(()) diff --git a/src/mystem.rs b/src/mystem.rs new file mode 100644 index 0000000..9cfcce1 --- /dev/null +++ b/src/mystem.rs @@ -0,0 +1,75 @@ +use crate::errors; +use serde_json::Value; +use std::io::prelude::*; +use std::io::BufReader; +use std::io::{Error, Write}; +use subprocess::{Popen, PopenConfig, PopenError, Redirection}; + +pub struct MyStem { + pub process: Popen, +} + +impl MyStem { + pub fn new() -> Result { + Ok(Self { + process: MyStem::open_process()?, + }) + } + + fn open_process() -> Result { + Popen::create( + &["mystem", "-d", "--format", "json"], + PopenConfig { + stdout: Redirection::Pipe, + stdin: Redirection::Pipe, + ..Default::default() + }, + ) + } + + #[allow(dead_code)] + pub fn terminate(&mut self) -> Result<(), Error> { + self.process.terminate() + } + + #[allow(unused_must_use)] + pub async fn stemming(&mut self, text: String) -> Result, errors::Error> { + if let Some(exit_status) = self.process.poll() { + warn!( + "MyStem process exited with: {:?}. Restarting...", + exit_status + ); + self.process = MyStem::open_process()?; + } + let mut words: Vec = vec![]; + let clean_text = format!("{}{}", text.trim(), "\n"); + self.process + .stdin + .as_ref() + .unwrap() + .write(clean_text.as_bytes()); + let mut contents = String::new(); + let mut buf_reader = BufReader::new(self.process.stdout.as_ref().unwrap()); + buf_reader.read_line(&mut contents); + + match Some(contents) { + Some(contents) => { + let v: Vec = match serde_json::from_str(contents.as_str()) { + Ok(val) => val, + Err(_) => return Ok(vec![]), + }; + for i in v { + words.push(i["analysis"][0]["lex"].to_string().replace("\"", "")); + } + words.retain(|x| x != "null"); + debug!( + "Mystem PID: {}. Parsed words: {}.", + self.process.pid().unwrap(), + words.join(", ") + ); + Ok(words) + } + None => return Ok(vec![]), + } + } +} diff --git a/src/utils.rs b/src/utils.rs index bafdfee..0588904 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -8,8 +8,6 @@ use uuid::Uuid; use crate::db; use crate::errors; extern crate reqwest; -use serde_json::Value; -use subprocess::Exec; pub(crate) fn get_title(message: &Message) -> String { match &message.chat { @@ -98,31 +96,3 @@ pub(crate) async fn get_files( }; Ok(file_count) } - -pub(crate) async fn stemming(message: &Message) -> Result, errors::Error> { - let mut words: Vec = vec![]; - let proc = Exec::shell("mystem -d --format json -l"); - match proc - .stdin(message.text().unwrap().as_str()) - .communicate() - .unwrap() - .read_string() - .unwrap() - .0 - { - Some(line) => { - let v: Vec = match serde_json::from_str(line.as_str()) { - Ok(val) => val, - Err(_) => return Ok(vec![]), - }; - for i in v { - words.push(i["analysis"][0]["lex"].to_string().replace("\"", "")); - } - words.retain(|x| x != "null"); - //debug!("Parsed words: {}.", words.join(", ")); - } - None => return Ok(vec![]), - }; - - Ok(words) -}