Mystem wrapper reworked.

This commit is contained in:
AB
2020-12-10 14:46:19 +03:00
parent 375015efff
commit cfe510029e
5 changed files with 101 additions and 36 deletions

View File

@ -1,4 +1,5 @@
use crate::errors; use crate::errors;
use crate::mystem;
use crate::utils; use crate::utils;
use rusqlite::{named_params, params, Connection, Error, Result}; use rusqlite::{named_params, params, Connection, Error, Result};
use std::time::SystemTime; use std::time::SystemTime;
@ -334,7 +335,10 @@ async fn add_relation(word_id: i64, msg_id: i64, message: &Message) -> Result<i6
Ok(rowid) Ok(rowid)
} }
pub(crate) async fn add_sentence(message: &telegram_bot::Message) -> Result<(), errors::Error> { pub(crate) async fn add_sentence(
message: &telegram_bot::Message,
mystem: &mut mystem::MyStem,
) -> Result<(), errors::Error> {
let text = message.text().unwrap(); let text = message.text().unwrap();
let conn = open()?; let conn = open()?;
@ -349,7 +353,7 @@ pub(crate) async fn add_sentence(message: &telegram_bot::Message) -> Result<(),
}; };
// Save stemmed words // Save stemmed words
let words = utils::stemming(message).await?; let words = mystem.stemming(text).await?;
for word in words { for word in words {
match add_word(&word).await { match add_word(&word).await {
Ok(id) => { Ok(id) => {

View File

@ -2,10 +2,11 @@ use reqwest::Error as reqwest_error;
use rusqlite::Error as sqlite_error; use rusqlite::Error as sqlite_error;
use serde_json::Error as serde_error; use serde_json::Error as serde_error;
use std::{fmt, io::Error as io_error}; use std::{fmt, io::Error as io_error};
use subprocess::PopenError as popen_error;
use telegram_bot::Error as tg_error; use telegram_bot::Error as tg_error;
#[derive(Debug)] #[derive(Debug)]
pub(crate) enum Error { pub enum Error {
UserNotFound, UserNotFound,
SQLITE3Error(sqlite_error), SQLITE3Error(sqlite_error),
TelegramError(tg_error), TelegramError(tg_error),
@ -16,6 +17,7 @@ pub(crate) enum Error {
IOError(io_error), IOError(io_error),
FileNotFound, FileNotFound,
JsonParseError(serde_error), JsonParseError(serde_error),
PopenError(popen_error),
} }
impl fmt::Display for Error { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@ -52,3 +54,9 @@ impl From<serde_error> for Error {
return Error::JsonParseError(e); return Error::JsonParseError(e);
} }
} }
impl From<popen_error> for Error {
fn from(e: popen_error) -> Error {
return Error::PopenError(e);
}
}

View File

@ -9,9 +9,16 @@ use env_logger::Env;
mod commands; mod commands;
mod db; mod db;
mod errors; mod errors;
mod mystem;
mod utils; mod utils;
use mystem::MyStem;
async fn handler(api: Api, message: Message, token: String) -> Result<(), errors::Error> { async fn handler(
api: Api,
message: Message,
token: String,
mystem: &mut MyStem,
) -> Result<(), errors::Error> {
match message.kind { match message.kind {
MessageKind::Text { ref data, .. } => { MessageKind::Text { ref data, .. } => {
let title = utils::get_title(&message); let title = utils::get_title(&message);
@ -23,7 +30,7 @@ async fn handler(api: Api, message: Message, token: String) -> Result<(), errors
&message.from.first_name, &message.from.first_name,
data data
); );
db::add_sentence(&message).await?; db::add_sentence(&message, mystem).await?;
match data.as_str() { match data.as_str() {
"/here" => commands::here(api, message).await?, "/here" => commands::here(api, message).await?,
"/top" => commands::top(api, message).await?, "/top" => commands::top(api, message).await?,
@ -113,6 +120,7 @@ async fn handler(api: Api, message: Message, token: String) -> Result<(), errors
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), errors::Error> { async fn main() -> Result<(), errors::Error> {
env_logger::from_env(Env::default().default_filter_or("info")).init(); env_logger::from_env(Env::default().default_filter_or("info")).init();
let mut mystem = MyStem::new()?;
match db::update_scheme() { match db::update_scheme() {
Ok(_) => {} Ok(_) => {}
Err(e) => panic!("Database error: {:?}", e), Err(e) => panic!("Database error: {:?}", e),
@ -133,7 +141,7 @@ async fn main() -> Result<(), errors::Error> {
if let UpdateKind::Message(message) = update.kind { if let UpdateKind::Message(message) = update.kind {
db::add_user(message.clone()).await?; db::add_user(message.clone()).await?;
db::add_conf(message.clone()).await?; db::add_conf(message.clone()).await?;
handler(api.clone(), message, token.clone()).await?; handler(api.clone(), message, token.clone(), &mut mystem).await?;
} }
} }
Ok(()) Ok(())

75
src/mystem.rs Normal file
View File

@ -0,0 +1,75 @@
use crate::errors;
use serde_json::Value;
use std::io::prelude::*;
use std::io::BufReader;
use std::io::{Error, Write};
use subprocess::{Popen, PopenConfig, PopenError, Redirection};
pub struct MyStem {
pub process: Popen,
}
impl MyStem {
pub fn new() -> Result<Self, PopenError> {
Ok(Self {
process: MyStem::open_process()?,
})
}
fn open_process() -> Result<Popen, PopenError> {
Popen::create(
&["mystem", "-d", "--format", "json"],
PopenConfig {
stdout: Redirection::Pipe,
stdin: Redirection::Pipe,
..Default::default()
},
)
}
#[allow(dead_code)]
pub fn terminate(&mut self) -> Result<(), Error> {
self.process.terminate()
}
#[allow(unused_must_use)]
pub async fn stemming(&mut self, text: String) -> Result<Vec<String>, errors::Error> {
if let Some(exit_status) = self.process.poll() {
warn!(
"MyStem process exited with: {:?}. Restarting...",
exit_status
);
self.process = MyStem::open_process()?;
}
let mut words: Vec<String> = vec![];
let clean_text = format!("{}{}", text.trim(), "\n");
self.process
.stdin
.as_ref()
.unwrap()
.write(clean_text.as_bytes());
let mut contents = String::new();
let mut buf_reader = BufReader::new(self.process.stdout.as_ref().unwrap());
buf_reader.read_line(&mut contents);
match Some(contents) {
Some(contents) => {
let v: Vec<Value> = match serde_json::from_str(contents.as_str()) {
Ok(val) => val,
Err(_) => return Ok(vec![]),
};
for i in v {
words.push(i["analysis"][0]["lex"].to_string().replace("\"", ""));
}
words.retain(|x| x != "null");
debug!(
"Mystem PID: {}. Parsed words: {}.",
self.process.pid().unwrap(),
words.join(", ")
);
Ok(words)
}
None => return Ok(vec![]),
}
}
}

View File

@ -8,8 +8,6 @@ use uuid::Uuid;
use crate::db; use crate::db;
use crate::errors; use crate::errors;
extern crate reqwest; extern crate reqwest;
use serde_json::Value;
use subprocess::Exec;
pub(crate) fn get_title(message: &Message) -> String { pub(crate) fn get_title(message: &Message) -> String {
match &message.chat { match &message.chat {
@ -98,31 +96,3 @@ pub(crate) async fn get_files(
}; };
Ok(file_count) Ok(file_count)
} }
pub(crate) async fn stemming(message: &Message) -> Result<Vec<String>, errors::Error> {
let mut words: Vec<String> = vec![];
let proc = Exec::shell("mystem -d --format json -l");
match proc
.stdin(message.text().unwrap().as_str())
.communicate()
.unwrap()
.read_string()
.unwrap()
.0
{
Some(line) => {
let v: Vec<Value> = match serde_json::from_str(line.as_str()) {
Ok(val) => val,
Err(_) => return Ok(vec![]),
};
for i in v {
words.push(i["analysis"][0]["lex"].to_string().replace("\"", ""));
}
words.retain(|x| x != "null");
//debug!("Parsed words: {}.", words.join(", "));
}
None => return Ok(vec![]),
};
Ok(words)
}