mirror of
https://github.com/house-of-vanity/desubot.git
synced 2025-07-08 21:04:07 +00:00
Mystem wrapper reworked.
This commit is contained in:
@ -1,4 +1,5 @@
|
|||||||
use crate::errors;
|
use crate::errors;
|
||||||
|
use crate::mystem;
|
||||||
use crate::utils;
|
use crate::utils;
|
||||||
use rusqlite::{named_params, params, Connection, Error, Result};
|
use rusqlite::{named_params, params, Connection, Error, Result};
|
||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
@ -334,7 +335,10 @@ async fn add_relation(word_id: i64, msg_id: i64, message: &Message) -> Result<i6
|
|||||||
Ok(rowid)
|
Ok(rowid)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn add_sentence(message: &telegram_bot::Message) -> Result<(), errors::Error> {
|
pub(crate) async fn add_sentence(
|
||||||
|
message: &telegram_bot::Message,
|
||||||
|
mystem: &mut mystem::MyStem,
|
||||||
|
) -> Result<(), errors::Error> {
|
||||||
let text = message.text().unwrap();
|
let text = message.text().unwrap();
|
||||||
let conn = open()?;
|
let conn = open()?;
|
||||||
|
|
||||||
@ -349,7 +353,7 @@ pub(crate) async fn add_sentence(message: &telegram_bot::Message) -> Result<(),
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Save stemmed words
|
// Save stemmed words
|
||||||
let words = utils::stemming(message).await?;
|
let words = mystem.stemming(text).await?;
|
||||||
for word in words {
|
for word in words {
|
||||||
match add_word(&word).await {
|
match add_word(&word).await {
|
||||||
Ok(id) => {
|
Ok(id) => {
|
||||||
|
@ -2,10 +2,11 @@ use reqwest::Error as reqwest_error;
|
|||||||
use rusqlite::Error as sqlite_error;
|
use rusqlite::Error as sqlite_error;
|
||||||
use serde_json::Error as serde_error;
|
use serde_json::Error as serde_error;
|
||||||
use std::{fmt, io::Error as io_error};
|
use std::{fmt, io::Error as io_error};
|
||||||
|
use subprocess::PopenError as popen_error;
|
||||||
use telegram_bot::Error as tg_error;
|
use telegram_bot::Error as tg_error;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) enum Error {
|
pub enum Error {
|
||||||
UserNotFound,
|
UserNotFound,
|
||||||
SQLITE3Error(sqlite_error),
|
SQLITE3Error(sqlite_error),
|
||||||
TelegramError(tg_error),
|
TelegramError(tg_error),
|
||||||
@ -16,6 +17,7 @@ pub(crate) enum Error {
|
|||||||
IOError(io_error),
|
IOError(io_error),
|
||||||
FileNotFound,
|
FileNotFound,
|
||||||
JsonParseError(serde_error),
|
JsonParseError(serde_error),
|
||||||
|
PopenError(popen_error),
|
||||||
}
|
}
|
||||||
impl fmt::Display for Error {
|
impl fmt::Display for Error {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
@ -52,3 +54,9 @@ impl From<serde_error> for Error {
|
|||||||
return Error::JsonParseError(e);
|
return Error::JsonParseError(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<popen_error> for Error {
|
||||||
|
fn from(e: popen_error) -> Error {
|
||||||
|
return Error::PopenError(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
14
src/main.rs
14
src/main.rs
@ -9,9 +9,16 @@ use env_logger::Env;
|
|||||||
mod commands;
|
mod commands;
|
||||||
mod db;
|
mod db;
|
||||||
mod errors;
|
mod errors;
|
||||||
|
mod mystem;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
use mystem::MyStem;
|
||||||
|
|
||||||
async fn handler(api: Api, message: Message, token: String) -> Result<(), errors::Error> {
|
async fn handler(
|
||||||
|
api: Api,
|
||||||
|
message: Message,
|
||||||
|
token: String,
|
||||||
|
mystem: &mut MyStem,
|
||||||
|
) -> Result<(), errors::Error> {
|
||||||
match message.kind {
|
match message.kind {
|
||||||
MessageKind::Text { ref data, .. } => {
|
MessageKind::Text { ref data, .. } => {
|
||||||
let title = utils::get_title(&message);
|
let title = utils::get_title(&message);
|
||||||
@ -23,7 +30,7 @@ async fn handler(api: Api, message: Message, token: String) -> Result<(), errors
|
|||||||
&message.from.first_name,
|
&message.from.first_name,
|
||||||
data
|
data
|
||||||
);
|
);
|
||||||
db::add_sentence(&message).await?;
|
db::add_sentence(&message, mystem).await?;
|
||||||
match data.as_str() {
|
match data.as_str() {
|
||||||
"/here" => commands::here(api, message).await?,
|
"/here" => commands::here(api, message).await?,
|
||||||
"/top" => commands::top(api, message).await?,
|
"/top" => commands::top(api, message).await?,
|
||||||
@ -113,6 +120,7 @@ async fn handler(api: Api, message: Message, token: String) -> Result<(), errors
|
|||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), errors::Error> {
|
async fn main() -> Result<(), errors::Error> {
|
||||||
env_logger::from_env(Env::default().default_filter_or("info")).init();
|
env_logger::from_env(Env::default().default_filter_or("info")).init();
|
||||||
|
let mut mystem = MyStem::new()?;
|
||||||
match db::update_scheme() {
|
match db::update_scheme() {
|
||||||
Ok(_) => {}
|
Ok(_) => {}
|
||||||
Err(e) => panic!("Database error: {:?}", e),
|
Err(e) => panic!("Database error: {:?}", e),
|
||||||
@ -133,7 +141,7 @@ async fn main() -> Result<(), errors::Error> {
|
|||||||
if let UpdateKind::Message(message) = update.kind {
|
if let UpdateKind::Message(message) = update.kind {
|
||||||
db::add_user(message.clone()).await?;
|
db::add_user(message.clone()).await?;
|
||||||
db::add_conf(message.clone()).await?;
|
db::add_conf(message.clone()).await?;
|
||||||
handler(api.clone(), message, token.clone()).await?;
|
handler(api.clone(), message, token.clone(), &mut mystem).await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
75
src/mystem.rs
Normal file
75
src/mystem.rs
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
use crate::errors;
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::io::prelude::*;
|
||||||
|
use std::io::BufReader;
|
||||||
|
use std::io::{Error, Write};
|
||||||
|
use subprocess::{Popen, PopenConfig, PopenError, Redirection};
|
||||||
|
|
||||||
|
pub struct MyStem {
|
||||||
|
pub process: Popen,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MyStem {
|
||||||
|
pub fn new() -> Result<Self, PopenError> {
|
||||||
|
Ok(Self {
|
||||||
|
process: MyStem::open_process()?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn open_process() -> Result<Popen, PopenError> {
|
||||||
|
Popen::create(
|
||||||
|
&["mystem", "-d", "--format", "json"],
|
||||||
|
PopenConfig {
|
||||||
|
stdout: Redirection::Pipe,
|
||||||
|
stdin: Redirection::Pipe,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn terminate(&mut self) -> Result<(), Error> {
|
||||||
|
self.process.terminate()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused_must_use)]
|
||||||
|
pub async fn stemming(&mut self, text: String) -> Result<Vec<String>, errors::Error> {
|
||||||
|
if let Some(exit_status) = self.process.poll() {
|
||||||
|
warn!(
|
||||||
|
"MyStem process exited with: {:?}. Restarting...",
|
||||||
|
exit_status
|
||||||
|
);
|
||||||
|
self.process = MyStem::open_process()?;
|
||||||
|
}
|
||||||
|
let mut words: Vec<String> = vec![];
|
||||||
|
let clean_text = format!("{}{}", text.trim(), "\n");
|
||||||
|
self.process
|
||||||
|
.stdin
|
||||||
|
.as_ref()
|
||||||
|
.unwrap()
|
||||||
|
.write(clean_text.as_bytes());
|
||||||
|
let mut contents = String::new();
|
||||||
|
let mut buf_reader = BufReader::new(self.process.stdout.as_ref().unwrap());
|
||||||
|
buf_reader.read_line(&mut contents);
|
||||||
|
|
||||||
|
match Some(contents) {
|
||||||
|
Some(contents) => {
|
||||||
|
let v: Vec<Value> = match serde_json::from_str(contents.as_str()) {
|
||||||
|
Ok(val) => val,
|
||||||
|
Err(_) => return Ok(vec![]),
|
||||||
|
};
|
||||||
|
for i in v {
|
||||||
|
words.push(i["analysis"][0]["lex"].to_string().replace("\"", ""));
|
||||||
|
}
|
||||||
|
words.retain(|x| x != "null");
|
||||||
|
debug!(
|
||||||
|
"Mystem PID: {}. Parsed words: {}.",
|
||||||
|
self.process.pid().unwrap(),
|
||||||
|
words.join(", ")
|
||||||
|
);
|
||||||
|
Ok(words)
|
||||||
|
}
|
||||||
|
None => return Ok(vec![]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
30
src/utils.rs
30
src/utils.rs
@ -8,8 +8,6 @@ use uuid::Uuid;
|
|||||||
use crate::db;
|
use crate::db;
|
||||||
use crate::errors;
|
use crate::errors;
|
||||||
extern crate reqwest;
|
extern crate reqwest;
|
||||||
use serde_json::Value;
|
|
||||||
use subprocess::Exec;
|
|
||||||
|
|
||||||
pub(crate) fn get_title(message: &Message) -> String {
|
pub(crate) fn get_title(message: &Message) -> String {
|
||||||
match &message.chat {
|
match &message.chat {
|
||||||
@ -98,31 +96,3 @@ pub(crate) async fn get_files(
|
|||||||
};
|
};
|
||||||
Ok(file_count)
|
Ok(file_count)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn stemming(message: &Message) -> Result<Vec<String>, errors::Error> {
|
|
||||||
let mut words: Vec<String> = vec![];
|
|
||||||
let proc = Exec::shell("mystem -d --format json -l");
|
|
||||||
match proc
|
|
||||||
.stdin(message.text().unwrap().as_str())
|
|
||||||
.communicate()
|
|
||||||
.unwrap()
|
|
||||||
.read_string()
|
|
||||||
.unwrap()
|
|
||||||
.0
|
|
||||||
{
|
|
||||||
Some(line) => {
|
|
||||||
let v: Vec<Value> = match serde_json::from_str(line.as_str()) {
|
|
||||||
Ok(val) => val,
|
|
||||||
Err(_) => return Ok(vec![]),
|
|
||||||
};
|
|
||||||
for i in v {
|
|
||||||
words.push(i["analysis"][0]["lex"].to_string().replace("\"", ""));
|
|
||||||
}
|
|
||||||
words.retain(|x| x != "null");
|
|
||||||
//debug!("Parsed words: {}.", words.join(", "));
|
|
||||||
}
|
|
||||||
None => return Ok(vec![]),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(words)
|
|
||||||
}
|
|
||||||
|
Reference in New Issue
Block a user