Move Mystem to external lib. add /omedeto

This commit is contained in:
AB
2020-12-29 17:01:56 +03:00
parent 2d43a7d875
commit 9aaa8a94f1
8 changed files with 205 additions and 2162 deletions

2064
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -21,8 +21,9 @@ hyper-tls = { version = "0.4", optional = true }
futures = "0.3"
hyper-rustls = { version = "0.19", optional = true }
rusqlite = { version = "0.24.1", features = ["bundled"]}
rusqlite = { version = "0.24.2", features = ["bundled"]}
html-escape = "0.2"
regex = "1"
reqwest = "0.10.9"
uuid = { version = "0.8", features = ["v4"] }
sha1 = "*"
@ -32,3 +33,4 @@ subprocess = "0.2.6"
serde_json = "1.0"
markov = "1.1.0"
rand = "0.7.3"
mystem = "0.2"

View File

@ -2,7 +2,12 @@ use crate::db;
use crate::errors::Error;
use html_escape::encode_text;
use markov::Chain;
use mystem::Gender::Feminine;
use mystem::MyStem;
use mystem::Tense::{Past, Inpresent};
use rand::seq::SliceRandom;
use rand::Rng;
use regex::Regex;
use telegram_bot::prelude::*;
use telegram_bot::{Api, Message, ParseMode};
@ -61,7 +66,7 @@ pub(crate) async fn top(api: Api, message: Message) -> Result<(), Error> {
}
pub(crate) async fn markov_all(api: Api, message: Message) -> Result<(), Error> {
let messages = db::get_random_messages().await?;
let messages = db::get_messages_random_all().await?;
let mut chain = Chain::new();
chain.feed(messages);
let mut sentences = chain.generate();
@ -82,7 +87,7 @@ pub(crate) async fn markov_all(api: Api, message: Message) -> Result<(), Error>
}
pub(crate) async fn markov(api: Api, message: Message) -> Result<(), Error> {
let messages = db::get_random_messages_group(&message).await?;
let messages = db::get_messages_random_group(&message).await?;
let mut chain = Chain::new();
chain.feed(messages);
let mut sentences = chain.generate();
@ -101,3 +106,119 @@ pub(crate) async fn markov(api: Api, message: Message) -> Result<(), Error> {
//api.send(message.from.text("Private text")).await?;
Ok(())
}
pub(crate) async fn omedeto(api: Api, message: Message, mystem: &mut MyStem) -> Result<(), Error> {
let all_msg = db::get_messages_user_all(&message).await?;
let re = Regex::new(r"^[яЯ] [а-яА-Я]+(-[а-яА-Я]+(_[а-яА-Я]+)*)*$").unwrap();
let mut nouns: Vec<String> = all_msg
.clone()
.into_iter()
.filter(|m| re.is_match(m))
.map(|m| m.split(' ').map(|s| s.to_string()).collect::<Vec<String>>()[1].clone())
.filter(|m| {
let stem = mystem.stemming(m.clone()).unwrap_or_default();
match stem[0].lex[0].grammem.part_of_speech {
mystem::PartOfSpeech::Noun => true,
_ => false,
}
})
.collect();
nouns.sort();
nouns.dedup();
nouns.shuffle(&mut rand::thread_rng());
let mut verbs_p: Vec<String> = all_msg
.clone()
.into_iter()
.filter(|m| re.is_match(m))
.map(|m| m.split(' ').map(|s| s.to_string()).collect::<Vec<String>>()[1].clone())
.filter(|m| {
let stem = mystem.stemming(m.clone()).unwrap_or_default();
match stem[0].lex[0].grammem.part_of_speech {
mystem::PartOfSpeech::Verb => stem[0].lex[0]
.grammem
.facts
.contains(&mystem::Fact::Tense(Past)),
_ => false,
}
})
.collect();
verbs_p.sort();
verbs_p.dedup();
verbs_p.shuffle(&mut rand::thread_rng());
let mut verbs_i: Vec<String> = all_msg
.clone()
.into_iter()
.filter(|m| re.is_match(m))
.map(|m| m.split(' ').map(|s| s.to_string()).collect::<Vec<String>>()[1].clone())
.filter(|m| {
let stem = mystem.stemming(m.clone()).unwrap_or_default();
match stem[0].lex[0].grammem.part_of_speech {
mystem::PartOfSpeech::Verb => stem[0].lex[0]
.grammem
.facts
.contains(&mystem::Fact::Tense(Inpresent)),
_ => false,
}
})
.collect();
verbs_i.sort();
verbs_i.dedup();
verbs_i.shuffle(&mut rand::thread_rng());
if nouns.is_empty() {
nouns.push(message.from.first_name.to_string());
}
let start: Vec<String> = vec![
"С новыйм годом.".into(),
"С НГ тебя".into(),
"Поздравляю".into(),
"Поздравляю с НГ".into(),
];
//debug!("Nouns: {:#?}", nouns);
//debug!("Verbs: {:#?}", verbs);
let fem = if mystem
.stemming(message.from.first_name.to_string())
.unwrap()[0]
.lex
.is_empty()
{
false
} else {
if mystem
.stemming(message.from.first_name.to_string())
.unwrap()[0]
.lex[0]
.grammem
.facts
.contains(&mystem::Fact::Gender(Feminine))
{
true
} else {
false
}
};
let result = format!(
"{} {} известн{} как {}, {}, а так же конечно {}. В прошедшем году ты часто давал{} нам знать, что ты {}, {} и {}. Не редко ты говорил{} я {}, я {} или даже я {}. =*",
start.choose(&mut rand::thread_rng()).unwrap(),
message.from.first_name.to_string(),
{if fem {"ая"} else {"ый"}},
nouns.pop().unwrap_or("=(".to_string()),
nouns.pop().unwrap_or("=(".to_string()),
nouns.pop().unwrap_or("=(".to_string()),
{if fem {"а"} else {""}},
verbs_p.pop().unwrap_or("=(".to_string()),
verbs_p.pop().unwrap_or("=(".to_string()),
verbs_p.pop().unwrap_or("=(".to_string()),
{if fem {"а"} else {""}},
verbs_i.pop().unwrap_or("=(".to_string()),
verbs_i.pop().unwrap_or("=(".to_string()),
verbs_i.pop().unwrap_or("=(".to_string()),
);
debug!("{:?}", result);
// '^я [а-яА-Я]+(-[а-яА-Я]+(_[а-яА-Я]+)*)*$'
Ok(())
}

View File

@ -1,6 +1,6 @@
use crate::errors;
use crate::mystem;
use crate::utils;
use futures::StreamExt;
use rusqlite::{named_params, params, Connection, Error, Result};
use std::time::SystemTime;
use telegram_bot::*;
@ -102,7 +102,7 @@ pub(crate) fn get_confs() -> Result<Vec<Conf>> {
Ok(confs)
}
*/
pub(crate) async fn get_random_messages() -> Result<Vec<String>, Error> {
pub(crate) async fn get_messages_random_all() -> Result<Vec<String>, Error> {
let conn = open()?;
let mut stmt = conn.prepare_cached("SELECT text FROM messages ORDER BY RANDOM() LIMIT 50")?;
let mut rows = stmt.query_named(named_params![])?;
@ -114,17 +114,18 @@ pub(crate) async fn get_random_messages() -> Result<Vec<String>, Error> {
Ok(messages)
}
pub(crate) async fn get_random_messages_group(
message: &telegram_bot::Message
pub(crate) async fn get_messages_random_group(
message: &telegram_bot::Message,
) -> Result<Vec<String>, Error> {
let conf_id = i64::from(message.chat.id());
let conn = open()?;
let mut stmt = conn.prepare_cached("
let mut stmt = conn.prepare_cached(
"
SELECT m.text FROM messages m
LEFT JOIN relations r ON r.msg_id = m.id
WHERE r.conf_id = :conf_id
ORDER BY RANDOM() LIMIT 50
"
",
)?;
let mut rows = stmt.query_named(named_params! {":conf_id": conf_id})?;
let mut messages = Vec::new();
@ -135,6 +136,50 @@ pub(crate) async fn get_random_messages_group(
Ok(messages)
}
pub(crate) async fn get_messages_user_group(
message: &telegram_bot::Message,
) -> Result<Vec<String>, Error> {
let conf_id = i64::from(message.chat.id());
let user_id = i64::from(message.from.id);
let conn = open()?;
let mut stmt = conn.prepare_cached(
"
SELECT m.text FROM messages m
LEFT JOIN relations r ON r.msg_id = m.id
WHERE r.conf_id = :conf_id
AND r.user_id = :user_id
",
)?;
let mut rows = stmt.query_named(named_params! {":conf_id": conf_id, ":user_id": user_id})?;
let mut messages = Vec::new();
while let Some(row) = rows.next()? {
messages.push(row.get(0)?)
}
Ok(messages)
}
pub(crate) async fn get_messages_user_all(
message: &telegram_bot::Message,
) -> Result<Vec<String>, Error> {
let user_id = i64::from(message.from.id);
let conn = open()?;
let mut stmt = conn.prepare_cached(
"
SELECT m.text FROM messages m
LEFT JOIN relations r ON r.msg_id = m.id
WHERE r.user_id = :user_id
",
)?;
let mut rows = stmt.query_named(named_params! {":user_id": user_id})?;
let mut messages = Vec::new();
while let Some(row) = rows.next()? {
messages.push(row.get(0)?)
}
Ok(messages)
}
pub(crate) fn get_members(id: telegram_bot::ChatId) -> Result<Vec<telegram_bot::User>> {
let conn = open()?;
let mut stmt = conn.prepare_cached(
@ -373,18 +418,21 @@ pub(crate) async fn add_sentence(
};
// Save stemmed words
let words = mystem.stemming(text).await?;
let words = mystem.stemming(text)?;
conn.execute("BEGIN TRANSACTION", params![]);
for word in words {
match add_word(&word).await {
if word.lex.is_empty() {
continue;
}
match add_word(&word.lex[0].lex).await {
Ok(id) => {
debug!("Added {}: rowid: {}", &word, id);
debug!("Added {}: rowid: {}", &word.lex[0].lex, id);
match add_relation(id, msg_rowid, message).await {
Ok(_) => {}
Err(e) => panic!("SQLITE3 Error: Relations failed: {:?}", e),
}
}
Err(_) => debug!("Word {} is in stop list.", &word),
Err(_) => debug!("Word {} is in stop list.", &word.lex[0].lex),
}
}
conn.execute("END TRANSACTION", params![]);
@ -423,5 +471,3 @@ pub(crate) async fn get_top(
}
Ok(top)
}

View File

@ -1,3 +1,4 @@
use mystem::AppError as mystem_error;
use reqwest::Error as reqwest_error;
use rusqlite::Error as sqlite_error;
use serde_json::Error as serde_error;
@ -18,6 +19,7 @@ pub enum Error {
FileNotFound,
JsonParseError(serde_error),
PopenError(popen_error),
MystemError(mystem_error),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@ -60,3 +62,9 @@ impl From<popen_error> for Error {
return Error::PopenError(e);
}
}
impl From<mystem_error> for Error {
fn from(e: mystem_error) -> Error {
return Error::MystemError(e);
}
}

View File

@ -1,10 +1,9 @@
use telegram_bot::*;
use crate::mystem::MyStem;
use crate::errors;
use crate::db;
use crate::commands;
use crate::db;
use crate::errors;
use crate::utils;
use mystem::MyStem;
use telegram_bot::*;
pub async fn handler(
api: Api,
@ -13,7 +12,6 @@ pub async fn handler(
mystem: &mut MyStem,
me: User,
) -> Result<(), errors::Error> {
match message.kind {
MessageKind::Text { ref data, .. } => {
let title = utils::get_title(&message);
@ -32,6 +30,7 @@ pub async fn handler(
"/stat" => commands::top(api, message).await?,
"/markov_all" => commands::markov_all(api, message).await?,
"/markov" => commands::markov(api, message).await?,
"/omedeto" => commands::omedeto(api, message, mystem).await?,
_ => (),
}
}

View File

@ -9,9 +9,8 @@ use env_logger::Env;
mod commands;
mod db;
mod errors;
mod mystem;
mod utils;
mod handlers;
mod utils;
use mystem::MyStem;
@ -39,7 +38,12 @@ async fn main() -> Result<(), errors::Error> {
let api = Api::new(token.clone());
let mut stream = api.stream();
let me = api.send(GetMe).await?;
info!("GetMe result: Username: {}, First Name: {}, ID {}", me.username.as_ref().unwrap(), me.first_name, me.id);
info!(
"GetMe result: Username: {}, First Name: {}, ID {}",
me.username.as_ref().unwrap(),
me.first_name,
me.id
);
while let Some(update) = stream.next().await {
let update = update?;
if let UpdateKind::Message(message) = update.kind {

View File

@ -1,73 +0,0 @@
use crate::errors;
use serde_json::Value;
use std::io::{Error, Write, BufReader, prelude::*};
use subprocess::{Popen, PopenConfig, PopenError, Redirection};
pub struct MyStem {
pub process: Popen,
}
impl MyStem {
pub fn new() -> Result<Self, PopenError> {
Ok(Self {
process: MyStem::open_process()?,
})
}
fn open_process() -> Result<Popen, PopenError> {
Popen::create(
&["mystem", "-d", "--format", "json"],
PopenConfig {
stdout: Redirection::Pipe,
stdin: Redirection::Pipe,
..Default::default()
},
)
}
#[allow(dead_code)]
pub fn terminate(&mut self) -> Result<(), Error> {
self.process.terminate()
}
#[allow(unused_must_use)]
pub async fn stemming(&mut self, text: String) -> Result<Vec<String>, errors::Error> {
if let Some(exit_status) = self.process.poll() {
warn!(
"MyStem process exited with: {:?}. Restarting...",
exit_status
);
self.process = MyStem::open_process()?;
}
let mut words: Vec<String> = vec![];
let clean_text = format!("{}{}", text.trim(), "\n");
self.process
.stdin
.as_ref()
.unwrap()
.write(clean_text.as_bytes());
let mut contents = String::new();
let mut buf_reader = BufReader::new(self.process.stdout.as_ref().unwrap());
buf_reader.read_line(&mut contents);
match Some(contents) {
Some(contents) => {
let v: Vec<Value> = match serde_json::from_str(contents.as_str()) {
Ok(val) => val,
Err(_) => return Ok(vec![]),
};
for i in v {
words.push(i["analysis"][0]["lex"].to_string().replace("\"", ""));
}
words.retain(|x| x != "null");
debug!(
"Mystem PID: {}. Parsed words: {}.",
self.process.pid().unwrap(),
words.join(", ")
);
Ok(words)
}
None => return Ok(vec![]),
}
}
}