mirror of
https://github.com/house-of-vanity/desubot.git
synced 2025-07-08 13:04:06 +00:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
30d9d470cd | |||
3236131377 | |||
9aaa8a94f1 | |||
2d43a7d875 |
2064
Cargo.lock
generated
2064
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -21,8 +21,9 @@ hyper-tls = { version = "0.4", optional = true }
|
||||
futures = "0.3"
|
||||
hyper-rustls = { version = "0.19", optional = true }
|
||||
|
||||
rusqlite = { version = "0.24.1", features = ["bundled"]}
|
||||
rusqlite = { version = "0.24.2", features = ["bundled"]}
|
||||
html-escape = "0.2"
|
||||
regex = "1"
|
||||
reqwest = "0.10.9"
|
||||
uuid = { version = "0.8", features = ["v4"] }
|
||||
sha1 = "*"
|
||||
@ -31,4 +32,5 @@ log = { version = "^0.4.5", features = ["std"] }
|
||||
subprocess = "0.2.6"
|
||||
serde_json = "1.0"
|
||||
markov = "1.1.0"
|
||||
rand = "0.7.3"
|
||||
rand = "0.7.3"
|
||||
mystem = "0.2"
|
5
README
5
README
@ -11,5 +11,6 @@ Telegram bot with light group statistic and heavy spy features.
|
||||
== TODO ==
|
||||
* Syntax highlighting for code exported to image.
|
||||
|
||||
== Notes ==
|
||||
* Desubot uses MyStem by Yandex for word stemming and assume that mystem binary is available in PATH. On Windows it may be placed on working directory. Both Linux and Windows mystem binary is in repo.
|
||||
== Important ==
|
||||
* Desubot uses MyStem by Yandex for word stemming and assumes that the mystem binary is available in PATH.
|
||||
On Windows it may be placed in the working directory. Both the Linux and Windows mystem binaries are in the repo.
|
155
src/commands.rs
155
src/commands.rs
@ -2,7 +2,12 @@ use crate::db;
|
||||
use crate::errors::Error;
|
||||
use html_escape::encode_text;
|
||||
use markov::Chain;
|
||||
use mystem::Gender::Feminine;
|
||||
use mystem::MyStem;
|
||||
use mystem::Tense::{Inpresent, Past};
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
use regex::Regex;
|
||||
use telegram_bot::prelude::*;
|
||||
use telegram_bot::{Api, Message, ParseMode};
|
||||
|
||||
@ -61,7 +66,7 @@ pub(crate) async fn top(api: Api, message: Message) -> Result<(), Error> {
|
||||
}
|
||||
|
||||
pub(crate) async fn markov_all(api: Api, message: Message) -> Result<(), Error> {
|
||||
let messages = db::get_random_messages().await?;
|
||||
let messages = db::get_messages_random_all().await?;
|
||||
let mut chain = Chain::new();
|
||||
chain.feed(messages);
|
||||
let mut sentences = chain.generate();
|
||||
@ -82,7 +87,7 @@ pub(crate) async fn markov_all(api: Api, message: Message) -> Result<(), Error>
|
||||
}
|
||||
|
||||
pub(crate) async fn markov(api: Api, message: Message) -> Result<(), Error> {
|
||||
let messages = db::get_random_messages_group(&message).await?;
|
||||
let messages = db::get_messages_random_group(&message).await?;
|
||||
let mut chain = Chain::new();
|
||||
chain.feed(messages);
|
||||
let mut sentences = chain.generate();
|
||||
@ -101,3 +106,149 @@ pub(crate) async fn markov(api: Api, message: Message) -> Result<(), Error> {
|
||||
//api.send(message.from.text("Private text")).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn omedeto(api: Api, message: Message, mystem: &mut MyStem) -> Result<(), Error> {
|
||||
let all_msg = db::get_messages_user_all(&message).await?;
|
||||
let re = Regex::new(r"^[яЯ] [а-яА-Я]+(-[а-яА-Я]+(_[а-яА-Я]+)*)*$").unwrap();
|
||||
let mut nouns: Vec<String> = all_msg
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter(|m| re.is_match(m))
|
||||
.map(|m| m.split(' ').map(|s| s.to_string()).collect::<Vec<String>>()[1].clone())
|
||||
.filter(|m| {
|
||||
let stem = mystem.stemming(m.clone()).unwrap_or_default();
|
||||
if stem.is_empty() {
|
||||
false
|
||||
} else if stem[0].lex.is_empty() {
|
||||
false
|
||||
} else {
|
||||
match stem[0].lex[0].grammem.part_of_speech {
|
||||
mystem::PartOfSpeech::Noun => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
nouns.sort();
|
||||
nouns.dedup();
|
||||
nouns.shuffle(&mut rand::thread_rng());
|
||||
|
||||
let mut verbs_p: Vec<String> = all_msg
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter(|m| re.is_match(m))
|
||||
.map(|m| m.split(' ').map(|s| s.to_string()).collect::<Vec<String>>()[1].clone())
|
||||
.filter(|m| {
|
||||
let stem = mystem.stemming(m.clone()).unwrap_or_default();
|
||||
if stem.is_empty() {
|
||||
false
|
||||
} else if stem[0].lex.is_empty() {
|
||||
false
|
||||
} else {
|
||||
match stem[0].lex[0].grammem.part_of_speech {
|
||||
mystem::PartOfSpeech::Verb => stem[0].lex[0]
|
||||
.grammem
|
||||
.facts
|
||||
.contains(&mystem::Fact::Tense(Past)),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
verbs_p.sort();
|
||||
verbs_p.dedup();
|
||||
verbs_p.shuffle(&mut rand::thread_rng());
|
||||
|
||||
let mut verbs_i: Vec<String> = all_msg
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter(|m| re.is_match(m))
|
||||
.map(|m| m.split(' ').map(|s| s.to_string()).collect::<Vec<String>>()[1].clone())
|
||||
.filter(|m| {
|
||||
let stem = mystem.stemming(m.clone()).unwrap_or_default();
|
||||
if stem.is_empty() {
|
||||
false
|
||||
} else if stem[0].lex.is_empty() {
|
||||
false
|
||||
} else {
|
||||
match stem[0].lex[0].grammem.part_of_speech {
|
||||
mystem::PartOfSpeech::Verb => stem[0].lex[0]
|
||||
.grammem
|
||||
.facts
|
||||
.contains(&mystem::Fact::Tense(Inpresent)),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
verbs_i.sort();
|
||||
verbs_i.dedup();
|
||||
verbs_i.shuffle(&mut rand::thread_rng());
|
||||
|
||||
if nouns.is_empty() {
|
||||
nouns.push(message.from.first_name.to_string());
|
||||
}
|
||||
let start: Vec<String> = vec![
|
||||
"С новыйм годом.".into(),
|
||||
"С НГ тебя".into(),
|
||||
"Поздравляю".into(),
|
||||
"Поздравляю с НГ".into(),
|
||||
];
|
||||
//debug!("Nouns: {:#?}", nouns);
|
||||
//debug!("Verbs: {:#?}", verbs);
|
||||
|
||||
let fem = {
|
||||
let z = mystem
|
||||
.stemming(message.from.first_name.to_string())
|
||||
.unwrap();
|
||||
|
||||
if z.is_empty() {
|
||||
false
|
||||
} else if z[0].lex.is_empty() {
|
||||
false
|
||||
} else {
|
||||
if z[0].lex[0]
|
||||
.grammem
|
||||
.facts
|
||||
.contains(&mystem::Fact::Gender(Feminine))
|
||||
{
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
};
|
||||
let result = format!(
|
||||
"{} {} известн{} как {}, {}, а так же конечно {}. В прошедшем году ты часто давал{} нам знать, что ты {}, {} и {}. Не редко ты говорил{} я {}, я {} или даже я {}. =*",
|
||||
start.choose(&mut rand::thread_rng()).unwrap(),
|
||||
message.from.first_name.to_string(),
|
||||
{if fem {"ая"} else {"ый"}},
|
||||
nouns.pop().unwrap_or("=(".to_string()),
|
||||
nouns.pop().unwrap_or("=(".to_string()),
|
||||
nouns.pop().unwrap_or("=(".to_string()),
|
||||
{if fem {"а"} else {""}},
|
||||
verbs_p.pop().unwrap_or("=(".to_string()),
|
||||
verbs_p.pop().unwrap_or("=(".to_string()),
|
||||
verbs_p.pop().unwrap_or("=(".to_string()),
|
||||
{if fem {"а"} else {""}},
|
||||
verbs_i.pop().unwrap_or("=(".to_string()),
|
||||
verbs_i.pop().unwrap_or("=(".to_string()),
|
||||
verbs_i.pop().unwrap_or("=(".to_string()),
|
||||
|
||||
);
|
||||
debug!("{:?}", result);
|
||||
match api
|
||||
.send(
|
||||
message
|
||||
.text_reply(result.trim())
|
||||
.parse_mode(ParseMode::Html),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => debug!("/omedeto command sent to {}", message.chat.id()),
|
||||
Err(_) => warn!("/omedeto command sent failed to {}", message.chat.id()),
|
||||
}
|
||||
|
||||
// '^я [а-яА-Я]+(-[а-яА-Я]+(_[а-яА-Я]+)*)*$'
|
||||
Ok(())
|
||||
}
|
||||
|
70
src/db.rs
70
src/db.rs
@ -1,6 +1,6 @@
|
||||
use crate::errors;
|
||||
use crate::mystem;
|
||||
use crate::utils;
|
||||
use futures::StreamExt;
|
||||
use rusqlite::{named_params, params, Connection, Error, Result};
|
||||
use std::time::SystemTime;
|
||||
use telegram_bot::*;
|
||||
@ -102,7 +102,7 @@ pub(crate) fn get_confs() -> Result<Vec<Conf>> {
|
||||
Ok(confs)
|
||||
}
|
||||
*/
|
||||
pub(crate) async fn get_random_messages() -> Result<Vec<String>, Error> {
|
||||
pub(crate) async fn get_messages_random_all() -> Result<Vec<String>, Error> {
|
||||
let conn = open()?;
|
||||
let mut stmt = conn.prepare_cached("SELECT text FROM messages ORDER BY RANDOM() LIMIT 50")?;
|
||||
let mut rows = stmt.query_named(named_params![])?;
|
||||
@ -114,17 +114,18 @@ pub(crate) async fn get_random_messages() -> Result<Vec<String>, Error> {
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
pub(crate) async fn get_random_messages_group(
|
||||
message: &telegram_bot::Message
|
||||
pub(crate) async fn get_messages_random_group(
|
||||
message: &telegram_bot::Message,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
let conf_id = i64::from(message.chat.id());
|
||||
let conn = open()?;
|
||||
let mut stmt = conn.prepare_cached("
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"
|
||||
SELECT m.text FROM messages m
|
||||
LEFT JOIN relations r ON r.msg_id = m.id
|
||||
WHERE r.conf_id = :conf_id
|
||||
ORDER BY RANDOM() LIMIT 50
|
||||
"
|
||||
",
|
||||
)?;
|
||||
let mut rows = stmt.query_named(named_params! {":conf_id": conf_id})?;
|
||||
let mut messages = Vec::new();
|
||||
@ -135,6 +136,50 @@ pub(crate) async fn get_random_messages_group(
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
pub(crate) async fn get_messages_user_group(
|
||||
message: &telegram_bot::Message,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
let conf_id = i64::from(message.chat.id());
|
||||
let user_id = i64::from(message.from.id);
|
||||
let conn = open()?;
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"
|
||||
SELECT m.text FROM messages m
|
||||
LEFT JOIN relations r ON r.msg_id = m.id
|
||||
WHERE r.conf_id = :conf_id
|
||||
AND r.user_id = :user_id
|
||||
",
|
||||
)?;
|
||||
let mut rows = stmt.query_named(named_params! {":conf_id": conf_id, ":user_id": user_id})?;
|
||||
let mut messages = Vec::new();
|
||||
|
||||
while let Some(row) = rows.next()? {
|
||||
messages.push(row.get(0)?)
|
||||
}
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
pub(crate) async fn get_messages_user_all(
|
||||
message: &telegram_bot::Message,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
let user_id = i64::from(message.from.id);
|
||||
let conn = open()?;
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"
|
||||
SELECT m.text FROM messages m
|
||||
LEFT JOIN relations r ON r.msg_id = m.id
|
||||
WHERE r.user_id = :user_id
|
||||
",
|
||||
)?;
|
||||
let mut rows = stmt.query_named(named_params! {":user_id": user_id})?;
|
||||
let mut messages = Vec::new();
|
||||
|
||||
while let Some(row) = rows.next()? {
|
||||
messages.push(row.get(0)?)
|
||||
}
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
pub(crate) fn get_members(id: telegram_bot::ChatId) -> Result<Vec<telegram_bot::User>> {
|
||||
let conn = open()?;
|
||||
let mut stmt = conn.prepare_cached(
|
||||
@ -373,18 +418,21 @@ pub(crate) async fn add_sentence(
|
||||
};
|
||||
|
||||
// Save stemmed words
|
||||
let words = mystem.stemming(text).await?;
|
||||
let words = mystem.stemming(text)?;
|
||||
conn.execute("BEGIN TRANSACTION", params![]);
|
||||
for word in words {
|
||||
match add_word(&word).await {
|
||||
if word.lex.is_empty() {
|
||||
continue;
|
||||
}
|
||||
match add_word(&word.lex[0].lex).await {
|
||||
Ok(id) => {
|
||||
debug!("Added {}: rowid: {}", &word, id);
|
||||
debug!("Added {}: rowid: {}", &word.lex[0].lex, id);
|
||||
match add_relation(id, msg_rowid, message).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => panic!("SQLITE3 Error: Relations failed: {:?}", e),
|
||||
}
|
||||
}
|
||||
Err(_) => debug!("Word {} is in stop list.", &word),
|
||||
Err(_) => debug!("Word {} is in stop list.", &word.lex[0].lex),
|
||||
}
|
||||
}
|
||||
conn.execute("END TRANSACTION", params![]);
|
||||
@ -423,5 +471,3 @@ pub(crate) async fn get_top(
|
||||
}
|
||||
Ok(top)
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
use mystem::AppError as mystem_error;
|
||||
use reqwest::Error as reqwest_error;
|
||||
use rusqlite::Error as sqlite_error;
|
||||
use serde_json::Error as serde_error;
|
||||
@ -18,6 +19,7 @@ pub enum Error {
|
||||
FileNotFound,
|
||||
JsonParseError(serde_error),
|
||||
PopenError(popen_error),
|
||||
MystemError(mystem_error),
|
||||
}
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
@ -60,3 +62,9 @@ impl From<popen_error> for Error {
|
||||
return Error::PopenError(e);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<mystem_error> for Error {
|
||||
fn from(e: mystem_error) -> Error {
|
||||
return Error::MystemError(e);
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,9 @@
|
||||
use telegram_bot::*;
|
||||
use crate::mystem::MyStem;
|
||||
use crate::errors;
|
||||
use crate::db;
|
||||
use crate::commands;
|
||||
use crate::db;
|
||||
use crate::errors;
|
||||
use crate::utils;
|
||||
|
||||
use mystem::MyStem;
|
||||
use telegram_bot::*;
|
||||
|
||||
pub async fn handler(
|
||||
api: Api,
|
||||
@ -13,7 +12,6 @@ pub async fn handler(
|
||||
mystem: &mut MyStem,
|
||||
me: User,
|
||||
) -> Result<(), errors::Error> {
|
||||
|
||||
match message.kind {
|
||||
MessageKind::Text { ref data, .. } => {
|
||||
let title = utils::get_title(&message);
|
||||
@ -32,6 +30,7 @@ pub async fn handler(
|
||||
"/stat" => commands::top(api, message).await?,
|
||||
"/markov_all" => commands::markov_all(api, message).await?,
|
||||
"/markov" => commands::markov(api, message).await?,
|
||||
"/omedeto" => commands::omedeto(api, message, mystem).await?,
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
10
src/main.rs
10
src/main.rs
@ -9,9 +9,8 @@ use env_logger::Env;
|
||||
mod commands;
|
||||
mod db;
|
||||
mod errors;
|
||||
mod mystem;
|
||||
mod utils;
|
||||
mod handlers;
|
||||
mod utils;
|
||||
|
||||
use mystem::MyStem;
|
||||
|
||||
@ -39,7 +38,12 @@ async fn main() -> Result<(), errors::Error> {
|
||||
let api = Api::new(token.clone());
|
||||
let mut stream = api.stream();
|
||||
let me = api.send(GetMe).await?;
|
||||
info!("GetMe result: Username: {}, First Name: {}, ID {}", me.username.as_ref().unwrap(), me.first_name, me.id);
|
||||
info!(
|
||||
"GetMe result: Username: {}, First Name: {}, ID {}",
|
||||
me.username.as_ref().unwrap(),
|
||||
me.first_name,
|
||||
me.id
|
||||
);
|
||||
while let Some(update) = stream.next().await {
|
||||
let update = update?;
|
||||
if let UpdateKind::Message(message) = update.kind {
|
||||
|
@ -1,73 +0,0 @@
|
||||
use crate::errors;
|
||||
use serde_json::Value;
|
||||
use std::io::{Error, Write, BufReader, prelude::*};
|
||||
use subprocess::{Popen, PopenConfig, PopenError, Redirection};
|
||||
|
||||
pub struct MyStem {
|
||||
pub process: Popen,
|
||||
}
|
||||
|
||||
impl MyStem {
|
||||
pub fn new() -> Result<Self, PopenError> {
|
||||
Ok(Self {
|
||||
process: MyStem::open_process()?,
|
||||
})
|
||||
}
|
||||
|
||||
fn open_process() -> Result<Popen, PopenError> {
|
||||
Popen::create(
|
||||
&["mystem", "-d", "--format", "json"],
|
||||
PopenConfig {
|
||||
stdout: Redirection::Pipe,
|
||||
stdin: Redirection::Pipe,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn terminate(&mut self) -> Result<(), Error> {
|
||||
self.process.terminate()
|
||||
}
|
||||
|
||||
#[allow(unused_must_use)]
|
||||
pub async fn stemming(&mut self, text: String) -> Result<Vec<String>, errors::Error> {
|
||||
if let Some(exit_status) = self.process.poll() {
|
||||
warn!(
|
||||
"MyStem process exited with: {:?}. Restarting...",
|
||||
exit_status
|
||||
);
|
||||
self.process = MyStem::open_process()?;
|
||||
}
|
||||
let mut words: Vec<String> = vec![];
|
||||
let clean_text = format!("{}{}", text.trim(), "\n");
|
||||
self.process
|
||||
.stdin
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.write(clean_text.as_bytes());
|
||||
let mut contents = String::new();
|
||||
let mut buf_reader = BufReader::new(self.process.stdout.as_ref().unwrap());
|
||||
buf_reader.read_line(&mut contents);
|
||||
|
||||
match Some(contents) {
|
||||
Some(contents) => {
|
||||
let v: Vec<Value> = match serde_json::from_str(contents.as_str()) {
|
||||
Ok(val) => val,
|
||||
Err(_) => return Ok(vec![]),
|
||||
};
|
||||
for i in v {
|
||||
words.push(i["analysis"][0]["lex"].to_string().replace("\"", ""));
|
||||
}
|
||||
words.retain(|x| x != "null");
|
||||
debug!(
|
||||
"Mystem PID: {}. Parsed words: {}.",
|
||||
self.process.pid().unwrap(),
|
||||
words.join(", ")
|
||||
);
|
||||
Ok(words)
|
||||
}
|
||||
None => return Ok(vec![]),
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user