import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA
from scipy.stats import chi2_contingency
import pyLDAvis
# Load the questionnaire data; expects 'survey_data.csv' in the working directory.
data = pd.read_csv('survey_data.csv')
# Data cleaning: drop every row that has a missing value in any column.
data_clean = data.dropna()
# Text analysis: extract topics from the free-text comments with an LDA model.
vectorizer = CountVectorizer(stop_words='english')
# Keep the document-term matrix in a variable: the visualisation step needs
# the same matrix the model was fitted on.
dtm = vectorizer.fit_transform(data_clean['comments'])
lda = LDA(n_components=3, random_state=0)
lda.fit(dtm)
# Visualise the topics.
# The model is a scikit-learn LDA, so the scikit-learn adapter of pyLDAvis must
# be used (not the gensim one). The adapter module is named `lda_model` in
# newer pyLDAvis releases and `sklearn` in older ones.
try:
    import pyLDAvis.lda_model as sklearn_lda_vis
except ImportError:
    import pyLDAvis.sklearn as sklearn_lda_vis
pyLDAvis.enable_notebook()
# Argument order for the scikit-learn adapter: model, document-term matrix, vectorizer.
vis = sklearn_lda_vis.prepare(lda, dtm, vectorizer)
pyLDAvis.display(vis)
# Cross-tabulation and chi-squared test: check whether the preferred transport
# mode is independent of the age group.
age_groups = data_clean['age_group']
transport_choices = data_clean['preferred_transport']
contingency_table = pd.crosstab(index=age_groups, columns=transport_choices)
chi2, p, dof, expected = chi2_contingency(observed=contingency_table)
print(f"Chi-squared test results: chi2={chi2}, p={p}, dof={dof}")
# Satisfaction analysis: for each row, average the waiting_time,
# vehicle_cleanliness and driver_attitude columns into a single score.
satisfaction_columns = ['waiting_time', 'vehicle_cleanliness', 'driver_attitude']
satisfaction_scores = data_clean[satisfaction_columns].mean(axis=1)
print(f"Average satisfaction scores: {satisfaction_scores.mean()}")

一楼了么{:1_918:}
桂林的交通条件具备无人驾驶吗?我去过一次,感觉有点乱。
感觉像html里混了python,这是什么代码形式?
谢谢分享
无人驾驶还有很多问题要解决
页:
[1]